163 files changed, 4553 insertions, 2432 deletions
diff --git a/numpy/__init__.pxd b/numpy/__init__.pxd
new file mode 100644
index 000000000..23bd22e36
--- /dev/null
+++ b/numpy/__init__.pxd
@@ -0,0 +1,978 @@
+# NumPy static imports for Cython
+#
+# If any of the PyArray_* functions are called, import_array must be
+# called first.
+#
+# This also defines backwards-compatibility buffer acquisition
+# code for use in Python 2.x (or Python <= 2.5 when NumPy starts
+# implementing PEP-3118 directly).
+#
+# Because of laziness, the format string of the buffer is statically
+# allocated. Increase the size if this is not enough, or submit a
+# patch to do this properly.
+#
+# Author: Dag Sverre Seljebotn
+#
+
+DEF _buffer_format_string_len = 255
+
+cimport cpython.buffer as pybuf
+from cpython.ref cimport Py_INCREF
+from cpython.mem cimport PyObject_Malloc, PyObject_Free
+from cpython.object cimport PyObject, PyTypeObject
+from cpython.buffer cimport PyObject_GetBuffer
+from cpython.type cimport type
+cimport libc.stdio as stdio
+
+cdef extern from "Python.h":
+    ctypedef int Py_intptr_t
+
+cdef extern from "numpy/arrayobject.h":
+    ctypedef Py_intptr_t npy_intp
+    ctypedef size_t npy_uintp
+
+    cdef enum NPY_TYPES:
+        NPY_BOOL
+        NPY_BYTE
+        NPY_UBYTE
+        NPY_SHORT
+        NPY_USHORT
+        NPY_INT
+        NPY_UINT
+        NPY_LONG
+        NPY_ULONG
+        NPY_LONGLONG
+        NPY_ULONGLONG
+        NPY_FLOAT
+        NPY_DOUBLE
+        NPY_LONGDOUBLE
+        NPY_CFLOAT
+        NPY_CDOUBLE
+        NPY_CLONGDOUBLE
+        NPY_OBJECT
+        NPY_STRING
+        NPY_UNICODE
+        NPY_VOID
+        NPY_DATETIME
+        NPY_TIMEDELTA
+        NPY_NTYPES
+        NPY_NOTYPE
+
+        NPY_INT8
+        NPY_INT16
+        NPY_INT32
+        NPY_INT64
+        NPY_INT128
+        NPY_INT256
+        NPY_UINT8
+        NPY_UINT16
+        NPY_UINT32
+        NPY_UINT64
+        NPY_UINT128
+        NPY_UINT256
+        NPY_FLOAT16
+        NPY_FLOAT32
+        NPY_FLOAT64
+        NPY_FLOAT80
+        NPY_FLOAT96
+        NPY_FLOAT128
+        NPY_FLOAT256
+        NPY_COMPLEX32
+        NPY_COMPLEX64
+        NPY_COMPLEX128
+        NPY_COMPLEX160
+        NPY_COMPLEX192
+        NPY_COMPLEX256
+        NPY_COMPLEX512
+
+        NPY_INTP
+
+    ctypedef enum NPY_ORDER:
+        NPY_ANYORDER
+        NPY_CORDER
+        NPY_FORTRANORDER
+        NPY_KEEPORDER
+
+    ctypedef enum NPY_CASTING:
+        NPY_NO_CASTING
+        NPY_EQUIV_CASTING
+        NPY_SAFE_CASTING
+        NPY_SAME_KIND_CASTING
+        NPY_UNSAFE_CASTING
+
+    ctypedef enum NPY_CLIPMODE:
+        NPY_CLIP
+        NPY_WRAP
+        NPY_RAISE
+
+    ctypedef enum NPY_SCALARKIND:
+        NPY_NOSCALAR,
+        NPY_BOOL_SCALAR,
+        NPY_INTPOS_SCALAR,
+        NPY_INTNEG_SCALAR,
+        NPY_FLOAT_SCALAR,
+        NPY_COMPLEX_SCALAR,
+        NPY_OBJECT_SCALAR
+
+    ctypedef enum NPY_SORTKIND:
+        NPY_QUICKSORT
+        NPY_HEAPSORT
+        NPY_MERGESORT
+
+    ctypedef enum NPY_SEARCHSIDE:
+        NPY_SEARCHLEFT
+        NPY_SEARCHRIGHT
+
+    enum:
+        # DEPRECATED since NumPy 1.7 ! Do not use in new code!
+        NPY_C_CONTIGUOUS
+        NPY_F_CONTIGUOUS
+        NPY_CONTIGUOUS
+        NPY_FORTRAN
+        NPY_OWNDATA
+        NPY_FORCECAST
+        NPY_ENSURECOPY
+        NPY_ENSUREARRAY
+        NPY_ELEMENTSTRIDES
+        NPY_ALIGNED
+        NPY_NOTSWAPPED
+        NPY_WRITEABLE
+        NPY_UPDATEIFCOPY
+        NPY_ARR_HAS_DESCR
+
+        NPY_BEHAVED
+        NPY_BEHAVED_NS
+        NPY_CARRAY
+        NPY_CARRAY_RO
+        NPY_FARRAY
+        NPY_FARRAY_RO
+        NPY_DEFAULT
+
+        NPY_IN_ARRAY
+        NPY_OUT_ARRAY
+        NPY_INOUT_ARRAY
+        NPY_IN_FARRAY
+        NPY_OUT_FARRAY
+        NPY_INOUT_FARRAY
+
+        NPY_UPDATE_ALL
+
+    enum:
+        # Added in NumPy 1.7 to replace the deprecated enums above.
+        NPY_ARRAY_C_CONTIGUOUS
+        NPY_ARRAY_F_CONTIGUOUS
+        NPY_ARRAY_OWNDATA
+        NPY_ARRAY_FORCECAST
+        NPY_ARRAY_ENSURECOPY
+        NPY_ARRAY_ENSUREARRAY
+        NPY_ARRAY_ELEMENTSTRIDES
+        NPY_ARRAY_ALIGNED
+        NPY_ARRAY_NOTSWAPPED
+        NPY_ARRAY_WRITEABLE
+        NPY_ARRAY_UPDATEIFCOPY
+
+        NPY_ARRAY_BEHAVED
+        NPY_ARRAY_BEHAVED_NS
+        NPY_ARRAY_CARRAY
+        NPY_ARRAY_CARRAY_RO
+        NPY_ARRAY_FARRAY
+        NPY_ARRAY_FARRAY_RO
+        NPY_ARRAY_DEFAULT
+
+        NPY_ARRAY_IN_ARRAY
+        NPY_ARRAY_OUT_ARRAY
+        NPY_ARRAY_INOUT_ARRAY
+        NPY_ARRAY_IN_FARRAY
+        NPY_ARRAY_OUT_FARRAY
+        NPY_ARRAY_INOUT_FARRAY
+
+        NPY_ARRAY_UPDATE_ALL
+
+    cdef enum:
+        NPY_MAXDIMS
+
+    npy_intp NPY_MAX_ELSIZE
+
+    ctypedef void (*PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *,  void *)
+
+    ctypedef struct PyArray_ArrayDescr:
+        # shape is a tuple, but Cython doesn't support "tuple shape"
+        # inside a non-PyObject declaration, so we have to declare it
+        # as just a PyObject*.
+        PyObject* shape
+
+    ctypedef struct PyArray_Descr:
+        pass
+
+    ctypedef class numpy.dtype [object PyArray_Descr, check_size ignore]:
+        # Use PyDataType_* macros when possible, however there are no macros
+        # for accessing some of the fields, so some are defined.
+        cdef PyTypeObject* typeobj
+        cdef char kind
+        cdef char type
+        # Numpy sometimes mutates this without warning (e.g. it'll
+        # sometimes change "|" to "<" in shared dtype objects on
+        # little-endian machines). If this matters to you, use
+        # PyArray_IsNativeByteOrder(dtype.byteorder) instead of
+        # directly accessing this field.
+        cdef char byteorder
+        cdef char flags
+        cdef int type_num
+        cdef int itemsize "elsize"
+        cdef int alignment
+        cdef dict fields
+        cdef tuple names
+        # Use PyDataType_HASSUBARRAY to test whether this field is
+        # valid (the pointer can be NULL). Most users should access
+        # this field via the inline helper method PyDataType_SHAPE.
+        cdef PyArray_ArrayDescr* subarray
+
+    ctypedef extern class numpy.flatiter [object PyArrayIterObject, check_size ignore]:
+        # Use through macros
+        pass
+
+    ctypedef extern class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]:
+        cdef int numiter
+        cdef npy_intp size, index
+        cdef int nd
+        cdef npy_intp *dimensions
+        cdef void **iters
+
+    ctypedef struct PyArrayObject:
+        # For use in situations where ndarray can't replace PyArrayObject*,
+        # like PyArrayObject**.
+        pass
+
+    ctypedef class numpy.ndarray [object PyArrayObject, check_size ignore]:
+        cdef __cythonbufferdefaults__ = {"mode": "strided"}
+
+        cdef:
+            # Only taking a few of the most commonly used and stable fields.
+            # One should use PyArray_* macros instead to access the C fields.
+            char *data
+            int ndim "nd"
+            npy_intp *shape "dimensions"
+            npy_intp *strides
+            dtype descr  # deprecated since NumPy 1.7 !
+            PyObject* base
+
+        # Note: This syntax (function definition in pxd files) is an
+        # experimental exception made for __getbuffer__ and __releasebuffer__
+        # -- the details of this may change.
+        def __getbuffer__(ndarray self, Py_buffer* info, int flags):
+            PyObject_GetBuffer(<object>self, info, flags);
+
+        def __releasebuffer__(ndarray self, Py_buffer* info):
+            # We should call a possible tp_bufferrelease(self, info) but no
+            # interface to that is exposed by cython or python. And currently
+            # the function is NULL in numpy, we rely on refcounting to release
+            # info when self is collected
+            pass
+
+
+    ctypedef unsigned char      npy_bool
+
+    ctypedef signed char      npy_byte
+    ctypedef signed short     npy_short
+    ctypedef signed int       npy_int
+    ctypedef signed long      npy_long
+    ctypedef signed long long npy_longlong
+
+    ctypedef unsigned char      npy_ubyte
+    ctypedef unsigned short     npy_ushort
+    ctypedef unsigned int       npy_uint
+    ctypedef unsigned long      npy_ulong
+    ctypedef unsigned long long npy_ulonglong
+
+    ctypedef float        npy_float
+    ctypedef double       npy_double
+    ctypedef long double  npy_longdouble
+
+    ctypedef signed char        npy_int8
+    ctypedef signed short       npy_int16
+    ctypedef signed int         npy_int32
+    ctypedef signed long long   npy_int64
+    ctypedef signed long long   npy_int96
+    ctypedef signed long long   npy_int128
+
+    ctypedef unsigned char      npy_uint8
+    ctypedef unsigned short     npy_uint16
+    ctypedef unsigned int       npy_uint32
+    ctypedef unsigned long long npy_uint64
+    ctypedef unsigned long long npy_uint96
+    ctypedef unsigned long long npy_uint128
+
+    ctypedef float        npy_float32
+    ctypedef double       npy_float64
+    ctypedef long double  npy_float80
+    ctypedef long double  npy_float96
+    ctypedef long double  npy_float128
+
+    ctypedef struct npy_cfloat:  # single-precision complex: two C floats, same layout as npy_complex64
+        float real
+        float imag
+
+    ctypedef struct npy_cdouble:
+        double real
+        double imag
+
+    ctypedef struct npy_clongdouble:
+        long double real
+        long double imag
+
+    ctypedef struct npy_complex64:
+        float real
+        float imag
+
+    ctypedef struct npy_complex128:
+        double real
+        double imag
+
+    ctypedef struct npy_complex160:
+        long double real
+        long double imag
+
+    ctypedef struct npy_complex192:
+        long double real
+        long double imag
+
+    ctypedef struct npy_complex256:
+        long double real
+        long double imag
+
+    ctypedef struct PyArray_Dims:
+        npy_intp *ptr
+        int len
+
+    int _import_array() except -1
+
+    #
+    # Macros from ndarrayobject.h
+    #
+    bint PyArray_CHKFLAGS(ndarray m, int flags)
+    bint PyArray_IS_C_CONTIGUOUS(ndarray arr)
+    bint PyArray_IS_F_CONTIGUOUS(ndarray arr)
+    bint PyArray_ISCONTIGUOUS(ndarray m)
+    bint PyArray_ISWRITEABLE(ndarray m)
+    bint PyArray_ISALIGNED(ndarray m)
+
+    int PyArray_NDIM(ndarray)
+    bint PyArray_ISONESEGMENT(ndarray)
+    bint PyArray_ISFORTRAN(ndarray)
+    int PyArray_FORTRANIF(ndarray)
+
+    void* PyArray_DATA(ndarray)
+    char* PyArray_BYTES(ndarray)
+    npy_intp* PyArray_DIMS(ndarray)
+    npy_intp* PyArray_STRIDES(ndarray)
+    npy_intp PyArray_DIM(ndarray, size_t)
+    npy_intp PyArray_STRIDE(ndarray, size_t)
+
+    PyObject *PyArray_BASE(ndarray)  # returns borrowed reference!
+    PyArray_Descr *PyArray_DESCR(ndarray) # returns borrowed reference to dtype!
+    int PyArray_FLAGS(ndarray)
+    npy_intp PyArray_ITEMSIZE(ndarray)
+    int PyArray_TYPE(ndarray arr)
+
+    object PyArray_GETITEM(ndarray arr, void *itemptr)
+    int PyArray_SETITEM(ndarray arr, void *itemptr, object obj)
+
+    bint PyTypeNum_ISBOOL(int)
+    bint PyTypeNum_ISUNSIGNED(int)
+    bint PyTypeNum_ISSIGNED(int)
+    bint PyTypeNum_ISINTEGER(int)
+    bint PyTypeNum_ISFLOAT(int)
+    bint PyTypeNum_ISNUMBER(int)
+    bint PyTypeNum_ISSTRING(int)
+    bint PyTypeNum_ISCOMPLEX(int)
+    bint PyTypeNum_ISPYTHON(int)
+    bint PyTypeNum_ISFLEXIBLE(int)
+    bint PyTypeNum_ISUSERDEF(int)
+    bint PyTypeNum_ISEXTENDED(int)
+    bint PyTypeNum_ISOBJECT(int)
+
+    bint PyDataType_ISBOOL(dtype)
+    bint PyDataType_ISUNSIGNED(dtype)
+    bint PyDataType_ISSIGNED(dtype)
+    bint PyDataType_ISINTEGER(dtype)
+    bint PyDataType_ISFLOAT(dtype)
+    bint PyDataType_ISNUMBER(dtype)
+    bint PyDataType_ISSTRING(dtype)
+    bint PyDataType_ISCOMPLEX(dtype)
+    bint PyDataType_ISPYTHON(dtype)
+    bint PyDataType_ISFLEXIBLE(dtype)
+    bint PyDataType_ISUSERDEF(dtype)
+    bint PyDataType_ISEXTENDED(dtype)
+    bint PyDataType_ISOBJECT(dtype)
+    bint PyDataType_HASFIELDS(dtype)
+    bint PyDataType_HASSUBARRAY(dtype)
+
+    bint PyArray_ISBOOL(ndarray)
+    bint PyArray_ISUNSIGNED(ndarray)
+    bint PyArray_ISSIGNED(ndarray)
+    bint PyArray_ISINTEGER(ndarray)
+    bint PyArray_ISFLOAT(ndarray)
+    bint PyArray_ISNUMBER(ndarray)
+    bint PyArray_ISSTRING(ndarray)
+    bint PyArray_ISCOMPLEX(ndarray)
+    bint PyArray_ISPYTHON(ndarray)
+    bint PyArray_ISFLEXIBLE(ndarray)
+    bint PyArray_ISUSERDEF(ndarray)
+    bint PyArray_ISEXTENDED(ndarray)
+    bint PyArray_ISOBJECT(ndarray)
+    bint PyArray_HASFIELDS(ndarray)
+
+    bint PyArray_ISVARIABLE(ndarray)
+
+    bint PyArray_SAFEALIGNEDCOPY(ndarray)
+    bint PyArray_ISNBO(char)              # works on dtype.byteorder
+    bint PyArray_IsNativeByteOrder(char)  # works on dtype.byteorder
+    bint PyArray_ISNOTSWAPPED(ndarray)
+    bint PyArray_ISBYTESWAPPED(ndarray)
+
+    bint PyArray_FLAGSWAP(ndarray, int)
+
+    bint PyArray_ISCARRAY(ndarray)
+    bint PyArray_ISCARRAY_RO(ndarray)
+    bint PyArray_ISFARRAY(ndarray)
+    bint PyArray_ISFARRAY_RO(ndarray)
+    bint PyArray_ISBEHAVED(ndarray)
+    bint PyArray_ISBEHAVED_RO(ndarray)
+
+
+    bint PyDataType_ISNOTSWAPPED(dtype)
+    bint PyDataType_ISBYTESWAPPED(dtype)
+
+    bint PyArray_DescrCheck(object)
+
+    bint PyArray_Check(object)
+    bint PyArray_CheckExact(object)
+
+    # Cannot be supported due to out arg:
+    # bint PyArray_HasArrayInterfaceType(object, dtype, object, object&)
+    # bint PyArray_HasArrayInterface(op, out)
+
+
+    bint PyArray_IsZeroDim(object)
+    # Cannot be supported due to ## ## in macro:
+    # bint PyArray_IsScalar(object, verbatim work)
+    bint PyArray_CheckScalar(object)
+    bint PyArray_IsPythonNumber(object)
+    bint PyArray_IsPythonScalar(object)
+    bint PyArray_IsAnyScalar(object)
+    bint PyArray_CheckAnyScalar(object)
+    ndarray PyArray_GETCONTIGUOUS(ndarray)
+    bint PyArray_SAMESHAPE(ndarray, ndarray)
+    npy_intp PyArray_SIZE(ndarray)
+    npy_intp PyArray_NBYTES(ndarray)
+
+    object PyArray_FROM_O(object)
+    object PyArray_FROM_OF(object m, int flags)
+    object PyArray_FROM_OT(object m, int type)
+    object PyArray_FROM_OTF(object m, int type, int flags)
+    object PyArray_FROMANY(object m, int type, int min, int max, int flags)
+    object PyArray_ZEROS(int nd, npy_intp* dims, int type, int fortran)
+    object PyArray_EMPTY(int nd, npy_intp* dims, int type, int fortran)
+    void PyArray_FILLWBYTE(object, int val)
+    npy_intp PyArray_REFCOUNT(object)
+    object PyArray_ContiguousFromAny(op, int, int min_depth, int max_depth)
+    unsigned char PyArray_EquivArrTypes(ndarray a1, ndarray a2)
+    bint PyArray_EquivByteorders(int b1, int b2)
+    object PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
+    object PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
+    #object PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, dtype descr)
+    object PyArray_ToScalar(void* data, ndarray arr)
+
+    void* PyArray_GETPTR1(ndarray m, npy_intp i)
+    void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j)
+    void* PyArray_GETPTR3(ndarray m, npy_intp i, npy_intp j, npy_intp k)
+    void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l)
+
+    void PyArray_XDECREF_ERR(ndarray)
+    # Cannot be supported due to out arg
+    # void PyArray_DESCR_REPLACE(descr)
+
+
+    object PyArray_Copy(ndarray)
+    object PyArray_FromObject(object op, int type, int min_depth, int max_depth)
+    object PyArray_ContiguousFromObject(object op, int type, int min_depth, int max_depth)
+    object PyArray_CopyFromObject(object op, int type, int min_depth, int max_depth)
+
+    object PyArray_Cast(ndarray mp, int type_num)
+    object PyArray_Take(ndarray ap, object items, int axis)
+    object PyArray_Put(ndarray ap, object items, object values)
+
+    void PyArray_ITER_RESET(flatiter it) nogil
+    void PyArray_ITER_NEXT(flatiter it) nogil
+    void PyArray_ITER_GOTO(flatiter it, npy_intp* destination) nogil
+    void PyArray_ITER_GOTO1D(flatiter it, npy_intp ind) nogil
+    void* PyArray_ITER_DATA(flatiter it) nogil
+    bint PyArray_ITER_NOTDONE(flatiter it) nogil
+
+    void PyArray_MultiIter_RESET(broadcast multi) nogil
+    void PyArray_MultiIter_NEXT(broadcast multi) nogil
+    void PyArray_MultiIter_GOTO(broadcast multi, npy_intp* dest) nogil  # dest is a coordinate array, as in PyArray_ITER_GOTO
+    void PyArray_MultiIter_GOTO1D(broadcast multi, npy_intp ind) nogil
+    void* PyArray_MultiIter_DATA(broadcast multi, npy_intp i) nogil
+    void PyArray_MultiIter_NEXTi(broadcast multi, npy_intp i) nogil
+    bint PyArray_MultiIter_NOTDONE(broadcast multi) nogil
+
+    # Functions from __multiarray_api.h
+
+    # Functions taking dtype and returning object/ndarray are disabled
+    # for now as they steal dtype references. I'm conservative and disable
+    # more than is probably needed until it can be checked further.
+    int PyArray_SetNumericOps        (object)
+    object PyArray_GetNumericOps ()
+    int PyArray_INCREF (ndarray)
+    int PyArray_XDECREF (ndarray)
+    void PyArray_SetStringFunction (object, int)
+    dtype PyArray_DescrFromType (int)
+    object PyArray_TypeObjectFromType (int)
+    char * PyArray_Zero (ndarray)
+    char * PyArray_One (ndarray)
+    #object PyArray_CastToType (ndarray, dtype, int)
+    int PyArray_CastTo (ndarray, ndarray)
+    int PyArray_CastAnyTo (ndarray, ndarray)
+    int PyArray_CanCastSafely (int, int)
+    npy_bool PyArray_CanCastTo (dtype, dtype)
+    int PyArray_ObjectType (object, int)
+    dtype PyArray_DescrFromObject (object, dtype)
+    #ndarray* PyArray_ConvertToCommonType (object, int *)
+    dtype PyArray_DescrFromScalar (object)
+    dtype PyArray_DescrFromTypeObject (object)
+    npy_intp PyArray_Size (object)
+    #object PyArray_Scalar (void *, dtype, object)
+    #object PyArray_FromScalar (object, dtype)
+    void PyArray_ScalarAsCtype (object, void *)
+    #int PyArray_CastScalarToCtype (object, void *, dtype)
+    #int PyArray_CastScalarDirect (object, dtype, void *, int)
+    object PyArray_ScalarFromObject (object)
+    #PyArray_VectorUnaryFunc * PyArray_GetCastFunc (dtype, int)
+    object PyArray_FromDims (int, int *, int)
+    #object PyArray_FromDimsAndDataAndDescr (int, int *, dtype, char *)
+    #object PyArray_FromAny (object, dtype, int, int, int, object)
+    object PyArray_EnsureArray (object)
+    object PyArray_EnsureAnyArray (object)
+    #object PyArray_FromFile (stdio.FILE *, dtype, npy_intp, char *)
+    #object PyArray_FromString (char *, npy_intp, dtype, npy_intp, char *)
+    #object PyArray_FromBuffer (object, dtype, npy_intp, npy_intp)
+    #object PyArray_FromIter (object, dtype, npy_intp)
+    object PyArray_Return (ndarray)
+    #object PyArray_GetField (ndarray, dtype, int)
+    #int PyArray_SetField (ndarray, dtype, int, object)
+    object PyArray_Byteswap (ndarray, npy_bool)
+    object PyArray_Resize (ndarray, PyArray_Dims *, int, NPY_ORDER)
+    int PyArray_MoveInto (ndarray, ndarray)
+    int PyArray_CopyInto (ndarray, ndarray)
+    int PyArray_CopyAnyInto (ndarray, ndarray)
+    int PyArray_CopyObject (ndarray, object)
+    object PyArray_NewCopy (ndarray, NPY_ORDER)
+    object PyArray_ToList (ndarray)
+    object PyArray_ToString (ndarray, NPY_ORDER)
+    int PyArray_ToFile (ndarray, stdio.FILE *, char *, char *)
+    int PyArray_Dump (object, object, int)
+    object PyArray_Dumps (object, int)
+    int PyArray_ValidType (int)
+    void PyArray_UpdateFlags (ndarray, int)
+    object PyArray_New (type, int, npy_intp *, int, npy_intp *, void *, int, int, object)
+    #object PyArray_NewFromDescr (type, dtype, int, npy_intp *, npy_intp *, void *, int, object)
+    #dtype PyArray_DescrNew (dtype)
+    dtype PyArray_DescrNewFromType (int)
+    double PyArray_GetPriority (object, double)
+    object PyArray_IterNew (object)
+    object PyArray_MultiIterNew (int, ...)
+
+    int PyArray_PyIntAsInt (object)
+    npy_intp PyArray_PyIntAsIntp (object)
+    int PyArray_Broadcast (broadcast)
+    void PyArray_FillObjectArray (ndarray, object)
+    int PyArray_FillWithScalar (ndarray, object)
+    npy_bool PyArray_CheckStrides (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)
+    dtype PyArray_DescrNewByteorder (dtype, char)
+    object PyArray_IterAllButAxis (object, int *)
+    #object PyArray_CheckFromAny (object, dtype, int, int, int, object)
+    #object PyArray_FromArray (ndarray, dtype, int)
+    object PyArray_FromInterface (object)
+    object PyArray_FromStructInterface (object)
+    #object PyArray_FromArrayAttr (object, dtype, object)
+    #NPY_SCALARKIND PyArray_ScalarKind (int, ndarray*)
+    int PyArray_CanCoerceScalar (int, int, NPY_SCALARKIND)
+    object PyArray_NewFlagsObject (object)
+    npy_bool PyArray_CanCastScalar (type, type)
+    #int PyArray_CompareUCS4 (npy_ucs4 *, npy_ucs4 *, register size_t)
+    int PyArray_RemoveSmallest (broadcast)
+    int PyArray_ElementStrides (object)
+    void PyArray_Item_INCREF (char *, dtype)
+    void PyArray_Item_XDECREF (char *, dtype)
+    object PyArray_FieldNames (object)
+    object PyArray_Transpose (ndarray, PyArray_Dims *)
+    object PyArray_TakeFrom (ndarray, object, int, ndarray, NPY_CLIPMODE)
+    object PyArray_PutTo (ndarray, object, object, NPY_CLIPMODE)
+    object PyArray_PutMask (ndarray, object, object)
+    object PyArray_Repeat (ndarray, object, int)
+    object PyArray_Choose (ndarray, object, ndarray, NPY_CLIPMODE)
+    int PyArray_Sort (ndarray, int, NPY_SORTKIND)
+    object PyArray_ArgSort (ndarray, int, NPY_SORTKIND)
+    object PyArray_SearchSorted (ndarray, object, NPY_SEARCHSIDE)
+    object PyArray_ArgMax (ndarray, int, ndarray)
+    object PyArray_ArgMin (ndarray, int, ndarray)
+    object PyArray_Reshape (ndarray, object)
+    object PyArray_Newshape (ndarray, PyArray_Dims *, NPY_ORDER)
+    object PyArray_Squeeze (ndarray)
+    #object PyArray_View (ndarray, dtype, type)
+    object PyArray_SwapAxes (ndarray, int, int)
+    object PyArray_Max (ndarray, int, ndarray)
+    object PyArray_Min (ndarray, int, ndarray)
+    object PyArray_Ptp (ndarray, int, ndarray)
+    object PyArray_Mean (ndarray, int, int, ndarray)
+    object PyArray_Trace (ndarray, int, int, int, int, ndarray)
+    object PyArray_Diagonal (ndarray, int, int, int)
+    object PyArray_Clip (ndarray, object, object, ndarray)
+    object PyArray_Conjugate (ndarray, ndarray)
+    object PyArray_Nonzero (ndarray)
+    object PyArray_Std (ndarray, int, int, ndarray, int)
+    object PyArray_Sum (ndarray, int, int, ndarray)
+    object PyArray_CumSum (ndarray, int, int, ndarray)
+    object PyArray_Prod (ndarray, int, int, ndarray)
+    object PyArray_CumProd (ndarray, int, int, ndarray)
+    object PyArray_All (ndarray, int, ndarray)
+    object PyArray_Any (ndarray, int, ndarray)
+    object PyArray_Compress (ndarray, object, int, ndarray)
+    object PyArray_Flatten (ndarray, NPY_ORDER)
+    object PyArray_Ravel (ndarray, NPY_ORDER)
+    npy_intp PyArray_MultiplyList (npy_intp *, int)
+    int PyArray_MultiplyIntList (int *, int)
+    void * PyArray_GetPtr (ndarray, npy_intp*)
+    int PyArray_CompareLists (npy_intp *, npy_intp *, int)
+    #int PyArray_AsCArray (object*, void *, npy_intp *, int, dtype)
+    #int PyArray_As1D (object*, char **, int *, int)
+    #int PyArray_As2D (object*, char ***, int *, int *, int)
+    int PyArray_Free (object, void *)
+    #int PyArray_Converter (object, object*)
+    int PyArray_IntpFromSequence (object, npy_intp *, int)
+    object PyArray_Concatenate (object, int)
+    object PyArray_InnerProduct (object, object)
+    object PyArray_MatrixProduct (object, object)
+    object PyArray_CopyAndTranspose (object)
+    object PyArray_Correlate (object, object, int)
+    int PyArray_TypestrConvert (int, int)
+    #int PyArray_DescrConverter (object, dtype*)
+    #int PyArray_DescrConverter2 (object, dtype*)
+    int PyArray_IntpConverter (object, PyArray_Dims *)
+    #int PyArray_BufferConverter (object, chunk)
+    int PyArray_AxisConverter (object, int *)
+    int PyArray_BoolConverter (object, npy_bool *)
+    int PyArray_ByteorderConverter (object, char *)
+    int PyArray_OrderConverter (object, NPY_ORDER *)
+    unsigned char PyArray_EquivTypes (dtype, dtype)
+    #object PyArray_Zeros (int, npy_intp *, dtype, int)
+    #object PyArray_Empty (int, npy_intp *, dtype, int)
+    object PyArray_Where (object, object, object)
+    object PyArray_Arange (double, double, double, int)
+    #object PyArray_ArangeObj (object, object, object, dtype)
+    int PyArray_SortkindConverter (object, NPY_SORTKIND *)
+    object PyArray_LexSort (object, int)
+    object PyArray_Round (ndarray, int, ndarray)
+    unsigned char PyArray_EquivTypenums (int, int)
+    int PyArray_RegisterDataType (dtype)
+    int PyArray_RegisterCastFunc (dtype, int, PyArray_VectorUnaryFunc *)
+    int PyArray_RegisterCanCast (dtype, int, NPY_SCALARKIND)
+    #void PyArray_InitArrFuncs (PyArray_ArrFuncs *)
+    object PyArray_IntTupleFromIntp (int, npy_intp *)
+    int PyArray_TypeNumFromName (char *)
+    int PyArray_ClipmodeConverter (object, NPY_CLIPMODE *)
+    #int PyArray_OutputConverter (object, ndarray*)
+    object PyArray_BroadcastToShape (object, npy_intp *, int)
+    void _PyArray_SigintHandler (int)
+    void* _PyArray_GetSigintBuf ()
+    #int PyArray_DescrAlignConverter (object, dtype*)
+    #int PyArray_DescrAlignConverter2 (object, dtype*)
+    int PyArray_SearchsideConverter (object, void *)
+    object PyArray_CheckAxis (ndarray, int *, int)
+    npy_intp PyArray_OverflowMultiplyList (npy_intp *, int)
+    int PyArray_CompareString (char *, char *, size_t)
+    int PyArray_SetBaseObject(ndarray, base)  # NOTE: steals a reference to base! Use "set_array_base()" instead.
+
+
+# Typedefs that match the runtime dtype objects in
+# the numpy module.
+
+# The ones that are commented out need an IFDEF function
+# in Cython to enable them only on the right systems.
+
+ctypedef npy_int8       int8_t
+ctypedef npy_int16      int16_t
+ctypedef npy_int32      int32_t
+ctypedef npy_int64      int64_t
+#ctypedef npy_int96      int96_t
+#ctypedef npy_int128     int128_t
+
+ctypedef npy_uint8      uint8_t
+ctypedef npy_uint16     uint16_t
+ctypedef npy_uint32     uint32_t
+ctypedef npy_uint64     uint64_t
+#ctypedef npy_uint96     uint96_t
+#ctypedef npy_uint128    uint128_t
+
+ctypedef npy_float32    float32_t
+ctypedef npy_float64    float64_t
+#ctypedef npy_float80    float80_t
+#ctypedef npy_float128   float128_t
+
+ctypedef float complex  complex64_t
+ctypedef double complex complex128_t
+
+# The int types are mapped a bit surprisingly --
+# numpy.int corresponds to 'l' and numpy.long to 'q'
+ctypedef npy_long       int_t
+ctypedef npy_longlong   long_t
+ctypedef npy_longlong   longlong_t
+
+ctypedef npy_ulong      uint_t
+ctypedef npy_ulonglong  ulong_t
+ctypedef npy_ulonglong  ulonglong_t
+
+ctypedef npy_intp       intp_t
+ctypedef npy_uintp      uintp_t
+
+ctypedef npy_double     float_t
+ctypedef npy_double     double_t
+ctypedef npy_longdouble longdouble_t
+
+ctypedef npy_cfloat      cfloat_t
+ctypedef npy_cdouble     cdouble_t
+ctypedef npy_clongdouble clongdouble_t
+
+ctypedef npy_cdouble     complex_t
+
+cdef inline object PyArray_MultiIterNew1(a):
+    return PyArray_MultiIterNew(1, <void*>a)
+
+cdef inline object PyArray_MultiIterNew2(a, b):
+    return PyArray_MultiIterNew(2, <void*>a, <void*>b)
+
+cdef inline object PyArray_MultiIterNew3(a, b, c):
+    return PyArray_MultiIterNew(3, <void*>a, <void*>b, <void*> c)
+
+cdef inline object PyArray_MultiIterNew4(a, b, c, d):
+    return PyArray_MultiIterNew(4, <void*>a, <void*>b, <void*>c, <void*> d)
+
+cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
+    return PyArray_MultiIterNew(5, <void*>a, <void*>b, <void*>c, <void*> d, <void*> e)
+
+cdef inline tuple PyDataType_SHAPE(dtype d):
+    if PyDataType_HASSUBARRAY(d):
+        return <tuple>d.subarray.shape
+    else:
+        return ()
+
+cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
+    # Recursively writes the format codes for descr's fields at f, advancing
+    # offset[0] past each field; returns the new position in the format string.
+
+    cdef dtype child
+    cdef int endian_detector = 1
+    cdef bint little_endian = ((<char*>&endian_detector)[0] != 0)
+    cdef tuple fields
+
+    for childname in descr.names:
+        fields = descr.fields[childname]
+        child, new_offset = fields
+
+        if (end - f) - <int>(new_offset - offset[0]) < 15:
+            raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
+
+        if ((child.byteorder == c'>' and little_endian) or
+            (child.byteorder == c'<' and not little_endian)):
+            raise ValueError(u"Non-native byte order not supported")
+            # One could encode it in the format string and have Cython
+            # complain instead, BUT: < and > in format strings also imply
+            # standardized sizes for datatypes, and we rely on native in
+            # order to avoid reencoding data types based on their size.
+            #
+            # A proper PEP 3118 exporter for other clients than Cython
+            # must deal properly with this!
+
+        # Emit one "x" pad byte per byte of gap before this field's offset
+        while offset[0] < new_offset:
+            f[0] = 120 # "x"; pad byte
+            f += 1
+            offset[0] += 1
+
+        offset[0] += child.itemsize
+
+        if not PyDataType_HASFIELDS(child):
+            t = child.type_num
+            if end - f < 5:
+                raise RuntimeError(u"Format string allocated too short.")
+
+            # Until ticket #99 is fixed, use integers to avoid warnings
+            if   t == NPY_BYTE:        f[0] =  98 #"b"
+            elif t == NPY_UBYTE:       f[0] =  66 #"B"
+            elif t == NPY_SHORT:       f[0] = 104 #"h"
+            elif t == NPY_USHORT:      f[0] =  72 #"H"
+            elif t == NPY_INT:         f[0] = 105 #"i"
+            elif t == NPY_UINT:        f[0] =  73 #"I"
+            elif t == NPY_LONG:        f[0] = 108 #"l"
+            elif t == NPY_ULONG:       f[0] = 76  #"L"
+            elif t == NPY_LONGLONG:    f[0] = 113 #"q"
+            elif t == NPY_ULONGLONG:   f[0] = 81  #"Q"
+            elif t == NPY_FLOAT:       f[0] = 102 #"f"
+            elif t == NPY_DOUBLE:      f[0] = 100 #"d"
+            elif t == NPY_LONGDOUBLE:  f[0] = 103 #"g"
+            elif t == NPY_CFLOAT:      f[0] = 90; f[1] = 102; f += 1 # Zf
+            elif t == NPY_CDOUBLE:     f[0] = 90; f[1] = 100; f += 1 # Zd
+            elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
+            elif t == NPY_OBJECT:      f[0] = 79 #"O"
+            else:
+                raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
+            f += 1
+        else:
+            # Cython ignores struct boundary information ("T{...}"),
+            # so don't output it
+            f = _util_dtypestring(child, f, end, offset)
+    return f
+
+
+#
+# ufunc API
+#
+
+cdef extern from "numpy/ufuncobject.h":
+
+    ctypedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
+
+    ctypedef extern class numpy.ufunc [object PyUFuncObject, check_size ignore]:
+        cdef:
+            int nin, nout, nargs
+            int identity
+            PyUFuncGenericFunction *functions
+            void **data
+            int ntypes
+            int check_return
+            char *name
+            char *types
+            char *doc
+            void *ptr
+            PyObject *obj
+            PyObject *userloops
+
+    cdef enum:
+        PyUFunc_Zero
+        PyUFunc_One
+        PyUFunc_None
+        UFUNC_ERR_IGNORE
+        UFUNC_ERR_WARN
+        UFUNC_ERR_RAISE
+        UFUNC_ERR_CALL
+        UFUNC_ERR_PRINT
+        UFUNC_ERR_LOG
+        UFUNC_MASK_DIVIDEBYZERO
+        UFUNC_MASK_OVERFLOW
+        UFUNC_MASK_UNDERFLOW
+        UFUNC_MASK_INVALID
+        UFUNC_SHIFT_DIVIDEBYZERO
+        UFUNC_SHIFT_OVERFLOW
+        UFUNC_SHIFT_UNDERFLOW
+        UFUNC_SHIFT_INVALID
+        UFUNC_FPE_DIVIDEBYZERO
+        UFUNC_FPE_OVERFLOW
+        UFUNC_FPE_UNDERFLOW
+        UFUNC_FPE_INVALID
+        UFUNC_ERR_DEFAULT
+        UFUNC_ERR_DEFAULT2
+
+    object PyUFunc_FromFuncAndData(PyUFuncGenericFunction *,
+          void **, char *, int, int, int, int, char *, char *, int)
+    int PyUFunc_RegisterLoopForType(ufunc, int,
+                                    PyUFuncGenericFunction, int *, void *)
+    int PyUFunc_GenericFunction \
+        (ufunc, PyObject *, PyObject *, PyArrayObject **)
+    void PyUFunc_f_f_As_d_d \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_d_d \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_f_f \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_g_g \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_F_F_As_D_D \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_F_F \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_D_D \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_G_G \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_O_O \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_ff_f_As_dd_d \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_ff_f \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_dd_d \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_gg_g \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_FF_F_As_DD_D \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_DD_D \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_FF_F \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_GG_G \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_OO_O \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_O_O_method \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_OO_O_method \
+         (char **, npy_intp *, npy_intp *, void *)
+    void PyUFunc_On_Om \
+         (char **, npy_intp *, npy_intp *, void *)
+    int PyUFunc_GetPyValues \
+        (char *, int *, int *, PyObject **)
+    int PyUFunc_checkfperr \
+           (int, PyObject *, int *)
+    void PyUFunc_clearfperr()
+    int PyUFunc_getfperr()
+    int PyUFunc_handlefperr \
+        (int, PyObject *, int, int *)
+    int PyUFunc_ReplaceLoopBySignature \
+        (ufunc, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)
+    object PyUFunc_FromFuncAndDataAndSignature \
+             (PyUFuncGenericFunction *, void **, char *, int, int, int,
+              int, char *, char *, int, char *)
+
+    int _import_umath() except -1
+
+cdef inline void set_array_base(ndarray arr, object base):
+    Py_INCREF(base) # important to do this before stealing the reference below!
+    PyArray_SetBaseObject(arr, base)
+
+cdef inline object get_array_base(ndarray arr):
+    base = PyArray_BASE(arr)
+    if base is NULL:
+        return None
+    return <object>base
+
+# Versions of the import_* functions which are more suitable for
+# Cython code.
+cdef inline int import_array() except -1:
+    try:
+        _import_array()
+    except Exception:
+        raise ImportError("numpy.core.multiarray failed to import")
+
+cdef inline int import_umath() except -1:
+    try:
+        _import_umath()
+    except Exception:
+        raise ImportError("numpy.core.umath failed to import")
+
+cdef inline int import_ufunc() except -1:
+    try:
+        _import_umath()
+    except Exception:
+        raise ImportError("numpy.core.umath failed to import")
diff --git a/numpy/__init__.py b/numpy/__init__.py
index ba88c733f..fef8245de 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -143,7 +143,9 @@ else:
     from .core import *
     from . import compat
     from . import lib
+    # FIXME: why have numpy.lib if everything is imported here??
     from .lib import *
+
     from . import linalg
     from . import fft
     from . import polynomial
@@ -166,12 +168,22 @@ else:
     # now that numpy modules are imported, can initialize limits
     core.getlimits._register_known_types()
 
+    __all__.extend(['bool', 'int', 'float', 'complex', 'object', 'unicode',
+                    'str'])
     __all__.extend(['__version__', 'show_config'])
     __all__.extend(core.__all__)
     __all__.extend(_mat.__all__)
     __all__.extend(lib.__all__)
     __all__.extend(['linalg', 'fft', 'random', 'ctypeslib', 'ma'])
 
+    # Remove things that are in numpy.lib but not in the numpy namespace
+    # Note that there is a test (numpy/tests/test_public_api.py:test_numpy_namespace)
+    # that prevents adding more things to the main namespace by accident.
+    # The list below will grow until the `from .lib import *` fixme above is
+    # taken care of
+    __all__.remove('Arrayterator')
+    del Arrayterator
+
     # Filter out Cython harmless warnings
     warnings.filterwarnings("ignore", message="numpy.dtype size changed")
     warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
@@ -182,9 +194,34 @@ else:
     oldnumeric = 'removed'
     numarray = 'removed'
 
-    # We don't actually use this ourselves anymore, but I'm not 100% sure that
-    # no-one else in the world is using it (though I hope not)
-    from .testing import Tester
+    if sys.version_info[:2] >= (3, 7):
+        # Importing Tester requires importing all of unittest, which is not a
+        # cheap import. Since it is mainly used in test suites, we lazy import
+        # it here to save on the order of 10 ms of import time for most users.
+        #
+        # The previous way Tester was imported also had a side effect of adding
+        # the full `numpy.testing` namespace
+        #
+        # module level getattr is only supported in 3.7 onwards
+        # https://www.python.org/dev/peps/pep-0562/
+        def __getattr__(attr):
+            if attr == 'testing':
+                import numpy.testing as testing
+                return testing
+            elif attr == 'Tester':
+                from .testing import Tester
+                return Tester
+            else:
+                raise AttributeError("module {!r} has no attribute "
+                                     "{!r}".format(__name__, attr))
+
+        def __dir__():
+            return __all__ + ['Tester', 'testing']
+
+    else:
+        # We don't actually use this ourselves anymore, but I'm not 100% sure that
+        # no-one else in the world is using it (though I hope not)
+        from .testing import Tester
 
     # Pytest testing
     from numpy._pytesttester import PytestTester
diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py
index 8d1a3811c..b25224c20 100644
--- a/numpy/_pytesttester.py
+++ b/numpy/_pytesttester.py
@@ -48,10 +48,9 @@ class PytestTester(object):
     """
     Pytest test runner.
 
-    This class is made available in ``numpy.testing``, and a test function
-    is typically added to a package's __init__.py like so::
+    A test function is typically added to a package's __init__.py like so::
 
-      from numpy.testing import PytestTester
+      from numpy._pytesttester import PytestTester
       test = PytestTester(__name__).test
       del PytestTester
 
@@ -68,6 +67,12 @@ class PytestTester(object):
     module_name : module name
         The name of the module to test.
 
+    Notes
+    -----
+    Unlike the previous ``nose``-based implementation, this class is not
+    publicly exposed as it performs some ``numpy``-specific warning
+    suppression.
+
     """
     def __init__(self, module_name):
         self.module_name = module_name
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py
index ce443bb22..c3b3f0392 100644
--- a/numpy/core/__init__.py
+++ b/numpy/core/__init__.py
@@ -1,6 +1,13 @@
+"""
+Contains the core of NumPy: ndarray, ufuncs, dtypes, etc.
+
+Please note that this module is private.  All functions and objects
+are available in the main ``numpy`` namespace - use that instead.
+
+"""
+
 from __future__ import division, absolute_import, print_function
 
-from .info import __doc__
 from numpy.version import version as __version__
 
 import os
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index f041e0cd6..dbe3d226f 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -386,12 +386,12 @@ add_newdoc('numpy.core', 'nditer',
     >>> luf(lambda i,j:i*i + j/2, a, b)
     array([  0.5,   1.5,   4.5,   9.5,  16.5])
 
-    If operand flags `"writeonly"` or `"readwrite"` are used the operands may
-    be views into the original data with the `WRITEBACKIFCOPY` flag. In this case
-    nditer must be used as a context manager or the nditer.close
-    method must be called before using the result. The temporary
-    data will be written back to the original data when the `__exit__`
-    function is called but not before:
+    If operand flags `"writeonly"` or `"readwrite"` are used the
+    operands may be views into the original data with the
+    `WRITEBACKIFCOPY` flag. In this case `nditer` must be used as a
+    context manager or the `nditer.close` method must be called before
+    using the result. The temporary data will be written back to the
+    original data when the `__exit__` function is called but not before:
 
     >>> a = np.arange(6, dtype='i4')[::-2]
     >>> with np.nditer(a, [],
@@ -413,6 +413,8 @@ add_newdoc('numpy.core', 'nditer',
     `x.data` will still point at some part of `a.data`, and writing to
     one will affect the other.
 
+    Context management and the `close` method appeared in version 1.15.0.
+
     """)
 
 # nditer methods
@@ -568,6 +570,8 @@ add_newdoc('numpy.core', 'nditer', ('close',
 
     Resolve all writeback semantics in writeable operands.
 
+    .. versionadded:: 1.15.0
+
     See Also
     --------
 
@@ -1342,7 +1346,7 @@ add_newdoc('numpy.core.multiarray', 'arange',
 add_newdoc('numpy.core.multiarray', '_get_ndarray_c_version',
     """_get_ndarray_c_version()
 
-    Return the compile time NDARRAY_VERSION number.
+    Return the compile time NPY_VERSION (formerly called NDARRAY_VERSION) number.
 
     """)
 
diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py
index 092b848dc..df1ff180e 100644
--- a/numpy/core/_dtype.py
+++ b/numpy/core/_dtype.py
@@ -316,26 +316,39 @@ def _subarray_str(dtype):
     )
 
 
+def _name_includes_bit_suffix(dtype):
+    if dtype.type == np.object_:
+        # pointer size varies by system, best to omit it
+        return False
+    elif dtype.type == np.bool_:
+        # implied
+        return False
+    elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype):
+        # unspecified
+        return False
+    else:
+        return True
+
+
 def _name_get(dtype):
-    # provides dtype.name.__get__
+    # provides dtype.name.__get__, documented as returning a "bit name"
 
     if dtype.isbuiltin == 2:
         # user dtypes don't promise to do anything special
         return dtype.type.__name__
 
-    # Builtin classes are documented as returning a "bit name"
-    name = dtype.type.__name__
-
-    # handle bool_, str_, etc
-    if name[-1] == '_':
-        name = name[:-1]
+    if issubclass(dtype.type, np.void):
+        # historically, void subclasses preserve their name, eg `record64`
+        name = dtype.type.__name__
+    else:
+        name = _kind_name(dtype)
 
-    # append bit counts to str, unicode, and void
-    if np.issubdtype(dtype, np.flexible) and not _isunsized(dtype):
+    # append bit counts
+    if _name_includes_bit_suffix(dtype):
         name += "{}".format(dtype.itemsize * 8)
 
     # append metadata to datetimes
-    elif dtype.type in (np.datetime64, np.timedelta64):
+    if dtype.type in (np.datetime64, np.timedelta64):
         name += _datetime_metadata_str(dtype)
 
     return name
diff --git a/numpy/core/_exceptions.py b/numpy/core/_exceptions.py
index a1af7a78d..88a45561f 100644
--- a/numpy/core/_exceptions.py
+++ b/numpy/core/_exceptions.py
@@ -27,6 +27,7 @@ def _display_as_base(cls):
     assert issubclass(cls, Exception)
     cls.__name__ = cls.__base__.__name__
     cls.__qualname__ = cls.__base__.__qualname__
+    set_module(cls.__base__.__module__)(cls)
     return cls
 
 
@@ -146,6 +147,54 @@ class _ArrayMemoryError(MemoryError):
         self.shape = shape
         self.dtype = dtype
 
-    def __str__(self):
-        return "Unable to allocate array with shape {} and data type {}".format(self.shape, self.dtype)
+    @property
+    def _total_size(self):
+        num_bytes = self.dtype.itemsize
+        for dim in self.shape:
+            num_bytes *= dim
+        return num_bytes
+
+    @staticmethod
+    def _size_to_string(num_bytes):
+        """ Convert a number of bytes into a binary size string """
+        import math
+
+        # https://en.wikipedia.org/wiki/Binary_prefix
+        LOG2_STEP = 10
+        STEP = 1024
+        units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB']
+
+        unit_i = max(num_bytes.bit_length() - 1, 1) // LOG2_STEP
+        unit_val = 1 << (unit_i * LOG2_STEP)
+        n_units = num_bytes / unit_val
+        del unit_val
+
+        # ensure we pick a unit that is correct after rounding
+        if round(n_units) == STEP:
+            unit_i += 1
+            n_units /= STEP
+
+        # deal with sizes so large that we don't have units for them
+        if unit_i >= len(units):
+            new_unit_i = len(units) - 1
+            n_units *= 1 << ((unit_i - new_unit_i) * LOG2_STEP)
+            unit_i = new_unit_i
+
+        unit_name = units[unit_i]
+        # format with a sensible number of digits
+        if unit_i == 0:
+            # no decimal point on bytes
+            return '{:.0f} {}'.format(n_units, unit_name)
+        elif round(n_units) < 1000:
+            # 3 significant figures, if none are dropped to the left of the .
+            return '{:#.3g} {}'.format(n_units, unit_name)
+        else:
+            # just give all the digits otherwise
+            return '{:#.0f} {}'.format(n_units, unit_name)
 
+    def __str__(self):
+        size_str = self._size_to_string(self._total_size)
+        return (
+            "Unable to allocate {} for an array with shape {} and data type {}"
+            .format(size_str, self.shape, self.dtype)
+        )
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index c70718cb6..b0ea603e1 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -459,7 +459,7 @@ def _getfield_is_safe(oldtype, newtype, offset):
     if newtype.hasobject or oldtype.hasobject:
         if offset == 0 and newtype == oldtype:
             return
-        if oldtype.names:
+        if oldtype.names is not None:
             for name in oldtype.names:
                 if (oldtype.fields[name][1] == offset and
                         oldtype.fields[name][0] == newtype):
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index ecd05d3ac..8a7626d9d 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -194,12 +194,14 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
 
     See Also
     --------
-    get_printoptions, set_string_function, array2string
+    get_printoptions, printoptions, set_string_function, array2string
 
     Notes
     -----
     `formatter` is always reset with a call to `set_printoptions`.
 
+    Use `printoptions` as a context manager to set the values temporarily.
+
     Examples
     --------
     Floating point precision can be set:
@@ -236,9 +238,16 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
 
     To put back the default options, you can use:
 
-    >>> np.set_printoptions(edgeitems=3,infstr='inf',
+    >>> np.set_printoptions(edgeitems=3, infstr='inf',
     ... linewidth=75, nanstr='nan', precision=8,
     ... suppress=False, threshold=1000, formatter=None)
+
+    To temporarily override options, use `printoptions` as a context manager:
+
+    >>> with np.printoptions(precision=2, suppress=True, threshold=5):
+    ...     np.linspace(0, 10, 10)
+    array([ 0.  ,  1.11,  2.22, ...,  7.78,  8.89, 10.  ])
+
     """
     legacy = kwarg.pop('legacy', None)
     if kwarg:
@@ -285,7 +294,7 @@ def get_printoptions():
 
     See Also
     --------
-    set_printoptions, set_string_function
+    set_printoptions, printoptions, set_string_function
 
     """
     return _format_options.copy()
@@ -685,7 +694,7 @@ def array2string(a, max_line_width=None, precision=None,
         if style is np._NoValue:
             style = repr
 
-        if a.shape == () and not a.dtype.names:
+        if a.shape == () and a.dtype.names is None:
             return style(a.item())
     elif style is not np._NoValue:
         # Deprecation 11-9-2017  v1.14
@@ -984,20 +993,6 @@ class FloatingFormat(object):
                                       pad_left=self.pad_left,
                                       pad_right=self.pad_right)
 
-# for back-compatibility, we keep the classes for each float type too
-class FloatFormat(FloatingFormat):
-    def __init__(self, *args, **kwargs):
-        warnings.warn("FloatFormat has been replaced by FloatingFormat",
-                      DeprecationWarning, stacklevel=2)
-        super(FloatFormat, self).__init__(*args, **kwargs)
-
-
-class LongFloatFormat(FloatingFormat):
-    def __init__(self, *args, **kwargs):
-        warnings.warn("LongFloatFormat has been replaced by FloatingFormat",
-                      DeprecationWarning, stacklevel=2)
-        super(LongFloatFormat, self).__init__(*args, **kwargs)
-
 
 @set_module('numpy')
 def format_float_scientific(x, precision=None, unique=True, trim='k',
@@ -1196,21 +1191,6 @@ class ComplexFloatingFormat(object):
 
         return r + i
 
-# for back-compatibility, we keep the classes for each complex type too
-class ComplexFormat(ComplexFloatingFormat):
-    def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "ComplexFormat has been replaced by ComplexFloatingFormat",
-            DeprecationWarning, stacklevel=2)
-        super(ComplexFormat, self).__init__(*args, **kwargs)
-
-class LongComplexFormat(ComplexFloatingFormat):
-    def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "LongComplexFormat has been replaced by ComplexFloatingFormat",
-            DeprecationWarning, stacklevel=2)
-        super(LongComplexFormat, self).__init__(*args, **kwargs)
-
 
 class _TimelikeFormat(object):
     def __init__(self, data):
@@ -1321,16 +1301,6 @@ class StructuredVoidFormat(object):
             return "({})".format(", ".join(str_fields))
 
 
-# for backwards compatibility
-class StructureFormat(StructuredVoidFormat):
-    def __init__(self, *args, **kwargs):
-        # NumPy 1.14, 2018-02-14
-        warnings.warn(
-            "StructureFormat has been replaced by StructuredVoidFormat",
-            DeprecationWarning, stacklevel=2)
-        super(StructureFormat, self).__init__(*args, **kwargs)
-
-
 def _void_scalar_repr(x):
     """
     Implements the repr for structured-void scalars. It is called from the
@@ -1509,7 +1479,11 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
         arr, max_line_width, precision, suppress_small)
 
 
-_guarded_str = _recursive_guard()(str)
+@_recursive_guard()
+def _guarded_repr_or_str(v):
+    if isinstance(v, bytes):
+        return repr(v)
+    return str(v)
 
 
 def _array_str_implementation(
@@ -1527,7 +1501,7 @@ def _array_str_implementation(
         # obtain a scalar and call str on it, avoiding problems for subclasses
         # for which indexing with () returns a 0d instead of a scalar by using
         # ndarray's getindex. Also guard against recursive 0d object arrays.
-        return _guarded_str(np.ndarray.__getitem__(a, ()))
+        return _guarded_repr_or_str(np.ndarray.__getitem__(a, ()))
 
     return array2string(a, max_line_width, precision, suppress_small, ' ', "")
 
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index 923c34425..7336e5e13 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -259,7 +259,8 @@ def find_functions(filename, tag='API'):
             elif state == STATE_ARGS:
                 if line.startswith('{'):
                     # finished
-                    fargs_str = ' '.join(function_args).rstrip(' )')
+                    # remove any white space and the closing bracket:
+                    fargs_str = ' '.join(function_args).rstrip()[:-1].rstrip()
                     fargs = split_arguments(fargs_str)
                     f = Function(function_name, return_type, fargs,
                                  '\n'.join(doclist))
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index ae871ea6f..0d3bbffe9 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -358,14 +358,14 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.square'),
           None,
-          TD(ints+inexact, simd=[('avx2', ints)]),
+          TD(ints+inexact, simd=[('avx2', ints), ('fma', 'fd'), ('avx512f', 'fd')]),
           TD(O, f='Py_square'),
           ),
 'reciprocal':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.reciprocal'),
           None,
-          TD(ints+inexact, simd=[('avx2', ints)]),
+          TD(ints+inexact, simd=[('avx2', ints), ('fma', 'fd'), ('avx512f','fd')]),
           TD(O, f='Py_reciprocal'),
           ),
 # This is no longer used as numpy.ones_like, however it is
@@ -395,7 +395,7 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.absolute'),
           'PyUFunc_AbsoluteTypeResolver',
-          TD(bints+flts+timedeltaonly),
+          TD(bints+flts+timedeltaonly, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
           TD(cmplx, out=('f', 'd', 'g')),
           TD(O, f='PyNumber_Absolute'),
           ),
@@ -664,7 +664,7 @@ defdict = {
           None,
           TD('e', f='cos', astype={'e':'f'}),
           TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
-          TD(inexact, f='cos', astype={'e':'f'}),
+          TD('fdg' + cmplx, f='cos'),
           TD(P, f='cos'),
           ),
 'sin':
@@ -673,7 +673,7 @@ defdict = {
           None,
           TD('e', f='sin', astype={'e':'f'}),
           TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
-          TD(inexact, f='sin', astype={'e':'f'}),
+          TD('fdg' + cmplx, f='sin'),
           TD(P, f='sin'),
           ),
 'tan':
@@ -710,7 +710,7 @@ defdict = {
           None,
           TD('e', f='exp', astype={'e':'f'}),
           TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
-          TD(inexact, f='exp', astype={'e':'f'}),
+          TD('fdg' + cmplx, f='exp'),
           TD(P, f='exp'),
           ),
 'exp2':
@@ -733,7 +733,7 @@ defdict = {
           None,
           TD('e', f='log', astype={'e':'f'}),
           TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
-          TD(inexact, f='log', astype={'e':'f'}),
+          TD('fdg' + cmplx, f='log'),
           TD(P, f='log'),
           ),
 'log2':
@@ -762,8 +762,8 @@ defdict = {
           docstrings.get('numpy.core.umath.sqrt'),
           None,
           TD('e', f='sqrt', astype={'e':'f'}),
-          TD(inexactvec),
-          TD(inexact, f='sqrt', astype={'e':'f'}),
+          TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+          TD('fdg' + cmplx, f='sqrt'),
           TD(P, f='sqrt'),
           ),
 'cbrt':
@@ -777,14 +777,18 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.ceil'),
           None,
-          TD(flts, f='ceil', astype={'e':'f'}),
+          TD('e', f='ceil', astype={'e':'f'}),
+          TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+          TD('fdg', f='ceil'),
           TD(O, f='npy_ObjectCeil'),
           ),
 'trunc':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.trunc'),
           None,
-          TD(flts, f='trunc', astype={'e':'f'}),
+          TD('e', f='trunc', astype={'e':'f'}),
+          TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+          TD('fdg', f='trunc'),
           TD(O, f='npy_ObjectTrunc'),
           ),
 'fabs':
@@ -798,14 +802,18 @@ defdict = {
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.floor'),
           None,
-          TD(flts, f='floor', astype={'e':'f'}),
+          TD('e', f='floor', astype={'e':'f'}),
+          TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+          TD('fdg', f='floor'),
           TD(O, f='npy_ObjectFloor'),
           ),
 'rint':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.rint'),
           None,
-          TD(inexact, f='rint', astype={'e':'f'}),
+          TD('e', f='rint', astype={'e':'f'}),
+          TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+          TD('fdg' + cmplx, f='rint'),
           TD(P, f='rint'),
           ),
 'arctan2':
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index fb418aadc..1ac477b54 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -2183,7 +2183,7 @@ add_newdoc('numpy.core.umath', 'logical_and',
     Returns
     -------
     y : ndarray or bool
-        Boolean result of the logical OR operation applied to the elements
+        Boolean result of the logical AND operation applied to the elements
         of `x1` and `x2`; the shape is determined by broadcasting.
         $OUT_SCALAR_2
 
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index d7ecce1b4..a941c5b81 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -1840,7 +1840,7 @@ class chararray(ndarray):
     This constructor creates the array, using `buffer` (with `offset`
     and `strides`) if it is not ``None``. If `buffer` is ``None``, then
     constructs a new array with `strides` in "C order", unless both
-    ``len(shape) >= 2`` and ``order='Fortran'``, in which case `strides`
+    ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
     is in "Fortran order".
 
     Methods
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index bde37fca3..6c0b9cde9 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -1775,11 +1775,13 @@ def nonzero(a):
     which returns a row for each non-zero element.
 
     .. note::
-        When called on a zero-d array or scalar, ``nonzero(a)`` is treated
-        as ``nonzero(atleast1d(a))``.
 
-        ..deprecated:: 1.17.0
-            Use `atleast1d` explicitly if this behavior is deliberate.
+       When called on a zero-d array or scalar, ``nonzero(a)`` is treated
+       as ``nonzero(atleast_1d(a))``.
+
+       .. deprecated:: 1.17.0
+
+          Use `atleast_1d` explicitly if this behavior is deliberate.
 
     Parameters
     ----------
@@ -3125,10 +3127,37 @@ def around(a, decimals=0, out=None):
     -----
     For values exactly halfway between rounded decimal values, NumPy
     rounds to the nearest even value. Thus 1.5 and 2.5 round to 2.0,
-    -0.5 and 0.5 round to 0.0, etc. Results may also be surprising due
-    to the inexact representation of decimal fractions in the IEEE
-    floating point standard [1]_ and errors introduced when scaling
-    by powers of ten.
+    -0.5 and 0.5 round to 0.0, etc.
+
+    ``np.around`` uses a fast but sometimes inexact algorithm to round
+    floating-point datatypes. For positive `decimals` it is equivalent to
+    ``np.true_divide(np.rint(a * 10**decimals), 10**decimals)``, which has
+    error due to the inexact representation of decimal fractions in the IEEE
+    floating point standard [1]_ and errors introduced when scaling by powers
+    of ten. For instance, note the extra "1" in the following:
+
+        >>> np.round(56294995342131.5, 3)
+        56294995342131.51
+
+    If your goal is to print such values with a fixed number of decimals, it is
+    preferable to use numpy's float printing routines to limit the number of
+    printed decimals:
+
+        >>> np.format_float_positional(56294995342131.5, precision=3)
+        '56294995342131.5'
+
+    The float printing routines use an accurate but much more computationally
+    demanding algorithm to compute the number of digits after the decimal
+    point.
+
+    Alternatively, Python's builtin `round` function uses a more accurate
+    but slower algorithm for 64-bit floating point values:
+
+        >>> round(56294995342131.5, 3)
+        56294995342131.5
+        >>> np.round(16.055, 2), round(16.055, 2)  # equals 16.0549999999999997
+        (16.06, 16.05)
+
 
     References
     ----------
@@ -3419,7 +3448,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
         instead of a single axis or all the axes as before.
     dtype : data-type, optional
         Type to use in computing the variance.  For arrays of integer type
-        the default is `float32`; for arrays of float types it is the same as
+        the default is `float64`; for arrays of float types it is the same as
         the array type.
     out : ndarray, optional
         Alternate output array in which to place the result.  It must have
diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py
index d83af9911..42604ec3f 100644
--- a/numpy/core/function_base.py
+++ b/numpy/core/function_base.py
@@ -18,18 +18,6 @@ array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy')
 
 
-def _index_deprecate(i, stacklevel=2):
-    try:
-        i = operator.index(i)
-    except TypeError:
-        msg = ("object of type {} cannot be safely interpreted as "
-               "an integer.".format(type(i)))
-        i = int(i)
-        stacklevel += 1
-        warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel)
-    return i
-
-
 def _linspace_dispatcher(start, stop, num=None, endpoint=None, retstep=None,
                          dtype=None, axis=None):
     return (start, stop)
@@ -125,8 +113,13 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
     >>> plt.show()
 
     """
-    # 2016-02-25, 1.12
-    num = _index_deprecate(num)
+    try:
+        num = operator.index(num)
+    except TypeError:
+        raise TypeError(
+            "object of type {} cannot be safely interpreted as an integer."
+                .format(type(num)))
+
     if num < 0:
         raise ValueError("Number of samples, %s, must be non-negative." % num)
     div = (num - 1) if endpoint else num
diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h
index 2cc7ced35..95e9cb060 100644
--- a/numpy/core/include/numpy/ndarrayobject.h
+++ b/numpy/core/include/numpy/ndarrayobject.h
@@ -23,7 +23,7 @@ extern "C" {
 
 /* C-API that requires previous API to be defined */
 
-#define PyArray_DescrCheck(op) (((PyObject*)(op))->ob_type==&PyArrayDescr_Type)
+#define PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type)
 
 #define PyArray_Check(op) PyObject_TypeCheck(op, &PyArray_Type)
 #define PyArray_CheckExact(op) (((PyObject*)(op))->ob_type == &PyArray_Type)
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 1221aeece..ad98d562b 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1095,7 +1095,8 @@ typedef struct PyArrayIterObject_tag PyArrayIterObject;
  * type of the function which translates a set of coordinates to a
  * pointer to the data
  */
-typedef char* (*npy_iter_get_dataptr_t)(PyArrayIterObject* iter, npy_intp*);
+typedef char* (*npy_iter_get_dataptr_t)(
+        PyArrayIterObject* iter, const npy_intp*);
 
 struct PyArrayIterObject_tag {
         PyObject_HEAD
@@ -1695,7 +1696,8 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
 #define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num)
 #define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL)
 #define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL)
-#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0)
+#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0 && \
+                                      !PyDataType_HASFIELDS(dtype))
 #define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0)
 
 #define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj))
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index 126b861bf..69e690f28 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -177,6 +177,28 @@ NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b);
 NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b);
 NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b);
 
+NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b);
+NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b);
+
+NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b);
+NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b);
+NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
+NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
+
 /*
  * avx function has a common API for both sin & cos. This enum is used to
  * distinguish between the two
diff --git a/numpy/core/info.py b/numpy/core/info.py
deleted file mode 100644
index c6f7bbcf2..000000000
--- a/numpy/core/info.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""Defines a multi-dimensional array and useful procedures for Numerical computation.
-
-Functions
-
--   array                      - NumPy Array construction
--   zeros                      - Return an array of all zeros
--   empty                      - Return an uninitialized array
--   shape                      - Return shape of sequence or array
--   rank                       - Return number of dimensions
--   size                       - Return number of elements in entire array or a
-                                 certain dimension
--   fromstring                 - Construct array from (byte) string
--   take                       - Select sub-arrays using sequence of indices
--   put                        - Set sub-arrays using sequence of 1-D indices
--   putmask                    - Set portion of arrays using a mask
--   reshape                    - Return array with new shape
--   repeat                     - Repeat elements of array
--   choose                     - Construct new array from indexed array tuple
--   correlate                  - Correlate two 1-d arrays
--   searchsorted               - Search for element in 1-d array
--   sum                        - Total sum over a specified dimension
--   average                    - Average, possibly weighted, over axis or array.
--   cumsum                     - Cumulative sum over a specified dimension
--   product                    - Total product over a specified dimension
--   cumproduct                 - Cumulative product over a specified dimension
--   alltrue                    - Logical and over an entire axis
--   sometrue                   - Logical or over an entire axis
--   allclose                   - Tests if sequences are essentially equal
-
-More Functions:
-
--   arange                     - Return regularly spaced array
--   asarray                    - Guarantee NumPy array
--   convolve                   - Convolve two 1-d arrays
--   swapaxes                   - Exchange axes
--   concatenate                - Join arrays together
--   transpose                  - Permute axes
--   sort                       - Sort elements of array
--   argsort                    - Indices of sorted array
--   argmax                     - Index of largest value
--   argmin                     - Index of smallest value
--   inner                      - Innerproduct of two arrays
--   dot                        - Dot product (matrix multiplication)
--   outer                      - Outerproduct of two arrays
--   resize                     - Return array with arbitrary new shape
--   indices                    - Tuple of indices
--   fromfunction               - Construct array from universal function
--   diagonal                   - Return diagonal array
--   trace                      - Trace of array
--   dump                       - Dump array to file object (pickle)
--   dumps                      - Return pickled string representing data
--   load                       - Return array stored in file object
--   loads                      - Return array from pickled string
--   ravel                      - Return array as 1-D
--   nonzero                    - Indices of nonzero elements for 1-D array
--   shape                      - Shape of array
--   where                      - Construct array from binary result
--   compress                   - Elements of array where condition is true
--   clip                       - Clip array between two values
--   ones                       - Array of all ones
--   identity                   - 2-D identity array (matrix)
-
-(Universal) Math Functions
-
-       add                    logical_or             exp
-       subtract               logical_xor            log
-       multiply               logical_not            log10
-       divide                 maximum                sin
-       divide_safe            minimum                sinh
-       conjugate              bitwise_and            sqrt
-       power                  bitwise_or             tan
-       absolute               bitwise_xor            tanh
-       negative               invert                 ceil
-       greater                left_shift             fabs
-       greater_equal          right_shift            floor
-       less                   arccos                 arctan2
-       less_equal             arcsin                 fmod
-       equal                  arctan                 hypot
-       not_equal              cos                    around
-       logical_and            cosh                   sign
-       arccosh                arcsinh                arctanh
-
-"""
-from __future__ import division, absolute_import, print_function
-
-depends = ['testing']
-global_symbols = ['*']
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index ff8c58867..6d25f864b 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -26,6 +26,7 @@ if sys.version_info[0] < 3:
 
 from . import overrides
 from . import umath
+from . import shape_base
 from .overrides import set_module
 from .umath import (multiply, invert, sin, PINF, NAN)
 from . import numerictypes
@@ -48,14 +49,6 @@ array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy')
 
 
-def loads(*args, **kwargs):
-    # NumPy 1.15.0, 2017-12-10
-    warnings.warn(
-        "np.core.numeric.loads is deprecated, use pickle.loads instead",
-        DeprecationWarning, stacklevel=2)
-    return pickle.loads(*args, **kwargs)
-
-
 __all__ = [
     'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc',
     'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', 'dtype',
@@ -66,7 +59,7 @@ __all__ = [
     'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot', 'roll',
     'rollaxis', 'moveaxis', 'cross', 'tensordot', 'little_endian',
     'fromiter', 'array_equal', 'array_equiv', 'indices', 'fromfunction',
-    'isclose', 'load', 'loads', 'isscalar', 'binary_repr', 'base_repr', 'ones',
+    'isclose', 'isscalar', 'binary_repr', 'base_repr', 'ones',
     'identity', 'allclose', 'compare_chararrays', 'putmask',
     'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN',
     'False_', 'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS',
@@ -530,7 +523,7 @@ def isfortran(a):
 
     C-ordered arrays evaluate as False even if they are also FORTRAN-ordered.
 
-    >>> np.isfortran(np.array([1, 2], order='FORTRAN'))
+    >>> np.isfortran(np.array([1, 2], order='F'))
     False
 
     """
@@ -553,8 +546,10 @@ def argwhere(a):
 
     Returns
     -------
-    index_array : ndarray
+    index_array : (N, a.ndim) ndarray
         Indices of elements that are non-zero. Indices are grouped by element.
+        This array will have shape ``(N, a.ndim)`` where ``N`` is the number of
+        non-zero items.
 
     See Also
     --------
@@ -562,7 +557,8 @@ def argwhere(a):
 
     Notes
     -----
-    ``np.argwhere(a)`` is the same as ``np.transpose(np.nonzero(a))``.
+    ``np.argwhere(a)`` is almost the same as ``np.transpose(np.nonzero(a))``,
+    but produces a result of the correct shape for a 0D array.
 
     The output of ``argwhere`` is not suitable for indexing arrays.
     For this purpose use ``nonzero(a)`` instead.
@@ -580,6 +576,11 @@ def argwhere(a):
            [1, 2]])
 
     """
+    # nonzero does not behave well on 0d, so promote to 1d
+    if np.ndim(a) == 0:
+        a = shape_base.atleast_1d(a)
+        # then remove the added dimension
+        return argwhere(a)[:,:0]
     return transpose(nonzero(a))
 
 
@@ -937,7 +938,7 @@ def tensordot(a, b, axes=2):
     Returns
     -------
     output : ndarray
-        The tensor dot product of the input.  
+        The tensor dot product of the input.
 
     See Also
     --------
@@ -1935,6 +1936,10 @@ def binary_repr(num, width=None):
                 "will raise an error in the future.", DeprecationWarning,
                 stacklevel=3)
 
+    # Ensure that num is a Python integer to avoid overflow or unwanted
+    # casts to floating point.
+    num = operator.index(num)
+
     if num == 0:
         return '0' * (width or 1)
 
@@ -2024,30 +2029,6 @@ def base_repr(number, base=2, padding=0):
     return ''.join(reversed(res or '0'))
 
 
-def load(file):
-    """
-    Wrapper around cPickle.load which accepts either a file-like object or
-    a filename.
-
-    Note that the NumPy binary format is not based on pickle/cPickle anymore.
-    For details on the preferred way of loading and saving files, see `load`
-    and `save`.
-
-    See Also
-    --------
-    load, save
-
-    """
-    # NumPy 1.15.0, 2017-12-10
-    warnings.warn(
-        "np.core.numeric.load is deprecated, use pickle.load instead",
-        DeprecationWarning, stacklevel=2)
-    if isinstance(file, type("")):
-        with open(file, "rb") as file_pointer:
-            return pickle.load(file_pointer)
-    return pickle.load(file)
-
-
 # These are all essentially abbreviations
 # These might wind up in a special abbreviations module
 
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 0576005e7..a1439f9df 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -268,8 +268,8 @@ class record(nt.void):
             except AttributeError:
                 #happens if field is Object type
                 return obj
-            if dt.fields:
-                return obj.view((self.__class__, obj.dtype.fields))
+            if dt.names is not None:
+                return obj.view((self.__class__, obj.dtype))
             return obj
         else:
             raise AttributeError("'record' object has no "
@@ -293,8 +293,8 @@ class record(nt.void):
         obj = nt.void.__getitem__(self, indx)
 
         # copy behavior of record.__getattribute__,
-        if isinstance(obj, nt.void) and obj.dtype.fields:
-            return obj.view((self.__class__, obj.dtype.fields))
+        if isinstance(obj, nt.void) and obj.dtype.names is not None:
+            return obj.view((self.__class__, obj.dtype))
         else:
             # return a single element
             return obj
@@ -444,7 +444,7 @@ class recarray(ndarray):
         return self
 
     def __array_finalize__(self, obj):
-        if self.dtype.type is not record and self.dtype.fields:
+        if self.dtype.type is not record and self.dtype.names is not None:
             # if self.dtype is not np.record, invoke __setattr__ which will
             # convert it to a record if it is a void dtype.
             self.dtype = self.dtype
@@ -472,7 +472,7 @@ class recarray(ndarray):
         # with void type convert it to the same dtype.type (eg to preserve
         # numpy.record type if present), since nested structured fields do not
         # inherit type. Don't do this for non-void structures though.
-        if obj.dtype.fields:
+        if obj.dtype.names is not None:
             if issubclass(obj.dtype.type, nt.void):
                 return obj.view(dtype=(self.dtype.type, obj.dtype))
             return obj
@@ -487,7 +487,7 @@ class recarray(ndarray):
 
         # Automatically convert (void) structured types to records
         # (but not non-void structures, subarrays, or non-structured voids)
-        if attr == 'dtype' and issubclass(val.type, nt.void) and val.fields:
+        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
             val = sb.dtype((record, val))
 
         newattr = attr not in self.__dict__
@@ -521,7 +521,7 @@ class recarray(ndarray):
         # copy behavior of getattr, except that here
         # we might also be returning a single element
         if isinstance(obj, ndarray):
-            if obj.dtype.fields:
+            if obj.dtype.names is not None:
                 obj = obj.view(type(self))
                 if issubclass(obj.dtype.type, nt.void):
                     return obj.view(dtype=(self.dtype.type, obj.dtype))
@@ -577,7 +577,7 @@ class recarray(ndarray):
 
         if val is None:
             obj = self.getfield(*res)
-            if obj.dtype.fields:
+            if obj.dtype.names is not None:
                 return obj
             return obj.view(ndarray)
         else:
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 338502791..5f2f4a7b2 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -463,8 +463,14 @@ def configuration(parent_package='',top_path=None):
             rep = check_long_double_representation(config_cmd)
             moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1))
 
+            if check_for_right_shift_internal_compiler_error(config_cmd):
+                moredefs.append('NPY_DO_NOT_OPTIMIZE_LONG_right_shift')
+                moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONG_right_shift')
+                moredefs.append('NPY_DO_NOT_OPTIMIZE_LONGLONG_right_shift')
+                moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONGLONG_right_shift')
+
             # Py3K check
-            if sys.version_info[0] == 3:
+            if sys.version_info[0] >= 3:
                 moredefs.append(('NPY_PY3K', 1))
 
             # Generate the config.h file from moredefs
@@ -491,10 +497,10 @@ def configuration(parent_package='',top_path=None):
                     #endif
                     """))
 
-            print('File:', target)
+            log.info('File: %s' % target)
             with open(target) as target_f:
-                print(target_f.read())
-            print('EOF')
+                log.info(target_f.read())
+            log.info('EOF')
         else:
             mathlibs = []
             with open(target) as target_f:
@@ -581,10 +587,10 @@ def configuration(parent_package='',top_path=None):
                     """))
 
             # Dump the numpyconfig.h header to stdout
-            print('File: %s' % target)
+            log.info('File: %s' % target)
             with open(target) as target_f:
-                print(target_f.read())
-            print('EOF')
+                log.info(target_f.read())
+            log.info('EOF')
         config.add_data_files((header_dir, target))
         return target
 
@@ -633,23 +639,6 @@ def configuration(parent_package='',top_path=None):
             ]
 
     #######################################################################
-    #                            dummy module                             #
-    #######################################################################
-
-    # npymath needs the config.h and numpyconfig.h files to be generated, but
-    # build_clib cannot handle generate_config_h and generate_numpyconfig_h
-    # (don't ask). Because clib are generated before extensions, we have to
-    # explicitly add an extension which has generate_config_h and
-    # generate_numpyconfig_h as sources *before* adding npymath.
-
-    config.add_extension('_dummy',
-                         sources=[join('src', 'dummymodule.c'),
-                                  generate_config_h,
-                                  generate_numpyconfig_h,
-                                  generate_numpy_api]
-                         )
-
-    #######################################################################
     #                          npymath library                            #
     #######################################################################
 
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a3f7acd6d..84b78b585 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -5,6 +5,7 @@ import sys
 import warnings
 import copy
 import binascii
+import textwrap
 
 from numpy.distutils.misc_util import mingw32
 
@@ -415,3 +416,41 @@ def long_double_representation(lines):
     else:
         # We never detected the after_sequence
         raise ValueError("Could not lock sequences (%s)" % saw)
+
+
+def check_for_right_shift_internal_compiler_error(cmd):
+    """
+    On our ARM CI, an optimized right-shift loop hits an internal compiler error
+
+    The failure looks like the following, and can be reproduced on ARM64 GCC 5.4:
+
+        <source>: In function 'right_shift':
+        <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349
+               ip1[i] = ip1[i] >> in2;
+                      ^
+        Please submit a full bug report,
+        with preprocessed source if appropriate.
+        See <http://gcc.gnu.org/bugs.html> for instructions.
+        Compiler returned: 1
+
+    This function returns True if this compiler bug is present, and we need to
+    turn off optimization for the function
+    """
+    cmd._check_compiler()
+    has_optimize = cmd.try_compile(textwrap.dedent("""\
+        __attribute__((optimize("O3"))) void right_shift() {}
+        """), None, None)
+    if not has_optimize:
+        return False
+
+    no_err = cmd.try_compile(textwrap.dedent("""\
+        typedef long the_type;  /* fails also for unsigned and long long */
+        __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) {
+            for (int i = 0; i < n; i++) {
+                if (in2 < (the_type)sizeof(the_type) * 8) {
+                    ip1[i] = ip1[i] >> in2;
+                }
+            }
+        }
+        """), None, None)
+    return not no_err
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index 710f64827..d7e769e62 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -9,8 +9,9 @@ import warnings
 
 from . import numeric as _nx
 from . import overrides
-from .numeric import array, asanyarray, newaxis
+from ._asarray import array, asanyarray
 from .multiarray import normalize_axis_index
+from . import fromnumeric as _from_nx
 
 
 array_function_dispatch = functools.partial(
@@ -123,7 +124,7 @@ def atleast_2d(*arys):
         if ary.ndim == 0:
             result = ary.reshape(1, 1)
         elif ary.ndim == 1:
-            result = ary[newaxis, :]
+            result = ary[_nx.newaxis, :]
         else:
             result = ary
         res.append(result)
@@ -193,9 +194,9 @@ def atleast_3d(*arys):
         if ary.ndim == 0:
             result = ary.reshape(1, 1, 1)
         elif ary.ndim == 1:
-            result = ary[newaxis, :, newaxis]
+            result = ary[_nx.newaxis, :, _nx.newaxis]
         elif ary.ndim == 2:
-            result = ary[:, :, newaxis]
+            result = ary[:, :, _nx.newaxis]
         else:
             result = ary
         res.append(result)
@@ -435,9 +436,9 @@ def stack(arrays, axis=0, out=None):
 # Internal functions to eliminate the overhead of repeated dispatch in one of
 # the two possible paths inside np.block.
 # Use getattr to protect against __array_function__ being disabled.
-_size = getattr(_nx.size, '__wrapped__', _nx.size)
-_ndim = getattr(_nx.ndim, '__wrapped__', _nx.ndim)
-_concatenate = getattr(_nx.concatenate, '__wrapped__', _nx.concatenate)
+_size = getattr(_from_nx.size, '__wrapped__', _from_nx.size)
+_ndim = getattr(_from_nx.ndim, '__wrapped__', _from_nx.ndim)
+_concatenate = getattr(_from_nx.concatenate, '__wrapped__', _from_nx.concatenate)
 
 
 def _block_format_index(index):
diff --git a/numpy/core/src/common/npy_partition.h.src b/numpy/core/src/common/npy_partition.h.src
index a22cf911c..97dc2536b 100644
--- a/numpy/core/src/common/npy_partition.h.src
+++ b/numpy/core/src/common/npy_partition.h.src
@@ -113,9 +113,6 @@ get_argpartition_func(int type, NPY_SELECTKIND which)
     npy_intp i;
     npy_intp ntypes = ARRAY_SIZE(_part_map);
 
-    if (which >= NPY_NSELECTS) {
-        return NULL;
-    }
     for (i = 0; i < ntypes; i++) {
         if (type == _part_map[i].typenum) {
             return _part_map[i].argpart[which];
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index 1365e87bb..fa2efb428 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -593,6 +593,25 @@ fail:
     return NULL;
 }
 
+/*
+ * Helper to test fromstring of 0 terminated strings, as the C-API supports
+ * the -1 length identifier.
+ */
+static PyObject *
+fromstring_null_term_c_api(PyObject *dummy, PyObject *byte_obj)
+{
+    char *string;
+    PyArray_Descr *descr;
+
+    string = PyBytes_AsString(byte_obj);
+    if (string == NULL) {
+        return NULL;
+    }
+    descr = PyArray_DescrNewFromType(NPY_FLOAT64);
+    return PyArray_FromString(string, -1, descr, -1, " ");
+}
+
+
 /* check no elison for avoided increfs */
 static PyObject *
 incref_elide(PyObject *dummy, PyObject *args)
@@ -656,6 +675,43 @@ npy_updateifcopy_deprecation(PyObject* NPY_UNUSED(self), PyObject* args)
     Py_RETURN_NONE;
 }
 
+/* used to test that PyArray_As1D usage emits a not implemented error */
+static PyObject*
+npy_pyarrayas1d_deprecation(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args))
+{
+    PyObject *op = Py_BuildValue("i", 42);
+    PyObject *result = op;
+    int dim = 4;
+    double arg[2] = {1, 2};
+    int temp = PyArray_As1D(&result, (char **)&arg, &dim, NPY_DOUBLE);
+    if (temp < 0) {
+        Py_DECREF(op);
+        return NULL;
+    }
+    /* op != result */
+    Py_DECREF(op);
+    return result;
+}
+
+/* used to test that PyArray_As2D usage emits a not implemented error */
+static PyObject*
+npy_pyarrayas2d_deprecation(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args))
+{
+    PyObject *op = Py_BuildValue("i", 42);
+    PyObject *result = op;
+    int dim1 = 4;
+    int dim2 = 6;
+    double arg[2][2] = {{1, 2}, {3, 4}};
+    int temp = PyArray_As2D(&result, (char ***)&arg, &dim1, &dim2, NPY_DOUBLE);
+    if (temp < 0) {
+        Py_DECREF(op);
+        return NULL;
+    }
+    /* op != result */
+    Py_DECREF(op);
+    return result;
+}
+
 /* used to create array with WRITEBACKIFCOPY flag */
 static PyObject*
 npy_create_writebackifcopy(PyObject* NPY_UNUSED(self), PyObject* args)
@@ -857,6 +913,7 @@ static PyObject*
 get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg)
 {
     int writeable, flags;
+    PyArray_Descr *descr;
     npy_intp zero = 0;
 
     writeable = PyObject_IsTrue(arg);
@@ -866,7 +923,8 @@ get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg)
 
     flags = writeable ? NPY_ARRAY_WRITEABLE : 0;
     /* Create an empty array (which points to a random place) */
-    return PyArray_NewFromDescr(&PyArray_Type, PyArray_DescrFromType(NPY_INTP),
+    descr =  PyArray_DescrNewFromType(NPY_INTP);
+    return PyArray_NewFromDescr(&PyArray_Type, descr,
                                 1, &zero, NULL, &zero, flags, NULL);
 }
 
@@ -1927,6 +1985,9 @@ static PyMethodDef Multiarray_TestsMethods[] = {
     {"test_inplace_increment",
         inplace_increment,
         METH_VARARGS, NULL},
+    {"fromstring_null_term_c_api",
+        fromstring_null_term_c_api,
+        METH_O, NULL},
     {"incref_elide",
         incref_elide,
         METH_VARARGS, NULL},
@@ -1939,6 +2000,12 @@ static PyMethodDef Multiarray_TestsMethods[] = {
     {"npy_updateifcopy_deprecation",
         npy_updateifcopy_deprecation,
         METH_O, NULL},
+    {"npy_pyarrayas1d_deprecation",
+        npy_pyarrayas1d_deprecation,
+        METH_NOARGS, NULL},
+    {"npy_pyarrayas2d_deprecation",
+        npy_pyarrayas2d_deprecation,
+        METH_NOARGS, NULL},
     {"npy_create_writebackifcopy",
         npy_create_writebackifcopy,
         METH_O, NULL},
diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c
index addc9f006..a7f34cbe5 100644
--- a/numpy/core/src/multiarray/alloc.c
+++ b/numpy/core/src/multiarray/alloc.c
@@ -25,10 +25,14 @@
 
 #include <assert.h>
 
-#ifdef HAVE_SYS_MMAN_H
+#ifdef NPY_OS_LINUX
 #include <sys/mman.h>
-#if defined MADV_HUGEPAGE && defined HAVE_MADVISE
-#define HAVE_MADV_HUGEPAGE
+#ifndef MADV_HUGEPAGE
+/*
+ * Use code 14 (MADV_HUGEPAGE) if it isn't defined. This gives a chance of
+ * enabling huge pages even if built with linux kernel < 2.6.38
+ */
+#define MADV_HUGEPAGE 14
 #endif
 #endif
 
@@ -74,11 +78,15 @@ _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
 #ifdef _PyPyGC_AddMemoryPressure
         _PyPyPyGC_AddMemoryPressure(nelem * esz);
 #endif
-#ifdef HAVE_MADV_HUGEPAGE
+#ifdef NPY_OS_LINUX
         /* allow kernel allocating huge pages for large arrays */
         if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u)))) {
             npy_uintp offset = 4096u - (npy_uintp)p % (4096u);
             npy_uintp length = nelem * esz - offset;
+            /**
+             * Intentionally not checking for errors that may be returned by
+             * older kernel versions; optimistically tries enabling huge pages.
+             */
             madvise((void*)((npy_uintp)p + offset), length, MADV_HUGEPAGE);
         }
 #endif
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index eb939f47c..4e229e321 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -462,7 +462,7 @@ WARN_IN_DEALLOC(PyObject* warning, const char * msg) {
             PyErr_WriteUnraisable(Py_None);
         }
     }
-};
+}
 
 /* array object functions */
 
@@ -607,7 +607,7 @@ PyArray_DebugPrint(PyArrayObject *obj)
  * TO BE REMOVED - NOT USED INTERNALLY.
  */
 NPY_NO_EXPORT void
-PyArray_SetDatetimeParseFunction(PyObject *op)
+PyArray_SetDatetimeParseFunction(PyObject *NPY_UNUSED(op))
 {
 }
 
@@ -630,7 +630,7 @@ PyArray_CompareUCS4(npy_ucs4 *s1, npy_ucs4 *s2, size_t len)
 /*NUMPY_API
  */
 NPY_NO_EXPORT int
-PyArray_CompareString(char *s1, char *s2, size_t len)
+PyArray_CompareString(const char *s1, const char *s2, size_t len)
 {
     const unsigned char *c1 = (unsigned char *)s1;
     const unsigned char *c2 = (unsigned char *)s2;
@@ -1200,15 +1200,28 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
             }
         }
         if (res == NULL && !PyErr_Occurred()) {
-            PyErr_SetString(PyExc_ValueError, "No fields found.");
+            /* these dtypes had no fields. Use a MultiIter to broadcast them
+             * to an output array, and fill with True (for EQ) or False */
+            PyArrayMultiIterObject *mit = (PyArrayMultiIterObject *)
+                                          PyArray_MultiIterNew(2, self, other);
+            if (mit == NULL) {
+                return NULL;
+            }
+
+            res = PyArray_NewFromDescr(&PyArray_Type,
+                                       PyArray_DescrFromType(NPY_BOOL),
+                                       mit->nd, mit->dimensions,
+                                       NULL, NULL, 0, NULL);
+            Py_DECREF(mit);
+            if (res) {
+                 PyArray_FILLWBYTE((PyArrayObject *)res,
+                                   cmp_op == Py_EQ ? 1 : 0);
+            }
         }
         return res;
     }
     else {
-        /*
-         * compare as a string. Assumes self and
-         * other have same descr->type
-         */
+        /* compare as a string. Assumes self and other have same descr->type */
         return _strings_richcompare(self, other, cmp_op, 0);
     }
 }
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index c38067681..055d3e60f 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -942,6 +942,20 @@ ravel_multi_index_loop(int ravel_ndim, npy_intp *ravel_dims,
     char invalid;
     npy_intp j, m;
 
+    /*
+     * Check for zero-length axes unless there is nothing to do.
+     * An empty array/shape cannot be indexed at all.
+     */
+    if (count != 0) {
+        for (i = 0; i < ravel_ndim; ++i) {
+            if (ravel_dims[i] == 0) {
+                PyErr_SetString(PyExc_ValueError,
+                        "cannot unravel if shape has zero entries (is empty).");
+                return NPY_FAIL;
+            }
+        }
+    }
+
     NPY_BEGIN_ALLOW_THREADS;
     invalid = 0;
     while (count--) {
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index a370874a6..4baa02052 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -406,7 +406,6 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
     }
 
     *sortkind = NPY_QUICKSORT;
-        
 
     str = PyBytes_AsString(obj);
     if (!str) {
@@ -551,10 +550,9 @@ PyArray_OrderConverter(PyObject *object, NPY_ORDER *val)
         int ret;
         tmp = PyUnicode_AsASCIIString(object);
         if (tmp == NULL) {
-            PyErr_SetString(PyExc_ValueError, "Invalid unicode string passed in "
-                                              "for the array ordering. "
-                                              "Please pass in 'C', 'F', 'A' "
-                                              "or 'K' instead");
+            PyErr_SetString(PyExc_ValueError,
+                "Invalid unicode string passed in for the array ordering. "
+                "Please pass in 'C', 'F', 'A' or 'K' instead");
             return NPY_FAIL;
         }
         ret = PyArray_OrderConverter(tmp, val);
@@ -562,38 +560,18 @@ PyArray_OrderConverter(PyObject *object, NPY_ORDER *val)
         return ret;
     }
     else if (!PyBytes_Check(object) || PyBytes_GET_SIZE(object) < 1) {
-        /* 2015-12-14, 1.11 */
-        int ret = DEPRECATE("Non-string object detected for "
-                            "the array ordering. Please pass "
-                            "in 'C', 'F', 'A', or 'K' instead");
-
-        if (ret < 0) {
-            return -1;
-        }
-
-        if (PyObject_IsTrue(object)) {
-            *val = NPY_FORTRANORDER;
-        }
-        else {
-            *val = NPY_CORDER;
-        }
-        if (PyErr_Occurred()) {
-            return NPY_FAIL;
-        }
-        return NPY_SUCCEED;
+        PyErr_SetString(PyExc_ValueError,
+            "Non-string object detected for the array ordering. "
+            "Please pass in 'C', 'F', 'A', or 'K' instead");
+        return NPY_FAIL;
     }
     else {
         str = PyBytes_AS_STRING(object);
         if (strlen(str) != 1) {
-            /* 2015-12-14, 1.11 */
-            int ret = DEPRECATE("Non length-one string passed "
-                                "in for the array ordering. "
-                                "Please pass in 'C', 'F', 'A', "
-                                "or 'K' instead");
-
-            if (ret < 0) {
-                return -1;
-            }
+            PyErr_SetString(PyExc_ValueError,
+                "Non-string object detected for the array ordering. "
+                "Please pass in 'C', 'F', 'A', or 'K' instead");
+            return NPY_FAIL;
         }
 
         if (str[0] == 'C' || str[0] == 'c') {
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 7db467308..aa4e40e66 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -543,35 +543,6 @@ PyArray_AssignZero(PyArrayObject *dst,
     return retcode;
 }
 
-/*
- * Fills an array with ones.
- *
- * dst: The destination array.
- * wheremask: If non-NULL, a boolean mask specifying where to set the values.
- *
- * Returns 0 on success, -1 on failure.
- */
-NPY_NO_EXPORT int
-PyArray_AssignOne(PyArrayObject *dst,
-                  PyArrayObject *wheremask)
-{
-    npy_bool value;
-    PyArray_Descr *bool_dtype;
-    int retcode;
-
-    /* Create a raw bool scalar with the value True */
-    bool_dtype = PyArray_DescrFromType(NPY_BOOL);
-    if (bool_dtype == NULL) {
-        return -1;
-    }
-    value = 1;
-
-    retcode = PyArray_AssignRawScalar(dst, bool_dtype, (char *)&value,
-                                      wheremask, NPY_SAFE_CASTING);
-
-    Py_DECREF(bool_dtype);
-    return retcode;
-}
 
 /*NUMPY_API
  * Copy an array.
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index bc327bf0d..5174bd889 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -40,9 +40,31 @@
  * regards to the handling of text representations.
  */
 
+/*
+ * Scanning functions for next element parsing and separator skipping.
+ * These functions return:
+ *   - 0 to indicate more data to read
+ *   - -1 when reading stopped at the end of the string/file
+ *   - -2 when reading stopped before the end was reached.
+ *
+ * The dtype specific parsing functions may set the python error state
+ * (they have to get the GIL first) additionally.
+ */
 typedef int (*next_element)(void **, void *, PyArray_Descr *, void *);
 typedef int (*skip_separator)(void **, const char *, void *);
 
+/* Tell whether `start` is at the end of the input; `end` may be NULL. */
+static npy_bool
+string_is_fully_read(char const* start, char const* end) {
+    if (end == NULL) {
+        return *start == '\0';  /* null terminated: done at the NUL byte */
+    }
+    else {
+        return start >= end;  /* fixed length: done once start reaches end */
+    }
+}
+
+
 static int
 fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype,
                      const char *end)
@@ -50,19 +72,23 @@ fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype,
     char *e = *s;
     int r = dtype->f->fromstr(*s, dptr, &e, dtype);
     /*
-     * fromstr always returns 0 for basic dtypes
-     * s points to the end of the parsed string
-     * if an error occurs s is not changed
+     * fromstr always returns 0 for basic dtypes; s points to the end of the
+     * parsed string. If s is not changed an error occurred or the end was
+     * reached.
      */
-    if (*s == e) {
-        /* Nothing read */
-        return -1;
+    if (*s == e || r < 0) {
+        /* Nothing read, could be end of string or an error (or both) */
+        if (string_is_fully_read(*s, end)) {
+            return -1;
+        }
+        return -2;
     }
     *s = e;
     if (end != NULL && *s > end) {
+        /* Stop the iteration if we read far enough */
         return -1;
     }
-    return r;
+    return 0;
 }
 
 static int
@@ -75,9 +101,13 @@ fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype,
     if (r == 1) {
         return 0;
     }
-    else {
+    else if (r == EOF) {
         return -1;
     }
+    else {
+        /* unable to read more, but EOF not reached indicating an error. */
+        return -2;
+    }
 }
 
 /*
@@ -143,9 +173,10 @@ fromstr_skip_separator(char **s, const char *sep, const char *end)
 {
     char *string = *s;
     int result = 0;
+
     while (1) {
         char c = *string;
-        if (c == '\0' || (end != NULL && string >= end)) {
+        if (string_is_fully_read(string, end)) {
             result = -1;
             break;
         }
@@ -936,6 +967,39 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
     return 0;
 }
 
+static PyObject *  /* always NULL; an exception is left set for the caller */
+raise_memory_error(int nd, npy_intp *dims, PyArray_Descr *descr)
+{
+    static PyObject *exc_type = NULL;  /* static: kept between calls */
+
+    npy_cache_import(
+        "numpy.core._exceptions", "_ArrayMemoryError",
+        &exc_type);
+    if (exc_type == NULL) {
+        goto fail;
+    }
+
+    PyObject *shape = PyArray_IntTupleFromIntp(nd, dims);
+    if (shape == NULL) {
+        goto fail;
+    }
+
+    /* the exception is raised with the arguments (shape, descr) */
+    PyObject *exc_value = PyTuple_Pack(2, shape, (PyObject *)descr);
+    Py_DECREF(shape);  /* PyTuple_Pack took its own reference */
+    if (exc_value == NULL){
+        goto fail;
+    }
+    PyErr_SetObject(exc_type, exc_value);
+    Py_DECREF(exc_value);
+    return NULL;
+
+fail:
+    /* could not build the detailed error: report that, raise MemoryError */
+    PyErr_WriteUnraisable(NULL);
+    return PyErr_NoMemory();
+}
+
 /*
  * Generic new array creation routine.
  * Internal variant with calloc argument for PyArray_Zeros.
@@ -1113,30 +1177,7 @@ PyArray_NewFromDescr_int(
             data = npy_alloc_cache(nbytes);
         }
         if (data == NULL) {
-            static PyObject *exc_type = NULL;
-            
-            npy_cache_import(
-                "numpy.core._exceptions", "_ArrayMemoryError",
-                &exc_type);
-            if (exc_type == NULL) {
-                return NULL;
-            }
-            
-            PyObject *shape = PyArray_IntTupleFromIntp(fa->nd,fa->dimensions);
-            if (shape == NULL) {
-                return NULL;
-            }
-            
-            /* produce an error object */
-            PyObject *exc_value = PyTuple_Pack(2, shape, descr);
-            Py_DECREF(shape);
-            if (exc_value == NULL){
-                return NULL;
-            }
-            PyErr_SetObject(exc_type, exc_value);
-            Py_DECREF(exc_value);
-            return NULL;
-
+            return raise_memory_error(fa->nd, fa->dimensions, descr);
         }
         fa->flags |= NPY_ARRAY_OWNDATA;
 
@@ -1426,28 +1467,6 @@ _dtype_from_buffer_3118(PyObject *memoryview)
 }
 
 
-/*
- * Call the python _is_from_ctypes
- */
-NPY_NO_EXPORT int
-_is_from_ctypes(PyObject *obj) {
-    PyObject *ret_obj;
-    static PyObject *py_func = NULL;
-
-    npy_cache_import("numpy.core._internal", "_is_from_ctypes", &py_func);
-
-    if (py_func == NULL) {
-        return -1;
-    }
-    ret_obj = PyObject_CallFunctionObjArgs(py_func, obj, NULL);
-    if (ret_obj == NULL) {
-        return -1;
-    }
-
-    return PyObject_IsTrue(ret_obj);
-}
-
-
 NPY_NO_EXPORT PyObject *
 _array_from_buffer_3118(PyObject *memoryview)
 {
@@ -1849,13 +1868,6 @@ PyArray_GetArrayParamsFromObject(PyObject *op,
             *out_arr = NULL;
             return 0;
         }
-        if (is_object && (requested_dtype != NULL) && 
-                (requested_dtype->type_num != NPY_OBJECT)) {
-            PyErr_SetString(PyExc_ValueError,
-               "cannot create an array from unequal-length (ragged) sequences");
-            Py_DECREF(*out_dtype);
-            return -1;
-        }
         /* If object arrays are forced */
         if (is_object) {
             Py_DECREF(*out_dtype);
@@ -2777,9 +2789,9 @@ PyArray_DescrFromObject(PyObject *op, PyArray_Descr *mintype)
   Deprecated, use PyArray_NewFromDescr instead.
 */
 NPY_NO_EXPORT PyObject *
-PyArray_FromDimsAndDataAndDescr(int nd, int *d,
+PyArray_FromDimsAndDataAndDescr(int NPY_UNUSED(nd), int *NPY_UNUSED(d),
                                 PyArray_Descr *descr,
-                                char *data)
+                                char *NPY_UNUSED(data))
 {
     PyErr_SetString(PyExc_NotImplementedError,
                 "PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.");
@@ -2791,7 +2803,7 @@ PyArray_FromDimsAndDataAndDescr(int nd, int *d,
   Deprecated, use PyArray_SimpleNew instead.
 */
 NPY_NO_EXPORT PyObject *
-PyArray_FromDims(int nd, int *d, int type)
+PyArray_FromDims(int NPY_UNUSED(nd), int *NPY_UNUSED(d), int NPY_UNUSED(type))
 {
     PyErr_SetString(PyExc_NotImplementedError,
                 "PyArray_FromDims: use PyArray_SimpleNew.");
@@ -3562,11 +3574,13 @@ PyArray_ArangeObj(PyObject *start, PyObject *stop, PyObject *step, PyArray_Descr
     return NULL;
 }
 
+/* This array creation function steals the reference to dtype. */
 static PyArrayObject *
 array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nread)
 {
     PyArrayObject *r;
     npy_off_t start, numbytes;
+    int elsize;
 
     if (num < 0) {
         int fail = 0;
@@ -3593,27 +3607,29 @@ array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, npy_intp num, size_t *nrea
         }
         num = numbytes / dtype->elsize;
     }
+
     /*
-     * When dtype->subarray is true, PyArray_NewFromDescr will decref dtype
-     * even on success, so make sure it stays around until exit.
+     * Array creation may move sub-array dimensions from the dtype to array
+     * dimensions, so we need to use the original element size when reading.
      */
-    Py_INCREF(dtype);
+    elsize = dtype->elsize;
+
     r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &num,
                                               NULL, NULL, 0, NULL);
     if (r == NULL) {
-        Py_DECREF(dtype);
         return NULL;
     }
+
     NPY_BEGIN_ALLOW_THREADS;
-    *nread = fread(PyArray_DATA(r), dtype->elsize, num, fp);
+    *nread = fread(PyArray_DATA(r), elsize, num, fp);
     NPY_END_ALLOW_THREADS;
-    Py_DECREF(dtype);
     return r;
 }
 
 /*
  * Create an array by reading from the given stream, using the passed
  * next_element and skip_separator functions.
+ * As typical for array creation functions, it steals the reference to dtype.
  */
 #define FROM_BUFFER_SIZE 4096
 static PyArrayObject *
@@ -3625,6 +3641,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
     npy_intp i;
     char *dptr, *clean_sep, *tmp;
     int err = 0;
+    int stop_reading_flag;  /* -1 indicates end reached; -2 a parsing error */
     npy_intp thisbuf = 0;
     npy_intp size;
     npy_intp bytes, totalbytes;
@@ -3632,10 +3649,11 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
     size = (num >= 0) ? num : FROM_BUFFER_SIZE;
 
     /*
-     * When dtype->subarray is true, PyArray_NewFromDescr will decref dtype
-     * even on success, so make sure it stays around until exit.
+     * Array creation may move sub-array dimensions from the dtype to array
+     * dimensions, so we need to use the original dtype when reading.
      */
     Py_INCREF(dtype);
+
     r = (PyArrayObject *)
         PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &size,
                              NULL, NULL, 0, NULL);
@@ -3643,6 +3661,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
         Py_DECREF(dtype);
         return NULL;
     }
+
     clean_sep = swab_separator(sep);
     if (clean_sep == NULL) {
         err = 1;
@@ -3652,9 +3671,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
     NPY_BEGIN_ALLOW_THREADS;
     totalbytes = bytes = size * dtype->elsize;
     dptr = PyArray_DATA(r);
-    for (i= 0; num < 0 || i < num; i++) {
-        if (next(&stream, dptr, dtype, stream_data) < 0) {
-            /* EOF */
+    for (i = 0; num < 0 || i < num; i++) {
+        stop_reading_flag = next(&stream, dptr, dtype, stream_data);
+        if (stop_reading_flag < 0) {
             break;
         }
         *nread += 1;
@@ -3671,7 +3690,12 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
             dptr = tmp + (totalbytes - bytes);
             thisbuf = 0;
         }
-        if (skip_sep(&stream, clean_sep, stream_data) < 0) {
+        stop_reading_flag = skip_sep(&stream, clean_sep, stream_data);
+        if (stop_reading_flag < 0) {
+            if (num == i + 1) {
+                /* if we read as much as requested sep is optional */
+                stop_reading_flag = -1;
+            }
             break;
         }
     }
@@ -3690,8 +3714,24 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
         }
     }
     NPY_END_ALLOW_THREADS;
+
     free(clean_sep);
 
+    if (stop_reading_flag == -2) {
+        if (PyErr_Occurred()) {
+            /* If an error is already set (unlikely), do not create new one */
+            Py_DECREF(r);
+            Py_DECREF(dtype);
+            return NULL;
+        }
+        /* 2019-09-12, NumPy 1.18 */
+        if (DEPRECATE(
+                "string or file could not be read to its end due to unmatched "
+                "data; this will raise a ValueError in the future.") < 0) {
+            goto fail;
+        }
+    }
+
 fail:
     Py_DECREF(dtype);
     if (err == 1) {
@@ -3710,9 +3750,8 @@ fail:
  * Given a ``FILE *`` pointer ``fp``, and a ``PyArray_Descr``, return an
  * array corresponding to the data encoded in that file.
  *
- * If the dtype is NULL, the default array type is used (double).
- * If non-null, the reference is stolen and if dtype->subarray is true dtype
- * will be decrefed even on success.
+ * The reference to `dtype` is stolen (it is possible that the passed in
+ * dtype is not held on to).
  *
  * The number of elements to read is given as ``num``; if it is < 0, then
  * then as many as possible are read.
@@ -3760,7 +3799,6 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep)
                 (skip_separator) fromfile_skip_separator, NULL);
     }
     if (ret == NULL) {
-        Py_DECREF(dtype);
         return NULL;
     }
     if (((npy_intp) nread) < num) {
@@ -3850,7 +3888,13 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
     s = (npy_intp)ts - offset;
     n = (npy_intp)count;
     itemsize = type->elsize;
-    if (n < 0 ) {
+    if (n < 0) {
+        if (itemsize == 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "cannot determine count if itemsize is 0");
+            Py_DECREF(type);
+            return NULL;
+        }
         if (s % itemsize != 0) {
             PyErr_SetString(PyExc_ValueError,
                             "buffer size must be a multiple"\
@@ -3955,6 +3999,11 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype,
                 return NULL;
             }
         }
+        /*
+         * NewFromDescr may replace dtype to absorb subarray shape
+         * into the array, so get size beforehand.
+         */
+        npy_intp size_to_copy = num*dtype->elsize;
         ret = (PyArrayObject *)
             PyArray_NewFromDescr(&PyArray_Type, dtype,
                                  1, &num, NULL, NULL,
@@ -3962,7 +4011,7 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype,
         if (ret == NULL) {
             return NULL;
         }
-        memcpy(PyArray_DATA(ret), data, num*dtype->elsize);
+        memcpy(PyArray_DATA(ret), data, size_to_copy);
     }
     else {
         /* read from character-based string */
@@ -4043,7 +4092,7 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
     }
     for (i = 0; (i < count || count == -1) &&
              (value = PyIter_Next(iter)); i++) {
-        if (i >= elcount) {
+        if (i >= elcount && elsize != 0) {
             npy_intp nbytes;
             /*
               Grow PyArray_DATA(ret):
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 60e6bbae2..d21bb9776 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -27,6 +27,40 @@
 #include "datetime_strings.h"
 
 /*
+ * Computes the python `ret, d = divmod(d, unit)`.
+ *
+ * Note that GCC is smart enough at -O2 to eliminate the `if (mod < 0)` branch
+ * for subsequent calls to this function - it is able to deduce that `*d >= 0`.
+ */
+static inline
+npy_int64 extract_unit_64(npy_int64 *d, npy_int64 unit) {
+    assert(unit > 0);
+    npy_int64 div = *d / unit;  /* C division truncates toward zero ... */
+    npy_int64 mod = *d % unit;  /* ... so mod has the sign of *d (or is 0) */
+    if (mod < 0) {
+        mod += unit;  /* adjust to floor semantics: 0 <= mod < unit */
+        div -= 1;
+    }
+    assert(mod >= 0);
+    *d = mod;  /* in/out: the remainder replaces the input value */
+    return div;
+}
+
+static inline  /* npy_int32 counterpart of extract_unit_64 */
+npy_int32 extract_unit_32(npy_int32 *d, npy_int32 unit) {
+    assert(unit > 0);
+    npy_int32 div = *d / unit;  /* C division truncates toward zero ... */
+    npy_int32 mod = *d % unit;  /* ... so mod has the sign of *d (or is 0) */
+    if (mod < 0) {
+        mod += unit;  /* adjust to floor semantics: 0 <= mod < unit */
+        div -= 1;
+    }
+    assert(mod >= 0);
+    *d = mod;  /* in/out: the remainder replaces the input value */
+    return div;
+}
+
+/*
  * Imports the PyDateTime functions so we can create these objects.
  * This is called during module initialization
  */
@@ -160,17 +194,7 @@ days_to_yearsdays(npy_int64 *days_)
     npy_int64 year;
 
     /* Break down the 400 year cycle to get the year and day within the year */
-    if (days >= 0) {
-        year = 400 * (days / days_per_400years);
-        days = days % days_per_400years;
-    }
-    else {
-        year = 400 * ((days - (days_per_400years - 1)) / days_per_400years);
-        days = days % days_per_400years;
-        if (days < 0) {
-            days += days_per_400years;
-        }
-    }
+    year = 400 * extract_unit_64(&days, days_per_400years);
 
     /* Work out the year/day within the 400 year cycle */
     if (days >= 366) {
@@ -386,7 +410,8 @@ convert_datetimestruct_to_datetime(PyArray_DatetimeMetaData *meta,
  * TO BE REMOVED - NOT USED INTERNALLY.
  */
 NPY_NO_EXPORT npy_datetime
-PyArray_DatetimeStructToDatetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d)
+PyArray_DatetimeStructToDatetime(
+        NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_datetimestruct *NPY_UNUSED(d))
 {
     PyErr_SetString(PyExc_RuntimeError,
             "The NumPy PyArray_DatetimeStructToDatetime function has "
@@ -400,7 +425,8 @@ PyArray_DatetimeStructToDatetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d)
  * TO BE REMOVED - NOT USED INTERNALLY.
  */
 NPY_NO_EXPORT npy_datetime
-PyArray_TimedeltaStructToTimedelta(NPY_DATETIMEUNIT fr, npy_timedeltastruct *d)
+PyArray_TimedeltaStructToTimedelta(
+        NPY_DATETIMEUNIT NPY_UNUSED(fr), npy_timedeltastruct *NPY_UNUSED(d))
 {
     PyErr_SetString(PyExc_RuntimeError,
             "The NumPy PyArray_TimedeltaStructToTimedelta function has "
@@ -409,26 +435,6 @@ PyArray_TimedeltaStructToTimedelta(NPY_DATETIMEUNIT fr, npy_timedeltastruct *d)
 }
 
 /*
- * Computes the python `ret, d = divmod(d, unit)`.
- *
- * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch
- * for subsequent calls to this command - it is able to deduce that `*d >= 0`.
- */
-static inline
-npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) {
-    assert(unit > 0);
-    npy_int64 div = *d / unit;
-    npy_int64 mod = *d % unit;
-    if (mod < 0) {
-        mod += unit;
-        div -= 1;
-    }
-    assert(mod >= 0);
-    *d = mod;
-    return div;
-}
-
-/*
  * Converts a datetime based on the given metadata into a datetimestruct
  */
 NPY_NO_EXPORT int
@@ -436,7 +442,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
                                     npy_datetime dt,
                                     npy_datetimestruct *out)
 {
-    npy_int64 perday;
+    npy_int64 days;
 
     /* Initialize the output to all zeros */
     memset(out, 0, sizeof(npy_datetimestruct));
@@ -471,7 +477,7 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
             break;
 
         case NPY_FR_M:
-            out->year  = 1970 + extract_unit(&dt, 12);
+            out->year  = 1970 + extract_unit_64(&dt, 12);
             out->month = dt + 1;
             break;
 
@@ -485,73 +491,67 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
             break;
 
         case NPY_FR_h:
-            perday = 24LL;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            days      = extract_unit_64(&dt, 24LL);
+            set_datetimestruct_days(days, out);
             out->hour = (int)dt;
             break;
 
         case NPY_FR_m:
-            perday = 24LL * 60;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 60);
-            out->min = (int)dt;
+            days      =      extract_unit_64(&dt, 60LL*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 60LL);
+            out->min  = (int)dt;
             break;
 
         case NPY_FR_s:
-            perday = 24LL * 60 * 60;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 60*60);
-            out->min  = (int)extract_unit(&dt, 60);
+            days      =      extract_unit_64(&dt, 60LL*60*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 60LL*60);
+            out->min  = (int)extract_unit_64(&dt, 60LL);
             out->sec  = (int)dt;
             break;
 
         case NPY_FR_ms:
-            perday = 24LL * 60 * 60 * 1000;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 1000LL*60*60);
-            out->min  = (int)extract_unit(&dt, 1000LL*60);
-            out->sec  = (int)extract_unit(&dt, 1000LL);
+            days      =      extract_unit_64(&dt, 1000LL*60*60*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 1000LL*60*60);
+            out->min  = (int)extract_unit_64(&dt, 1000LL*60);
+            out->sec  = (int)extract_unit_64(&dt, 1000LL);
             out->us   = (int)(dt * 1000);
             break;
 
         case NPY_FR_us:
-            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL;
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 1000LL*1000*60*60);
-            out->min  = (int)extract_unit(&dt, 1000LL*1000*60);
-            out->sec  = (int)extract_unit(&dt, 1000LL*1000);
+            days      =      extract_unit_64(&dt, 1000LL*1000*60*60*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 1000LL*1000*60*60);
+            out->min  = (int)extract_unit_64(&dt, 1000LL*1000*60);
+            out->sec  = (int)extract_unit_64(&dt, 1000LL*1000);
             out->us   = (int)dt;
             break;
 
         case NPY_FR_ns:
-            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 1000LL*1000*1000*60*60);
-            out->min  = (int)extract_unit(&dt, 1000LL*1000*1000*60);
-            out->sec  = (int)extract_unit(&dt, 1000LL*1000*1000);
-            out->us   = (int)extract_unit(&dt, 1000LL);
+            days      =      extract_unit_64(&dt, 1000LL*1000*1000*60*60*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 1000LL*1000*1000*60*60);
+            out->min  = (int)extract_unit_64(&dt, 1000LL*1000*1000*60);
+            out->sec  = (int)extract_unit_64(&dt, 1000LL*1000*1000);
+            out->us   = (int)extract_unit_64(&dt, 1000LL);
             out->ps   = (int)(dt * 1000);
             break;
 
         case NPY_FR_ps:
-            perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;
-
-            set_datetimestruct_days(extract_unit(&dt, perday), out);
-            out->hour = (int)extract_unit(&dt, 1000LL*1000*1000*1000*60*60);
-            out->min  = (int)extract_unit(&dt, 1000LL*1000*1000*1000*60);
-            out->sec  = (int)extract_unit(&dt, 1000LL*1000*1000*1000);
-            out->us   = (int)extract_unit(&dt, 1000LL*1000);
+            days      =      extract_unit_64(&dt, 1000LL*1000*1000*1000*60*60*24);
+            set_datetimestruct_days(days, out);
+            out->hour = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*60*60);
+            out->min  = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*60);
+            out->sec  = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000);
+            out->us   = (int)extract_unit_64(&dt, 1000LL*1000);
             out->ps   = (int)(dt);
             break;
 
         case NPY_FR_fs:
             /* entire range is only +- 2.6 hours */
-            out->hour = (int)extract_unit(&dt, 1000LL*1000*1000*1000*1000*60*60);
+            out->hour = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*1000*60*60);
             if (out->hour < 0) {
                 out->year  = 1969;
                 out->month = 12;
@@ -559,16 +559,16 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
                 out->hour  += 24;
                 assert(out->hour >= 0);
             }
-            out->min  = (int)extract_unit(&dt, 1000LL*1000*1000*1000*1000*60);
-            out->sec  = (int)extract_unit(&dt, 1000LL*1000*1000*1000*1000);
-            out->us   = (int)extract_unit(&dt, 1000LL*1000*1000);
-            out->ps   = (int)extract_unit(&dt, 1000LL);
+            out->min  = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*1000*60);
+            out->sec  = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*1000);
+            out->us   = (int)extract_unit_64(&dt, 1000LL*1000*1000);
+            out->ps   = (int)extract_unit_64(&dt, 1000LL);
             out->as   = (int)(dt * 1000);
             break;
 
         case NPY_FR_as:
             /* entire range is only +- 9.2 seconds */
-            out->sec = (int)extract_unit(&dt, 1000LL*1000*1000*1000*1000*1000);
+            out->sec = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000*1000*1000);
             if (out->sec < 0) {
                 out->year  = 1969;
                 out->month = 12;
@@ -578,8 +578,8 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
                 out->sec   += 60;
                 assert(out->sec >= 0);
             }
-            out->us   = (int)extract_unit(&dt, 1000LL*1000*1000*1000);
-            out->ps   = (int)extract_unit(&dt, 1000LL*1000);
+            out->us   = (int)extract_unit_64(&dt, 1000LL*1000*1000*1000);
+            out->ps   = (int)extract_unit_64(&dt, 1000LL*1000);
             out->as   = (int)dt;
             break;
 
@@ -600,8 +600,9 @@ convert_datetime_to_datetimestruct(PyArray_DatetimeMetaData *meta,
  * TO BE REMOVED - NOT USED INTERNALLY.
  */
 NPY_NO_EXPORT void
-PyArray_DatetimeToDatetimeStruct(npy_datetime val, NPY_DATETIMEUNIT fr,
-                                 npy_datetimestruct *result)
+PyArray_DatetimeToDatetimeStruct(
+        npy_datetime NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr),
+        npy_datetimestruct *result)
 {
     PyErr_SetString(PyExc_RuntimeError,
             "The NumPy PyArray_DatetimeToDatetimeStruct function has "
@@ -621,8 +622,9 @@ PyArray_DatetimeToDatetimeStruct(npy_datetime val, NPY_DATETIMEUNIT fr,
  * TO BE REMOVED - NOT USED INTERNALLY.
  */
 NPY_NO_EXPORT void
-PyArray_TimedeltaToTimedeltaStruct(npy_timedelta val, NPY_DATETIMEUNIT fr,
-                                 npy_timedeltastruct *result)
+PyArray_TimedeltaToTimedeltaStruct(
+        npy_timedelta NPY_UNUSED(val), NPY_DATETIMEUNIT NPY_UNUSED(fr),
+        npy_timedeltastruct *result)
 {
     PyErr_SetString(PyExc_RuntimeError,
             "The NumPy PyArray_TimedeltaToTimedeltaStruct function has "
@@ -2013,20 +2015,8 @@ add_seconds_to_datetimestruct(npy_datetimestruct *dts, int seconds)
     int minutes;
 
     dts->sec += seconds;
-    if (dts->sec < 0) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        if (dts->sec < 0) {
-            --minutes;
-            dts->sec += 60;
-        }
-        add_minutes_to_datetimestruct(dts, minutes);
-    }
-    else if (dts->sec >= 60) {
-        minutes = dts->sec / 60;
-        dts->sec = dts->sec % 60;
-        add_minutes_to_datetimestruct(dts, minutes);
-    }
+    minutes = extract_unit_32(&dts->sec, 60);
+    add_minutes_to_datetimestruct(dts, minutes);
 }
 
 /*
@@ -2038,28 +2028,13 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes)
 {
     int isleap;
 
-    /* MINUTES */
     dts->min += minutes;
-    while (dts->min < 0) {
-        dts->min += 60;
-        dts->hour--;
-    }
-    while (dts->min >= 60) {
-        dts->min -= 60;
-        dts->hour++;
-    }
 
-    /* HOURS */
-    while (dts->hour < 0) {
-        dts->hour += 24;
-        dts->day--;
-    }
-    while (dts->hour >= 24) {
-        dts->hour -= 24;
-        dts->day++;
-    }
+    /* propagate invalid minutes into hour and day changes */
+    dts->hour += extract_unit_32(&dts->min,  60);
+    dts->day  += extract_unit_32(&dts->hour, 24);
 
-    /* DAYS */
+    /* propagate invalid days into month and year changes */
     if (dts->day < 1) {
         dts->month--;
         if (dts->month < 1) {
@@ -2886,7 +2861,6 @@ convert_datetime_to_pyobject(npy_datetime dt, PyArray_DatetimeMetaData *meta)
 NPY_NO_EXPORT PyObject *
 convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
 {
-    PyObject *ret = NULL;
     npy_timedelta value;
     int days = 0, seconds = 0, useconds = 0;
 
@@ -2916,54 +2890,47 @@ convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
     /* Convert to days/seconds/useconds */
     switch (meta->base) {
         case NPY_FR_W:
-            value *= 7;
+            days = value * 7;
             break;
         case NPY_FR_D:
+            days = value;
             break;
         case NPY_FR_h:
-            seconds = (int)((value % 24) * (60*60));
-            value = value / 24;
+            days = extract_unit_64(&value, 24ULL);
+            seconds = value*60*60;
             break;
         case NPY_FR_m:
-            seconds = (int)(value % (24*60)) * 60;
-            value = value / (24*60);
+            days = extract_unit_64(&value, 60ULL*24);
+            seconds = value*60;
             break;
         case NPY_FR_s:
-            seconds = (int)(value % (24*60*60));
-            value = value / (24*60*60);
+            days = extract_unit_64(&value, 60ULL*60*24);
+            seconds = value;
             break;
         case NPY_FR_ms:
-            useconds = (int)(value % 1000) * 1000;
-            value = value / 1000;
-            seconds = (int)(value % (24*60*60));
-            value = value / (24*60*60);
+            days     = extract_unit_64(&value, 1000ULL*60*60*24);
+            seconds  = extract_unit_64(&value, 1000ULL);
+            useconds = value*1000;
             break;
         case NPY_FR_us:
-            useconds = (int)(value % (1000*1000));
-            value = value / (1000*1000);
-            seconds = (int)(value % (24*60*60));
-            value = value / (24*60*60);
+            days     = extract_unit_64(&value, 1000ULL*1000*60*60*24);
+            seconds  = extract_unit_64(&value, 1000ULL*1000);
+            useconds = value;
             break;
         default:
+            /* unreachable, handled by the `if` above */
+            assert(NPY_FALSE);
             break;
     }
     /*
-     * 'value' represents days, and seconds/useconds are filled.
-     *
      * If it would overflow the datetime.timedelta days, return a raw int
      */
-    if (value < -999999999 || value > 999999999) {
+    if (days < -999999999 || days > 999999999) {
         return PyLong_FromLongLong(td);
     }
     else {
-        days = (int)value;
-        ret = PyDelta_FromDSU(days, seconds, useconds);
-        if (ret == NULL) {
-            return NULL;
-        }
+        return PyDelta_FromDSU(days, seconds, useconds);
     }
-
-    return ret;
 }
 
 /*
@@ -3128,7 +3095,7 @@ is_any_numpy_datetime_or_timedelta(PyObject *obj)
  */
 NPY_NO_EXPORT int
 convert_pyobjects_to_datetimes(int count,
-                               PyObject **objs, int *type_nums,
+                               PyObject **objs, const int *type_nums,
                                NPY_CASTING casting,
                                npy_int64 *out_values,
                                PyArray_DatetimeMetaData *inout_meta)
diff --git a/numpy/core/src/multiarray/datetime_busday.c b/numpy/core/src/multiarray/datetime_busday.c
index c04a6c125..cdeb65d0e 100644
--- a/numpy/core/src/multiarray/datetime_busday.c
+++ b/numpy/core/src/multiarray/datetime_busday.c
@@ -48,7 +48,7 @@ get_day_of_week(npy_datetime date)
  */
 static int
 is_holiday(npy_datetime date,
-            npy_datetime *holidays_begin, npy_datetime *holidays_end)
+            npy_datetime *holidays_begin, const npy_datetime *holidays_end)
 {
     npy_datetime *trial;
 
@@ -88,7 +88,7 @@ is_holiday(npy_datetime date,
  */
 static npy_datetime *
 find_earliest_holiday_on_or_after(npy_datetime date,
-            npy_datetime *holidays_begin, npy_datetime *holidays_end)
+            npy_datetime *holidays_begin, const npy_datetime *holidays_end)
 {
     npy_datetime *trial;
 
@@ -127,7 +127,7 @@ find_earliest_holiday_on_or_after(npy_datetime date,
  */
 static npy_datetime *
 find_earliest_holiday_after(npy_datetime date,
-            npy_datetime *holidays_begin, npy_datetime *holidays_end)
+            npy_datetime *holidays_begin, const npy_datetime *holidays_end)
 {
     npy_datetime *trial;
 
@@ -159,7 +159,7 @@ static int
 apply_business_day_roll(npy_datetime date, npy_datetime *out,
                     int *out_day_of_week,
                     NPY_BUSDAY_ROLL roll,
-                    npy_bool *weekmask,
+                    const npy_bool *weekmask,
                     npy_datetime *holidays_begin, npy_datetime *holidays_end)
 {
     int day_of_week;
@@ -361,7 +361,7 @@ apply_business_day_offset(npy_datetime date, npy_int64 offset,
 static int
 apply_business_day_count(npy_datetime date_begin, npy_datetime date_end,
                     npy_int64 *out,
-                    npy_bool *weekmask, int busdays_in_weekmask,
+                    const npy_bool *weekmask, int busdays_in_weekmask,
                     npy_datetime *holidays_begin, npy_datetime *holidays_end)
 {
     npy_int64 count, whole_weeks;
@@ -722,7 +722,7 @@ finish:
  */
 NPY_NO_EXPORT PyArrayObject *
 is_business_day(PyArrayObject *dates, PyArrayObject *out,
-                    npy_bool *weekmask, int busdays_in_weekmask,
+                    const npy_bool *weekmask, int busdays_in_weekmask,
                     npy_datetime *holidays_begin, npy_datetime *holidays_end)
 {
     PyArray_DatetimeMetaData temp_meta;
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index c7db092e6..23d140cf6 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -149,7 +149,7 @@ array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args)
                              arg == '|' || arg == '=')
 
 static int
-_check_for_commastring(char *type, Py_ssize_t len)
+_check_for_commastring(const char *type, Py_ssize_t len)
 {
     Py_ssize_t i;
     int sqbracket;
@@ -1385,7 +1385,6 @@ NPY_NO_EXPORT int
 PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
 {
     int check_num = NPY_NOTYPE + 10;
-    PyObject *item;
     int elsize = 0;
     char endian = '=';
 
@@ -1664,16 +1663,22 @@ finish:
         PyErr_Clear();
         /* Now check to see if the object is registered in typeDict */
         if (typeDict != NULL) {
-            item = PyDict_GetItem(typeDict, obj);
+            PyObject *item = NULL;
 #if defined(NPY_PY3K)
-            if (!item && PyBytes_Check(obj)) {
+            if (PyBytes_Check(obj)) {
                 PyObject *tmp;
                 tmp = PyUnicode_FromEncodedObject(obj, "ascii", "strict");
-                if (tmp != NULL) {
-                    item = PyDict_GetItem(typeDict, tmp);
-                    Py_DECREF(tmp);
+                if (tmp == NULL) {
+                    goto fail;
                 }
+                item = PyDict_GetItem(typeDict, tmp);
+                Py_DECREF(tmp);
+            }
+            else {
+                item = PyDict_GetItem(typeDict, obj);
             }
+#else
+            item = PyDict_GetItem(typeDict, obj);
 #endif
             if (item) {
                 /* Check for a deprecated Numeric-style typecode */
@@ -3277,7 +3282,7 @@ arraydescr_richcompare(PyArray_Descr *self, PyObject *other, int cmp_op)
 }
 
 static int
-descr_nonzero(PyObject *self)
+descr_nonzero(PyObject *NPY_UNUSED(self))
 {
     /* `bool(np.dtype(...)) == True` for all dtypes. Needed to override default
      * nonzero implementation, which checks if `len(object) > 0`. */
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index a90416a40..ef0dd4a01 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3337,7 +3337,7 @@ get_decsrcref_transfer_function(int aligned,
     /* If there are subarrays, need to wrap it */
     else if (PyDataType_HASSUBARRAY(src_dtype)) {
         PyArray_Dims src_shape = {NULL, -1};
-        npy_intp src_size = 1;
+        npy_intp src_size;
         PyArray_StridedUnaryOp *stransfer;
         NpyAuxData *data;
 
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index bed92403f..116e37ce5 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -190,7 +190,7 @@ array_strides_set(PyArrayObject *self, PyObject *obj)
 
 
 static PyObject *
-array_priority_get(PyArrayObject *self)
+array_priority_get(PyArrayObject *NPY_UNUSED(self))
 {
     return PyFloat_FromDouble(NPY_PRIORITY);
 }
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 762563eb5..a6ac902d3 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -1336,7 +1336,11 @@ PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int axis,
     PyArray_ArgSortFunc *argsort;
     PyObject *ret;
 
-    if (which < 0 || which >= NPY_NSELECTS) {
+    /*
+     * As a C-exported function, enum NPY_SELECTKIND loses its enum property.
+     * Check the value explicitly to make sure it is in range.
+     */
+    if ((int)which < 0 || (int)which >= NPY_NSELECTS) {
         PyErr_SetString(PyExc_ValueError,
                         "not a valid partition kind");
         return NULL;
@@ -1456,8 +1460,8 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
 
     /* Now we can check the axis */
     nd = PyArray_NDIM(mps[0]);
-    if ((nd == 0) || (PyArray_SIZE(mps[0]) == 1)) {
-        /* single element case */
+    if ((nd == 0) || (PyArray_SIZE(mps[0]) <= 1)) {
+        /* empty/single element case */
         ret = (PyArrayObject *)PyArray_NewFromDescr(
             &PyArray_Type, PyArray_DescrFromType(NPY_INTP),
             PyArray_NDIM(mps[0]), PyArray_DIMS(mps[0]), NULL, NULL,
@@ -1466,7 +1470,9 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
         if (ret == NULL) {
             goto fail;
         }
-        *((npy_intp *)(PyArray_DATA(ret))) = 0;
+        if (PyArray_SIZE(mps[0]) > 0) {
+            *((npy_intp *)(PyArray_DATA(ret))) = 0;
+        }
         goto finish;
     }
     if (check_and_adjust_axis(&axis, nd) < 0) {
@@ -1516,19 +1522,28 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
         char *valbuffer, *indbuffer;
         int *swaps;
 
-        if (N == 0 || maxelsize == 0 || sizeof(npy_intp) == 0) {
-            goto fail;
+        assert(N > 0);  /* Guaranteed and assumed by indbuffer */
+        npy_intp valbufsize = N * maxelsize;
+        if (NPY_UNLIKELY(valbufsize == 0)) {
+            valbufsize = 1;  /* Ensure allocation is not empty */
+        }
-        valbuffer = PyDataMem_NEW(N * maxelsize);
+
+        valbuffer = PyDataMem_NEW(valbufsize);
         if (valbuffer == NULL) {
             goto fail;
         }
         indbuffer = PyDataMem_NEW(N * sizeof(npy_intp));
         if (indbuffer == NULL) {
+            PyDataMem_FREE(valbuffer);
+            goto fail;
+        }
+        swaps = malloc(NPY_LIKELY(n > 0) ? n * sizeof(int) : 1);
+        if (swaps == NULL) {
+            PyDataMem_FREE(valbuffer);
             PyDataMem_FREE(indbuffer);
             goto fail;
         }
-        swaps = malloc(n*sizeof(int));
+
         for (j = 0; j < n; j++) {
             swaps[j] = PyArray_ISBYTESWAPPED(mps[j]);
         }
@@ -1557,8 +1572,8 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
 #else
                 if (rcode < 0) {
 #endif
-                    npy_free_cache(valbuffer, N * maxelsize);
-                    npy_free_cache(indbuffer, N * sizeof(npy_intp));
+                    PyDataMem_FREE(valbuffer);
+                    PyDataMem_FREE(indbuffer);
                     free(swaps);
                     goto fail;
                 }
@@ -2464,7 +2479,7 @@ finish:
  * array of values, which must be of length PyArray_NDIM(self).
  */
 NPY_NO_EXPORT PyObject *
-PyArray_MultiIndexGetItem(PyArrayObject *self, npy_intp *multi_index)
+PyArray_MultiIndexGetItem(PyArrayObject *self, const npy_intp *multi_index)
 {
     int idim, ndim = PyArray_NDIM(self);
     char *data = PyArray_DATA(self);
@@ -2492,7 +2507,7 @@ PyArray_MultiIndexGetItem(PyArrayObject *self, npy_intp *multi_index)
  * Returns 0 on success, -1 on failure.
  */
 NPY_NO_EXPORT int
-PyArray_MultiIndexSetItem(PyArrayObject *self, npy_intp *multi_index,
+PyArray_MultiIndexSetItem(PyArrayObject *self, const npy_intp *multi_index,
                                                 PyObject *obj)
 {
     int idim, ndim = PyArray_NDIM(self);
diff --git a/numpy/core/src/multiarray/item_selection.h b/numpy/core/src/multiarray/item_selection.h
index 90bb5100d..2276b4db7 100644
--- a/numpy/core/src/multiarray/item_selection.h
+++ b/numpy/core/src/multiarray/item_selection.h
@@ -15,7 +15,7 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides);
  * array of values, which must be of length PyArray_NDIM(self).
  */
 NPY_NO_EXPORT PyObject *
-PyArray_MultiIndexGetItem(PyArrayObject *self, npy_intp *multi_index);
+PyArray_MultiIndexGetItem(PyArrayObject *self, const npy_intp *multi_index);
 
 /*
  * Sets a single item in the array, based on a single multi-index
@@ -24,7 +24,7 @@ PyArray_MultiIndexGetItem(PyArrayObject *self, npy_intp *multi_index);
  * Returns 0 on success, -1 on failure.
  */
 NPY_NO_EXPORT int
-PyArray_MultiIndexSetItem(PyArrayObject *self, npy_intp *multi_index,
+PyArray_MultiIndexSetItem(PyArrayObject *self, const npy_intp *multi_index,
                                                 PyObject *obj);
 
 #endif
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 0d7679fe7..e66bb36aa 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -98,7 +98,7 @@ parse_index_entry(PyObject *op, npy_intp *step_size,
 
 /* get the dataptr from its current coordinates for simple iterator */
 static char*
-get_ptr_simple(PyArrayIterObject* iter, npy_intp *coordinates)
+get_ptr_simple(PyArrayIterObject* iter, const npy_intp *coordinates)
 {
     npy_intp i;
     char *ret;
@@ -840,7 +840,6 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val)
         if (check_and_adjust_index(&start, self->size, -1, NULL) < 0) {
             goto finish;
         }
-        retval = 0;
         PyArray_ITER_GOTO1D(self, start);
         retval = type->f->setitem(val, self->dataptr, self->ao);
         PyArray_ITER_RESET(self);
@@ -1666,7 +1665,7 @@ static char* _set_constant(PyArrayNeighborhoodIterObject* iter,
 
 /* set the dataptr from its current coordinates */
 static char*
-get_ptr_constant(PyArrayIterObject* _iter, npy_intp *coordinates)
+get_ptr_constant(PyArrayIterObject* _iter, const npy_intp *coordinates)
 {
     int i;
     npy_intp bd, _coordinates[NPY_MAXDIMS];
@@ -1721,7 +1720,7 @@ __npy_pos_remainder(npy_intp i, npy_intp n)
 
 /* set the dataptr from its current coordinates */
 static char*
-get_ptr_mirror(PyArrayIterObject* _iter, npy_intp *coordinates)
+get_ptr_mirror(PyArrayIterObject* _iter, const npy_intp *coordinates)
 {
     int i;
     npy_intp bd, _coordinates[NPY_MAXDIMS], lb;
@@ -1755,7 +1754,7 @@ __npy_euclidean_division(npy_intp i, npy_intp n)
     _coordinates[c] = lb + __npy_euclidean_division(bd, p->limits_sizes[c]);
 
 static char*
-get_ptr_circular(PyArrayIterObject* _iter, npy_intp *coordinates)
+get_ptr_circular(PyArrayIterObject* _iter, const npy_intp *coordinates)
 {
     int i;
     npy_intp bd, _coordinates[NPY_MAXDIMS], lb;
@@ -1777,7 +1776,7 @@ get_ptr_circular(PyArrayIterObject* _iter, npy_intp *coordinates)
  * A Neighborhood Iterator object.
 */
 NPY_NO_EXPORT PyObject*
-PyArray_NeighborhoodIterNew(PyArrayIterObject *x, npy_intp *bounds,
+PyArray_NeighborhoodIterNew(PyArrayIterObject *x, const npy_intp *bounds,
                             int mode, PyArrayObject* fill)
 {
     int i;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 9bb85e320..247864775 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -176,7 +176,7 @@ unpack_tuple(PyTupleObject *index, PyObject **result, npy_intp result_n)
 
 /* Unpack a single scalar index, taking a new reference to match unpack_tuple */
 static NPY_INLINE npy_intp
-unpack_scalar(PyObject *index, PyObject **result, npy_intp result_n)
+unpack_scalar(PyObject *index, PyObject **result, npy_intp NPY_UNUSED(result_n))
 {
     Py_INCREF(index);
     result[0] = index;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 79c60aa2e..e5845f2f6 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -1051,7 +1051,7 @@ any_array_ufunc_overrides(PyObject *args, PyObject *kwds)
 
 
 NPY_NO_EXPORT PyObject *
-array_ufunc(PyArrayObject *self, PyObject *args, PyObject *kwds)
+array_ufunc(PyArrayObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 {
     PyObject *ufunc, *method_name, *normal_args, *ufunc_method;
     PyObject *result = NULL;
@@ -1100,7 +1100,7 @@ cleanup:
 }
 
 static PyObject *
-array_function(PyArrayObject *self, PyObject *c_args, PyObject *c_kwds)
+array_function(PyArrayObject *NPY_UNUSED(self), PyObject *c_args, PyObject *c_kwds)
 {
     PyObject *func, *types, *args, *kwargs, *result;
     static char *kwlist[] = {"func", "types", "args", "kwargs", NULL};
@@ -1179,7 +1179,7 @@ array_resize(PyArrayObject *self, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
-    ret = PyArray_Resize(self, &newshape, refcheck, NPY_CORDER);
+    ret = PyArray_Resize(self, &newshape, refcheck, NPY_ANYORDER);
     npy_free_cache_dim_obj(newshape);
     if (ret == NULL) {
         return NULL;
@@ -1732,7 +1732,7 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 }
 
 static PyObject *
-array_reduce_ex_regular(PyArrayObject *self, int protocol)
+array_reduce_ex_regular(PyArrayObject *self, int NPY_UNUSED(protocol))
 {
     PyObject *subclass_array_reduce = NULL;
     PyObject *ret;
@@ -1861,7 +1861,7 @@ array_reduce_ex(PyArrayObject *self, PyObject *args)
         PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
         (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
          ((PyObject*)self)->ob_type != &PyArray_Type) ||
-        PyDataType_ISUNSIZED(descr)) {
+        descr->elsize == 0) {
         /* The PickleBuffer class from version 5 of the pickle protocol
          * can only be used for arrays backed by a contiguous data buffer.
          * For all other cases we fallback to the generic array_reduce
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index bef978c94..441567049 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -286,7 +286,8 @@ PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int nd,
  * Convert to a 1D C-array
  */
 NPY_NO_EXPORT int
-PyArray_As1D(PyObject **op, char **ptr, int *d1, int typecode)
+PyArray_As1D(PyObject **NPY_UNUSED(op), char **NPY_UNUSED(ptr),
+             int *NPY_UNUSED(d1), int NPY_UNUSED(typecode))
 {
     /* 2008-07-14, 1.5 */
     PyErr_SetString(PyExc_NotImplementedError,
@@ -298,7 +299,8 @@ PyArray_As1D(PyObject **op, char **ptr, int *d1, int typecode)
  * Convert to a 2D C-array
  */
 NPY_NO_EXPORT int
-PyArray_As2D(PyObject **op, char ***ptr, int *d1, int *d2, int typecode)
+PyArray_As2D(PyObject **NPY_UNUSED(op), char ***NPY_UNUSED(ptr),
+             int *NPY_UNUSED(d1), int *NPY_UNUSED(d2), int NPY_UNUSED(typecode))
 {
     /* 2008-07-14, 1.5 */
     PyErr_SetString(PyExc_NotImplementedError,
@@ -1560,7 +1562,8 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
     PyArrayObject *oparr = NULL, *ret = NULL;
     npy_bool subok = NPY_FALSE;
     npy_bool copy = NPY_TRUE;
-    int ndmin = 0, nd;
+    int nd;
+    npy_intp ndmin = 0;
     PyArray_Descr *type = NULL;
     PyArray_Descr *oldtype = NULL;
     NPY_ORDER order = NPY_KEEPORDER;
@@ -1631,12 +1634,10 @@ _array_fromobject(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws)
                 }
             }
 
-            /* copy=False with default dtype, order and ndim */
-            if (STRIDING_OK(oparr, order)) {
-                ret = oparr;
-                Py_INCREF(ret);
-                goto finish;
-            }
+            /* copy=False with default dtype, order (any is OK) and ndim */
+            ret = oparr;
+            Py_INCREF(ret);
+            goto finish;
         }
     }
 
@@ -3781,7 +3782,7 @@ _vec_string_no_args(PyArrayObject* char_array,
 }
 
 static PyObject *
-_vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
+_vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kwds))
 {
     PyArrayObject* char_array = NULL;
     PyArray_Descr *type;
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 18ca127e1..db0bfcece 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -1628,15 +1628,12 @@ npyiter_coalesce_axes(NpyIter *iter)
     npy_intp istrides, nstrides = NAD_NSTRIDES();
     NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
     npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-    NpyIter_AxisData *ad_compress;
+    NpyIter_AxisData *ad_compress = axisdata;
     npy_intp new_ndim = 1;
 
     /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
     NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
 
-    axisdata = NIT_AXISDATA(iter);
-    ad_compress = axisdata;
-
     for (idim = 0; idim < ndim-1; ++idim) {
         int can_coalesce = 1;
         npy_intp shape0 = NAD_SHAPE(ad_compress);
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 3b3635afe..d40836dc2 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -24,7 +24,7 @@ static int
 npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
 static int
 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
-                        npy_intp *itershape);
+                        const npy_intp *itershape);
 static int
 npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
                        int oa_ndim);
@@ -55,7 +55,7 @@ npyiter_check_casting(int nop, PyArrayObject **op,
 static int
 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
                     char **op_dataptr,
-                    npy_uint32 *op_flags, int **op_axes,
+                    const npy_uint32 *op_flags, int **op_axes,
                     npy_intp *itershape);
 static void
 npyiter_replace_axisdata(NpyIter *iter, int iop,
@@ -74,23 +74,23 @@ static void
 npyiter_find_best_axis_ordering(NpyIter *iter);
 static PyArray_Descr *
 npyiter_get_common_dtype(int nop, PyArrayObject **op,
-                        npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
+                        const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
                         PyArray_Descr **op_request_dtypes,
                         int only_inputs);
 static PyArrayObject *
 npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                 npy_uint32 flags, npyiter_opitflags *op_itflags,
                 int op_ndim, npy_intp *shape,
-                PyArray_Descr *op_dtype, int *op_axes);
+                PyArray_Descr *op_dtype, const int *op_axes);
 static int
 npyiter_allocate_arrays(NpyIter *iter,
                         npy_uint32 flags,
                         PyArray_Descr **op_dtype, PyTypeObject *subtype,
-                        npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
+                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                         int **op_axes);
 static void
 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
-                            npyiter_opitflags *op_itflags,
+                            const npyiter_opitflags *op_itflags,
                             double *subtype_priority, PyTypeObject **subtype);
 static int
 npyiter_allocate_transfer_functions(NpyIter *iter);
@@ -787,7 +787,7 @@ npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
 
 static int
 npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
-                        npy_intp *itershape)
+                        const npy_intp *itershape)
 {
     char axes_dupcheck[NPY_MAXDIMS];
     int iop, idim;
@@ -1423,7 +1423,7 @@ check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
 static int
 npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
                     char **op_dataptr,
-                    npy_uint32 *op_flags, int **op_axes,
+                    const npy_uint32 *op_flags, int **op_axes,
                     npy_intp *itershape)
 {
     npy_uint32 itflags = NIT_ITFLAGS(iter);
@@ -2409,7 +2409,7 @@ npyiter_find_best_axis_ordering(NpyIter *iter)
  */
 static PyArray_Descr *
 npyiter_get_common_dtype(int nop, PyArrayObject **op,
-                        npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
+                        const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
                         PyArray_Descr **op_request_dtypes,
                         int only_inputs)
 {
@@ -2477,7 +2477,7 @@ static PyArrayObject *
 npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                 npy_uint32 flags, npyiter_opitflags *op_itflags,
                 int op_ndim, npy_intp *shape,
-                PyArray_Descr *op_dtype, int *op_axes)
+                PyArray_Descr *op_dtype, const int *op_axes)
 {
     npy_uint32 itflags = NIT_ITFLAGS(iter);
     int idim, ndim = NIT_NDIM(iter);
@@ -2706,7 +2706,7 @@ static int
 npyiter_allocate_arrays(NpyIter *iter,
                         npy_uint32 flags,
                         PyArray_Descr **op_dtype, PyTypeObject *subtype,
-                        npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
+                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                         int **op_axes)
 {
     npy_uint32 itflags = NIT_ITFLAGS(iter);
@@ -3109,7 +3109,7 @@ npyiter_allocate_arrays(NpyIter *iter,
  */
 static void
 npyiter_get_priority_subtype(int nop, PyArrayObject **op,
-                            npyiter_opitflags *op_itflags,
+                            const npyiter_opitflags *op_itflags,
                             double *subtype_priority,
                             PyTypeObject **subtype)
 {
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index ffea08bb3..4b9d41aa4 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -82,7 +82,8 @@ static int npyiter_cache_values(NewNpyArrayIterObject *self)
 }
 
 static PyObject *
-npyiter_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
+npyiter_new(PyTypeObject *subtype, PyObject *NPY_UNUSED(args),
+            PyObject *NPY_UNUSED(kwds))
 {
     NewNpyArrayIterObject *self;
 
@@ -535,7 +536,7 @@ try_single_dtype:
 }
 
 static int
-npyiter_convert_op_axes(PyObject *op_axes_in, npy_intp nop,
+npyiter_convert_op_axes(PyObject *op_axes_in, int nop,
                         int **op_axes, int *oa_ndim)
 {
     PyObject *a;
@@ -2365,7 +2366,7 @@ npyiter_close(NewNpyArrayIterObject *self)
 }
 
 static PyObject *
-npyiter_exit(NewNpyArrayIterObject *self, PyObject *args)
+npyiter_exit(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
 {
     /* even if called via exception handling, writeback any data */
     return npyiter_close(self);
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index 0ceb994ef..dabc866ff 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -391,7 +391,8 @@ array_matrix_multiply(PyArrayObject *m1, PyObject *m2)
 }
 
 static PyObject *
-array_inplace_matrix_multiply(PyArrayObject *m1, PyObject *m2)
+array_inplace_matrix_multiply(
+        PyArrayObject *NPY_UNUSED(m1), PyObject *NPY_UNUSED(m2))
 {
     PyErr_SetString(PyExc_TypeError,
                     "In-place matrix multiplication is not (yet) supported. "
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 34839b866..9adca6773 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -4492,6 +4492,36 @@ initialize_numeric_types(void)
 
     PyArrayIter_Type.tp_iter = PyObject_SelfIter;
     PyArrayMapIter_Type.tp_iter = PyObject_SelfIter;
+
+    /*
+     * Give types different names when they are the same size (gh-9799).
+     * `np.intX` always refers to the first int of that size in the sequence
+     * `['LONG', 'LONGLONG', 'INT', 'SHORT', 'BYTE']`.
+     */
+#if (NPY_SIZEOF_BYTE == NPY_SIZEOF_SHORT)
+    PyByteArrType_Type.tp_name = "numpy.byte";
+    PyUByteArrType_Type.tp_name = "numpy.ubyte";
+#endif
+#if (NPY_SIZEOF_SHORT == NPY_SIZEOF_INT)
+    PyShortArrType_Type.tp_name = "numpy.short";
+    PyUShortArrType_Type.tp_name = "numpy.ushort";
+#endif
+#if (NPY_SIZEOF_INT == NPY_SIZEOF_LONG)
+    PyIntArrType_Type.tp_name = "numpy.intc";
+    PyUIntArrType_Type.tp_name = "numpy.uintc";
+#endif
+#if (NPY_SIZEOF_LONGLONG == NPY_SIZEOF_LONG)
+    PyLongLongArrType_Type.tp_name = "numpy.longlong";
+    PyULongLongArrType_Type.tp_name = "numpy.ulonglong";
+#endif
+
+    /*
+     * Likewise for longdouble/clongdouble when they are the same size as double.
+     */
+#if (NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE)
+    PyLongDoubleArrType_Type.tp_name = "numpy.longdouble";
+    PyCLongDoubleArrType_Type.tp_name = "numpy.clongdouble";
+#endif
 }
 
 typedef struct {
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 30820737e..4e31f003b 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -26,7 +26,7 @@ static int
 _fix_unknown_dimension(PyArray_Dims *newshape, PyArrayObject *arr);
 
 static int
-_attempt_nocopy_reshape(PyArrayObject *self, int newnd, npy_intp* newdims,
+_attempt_nocopy_reshape(PyArrayObject *self, int newnd, const npy_intp *newdims,
                         npy_intp *newstrides, int is_f_order);
 
 static void
@@ -40,11 +40,11 @@ _putzero(char *optr, PyObject *zero, PyArray_Descr *dtype);
  */
 NPY_NO_EXPORT PyObject *
 PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
-               NPY_ORDER order)
+               NPY_ORDER NPY_UNUSED(order))
 {
     npy_intp oldnbytes, newnbytes;
     npy_intp oldsize, newsize;
-    int new_nd=newshape->len, k, n, elsize;
+    int new_nd=newshape->len, k, elsize;
     int refcnt;
     npy_intp* new_dimensions=newshape->ptr;
     npy_intp new_strides[NPY_MAXDIMS];
@@ -136,8 +136,8 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
             PyObject *zero = PyInt_FromLong(0);
             char *optr;
             optr = PyArray_BYTES(self) + oldnbytes;
-            n = newsize - oldsize;
-            for (k = 0; k < n; k++) {
+            npy_intp n_new = newsize - oldsize;
+            for (npy_intp i = 0; i < n_new; i++) {
                 _putzero((char *)optr, zero, PyArray_DESCR(self));
                 optr += elsize;
             }
@@ -361,7 +361,7 @@ _putzero(char *optr, PyObject *zero, PyArray_Descr *dtype)
  * stride of the next-fastest index.
  */
 static int
-_attempt_nocopy_reshape(PyArrayObject *self, int newnd, npy_intp* newdims,
+_attempt_nocopy_reshape(PyArrayObject *self, int newnd, const npy_intp *newdims,
                         npy_intp *newstrides, int is_f_order)
 {
     int oldnd;
@@ -766,7 +766,7 @@ static int _npy_stride_sort_item_comparator(const void *a, const void *b)
  * [(2, 12), (0, 4), (1, -2)].
  */
 NPY_NO_EXPORT void
-PyArray_CreateSortedStridePerm(int ndim, npy_intp *strides,
+PyArray_CreateSortedStridePerm(int ndim, npy_intp const *strides,
                         npy_stride_sort_item *out_strideperm)
 {
     int i;
@@ -1048,7 +1048,7 @@ build_shape_string(npy_intp n, npy_intp *vals)
  * from a reduction result once its computation is complete.
  */
 NPY_NO_EXPORT void
-PyArray_RemoveAxesInPlace(PyArrayObject *arr, npy_bool *flags)
+PyArray_RemoveAxesInPlace(PyArrayObject *arr, const npy_bool *flags)
 {
     PyArrayObject_fields *fa = (PyArrayObject_fields *)arr;
     npy_intp *shape = fa->dimensions, *strides = fa->strides;
diff --git a/numpy/core/src/npymath/npy_math_complex.c.src b/numpy/core/src/npymath/npy_math_complex.c.src
index dad381232..8c432e483 100644
--- a/numpy/core/src/npymath/npy_math_complex.c.src
+++ b/numpy/core/src/npymath/npy_math_complex.c.src
@@ -40,13 +40,14 @@
  * flag in an efficient way. The flag is IEEE specific. See
  * https://github.com/freebsd/freebsd/blob/4c6378299/lib/msun/src/catrig.c#L42
  */
+#if !defined(HAVE_CACOSF) || !defined(HAVE_CACOSL) || !defined(HAVE_CASINHF) || !defined(HAVE_CASINHL)
 #define raise_inexact() do {                        \
     volatile npy_float NPY_UNUSED(junk) = 1 + tiny; \
 } while (0)
 
 
 static const volatile npy_float tiny = 3.9443045e-31f;
-
+#endif
 
 /**begin repeat
  * #type = npy_float, npy_double, npy_longdouble#
@@ -64,9 +65,6 @@ static const volatile npy_float tiny = 3.9443045e-31f;
  * Constants
  *=========================================================*/
 static const @ctype@ c_1@c@ = {1.0@C@, 0.0};
-static const @ctype@ c_half@c@ = {0.5@C@, 0.0};
-static const @ctype@ c_i@c@ = {0.0, 1.0@C@};
-static const @ctype@ c_ihalf@c@ = {0.0, 0.5@C@};
 
 /*==========================================================
  * Helper functions
@@ -76,22 +74,6 @@ static const @ctype@ c_ihalf@c@ = {0.0, 0.5@C@};
  *=========================================================*/
 static NPY_INLINE
 @ctype@
-cadd@c@(@ctype@ a, @ctype@ b)
-{
-    return npy_cpack@c@(npy_creal@c@(a) + npy_creal@c@(b),
-                        npy_cimag@c@(a) + npy_cimag@c@(b));
-}
-
-static NPY_INLINE
-@ctype@
-csub@c@(@ctype@ a, @ctype@ b)
-{
-    return npy_cpack@c@(npy_creal@c@(a) - npy_creal@c@(b),
-                        npy_cimag@c@(a) - npy_cimag@c@(b));
-}
-
-static NPY_INLINE
-@ctype@
 cmul@c@(@ctype@ a, @ctype@ b)
 {
     @type@ ar, ai, br, bi;
@@ -132,20 +114,6 @@ cdiv@c@(@ctype@ a, @ctype@ b)
     }
 }
 
-static NPY_INLINE
-@ctype@
-cneg@c@(@ctype@ a)
-{
-    return npy_cpack@c@(-npy_creal@c@(a), -npy_cimag@c@(a));
-}
-
-static NPY_INLINE
-@ctype@
-cmuli@c@(@ctype@ a)
-{
-    return npy_cpack@c@(-npy_cimag@c@(a), npy_creal@c@(a));
-}
-
 /*==========================================================
  * Custom implementation of missing complex C99 functions
  *=========================================================*/
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index fa820baac..18b6d1434 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -716,3 +716,44 @@ npy_@func@@c@(@type@ a, @type@ b)
     return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b);
 }
 /**end repeat**/
+
+/* Unlike LCM and GCD, the shift operators need byte and short variants,
+ * because the result depends on the width of the type.
+ */
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c = hh,h,,l,ll#
+ */
+/**begin repeat1
+ *
+ * #u         = u,#
+ * #is_signed = 0,1#
+ */
+/* Left shift; a count at or beyond the bit width of the type yields 0. */
+NPY_INPLACE npy_@u@@type@
+npy_lshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+    if (NPY_UNLIKELY((size_t)b >= sizeof(a) * CHAR_BIT)) {
+        /* the size_t cast makes a negative count land here as well */
+        return 0;
+    }
+    return a << b;
+}
+/* Right shift; an oversized count gives 0, or -1 for a negative signed a. */
+NPY_INPLACE npy_@u@@type@
+npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+    if (NPY_UNLIKELY((size_t)b >= sizeof(a) * CHAR_BIT)) {
+#if @is_signed@
+        if (a < 0) {
+            /* preserve the sign bit */
+            return (npy_@u@@type@)-1;
+        }
+#endif
+        return 0;
+    }
+    return a >> b;
+}
+/**end repeat1**/
+/**end repeat**/
diff --git a/numpy/core/src/npysort/radixsort.c.src b/numpy/core/src/npysort/radixsort.c.src
index c90b06974..72887d7e4 100644
--- a/numpy/core/src/npysort/radixsort.c.src
+++ b/numpy/core/src/npysort/radixsort.c.src
@@ -198,9 +198,9 @@ aradixsort_@suff@(void *start, npy_intp* tosort, npy_intp num, void *NPY_UNUSED(
         return 0;
     }
 
-    k1 = KEY_OF(arr[0]);
+    k1 = KEY_OF(arr[tosort[0]]);
     for (npy_intp i = 1; i < num; i++) {
-        k2 = KEY_OF(arr[i]);
+        k2 = KEY_OF(arr[tosort[i]]);
         if (k1 > k2) {
             all_sorted = 0;
             break;
diff --git a/numpy/core/src/umath/_rational_tests.c.src b/numpy/core/src/umath/_rational_tests.c.src
index 9e74845df..615e395c7 100644
--- a/numpy/core/src/umath/_rational_tests.c.src
+++ b/numpy/core/src/umath/_rational_tests.c.src
@@ -539,11 +539,11 @@ static PyObject*
 pyrational_str(PyObject* self) {
     rational x = ((PyRational*)self)->r;
     if (d(x)!=1) {
-        return PyString_FromFormat(
+        return PyUString_FromFormat(
                 "%ld/%ld",(long)x.n,(long)d(x));
     }
     else {
-        return PyString_FromFormat(
+        return PyUString_FromFormat(
                 "%ld",(long)x.n);
     }
 }
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 2028a5712..d948e25bb 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -699,6 +699,7 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
  * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double,
  *          npy_double, npy_double, npy_double, npy_double#
  * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
+ * #c = hh,uhh,h,uh,,u,l,ul,ll,ull#
  */
 
 #define @TYPE@_floor_divide @TYPE@_divide
@@ -776,16 +777,15 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
 
 /**begin repeat2
  * Arithmetic
- * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor,
- *          left_shift, right_shift#
- * #OP = +, -,*, &, |, ^, <<, >>#
+ * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor#
+ * #OP = +, -, *, &, |, ^#
  */
 
 #if @CHK@
 NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
 @TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
-    if(IS_BINARY_REDUCE) {
+    if (IS_BINARY_REDUCE) {
         BINARY_REDUCE_LOOP(@type@) {
             io1 @OP@= *(@type@ *)ip2;
         }
@@ -799,6 +799,47 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
 
 /**end repeat2**/
 
+/*
+ * Arithmetic bit shift operations.
+ *
+ * Intel hardware masks bit shift values, so large shifts wrap around
+ * and can produce surprising results. The special handling ensures that
+ * behavior is independent of compiler or hardware.
+ * TODO: We could implement consistent behavior for negative shifts,
+ *       which are undefined in C.
+ */
+
+#define INT_left_shift_needs_clear_floatstatus
+#define UINT_left_shift_needs_clear_floatstatus
+
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
+                  void *NPY_UNUSED(func))
+{
+    BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2));
+
+#ifdef @TYPE@_left_shift_needs_clear_floatstatus
+    /* For some reason, our macOS CI sets an "invalid" flag here, but only
+     * for some types. */
+    npy_clear_floatstatus_barrier((char*)dimensions);
+#endif
+}
+
+#undef INT_left_shift_needs_clear_floatstatus
+#undef UINT_left_shift_needs_clear_floatstatus
+
+NPY_NO_EXPORT
+#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
+NPY_GCC_OPT_3
+#endif
+void
+@TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
+                   void *NPY_UNUSED(func))
+{
+    BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
+}
+
+
 /**begin repeat2
  * #kind = equal, not_equal, greater, greater_equal, less, less_equal,
  *         logical_and, logical_or#
@@ -1594,6 +1635,30 @@ NPY_NO_EXPORT void
 /**end repeat**/
 
 /**begin repeat
+ *  #func = rint, ceil, floor, trunc#
+ *  #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ */
+
+/**begin repeat1
+*  #TYPE = FLOAT, DOUBLE#
+*  #type = npy_float, npy_double#
+*  #typesub = f, #
+*/
+
+/* Plain element-wise loop: apply the scalar rounding function to each input. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    UNARY_LOOP {
+        *(@type@ *)op1 = @scalarf@@typesub@(*(@type@ *)ip1);
+    }
+}
+
+
+/**end repeat1**/
+/**end repeat**/
+
+/**begin repeat
  *  #func = sin, cos, exp, log#
  *  #scalarf = npy_sinf, npy_cosf, npy_expf, npy_logf#
  */
@@ -1616,6 +1681,78 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
  */
 
 /**begin repeat1
+ *  #TYPE = FLOAT, DOUBLE#
+ *  #type = npy_float, npy_double#
+ *  #typesub = f, #
+ */
+
+/* Square root: ISA-specific kernel when it applies, scalar loop otherwise. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_sqrt_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    if (!run_unary_@isa@_sqrt_@TYPE@(args, dimensions, steps)) {
+        UNARY_LOOP {
+            *(@type@ *)op1 = npy_sqrt@typesub@(*(@type@ *)ip1);
+        }
+    }
+}
+
+/* Absolute value: ISA-specific kernel when it applies, scalar loop otherwise. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_absolute_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    if (!run_unary_@isa@_absolute_@TYPE@(args, dimensions, steps)) {
+        UNARY_LOOP {
+            const @type@ val = *(@type@ *)ip1;
+            /* negate anything that is not > 0; add 0 to clear -0.0 */
+            *((@type@ *)op1) = (val > 0 ? val : -val) + 0;
+        }
+    }
+    npy_clear_floatstatus_barrier((char*)dimensions);
+}
+
+/* Square: ISA-specific kernel when it applies, scalar loop otherwise. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_square_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    if (!run_unary_@isa@_square_@TYPE@(args, dimensions, steps)) {
+        UNARY_LOOP {
+            *(@type@ *)op1 = (*(@type@ *)ip1) * (*(@type@ *)ip1);
+        }
+    }
+}
+
+/* Reciprocal: ISA-specific kernel when it applies, scalar loop otherwise. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_reciprocal_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    if (!run_unary_@isa@_reciprocal_@TYPE@(args, dimensions, steps)) {
+        UNARY_LOOP {
+            *(@type@ *)op1 = 1.0f/(*(@type@ *)ip1);
+        }
+    }
+}
+
+/**begin repeat2
+ *  #func = rint, ceil, floor, trunc#
+ *  #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ */
+
+/* Rounding op: ISA-specific kernel when it applies, scalar loop otherwise. */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
+{
+    if (!run_unary_@isa@_@func@_@TYPE@(args, dimensions, steps)) {
+        UNARY_LOOP {
+            *(@type@ *)op1 = @scalarf@@typesub@(*(@type@ *)ip1);
+        }
+    }
+}
+
+/**end repeat2**/
+/**end repeat1**/
+
+/**begin repeat1
  *  #func = exp, log#
  *  #scalarf = npy_expf, npy_logf#
  */
@@ -1665,10 +1802,9 @@ FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY
 }
 
 /**end repeat1**/
-
-
 /**end repeat**/
 
+
 /**begin repeat
  * Float types
  *  #type = npy_float, npy_double, npy_longdouble, npy_float#
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 5070ab38b..e98a1ac3c 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -175,6 +175,19 @@ NPY_NO_EXPORT void
  */
 NPY_NO_EXPORT void
 @TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
+/**begin repeat1
+ * #isa = avx512f, fma#
+ */
+
+/**begin repeat2
+ * #func = sqrt, absolute, square, reciprocal#
+ */
+NPY_NO_EXPORT void
+@TYPE@_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
+/**end repeat2**/
+/**end repeat1**/
 /**end repeat**/
 
 /**begin repeat
@@ -194,6 +207,26 @@ FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY
 /**end repeat**/
 
 /**begin repeat
+ *  #func = rint, ceil, floor, trunc#
+ */
+
+/**begin repeat1
+*  #TYPE = FLOAT, DOUBLE#
+*/
+
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data));
+
+/**begin repeat2
+ * #isa = avx512f, fma#
+ */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data));
+/**end repeat2**/
+/**end repeat1**/
+/**end repeat**/
+
+/**begin repeat
  * Float types
  *  #TYPE = HALF, FLOAT, DOUBLE, LONGDOUBLE#
  *  #c = f, f, , l#
diff --git a/numpy/core/src/umath/matmul.c.src b/numpy/core/src/umath/matmul.c.src
index 480c0c72f..b5204eca5 100644
--- a/numpy/core/src/umath/matmul.c.src
+++ b/numpy/core/src/umath/matmul.c.src
@@ -196,16 +196,14 @@ NPY_NO_EXPORT void
  *          FLOAT, DOUBLE, HALF,
  *          CFLOAT, CDOUBLE, CLONGDOUBLE,
  *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
- *          BYTE, SHORT, INT, LONG, LONGLONG,
- *          BOOL#
+ *          BYTE, SHORT, INT, LONG, LONGLONG#
  *  #typ = npy_longdouble,
  *         npy_float,npy_double,npy_half,
  *         npy_cfloat, npy_cdouble, npy_clongdouble,
  *         npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong,
- *         npy_byte, npy_short, npy_int, npy_long, npy_longlong,
- *         npy_bool#
- * #IS_COMPLEX = 0, 0, 0, 0, 1, 1, 1, 0*11#
- * #IS_HALF = 0, 0, 0, 1, 0*14#
+ *         npy_byte, npy_short, npy_int, npy_long, npy_longlong#
+ * #IS_COMPLEX = 0, 0, 0, 0, 1, 1, 1, 0*10#
+ * #IS_HALF = 0, 0, 0, 1, 0*13#
  */
 
 NPY_NO_EXPORT void
@@ -266,7 +264,44 @@ NPY_NO_EXPORT void
 }
 
 /**end repeat**/
+/* BOOL matmul kernel: op[m,p] = any over n of (ip1[m,n] && ip2[n,p]). */
+NPY_NO_EXPORT void
+BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
+                           void *_ip2, npy_intp is2_n, npy_intp is2_p,
+                           void *_op, npy_intp os_m, npy_intp os_p,
+                           npy_intp dm, npy_intp dn, npy_intp dp)
+{
+    npy_intp m, n, p;
+    npy_intp ib2_p, ob_p;
+    char *ip1 = (char *)_ip1, *ip2 = (char *)_ip2, *op = (char *)_op;
 
+    ib2_p = is2_p * dp;  /* total byte advance of ip2 over one p loop */
+    ob_p  = os_p * dp;   /* total byte advance of op over one p loop */
+
+    for (m = 0; m < dm; m++) {
+        for (p = 0; p < dp; p++) {
+            char *ip1tmp = ip1;
+            char *ip2tmp = ip2;
+            *(npy_bool *)op = NPY_FALSE;  /* stays false unless a pair matches */
+            for (n = 0; n < dn; n++) {
+                npy_bool val1 = (*(npy_bool *)ip1tmp);
+                npy_bool val2 = (*(npy_bool *)ip2tmp);
+                if (val1 != 0 && val2 != 0) {
+                    *(npy_bool *)op = NPY_TRUE;
+                    break;  /* one matching pair already decides the result */
+                }
+                ip2tmp += is2_n;
+                ip1tmp += is1_n;
+            }
+            op  +=  os_p;
+            ip2 += is2_p;
+        }
+        op -= ob_p;    /* undo the p-loop advance of op */
+        ip2 -= ib2_p;  /* undo the p-loop advance of ip2 */
+        ip1 += is1_m;
+        op  +=  os_m;
+    }
+}
 
 NPY_NO_EXPORT void
 OBJECT_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 8ae2f65e0..4ce8d8ab7 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -36,7 +36,7 @@
  * If 'dtype' isn't NULL, this function steals its reference.
  */
 static PyArrayObject *
-allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
+allocate_reduce_result(PyArrayObject *arr, const npy_bool *axis_flags,
                         PyArray_Descr *dtype, int subok)
 {
     npy_intp strides[NPY_MAXDIMS], stride;
@@ -84,7 +84,7 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
  * The return value is a view into 'out'.
  */
 static PyArrayObject *
-conform_reduce_result(int ndim, npy_bool *axis_flags,
+conform_reduce_result(int ndim, const npy_bool *axis_flags,
                       PyArrayObject *out, int keepdims, const char *funcname,
                       int need_copy)
 {
@@ -251,7 +251,7 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
  * Count the number of dimensions selected in 'axis_flags'
  */
 static int
-count_axes(int ndim, npy_bool *axis_flags)
+count_axes(int ndim, const npy_bool *axis_flags)
 {
     int idim;
     int naxes = 0;
@@ -299,7 +299,7 @@ count_axes(int ndim, npy_bool *axis_flags)
 NPY_NO_EXPORT PyArrayObject *
 PyArray_InitializeReduceResult(
                     PyArrayObject *result, PyArrayObject *operand,
-                    npy_bool *axis_flags,
+                    const npy_bool *axis_flags,
                     npy_intp *out_skip_first_count, const char *funcname)
 {
     npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS];
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index a7987acda..d5d8d659b 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -246,25 +246,26 @@ static void
 /**end repeat**/
 
 
-
-/* QUESTION:  Should we check for overflow / underflow in (l,r)shift? */
-
 /**begin repeat
  * #name = byte, ubyte, short, ushort, int, uint,
  *         long, ulong, longlong, ulonglong#
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong#
+ * #suffix = hh,uhh,h,uh,,u,l,ul,ll,ull#
  */
 
 /**begin repeat1
- * #oper = and, xor, or, lshift, rshift#
- * #op = &, ^, |, <<, >>#
+ * #oper = and, xor, or#
+ * #op = &, ^, |#
  */
 
 #define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2)
 
 /**end repeat1**/
 
+#define @name@_ctype_lshift(arg1, arg2, out) *(out) = npy_lshift@suffix@(arg1, arg2)
+#define @name@_ctype_rshift(arg1, arg2, out) *(out) = npy_rshift@suffix@(arg1, arg2)
+
 /**end repeat**/
 
 /**begin repeat
@@ -405,21 +406,22 @@ half_ctype_divmod(npy_half a, npy_half b, npy_half *out1, npy_half *out2) {
 /**begin repeat
  * #name = float, double, longdouble#
  * #type = npy_float, npy_double, npy_longdouble#
+ * #c = f,,l#
  */
-static npy_@name@ (*_basic_@name@_pow)(@type@ a, @type@ b);
 
 static void
 @name@_ctype_power(@type@ a, @type@ b, @type@ *out)
 {
-    *out = _basic_@name@_pow(a, b);
+    *out = npy_pow@c@(a, b);
 }
+
 /**end repeat**/
 static void
 half_ctype_power(npy_half a, npy_half b, npy_half *out)
 {
     const npy_float af = npy_half_to_float(a);
     const npy_float bf = npy_half_to_float(b);
-    const npy_float outf = _basic_float_pow(af,bf);
+    const npy_float outf = npy_powf(af,bf);
     *out = npy_float_to_half(outf);
 }
 
@@ -476,14 +478,10 @@ static void
 }
 /**end repeat**/
 
-/*
- * Get the nc_powf, nc_pow, and nc_powl functions from
- * the data area of the power ufunc in umathmodule.
- */
-
 /**begin repeat
  * #name = cfloat, cdouble, clongdouble#
  * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
+ * #c = f,,l#
  */
 static void
 @name@_ctype_positive(@type@ a, @type@ *out)
@@ -492,12 +490,10 @@ static void
     out->imag = a.imag;
 }
 
-static void (*_basic_@name@_pow)(@type@ *, @type@ *, @type@ *);
-
 static void
 @name@_ctype_power(@type@ a, @type@ b, @type@ *out)
 {
-    _basic_@name@_pow(&a, &b, out);
+    *out = npy_cpow@c@(a, b);
 }
 /**end repeat**/
 
@@ -570,7 +566,7 @@ static void
  * 1) Convert the types to the common type if both are scalars (0 return)
  * 2) If both are not scalars use ufunc machinery (-2 return)
  * 3) If both are scalars but cannot be cast to the right type
- * return NotImplmented (-1 return)
+ * return NotImplemented (-1 return)
  *
  * 4) Perform the function on the C-type.
  * 5) If an error condition occurred, check to see
@@ -1429,24 +1425,53 @@ static PyObject *
 
 /**begin repeat
  *
+ * #name = byte, ubyte, short, ushort, int, uint,
+ *             long, ulong, longlong, ulonglong,
+ *             half, float, double, longdouble,
+ *             cfloat, cdouble, clongdouble#
+ * #Name = Byte, UByte, Short, UShort, Int, UInt,
+ *             Long, ULong, LongLong, ULongLong,
+ *             Half, Float, Double, LongDouble,
+ *             CFloat, CDouble, CLongDouble#
+ * #cmplx = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1#
+ * #to_ctype = , , , , , , , , , , npy_half_to_double, , , , , , #
+ * #func = PyFloat_FromDouble*17#
+ */
+static NPY_INLINE PyObject *
+@name@_float(PyObject *obj)
+{
+#if @cmplx@
+    if (emit_complexwarning() < 0) {
+        return NULL;
+    }
+    return @func@(@to_ctype@(PyArrayScalar_VAL(obj, @Name@).real));
+#else
+    return @func@(@to_ctype@(PyArrayScalar_VAL(obj, @Name@)));
+#endif
+}
+/**end repeat**/
+
+
+#if !defined(NPY_PY3K)
+
+/**begin repeat
+ *
  * #name = (byte, ubyte, short, ushort, int, uint,
  *             long, ulong, longlong, ulonglong,
  *             half, float, double, longdouble,
- *             cfloat, cdouble, clongdouble)*2#
+ *             cfloat, cdouble, clongdouble)#
  * #Name = (Byte, UByte, Short, UShort, Int, UInt,
  *             Long, ULong, LongLong, ULongLong,
  *             Half, Float, Double, LongDouble,
- *             CFloat, CDouble, CLongDouble)*2#
- * #cmplx = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1)*2#
- * #to_ctype = (, , , , , , , , , , npy_half_to_double, , , , , , )*2#
- * #which = long*17, float*17#
+ *             CFloat, CDouble, CLongDouble)#
+ * #cmplx = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1)#
+ * #to_ctype = (, , , , , , , , , , npy_half_to_double, , , , , , )#
  * #func = (PyLong_FromLongLong,  PyLong_FromUnsignedLongLong)*5,
  *         PyLong_FromDouble*3, npy_longdouble_to_PyLong,
- *         PyLong_FromDouble*2, npy_longdouble_to_PyLong,
- *         PyFloat_FromDouble*17#
+ *         PyLong_FromDouble*2, npy_longdouble_to_PyLong#
  */
 static NPY_INLINE PyObject *
-@name@_@which@(PyObject *obj)
+@name@_long(PyObject *obj)
 {
 #if @cmplx@
     if (emit_complexwarning() < 0) {
@@ -1459,8 +1484,6 @@ static NPY_INLINE PyObject *
 }
 /**end repeat**/
 
-#if !defined(NPY_PY3K)
-
 /**begin repeat
  *
  * #name = (byte, ubyte, short, ushort, int, uint,
@@ -1652,52 +1675,9 @@ add_scalarmath(void)
     /**end repeat**/
 }
 
-static int
-get_functions(PyObject * mm)
-{
-    PyObject *obj;
-    void **funcdata;
-    char *signatures;
-    int i, j;
-    int ret = -1;
-
-    /* Get the nc_pow functions */
-    /* Get the pow functions */
-    obj = PyObject_GetAttrString(mm, "power");
-    if (obj == NULL) {
-        goto fail;
-    }
-    funcdata = ((PyUFuncObject *)obj)->data;
-    signatures = ((PyUFuncObject *)obj)->types;
-
-    i = 0;
-    j = 0;
-    while (signatures[i] != NPY_FLOAT) {
-        i += 3;
-        j++;
-    }
-    _basic_float_pow = funcdata[j];
-    _basic_double_pow = funcdata[j + 1];
-    _basic_longdouble_pow = funcdata[j + 2];
-    _basic_cfloat_pow = funcdata[j + 3];
-    _basic_cdouble_pow = funcdata[j + 4];
-    _basic_clongdouble_pow = funcdata[j + 5];
-    Py_DECREF(obj);
-
-    return ret = 0;
-
- fail:
-    Py_DECREF(mm);
-    return ret;
-}
-
 
 NPY_NO_EXPORT int initscalarmath(PyObject * m)
 {
-    if (get_functions(m) < 0) {
-        return -1;
-    }
-
     add_scalarmath();
 
     return 0;
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 7aec1ff49..74f52cc9d 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -139,6 +139,37 @@ abs_ptrdiff(char *a, char *b)
 /* prototypes */
 
 /**begin repeat1
+ * #type = npy_float, npy_double#
+ * #TYPE = FLOAT, DOUBLE#
+ */
+
+/**begin repeat2
+ *  #func = sqrt, absolute, square, reciprocal, rint, floor, ceil, trunc#
+ */
+
+#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
+static NPY_INLINE NPY_GCC_TARGET_@ISA@ void
+@ISA@_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n, const npy_intp stride);
+#endif
+
+/* Run the ISA-specific kernel if possible; return 1 if it ran, else 0. */
+static NPY_INLINE int
+run_unary_@isa@_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
+    if (IS_OUTPUT_BLOCKABLE_UNARY(sizeof(@type@), @REGISTER_SIZE@)) {
+        @ISA@_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0], steps[0]);
+        return 1;
+    }
+#endif
+    /* kernel not compiled in, or operands not blockable: caller must loop */
+    return 0;
+}
+
+/**end repeat2**/
+/**end repeat1**/
+
+/**begin repeat1
  * #func = exp, log#
  */
 
@@ -185,7 +216,6 @@ run_unary_@isa@_sincos_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps,
 /**end repeat**/
 
 
-
 /**begin repeat
  * Float types
  *  #type = npy_float, npy_double, npy_longdouble#
@@ -1017,7 +1047,7 @@ sse2_sqrt_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n)
     LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) {
         op[i] = @scalarf@(ip[i]);
     }
-    assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) ||
+    assert((npy_uintp)n < (VECTOR_SIZE_BYTES / sizeof(@type@)) ||
            npy_is_aligned(&op[i], VECTOR_SIZE_BYTES));
     if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) {
         LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) {
@@ -1069,7 +1099,7 @@ sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n)
     LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) {
         op[i] = @scalar@_@type@(ip[i]);
     }
-    assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) ||
+    assert((npy_uintp)n < (VECTOR_SIZE_BYTES / sizeof(@type@)) ||
            npy_is_aligned(&op[i], VECTOR_SIZE_BYTES));
     if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) {
         LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) {
@@ -1104,7 +1134,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         /* Order of operations important for MSVC 2015 */
         *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
     }
-    assert(n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES));
+    assert((npy_uintp)n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES));
     if (i + 3 * stride <= n) {
         /* load the first elements */
         @vtype@ c1 = @vpre@_load_@vsuf@((@type@*)&ip[i]);
@@ -1144,41 +1174,76 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 
 #if defined HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
-fma_get_full_load_mask(void)
+fma_get_full_load_mask_ps(void)
 {
     return _mm256_set1_ps(-1.0);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256i
+fma_get_full_load_mask_pd(void)
+{
+    return _mm256_castpd_si256(_mm256_set1_pd(-1.0));
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
-fma_get_partial_load_mask(const npy_int num_lanes, const npy_int total_elem)
+fma_get_partial_load_mask_ps(const npy_int num_elem, const npy_int num_lanes)
 {
     float maskint[16] = {-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,
                             1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0};
-    float* addr = maskint + total_elem - num_lanes;
+    float* addr = maskint + num_lanes - num_elem;
     return _mm256_loadu_ps(addr);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256i
+fma_get_partial_load_mask_pd(const npy_int num_elem, const npy_int num_lanes)
+{
+    npy_int maskint[16] = {-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1};
+    npy_int* addr = maskint + 2*num_lanes - 2*num_elem;
+    return _mm256_loadu_si256((__m256i*) addr);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
-fma_masked_gather(__m256 src,
-                   npy_float* addr,
-                   __m256i vindex,
-                   __m256 mask)
+fma_masked_gather_ps(__m256 src,
+                     npy_float* addr,
+                     __m256i vindex,
+                     __m256 mask)
 {
     return _mm256_mask_i32gather_ps(src, addr, vindex, mask, 4);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256d
+fma_masked_gather_pd(__m256d src,
+                     npy_double* addr,
+                     __m128i vindex,
+                     __m256d mask)
+{
+    return _mm256_mask_i32gather_pd(src, addr, vindex, mask, 8);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
-fma_masked_load(__m256 mask, npy_float* addr)
+fma_masked_load_ps(__m256 mask, npy_float* addr)
 {
     return _mm256_maskload_ps(addr, _mm256_cvtps_epi32(mask));
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256d
+fma_masked_load_pd(__m256i mask, npy_double* addr)
+{
+    return _mm256_maskload_pd(addr, mask);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
-fma_set_masked_lanes(__m256 x, __m256 val, __m256 mask)
+fma_set_masked_lanes_ps(__m256 x, __m256 val, __m256 mask)
 {
     return _mm256_blendv_ps(x, val, mask);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256d
+fma_set_masked_lanes_pd(__m256d x, __m256d val, __m256d mask)
+{
+    return _mm256_blendv_pd(x, val, mask);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
 fma_blend(__m256 x, __m256 y, __m256 ymask)
 {
@@ -1186,6 +1251,18 @@ fma_blend(__m256 x, __m256 y, __m256 ymask)
 }
 
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
+fma_invert_mask_ps(__m256 ymask)
+{
+    return _mm256_andnot_ps(ymask, _mm256_set1_ps(-1.0));
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256i
+fma_invert_mask_pd(__m256i ymask)
+{
+    return _mm256_andnot_si256(ymask, _mm256_set1_epi32(0xFFFFFFFF));
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA __m256
 fma_should_calculate_sine(__m256i k, __m256i andop, __m256i cmp)
 {
    return _mm256_cvtepi32_ps(
@@ -1290,42 +1367,115 @@ fma_scalef_ps(__m256 poly, __m256 quadrant)
      }
 }
 
+/**begin repeat
+ *  #vsub = ps, pd#
+ *  #vtype = __m256, __m256d#
+ */
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_abs_@vsub@(@vtype@ x)
+{
+    return _mm256_andnot_@vsub@(_mm256_set1_@vsub@(-0.0), x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_reciprocal_@vsub@(@vtype@ x)
+{
+    return _mm256_div_@vsub@(_mm256_set1_@vsub@(1.0f), x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_rint_@vsub@(@vtype@ x)
+{
+    return _mm256_round_@vsub@(x, _MM_FROUND_TO_NEAREST_INT);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_floor_@vsub@(@vtype@ x)
+{
+    return _mm256_round_@vsub@(x, _MM_FROUND_TO_NEG_INF);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_ceil_@vsub@(@vtype@ x)
+{
+    return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
+fma_trunc_@vsub@(@vtype@ x)
+{
+    return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO);
+}
+/**end repeat**/
 #endif
 
 #if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
-avx512_get_full_load_mask(void)
+avx512_get_full_load_mask_ps(void)
 {
     return 0xFFFF;
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask8
+avx512_get_full_load_mask_pd(void)
+{
+    return 0xFF;
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
-avx512_get_partial_load_mask(const npy_int num_elem, const npy_int total_elem)
+avx512_get_partial_load_mask_ps(const npy_int num_elem, const npy_int total_elem)
 {
     return (0x0001 << num_elem) - 0x0001;
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask8
+avx512_get_partial_load_mask_pd(const npy_int num_elem, const npy_int total_elem)
+{
+    return (0x01 << num_elem) - 0x01;
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512
-avx512_masked_gather(__m512 src,
-                     npy_float* addr,
-                     __m512i vindex,
-                     __mmask16 kmask)
+avx512_masked_gather_ps(__m512 src,
+                        npy_float* addr,
+                        __m512i vindex,
+                        __mmask16 kmask)
 {
     return _mm512_mask_i32gather_ps(src, kmask, vindex, addr, 4);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d
+avx512_masked_gather_pd(__m512d src,
+                        npy_double* addr,
+                        __m256i vindex,
+                        __mmask8 kmask)
+{
+    return _mm512_mask_i32gather_pd(src, kmask, vindex, addr, 8);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512
-avx512_masked_load(__mmask16 mask, npy_float* addr)
+avx512_masked_load_ps(__mmask16 mask, npy_float* addr)
 {
     return _mm512_maskz_loadu_ps(mask, (__m512 *)addr);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d
+avx512_masked_load_pd(__mmask8 mask, npy_double* addr)
+{
+    return _mm512_maskz_loadu_pd(mask, (__m512d *)addr);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512
-avx512_set_masked_lanes(__m512 x, __m512 val, __mmask16 mask)
+avx512_set_masked_lanes_ps(__m512 x, __m512 val, __mmask16 mask)
 {
     return _mm512_mask_blend_ps(mask, x, val);
 }
 
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d
+avx512_set_masked_lanes_pd(__m512d x, __m512d val, __mmask8 mask)
+{
+    return _mm512_mask_blend_pd(mask, x, val);
+}
+
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512
 avx512_blend(__m512 x, __m512 y, __mmask16 ymask)
 {
@@ -1333,6 +1483,18 @@ avx512_blend(__m512 x, __m512 y, __mmask16 ymask)
 }
 
 static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
+avx512_invert_mask_ps(__mmask16 ymask)
+{
+    return _mm512_knot(ymask);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask8
+avx512_invert_mask_pd(__mmask8 ymask)
+{
+    return _mm512_knot(ymask);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __mmask16
 avx512_should_calculate_sine(__m512i k, __m512i andop, __m512i cmp)
 {
     return _mm512_cmpeq_epi32_mask(_mm512_and_epi32(k, andop), cmp);
@@ -1361,6 +1523,49 @@ avx512_scalef_ps(__m512 poly, __m512 quadrant)
 {
     return _mm512_scalef_ps(poly, quadrant);
 }
+/**begin repeat
+ *  #vsub  = ps, pd#
+ *  #epi_vsub  = epi32, epi64#
+ *  #vtype = __m512, __m512d#
+ *  #and_const = 0x7fffffff, 0x7fffffffffffffffLL#
+ */
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_abs_@vsub@(@vtype@ x)
+{
+    return (@vtype@) _mm512_and_@epi_vsub@((__m512i) x,
+				    _mm512_set1_@epi_vsub@ (@and_const@));
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_reciprocal_@vsub@(@vtype@ x)
+{
+    return _mm512_div_@vsub@(_mm512_set1_@vsub@(1.0f), x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_rint_@vsub@(@vtype@ x)
+{
+    return _mm512_roundscale_@vsub@(x, 0x08);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_floor_@vsub@(@vtype@ x)
+{
+    return _mm512_roundscale_@vsub@(x, 0x09);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_ceil_@vsub@(@vtype@ x)
+{
+    return _mm512_roundscale_@vsub@(x, 0x0A);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
+avx512_trunc_@vsub@(@vtype@ x)
+{
+    return _mm512_roundscale_@vsub@(x, 0x0B);
+}
+/**end repeat**/
 #endif
 
 /**begin repeat
@@ -1438,7 +1643,187 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
     sin = @fmadd@(sin, x, x);
     return sin;
 }
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
+@isa@_sqrt_ps(@vtype@ x)
+{
+    return _mm@vsize@_sqrt_ps(x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@d
+@isa@_sqrt_pd(@vtype@d x)
+{
+    return _mm@vsize@_sqrt_pd(x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
+@isa@_square_ps(@vtype@ x)
+{
+    return _mm@vsize@_mul_ps(x,x);
+}
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@d
+@isa@_square_pd(@vtype@d x)
+{
+    return _mm@vsize@_mul_pd(x,x);
+}
+
+#endif
+/**end repeat**/
+
+
+/**begin repeat
+ * #ISA = FMA, AVX512F#
+ * #isa = fma, avx512#
+ * #vsize = 256, 512#
+ * #BYTES = 32, 64#
+ * #cvtps_epi32 = _mm256_cvtps_epi32, #
+ * #mask = __m256, __mmask16#
+ * #vsub = , _mask#
+ * #vtype = __m256, __m512#
+ * #cvtps_epi32 = _mm256_cvtps_epi32, #
+ * #masked_store = _mm256_maskstore_ps, _mm512_mask_storeu_ps#
+ * #CHK = HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS#
+ */
+
+/**begin repeat1
+ *  #func = sqrt, absolute, square, reciprocal, rint, ceil, floor, trunc#
+ *  #vectorf = sqrt, abs, square, reciprocal, rint, ceil, floor, trunc#
+ *  #replace_0_with_1 = 0, 0, 0, 1, 0, 0, 0, 0#
+ */
+
+#if defined @CHK@
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
+@ISA@_@func@_FLOAT(npy_float* op,
+                   npy_float* ip,
+                   const npy_intp array_size,
+                   const npy_intp steps)
+{
+    const npy_intp stride = steps/sizeof(npy_float);
+    const npy_int num_lanes = @BYTES@/sizeof(npy_float);
+    npy_intp num_remaining_elements = array_size;
+    @vtype@ ones_f = _mm@vsize@_set1_ps(1.0f);
+    @mask@ load_mask = @isa@_get_full_load_mask_ps();
+#if @replace_0_with_1@
+    @mask@ inv_load_mask = @isa@_invert_mask_ps(load_mask);
+#endif
+    npy_int indexarr[16];
+    for (npy_int ii = 0; ii < 16; ii++) {
+        indexarr[ii] = ii*stride;
+    }
+    @vtype@i vindex = _mm@vsize@_loadu_si@vsize@((@vtype@i*)&indexarr[0]);
+
+    while (num_remaining_elements > 0) {
+        if (num_remaining_elements < num_lanes) {
+            load_mask = @isa@_get_partial_load_mask_ps(num_remaining_elements,
+                                                       num_lanes);
+#if @replace_0_with_1@
+            inv_load_mask = @isa@_invert_mask_ps(load_mask);
+#endif
+        }
+        @vtype@ x;
+        if (stride == 1) {
+            x = @isa@_masked_load_ps(load_mask, ip);
+#if @replace_0_with_1@
+            /*
+             * Replace masked elements with 1.0f to avoid divide by zero fp
+             * exception in reciprocal
+             */
+            x = @isa@_set_masked_lanes_ps(x, ones_f, inv_load_mask);
+#endif
+        }
+        else {
+            x = @isa@_masked_gather_ps(ones_f, ip, vindex, load_mask);
+        }
+        @vtype@ out = @isa@_@vectorf@_ps(x);
+        @masked_store@(op, @cvtps_epi32@(load_mask), out);
+
+        ip += num_lanes*stride;
+        op += num_lanes;
+        num_remaining_elements -= num_lanes;
+    }
+}
+#endif
+/**end repeat1**/
+/**end repeat**/
+
+/**begin repeat
+ * #ISA = FMA, AVX512F#
+ * #isa = fma, avx512#
+ * #vsize = 256, 512#
+ * #BYTES = 32, 64#
+ * #cvtps_epi32 = _mm256_cvtps_epi32, #
+ * #mask = __m256i, __mmask8#
+ * #vsub = , _mask#
+ * #vtype = __m256d, __m512d#
+ * #vindextype = __m128i, __m256i#
+ * #vindexsize = 128, 256#
+ * #vindexload = _mm_loadu_si128, _mm256_loadu_si256#
+ * #cvtps_epi32 = _mm256_cvtpd_epi32, #
+ * #castmask = _mm256_castsi256_pd, #
+ * #masked_store = _mm256_maskstore_pd, _mm512_mask_storeu_pd#
+ * #CHK = HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS#
+ */
+
+/**begin repeat1
+ *  #func = sqrt, absolute, square, reciprocal, rint, ceil, floor, trunc#
+ *  #vectorf = sqrt, abs, square, reciprocal, rint, ceil, floor, trunc#
+ *  #replace_0_with_1 = 0, 0, 0, 1, 0, 0, 0, 0#
+ */
+
+#if defined @CHK@
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
+@ISA@_@func@_DOUBLE(npy_double* op,
+                    npy_double* ip,
+                    const npy_intp array_size,
+                    const npy_intp steps)
+{
+    const npy_intp stride = steps/sizeof(npy_double);
+    const npy_int num_lanes = @BYTES@/sizeof(npy_double);
+    npy_intp num_remaining_elements = array_size;
+    @mask@ load_mask = @isa@_get_full_load_mask_pd();
+#if @replace_0_with_1@
+    @mask@ inv_load_mask = @isa@_invert_mask_pd(load_mask);
+#endif
+    @vtype@ ones_d = _mm@vsize@_set1_pd(1.0f);
+    npy_int indexarr[8];
+    for (npy_int ii = 0; ii < 8; ii++) {
+        indexarr[ii] = ii*stride;
+    }
+    @vindextype@ vindex = @vindexload@((@vindextype@*)&indexarr[0]);
+
+    while (num_remaining_elements > 0) {
+        if (num_remaining_elements < num_lanes) {
+            load_mask = @isa@_get_partial_load_mask_pd(num_remaining_elements,
+                                                       num_lanes);
+#if @replace_0_with_1@
+            inv_load_mask = @isa@_invert_mask_pd(load_mask);
 #endif
+        }
+        @vtype@ x;
+        if (stride == 1) {
+            x = @isa@_masked_load_pd(load_mask, ip);
+#if @replace_0_with_1@
+            /*
+             * Replace masked elements with 1.0 to avoid divide by zero fp
+             * exception in reciprocal
+             */
+            x = @isa@_set_masked_lanes_pd(x, ones_d, @castmask@(inv_load_mask));
+#endif
+        }
+        else {
+            x = @isa@_masked_gather_pd(ones_d, ip, vindex, @castmask@(load_mask));
+        }
+        @vtype@ out = @isa@_@vectorf@_pd(x);
+        @masked_store@(op, load_mask, out);
+
+        ip += num_lanes*stride;
+        op += num_lanes;
+        num_remaining_elements -= num_lanes;
+    }
+}
+#endif
+/**end repeat1**/
 /**end repeat**/
 
 /**begin repeat
@@ -1460,7 +1845,6 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ @vtype@
  * #CHK = HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS#
  */
 
-
 /*
  * Vectorized approximate sine/cosine algorithms: The following code is a
  * vectorized version of the algorithm presented here:
@@ -1519,7 +1903,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     @vtype@ quadrant, reduced_x, reduced_x2, cos, sin;
     @vtype@i iquadrant;
     @mask@ nan_mask, glibc_mask, sine_mask, negate_mask;
-    @mask@ load_mask = @isa@_get_full_load_mask();
+    @mask@ load_mask = @isa@_get_full_load_mask_ps();
     npy_intp num_remaining_elements = array_size;
     npy_int indexarr[16];
     for (npy_int ii = 0; ii < 16; ii++) {
@@ -1530,16 +1914,16 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     while (num_remaining_elements > 0) {
 
         if (num_remaining_elements < num_lanes) {
-            load_mask = @isa@_get_partial_load_mask(num_remaining_elements,
+            load_mask = @isa@_get_partial_load_mask_ps(num_remaining_elements,
                                                          num_lanes);
         }
 
         @vtype@ x;
         if (stride == 1) {
-            x = @isa@_masked_load(load_mask, ip);
+            x = @isa@_masked_load_ps(load_mask, ip);
         }
         else {
-            x = @isa@_masked_gather(zero_f, ip, vindex, load_mask);
+            x = @isa@_masked_gather_ps(zero_f, ip, vindex, load_mask);
         }
 
         /*
@@ -1551,7 +1935,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
         glibc_mask = @isa@_in_range_mask(x, large_number,-large_number);
         glibc_mask = @and_masks@(load_mask, glibc_mask);
         nan_mask = _mm@vsize@_cmp_ps@vsub@(x, x, _CMP_NEQ_UQ);
-        x = @isa@_set_masked_lanes(x, zero_f, @or_masks@(nan_mask, glibc_mask));
+        x = @isa@_set_masked_lanes_ps(x, zero_f, @or_masks@(nan_mask, glibc_mask));
         npy_int iglibc_mask = @mask_to_int@(glibc_mask);
 
         if (iglibc_mask != @full_mask@) {
@@ -1584,7 +1968,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
             /* multiply by -1 for appropriate elements */
             negate_mask = @isa@_should_negate(iquadrant, twos, twos);
             cos = @isa@_blend(cos, _mm@vsize@_sub_ps(zero_f, cos), negate_mask);
-            cos = @isa@_set_masked_lanes(cos, _mm@vsize@_set1_ps(NPY_NANF), nan_mask);
+            cos = @isa@_set_masked_lanes_ps(cos, _mm@vsize@_set1_ps(NPY_NANF), nan_mask);
 
             @masked_store@(op, @cvtps_epi32@(load_mask), cos);
         }
@@ -1662,27 +2046,27 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     @vtype@i vindex = _mm@vsize@_loadu_si@vsize@((@vtype@i*)&indexarr[0]);
 
     @mask@ xmax_mask, xmin_mask, nan_mask, inf_mask;
-    @mask@ overflow_mask = @isa@_get_partial_load_mask(0, num_lanes);
-    @mask@ load_mask = @isa@_get_full_load_mask();
+    @mask@ overflow_mask = @isa@_get_partial_load_mask_ps(0, num_lanes);
+    @mask@ load_mask = @isa@_get_full_load_mask_ps();
     npy_intp num_remaining_elements = array_size;
 
     while (num_remaining_elements > 0) {
 
         if (num_remaining_elements < num_lanes) {
-            load_mask = @isa@_get_partial_load_mask(num_remaining_elements,
-                                                    num_lanes);
+            load_mask = @isa@_get_partial_load_mask_ps(num_remaining_elements,
+                                                       num_lanes);
         }
 
         @vtype@ x;
         if (stride == 1) {
-            x = @isa@_masked_load(load_mask, ip);
+            x = @isa@_masked_load_ps(load_mask, ip);
         }
         else {
-            x = @isa@_masked_gather(zeros_f, ip, vindex, load_mask);
+            x = @isa@_masked_gather_ps(zeros_f, ip, vindex, load_mask);
         }
 
         nan_mask = _mm@vsize@_cmp_ps@vsub@(x, x, _CMP_NEQ_UQ);
-        x = @isa@_set_masked_lanes(x, zeros_f, nan_mask);
+        x = @isa@_set_masked_lanes_ps(x, zeros_f, nan_mask);
 
         xmax_mask = _mm@vsize@_cmp_ps@vsub@(x, _mm@vsize@_set1_ps(xmax), _CMP_GE_OQ);
         xmin_mask = _mm@vsize@_cmp_ps@vsub@(x, _mm@vsize@_set1_ps(xmin), _CMP_LE_OQ);
@@ -1690,7 +2074,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
         overflow_mask = @or_masks@(overflow_mask,
                                     @xor_masks@(xmax_mask, inf_mask));
 
-        x = @isa@_set_masked_lanes(x, zeros_f, @or_masks@(
+        x = @isa@_set_masked_lanes_ps(x, zeros_f, @or_masks@(
                                     @or_masks@(nan_mask, xmin_mask), xmax_mask));
 
         quadrant = _mm@vsize@_mul_ps(x, log2e);
@@ -1723,9 +2107,9 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
          * elem < xmin; return 0.0f
          * elem = +/- nan, return nan
          */
-        poly = @isa@_set_masked_lanes(poly, _mm@vsize@_set1_ps(NPY_NANF), nan_mask);
-        poly = @isa@_set_masked_lanes(poly, inf, xmax_mask);
-        poly = @isa@_set_masked_lanes(poly, zeros_f, xmin_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, _mm@vsize@_set1_ps(NPY_NANF), nan_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, inf, xmax_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, zeros_f, xmin_mask);
 
         @masked_store@(op, @cvtps_epi32@(load_mask), poly);
 
@@ -1790,24 +2174,24 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     @vtype@ poly, num_poly, denom_poly, exponent;
 
     @mask@ inf_mask, nan_mask, sqrt2_mask, zero_mask, negx_mask;
-    @mask@ invalid_mask = @isa@_get_partial_load_mask(0, num_lanes);
+    @mask@ invalid_mask = @isa@_get_partial_load_mask_ps(0, num_lanes);
     @mask@ divide_by_zero_mask = invalid_mask;
-    @mask@ load_mask = @isa@_get_full_load_mask();
+    @mask@ load_mask = @isa@_get_full_load_mask_ps();
     npy_intp num_remaining_elements = array_size;
 
     while (num_remaining_elements > 0) {
 
         if (num_remaining_elements < num_lanes) {
-            load_mask = @isa@_get_partial_load_mask(num_remaining_elements,
-                                                    num_lanes);
+            load_mask = @isa@_get_partial_load_mask_ps(num_remaining_elements,
+                                                       num_lanes);
         }
 
         @vtype@ x_in;
         if (stride == 1) {
-            x_in = @isa@_masked_load(load_mask, ip);
+            x_in = @isa@_masked_load_ps(load_mask, ip);
         }
         else {
-            x_in  = @isa@_masked_gather(zeros_f, ip, vindex, load_mask);
+            x_in  = @isa@_masked_gather_ps(zeros_f, ip, vindex, load_mask);
         }
 
         negx_mask = _mm@vsize@_cmp_ps@vsub@(x_in, zeros_f, _CMP_LT_OQ);
@@ -1818,7 +2202,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
                                         @and_masks@(zero_mask, load_mask));
         invalid_mask = @or_masks@(invalid_mask, negx_mask);
 
-        @vtype@ x = @isa@_set_masked_lanes(x_in, zeros_f, negx_mask);
+        @vtype@ x = @isa@_set_masked_lanes_ps(x_in, zeros_f, negx_mask);
 
         /* set x = normalized mantissa */
         exponent = @isa@_get_exponent(x);
@@ -1852,10 +2236,10 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
          * x = +/- NAN; return NAN
          * x = 0.0f; return -INF
          */
-        poly = @isa@_set_masked_lanes(poly, nan, nan_mask);
-        poly = @isa@_set_masked_lanes(poly, neg_nan, negx_mask);
-        poly = @isa@_set_masked_lanes(poly, neg_inf, zero_mask);
-        poly = @isa@_set_masked_lanes(poly, inf, inf_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, nan, nan_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, neg_nan, negx_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, neg_inf, zero_mask);
+        poly = @isa@_set_masked_lanes_ps(poly, inf, inf_mask);
 
         @masked_store@(op, @cvtps_epi32@(load_mask), poly);
 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 5f9a0f7f4..e4ad3dc84 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -908,7 +908,7 @@ parse_ufunc_keywords(PyUFuncObject *ufunc, PyObject *kwds, PyObject **kwnames, .
     typedef int converter(PyObject *, void *);
 
     while (PyDict_Next(kwds, &pos, &key, &value)) {
-        int i;
+        npy_intp i;
         converter *convert;
         void *output = NULL;
         npy_intp index = locate_key(kwnames, key);
@@ -1193,34 +1193,11 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
                 }
             }
             else {
-                /*
-                 * If the deprecated behavior is ever removed,
-                 * keep only the else branch of this if-else
-                 */
-                if (PyArray_Check(out_kwd) || out_kwd == Py_None) {
-                    if (DEPRECATE("passing a single array to the "
-                                  "'out' keyword argument of a "
-                                  "ufunc with\n"
-                                  "more than one output will "
-                                  "result in an error in the "
-                                  "future") < 0) {
-                        /* The future error message */
-                        PyErr_SetString(PyExc_TypeError,
-                                        "'out' must be a tuple of arrays");
-                        goto fail;
-                    }
-                    if (_set_out_array(out_kwd, out_op+nin) < 0) {
-                        goto fail;
-                    }
-                }
-                else {
-                    PyErr_SetString(PyExc_TypeError,
-                                    nout > 1 ? "'out' must be a tuple "
-                                    "of arrays" :
-                                    "'out' must be an array or a "
-                                    "tuple of a single array");
-                    goto fail;
-                }
+                PyErr_SetString(PyExc_TypeError,
+                        nout > 1 ? "'out' must be a tuple of arrays" :
+                                   "'out' must be an array or a tuple with "
+                                   "a single array");
+                goto fail;
             }
         }
         /*
@@ -2297,7 +2274,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
+_parse_axis_arg(PyUFuncObject *ufunc, const int core_num_dims[], PyObject *axis,
                 PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nop = ufunc->nargs;
     int iop, axis_int;
@@ -2368,7 +2345,7 @@ _parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
  */
 static int
 _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
-                   int *op_core_num_dims, npy_uint32 *core_dim_flags,
+                   const int *op_core_num_dims, npy_uint32 *core_dim_flags,
                    npy_intp *core_dim_sizes, int **remap_axis) {
     int i;
     int nin = ufunc->nin;
@@ -4053,14 +4030,14 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
     int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1],
                             op_axes_arrays[2]};
     npy_uint32 op_flags[3];
-    int i, idim, ndim, otype_final;
+    int idim, ndim, otype_final;
     int need_outer_iterator = 0;
 
     NpyIter *iter = NULL;
 
     /* The reduceat indices - ind must be validated outside this call */
     npy_intp *reduceat_ind;
-    npy_intp ind_size, red_axis_size;
+    npy_intp i, ind_size, red_axis_size;
     /* The selected inner loop */
     PyUFuncGenericFunction innerloop = NULL;
     void *innerloopdata = NULL;
@@ -4146,7 +4123,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
 #endif
 
     /* Set up the op_axes for the outer loop */
-    for (i = 0, idim = 0; idim < ndim; ++idim) {
+    for (idim = 0; idim < ndim; ++idim) {
         /* Use the i-th iteration dimension to match up ind */
         if (idim == axis) {
             op_axes_arrays[0][idim] = axis;
@@ -4866,7 +4843,7 @@ ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *args)
 NPY_NO_EXPORT int
 PyUFunc_ReplaceLoopBySignature(PyUFuncObject *func,
                                PyUFuncGenericFunction newfunc,
-                               int *signature,
+                               const int *signature,
                                PyUFuncGenericFunction *oldfunc)
 {
     int i, j;
@@ -4921,7 +4898,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
                                      char *types, int ntypes,
                                      int nin, int nout, int identity,
                                      const char *name, const char *doc,
-                                     int unused, const char *signature,
+                                     const int unused, const char *signature,
                                      PyObject *identity_value)
 {
     PyUFuncObject *ufunc;
@@ -5223,7 +5200,7 @@ NPY_NO_EXPORT int
 PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
                             int usertype,
                             PyUFuncGenericFunction function,
-                            int *arg_types,
+                            const int *arg_types,
                             void *data)
 {
     PyArray_Descr *descr;
diff --git a/numpy/core/tests/test__exceptions.py b/numpy/core/tests/test__exceptions.py
new file mode 100644
index 000000000..494b51f34
--- /dev/null
+++ b/numpy/core/tests/test__exceptions.py
@@ -0,0 +1,42 @@
+"""
+Tests of the ._exceptions module. Primarily for exercising the __str__ methods.
+"""
+import numpy as np
+
+_ArrayMemoryError = np.core._exceptions._ArrayMemoryError
+
+class TestArrayMemoryError:
+    def test_str(self):
+        e = _ArrayMemoryError((1023,), np.dtype(np.uint8))
+        str(e)  # not crashing is enough
+
+    # testing these properties is easier than testing the full string repr
+    def test__size_to_string(self):
+        """ Test e._size_to_string """
+        f = _ArrayMemoryError._size_to_string
+        Ki = 1024
+        assert f(0) == '0 bytes'
+        assert f(1) == '1 bytes'
+        assert f(1023) == '1023 bytes'
+        assert f(Ki) == '1.00 KiB'
+        assert f(Ki+1) == '1.00 KiB'
+        assert f(10*Ki) == '10.0 KiB'
+        assert f(int(999.4*Ki)) == '999. KiB'
+        assert f(int(1023.4*Ki)) == '1023. KiB'
+        assert f(int(1023.5*Ki)) == '1.00 MiB'
+        assert f(Ki*Ki) == '1.00 MiB'
+
+        # 1023.9999 MiB should round to 1 GiB
+        assert f(int(Ki*Ki*Ki*0.9999)) == '1.00 GiB'
+        assert f(Ki*Ki*Ki*Ki*Ki*Ki) == '1.00 EiB'
+        # larger than sys.maxsize, adding larger prefixes isn't going to help
+        # anyway.
+        assert f(Ki*Ki*Ki*Ki*Ki*Ki*123456) == '123456. EiB'
+
+    def test__total_size(self):
+        """ Test e._total_size """
+        e = _ArrayMemoryError((1,), np.dtype(np.uint8))
+        assert e._total_size == 1
+
+        e = _ArrayMemoryError((2, 4), np.dtype((np.uint64, 16)))
+        assert e._total_size == 1024
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index 75a794369..702e68e76 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -262,11 +262,6 @@ class TestArray2String(object):
         assert_(np.array2string(s, formatter={'numpystr':lambda s: s*2}) ==
                 '[abcabc defdef]')
 
-        # check for backcompat that using FloatFormat works and emits warning
-        with assert_warns(DeprecationWarning):
-            fmt = np.core.arrayprint.FloatFormat(x, 9, 'maxprec', False)
-        assert_equal(np.array2string(x, formatter={'float_kind': fmt}),
-                     '[0. 1. 2.]')
 
     def test_structure_format(self):
         dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index e8aa0c70b..8bffaa9af 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -10,12 +10,16 @@ import sys
 import operator
 import warnings
 import pytest
+import shutil
+import tempfile
 
 import numpy as np
 from numpy.testing import (
-    assert_raises, assert_warns, assert_
+    assert_raises, assert_warns, assert_, assert_array_equal
     )
 
+from numpy.core._multiarray_tests import fromstring_null_term_c_api
+
 try:
     import pytz
     _has_pytz = True
@@ -271,36 +275,6 @@ class TestNonCContiguousViewDeprecation(_DeprecationTestCase):
         self.assert_deprecated(np.ones((2,2)).T.view, args=(np.int8,))
 
 
-class TestInvalidOrderParameterInputForFlattenArrayDeprecation(_DeprecationTestCase):
-    """Invalid arguments to the ORDER parameter in array.flatten() should not be
-    allowed and should raise an error.  However, in the interests of not breaking
-    code that may inadvertently pass invalid arguments to this parameter, a
-    DeprecationWarning will be issued instead for the time being to give developers
-    time to refactor relevant code.
-    """
-
-    def test_flatten_array_non_string_arg(self):
-        x = np.zeros((3, 5))
-        self.message = ("Non-string object detected for "
-                        "the array ordering. Please pass "
-                        "in 'C', 'F', 'A', or 'K' instead")
-        self.assert_deprecated(x.flatten, args=(np.pi,))
-
-    def test_flatten_array_invalid_string_arg(self):
-        # Tests that a DeprecationWarning is raised
-        # when a string of length greater than one
-        # starting with "C", "F", "A", or "K" (case-
-        # and unicode-insensitive) is passed in for
-        # the ORDER parameter. Otherwise, a TypeError
-        # will be raised!
-
-        x = np.zeros((3, 5))
-        self.message = ("Non length-one string passed "
-                        "in for the array ordering. Please "
-                        "pass in 'C', 'F', 'A', or 'K' instead")
-        self.assert_deprecated(x.flatten, args=("FACK",))
-
-
 class TestArrayDataAttributeAssignmentDeprecation(_DeprecationTestCase):
     """Assigning the 'data' attribute of an ndarray is unsafe as pointed
      out in gh-7093. Eventually, such assignment should NOT be allowed, but
@@ -319,22 +293,6 @@ class TestArrayDataAttributeAssignmentDeprecation(_DeprecationTestCase):
         self.assert_deprecated(a.__setattr__, args=('data', b.data))
 
 
-class TestLinspaceInvalidNumParameter(_DeprecationTestCase):
-    """Argument to the num parameter in linspace that cannot be
-    safely interpreted as an integer is deprecated in 1.12.0.
-
-    Argument to the num parameter in linspace that cannot be
-    safely interpreted as an integer should not be allowed.
-    In the interest of not breaking code that passes
-    an argument that could still be interpreted as an integer, a
-    DeprecationWarning will be issued for the time being to give
-    developers time to refactor relevant code.
-    """
-    def test_float_arg(self):
-        # 2016-02-25, PR#7328
-        self.assert_deprecated(np.linspace, args=(0, 10, 2.5))
-
-
 class TestBinaryReprInsufficientWidthParameterForRepresentation(_DeprecationTestCase):
     """
     If a 'width' parameter is passed into ``binary_repr`` that is insufficient to
@@ -442,6 +400,18 @@ class TestNPY_CHAR(_DeprecationTestCase):
         assert_(npy_char_deprecation() == 'S1')
 
 
+class TestPyArray_AS1D(_DeprecationTestCase):
+    def test_npy_pyarrayas1d_deprecation(self):
+        from numpy.core._multiarray_tests import npy_pyarrayas1d_deprecation
+        assert_raises(NotImplementedError, npy_pyarrayas1d_deprecation)
+
+
+class TestPyArray_AS2D(_DeprecationTestCase):
+    def test_npy_pyarrayas2d_deprecation(self):
+        from numpy.core._multiarray_tests import npy_pyarrayas2d_deprecation
+        assert_raises(NotImplementedError, npy_pyarrayas2d_deprecation)
+
+
 class Test_UPDATEIFCOPY(_DeprecationTestCase):
     """
     v1.14 deprecates creating an array with the UPDATEIFCOPY flag, use
@@ -514,17 +484,71 @@ class TestPositiveOnNonNumerical(_DeprecationTestCase):
     def test_positive_on_non_number(self):
         self.assert_deprecated(operator.pos, args=(np.array('foo'),))
 
+
 class TestFromstring(_DeprecationTestCase):
     # 2017-10-19, 1.14
     def test_fromstring(self):
         self.assert_deprecated(np.fromstring, args=('\x00'*80,))
 
+
+class TestFromStringAndFileInvalidData(_DeprecationTestCase):
+    # 2019-06-08, 1.17.0
+    # Tests should be moved to real tests when deprecation is done.
+    message = "string or file could not be read to its end"
+
+    @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"])
+    def test_deprecate_unparsable_data_file(self, invalid_str):
+        x = np.array([1.51, 2, 3.51, 4], dtype=float)
+
+        with tempfile.TemporaryFile(mode="w") as f:
+            x.tofile(f, sep=',', format='%.2f')
+            f.write(invalid_str)
+
+            f.seek(0)
+            self.assert_deprecated(lambda: np.fromfile(f, sep=","))
+            f.seek(0)
+            self.assert_deprecated(lambda: np.fromfile(f, sep=",", count=5))
+            # Should not raise:
+            with warnings.catch_warnings():
+                warnings.simplefilter("error", DeprecationWarning)
+                f.seek(0)
+                res = np.fromfile(f, sep=",", count=4)
+                assert_array_equal(res, x)
+
+    @pytest.mark.parametrize("invalid_str", [",invalid_data", "invalid_sep"])
+    def test_deprecate_unparsable_string(self, invalid_str):
+        x = np.array([1.51, 2, 3.51, 4], dtype=float)
+        x_str = "1.51,2,3.51,4{}".format(invalid_str)
+
+        self.assert_deprecated(lambda: np.fromstring(x_str, sep=","))
+        self.assert_deprecated(lambda: np.fromstring(x_str, sep=",", count=5))
+
+        # The C-level API can also take 0-terminated strings instead of
+        # fixed-size ones, so test that as well:
+        bytestr = x_str.encode("ascii")
+        self.assert_deprecated(lambda: fromstring_null_term_c_api(bytestr))
+
+        with assert_warns(DeprecationWarning):
+            # this is slightly strange, in that fromstring leaves data
+            # potentially uninitialized (would be good to error when all is
+            # read, but count is larger than actual data maybe).
+            res = np.fromstring(x_str, sep=",", count=5)
+            assert_array_equal(res[:-1], x)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", DeprecationWarning)
+
+            # Should not raise:
+            res = np.fromstring(x_str, sep=",", count=4)
+            assert_array_equal(res, x)
+
+
 class Test_GetSet_NumericOps(_DeprecationTestCase):
     # 2018-09-20, 1.16.0
     def test_get_numeric_ops(self):
         from numpy.core._multiarray_tests import getset_numericops
         self.assert_deprecated(getset_numericops, num=2)
-        
+
         # empty kwargs prevents any state actually changing which would break
         # other tests.
         self.assert_deprecated(np.set_numeric_ops, kwargs={})
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index f60eab696..d2fbbae5b 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -419,6 +419,31 @@ class TestRecord(object):
         assert_raises(ValueError, np.dtype,
                 {'formats': ['i4', 'i4'], 'f0': ('i4', 0), 'f1':('i4', 4)})
 
+    def test_fieldless_views(self):
+        a = np.zeros(2, dtype={'names':[], 'formats':[], 'offsets':[], 
+                               'itemsize':8})
+        assert_raises(ValueError, a.view, np.dtype([]))
+
+        d = np.dtype((np.dtype([]), 10))
+        assert_equal(d.shape, (10,))
+        assert_equal(d.itemsize, 0)
+        assert_equal(d.base, np.dtype([]))
+
+        arr = np.fromiter((() for i in range(10)), [])
+        assert_equal(arr.dtype, np.dtype([]))
+        assert_raises(ValueError, np.frombuffer, b'', dtype=[])
+        assert_equal(np.frombuffer(b'', dtype=[], count=2),
+                     np.empty(2, dtype=[]))
+
+        assert_raises(ValueError, np.dtype, ([], 'f8'))
+        assert_raises(ValueError, np.zeros(1, dtype='i4').view, [])
+
+        assert_equal(np.zeros(2, dtype=[]) == np.zeros(2, dtype=[]),
+                     np.ones(2, dtype=bool))
+
+        assert_equal(np.zeros((1, 2), dtype=[]) == a,
+                     np.ones((1, 2), dtype=bool))
+
 
 class TestSubarray(object):
     def test_single_subarray(self):
@@ -938,13 +963,6 @@ class TestDtypeAttributes(object):
         new_dtype = np.dtype(dtype.descr)
         assert_equal(new_dtype.itemsize, 16)
 
-    @pytest.mark.parametrize('t', np.typeDict.values())
-    def test_name_builtin(self, t):
-        name = t.__name__
-        if name.endswith('_'):
-            name = name[:-1]
-        assert_equal(np.dtype(t).name, name)
-
     def test_name_dtype_subclass(self):
         # Ticket #4357
         class user_def_subcls(np.void):
diff --git a/numpy/core/tests/test_function_base.py b/numpy/core/tests/test_function_base.py
index 6f5709372..84b60b19c 100644
--- a/numpy/core/tests/test_function_base.py
+++ b/numpy/core/tests/test_function_base.py
@@ -236,10 +236,7 @@ class TestLinspace(object):
     def test_corner(self):
         y = list(linspace(0, 1, 1))
         assert_(y == [0.0], y)
-        with suppress_warnings() as sup:
-            sup.filter(DeprecationWarning, ".*safely interpreted as an integer")
-            y = list(linspace(0, 1, 2.5))
-            assert_(y == [0.0, 1.0])
+        assert_raises(TypeError, linspace, 0, 1, num=2.5)
 
     def test_type(self):
         t1 = linspace(0, 1, 0).dtype
diff --git a/numpy/core/tests/test_longdouble.py b/numpy/core/tests/test_longdouble.py
index ee4197f8f..59ac5923c 100644
--- a/numpy/core/tests/test_longdouble.py
+++ b/numpy/core/tests/test_longdouble.py
@@ -5,7 +5,8 @@ import pytest
 
 import numpy as np
 from numpy.testing import (
-    assert_, assert_equal, assert_raises, assert_array_equal, temppath,
+    assert_, assert_equal, assert_raises, assert_warns, assert_array_equal,
+    temppath,
     )
 from numpy.core.tests._locales import CommaDecimalPointLocale
 
@@ -71,18 +72,21 @@ def test_fromstring():
 
 
 def test_fromstring_bogus():
-    assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "),
-                 np.array([1., 2., 3.]))
+    with assert_warns(DeprecationWarning):
+        assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "),
+                     np.array([1., 2., 3.]))
 
 
 def test_fromstring_empty():
-    assert_equal(np.fromstring("xxxxx", sep="x"),
-                 np.array([]))
+    with assert_warns(DeprecationWarning):
+        assert_equal(np.fromstring("xxxxx", sep="x"),
+                     np.array([]))
 
 
 def test_fromstring_missing():
-    assert_equal(np.fromstring("1xx3x4x5x6", sep="x"),
-                 np.array([1]))
+    with assert_warns(DeprecationWarning):
+        assert_equal(np.fromstring("1xx3x4x5x6", sep="x"),
+                     np.array([1]))
 
 
 class TestFileBased(object):
@@ -95,7 +99,9 @@ class TestFileBased(object):
         with temppath() as path:
             with open(path, 'wt') as f:
                 f.write("1. 2. 3. flop 4.\n")
-            res = np.fromfile(path, dtype=float, sep=" ")
+
+            with assert_warns(DeprecationWarning):
+                res = np.fromfile(path, dtype=float, sep=" ")
         assert_equal(res, np.array([1., 2., 3.]))
 
     @pytest.mark.skipif(string_to_longdouble_inaccurate,
@@ -186,12 +192,14 @@ class TestCommaDecimalPointLocale(CommaDecimalPointLocale):
         assert_equal(a[0], f)
 
     def test_fromstring_best_effort_float(self):
-        assert_equal(np.fromstring("1,234", dtype=float, sep=" "),
-                     np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1,234", dtype=float, sep=" "),
+                         np.array([1.]))
 
     def test_fromstring_best_effort(self):
-        assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "),
-                     np.array([1.]))
+        with assert_warns(DeprecationWarning):
+            assert_equal(np.fromstring("1,234", dtype=np.longdouble, sep=" "),
+                         np.array([1.]))
 
     def test_fromstring_foreign(self):
         s = "1.234"
@@ -204,8 +212,10 @@ class TestCommaDecimalPointLocale(CommaDecimalPointLocale):
         assert_array_equal(a, b)
 
     def test_fromstring_foreign_value(self):
-        b = np.fromstring("1,234", dtype=np.longdouble, sep=" ")
-        assert_array_equal(b[0], 1)
+        with assert_warns(DeprecationWarning):
+            b = np.fromstring("1,234", dtype=np.longdouble, sep=" ")
+            assert_array_equal(b[0], 1)
+
 
 @pytest.mark.parametrize("int_val", [
     # cases discussed in gh-10723
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 6a115f41b..9b124f603 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -44,7 +44,7 @@ from numpy.testing import (
     assert_, assert_raises, assert_warns, assert_equal, assert_almost_equal,
     assert_array_equal, assert_raises_regex, assert_array_almost_equal,
     assert_allclose, IS_PYPY, HAS_REFCOUNT, assert_array_less, runstring,
-    temppath, suppress_warnings, break_cycles, assert_raises_regex,
+    temppath, suppress_warnings, break_cycles,
     )
 from numpy.core.tests._locales import CommaDecimalPointLocale
 
@@ -114,7 +114,7 @@ class TestFlags(object):
         # Ensure that any base being writeable is sufficient to change flag;
         # this is especially interesting for arrays from an array interface.
         arr = np.arange(10)
-        
+
         class subclass(np.ndarray):
             pass
 
@@ -497,9 +497,6 @@ class TestArrayConstruction(object):
         assert_(np.ascontiguousarray(d).flags.c_contiguous)
         assert_(np.asfortranarray(d).flags.f_contiguous)
 
-    def test_ragged(self):
-        assert_raises_regex(ValueError, 'ragged',
-                             np.array, [[1], [2, 3]], dtype=int)
 
 class TestAssignment(object):
     def test_assignment_broadcasting(self):
@@ -2792,6 +2789,12 @@ class TestMethods(object):
         assert_equal(x1.flatten('F'), y1f)
         assert_equal(x1.flatten('F'), x1.T.flatten())
 
+    def test_flatten_invalid_order(self):
+        # invalid after gh-14596
+        for order in ['Z', 'c', False, True, 0, 8]:
+            x = np.array([[1, 2, 3], [4, 5, 6]], np.int32)
+            assert_raises(ValueError, x.flatten, {"order": order})
+
     @pytest.mark.parametrize('func', (np.dot, np.matmul))
     def test_arr_mult(self, func):
         a = np.array([[1, 0], [0, 1]])
@@ -3967,13 +3970,13 @@ class TestPickling(object):
 
     def test_datetime64_byteorder(self):
         original = np.array([['2015-02-24T00:00:00.000000000']], dtype='datetime64[ns]')
-    
+
         original_byte_reversed = original.copy(order='K')
         original_byte_reversed.dtype = original_byte_reversed.dtype.newbyteorder('S')
         original_byte_reversed.byteswap(inplace=True)
 
         new = pickle.loads(pickle.dumps(original_byte_reversed))
-    
+
         assert_equal(original.dtype, new.dtype)
 
 
@@ -4590,18 +4593,26 @@ class TestTake(object):
         assert_equal(y, np.array([1, 2, 3]))
 
 class TestLexsort(object):
-    def test_basic(self):
-        a = [1, 2, 1, 3, 1, 5]
-        b = [0, 4, 5, 6, 2, 3]
+    @pytest.mark.parametrize('dtype',[
+        np.uint8, np.uint16, np.uint32, np.uint64,
+        np.int8, np.int16, np.int32, np.int64,
+        np.float16, np.float32, np.float64
+    ])
+    def test_basic(self, dtype):
+        a = np.array([1, 2, 1, 3, 1, 5], dtype=dtype)
+        b = np.array([0, 4, 5, 6, 2, 3], dtype=dtype)
         idx = np.lexsort((b, a))
         expected_idx = np.array([0, 4, 2, 1, 3, 5])
         assert_array_equal(idx, expected_idx)
+        assert_array_equal(a[idx], np.sort(a))
 
-        x = np.vstack((b, a))
-        idx = np.lexsort(x)
-        assert_array_equal(idx, expected_idx)
+    def test_mixed(self):
+        a = np.array([1, 2, 1, 3, 1, 5])
+        b = np.array([0, 4, 5, 6, 2, 3], dtype='datetime64[D]')
 
-        assert_array_equal(x[1][idx], np.sort(x[1]))
+        idx = np.lexsort((b, a))
+        expected_idx = np.array([0, 4, 2, 1, 3, 5])
+        assert_array_equal(idx, expected_idx)
 
     def test_datetime(self):
         a = np.array([0,0,0], dtype='datetime64[D]')
@@ -4868,7 +4879,7 @@ class TestIO(object):
             offset_bytes = self.dtype.itemsize
             z = np.fromfile(f, dtype=self.dtype, offset=offset_bytes)
             assert_array_equal(z, self.x.flat[offset_items+count_items+1:])
-        
+
         with open(self.filename, 'wb') as f:
             self.x.tofile(f, sep=",")
 
@@ -4958,7 +4969,8 @@ class TestIO(object):
         self._check_from(b'1,2,3,4', [1., 2., 3., 4.], dtype=float, sep=',')
 
     def test_malformed(self):
-        self._check_from(b'1.234 1,234', [1.234, 1.], sep=' ')
+        with assert_warns(DeprecationWarning):
+            self._check_from(b'1.234 1,234', [1.234, 1.], sep=' ')
 
     def test_long_sep(self):
         self._check_from(b'1_x_3_x_4_x_5', [1, 3, 4, 5], sep='_x_')
@@ -5011,6 +5023,19 @@ class TestIO(object):
             self.test_tofile_sep()
             self.test_tofile_format()
 
+    def test_fromfile_subarray_binary(self):
+        # Test subarray dtypes which are absorbed into the shape
+        x = np.arange(24, dtype="i4").reshape(2, 3, 4)
+        x.tofile(self.filename)
+        res = np.fromfile(self.filename, dtype="(3,4)i4")
+        assert_array_equal(x, res)
+
+        x_str = x.tobytes()
+        with assert_warns(DeprecationWarning):
+            # binary fromstring is deprecated
+            res = np.fromstring(x_str, dtype="(3,4)i4")
+            assert_array_equal(x, res)
+
 
 class TestFromBuffer(object):
     @pytest.mark.parametrize('byteorder', ['<', '>'])
@@ -6225,14 +6250,14 @@ class TestMatmul(MatmulCommon):
 
         r3 = np.matmul(args[0].copy(), args[1].copy())
         assert_equal(r1, r3)
-    
+
     def test_matmul_object(self):
         import fractions
 
         f = np.vectorize(fractions.Fraction)
         def random_ints():
             return np.random.randint(1, 1000, size=(10, 3, 3))
-        M1 = f(random_ints(), random_ints()) 
+        M1 = f(random_ints(), random_ints())
         M2 = f(random_ints(), random_ints())
 
         M3 = self.matmul(M1, M2)
@@ -6271,6 +6296,23 @@ class TestMatmul(MatmulCommon):
         with assert_raises(TypeError):
             b = np.matmul(a, a)
 
+    def test_matmul_bool(self):
+        # gh-14439
+        a = np.array([[1, 0],[1, 1]], dtype=bool)
+        assert np.max(a.view(np.uint8)) == 1
+        b = np.matmul(a, a)
+        # matmul with boolean output should always be 0, 1
+        assert np.max(b.view(np.uint8)) == 1
+
+        rg = np.random.default_rng(np.random.PCG64(43))
+        d = rg.integers(2, size=4*5, dtype=np.int8)
+        d = d.reshape(4, 5) > 0
+        out1 = np.matmul(d, d.reshape(5, 4))
+        out2 = np.dot(d, d.reshape(5, 4))
+        assert_equal(out1, out2)
+
+        c = np.matmul(np.zeros((2, 0), dtype=bool), np.zeros(0, dtype=bool))
+        assert not np.any(c)
 
 
 if sys.version_info[:2] >= (3, 5):
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 3e85054b7..1358b45e9 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -1341,6 +1341,11 @@ class TestBinaryRepr(object):
             exp = '1' + (width - 1) * '0'
             assert_equal(np.binary_repr(num, width=width), exp)
 
+    def test_large_neg_int64(self):
+        # See gh-14289.
+        assert_equal(np.binary_repr(np.int64(-2**62), width=64),
+                     '11' + '0'*62)
+
 
 class TestBaseRepr(object):
     def test_base3(self):
@@ -2578,6 +2583,30 @@ class TestConvolve(object):
 
 
 class TestArgwhere(object):
+
+    @pytest.mark.parametrize('nd', [0, 1, 2])
+    def test_nd(self, nd):
+        # get an nd array with multiple elements in every dimension
+        x = np.empty((2,)*nd, bool)
+
+        # none
+        x[...] = False
+        assert_equal(np.argwhere(x).shape, (0, nd))
+
+        # only one
+        x[...] = False
+        x.flat[0] = True
+        assert_equal(np.argwhere(x).shape, (1, nd))
+
+        # all but one
+        x[...] = True
+        x.flat[0] = False
+        assert_equal(np.argwhere(x).shape, (x.size - 1, nd))
+
+        # all
+        x[...] = True
+        assert_equal(np.argwhere(x).shape, (x.size, nd))
+
     def test_2D(self):
         x = np.arange(6).reshape((2, 3))
         assert_array_equal(np.argwhere(x > 1),
diff --git a/numpy/core/tests/test_numerictypes.py b/numpy/core/tests/test_numerictypes.py
index d0ff5578a..387740e35 100644
--- a/numpy/core/tests/test_numerictypes.py
+++ b/numpy/core/tests/test_numerictypes.py
@@ -498,3 +498,32 @@ class TestDocStrings(object):
             assert_('int64' in np.int_.__doc__)
         elif np.int64 is np.longlong:
             assert_('int64' in np.longlong.__doc__)
+
+
+class TestScalarTypeNames:
+    # gh-9799
+
+    numeric_types = [
+        np.byte, np.short, np.intc, np.int_, np.longlong,
+        np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong,
+        np.half, np.single, np.double, np.longdouble,
+        np.csingle, np.cdouble, np.clongdouble,
+    ]
+
+    def test_names_are_unique(self):
+        # none of the above may be aliases for each other
+        assert len(set(self.numeric_types)) == len(self.numeric_types)
+
+        # names must be unique
+        names = [t.__name__ for t in self.numeric_types]
+        assert len(set(names)) == len(names)
+
+    @pytest.mark.parametrize('t', numeric_types)
+    def test_names_reflect_attributes(self, t):
+        """ Test that names correspond to where the type is under ``np.`` """
+        assert getattr(np, t.__name__) is t
+
+    @pytest.mark.parametrize('t', numeric_types)
+    def test_names_are_understood_by_dtype(self, t):
+        """ Test that ``np.dtype`` maps each type's name back to that type """
+        assert np.dtype(t.__name__).type is t
diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py
index 14413224e..c1b794145 100644
--- a/numpy/core/tests/test_records.py
+++ b/numpy/core/tests/test_records.py
@@ -444,6 +444,48 @@ class TestRecord(object):
         ]
         arr = np.rec.fromarrays(arrays)  # ValueError?
 
+    @pytest.mark.parametrize('nfields', [0, 1, 2])
+    def test_assign_dtype_attribute(self, nfields):
+        dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields])
+        data = np.zeros(3, dt).view(np.recarray)
+
+        # the original and resulting dtypes differ on whether they are records
+        assert data.dtype.type == np.record
+        assert dt.type != np.record
+
+        # ensure that the dtype remains a record even when assigned
+        data.dtype = dt
+        assert data.dtype.type == np.record
+
+    @pytest.mark.parametrize('nfields', [0, 1, 2])
+    def test_nested_fields_are_records(self, nfields):
+        """ Test that nested structured types are treated as records too """
+        dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)][:nfields])
+        dt_outer = np.dtype([('inner', dt)])
+
+        data = np.zeros(3, dt_outer).view(np.recarray)
+        assert isinstance(data, np.recarray)
+        assert isinstance(data['inner'], np.recarray)
+
+        data0 = data[0]
+        assert isinstance(data0, np.record)
+        assert isinstance(data0['inner'], np.record)
+
+    def test_nested_dtype_padding(self):
+        """ test that trailing padding is preserved """
+        # construct a dtype with padding at the end
+        dt = np.dtype([('a', np.uint8), ('b', np.uint8), ('c', np.uint8)])
+        dt_padded_end = dt[['a', 'b']]
+        assert dt_padded_end.itemsize == dt.itemsize
+
+        dt_outer = np.dtype([('inner', dt_padded_end)])
+
+        data = np.zeros(3, dt_outer).view(np.recarray)
+        assert_equal(data['inner'].dtype, dt_padded_end)
+
+        data0 = data[0]
+        assert_equal(data0['inner'].dtype, dt_padded_end)
+
 
 def test_find_duplicate():
     l1 = [1, 2, 3, 4, 5, 6]
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index e564ae300..9dc231deb 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -436,6 +436,32 @@ class TestRegression(object):
 
         assert_raises(KeyError, np.lexsort, BuggySequence())
 
+    def test_lexsort_zerolen_custom_strides(self):
+        # Ticket #14228
+        xs = np.array([], dtype='i8')
+        assert xs.strides == (8,)
+        assert np.lexsort((xs,)).shape[0] == 0 # Works
+
+        xs.strides = (16,)
+        assert np.lexsort((xs,)).shape[0] == 0 # Was: MemoryError
+
+    def test_lexsort_zerolen_custom_strides_2d(self):
+        xs = np.array([], dtype='i8')
+
+        xs.shape = (0, 2)
+        xs.strides = (16, 16)
+        assert np.lexsort((xs,), axis=0).shape[0] == 0
+
+        xs.shape = (2, 0)
+        xs.strides = (16, 16)
+        assert np.lexsort((xs,), axis=0).shape[0] == 2
+
+    def test_lexsort_zerolen_element(self):
+        dt = np.dtype([])  # a void dtype with no fields
+        xs = np.empty(4, dt)
+
+        assert np.lexsort((xs,)).shape[0] == xs.shape[0]
+
     def test_pickle_py2_bytes_encoding(self):
         # Check that arrays and scalars pickled on Py2 are
         # unpickleable on Py3 using encoding='bytes'
@@ -468,7 +494,7 @@ class TestRegression(object):
                 result = pickle.loads(data, encoding='bytes')
                 assert_equal(result, original)
 
-                if isinstance(result, np.ndarray) and result.dtype.names:
+                if isinstance(result, np.ndarray) and result.dtype.names is not None:
                     for name in result.dtype.names:
                         assert_(isinstance(name, str))
 
@@ -1513,7 +1539,8 @@ class TestRegression(object):
 
     def test_fromstring_crash(self):
         # Ticket #1345: the following should not cause a crash
-        np.fromstring(b'aa, aa, 1.0', sep=',')
+        with assert_warns(DeprecationWarning):
+            np.fromstring(b'aa, aa, 1.0', sep=',')
 
     def test_ticket_1539(self):
         dtypes = [x for x in np.typeDict.values()
@@ -2474,3 +2501,13 @@ class TestRegression(object):
         t = T()
         #gh-13659, would raise in broadcasting [x=t for x in result]
         np.array([t])
+
+    @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+    @pytest.mark.skipif(sys.platform == 'win32' and sys.version_info[:2] < (3, 8),
+                        reason='overflows on windows, fixed in bpo-16865')
+    def test_to_ctypes(self):
+        #gh-14214
+        arr = np.zeros((2 ** 31 + 1,), 'b')
+        assert arr.size * arr.itemsize > 2 ** 31
+        c_arr = np.ctypeslib.as_ctypes(arr)
+        assert_equal(c_arr._length_, arr.size)
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index ebba457e3..854df5590 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -664,3 +664,31 @@ class TestAbs(object):
 
     def test_numpy_abs(self):
         self._test_abs_func(np.abs)
+
+
+class TestBitShifts(object):
+
+    @pytest.mark.parametrize('type_code', np.typecodes['AllInteger'])
+    @pytest.mark.parametrize('op',
+        [operator.rshift, operator.lshift], ids=['>>', '<<'])
+    def test_shift_all_bits(self, type_code, op):
+        """ Shifts where the shift amount is the width of the type or wider """
+        # gh-2449
+        dt = np.dtype(type_code)
+        nbits = dt.itemsize * 8
+        for val in [5, -5]:
+            for shift in [nbits, nbits + 4]:
+                val_scl = dt.type(val)
+                shift_scl = dt.type(shift)
+                res_scl = op(val_scl, shift_scl)
+                if val_scl < 0 and op is operator.rshift:
+                    # sign bit is preserved
+                    assert_equal(res_scl, -1)
+                else:
+                    assert_equal(res_scl, 0)
+
+                # Result on scalars should be the same as on arrays
+                val_arr = np.array([val]*32, dtype=dt)
+                shift_arr = np.array([shift]*32, dtype=dt)
+                res_arr = op(val_arr, shift_arr)
+                assert_equal(res_arr, res_scl)
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index ef48fed05..9b4ce9e47 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -75,11 +75,9 @@ class TestOut(object):
             assert_(r1 is o1)
             assert_(r2 is o2)
 
-            with warnings.catch_warnings(record=True) as w:
-                warnings.filterwarnings('always', '', DeprecationWarning)
+            with assert_raises(TypeError):
+                # Out argument must be tuple, since there are multiple outputs.
                 r1, r2 = np.frexp(d, out=o1, subok=subok)
-                assert_(r1 is o1)
-                assert_(w[0].category is DeprecationWarning)
 
             assert_raises(ValueError, np.add, a, 2, o, o, subok=subok)
             assert_raises(ValueError, np.add, a, 2, o, out=o, subok=subok)
@@ -165,14 +163,9 @@ class TestOut(object):
             else:
                 assert_(type(r1) == np.ndarray)
 
-            with warnings.catch_warnings(record=True) as w:
-                warnings.filterwarnings('always', '', DeprecationWarning)
+            with assert_raises(TypeError):
+                # Out argument must be tuple, since there are multiple outputs.
                 r1, r2 = np.frexp(d, out=o1, subok=subok)
-                if subok:
-                    assert_(isinstance(r2, ArrayWrap))
-                else:
-                    assert_(type(r2) == np.ndarray)
-                assert_(w[0].category is DeprecationWarning)
 
 
 class TestComparisons(object):
@@ -694,8 +687,96 @@ class TestSpecialFloats(object):
             assert_raises(FloatingPointError, np.cos, np.float32(-np.inf))
             assert_raises(FloatingPointError, np.cos, np.float32(np.inf))
 
+    def test_sqrt_values(self):
+        with np.errstate(all='ignore'):
+            x = [np.nan,  np.nan, np.inf, np.nan, 0.]
+            y = [np.nan, -np.nan, np.inf, -np.inf, 0.]
+            for dt in ['f', 'd', 'g']:
+                xf = np.array(x, dtype=dt)
+                yf = np.array(y, dtype=dt)
+                assert_equal(np.sqrt(yf), xf)
+
+        #with np.errstate(invalid='raise'):
+        #    for dt in ['f', 'd', 'g']:
+        #        assert_raises(FloatingPointError, np.sqrt, np.array(-100., dtype=dt))
 
-class TestSIMDFloat32(object):
+    def test_abs_values(self):
+        x = [np.nan,  np.nan, np.inf, np.inf, 0., 0., 1.0, 1.0]
+        y = [np.nan, -np.nan, np.inf, -np.inf, 0., -0., -1.0, 1.0]
+        for dt in ['f', 'd', 'g']:
+            xf = np.array(x, dtype=dt)
+            yf = np.array(y, dtype=dt)
+            assert_equal(np.abs(yf), xf)
+
+    def test_square_values(self):
+        x = [np.nan,  np.nan, np.inf, np.inf]
+        y = [np.nan, -np.nan, np.inf, -np.inf]
+        with np.errstate(all='ignore'):
+            for dt in ['f', 'd', 'g']:
+                xf = np.array(x, dtype=dt)
+                yf = np.array(y, dtype=dt)
+                assert_equal(np.square(yf), xf)
+
+        with np.errstate(over='raise'):
+            assert_raises(FloatingPointError, np.square, np.array(1E32,  dtype='f'))
+            assert_raises(FloatingPointError, np.square, np.array(1E200, dtype='d'))
+
+    def test_reciprocal_values(self):
+        with np.errstate(all='ignore'):
+            x = [np.nan,  np.nan, 0.0, -0.0, np.inf, -np.inf]
+            y = [np.nan, -np.nan, np.inf, -np.inf, 0., -0.]
+            for dt in ['f', 'd', 'g']:
+                xf = np.array(x, dtype=dt)
+                yf = np.array(y, dtype=dt)
+                assert_equal(np.reciprocal(yf), xf)
+
+        with np.errstate(divide='raise'):
+            for dt in ['f', 'd', 'g']:
+                assert_raises(FloatingPointError, np.reciprocal, np.array(-0.0, dtype=dt))
+
+# func : [maxulperror, low, high]
+avx_ufuncs = {'sqrt'        :[1,  0.,   100.],
+              'absolute'    :[0, -100., 100.],
+              'reciprocal'  :[1,  1.,   100.],
+              'square'      :[1, -100., 100.],
+              'rint'        :[0, -100., 100.],
+              'floor'       :[0, -100., 100.],
+              'ceil'        :[0, -100., 100.],
+              'trunc'       :[0, -100., 100.]}
+
+class TestAVXUfuncs(object):
+    def test_avx_based_ufunc(self):
+        strides = np.array([-4,-3,-2,-1,1,2,3,4])
+        np.random.seed(42)
+        for func, prop in avx_ufuncs.items():
+            maxulperr = prop[0]
+            minval = prop[1]
+            maxval = prop[2]
+            # various array sizes to ensure masking in AVX is tested
+            for size in range(1,32):
+                myfunc = getattr(np, func)
+                x_f32 = np.float32(np.random.uniform(low=minval, high=maxval,
+                    size=size))
+                x_f64 = np.float64(x_f32)
+                x_f128 = np.longdouble(x_f32)
+                y_true128 = myfunc(x_f128)
+                if maxulperr == 0:
+                    assert_equal(myfunc(x_f32), np.float32(y_true128))
+                    assert_equal(myfunc(x_f64), np.float64(y_true128))
+                else:
+                    assert_array_max_ulp(myfunc(x_f32), np.float32(y_true128),
+                            maxulp=maxulperr)
+                    assert_array_max_ulp(myfunc(x_f64), np.float64(y_true128),
+                            maxulp=maxulperr)
+                # various strides to test gather instruction
+                if size > 1:
+                    y_true32 = myfunc(x_f32)
+                    y_true64 = myfunc(x_f64)
+                    for jj in strides:
+                        assert_equal(myfunc(x_f64[::jj]), y_true64[::jj])
+                        assert_equal(myfunc(x_f32[::jj]), y_true32[::jj])
+
+class TestAVXFloat32Transcendental(object):
     def test_exp_float32(self):
         np.random.seed(42)
         x_f32 = np.float32(np.random.uniform(low=0.0,high=88.1,size=1000000))
@@ -722,8 +803,8 @@ class TestSIMDFloat32(object):
 
     def test_strided_float32(self):
         np.random.seed(42)
-        strides = np.random.randint(low=-100, high=100, size=100)
-        sizes = np.random.randint(low=1, high=2000, size=100)
+        strides = np.array([-4,-3,-2,-1,1,2,3,4])
+        sizes = np.arange(2,100)
         for ii in sizes:
             x_f32 = np.float32(np.random.uniform(low=0.01,high=88.1,size=ii))
             exp_true = np.exp(x_f32)
@@ -2161,10 +2242,9 @@ class TestSpecialMethods(object):
         assert_(np.modf(a, None) == {})
         assert_(np.modf(a, None, None) == {})
         assert_(np.modf(a, out=(None, None)) == {})
-        with warnings.catch_warnings(record=True) as w:
-            warnings.filterwarnings('always', '', DeprecationWarning)
-            assert_(np.modf(a, out=None) == {})
-            assert_(w[0].category is DeprecationWarning)
+        with assert_raises(TypeError):
+            # Out argument must be tuple, since there are multiple outputs.
+            np.modf(a, out=None)
 
         # don't give positional and output argument, or too many arguments.
         # wrong number of arguments in the tuple is an error too.
diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py
index fcbed0dd3..0bab04df2 100644
--- a/numpy/core/tests/test_umath_accuracy.py
+++ b/numpy/core/tests/test_umath_accuracy.py
@@ -35,7 +35,9 @@ class TestAccuracy(object):
             for filename in files:
                 data_dir = path.join(path.dirname(__file__), 'data')
                 filepath = path.join(data_dir, filename)
-                file_without_comments = (r for r in open(filepath) if not r[0] in ('$', '#'))
+                with open(filepath) as fid:
+                    # Read eagerly: genfromtxt below runs after fid is closed.
+                    file_without_comments = [r for r in fid if not r[0] in ('$', '#')]
                 data = np.genfromtxt(file_without_comments,
                                      dtype=('|S39','|S39','|S39',np.int),
                                      names=('type','input','output','ulperr'),
diff --git a/numpy/ctypeslib.py b/numpy/ctypeslib.py
index 1f842d003..58f3ef9d3 100644
--- a/numpy/ctypeslib.py
+++ b/numpy/ctypeslib.py
@@ -92,11 +92,11 @@ else:
     # Adapted from Albert Strasheim
     def load_library(libname, loader_path):
         """
-        It is possible to load a library using 
+        It is possible to load a library using
         >>> lib = ctypes.cdll[<full_path_name>] # doctest: +SKIP
 
         But there are cross-platform considerations, such as library file extensions,
-        plus the fact Windows will just load the first library it finds with that name.  
+        plus the fact Windows will just load the first library it finds with that name.
         NumPy supplies the load_library function as a convenience.
 
         Parameters
@@ -110,12 +110,12 @@ else:
         Returns
         -------
         ctypes.cdll[libpath] : library object
-           A ctypes library object 
+           A ctypes library object
 
         Raises
         ------
         OSError
-            If there is no library with the expected extension, or the 
+            If there is no library with the expected extension, or the
             library is defective and cannot be loaded.
         """
         if ctypes.__version__ < '1.0.1':
@@ -321,7 +321,7 @@ def ndpointer(dtype=None, ndim=None, shape=None, flags=None):
     # produce a name for the new type
     if dtype is None:
         name = 'any'
-    elif dtype.names:
+    elif dtype.names is not None:
         name = str(id(dtype))
     else:
         name = dtype.str
@@ -535,7 +535,10 @@ if ctypes is not None:
         if readonly:
             raise TypeError("readonly arrays unsupported")
 
-        dtype = _dtype((ai["typestr"], ai["shape"]))
-        result = as_ctypes_type(dtype).from_address(addr)
+        # can't use `_dtype((ai["typestr"], ai["shape"]))` here, as it overflows
+        # dtype.itemsize (gh-14214)
+        ctype_scalar = as_ctypes_type(ai["typestr"])
+        result_type = _ctype_ndarray(ctype_scalar, ai["shape"])
+        result = result_type.from_address(addr)
         result.__keep = obj
         return result
diff --git a/numpy/distutils/__init__.py b/numpy/distutils/__init__.py
index 55514750e..8dbb63b28 100644
--- a/numpy/distutils/__init__.py
+++ b/numpy/distutils/__init__.py
@@ -1,12 +1,31 @@
+"""
+An enhanced distutils, providing support for Fortran compilers, for BLAS,
+LAPACK and other common libraries for numerical computing, and more.
+
+Public submodules are::
+
+    misc_util
+    system_info
+    cpu_info
+    log
+    exec_command
+
+For details, please see the *Packaging* and *NumPy Distutils User Guide*
+sections of the NumPy Reference Guide.
+
+For configuring the preference for and location of libraries like BLAS and
+LAPACK, and for setting include paths and similar build options, please see
+``site.cfg.example`` in the root of the NumPy repository or sdist.
+
+"""
+
 from __future__ import division, absolute_import, print_function
 
-from .__version__ import version as __version__
 # Must import local ccompiler ASAP in order to get
 # customized CCompiler.spawn effective.
 from . import ccompiler
 from . import unixccompiler
 
-from .info import __doc__
 from .npy_pkg_config import *
 
 # If numpy is installed, add distutils.test()
@@ -28,7 +47,7 @@ def customized_fcompiler(plat=None, compiler=None):
     c.customize()
     return c
 
-def customized_ccompiler(plat=None, compiler=None):
-    c = ccompiler.new_compiler(plat=plat, compiler=compiler)
+def customized_ccompiler(plat=None, compiler=None, verbose=1):
+    c = ccompiler.new_compiler(plat=plat, compiler=compiler, verbose=verbose)
     c.customize('')
     return c
diff --git a/numpy/distutils/__version__.py b/numpy/distutils/__version__.py
deleted file mode 100644
index 969decbba..000000000
--- a/numpy/distutils/__version__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from __future__ import division, absolute_import, print_function
-
-major = 0
-minor = 4
-micro = 0
-version = '%(major)d.%(minor)d.%(micro)d' % (locals())
diff --git a/numpy/distutils/ccompiler.py b/numpy/distutils/ccompiler.py
index 14451fa66..643879023 100644
--- a/numpy/distutils/ccompiler.py
+++ b/numpy/distutils/ccompiler.py
@@ -140,7 +140,10 @@ def CCompiler_spawn(self, cmd, display=None):
             display = ' '.join(list(display))
     log.info(display)
     try:
-        subprocess.check_output(cmd)
+        if self.verbose:
+            subprocess.check_output(cmd)
+        else:
+            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as exc:
         o = exc.output
         s = exc.returncode
@@ -162,7 +165,8 @@ def CCompiler_spawn(self, cmd, display=None):
     if is_sequence(cmd):
         cmd = ' '.join(list(cmd))
 
-    forward_bytes_to_stdout(o)
+    if self.verbose:
+        forward_bytes_to_stdout(o)
 
     if re.search(b'Too many open files', o):
         msg = '\nTry rerunning setup command until build succeeds.'
@@ -727,10 +731,12 @@ if sys.platform == 'win32':
 _distutils_new_compiler = new_compiler
 def new_compiler (plat=None,
                   compiler=None,
-                  verbose=0,
+                  verbose=None,
                   dry_run=0,
                   force=0):
     # Try first C compilers from numpy.distutils.
+    if verbose is None:
+        verbose = log.get_threshold() <= log.INFO
     if plat is None:
         plat = os.name
     try:
@@ -763,6 +769,7 @@ def new_compiler (plat=None,
         raise DistutilsModuleError(("can't compile C/C++ code: unable to find class '%s' " +
                "in module '%s'") % (class_name, module_name))
     compiler = klass(None, dry_run, force)
+    compiler.verbose = verbose
     log.debug('new_compiler returns %s' % (klass))
     return compiler
 
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index 3d7101582..5a9da1217 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -16,8 +16,8 @@ class build(old_build):
     user_options = old_build.user_options + [
         ('fcompiler=', None,
          "specify the Fortran compiler type"),
-        ('parallel=', 'j',
-         "number of parallel jobs"),
+        ('warn-error', None,
+         "turn all warnings into errors (-Werror)"),
         ]
 
     help_options = old_build.help_options + [
@@ -28,17 +28,12 @@ class build(old_build):
     def initialize_options(self):
         old_build.initialize_options(self)
         self.fcompiler = None
-        self.parallel = None
+        self.warn_error = False
 
     def finalize_options(self):
-        if self.parallel:
-            try:
-                self.parallel = int(self.parallel)
-            except ValueError:
-                raise ValueError("--parallel/-j argument must be an integer")
         build_scripts = self.build_scripts
         old_build.finalize_options(self)
-        plat_specifier = ".%s-%s" % (get_platform(), sys.version[0:3])
+        plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2])
         if build_scripts is None:
             self.build_scripts = os.path.join(self.build_base,
                                               'scripts' + plat_specifier)
diff --git a/numpy/distutils/command/build_clib.py b/numpy/distutils/command/build_clib.py
index 910493a77..13edf0717 100644
--- a/numpy/distutils/command/build_clib.py
+++ b/numpy/distutils/command/build_clib.py
@@ -33,15 +33,18 @@ class build_clib(old_build_clib):
         ('inplace', 'i', 'Build in-place'),
         ('parallel=', 'j',
          "number of parallel jobs"),
+        ('warn-error', None,
+         "turn all warnings into errors (-Werror)"),
     ]
 
-    boolean_options = old_build_clib.boolean_options + ['inplace']
+    boolean_options = old_build_clib.boolean_options + ['inplace', 'warn-error']
 
     def initialize_options(self):
         old_build_clib.initialize_options(self)
         self.fcompiler = None
         self.inplace = 0
         self.parallel = None
+        self.warn_error = None
 
     def finalize_options(self):
         if self.parallel:
@@ -50,7 +53,10 @@ class build_clib(old_build_clib):
             except ValueError:
                 raise ValueError("--parallel/-j argument must be an integer")
         old_build_clib.finalize_options(self)
-        self.set_undefined_options('build', ('parallel', 'parallel'))
+        self.set_undefined_options('build',
+                                        ('parallel', 'parallel'),
+                                        ('warn_error', 'warn_error'),
+                                  )
 
     def have_f_sources(self):
         for (lib_name, build_info) in self.libraries:
@@ -86,6 +92,10 @@ class build_clib(old_build_clib):
         self.compiler.customize(self.distribution,
                                 need_cxx=self.have_cxx_sources())
 
+        if self.warn_error:
+            self.compiler.compiler.append('-Werror')
+            self.compiler.compiler_so.append('-Werror')
+
         libraries = self.libraries
         self.libraries = None
         self.compiler.customize_cmd(self)
diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py
index ef54fb25e..cd9b1c6f1 100644
--- a/numpy/distutils/command/build_ext.py
+++ b/numpy/distutils/command/build_ext.py
@@ -33,6 +33,8 @@ class build_ext (old_build_ext):
          "specify the Fortran compiler type"),
         ('parallel=', 'j',
          "number of parallel jobs"),
+        ('warn-error', None,
+         "turn all warnings into errors (-Werror)"),
     ]
 
     help_options = old_build_ext.help_options + [
@@ -40,10 +42,13 @@ class build_ext (old_build_ext):
          show_fortran_compilers),
     ]
 
+    boolean_options = old_build_ext.boolean_options + ['warn-error']
+
     def initialize_options(self):
         old_build_ext.initialize_options(self)
         self.fcompiler = None
         self.parallel = None
+        self.warn_error = None
 
     def finalize_options(self):
         if self.parallel:
@@ -69,7 +74,10 @@ class build_ext (old_build_ext):
         self.include_dirs.extend(incl_dirs)
 
         old_build_ext.finalize_options(self)
-        self.set_undefined_options('build', ('parallel', 'parallel'))
+        self.set_undefined_options('build',
+                                        ('parallel', 'parallel'),
+                                        ('warn_error', 'warn_error'),
+                                  )
 
     def run(self):
         if not self.extensions:
@@ -116,6 +124,11 @@ class build_ext (old_build_ext):
                                      force=self.force)
         self.compiler.customize(self.distribution)
         self.compiler.customize_cmd(self)
+
+        if self.warn_error:
+            self.compiler.compiler.append('-Werror')
+            self.compiler.compiler_so.append('-Werror')
+
         self.compiler.show_customization()
 
         # Setup directory for storing generated extra DLL files on Windows
diff --git a/numpy/distutils/command/build_src.py b/numpy/distutils/command/build_src.py
index 41bb01da5..3e0522c5f 100644
--- a/numpy/distutils/command/build_src.py
+++ b/numpy/distutils/command/build_src.py
@@ -53,9 +53,12 @@ class build_src(build_ext.build_ext):
         ('inplace', 'i',
          "ignore build-lib and put compiled extensions into the source " +
          "directory alongside your pure Python modules"),
+        ('verbose-cfg', None,
+         "change logging level from WARN to INFO which will show all " +
+         "compiler output")
         ]
 
-    boolean_options = ['force', 'inplace']
+    boolean_options = ['force', 'inplace', 'verbose-cfg']
 
     help_options = []
 
@@ -76,6 +79,7 @@ class build_src(build_ext.build_ext):
         self.swig_opts = None
         self.swig_cpp = None
         self.swig = None
+        self.verbose_cfg = None
 
     def finalize_options(self):
         self.set_undefined_options('build',
@@ -90,7 +94,7 @@ class build_src(build_ext.build_ext):
         self.data_files = self.distribution.data_files or []
 
         if self.build_src is None:
-            plat_specifier = ".%s-%s" % (get_platform(), sys.version[0:3])
+            plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2])
             self.build_src = os.path.join(self.build_base, 'src'+plat_specifier)
 
         # py_modules_dict is used in build_py.find_package_modules
@@ -365,6 +369,13 @@ class build_src(build_ext.build_ext):
             build_dir = os.path.join(*([self.build_src]
                                        +name.split('.')[:-1]))
         self.mkpath(build_dir)
+
+        if self.verbose_cfg:
+            new_level = log.INFO
+        else:
+            new_level = log.WARN
+        old_level = log.set_threshold(new_level)
+
         for func in func_sources:
             source = func(extension, build_dir)
             if not source:
@@ -375,7 +386,7 @@ class build_src(build_ext.build_ext):
             else:
                 log.info("  adding '%s' to sources." % (source,))
                 new_sources.append(source)
-
+        log.set_threshold(old_level)
         return new_sources
 
     def filter_py_files(self, sources):
diff --git a/numpy/distutils/extension.py b/numpy/distutils/extension.py
index 935f3eec9..872bd5362 100644
--- a/numpy/distutils/extension.py
+++ b/numpy/distutils/extension.py
@@ -19,8 +19,24 @@ if sys.version_info[0] >= 3:
 cxx_ext_re = re.compile(r'.*[.](cpp|cxx|cc)\Z', re.I).match
 fortran_pyf_ext_re = re.compile(r'.*[.](f90|f95|f77|for|ftn|f|pyf)\Z', re.I).match
 
+
 class Extension(old_Extension):
-    def __init__ (
+    """
+    Parameters
+    ----------
+    name : str
+        Extension name.
+    sources : list of str
+        List of source file locations relative to the top directory of
+        the package.
+    extra_compile_args : list of str
+        Extra command line arguments to pass to the compiler.
+    extra_f77_compile_args : list of str
+        Extra command line arguments to pass to the fortran77 compiler.
+    extra_f90_compile_args : list of str
+        Extra command line arguments to pass to the fortran90 compiler.
+    """
+    def __init__(
             self, name, sources,
             include_dirs=None,
             define_macros=None,
diff --git a/numpy/distutils/fcompiler/environment.py b/numpy/distutils/fcompiler/environment.py
index 73a5e98e1..bb362d483 100644
--- a/numpy/distutils/fcompiler/environment.py
+++ b/numpy/distutils/fcompiler/environment.py
@@ -59,17 +59,13 @@ class EnvironmentConfig(object):
             if envvar_contents is not None:
                 envvar_contents = convert(envvar_contents)
                 if var and append:
-                    if os.environ.get('NPY_DISTUTILS_APPEND_FLAGS', '0') == '1':
+                    if os.environ.get('NPY_DISTUTILS_APPEND_FLAGS', '1') == '1':
                         var.extend(envvar_contents)
                     else:
+                        # NPY_DISTUTILS_APPEND_FLAGS was explicitly set to 0
+                        # to keep old (overwrite flags rather than append to
+                        # them) behavior
                         var = envvar_contents
-                        if 'NPY_DISTUTILS_APPEND_FLAGS' not in os.environ.keys():
-                            msg = "{} is used as is, not appended ".format(envvar) + \
-                                  "to flags already defined " + \
-                                  "by numpy.distutils! Use NPY_DISTUTILS_APPEND_FLAGS=1 " + \
-                                  "to obtain appending behavior instead (this " + \
-                                  "behavior will become default in a future release)."
-                            warnings.warn(msg, UserWarning, stacklevel=3)
                 else:
                     var = envvar_contents
         if confvar is not None and self._conf:
diff --git a/numpy/distutils/info.py b/numpy/distutils/info.py
deleted file mode 100644
index 2f5310665..000000000
--- a/numpy/distutils/info.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""
-Enhanced distutils with Fortran compilers support and more.
-"""
-from __future__ import division, absolute_import, print_function
-
-postpone_import = True
diff --git a/numpy/distutils/log.py b/numpy/distutils/log.py
index 37f9fe5dd..ff7de86b1 100644
--- a/numpy/distutils/log.py
+++ b/numpy/distutils/log.py
@@ -67,6 +67,8 @@ def set_threshold(level, force=False):
                 ' %s to %s' % (prev_level, level))
     return prev_level
 
+def get_threshold():  # return the current threshold of _global_log
+    return _global_log.threshold
 
 def set_verbosity(v, force=False):
     prev_level = _global_log.threshold
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index 89171eede..7ba8ad862 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -859,7 +859,7 @@ class Configuration(object):
             print(message)
 
     def warn(self, message):
-        sys.stderr.write('Warning: %s' % (message,))
+        sys.stderr.write('Warning: %s\n' % (message,))
 
     def set_options(self, **options):
         """
@@ -1687,6 +1687,41 @@ class Configuration(object):
 
         and will be installed as foo.ini in the 'lib' subpath.
 
+        When cross-compiling with numpy distutils, it might be necessary to
+        use modified npy-pkg-config files.  Using the default/generated files
+        will link with the host libraries (i.e. libnpymath.a).  For
+        cross-compilation you of course need to link with target libraries,
+        while using the host Python installation.
+
+        You can copy out the numpy/core/lib/npy-pkg-config directory, add a
+        pkgdir value to the .ini files and set NPY_PKG_CONFIG_PATH environment
+        variable to point to the directory with the modified npy-pkg-config
+        files.
+
+        Example npymath.ini modified for cross-compilation::
+
+            [meta]
+            Name=npymath
+            Description=Portable, core math library implementing C99 standard
+            Version=0.1
+
+            [variables]
+            pkgname=numpy.core
+            pkgdir=/build/arm-linux-gnueabi/sysroot/usr/lib/python3.7/site-packages/numpy/core
+            prefix=${pkgdir}
+            libdir=${prefix}/lib
+            includedir=${prefix}/include
+
+            [default]
+            Libs=-L${libdir} -lnpymath
+            Cflags=-I${includedir}
+            Requires=mlib
+
+            [msvc]
+            Libs=/LIBPATH:${libdir} npymath.lib
+            Cflags=/INCLUDE:${includedir}
+            Requires=mlib
+
         """
         if subst_dict is None:
             subst_dict = {}
@@ -2092,9 +2127,22 @@ def get_numpy_include_dirs():
     return include_dirs
 
 def get_npy_pkg_dir():
-    """Return the path where to find the npy-pkg-config directory."""
+    """Return the path where to find the npy-pkg-config directory.
+
+    If the NPY_PKG_CONFIG_PATH environment variable is set, the value of that
+    is returned.  Otherwise, a path inside the location of the numpy module is
+    returned.
+
+    The NPY_PKG_CONFIG_PATH can be useful when cross-compiling, maintaining
+    customized npy-pkg-config .ini files for the cross-compilation
+    environment, and using them when cross-compiling.
+
+    """
     # XXX: import here for bootstrapping reasons
     import numpy
+    d = os.environ.get('NPY_PKG_CONFIG_PATH')
+    if d is not None:
+        return d
     d = os.path.join(os.path.dirname(numpy.__file__),
             'core', 'lib', 'npy-pkg-config')
     return d
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 6cfce3b1c..5fd1003ab 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -146,7 +146,7 @@ else:
 from distutils.errors import DistutilsError
 from distutils.dist import Distribution
 import distutils.sysconfig
-from distutils import log
+from numpy.distutils import log
 from distutils.util import get_platform
 
 from numpy.distutils.exec_command import (
@@ -550,7 +550,6 @@ class system_info(object):
     dir_env_var = None
     search_static_first = 0  # XXX: disabled by default, may disappear in
                             # future unless it is proved to be useful.
-    verbosity = 1
     saved_results = {}
 
     notfounderror = NotFoundError
@@ -558,7 +557,6 @@ class system_info(object):
     def __init__(self,
                   default_lib_dirs=default_lib_dirs,
                   default_include_dirs=default_include_dirs,
-                  verbosity=1,
                   ):
         self.__class__.info = {}
         self.local_prefixes = []
@@ -704,7 +702,7 @@ class system_info(object):
                 log.info('  FOUND:')
 
         res = self.saved_results.get(self.__class__.__name__)
-        if self.verbosity > 0 and flag:
+        if log.get_threshold() <= log.INFO and flag:
             for k, v in res.items():
                 v = str(v)
                 if k in ['sources', 'libraries'] and len(v) > 270:
@@ -914,7 +912,7 @@ class system_info(object):
         """Return a list of existing paths composed by all combinations
         of items from the arguments.
         """
-        return combine_paths(*args, **{'verbosity': self.verbosity})
+        return combine_paths(*args)
 
 
 class fft_opt_info(system_info):
@@ -1531,12 +1529,12 @@ def get_atlas_version(**config):
     try:
         s, o = c.get_output(atlas_version_c_text,
                             libraries=libraries, library_dirs=library_dirs,
-                            use_tee=(system_info.verbosity > 0))
+                           )
         if s and re.search(r'undefined reference to `_gfortran', o, re.M):
             s, o = c.get_output(atlas_version_c_text,
                                 libraries=libraries + ['gfortran'],
                                 library_dirs=library_dirs,
-                                use_tee=(system_info.verbosity > 0))
+                               )
             if not s:
                 warnings.warn(textwrap.dedent("""
                     *****************************************************
diff --git a/numpy/distutils/tests/test_fcompiler.py b/numpy/distutils/tests/test_fcompiler.py
index ba19a97ea..6d245fbd4 100644
--- a/numpy/distutils/tests/test_fcompiler.py
+++ b/numpy/distutils/tests/test_fcompiler.py
@@ -45,37 +45,3 @@ def test_fcompiler_flags(monkeypatch):
         else:
             assert_(new_flags == prev_flags + [new_flag])
 
-
-def test_fcompiler_flags_append_warning(monkeypatch):
-    # Test to check that the warning for append behavior changing in future
-    # is triggered.  Need to use a real compiler instance so that we have
-    # non-empty flags to start with (otherwise the "if var and append" check
-    # will always be false).
-    try:
-        with suppress_warnings() as sup:
-            sup.record()
-            fc = numpy.distutils.fcompiler.new_fcompiler(compiler='gnu95')
-            fc.customize()
-    except numpy.distutils.fcompiler.CompilerNotFound:
-        pytest.skip("gfortran not found, so can't execute this test")
-
-    # Ensure NPY_DISTUTILS_APPEND_FLAGS not defined
-    monkeypatch.delenv('NPY_DISTUTILS_APPEND_FLAGS', raising=False)
-
-    for opt, envvar in customizable_flags:
-        new_flag = '-dummy-{}-flag'.format(opt)
-        with suppress_warnings() as sup:
-            sup.record()
-            prev_flags = getattr(fc.flag_vars, opt)
-
-        monkeypatch.setenv(envvar, new_flag)
-        with suppress_warnings() as sup:
-            sup.record()
-            new_flags = getattr(fc.flag_vars, opt)
-            if prev_flags:
-                # Check that warning was issued
-                assert len(sup.log) == 1
-
-        monkeypatch.delenv(envvar)
-        assert_(new_flags == [new_flag])
-
diff --git a/numpy/doc/broadcasting.py b/numpy/doc/broadcasting.py
index f7bd2515b..cb548a0d0 100644
--- a/numpy/doc/broadcasting.py
+++ b/numpy/doc/broadcasting.py
@@ -61,8 +61,7 @@ dimensions are compatible when
 If these conditions are not met, a
 ``ValueError: operands could not be broadcast together`` exception is 
 thrown, indicating that the arrays have incompatible shapes. The size of 
-the resulting array is the maximum size along each dimension of the input 
-arrays.
+the resulting array is the size that is not 1 along each axis of the inputs.
 
 Arrays do not need to have the same *number* of dimensions.  For example,
 if you have a ``256x256x3`` array of RGB values, and you want to scale
diff --git a/numpy/doc/dispatch.py b/numpy/doc/dispatch.py
index 8db607131..c9029941b 100644
--- a/numpy/doc/dispatch.py
+++ b/numpy/doc/dispatch.py
@@ -223,7 +223,7 @@ calls ``numpy.sum(self)``, and the same for ``mean``.
 ...     return arr._i * arr._N
 ...
 >>> @implements(np.mean)
-... def sum(arr):
+... def mean(arr):
 ...     "Implementation of np.mean for DiagonalArray objects"
 ...     return arr._i / arr._N
 ...
diff --git a/numpy/doc/subclassing.py b/numpy/doc/subclassing.py
index 4b983893a..d0685328e 100644
--- a/numpy/doc/subclassing.py
+++ b/numpy/doc/subclassing.py
@@ -118,7 +118,8 @@ For example, consider the following Python code:
       def __new__(cls, *args):
           print('Cls in __new__:', cls)
           print('Args in __new__:', args)
-          return object.__new__(cls, *args)
+          # The `object` type __new__ method takes a single argument.
+          return object.__new__(cls)
 
       def __init__(self, *args):
           print('type(self) in __init__:', type(self))
diff --git a/numpy/f2py/__init__.py b/numpy/f2py/__init__.py
index d146739bb..42e3632fd 100644
--- a/numpy/f2py/__init__.py
+++ b/numpy/f2py/__init__.py
@@ -109,6 +109,7 @@ def compile(source,
             output = ''
         else:
             status = 0
+            output = output.decode()
         if verbose:
             print(output)
     finally:
diff --git a/numpy/f2py/info.py b/numpy/f2py/info.py
deleted file mode 100644
index c895c5de2..000000000
--- a/numpy/f2py/info.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""Fortran to Python Interface Generator.
-
-"""
-from __future__ import division, absolute_import, print_function
-
-postpone_import = True
diff --git a/numpy/fft/README.md b/numpy/fft/README.md
index 7040a2e9b..f79188139 100644
--- a/numpy/fft/README.md
+++ b/numpy/fft/README.md
@@ -10,11 +10,6 @@ advantages:
 - worst case complexity for transform sizes with large prime factors is
   `N*log(N)`, because Bluestein's algorithm [3] is used for these cases.
 
-License
--------
-
-3-clause BSD (see LICENSE.md)
-
 
 Some code details
 -----------------
diff --git a/numpy/fft/__init__.py b/numpy/fft/__init__.py
index 64b35bc19..fe95d8b17 100644
--- a/numpy/fft/__init__.py
+++ b/numpy/fft/__init__.py
@@ -1,9 +1,191 @@
-from __future__ import division, absolute_import, print_function
+"""
+Discrete Fourier Transform (:mod:`numpy.fft`)
+=============================================
+
+.. currentmodule:: numpy.fft
+
+Standard FFTs
+-------------
+
+.. autosummary::
+   :toctree: generated/
+
+   fft       Discrete Fourier transform.
+   ifft      Inverse discrete Fourier transform.
+   fft2      Discrete Fourier transform in two dimensions.
+   ifft2     Inverse discrete Fourier transform in two dimensions.
+   fftn      Discrete Fourier transform in N dimensions.
+   ifftn     Inverse discrete Fourier transform in N dimensions.
+
+Real FFTs
+---------
+
+.. autosummary::
+   :toctree: generated/
+
+   rfft      Real discrete Fourier transform.
+   irfft     Inverse real discrete Fourier transform.
+   rfft2     Real discrete Fourier transform in two dimensions.
+   irfft2    Inverse real discrete Fourier transform in two dimensions.
+   rfftn     Real discrete Fourier transform in N dimensions.
+   irfftn    Inverse real discrete Fourier transform in N dimensions.
+
+Hermitian FFTs
+--------------
+
+.. autosummary::
+   :toctree: generated/
+
+   hfft      Hermitian discrete Fourier transform.
+   ihfft     Inverse Hermitian discrete Fourier transform.
+
+Helper routines
+---------------
+
+.. autosummary::
+   :toctree: generated/
+
+   fftfreq   Discrete Fourier Transform sample frequencies.
+   rfftfreq  DFT sample frequencies (for usage with rfft, irfft).
+   fftshift  Shift zero-frequency component to center of spectrum.
+   ifftshift Inverse of fftshift.
+
+
+Background information
+----------------------
+
+Fourier analysis is fundamentally a method for expressing a function as a
+sum of periodic components, and for recovering the function from those
+components.  When both the function and its Fourier transform are
+replaced with discretized counterparts, it is called the discrete Fourier
+transform (DFT).  The DFT has become a mainstay of numerical computing in
+part because of a very fast algorithm for computing it, called the Fast
+Fourier Transform (FFT), which was known to Gauss (1805) and was brought
+to light in its current form by Cooley and Tukey [CT]_.  Press et al. [NR]_
+provide an accessible introduction to Fourier analysis and its
+applications.
+
+Because the discrete Fourier transform separates its input into
+components that contribute at discrete frequencies, it has a great number
+of applications in digital signal processing, e.g., for filtering, and in
+this context the discretized input to the transform is customarily
+referred to as a *signal*, which exists in the *time domain*.  The output
+is called a *spectrum* or *transform* and exists in the *frequency
+domain*.
+
+Implementation details
+----------------------
+
+There are many ways to define the DFT, varying in the sign of the
+exponent, normalization, etc.  In this implementation, the DFT is defined
+as
+
+.. math::
+   A_k =  \\sum_{m=0}^{n-1} a_m \\exp\\left\\{-2\\pi i{mk \\over n}\\right\\}
+   \\qquad k = 0,\\ldots,n-1.
+
+The DFT is in general defined for complex inputs and outputs, and a
+single-frequency component at linear frequency :math:`f` is
+represented by a complex exponential
+:math:`a_m = \\exp\\{2\\pi i\\,f m\\Delta t\\}`, where :math:`\\Delta t`
+is the sampling interval.
 
-# To get sub-modules
-from .info import __doc__
+The values in the result follow so-called "standard" order: If ``A =
+fft(a, n)``, then ``A[0]`` contains the zero-frequency term (the sum of
+the signal), which is always purely real for real inputs. Then ``A[1:n/2]``
+contains the positive-frequency terms, and ``A[n/2+1:]`` contains the
+negative-frequency terms, in order of decreasingly negative frequency.
+For an even number of input points, ``A[n/2]`` represents both positive and
+negative Nyquist frequency, and is also purely real for real input.  For
+an odd number of input points, ``A[(n-1)/2]`` contains the largest positive
+frequency, while ``A[(n+1)/2]`` contains the largest negative frequency.
+The routine ``np.fft.fftfreq(n)`` returns an array giving the frequencies
+of corresponding elements in the output.  The routine
+``np.fft.fftshift(A)`` shifts transforms and their frequencies to put the
+zero-frequency components in the middle, and ``np.fft.ifftshift(A)`` undoes
+that shift.
+
+When the input `a` is a time-domain signal and ``A = fft(a)``, ``np.abs(A)``
+is its amplitude spectrum and ``np.abs(A)**2`` is its power spectrum.
+The phase spectrum is obtained by ``np.angle(A)``.
+
+The inverse DFT is defined as
+
+.. math::
+   a_m = \\frac{1}{n}\\sum_{k=0}^{n-1}A_k\\exp\\left\\{2\\pi i{mk\\over n}\\right\\}
+   \\qquad m = 0,\\ldots,n-1.
+
+It differs from the forward transform by the sign of the exponential
+argument and the default normalization by :math:`1/n`.
+
+Normalization
+-------------
+The default normalization has the direct transforms unscaled and the inverse
+transforms are scaled by :math:`1/n`. It is possible to obtain unitary
+transforms by setting the keyword argument ``norm`` to ``"ortho"`` (default is
+`None`) so that both direct and inverse transforms will be scaled by
+:math:`1/\\sqrt{n}`.
+
+Real and Hermitian transforms
+-----------------------------
+
+When the input is purely real, its transform is Hermitian, i.e., the
+component at frequency :math:`f_k` is the complex conjugate of the
+component at frequency :math:`-f_k`, which means that for real
+inputs there is no information in the negative frequency components that
+is not already available from the positive frequency components.
+The family of `rfft` functions is
+designed to operate on real inputs, and exploits this symmetry by
+computing only the positive frequency components, up to and including the
+Nyquist frequency.  Thus, ``n`` input points produce ``n/2+1`` complex
+output points.  The inverses of this family assume the same symmetry of
+their input, and for an output of ``n`` points use ``n/2+1`` input points.
+
+Correspondingly, when the spectrum is purely real, the signal is
+Hermitian.  The `hfft` family of functions exploits this symmetry by
+using ``n/2+1`` complex points in the input (time) domain for ``n`` real
+points in the frequency domain.
+
+In higher dimensions, FFTs are used, e.g., for image analysis and
+filtering.  The computational efficiency of the FFT means that it can
+also be a faster way to compute large convolutions, using the property
+that a convolution in the time domain is equivalent to a point-by-point
+multiplication in the frequency domain.
+
+Higher dimensions
+-----------------
+
+In two dimensions, the DFT is defined as
+
+.. math::
+   A_{kl} =  \\sum_{m=0}^{M-1} \\sum_{n=0}^{N-1}
+   a_{mn}\\exp\\left\\{-2\\pi i \\left({mk\\over M}+{nl\\over N}\\right)\\right\\}
+   \\qquad k = 0, \\ldots, M-1;\\quad l = 0, \\ldots, N-1,
+
+which extends in the obvious way to higher dimensions, and the inverses
+in higher dimensions also extend in the same way.
+
+References
+----------
+
+.. [CT] Cooley, James W., and John W. Tukey, 1965, "An algorithm for the
+        machine calculation of complex Fourier series," *Math. Comput.*
+        19: 297-301.
+
+.. [NR] Press, W., Teukolsky, S., Vetterling, W.T., and Flannery, B.P.,
+        2007, *Numerical Recipes: The Art of Scientific Computing*, ch.
+        12-13.  Cambridge Univ. Press, Cambridge, UK.
+
+Examples
+--------
+
+For examples, see the various functions.
+
+"""
+
+from __future__ import division, absolute_import, print_function
 
-from .pocketfft import *
+from ._pocketfft import *
 from .helper import *
 
 from numpy._pytesttester import PytestTester
diff --git a/numpy/fft/pocketfft.c b/numpy/fft/_pocketfft.c
index 9d1218e6b..d75b9983c 100644
--- a/numpy/fft/pocketfft.c
+++ b/numpy/fft/_pocketfft.c
@@ -2362,7 +2362,7 @@ static struct PyMethodDef methods[] = {
 #if PY_MAJOR_VERSION >= 3
 static struct PyModuleDef moduledef = {
         PyModuleDef_HEAD_INIT,
-        "pocketfft_internal",
+        "_pocketfft_internal",
         NULL,
         -1,
         methods,
@@ -2376,11 +2376,11 @@ static struct PyModuleDef moduledef = {
 /* Initialization function for the module */
 #if PY_MAJOR_VERSION >= 3
 #define RETVAL(x) x
-PyMODINIT_FUNC PyInit_pocketfft_internal(void)
+PyMODINIT_FUNC PyInit__pocketfft_internal(void)
 #else
 #define RETVAL(x)
 PyMODINIT_FUNC
-initpocketfft_internal(void)
+init_pocketfft_internal(void)
 #endif
 {
     PyObject *m;
@@ -2389,7 +2389,7 @@ initpocketfft_internal(void)
 #else
     static const char module_documentation[] = "";
 
-    m = Py_InitModule4("pocketfft_internal", methods,
+    m = Py_InitModule4("_pocketfft_internal", methods,
             module_documentation,
             (PyObject*)NULL,PYTHON_API_VERSION);
 #endif
diff --git a/numpy/fft/pocketfft.py b/numpy/fft/_pocketfft.py
index 77ea6e3ba..50720cda4 100644
--- a/numpy/fft/pocketfft.py
+++ b/numpy/fft/_pocketfft.py
@@ -35,7 +35,7 @@ __all__ = ['fft', 'ifft', 'rfft', 'irfft', 'hfft', 'ihfft', 'rfftn',
 import functools
 
 from numpy.core import asarray, zeros, swapaxes, conjugate, take, sqrt
-from . import pocketfft_internal as pfi
+from . import _pocketfft_internal as pfi
 from numpy.core.multiarray import normalize_axis_index
 from numpy.core import overrides
 
@@ -44,7 +44,11 @@ array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy.fft')
 
 
-def _raw_fft(a, n, axis, is_real, is_forward, fct):
+# `inv_norm` is a float by which the result of the transform needs to be
+# divided. This replaces the original, more intuitive `fct` parameter to avoid
+# divisions by zero (or alternatively additional checks) in the case of
+# zero-length axes during its computation.
+def _raw_fft(a, n, axis, is_real, is_forward, inv_norm):
     axis = normalize_axis_index(axis, a.ndim)
     if n is None:
         n = a.shape[axis]
@@ -53,6 +57,8 @@ def _raw_fft(a, n, axis, is_real, is_forward, fct):
         raise ValueError("Invalid number of FFT data points (%d) specified."
                          % n)
 
+    fct = 1/inv_norm
+
     if a.shape[axis] != n:
         s = list(a.shape)
         if s[axis] > n:
@@ -176,10 +182,10 @@ def fft(a, n=None, axis=-1, norm=None):
     a = asarray(a)
     if n is None:
         n = a.shape[axis]
-    fct = 1
+    inv_norm = 1
     if norm is not None and _unitary(norm):
-        fct = 1 / sqrt(n)
-    output = _raw_fft(a, n, axis, False, True, fct)
+        inv_norm = sqrt(n)
+    output = _raw_fft(a, n, axis, False, True, inv_norm)
     return output
 
 
@@ -272,10 +278,10 @@ def ifft(a, n=None, axis=-1, norm=None):
     if n is None:
         n = a.shape[axis]
     if norm is not None and _unitary(norm):
-        fct = 1/sqrt(max(n, 1))
+        inv_norm = sqrt(max(n, 1))
     else:
-        fct = 1/max(n, 1)
-    output = _raw_fft(a, n, axis, False, False, fct)
+        inv_norm = n
+    output = _raw_fft(a, n, axis, False, False, inv_norm)
     return output
 
 
@@ -360,12 +366,12 @@ def rfft(a, n=None, axis=-1, norm=None):
 
     """
     a = asarray(a)
-    fct = 1
+    inv_norm = 1
     if norm is not None and _unitary(norm):
         if n is None:
             n = a.shape[axis]
-        fct = 1/sqrt(n)
-    output = _raw_fft(a, n, axis, True, True, fct)
+        inv_norm = sqrt(n)
+    output = _raw_fft(a, n, axis, True, True, inv_norm)
     return output
 
 
@@ -462,10 +468,10 @@ def irfft(a, n=None, axis=-1, norm=None):
     a = asarray(a)
     if n is None:
         n = (a.shape[axis] - 1) * 2
-    fct = 1/n
+    inv_norm = n
     if norm is not None and _unitary(norm):
-        fct = 1/sqrt(n)
-    output = _raw_fft(a, n, axis, True, False, fct)
+        inv_norm = sqrt(n)
+    output = _raw_fft(a, n, axis, True, False, inv_norm)
     return output
 
 
diff --git a/numpy/fft/info.py b/numpy/fft/info.py
deleted file mode 100644
index cb6526b44..000000000
--- a/numpy/fft/info.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-Discrete Fourier Transform (:mod:`numpy.fft`)
-=============================================
-
-.. currentmodule:: numpy.fft
-
-Standard FFTs
--------------
-
-.. autosummary::
-   :toctree: generated/
-
-   fft       Discrete Fourier transform.
-   ifft      Inverse discrete Fourier transform.
-   fft2      Discrete Fourier transform in two dimensions.
-   ifft2     Inverse discrete Fourier transform in two dimensions.
-   fftn      Discrete Fourier transform in N-dimensions.
-   ifftn     Inverse discrete Fourier transform in N dimensions.
-
-Real FFTs
----------
-
-.. autosummary::
-   :toctree: generated/
-
-   rfft      Real discrete Fourier transform.
-   irfft     Inverse real discrete Fourier transform.
-   rfft2     Real discrete Fourier transform in two dimensions.
-   irfft2    Inverse real discrete Fourier transform in two dimensions.
-   rfftn     Real discrete Fourier transform in N dimensions.
-   irfftn    Inverse real discrete Fourier transform in N dimensions.
-
-Hermitian FFTs
---------------
-
-.. autosummary::
-   :toctree: generated/
-
-   hfft      Hermitian discrete Fourier transform.
-   ihfft     Inverse Hermitian discrete Fourier transform.
-
-Helper routines
----------------
-
-.. autosummary::
-   :toctree: generated/
-
-   fftfreq   Discrete Fourier Transform sample frequencies.
-   rfftfreq  DFT sample frequencies (for usage with rfft, irfft).
-   fftshift  Shift zero-frequency component to center of spectrum.
-   ifftshift Inverse of fftshift.
-
-
-Background information
-----------------------
-
-Fourier analysis is fundamentally a method for expressing a function as a
-sum of periodic components, and for recovering the function from those
-components.  When both the function and its Fourier transform are
-replaced with discretized counterparts, it is called the discrete Fourier
-transform (DFT).  The DFT has become a mainstay of numerical computing in
-part because of a very fast algorithm for computing it, called the Fast
-Fourier Transform (FFT), which was known to Gauss (1805) and was brought
-to light in its current form by Cooley and Tukey [CT]_.  Press et al. [NR]_
-provide an accessible introduction to Fourier analysis and its
-applications.
-
-Because the discrete Fourier transform separates its input into
-components that contribute at discrete frequencies, it has a great number
-of applications in digital signal processing, e.g., for filtering, and in
-this context the discretized input to the transform is customarily
-referred to as a *signal*, which exists in the *time domain*.  The output
-is called a *spectrum* or *transform* and exists in the *frequency
-domain*.
-
-Implementation details
-----------------------
-
-There are many ways to define the DFT, varying in the sign of the
-exponent, normalization, etc.  In this implementation, the DFT is defined
-as
-
-.. math::
-   A_k =  \\sum_{m=0}^{n-1} a_m \\exp\\left\\{-2\\pi i{mk \\over n}\\right\\}
-   \\qquad k = 0,\\ldots,n-1.
-
-The DFT is in general defined for complex inputs and outputs, and a
-single-frequency component at linear frequency :math:`f` is
-represented by a complex exponential
-:math:`a_m = \\exp\\{2\\pi i\\,f m\\Delta t\\}`, where :math:`\\Delta t`
-is the sampling interval.
-
-The values in the result follow so-called "standard" order: If ``A =
-fft(a, n)``, then ``A[0]`` contains the zero-frequency term (the sum of
-the signal), which is always purely real for real inputs. Then ``A[1:n/2]``
-contains the positive-frequency terms, and ``A[n/2+1:]`` contains the
-negative-frequency terms, in order of decreasingly negative frequency.
-For an even number of input points, ``A[n/2]`` represents both positive and
-negative Nyquist frequency, and is also purely real for real input.  For
-an odd number of input points, ``A[(n-1)/2]`` contains the largest positive
-frequency, while ``A[(n+1)/2]`` contains the largest negative frequency.
-The routine ``np.fft.fftfreq(n)`` returns an array giving the frequencies
-of corresponding elements in the output.  The routine
-``np.fft.fftshift(A)`` shifts transforms and their frequencies to put the
-zero-frequency components in the middle, and ``np.fft.ifftshift(A)`` undoes
-that shift.
-
-When the input `a` is a time-domain signal and ``A = fft(a)``, ``np.abs(A)``
-is its amplitude spectrum and ``np.abs(A)**2`` is its power spectrum.
-The phase spectrum is obtained by ``np.angle(A)``.
-
-The inverse DFT is defined as
-
-.. math::
-   a_m = \\frac{1}{n}\\sum_{k=0}^{n-1}A_k\\exp\\left\\{2\\pi i{mk\\over n}\\right\\}
-   \\qquad m = 0,\\ldots,n-1.
-
-It differs from the forward transform by the sign of the exponential
-argument and the default normalization by :math:`1/n`.
-
-Normalization
--------------
-The default normalization has the direct transforms unscaled and the inverse
-transforms are scaled by :math:`1/n`. It is possible to obtain unitary
-transforms by setting the keyword argument ``norm`` to ``"ortho"`` (default is
-`None`) so that both direct and inverse transforms will be scaled by
-:math:`1/\\sqrt{n}`.
-
-Real and Hermitian transforms
------------------------------
-
-When the input is purely real, its transform is Hermitian, i.e., the
-component at frequency :math:`f_k` is the complex conjugate of the
-component at frequency :math:`-f_k`, which means that for real
-inputs there is no information in the negative frequency components that
-is not already available from the positive frequency components.
-The family of `rfft` functions is
-designed to operate on real inputs, and exploits this symmetry by
-computing only the positive frequency components, up to and including the
-Nyquist frequency.  Thus, ``n`` input points produce ``n/2+1`` complex
-output points.  The inverses of this family assumes the same symmetry of
-its input, and for an output of ``n`` points uses ``n/2+1`` input points.
-
-Correspondingly, when the spectrum is purely real, the signal is
-Hermitian.  The `hfft` family of functions exploits this symmetry by
-using ``n/2+1`` complex points in the input (time) domain for ``n`` real
-points in the frequency domain.
-
-In higher dimensions, FFTs are used, e.g., for image analysis and
-filtering.  The computational efficiency of the FFT means that it can
-also be a faster way to compute large convolutions, using the property
-that a convolution in the time domain is equivalent to a point-by-point
-multiplication in the frequency domain.
-
-Higher dimensions
------------------
-
-In two dimensions, the DFT is defined as
-
-.. math::
-   A_{kl} =  \\sum_{m=0}^{M-1} \\sum_{n=0}^{N-1}
-   a_{mn}\\exp\\left\\{-2\\pi i \\left({mk\\over M}+{nl\\over N}\\right)\\right\\}
-   \\qquad k = 0, \\ldots, M-1;\\quad l = 0, \\ldots, N-1,
-
-which extends in the obvious way to higher dimensions, and the inverses
-in higher dimensions also extend in the same way.
-
-References
-----------
-
-.. [CT] Cooley, James W., and John W. Tukey, 1965, "An algorithm for the
-        machine calculation of complex Fourier series," *Math. Comput.*
-        19: 297-301.
-
-.. [NR] Press, W., Teukolsky, S., Vetterline, W.T., and Flannery, B.P.,
-        2007, *Numerical Recipes: The Art of Scientific Computing*, ch.
-        12-13.  Cambridge Univ. Press, Cambridge, UK.
-
-Examples
---------
-
-For examples, see the various functions.
-
-"""
-from __future__ import division, absolute_import, print_function
-
-depends = ['core']
diff --git a/numpy/fft/setup.py b/numpy/fft/setup.py
index 6c3548b65..8c3a31557 100644
--- a/numpy/fft/setup.py
+++ b/numpy/fft/setup.py
@@ -8,8 +8,8 @@ def configuration(parent_package='',top_path=None):
     config.add_data_dir('tests')
 
     # Configure pocketfft_internal
-    config.add_extension('pocketfft_internal',
-                         sources=['pocketfft.c']
+    config.add_extension('_pocketfft_internal',
+                         sources=['_pocketfft.c']
                          )
 
     return config
diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py
index c1757150e..2db12d9a4 100644
--- a/numpy/lib/__init__.py
+++ b/numpy/lib/__init__.py
@@ -1,14 +1,31 @@
+"""
+**Note:** almost all functions in the ``numpy.lib`` namespace
+are also present in the main ``numpy`` namespace.  Please use the
+functions as ``np.<funcname>`` where possible.
+
+``numpy.lib`` is mostly a space for implementing functions that don't
+belong in core or in another NumPy submodule with a clear purpose
+(e.g. ``random``, ``fft``, ``linalg``, ``ma``).
+
+It mostly contains basic functions that are used by several submodules and are
+useful to have in the main name-space.
+
+"""
 from __future__ import division, absolute_import, print_function
 
 import math
 
-from .info import __doc__
 from numpy.version import version as __version__
 
+# Public submodules
+# Note: recfunctions and (maybe) format are public too, but not imported
+from . import mixins
+from . import scimath as emath
+
+# Private submodules
 from .type_check import *
 from .index_tricks import *
 from .function_base import *
-from .mixins import *
 from .nanfunctions import *
 from .shape_base import *
 from .stride_tricks import *
@@ -16,9 +33,7 @@ from .twodim_base import *
 from .ufunclike import *
 from .histograms import *
 
-from . import scimath as emath
 from .polynomial import *
-#import convertcode
 from .utils import *
 from .arraysetops import *
 from .npyio import *
@@ -28,11 +43,10 @@ from .arraypad import *
 from ._version import *
 from numpy.core._multiarray_umath import tracemalloc_domain
 
-__all__ = ['emath', 'math', 'tracemalloc_domain']
+__all__ = ['emath', 'math', 'tracemalloc_domain', 'Arrayterator']
 __all__ += type_check.__all__
 __all__ += index_tricks.__all__
 __all__ += function_base.__all__
-__all__ += mixins.__all__
 __all__ += shape_base.__all__
 __all__ += stride_tricks.__all__
 __all__ += twodim_base.__all__
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 0ebd39b8c..c392929fd 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -121,7 +121,7 @@ def has_nested_fields(ndtype):
 
     """
     for name in ndtype.names or ():
-        if ndtype[name].names:
+        if ndtype[name].names is not None:
             return True
     return False
 
@@ -931,28 +931,27 @@ def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
         names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
         ndtype = np.dtype(dict(formats=ndtype, names=names))
     else:
-        nbtypes = len(ndtype)
         # Explicit names
         if names is not None:
             validate = NameValidator(**validationargs)
             if isinstance(names, basestring):
                 names = names.split(",")
             # Simple dtype: repeat to match the nb of names
-            if nbtypes == 0:
+            if ndtype.names is None:
                 formats = tuple([ndtype.type] * len(names))
                 names = validate(names, defaultfmt=defaultfmt)
                 ndtype = np.dtype(list(zip(names, formats)))
             # Structured dtype: just validate the names as needed
             else:
-                ndtype.names = validate(names, nbfields=nbtypes,
+                ndtype.names = validate(names, nbfields=len(ndtype.names),
                                         defaultfmt=defaultfmt)
         # No implicit names
-        elif (nbtypes > 0):
+        elif ndtype.names is not None:
             validate = NameValidator(**validationargs)
             # Default initial names : should we change the format ?
-            if ((ndtype.names == tuple("f%i" % i for i in range(nbtypes))) and
+            if ((ndtype.names == tuple("f%i" % i for i in range(len(ndtype.names)))) and
                     (defaultfmt != "f%i")):
-                ndtype.names = validate([''] * nbtypes, defaultfmt=defaultfmt)
+                ndtype.names = validate([''] * len(ndtype.names), defaultfmt=defaultfmt)
             # Explicit initial names : just validate
             else:
                 ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index 62330e692..33e64708d 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -17,66 +17,6 @@ __all__ = ['pad']
 # Private utility functions.
 
 
-def _linear_ramp(ndim, axis, start, stop, size, reverse=False):
-    """
-    Create a linear ramp of `size` in `axis` with `ndim`.
-
-    This algorithm behaves like a vectorized version of `numpy.linspace`.
-    The resulting linear ramp is broadcastable to any array that matches the
-    ramp in `shape[axis]` and `ndim`.
-
-    Parameters
-    ----------
-    ndim : int
-        Number of dimensions of the resulting array. All dimensions except
-        the one specified by `axis` will have the size 1.
-    axis : int
-        The dimension that contains the linear ramp of `size`.
-    start : int or ndarray
-        The starting value(s) of the linear ramp. If given as an array, its
-        size must match `size`.
-    stop : int or ndarray
-        The stop value(s) (not included!) of the linear ramp. If given as an
-        array, its size must match `size`.
-    size : int
-        The number of elements in the linear ramp. If this argument is 0 the
-        dimensions of `ramp` will all be of length 1 except for the one given
-        by `axis` which will be 0.
-    reverse : bool
-        If False, increment in a positive fashion, otherwise decrement.
-
-    Returns
-    -------
-    ramp : ndarray
-        Output array of dtype np.float64 that in- or decrements along the given
-        `axis`.
-
-    Examples
-    --------
-    >>> _linear_ramp(ndim=2, axis=0, start=np.arange(3), stop=10, size=2)
-    array([[0. , 1. , 2. ],
-           [5. , 5.5, 6. ]])
-    >>> _linear_ramp(ndim=3, axis=0, start=2, stop=0, size=0)
-    array([], shape=(0, 1, 1), dtype=float64)
-    """
-    # Create initial ramp
-    ramp = np.arange(size, dtype=np.float64)
-    if reverse:
-        ramp = ramp[::-1]
-
-    # Make sure, that ramp is broadcastable
-    init_shape = (1,) * axis + (size,) + (1,) * (ndim - axis - 1)
-    ramp = ramp.reshape(init_shape)
-
-    if size != 0:
-        # And scale to given start and stop values
-        gain = (stop - start) / float(size)
-        ramp = ramp * gain
-        ramp += start
-
-    return ramp
-
-
 def _round_if_needed(arr, dtype):
     """
     Rounds arr inplace if destination dtype is integer.
@@ -269,17 +209,25 @@ def _get_linear_ramps(padded, axis, width_pair, end_value_pair):
     """
     edge_pair = _get_edges(padded, axis, width_pair)
 
-    left_ramp = _linear_ramp(
-        padded.ndim, axis, start=end_value_pair[0], stop=edge_pair[0],
-        size=width_pair[0], reverse=False
+    left_ramp = np.linspace(
+        start=end_value_pair[0],
+        stop=edge_pair[0].squeeze(axis),  # Dimension is replaced by linspace
+        num=width_pair[0],
+        endpoint=False,
+        dtype=padded.dtype,
+        axis=axis,
     )
-    _round_if_needed(left_ramp, padded.dtype)
 
-    right_ramp = _linear_ramp(
-        padded.ndim, axis, start=end_value_pair[1], stop=edge_pair[1],
-        size=width_pair[1], reverse=True
+    right_ramp = np.linspace(
+        start=end_value_pair[1],
+        stop=edge_pair[1].squeeze(axis),  # Dimension is replaced by linspace
+        num=width_pair[1],
+        endpoint=False,
+        dtype=padded.dtype,
+        axis=axis,
     )
-    _round_if_needed(right_ramp, padded.dtype)
+    # Reverse linear space in appropriate dimension
+    right_ramp = right_ramp[_slice_at_axis(slice(None, None, -1), axis)]
 
     return left_ramp, right_ramp
 
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index f3f4bc17e..2309f7e42 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -213,6 +213,7 @@ def unique(ar, return_index=False, return_inverse=False,
     -----
     When an axis is specified the subarrays indexed by the axis are sorted.
     This is done by making the specified axis the first dimension of the array
+    (move the axis to the first dimension to keep the order of the other axes)
     and then flattening the subarrays in C order. The flattened subarrays are
     then viewed as a structured type with each element given a label, with the
     effect that we end up with a 1-D array of structured types that can be
@@ -264,7 +265,7 @@ def unique(ar, return_index=False, return_inverse=False,
 
     # axis was specified and not None
     try:
-        ar = np.swapaxes(ar, axis, 0)
+        ar = np.moveaxis(ar, axis, 0)
     except np.AxisError:
         # this removes the "axis1" or "axis2" prefix from the error message
         raise np.AxisError(axis, ar.ndim)
@@ -285,7 +286,7 @@ def unique(ar, return_index=False, return_inverse=False,
     def reshape_uniq(uniq):
         uniq = uniq.view(orig_dtype)
         uniq = uniq.reshape(-1, *orig_shape[1:])
-        uniq = np.swapaxes(uniq, 0, axis)
+        uniq = np.moveaxis(uniq, 0, axis)
         return uniq
 
     output = _unique1d(consolidated, return_index,
diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py
index 216687475..d72384e99 100644
--- a/numpy/lib/financial.py
+++ b/numpy/lib/financial.py
@@ -715,8 +715,6 @@ def irr(values):
     >>> round(np.irr([-5, 10.5, 1, -8, 1]), 5)
     0.0886
 
-    (Compare with the Example given for numpy.lib.financial.npv)
-
     """
     # `np.roots` call is why this function does not support Decimal type.
     #
@@ -763,6 +761,15 @@ def npv(rate, values):
         The NPV of the input cash flow series `values` at the discount
         `rate`.
 
+    Warnings
+    --------
+    ``npv`` considers a series of cashflows starting in the present (t = 0).
+    NPV can also be defined with a series of future cashflows, paid at the
+    end, rather than the start, of each period. If future cashflows are used,
+    the first cashflow `values[0]` must be zeroed and added to the net
+    present value of the future cashflows. This is demonstrated in the
+    examples.
+
     Notes
     -----
     Returns the result of: [G]_
@@ -776,10 +783,24 @@ def npv(rate, values):
 
     Examples
     --------
-    >>> np.npv(0.281,[-100, 39, 59, 55, 20])
-    -0.0084785916384548798 # may vary
-
-    (Compare with the Example given for numpy.lib.financial.irr)
+    Consider a potential project with an initial investment of $40 000 and
+    projected cashflows of $5 000, $8 000, $12 000 and $30 000 at the end of
+    each period discounted at a rate of 8% per period. To find the project's
+    net present value:
+
+    >>> rate, cashflows = 0.08, [-40_000, 5_000, 8_000, 12_000, 30_000]
+    >>> np.npv(rate, cashflows).round(5)
+    3065.22267
+
+    It may be preferable to split the projected cashflow into an initial
+    investment and expected future cashflows. In this case, the value of
+    the initial cashflow is zero and the initial investment is later added
+    to the future cashflows net present value:
+
+    >>> initial_cashflow = cashflows[0]
+    >>> cashflows[0] = 0
+    >>> np.round(np.npv(rate, cashflows) + initial_cashflow, 5)
+    3065.22267
 
     """
     values = np.asarray(values)
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 3bf818812..1ecd72815 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -173,6 +173,9 @@ from numpy.compat import (
     )
 
 
+__all__ = []
+
+
 MAGIC_PREFIX = b'\x93NUMPY'
 MAGIC_LEN = len(MAGIC_PREFIX) + 2
 ARRAY_ALIGN = 64 # plausible values are powers of 2 between 16 and 4096
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 9d380e67d..46950bc95 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -316,14 +316,17 @@ def average(a, axis=None, weights=None, returned=False):
         The weights array can either be 1-D (in which case its length must be
         the size of `a` along the given axis) or of the same shape as `a`.
         If `weights=None`, then all data in `a` are assumed to have a
-        weight equal to one.
+        weight equal to one.  The 1-D calculation is::
+
+            avg = sum(a * weights) / sum(weights)
+
+        The only constraint on `weights` is that `sum(weights)` must not be 0.
     returned : bool, optional
         Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`)
         is returned, otherwise only the average is returned.
         If `weights=None`, `sum_of_weights` is equivalent to the number of
         elements over which the average is taken.
 
-
     Returns
     -------
     retval, [sum_of_weights] : array_type or double
@@ -679,11 +682,7 @@ def select(condlist, choicelist, default=0):
 
     # Now that the dtype is known, handle the deprecated select([], []) case
     if len(condlist) == 0:
-        # 2014-02-24, 1.9
-        warnings.warn("select with an empty condition list is not possible"
-                      "and will be deprecated",
-                      DeprecationWarning, stacklevel=3)
-        return np.asarray(default)[()]
+        raise ValueError("select with an empty condition list is not possible")
 
     choicelist = [np.asarray(choice) for choice in choicelist]
     choicelist.append(np.asarray(default))
@@ -699,25 +698,11 @@ def select(condlist, choicelist, default=0):
     choicelist = np.broadcast_arrays(*choicelist)
 
     # If cond array is not an ndarray in boolean format or scalar bool, abort.
-    deprecated_ints = False
     for i in range(len(condlist)):
         cond = condlist[i]
         if cond.dtype.type is not np.bool_:
-            if np.issubdtype(cond.dtype, np.integer):
-                # A previous implementation accepted int ndarrays accidentally.
-                # Supported here deliberately, but deprecated.
-                condlist[i] = condlist[i].astype(bool)
-                deprecated_ints = True
-            else:
-                raise ValueError(
-                    'invalid entry {} in condlist: should be boolean ndarray'.format(i))
-
-    if deprecated_ints:
-        # 2014-02-24, 1.9
-        msg = "select condlists containing integer ndarrays is deprecated " \
-            "and will be removed in the future. Use `.astype(bool)` to " \
-            "convert to bools."
-        warnings.warn(msg, DeprecationWarning, stacklevel=3)
+            raise TypeError(
+                'invalid entry {} in condlist: should be boolean ndarray'.format(i))
 
     if choicelist[0].ndim == 0:
         # This may be common, so avoid the call.
@@ -1164,11 +1149,13 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue):
         The axis along which the difference is taken, default is the
         last axis.
     prepend, append : array_like, optional
-        Values to prepend or append to "a" along axis prior to
+        Values to prepend or append to `a` along axis prior to
         performing the difference.  Scalar values are expanded to
         arrays with length 1 in the direction of axis and the shape
         of the input array in along all other axes.  Otherwise the
-        dimension and shape must match "a" except along axis.
+        dimension and shape must match `a` except along axis.
+
+        .. versionadded:: 1.16.0
 
     Returns
     -------
@@ -3310,13 +3297,6 @@ def sinc(x):
     Text(0.5, 0, 'X')
     >>> plt.show()
 
-    It works in 2-D as well:
-
-    >>> x = np.linspace(-4, 4, 401)
-    >>> xx = np.outer(x, x)
-    >>> plt.imshow(np.sinc(xx))
-    <matplotlib.image.AxesImage object at 0x...>
-
     """
     x = np.asanyarray(x)
     y = pi * where(x == 0, 1.0e-20, x)
diff --git a/numpy/lib/info.py b/numpy/lib/info.py
deleted file mode 100644
index 8815a52f0..000000000
--- a/numpy/lib/info.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""
-Basic functions used by several sub-packages and
-useful to have in the main name-space.
-
-Type Handling
--------------
-================ ===================
-iscomplexobj     Test for complex object, scalar result
-isrealobj        Test for real object, scalar result
-iscomplex        Test for complex elements, array result
-isreal           Test for real elements, array result
-imag             Imaginary part
-real             Real part
-real_if_close    Turns complex number with tiny imaginary part to real
-isneginf         Tests for negative infinity, array result
-isposinf         Tests for positive infinity, array result
-isnan            Tests for nans, array result
-isinf            Tests for infinity, array result
-isfinite         Tests for finite numbers, array result
-isscalar         True if argument is a scalar
-nan_to_num       Replaces NaN's with 0 and infinities with large numbers
-cast             Dictionary of functions to force cast to each type
-common_type      Determine the minimum common type code for a group
-                 of arrays
-mintypecode      Return minimal allowed common typecode.
-================ ===================
-
-Index Tricks
-------------
-================ ===================
-mgrid            Method which allows easy construction of N-d
-                 'mesh-grids'
-``r_``           Append and construct arrays: turns slice objects into
-                 ranges and concatenates them, for 2d arrays appends rows.
-index_exp        Konrad Hinsen's index_expression class instance which
-                 can be useful for building complicated slicing syntax.
-================ ===================
-
-Useful Functions
-----------------
-================ ===================
-select           Extension of where to multiple conditions and choices
-extract          Extract 1d array from flattened array according to mask
-insert           Insert 1d array of values into Nd array according to mask
-linspace         Evenly spaced samples in linear space
-logspace         Evenly spaced samples in logarithmic space
-fix              Round x to nearest integer towards zero
-mod              Modulo mod(x,y) = x % y except keeps sign of y
-amax             Array maximum along axis
-amin             Array minimum along axis
-ptp              Array max-min along axis
-cumsum           Cumulative sum along axis
-prod             Product of elements along axis
-cumprod          Cumluative product along axis
-diff             Discrete differences along axis
-angle            Returns angle of complex argument
-unwrap           Unwrap phase along given axis (1-d algorithm)
-sort_complex     Sort a complex-array (based on real, then imaginary)
-trim_zeros       Trim the leading and trailing zeros from 1D array.
-vectorize        A class that wraps a Python function taking scalar
-                 arguments into a generalized function which can handle
-                 arrays of arguments using the broadcast rules of
-                 numerix Python.
-================ ===================
-
-Shape Manipulation
-------------------
-================ ===================
-squeeze          Return a with length-one dimensions removed.
-atleast_1d       Force arrays to be >= 1D
-atleast_2d       Force arrays to be >= 2D
-atleast_3d       Force arrays to be >= 3D
-vstack           Stack arrays vertically (row on row)
-hstack           Stack arrays horizontally (column on column)
-column_stack     Stack 1D arrays as columns into 2D array
-dstack           Stack arrays depthwise (along third dimension)
-stack            Stack arrays along a new axis
-split            Divide array into a list of sub-arrays
-hsplit           Split into columns
-vsplit           Split into rows
-dsplit           Split along third dimension
-================ ===================
-
-Matrix (2D Array) Manipulations
--------------------------------
-================ ===================
-fliplr           2D array with columns flipped
-flipud           2D array with rows flipped
-rot90            Rotate a 2D array a multiple of 90 degrees
-eye              Return a 2D array with ones down a given diagonal
-diag             Construct a 2D array from a vector, or return a given
-                 diagonal from a 2D array.
-mat              Construct a Matrix
-bmat             Build a Matrix from blocks
-================ ===================
-
-Polynomials
------------
-================ ===================
-poly1d           A one-dimensional polynomial class
-poly             Return polynomial coefficients from roots
-roots            Find roots of polynomial given coefficients
-polyint          Integrate polynomial
-polyder          Differentiate polynomial
-polyadd          Add polynomials
-polysub          Subtract polynomials
-polymul          Multiply polynomials
-polydiv          Divide polynomials
-polyval          Evaluate polynomial at given argument
-================ ===================
-
-Iterators
----------
-================ ===================
-Arrayterator     A buffered iterator for big arrays.
-================ ===================
-
-Import Tricks
--------------
-================ ===================
-ppimport         Postpone module import until trying to use it
-ppimport_attr    Postpone module import until trying to use its attribute
-ppresolve        Import postponed module and return it.
-================ ===================
-
-Machine Arithmetics
--------------------
-================ ===================
-machar_single    Single precision floating point arithmetic parameters
-machar_double    Double precision floating point arithmetic parameters
-================ ===================
-
-Threading Tricks
-----------------
-================ ===================
-ParallelExec     Execute commands in parallel thread.
-================ ===================
-
-Array Set Operations
------------------------
-Set operations for numeric arrays based on sort() function.
-
-================ ===================
-unique           Unique elements of an array.
-isin             Test whether each element of an ND array is present 
-                 anywhere within a second array.
-ediff1d          Array difference (auxiliary function).
-intersect1d      Intersection of 1D arrays with unique elements.
-setxor1d         Set exclusive-or of 1D arrays with unique elements.
-in1d             Test whether elements in a 1D array are also present in
-                 another array.
-union1d          Union of 1D arrays with unique elements.
-setdiff1d        Set difference of 1D arrays with unique elements.
-================ ===================
-
-"""
-from __future__ import division, absolute_import, print_function
-
-depends = ['core', 'testing']
-global_symbols = ['*']
diff --git a/numpy/lib/mixins.py b/numpy/lib/mixins.py
index 52ad45b68..f974a7724 100644
--- a/numpy/lib/mixins.py
+++ b/numpy/lib/mixins.py
@@ -5,8 +5,8 @@ import sys
 
 from numpy.core import umath as um
 
-# Nothing should be exposed in the top-level NumPy module.
-__all__ = []
+
+__all__ = ['NDArrayOperatorsMixin']
 
 
 def _disables_array_ufunc(obj):
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 9a03d0b39..6cffab6ac 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -1443,7 +1443,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
         the variance of the flattened array.
     dtype : data-type, optional
         Type to use in computing the variance.  For arrays of integer type
-        the default is `float32`; for arrays of float types it is the same as
+        the default is `float64`; for arrays of float types it is the same as
         the array type.
     out : ndarray, optional
         Alternate output array in which to place the result.  It must have
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index c45622edd..e57a6dd47 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -2180,7 +2180,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
             outputmask = np.array(masks, dtype=mdtype)
     else:
         # Overwrite the initial dtype names if needed
-        if names and dtype.names:
+        if names and dtype.names is not None:
             dtype.names = names
         # Case 1. We have a structured type
         if len(dtype_flat) > 1:
@@ -2230,7 +2230,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
             #
             output = np.array(data, dtype)
             if usemask:
-                if dtype.names:
+                if dtype.names is not None:
                     mdtype = [(_, bool) for _ in dtype.names]
                 else:
                     mdtype = bool
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index 6e257bb3f..927161ddb 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -72,7 +72,7 @@ def recursive_fill_fields(input, output):
             current = input[field]
         except ValueError:
             continue
-        if current.dtype.names:
+        if current.dtype.names is not None:
             recursive_fill_fields(current, output[field])
         else:
             output[field][:len(current)] = current
@@ -139,11 +139,11 @@ def get_names(adtype):
     names = adtype.names
     for name in names:
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             listnames.append((name, tuple(get_names(current))))
         else:
             listnames.append(name)
-    return tuple(listnames) or None
+    return tuple(listnames)
 
 
 def get_names_flat(adtype):
@@ -176,9 +176,9 @@ def get_names_flat(adtype):
     for name in names:
         listnames.append(name)
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             listnames.extend(get_names_flat(current))
-    return tuple(listnames) or None
+    return tuple(listnames)
 
 
 def flatten_descr(ndtype):
@@ -200,7 +200,7 @@ def flatten_descr(ndtype):
         descr = []
         for field in names:
             (typ, _) = ndtype.fields[field]
-            if typ.names:
+            if typ.names is not None:
                 descr.extend(flatten_descr(typ))
             else:
                 descr.append((field, typ))
@@ -215,8 +215,8 @@ def _zip_dtype(seqarrays, flatten=False):
     else:
         for a in seqarrays:
             current = a.dtype
-            if current.names and len(current.names) <= 1:
-                # special case - dtypes of 0 or 1 field are flattened
+            if current.names is not None and len(current.names) == 1:
+                # special case - dtypes of 1 field are flattened
                 newdtype.extend(_get_fieldspec(current))
             else:
                 newdtype.append(('', current))
@@ -268,7 +268,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
     names = adtype.names
     for name in names:
         current = adtype[name]
-        if current.names:
+        if current.names is not None:
             if lastname:
                 parents[name] = [lastname, ]
             else:
@@ -281,7 +281,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,):
             elif lastname:
                 lastparent = [lastname, ]
             parents[name] = lastparent or []
-    return parents or None
+    return parents
 
 
 def _izip_fields_flat(iterable):
@@ -435,7 +435,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False,
     if isinstance(seqarrays, (ndarray, np.void)):
         seqdtype = seqarrays.dtype
         # Make sure we have named fields
-        if not seqdtype.names:
+        if seqdtype.names is None:
             seqdtype = np.dtype([('', seqdtype)])
         if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype:
             # Minimal processing needed: just make sure everythng's a-ok
@@ -527,6 +527,10 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
 
     Nested fields are supported.
 
+    .. versionchanged:: 1.18.0
+        `drop_fields` returns an array with 0 fields if all fields are dropped,
+        rather than returning ``None`` as it did previously.
+
     Parameters
     ----------
     base : array
@@ -566,7 +570,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
             current = ndtype[name]
             if name in drop_names:
                 continue
-            if current.names:
+            if current.names is not None:
                 descr = _drop_descr(current, drop_names)
                 if descr:
                     newdtype.append((name, descr))
@@ -575,8 +579,6 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
         return newdtype
 
     newdtype = _drop_descr(base.dtype, drop_names)
-    if not newdtype:
-        return None
 
     output = np.empty(base.shape, dtype=newdtype)
     output = recursive_fill_fields(base, output)
@@ -653,7 +655,7 @@ def rename_fields(base, namemapper):
         for name in ndtype.names:
             newname = namemapper.get(name, name)
             current = ndtype[name]
-            if current.names:
+            if current.names is not None:
                 newdtype.append(
                     (newname, _recursive_rename_fields(current, namemapper))
                     )
@@ -874,16 +876,35 @@ def _get_fields_and_offsets(dt, offset=0):
     scalar fields in the dtype "dt", including nested fields, in left
     to right order.
     """
+
+    # counts up elements in subarrays, including nested subarrays, and returns
+    # base dtype and count
+    def count_elem(dt):
+        count = 1
+        while dt.shape != ():
+            for size in dt.shape:
+                count *= size
+            dt = dt.base
+        return dt, count
+
     fields = []
     for name in dt.names:
         field = dt.fields[name]
-        if field[0].names is None:
-            count = 1
-            for size in field[0].shape:
-                count *= size
-            fields.append((field[0], count, field[1] + offset))
+        f_dt, f_offset = field[0], field[1]
+        f_dt, n = count_elem(f_dt)
+
+        if f_dt.names is None:
+            fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset))
         else:
-            fields.extend(_get_fields_and_offsets(field[0], field[1] + offset))
+            subfields = _get_fields_and_offsets(f_dt, f_offset + offset)
+            size = f_dt.itemsize
+
+            for i in range(n):
+                if i == 0:
+                    # optimization: avoid list comprehension if no subarray
+                    fields.extend(subfields)
+                else:
+                    fields.extend([(d, c, o + i*size) for d, c, o in subfields])
     return fields
 
 
@@ -948,6 +969,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'):
 
     fields = _get_fields_and_offsets(arr.dtype)
     n_fields = len(fields)
+    if n_fields == 0 and dtype is None:
+        raise ValueError("arr has no fields. Unable to guess dtype")
+    elif n_fields == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("arr with no fields is not supported")
+
     dts, counts, offsets = zip(*fields)
     names = ['f{}'.format(n) for n in range(n_fields)]
 
@@ -1039,6 +1066,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
     if arr.shape == ():
         raise ValueError('arr must have at least one dimension')
     n_elem = arr.shape[-1]
+    if n_elem == 0:
+        # too many bugs elsewhere for this to work now
+        raise NotImplementedError("last axis with size 0 is not supported")
 
     if dtype is None:
         if names is None:
@@ -1051,7 +1081,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False,
             raise ValueError("don't supply both dtype and names")
         # sanity check of the input dtype
         fields = _get_fields_and_offsets(dtype)
-        dts, counts, offsets = zip(*fields)
+        if len(fields) == 0:
+            dts, counts, offsets = [], [], []
+        else:
+            dts, counts, offsets = zip(*fields)
+
         if n_elem != sum(counts):
             raise ValueError('The length of the last dimension of arr must '
                              'be equal to the number of fields in dtype')
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index a5d0040aa..92d52109e 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -782,7 +782,7 @@ def _split_dispatcher(ary, indices_or_sections, axis=None):
 @array_function_dispatch(_split_dispatcher)
 def split(ary, indices_or_sections, axis=0):
     """
-    Split an array into multiple sub-arrays.
+    Split an array into multiple sub-arrays as views into `ary`.
 
     Parameters
     ----------
@@ -809,7 +809,7 @@ def split(ary, indices_or_sections, axis=0):
     Returns
     -------
     sub-arrays : list of ndarrays
-        A list of sub-arrays.
+        A list of sub-arrays as views into `ary`.
 
     Raises
     ------
@@ -854,8 +854,7 @@ def split(ary, indices_or_sections, axis=0):
         if N % sections:
             raise ValueError(
                 'array split does not result in an equal division')
-    res = array_split(ary, indices_or_sections, axis)
-    return res
+    return array_split(ary, indices_or_sections, axis)
 
 
 def _hvdsplit_dispatcher(ary, indices_or_sections):
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index b6dd3b31c..65593dd29 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -2,7 +2,6 @@
 
 """
 from __future__ import division, absolute_import, print_function
-from itertools import chain
 
 import pytest
 
@@ -11,6 +10,12 @@ from numpy.testing import assert_array_equal, assert_allclose, assert_equal
 from numpy.lib.arraypad import _as_pairs
 
 
+_numeric_dtypes = (
+    np.sctypes["uint"]
+    + np.sctypes["int"]
+    + np.sctypes["float"]
+    + np.sctypes["complex"]
+)
 _all_modes = {
     'constant': {'constant_values': 0},
     'edge': {},
@@ -738,6 +743,24 @@ class TestLinearRamp(object):
         assert_equal(a[0, :], 0.)
         assert_equal(a[-1, :], 0.)
 
+    @pytest.mark.parametrize("dtype", _numeric_dtypes)
+    def test_negative_difference(self, dtype):
+        """
+        Check correct behavior of unsigned dtypes if there is a negative
+        difference between the edge to pad and `end_values`. Check both cases
+        to be independent of implementation. Test behavior for all other dtypes
+        in case dtype casting interferes with complex dtypes. See gh-14191.
+        """
+        x = np.array([3], dtype=dtype)
+        result = np.pad(x, 3, mode="linear_ramp", end_values=0)
+        expected = np.array([0, 1, 2, 3, 2, 1, 0], dtype=dtype)
+        assert_equal(result, expected)
+
+        x = np.array([0], dtype=dtype)
+        result = np.pad(x, 3, mode="linear_ramp", end_values=3)
+        expected = np.array([3, 2, 1, 0, 1, 2, 3], dtype=dtype)
+        assert_equal(result, expected)
+
 
 class TestReflect(object):
     def test_check_simple(self):
@@ -1330,13 +1353,7 @@ def test_memory_layout_persistence(mode):
     assert np.pad(x, 5, mode).flags["F_CONTIGUOUS"]
 
 
-@pytest.mark.parametrize("dtype", chain(
-    # Skip "other" dtypes as they are not supported by all modes
-    np.sctypes["int"],
-    np.sctypes["uint"],
-    np.sctypes["float"],
-    np.sctypes["complex"]
-))
+@pytest.mark.parametrize("dtype", _numeric_dtypes)
 @pytest.mark.parametrize("mode", _all_modes.keys())
 def test_dtype_persistence(dtype, mode):
     arr = np.zeros((3, 2, 1), dtype=dtype)
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index dd8a38248..fd21a7f76 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -600,8 +600,11 @@ class TestUnique(object):
         assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
 
         msg = 'Unique with 3d array and axis=2 failed'
-        data3d = np.dstack([data] * 3)
-        result = data3d[..., :1]
+        data3d = np.array([[[1, 1],
+                            [1, 0]],
+                           [[0, 1],
+                            [0, 0]]]).astype(dtype)
+        result = np.take(data3d, [1, 0], axis=2)
         assert_array_equal(unique(data3d, axis=2), result, msg)
 
         uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
diff --git a/numpy/lib/tests/test_financial.py b/numpy/lib/tests/test_financial.py
index 524915041..21088765f 100644
--- a/numpy/lib/tests/test_financial.py
+++ b/numpy/lib/tests/test_financial.py
@@ -9,6 +9,12 @@ from numpy.testing import (
 
 
 class TestFinancial(object):
+    def test_npv_irr_congruence(self):
+        # IRR is defined as the rate required for the present value of a
+        # series of cashflows to be zero, i.e. NPV(IRR(x), x) = 0
+        cashflows = np.array([-40000, 5000, 8000, 12000, 30000])
+        assert_allclose(np.npv(np.irr(cashflows), cashflows), 0, atol=1e-10, rtol=0)
+
     def test_rate(self):
         assert_almost_equal(
             np.rate(10, 0, -3500, 10000),
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index eae52c002..1eae8ccfb 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -423,27 +423,17 @@ class TestSelect(object):
         assert_equal(select([m], [d]), [0, 0, 0, np.nan, 0, 0])
 
     def test_deprecated_empty(self):
-        with warnings.catch_warnings(record=True):
-            warnings.simplefilter("always")
-            assert_equal(select([], [], 3j), 3j)
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("always")
-            assert_warns(DeprecationWarning, select, [], [])
-            warnings.simplefilter("error")
-            assert_raises(DeprecationWarning, select, [], [])
+        assert_raises(ValueError, select, [], [], 3j)
+        assert_raises(ValueError, select, [], [])
 
     def test_non_bool_deprecation(self):
         choices = self.choices
         conditions = self.conditions[:]
-        with warnings.catch_warnings():
-            warnings.filterwarnings("always")
-            conditions[0] = conditions[0].astype(np.int_)
-            assert_warns(DeprecationWarning, select, conditions, choices)
-            conditions[0] = conditions[0].astype(np.uint8)
-            assert_warns(DeprecationWarning, select, conditions, choices)
-            warnings.filterwarnings("error")
-            assert_raises(DeprecationWarning, select, conditions, choices)
+        conditions[0] = conditions[0].astype(np.int_)
+        assert_raises(TypeError, select, conditions, choices)
+        conditions[0] = conditions[0].astype(np.uint8)
+        assert_raises(TypeError, select, conditions, choices)
+        assert_raises(TypeError, select, conditions, choices)
 
     def test_many_arguments(self):
         # This used to be limited by NPY_MAXARGS == 32
diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py
index a5cdda074..dbe445c2c 100644
--- a/numpy/lib/tests/test_index_tricks.py
+++ b/numpy/lib/tests/test_index_tricks.py
@@ -175,6 +175,24 @@ class TestRavelUnravelIndex(object):
         assert_raises_regex(
             ValueError, "out of bounds", np.unravel_index, [1], ())
 
+    @pytest.mark.parametrize("mode", ["clip", "wrap", "raise"])
+    def test_empty_array_ravel(self, mode):
+        res = np.ravel_multi_index(
+                    np.zeros((3, 0), dtype=np.intp), (2, 1, 0), mode=mode)
+        assert(res.shape == (0,))
+
+        with assert_raises(ValueError):
+            np.ravel_multi_index(
+                    np.zeros((3, 1), dtype=np.intp), (2, 1, 0), mode=mode)
+
+    def test_empty_array_unravel(self):
+        res = np.unravel_index(np.zeros(0, dtype=np.intp), (2, 1, 0))
+        # res is a tuple of three empty arrays
+        assert(len(res) == 3)
+        assert(all(a.shape == (0,) for a in res))
+
+        with assert_raises(ValueError):
+            np.unravel_index([1], (2, 1, 0))
 
 class TestGrid(object):
     def test_basic(self):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 407bb56bf..1181fe986 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1565,6 +1565,13 @@ M   33  21.99
             test = np.genfromtxt(TextIO(data), delimiter=";",
                                  dtype=ndtype, converters=converters)
 
+        # nested but empty fields also aren't supported
+        ndtype = [('idx', int), ('code', object), ('nest', [])]
+        with assert_raises_regex(NotImplementedError,
+                                 'Nested fields.* not supported.*'):
+            test = np.genfromtxt(TextIO(data), delimiter=";",
+                                 dtype=ndtype, converters=converters)
+
     def test_userconverters_with_explicit_dtype(self):
         # Test user_converters w/ explicit (standard) dtype
         data = TextIO('skip,skip,2001-01-01,1.0,skip')
@@ -1864,7 +1871,7 @@ M   33  21.99
         data = ["1, 1, 1, 1, -1.1"] * 50
         mdata = TextIO("\n".join(data))
 
-        converters = {4: lambda x: "(%s)" % x}
+        converters = {4: lambda x: "(%s)" % x.decode()}
         kwargs = dict(delimiter=",", converters=converters,
                       dtype=[(_, int) for _ in 'abcde'],)
         assert_raises(ValueError, np.genfromtxt, mdata, **kwargs)
diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
index 0126ccaf8..fa5f4dec2 100644
--- a/numpy/lib/tests/test_recfunctions.py
+++ b/numpy/lib/tests/test_recfunctions.py
@@ -91,8 +91,10 @@ class TestRecFunctions(object):
         control = np.array([(1,), (4,)], dtype=[('a', int)])
         assert_equal(test, control)
 
+        # dropping all fields results in an array with no fields
         test = drop_fields(a, ['a', 'b'])
-        assert_(test is None)
+        control = np.array([(), ()], dtype=[])
+        assert_equal(test, control)
 
     def test_rename_fields(self):
         # Test rename fields
@@ -115,6 +117,14 @@ class TestRecFunctions(object):
         test = get_names(ndtype)
         assert_equal(test, ('a', ('b', ('ba', 'bb'))))
 
+        ndtype = np.dtype([('a', int), ('b', [])])
+        test = get_names(ndtype)
+        assert_equal(test, ('a', ('b', ())))
+
+        ndtype = np.dtype([])
+        test = get_names(ndtype)
+        assert_equal(test, ())
+
     def test_get_names_flat(self):
         # Test get_names_flat
         ndtype = np.dtype([('A', '|S3'), ('B', float)])
@@ -125,6 +135,14 @@ class TestRecFunctions(object):
         test = get_names_flat(ndtype)
         assert_equal(test, ('a', 'b', 'ba', 'bb'))
 
+        ndtype = np.dtype([('a', int), ('b', [])])
+        test = get_names_flat(ndtype)
+        assert_equal(test, ('a', 'b'))
+
+        ndtype = np.dtype([])
+        test = get_names_flat(ndtype)
+        assert_equal(test, ())
+
     def test_get_fieldstructure(self):
         # Test get_fieldstructure
 
@@ -147,6 +165,11 @@ class TestRecFunctions(object):
                    'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}
         assert_equal(test, control)
 
+        # 0 fields
+        ndtype = np.dtype([])
+        test = get_fieldstructure(ndtype)
+        assert_equal(test, {})
+
     def test_find_duplicates(self):
         # Test find_duplicates
         a = ma.array([(2, (2., 'B')), (1, (2., 'B')), (2, (2., 'B')),
@@ -248,7 +271,8 @@ class TestRecFunctions(object):
         # including uniform fields with subarrays unpacked
         d = np.array([(1, [2,  3], [[ 4,  5], [ 6,  7]]),
                       (8, [9, 10], [[11, 12], [13, 14]])],
-                     dtype=[('x0', 'i4'), ('x1', ('i4', 2)), ('x2', ('i4', (2, 2)))])
+                     dtype=[('x0', 'i4'), ('x1', ('i4', 2)),
+                            ('x2', ('i4', (2, 2)))])
         dd = structured_to_unstructured(d)
         ddd = unstructured_to_structured(dd, d.dtype)
         assert_(dd.base is d)
@@ -262,6 +286,40 @@ class TestRecFunctions(object):
         assert_equal(res, np.zeros((10, 6), dtype=int))
 
 
+        # test nested combinations of subarrays and structured arrays, gh-13333
+        def subarray(dt, shape):
+            return np.dtype((dt, shape))
+
+        def structured(*dts):
+            return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)])
+
+        def inspect(dt, dtype=None):
+            arr = np.zeros((), dt)
+            ret = structured_to_unstructured(arr, dtype=dtype)
+            backarr = unstructured_to_structured(ret, dt)
+            return ret.shape, ret.dtype, backarr.dtype
+
+        dt = structured(subarray(structured(np.int32, np.int32), 3))
+        assert_equal(inspect(dt), ((6,), np.int32, dt))
+
+        dt = structured(subarray(subarray(np.int32, 2), 2))
+        assert_equal(inspect(dt), ((4,), np.int32, dt))
+
+        dt = structured(np.int32)
+        assert_equal(inspect(dt), ((1,), np.int32, dt))
+
+        dt = structured(np.int32, subarray(subarray(np.int32, 2), 2))
+        assert_equal(inspect(dt), ((5,), np.int32, dt))
+
+        dt = structured()
+        assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt))
+
+        # these currently don't work, but we may make it work in the future
+        assert_raises(NotImplementedError, structured_to_unstructured,
+                                           np.zeros(3, dt), dtype=np.int32)
+        assert_raises(NotImplementedError, unstructured_to_structured,
+                                           np.zeros((3,0), dtype=np.int32))
+
     def test_field_assignment_by_name(self):
         a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')])
         newdt = [('b', 'f4'), ('c', 'u1')]
@@ -322,8 +380,8 @@ class TestMergeArrays(object):
         z = np.array(
             [('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
         w = np.array(
-            [(1, (2, 3.0)), (4, (5, 6.0))],
-            dtype=[('a', int), ('b', [('ba', float), ('bb', int)])])
+            [(1, (2, 3.0, ())), (4, (5, 6.0, ()))],
+            dtype=[('a', int), ('b', [('ba', float), ('bb', int), ('bc', [])])])
         self.data = (w, x, y, z)
 
     def test_solo(self):
@@ -394,8 +452,8 @@ class TestMergeArrays(object):
         test = merge_arrays((x, w), flatten=False)
         controldtype = [('f0', int),
                                 ('f1', [('a', int),
-                                        ('b', [('ba', float), ('bb', int)])])]
-        control = np.array([(1., (1, (2, 3.0))), (2, (4, (5, 6.0)))],
+                                        ('b', [('ba', float), ('bb', int), ('bc', [])])])]
+        control = np.array([(1., (1, (2, 3.0, ()))), (2, (4, (5, 6.0, ())))],
                            dtype=controldtype)
         assert_equal(test, control)
 
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index c7dbcc5f9..3c71d2a7c 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -788,13 +788,8 @@ def lookfor(what, module=None, import_modules=True, regenerate=False,
         if kind in ('module', 'object'):
             # don't show modules or objects
             continue
-        ok = True
         doc = docstring.lower()
-        for w in whats:
-            if w not in doc:
-                ok = False
-                break
-        if ok:
+        if all(w in doc for w in whats):
             found.append(name)
 
     # Relevance sort
@@ -1003,93 +998,6 @@ def _getmembers(item):
                    if hasattr(item, x)]
     return members
 
-#-----------------------------------------------------------------------------
-
-# The following SafeEval class and company are adapted from Michael Spencer's
-# ASPN Python Cookbook recipe: https://code.activestate.com/recipes/364469/
-#
-# Accordingly it is mostly Copyright 2006 by Michael Spencer.
-# The recipe, like most of the other ASPN Python Cookbook recipes was made
-# available under the Python license.
-#   https://en.wikipedia.org/wiki/Python_License
-
-# It has been modified to:
-#   * handle unary -/+
-#   * support True/False/None
-#   * raise SyntaxError instead of a custom exception.
-
-class SafeEval(object):
-    """
-    Object to evaluate constant string expressions.
-
-    This includes strings with lists, dicts and tuples using the abstract
-    syntax tree created by ``compiler.parse``.
-
-    .. deprecated:: 1.10.0
-
-    See Also
-    --------
-    safe_eval
-
-    """
-    def __init__(self):
-        # 2014-10-15, 1.10
-        warnings.warn("SafeEval is deprecated in 1.10 and will be removed.",
-                      DeprecationWarning, stacklevel=2)
-
-    def visit(self, node):
-        cls = node.__class__
-        meth = getattr(self, 'visit' + cls.__name__, self.default)
-        return meth(node)
-
-    def default(self, node):
-        raise SyntaxError("Unsupported source construct: %s"
-                          % node.__class__)
-
-    def visitExpression(self, node):
-        return self.visit(node.body)
-
-    def visitNum(self, node):
-        return node.n
-
-    def visitStr(self, node):
-        return node.s
-
-    def visitBytes(self, node):
-        return node.s
-
-    def visitDict(self, node,**kw):
-        return dict([(self.visit(k), self.visit(v))
-                     for k, v in zip(node.keys, node.values)])
-
-    def visitTuple(self, node):
-        return tuple([self.visit(i) for i in node.elts])
-
-    def visitList(self, node):
-        return [self.visit(i) for i in node.elts]
-
-    def visitUnaryOp(self, node):
-        import ast
-        if isinstance(node.op, ast.UAdd):
-            return +self.visit(node.operand)
-        elif isinstance(node.op, ast.USub):
-            return -self.visit(node.operand)
-        else:
-            raise SyntaxError("Unknown unary op: %r" % node.op)
-
-    def visitName(self, node):
-        if node.id == 'False':
-            return False
-        elif node.id == 'True':
-            return True
-        elif node.id == 'None':
-            return None
-        else:
-            raise SyntaxError("Unknown name: %s" % node.id)
-
-    def visitNameConstant(self, node):
-        return node.value
-
 
 def safe_eval(source):
     """
diff --git a/numpy/linalg/__init__.py b/numpy/linalg/__init__.py
index 4b696c883..55560815d 100644
--- a/numpy/linalg/__init__.py
+++ b/numpy/linalg/__init__.py
@@ -1,53 +1,77 @@
 """
-Core Linear Algebra Tools
-=========================
-
-=============== ==========================================================
-Linear algebra basics
-==========================================================================
-norm            Vector or matrix norm
-inv             Inverse of a square matrix
-solve           Solve a linear system of equations
-det             Determinant of a square matrix
-slogdet         Logarithm of the determinant of a square matrix
-lstsq           Solve linear least-squares problem
-pinv            Pseudo-inverse (Moore-Penrose) calculated using a singular
-                value decomposition
-matrix_power    Integer power of a square matrix
-matrix_rank     Calculate matrix rank using an SVD-based method
-=============== ==========================================================
-
-=============== ==========================================================
-Eigenvalues and decompositions
-==========================================================================
-eig             Eigenvalues and vectors of a square matrix
-eigh            Eigenvalues and eigenvectors of a Hermitian matrix
-eigvals         Eigenvalues of a square matrix
-eigvalsh        Eigenvalues of a Hermitian matrix
-qr              QR decomposition of a matrix
-svd             Singular value decomposition of a matrix
-cholesky        Cholesky decomposition of a matrix
-=============== ==========================================================
-
-=============== ==========================================================
-Tensor operations
-==========================================================================
-tensorsolve     Solve a linear tensor equation
-tensorinv       Calculate an inverse of a tensor
-=============== ==========================================================
-
-=============== ==========================================================
+``numpy.linalg``
+================
+
+The NumPy linear algebra functions rely on BLAS and LAPACK to provide efficient
+low level implementations of standard linear algebra algorithms. Those
+libraries may be provided by NumPy itself using C versions of a subset of their
+reference implementations but, when possible, highly optimized libraries that
+take advantage of specialized processor functionality are preferred. Examples
+of such libraries are OpenBLAS, MKL (TM), and ATLAS. Because those libraries
+are multithreaded and processor dependent, environment variables and external
+packages such as threadpoolctl may be needed to control the number of threads
+or specify the processor architecture.
+
+- OpenBLAS: https://www.openblas.net/
+- threadpoolctl: https://github.com/joblib/threadpoolctl
+
+Please note that the most-used linear algebra functions in NumPy are present in
+the main ``numpy`` namespace rather than in ``numpy.linalg``.  These are:
+``dot``, ``vdot``, ``inner``, ``outer``, ``matmul``, ``tensordot``, ``einsum``,
+``einsum_path`` and ``kron``.
+
+Functions present in numpy.linalg are listed below.
+
+
+Matrix and vector products
+--------------------------
+
+   multi_dot
+   matrix_power
+
+Decompositions
+--------------
+
+   cholesky
+   qr
+   svd
+
+Matrix eigenvalues
+------------------
+
+   eig
+   eigh
+   eigvals
+   eigvalsh
+
+Norms and other numbers
+-----------------------
+
+   norm
+   cond
+   det
+   matrix_rank
+   slogdet
+
+Solving equations and inverting matrices
+----------------------------------------
+
+   solve
+   tensorsolve
+   lstsq
+   inv
+   pinv
+   tensorinv
+
 Exceptions
-==========================================================================
-LinAlgError     Indicates a failed linear algebra operation
-=============== ==========================================================
+----------
+
+   LinAlgError
 
 """
 from __future__ import division, absolute_import, print_function
 
 # To get sub-modules
-from .info import __doc__
-
 from .linalg import *
 
 from numpy._pytesttester import PytestTester
diff --git a/numpy/linalg/info.py b/numpy/linalg/info.py
deleted file mode 100644
index 646ecda04..000000000
--- a/numpy/linalg/info.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""\
-Core Linear Algebra Tools
--------------------------
-Linear algebra basics:
-
-- norm            Vector or matrix norm
-- inv             Inverse of a square matrix
-- solve           Solve a linear system of equations
-- det             Determinant of a square matrix
-- lstsq           Solve linear least-squares problem
-- pinv            Pseudo-inverse (Moore-Penrose) calculated using a singular
-                  value decomposition
-- matrix_power    Integer power of a square matrix
-
-Eigenvalues and decompositions:
-
-- eig             Eigenvalues and vectors of a square matrix
-- eigh            Eigenvalues and eigenvectors of a Hermitian matrix
-- eigvals         Eigenvalues of a square matrix
-- eigvalsh        Eigenvalues of a Hermitian matrix
-- qr              QR decomposition of a matrix
-- svd             Singular value decomposition of a matrix
-- cholesky        Cholesky decomposition of a matrix
-
-Tensor operations:
-
-- tensorsolve     Solve a linear tensor equation
-- tensorinv       Calculate an inverse of a tensor
-
-Exceptions:
-
-- LinAlgError     Indicates a failed linear algebra operation
-
-"""
-from __future__ import division, absolute_import, print_function
-
-depends = ['core']
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 95b799f6d..bb3788c9a 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -59,14 +59,14 @@ __all__ = [
     'choose', 'clip', 'common_fill_value', 'compress', 'compressed',
     'concatenate', 'conjugate', 'convolve', 'copy', 'correlate', 'cos', 'cosh',
     'count', 'cumprod', 'cumsum', 'default_fill_value', 'diag', 'diagonal',
-    'diff', 'divide', 'dump', 'dumps', 'empty', 'empty_like', 'equal', 'exp',
+    'diff', 'divide', 'empty', 'empty_like', 'equal', 'exp',
     'expand_dims', 'fabs', 'filled', 'fix_invalid', 'flatten_mask',
     'flatten_structured_array', 'floor', 'floor_divide', 'fmod',
     'frombuffer', 'fromflex', 'fromfunction', 'getdata', 'getmask',
     'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot',
     'identity', 'ids', 'indices', 'inner', 'innerproduct', 'isMA',
     'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'left_shift',
-    'less', 'less_equal', 'load', 'loads', 'log', 'log10', 'log2',
+    'less', 'less_equal', 'log', 'log10', 'log2',
     'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'make_mask',
     'make_mask_descr', 'make_mask_none', 'mask_or', 'masked',
     'masked_array', 'masked_equal', 'masked_greater',
@@ -7886,93 +7886,6 @@ def _pickle_warn(method):
         stacklevel=3)
 
 
-def dump(a, F):
-    """
-    Pickle a masked array to a file.
-
-    This is a wrapper around ``cPickle.dump``.
-
-    Parameters
-    ----------
-    a : MaskedArray
-        The array to be pickled.
-    F : str or file-like object
-        The file to pickle `a` to. If a string, the full path to the file.
-
-    """
-    _pickle_warn('dump')
-    if not hasattr(F, 'readline'):
-        with open(F, 'w') as F:
-            pickle.dump(a, F)
-    else:
-        pickle.dump(a, F)
-
-
-def dumps(a):
-    """
-    Return a string corresponding to the pickling of a masked array.
-
-    This is a wrapper around ``cPickle.dumps``.
-
-    Parameters
-    ----------
-    a : MaskedArray
-        The array for which the string representation of the pickle is
-        returned.
-
-    """
-    _pickle_warn('dumps')
-    return pickle.dumps(a)
-
-
-def load(F):
-    """
-    Wrapper around ``cPickle.load`` which accepts either a file-like object
-    or a filename.
-
-    Parameters
-    ----------
-    F : str or file
-        The file or file name to load.
-
-    See Also
-    --------
-    dump : Pickle an array
-
-    Notes
-    -----
-    This is different from `numpy.load`, which does not use cPickle but loads
-    the NumPy binary .npy format.
-
-    """
-    _pickle_warn('load')
-    if not hasattr(F, 'readline'):
-        with open(F, 'r') as F:
-            return pickle.load(F)
-    else:
-        return pickle.load(F)
-
-
-def loads(strg):
-    """
-    Load a pickle from the current string.
-
-    The result of ``cPickle.loads(strg)`` is returned.
-
-    Parameters
-    ----------
-    strg : str
-        The string to load.
-
-    See Also
-    --------
-    dumps : Return a string corresponding to the pickling of a masked array.
-
-    """
-    _pickle_warn('loads')
-    return pickle.loads(strg)
-
-
 def fromfile(file, dtype=float, count=-1, sep=''):
     raise NotImplementedError(
         "fromfile() not yet implemented for a MaskedArray.")
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 639b3dd1f..de1aa3af8 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -549,8 +549,11 @@ def average(a, axis=None, weights=None, returned=False):
         The weights array can either be 1-D (in which case its length must be
         the size of `a` along the given axis) or of the same shape as `a`.
         If ``weights=None``, then all data in `a` are assumed to have a
-        weight equal to one.   If `weights` is complex, the imaginary parts
-        are ignored.
+        weight equal to one.  The 1-D calculation is::
+
+            avg = sum(a * weights) / sum(weights)
+
+        The only constraint on `weights` is that `sum(weights)` must not be 0.
     returned : bool, optional
         Flag indicating whether a tuple ``(result, sum of weights)``
         should be returned as output (True), or just the result (False).
diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py
index 931a7e8b9..826fb0f64 100644
--- a/numpy/ma/mrecords.py
+++ b/numpy/ma/mrecords.py
@@ -208,7 +208,7 @@ class MaskedRecords(MaskedArray, object):
         _localdict = ndarray.__getattribute__(self, '__dict__')
         _data = ndarray.view(self, _localdict['_baseclass'])
         obj = _data.getfield(*res)
-        if obj.dtype.fields:
+        if obj.dtype.names is not None:
             raise NotImplementedError("MaskedRecords is currently limited to"
                                       "simple records.")
         # Get some special attributes
diff --git a/numpy/ma/version.py b/numpy/ma/version.py
deleted file mode 100644
index a2c5c42a8..000000000
--- a/numpy/ma/version.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""Version number
-
-"""
-from __future__ import division, absolute_import, print_function
-
-version = '1.00'
-release = False
-
-if not release:
-    from . import core
-    from . import extras
-    revision = [core.__revision__.split(':')[-1][:-1].strip(),
-                extras.__revision__.split(':')[-1][:-1].strip(),]
-    version += '.dev%04i' % max([int(rev) for rev in revision])
diff --git a/numpy/matlib.py b/numpy/matlib.py
index 9e115943a..604ef470b 100644
--- a/numpy/matlib.py
+++ b/numpy/matlib.py
@@ -2,7 +2,7 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 from numpy.matrixlib.defmatrix import matrix, asmatrix
-# need * as we're copying the numpy namespace
+# need * as we're copying the numpy namespace (FIXME: this makes little sense)
 from numpy import *
 
 __version__ = np.__version__
diff --git a/numpy/polynomial/polyutils.py b/numpy/polynomial/polyutils.py
index a9059f522..35b24d1ab 100644
--- a/numpy/polynomial/polyutils.py
+++ b/numpy/polynomial/polyutils.py
@@ -426,10 +426,7 @@ def _vander2d(vander_f, x, y, deg):
     x, y, deg :
         See the ``<type>vander2d`` functions for more detail
     """
-    degx, degy = [
-        _deprecate_as_int(d, "degrees")
-        for d in deg
-    ]
+    degx, degy = deg
     x, y = np.array((x, y), copy=False) + 0.0
 
     vx = vander_f(x, degx)
@@ -449,10 +446,7 @@ def _vander3d(vander_f, x, y, z, deg):
     x, y, z, deg :
         See the ``<type>vander3d`` functions for more detail
     """
-    degx, degy, degz = [
-        _deprecate_as_int(d, "degrees")
-        for d in deg
-    ]
+    degx, degy, degz = deg
     x, y, z = np.array((x, y, z), copy=False) + 0.0
 
     vx = vander_f(x, degx)
diff --git a/numpy/random/__init__.py b/numpy/random/__init__.py
index e7eecc5cd..f7c248451 100644
--- a/numpy/random/__init__.py
+++ b/numpy/random/__init__.py
@@ -181,7 +181,6 @@ __all__ = [
 from . import _pickle
 from . import common
 from . import bounded_integers
-from . import entropy
 
 from .mtrand import *
 from .generator import Generator, default_rng
diff --git a/numpy/random/bit_generator.pxd b/numpy/random/bit_generator.pxd
index 79fe69275..984033f17 100644
--- a/numpy/random/bit_generator.pxd
+++ b/numpy/random/bit_generator.pxd
@@ -1,5 +1,5 @@
 
-from .common cimport bitgen_t
+from .common cimport bitgen_t, uint32_t
 cimport numpy as np
 
 cdef class BitGenerator():
@@ -14,9 +14,9 @@ cdef class BitGenerator():
 cdef class SeedSequence():
     cdef readonly object entropy
     cdef readonly tuple spawn_key
-    cdef readonly int pool_size
+    cdef readonly uint32_t pool_size
     cdef readonly object pool
-    cdef readonly int n_children_spawned
+    cdef readonly uint32_t n_children_spawned
 
     cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer,
                      np.ndarray[np.npy_uint32, ndim=1] entropy_array)
diff --git a/numpy/random/bit_generator.pyx b/numpy/random/bit_generator.pyx
index 6694e5e4d..eb608af6c 100644
--- a/numpy/random/bit_generator.pyx
+++ b/numpy/random/bit_generator.pyx
@@ -116,7 +116,7 @@ def _coerce_to_uint32_array(x):
     Examples
     --------
     >>> import numpy as np
-    >>> from np.random.bit_generator import _coerce_to_uint32_array
+    >>> from numpy.random.bit_generator import _coerce_to_uint32_array
     >>> _coerce_to_uint32_array(12345)
     array([12345], dtype=uint32)
     >>> _coerce_to_uint32_array('12345')
@@ -458,6 +458,8 @@ cdef class SeedSequence():
         -------
         seqs : list of `SeedSequence` s
         """
+        cdef uint32_t i
+
         seqs = []
         for i in range(self.n_children_spawned,
                        self.n_children_spawned + n_children):
diff --git a/numpy/random/common.pxd b/numpy/random/common.pxd
index 2f7baa06e..ac0a94bb0 100644
--- a/numpy/random/common.pxd
+++ b/numpy/random/common.pxd
@@ -5,7 +5,7 @@ from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
                           uintptr_t)
 from libc.math cimport sqrt
 
-cdef extern from "numpy/random/bitgen.h":
+cdef extern from "src/bitgen.h":
     struct bitgen:
         void *state
         uint64_t (*next_uint64)(void *st) nogil
diff --git a/numpy/random/entropy.pyx b/numpy/random/entropy.pyx
deleted file mode 100644
index 95bf7c177..000000000
--- a/numpy/random/entropy.pyx
+++ /dev/null
@@ -1,155 +0,0 @@
-cimport numpy as np
-import numpy as np
-
-from libc.stdint cimport uint32_t, uint64_t
-
-__all__ = ['random_entropy', 'seed_by_array']
-
-np.import_array()
-
-cdef extern from "src/splitmix64/splitmix64.h":
-    cdef uint64_t splitmix64_next(uint64_t *state)  nogil
-
-cdef extern from "src/entropy/entropy.h":
-    cdef bint entropy_getbytes(void* dest, size_t size)
-    cdef bint entropy_fallback_getbytes(void *dest, size_t size)
-
-cdef Py_ssize_t compute_numel(size):
-    cdef Py_ssize_t i, n = 1
-    if isinstance(size, tuple):
-        for i in range(len(size)):
-            n *= size[i]
-    else:
-        n = size
-    return n
-
-
-def seed_by_array(object seed, Py_ssize_t n):
-    """
-    Transforms a seed array into an initial state
-
-    Parameters
-    ----------
-    seed: ndarray, 1d, uint64
-        Array to use.  If seed is a scalar, promote to array.
-    n : int
-        Number of 64-bit unsigned integers required
-
-    Notes
-    -----
-    Uses splitmix64 to perform the transformation
-    """
-    cdef uint64_t seed_copy = 0
-    cdef uint64_t[::1] seed_array
-    cdef uint64_t[::1] initial_state
-    cdef Py_ssize_t seed_size, iter_bound
-    cdef int i, loc = 0
-
-    if hasattr(seed, 'squeeze'):
-        seed = seed.squeeze()
-    arr = np.asarray(seed)
-    if arr.shape == ():
-        err_msg = 'Scalar seeds must be integers between 0 and 2**64 - 1'
-        if not np.isreal(arr):
-            raise TypeError(err_msg)
-        int_seed = int(seed)
-        if int_seed != seed:
-            raise TypeError(err_msg)
-        if int_seed < 0 or int_seed > 2**64 - 1:
-            raise ValueError(err_msg)
-        seed_array = np.array([int_seed], dtype=np.uint64)
-    elif issubclass(arr.dtype.type, np.inexact):
-        raise TypeError('seed array must be integers')
-    else:
-        err_msg = "Seed values must be integers between 0 and 2**64 - 1"
-        obj = np.asarray(seed).astype(np.object)
-        if obj.ndim != 1:
-            raise ValueError('Array-valued seeds must be 1-dimensional')
-        if not np.isreal(obj).all():
-            raise TypeError(err_msg)
-        if ((obj > int(2**64 - 1)) | (obj < 0)).any():
-            raise ValueError(err_msg)
-        try:
-            obj_int = obj.astype(np.uint64, casting='unsafe')
-        except ValueError:
-            raise ValueError(err_msg)
-        if not (obj == obj_int).all():
-            raise TypeError(err_msg)
-        seed_array = obj_int
-
-    seed_size = seed_array.shape[0]
-    iter_bound = n if n > seed_size else seed_size
-
-    initial_state = <np.ndarray>np.empty(n, dtype=np.uint64)
-    for i in range(iter_bound):
-        if i < seed_size:
-            seed_copy ^= seed_array[i]
-        initial_state[loc] = splitmix64_next(&seed_copy)
-        loc += 1
-        if loc == n:
-            loc = 0
-
-    return np.array(initial_state)
-
-
-def random_entropy(size=None, source='system'):
-    """
-    random_entropy(size=None, source='system')
-
-    Read entropy from the system cryptographic provider
-
-    Parameters
-    ----------
-    size : int or tuple of ints, optional
-        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
-        ``m * n * k`` samples are drawn.  Default is None, in which case a
-        single value is returned.
-    source : str {'system', 'fallback'}
-        Source of entropy.  'system' uses system cryptographic pool.
-        'fallback' uses a hash of the time and process id.
-
-    Returns
-    -------
-    entropy : scalar or array
-        Entropy bits in 32-bit unsigned integers. A scalar is returned if size
-        is `None`.
-
-    Notes
-    -----
-    On Unix-like machines, reads from ``/dev/urandom``. On Windows machines
-    reads from the RSA algorithm provided by the cryptographic service
-    provider.
-
-    This function reads from the system entropy pool and so samples are
-    not reproducible.  In particular, it does *NOT* make use of a
-    BitGenerator, and so ``seed`` and setting ``state`` have no
-    effect.
-
-    Raises RuntimeError if the command fails.
-    """
-    cdef bint success = True
-    cdef Py_ssize_t n = 0
-    cdef uint32_t random = 0
-    cdef uint32_t [:] randoms
-
-    if source not in ('system', 'fallback'):
-        raise ValueError('Unknown value in source.')
-
-    if size is None:
-        if source == 'system':
-            success = entropy_getbytes(<void *>&random, 4)
-        else:
-            success = entropy_fallback_getbytes(<void *>&random, 4)
-    else:
-        n = compute_numel(size)
-        randoms = np.zeros(n, dtype=np.uint32)
-        if source == 'system':
-            success = entropy_getbytes(<void *>(&randoms[0]), 4 * n)
-        else:
-            success = entropy_fallback_getbytes(<void *>(&randoms[0]), 4 * n)
-    if not success:
-        raise RuntimeError('Unable to read from system cryptographic provider')
-
-    if n == 0:
-        return random
-    return np.asarray(randoms).reshape(size)
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index c7432d8c1..df7485a97 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -4,6 +4,7 @@ import operator
 import warnings
 
 import numpy as np
+from numpy.core.multiarray import normalize_axis_index
 
 from .bounded_integers import _integers_types
 from .pcg64 import PCG64
@@ -3783,20 +3784,21 @@ cdef class Generator:
         return diric
 
     # Shuffling and permutations:
-    def shuffle(self, object x):
+    def shuffle(self, object x, axis=0):
         """
-        shuffle(x)
+        shuffle(x, axis=0)
 
         Modify a sequence in-place by shuffling its contents.
 
-        This function only shuffles the array along the first axis of a
-        multi-dimensional array. The order of sub-arrays is changed but
-        their contents remains the same.
+        The order of sub-arrays is changed but their contents remain the same.
 
         Parameters
         ----------
         x : array_like
             The array or list to be shuffled.
+        axis : int, optional
+            The axis which `x` is shuffled along. Default is 0.
+            It is only supported on `ndarray` objects.
 
         Returns
         -------
@@ -3810,8 +3812,6 @@ cdef class Generator:
         >>> arr
         [1 7 5 2 9 4 3 6 0 8] # random
 
-        Multi-dimensional arrays are only shuffled along the first axis:
-
         >>> arr = np.arange(9).reshape((3, 3))
         >>> rng.shuffle(arr)
         >>> arr
@@ -3819,17 +3819,25 @@ cdef class Generator:
                [6, 7, 8],
                [0, 1, 2]])
 
+        >>> arr = np.arange(9).reshape((3, 3))
+        >>> rng.shuffle(arr, axis=1)
+        >>> arr
+        array([[2, 0, 1], # random
+               [5, 3, 4],
+               [8, 6, 7]])
         """
         cdef:
             np.npy_intp i, j, n = len(x), stride, itemsize
             char* x_ptr
             char* buf_ptr
 
+        axis = normalize_axis_index(axis, np.ndim(x))
+
         if type(x) is np.ndarray and x.ndim == 1 and x.size:
             # Fast, statically typed path: shuffle the underlying buffer.
             # Only for non-empty, 1d objects of class ndarray (subclasses such
             # as MaskedArrays may not support this approach).
-            x_ptr = <char*><size_t>x.ctypes.data
+            x_ptr = <char*><size_t>np.PyArray_DATA(x)
             stride = x.strides[0]
             itemsize = x.dtype.itemsize
             # As the array x could contain python objects we use a buffer
@@ -3837,7 +3845,7 @@ cdef class Generator:
             # within the buffer and erroneously decrementing it's refcount
             # when the function exits.
             buf = np.empty(itemsize, dtype=np.int8)  # GC'd at function exit
-            buf_ptr = <char*><size_t>buf.ctypes.data
+            buf_ptr = <char*><size_t>np.PyArray_DATA(buf)
             with self.lock:
                 # We trick gcc into providing a specialized implementation for
                 # the most common case, yielding a ~33% performance improvement.
@@ -3847,9 +3855,10 @@ cdef class Generator:
                 else:
                     self._shuffle_raw(n, 1, itemsize, stride, x_ptr, buf_ptr)
         elif isinstance(x, np.ndarray) and x.ndim and x.size:
+            x = np.swapaxes(x, 0, axis)
             buf = np.empty_like(x[0, ...])
             with self.lock:
-                for i in reversed(range(1, n)):
+                for i in reversed(range(1, len(x))):
                     j = random_interval(&self._bitgen, i)
                     if i == j:
                         # i == j is not needed and memcpy is undefined.
@@ -3859,6 +3868,9 @@ cdef class Generator:
                     x[i] = buf
         else:
             # Untyped path.
+            if axis != 0:
+                raise NotImplementedError("Axis argument is only supported "
+                                          "on ndarray objects")
             with self.lock:
                 for i in reversed(range(1, n)):
                     j = random_interval(&self._bitgen, i)
@@ -3914,21 +3926,20 @@ cdef class Generator:
             data[j] = data[i]
             data[i] = temp
 
-    def permutation(self, object x):
+    def permutation(self, object x, axis=0):
         """
-        permutation(x)
+        permutation(x, axis=0)
 
         Randomly permute a sequence, or return a permuted range.
 
-        If `x` is a multi-dimensional array, it is only shuffled along its
-        first index.
-
         Parameters
         ----------
         x : int or array_like
             If `x` is an integer, randomly permute ``np.arange(x)``.
             If `x` is an array, make a copy and shuffle the elements
             randomly.
+        axis : int, optional
+            The axis which `x` is shuffled along. Default is 0.
 
         Returns
         -------
@@ -3950,6 +3961,17 @@ cdef class Generator:
                [0, 1, 2],
                [3, 4, 5]])
 
+        >>> rng.permutation("abc")
+        Traceback (most recent call last):
+            ...
+        numpy.AxisError: x must be an integer or at least 1-dimensional
+
+        >>> arr = np.arange(9).reshape((3, 3))
+        >>> rng.permutation(arr, axis=1)
+        array([[0, 2, 1], # random
+               [3, 5, 4],
+               [6, 8, 7]])
+
         """
         if isinstance(x, (int, np.integer)):
             arr = np.arange(x)
@@ -3958,6 +3980,8 @@ cdef class Generator:
 
         arr = np.asarray(x)
 
+        axis = normalize_axis_index(axis, arr.ndim)
+
         # shuffle has fast-path for 1-d
         if arr.ndim == 1:
             # Return a copy if same memory
@@ -3967,9 +3991,11 @@ cdef class Generator:
             return arr
 
         # Shuffle index array, dtype to ensure fast path
-        idx = np.arange(arr.shape[0], dtype=np.intp)
+        idx = np.arange(arr.shape[axis], dtype=np.intp)
         self.shuffle(idx)
-        return arr[idx]
+        slices = [slice(None)]*arr.ndim
+        slices[axis] = idx
+        return arr[tuple(slices)]
 
 
 def default_rng(seed=None):
diff --git a/numpy/random/info.py b/numpy/random/info.py
deleted file mode 100644
index b9fd7f26a..000000000
--- a/numpy/random/info.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from __future__ import division, absolute_import, print_function
-
-from .. import __doc__
-
-depends = ['core']
diff --git a/numpy/random/legacy_distributions.pxd b/numpy/random/legacy_distributions.pxd
index 7ba058054..c681388db 100644
--- a/numpy/random/legacy_distributions.pxd
+++ b/numpy/random/legacy_distributions.pxd
@@ -34,6 +34,8 @@ cdef extern from "legacy-distributions.h":
                             double nonc) nogil
     double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil
     double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil
+    int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
+                                   int64_t n, binomial_t *binomial) nogil
     int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil
     int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil
     int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil
diff --git a/numpy/random/mt19937.pyx b/numpy/random/mt19937.pyx
index 49c3622f5..7d0f6cd22 100644
--- a/numpy/random/mt19937.pyx
+++ b/numpy/random/mt19937.pyx
@@ -5,7 +5,6 @@ cimport numpy as np
 
 from .common cimport *
 from .bit_generator cimport BitGenerator, SeedSequence
-from .entropy import random_entropy
 
 __all__ = ['MT19937']
 
@@ -156,7 +155,8 @@ cdef class MT19937(BitGenerator):
             Random seed initializing the pseudo-random number generator.
             Can be an integer in [0, 2**32-1], array of integers in
             [0, 2**32-1], a `SeedSequence, or ``None``. If `seed`
-            is ``None``, then sample entropy for a seed.
+            is ``None``, then fresh, unpredictable entropy will be pulled from
+            the OS.
 
         Raises
         ------
@@ -167,7 +167,8 @@ cdef class MT19937(BitGenerator):
         with self.lock:
             try:
                 if seed is None:
-                    val = random_entropy(RK_STATE_LEN)
+                    seed = SeedSequence()
+                    val = seed.generate_state(RK_STATE_LEN)
                     # MSB is 1; assuring non-zero initial array
                     self.rng_state.key[0] = 0x80000000UL
                     for i in range(1, RK_STATE_LEN):
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index 46b6b3388..c469a4645 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -83,8 +83,8 @@ cdef class RandomState:
     See Also
     --------
     Generator
-    mt19937.MT19937
-    Bit_Generators
+    MT19937
+    :ref:`bit_generator`
 
     """
     cdef public object _bit_generator
@@ -3086,7 +3086,9 @@ cdef class RandomState:
                 for i in range(cnt):
                     _dp = (<double*>np.PyArray_MultiIter_DATA(it, 1))[0]
                     _in = (<long*>np.PyArray_MultiIter_DATA(it, 2))[0]
-                    (<long*>np.PyArray_MultiIter_DATA(it, 0))[0] = random_binomial(&self._bitgen, _dp, _in, &self._binomial)
+                    (<long*>np.PyArray_MultiIter_DATA(it, 0))[0] = \
+                        legacy_random_binomial(&self._bitgen, _dp, _in,
+                                               &self._binomial)
 
                     np.PyArray_MultiIter_NEXT(it)
 
@@ -3099,7 +3101,8 @@ cdef class RandomState:
 
         if size is None:
             with self.lock:
-                return random_binomial(&self._bitgen, _dp, _in, &self._binomial)
+                return <long>legacy_random_binomial(&self._bitgen, _dp, _in,
+                                                    &self._binomial)
 
         randoms = <np.ndarray>np.empty(size, int)
         cnt = np.PyArray_SIZE(randoms)
@@ -3107,8 +3110,8 @@ cdef class RandomState:
 
         with self.lock, nogil:
             for i in range(cnt):
-                randoms_data[i] = random_binomial(&self._bitgen, _dp, _in,
-                                                  &self._binomial)
+                randoms_data[i] = legacy_random_binomial(&self._bitgen, _dp, _in,
+                                                         &self._binomial)
 
         return randoms
 
@@ -3517,7 +3520,7 @@ cdef class RandomState:
         # Convert to int64, if necessary, to use int64 infrastructure
         ongood = ongood.astype(np.int64)
         onbad = onbad.astype(np.int64)
-        onbad = onbad.astype(np.int64)
+        onsample = onsample.astype(np.int64)
         out = discrete_broadcast_iii(&legacy_random_hypergeometric,&self._bitgen, size, self.lock,
                                      ongood, 'ngood', CONS_NON_NEGATIVE,
                                      onbad, 'nbad', CONS_NON_NEGATIVE,
@@ -4070,7 +4073,7 @@ cdef class RandomState:
             # Fast, statically typed path: shuffle the underlying buffer.
             # Only for non-empty, 1d objects of class ndarray (subclasses such
             # as MaskedArrays may not support this approach).
-            x_ptr = <char*><size_t>x.ctypes.data
+            x_ptr = <char*><size_t>np.PyArray_DATA(x)
             stride = x.strides[0]
             itemsize = x.dtype.itemsize
             # As the array x could contain python objects we use a buffer
@@ -4078,7 +4081,7 @@ cdef class RandomState:
             # within the buffer and erroneously decrementing it's refcount
             # when the function exits.
             buf = np.empty(itemsize, dtype=np.int8)  # GC'd at function exit
-            buf_ptr = <char*><size_t>buf.ctypes.data
+            buf_ptr = <char*><size_t>np.PyArray_DATA(buf)
             with self.lock:
                 # We trick gcc into providing a specialized implementation for
                 # the most common case, yielding a ~33% performance improvement.
@@ -4134,6 +4137,7 @@ cdef class RandomState:
         out : ndarray
             Permuted sequence or array range.
 
+
         Examples
         --------
         >>> np.random.permutation(10)
@@ -4149,12 +4153,15 @@ cdef class RandomState:
                [3, 4, 5]])
 
         """
+
         if isinstance(x, (int, np.integer)):
             arr = np.arange(x)
             self.shuffle(arr)
             return arr
 
         arr = np.asarray(x)
+        if arr.ndim < 1:
+            raise IndexError("x must be an integer or at least 1-dimensional")
 
         # shuffle has fast-path for 1-d
         if arr.ndim == 1:
diff --git a/numpy/random/setup.py b/numpy/random/setup.py
index a1bf3b83c..ce7f0565f 100644
--- a/numpy/random/setup.py
+++ b/numpy/random/setup.py
@@ -34,8 +34,6 @@ def configuration(parent_package='', top_path=None):
 
     defs.append(('NPY_NO_DEPRECATED_API', 0))
     config.add_data_dir('tests')
-    config.add_data_files('common.pxd')
-    config.add_data_files('bit_generator.pxd')
 
     EXTRA_LINK_ARGS = []
     # Math lib
@@ -49,8 +47,8 @@ def configuration(parent_package='', top_path=None):
     elif not is_msvc:
         # Some bit generators require c99
         EXTRA_COMPILE_ARGS += ['-std=c99']
-        INTEL_LIKE = any([val in k.lower() for k in platform.uname()
-                          for val in ('x86', 'i686', 'i386', 'amd64')])
+        INTEL_LIKE = any(arch in platform.machine() 
+                         for arch in ('x86', 'i686', 'i386', 'amd64'))
         if INTEL_LIKE:
             # Assumes GCC or GCC-like compiler
             EXTRA_COMPILE_ARGS += ['-msse2']
@@ -61,18 +59,6 @@ def configuration(parent_package='', top_path=None):
     # One can force emulated 128-bit arithmetic if one wants.
     #PCG64_DEFS += [('PCG_FORCE_EMULATED_128BIT_MATH', '1')]
 
-    config.add_extension('entropy',
-                         sources=['entropy.c', 'src/entropy/entropy.c'] +
-                                 [generate_libraries],
-                         libraries=EXTRA_LIBRARIES,
-                         extra_compile_args=EXTRA_COMPILE_ARGS,
-                         extra_link_args=EXTRA_LINK_ARGS,
-                         depends=[join('src', 'splitmix64', 'splitmix.h'),
-                                  join('src', 'entropy', 'entropy.h'),
-                                  'entropy.pyx',
-                                  ],
-                         define_macros=defs,
-                         )
     for gen in ['mt19937']:
         # gen.pyx, src/gen/gen.c, src/gen/gen-jump.c
         config.add_extension(gen,
diff --git a/numpy/core/include/numpy/random/bitgen.h b/numpy/random/src/bitgen.h
index 0adaaf2ee..0adaaf2ee 100644
--- a/numpy/core/include/numpy/random/bitgen.h
+++ b/numpy/random/src/bitgen.h
diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
index 65257ecbf..1244ffe65 100644
--- a/numpy/random/src/distributions/distributions.c
+++ b/numpy/random/src/distributions/distributions.c
@@ -901,8 +901,8 @@ RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state, RAND_INT_TYPE n,
   return X;
 }
 
-RAND_INT_TYPE random_binomial(bitgen_t *bitgen_state, double p, RAND_INT_TYPE n,
-                              binomial_t *binomial) {
+int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n,
+                        binomial_t *binomial) {
   double q;
 
   if ((n == 0LL) || (p == 0.0f))
@@ -1478,7 +1478,7 @@ uint64_t random_bounded_uint64(bitgen_t *bitgen_state, uint64_t off,
                                uint64_t rng, uint64_t mask, bool use_masked) {
   if (rng == 0) {
     return off;
-  } else if (rng < 0xFFFFFFFFUL) {
+  } else if (rng <= 0xFFFFFFFFUL) {
     /* Call 32-bit generator if range in 32-bit. */
     if (use_masked) {
       return off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, NULL,
@@ -1592,7 +1592,7 @@ void random_bounded_uint64_fill(bitgen_t *bitgen_state, uint64_t off,
     for (i = 0; i < cnt; i++) {
       out[i] = off;
     }
-  } else if (rng < 0xFFFFFFFFUL) {
+  } else if (rng <= 0xFFFFFFFFUL) {
     uint32_t buf = 0;
     int bcnt = 0;
 
diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h
index c8cdfd20f..2a6b2a045 100644
--- a/numpy/random/src/distributions/distributions.h
+++ b/numpy/random/src/distributions/distributions.h
@@ -1,15 +1,14 @@
 #ifndef _RANDOMDGEN__DISTRIBUTIONS_H_
 #define _RANDOMDGEN__DISTRIBUTIONS_H_
 
-#pragma once
+#include "Python.h"
+#include "numpy/npy_common.h"
 #include <stddef.h>
 #include <stdbool.h>
 #include <stdint.h>
 
-#include "Python.h"
-#include "numpy/npy_common.h"
 #include "numpy/npy_math.h"
-#include "numpy/random/bitgen.h"
+#include "src/bitgen.h"
 
 /*
  * RAND_INT_TYPE is used to share integer generators with RandomState which
@@ -43,11 +42,11 @@
 typedef struct s_binomial_t {
   int has_binomial; /* !=0: following parameters initialized for binomial */
   double psave;
-  int64_t nsave;
+  RAND_INT_TYPE nsave;
   double r;
   double q;
   double fm;
-  int64_t m;
+  RAND_INT_TYPE m;
   double p1;
   double xm;
   double xl;
@@ -148,8 +147,18 @@ DECLDIR double random_triangular(bitgen_t *bitgen_state, double left, double mod
 DECLDIR RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam);
 DECLDIR RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n,
                                          double p);
-DECLDIR RAND_INT_TYPE random_binomial(bitgen_t *bitgen_state, double p, RAND_INT_TYPE n,
-                                binomial_t *binomial);
+
+DECLDIR RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state,
+                                           RAND_INT_TYPE n,
+                                           double p,
+                                           binomial_t *binomial);
+DECLDIR RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state,
+                                                RAND_INT_TYPE n,
+                                                double p,
+                                                binomial_t *binomial);
+DECLDIR int64_t random_binomial(bitgen_t *bitgen_state, double p,
+                                int64_t n, binomial_t *binomial);
+
 DECLDIR RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p);
 DECLDIR RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p);
diff --git a/numpy/random/src/distributions/random_hypergeometric.c b/numpy/random/src/distributions/random_hypergeometric.c
index 59a3a4b9b..94dc6380f 100644
--- a/numpy/random/src/distributions/random_hypergeometric.c
+++ b/numpy/random/src/distributions/random_hypergeometric.c
@@ -1,6 +1,6 @@
-#include <stdint.h>
 #include "distributions.h"
 #include "logfactorial.h"
+#include <stdint.h>
 
 /*
  *  Generate a sample from the hypergeometric distribution.
diff --git a/numpy/random/src/entropy/entropy.c b/numpy/random/src/entropy/entropy.c
deleted file mode 100644
index eaca37a9c..000000000
--- a/numpy/random/src/entropy/entropy.c
+++ /dev/null
@@ -1,114 +0,0 @@
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "entropy.h"
-#ifdef _WIN32
-/* Windows */
-#include <sys/timeb.h>
-#include <time.h>
-#include <windows.h>
-
-#include <wincrypt.h>
-#else
-/* Unix */
-#include <sys/time.h>
-#include <time.h>
-#include <unistd.h>
-#include <fcntl.h>
-#endif
-
-bool entropy_getbytes(void *dest, size_t size) {
-#ifndef _WIN32
-
-  int fd = open("/dev/urandom", O_RDONLY);
-  if (fd < 0)
-    return false;
-  ssize_t sz = read(fd, dest, size);
-  if ((sz < 0) || ((size_t)sz < size))
-    return false;
-  return close(fd) == 0;
-
-#else
-
-  HCRYPTPROV hCryptProv;
-  BOOL done;
-
-  if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
-                           CRYPT_VERIFYCONTEXT) ||
-      !hCryptProv) {
-    return true;
-  }
-  done = CryptGenRandom(hCryptProv, (DWORD)size, (unsigned char *)dest);
-  CryptReleaseContext(hCryptProv, 0);
-  if (!done) {
-    return false;
-  }
-
-  return true;
-#endif
-}
-
-/* Thomas Wang 32/64 bits integer hash function */
-uint32_t entropy_hash_32(uint32_t key) {
-  key += ~(key << 15);
-  key ^= (key >> 10);
-  key += (key << 3);
-  key ^= (key >> 6);
-  key += ~(key << 11);
-  key ^= (key >> 16);
-  return key;
-}
-
-uint64_t entropy_hash_64(uint64_t key) {
-  key = (~key) + (key << 21); // key = (key << 21) - key - 1;
-  key = key ^ (key >> 24);
-  key = (key + (key << 3)) + (key << 8); // key * 265
-  key = key ^ (key >> 14);
-  key = (key + (key << 2)) + (key << 4); // key * 21
-  key = key ^ (key >> 28);
-  key = key + (key << 31);
-  return key;
-}
-
-uint32_t entropy_randombytes(void) {
-
-#ifndef _WIN32
-  struct timeval tv;
-  gettimeofday(&tv, NULL);
-  return entropy_hash_32(getpid()) ^ entropy_hash_32(tv.tv_sec) ^
-         entropy_hash_32(tv.tv_usec) ^ entropy_hash_32(clock());
-#else
-  uint32_t out = 0;
-  int64_t counter;
-  struct _timeb tv;
-  _ftime_s(&tv);
-  out = entropy_hash_32(GetCurrentProcessId()) ^
-        entropy_hash_32((uint32_t)tv.time) ^ entropy_hash_32(tv.millitm) ^
-        entropy_hash_32(clock());
-  if (QueryPerformanceCounter((LARGE_INTEGER *)&counter) != 0)
-    out ^= entropy_hash_32((uint32_t)(counter & 0xffffffff));
-  return out;
-#endif
-}
-
-bool entropy_fallback_getbytes(void *dest, size_t size) {
-  int hashes = (int)size;
-  uint32_t *hash = malloc(hashes * sizeof(uint32_t));
-  int i;
-  for (i = 0; i < hashes; i++) {
-    hash[i] = entropy_randombytes();
-  }
-  memcpy(dest, (void *)hash, size);
-  free(hash);
-  return true;
-}
-
-void entropy_fill(void *dest, size_t size) {
-  bool success;
-  success = entropy_getbytes(dest, size);
-  if (!success) {
-    entropy_fallback_getbytes(dest, size);
-  }
-}
diff --git a/numpy/random/src/entropy/entropy.h b/numpy/random/src/entropy/entropy.h
deleted file mode 100644
index f00caf61d..000000000
--- a/numpy/random/src/entropy/entropy.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _RANDOMDGEN__ENTROPY_H_
-#define _RANDOMDGEN__ENTROPY_H_
-
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-extern void entropy_fill(void *dest, size_t size);
-
-extern bool entropy_getbytes(void *dest, size_t size);
-
-extern bool entropy_fallback_getbytes(void *dest, size_t size);
-
-#endif
diff --git a/numpy/random/src/legacy/legacy-distributions.c b/numpy/random/src/legacy/legacy-distributions.c
index 4741a0352..684b3d762 100644
--- a/numpy/random/src/legacy/legacy-distributions.c
+++ b/numpy/random/src/legacy/legacy-distributions.c
@@ -215,6 +215,37 @@ double legacy_exponential(aug_bitgen_t *aug_state, double scale) {
 }
 
 
+static RAND_INT_TYPE legacy_random_binomial_original(bitgen_t *bitgen_state,
+                                                     double p,
+                                                     RAND_INT_TYPE n,
+                                                     binomial_t *binomial) {
+  double q;
+
+  if (p <= 0.5) {
+    if (p * n <= 30.0) {
+      return random_binomial_inversion(bitgen_state, n, p, binomial);
+    } else {
+      return random_binomial_btpe(bitgen_state, n, p, binomial);
+    }
+  } else {
+    q = 1.0 - p;
+    if (q * n <= 30.0) {
+      return n - random_binomial_inversion(bitgen_state, n, q, binomial);
+    } else {
+      return n - random_binomial_btpe(bitgen_state, n, q, binomial);
+    }
+  }
+}
+
+
+int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
+                               int64_t n, binomial_t *binomial) {
+  return (int64_t) legacy_random_binomial_original(bitgen_state, p,
+                                                   (RAND_INT_TYPE) n,
+                                                   binomial);
+}
+
+
 static RAND_INT_TYPE random_hypergeometric_hyp(bitgen_t *bitgen_state,
                                                RAND_INT_TYPE good,
                                                RAND_INT_TYPE bad,
diff --git a/numpy/random/src/legacy/legacy-distributions.h b/numpy/random/src/legacy/legacy-distributions.h
index 005c4e5d2..4bc15d58e 100644
--- a/numpy/random/src/legacy/legacy-distributions.h
+++ b/numpy/random/src/legacy/legacy-distributions.h
@@ -16,26 +16,23 @@ extern double legacy_pareto(aug_bitgen_t *aug_state, double a);
 extern double legacy_weibull(aug_bitgen_t *aug_state, double a);
 extern double legacy_power(aug_bitgen_t *aug_state, double a);
 extern double legacy_gamma(aug_bitgen_t *aug_state, double shape, double scale);
-extern double legacy_pareto(aug_bitgen_t *aug_state, double a);
-extern double legacy_weibull(aug_bitgen_t *aug_state, double a);
 extern double legacy_chisquare(aug_bitgen_t *aug_state, double df);
 extern double legacy_noncentral_chisquare(aug_bitgen_t *aug_state, double df,
                                           double nonc);
-
 extern double legacy_noncentral_f(aug_bitgen_t *aug_state, double dfnum,
                                   double dfden, double nonc);
 extern double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale);
 extern double legacy_lognormal(aug_bitgen_t *aug_state, double mean,
                                double sigma);
 extern double legacy_standard_t(aug_bitgen_t *aug_state, double df);
-extern int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n,
-                                        double p);
 extern double legacy_standard_cauchy(aug_bitgen_t *state);
 extern double legacy_beta(aug_bitgen_t *aug_state, double a, double b);
 extern double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden);
 extern double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale);
 extern double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape);
 extern double legacy_exponential(aug_bitgen_t *aug_state, double scale);
+extern int64_t legacy_random_binomial(bitgen_t *bitgen_state, double p,
+                                      int64_t n, binomial_t *binomial);
 extern int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n,
                                         double p);
 extern int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state,
diff --git a/numpy/random/src/philox/philox.h b/numpy/random/src/philox/philox.h
index 309d89eae..c72424a97 100644
--- a/numpy/random/src/philox/philox.h
+++ b/numpy/random/src/philox/philox.h
@@ -1,8 +1,8 @@
 #ifndef _RANDOMDGEN__PHILOX_H_
 #define _RANDOMDGEN__PHILOX_H_
 
-#include <inttypes.h>
 #include "numpy/npy_common.h"
+#include <inttypes.h>
 
 #define PHILOX_BUFFER_SIZE 4L
 
diff --git a/numpy/random/src/sfc64/sfc64.h b/numpy/random/src/sfc64/sfc64.h
index 6674ae69c..75c4118d3 100644
--- a/numpy/random/src/sfc64/sfc64.h
+++ b/numpy/random/src/sfc64/sfc64.h
@@ -1,11 +1,11 @@
 #ifndef _RANDOMDGEN__SFC64_H_
 #define _RANDOMDGEN__SFC64_H_
 
+#include "numpy/npy_common.h"
 #include <inttypes.h>
 #ifdef _WIN32
 #include <stdlib.h>
 #endif
-#include "numpy/npy_common.h"
 
 typedef struct s_sfc64_state {
   uint64_t s[4];
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index a962fe84e..391c33c1a 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -732,6 +732,29 @@ class TestRandomDist(object):
             desired = conv([4, 1, 9, 8, 0, 5, 3, 6, 2, 7])
             assert_array_equal(actual, desired)
 
+    def test_shuffle_custom_axis(self):
+        random = Generator(MT19937(self.seed))
+        actual = np.arange(16).reshape((4, 4))
+        random.shuffle(actual, axis=1)
+        desired = np.array([[ 0,  3,  1,  2],
+                            [ 4,  7,  5,  6],
+                            [ 8, 11,  9, 10],
+                            [12, 15, 13, 14]])
+        assert_array_equal(actual, desired)
+        random = Generator(MT19937(self.seed))
+        actual = np.arange(16).reshape((4, 4))
+        random.shuffle(actual, axis=-1)
+        assert_array_equal(actual, desired)
+
+    def test_shuffle_axis_nonsquare(self):
+        y1 = np.arange(20).reshape(2, 10)
+        y2 = y1.copy()
+        random = Generator(MT19937(self.seed))
+        random.shuffle(y1, axis=1)
+        random = Generator(MT19937(self.seed))
+        random.shuffle(y2.T)
+        assert_array_equal(y1, y2)
+
     def test_shuffle_masked(self):
         # gh-3263
         a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1)
@@ -746,6 +769,16 @@ class TestRandomDist(object):
             assert_equal(
                 sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask]))
 
+    def test_shuffle_exceptions(self):
+        random = Generator(MT19937(self.seed))
+        arr = np.arange(10)
+        assert_raises(np.AxisError, random.shuffle, arr, 1)
+        arr = np.arange(9).reshape((3, 3))
+        assert_raises(np.AxisError, random.shuffle, arr, 3)
+        assert_raises(TypeError, random.shuffle, arr, slice(1, 2, None))
+        arr = [[1, 2, 3], [4, 5, 6]]
+        assert_raises(NotImplementedError, random.shuffle, arr, 1)
+
     def test_permutation(self):
         random = Generator(MT19937(self.seed))
         alist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
@@ -757,6 +790,40 @@ class TestRandomDist(object):
         arr_2d = np.atleast_2d([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]).T
         actual = random.permutation(arr_2d)
         assert_array_equal(actual, np.atleast_2d(desired).T)
+        
+        bad_x_str = "abcd"
+        assert_raises(np.AxisError, random.permutation, bad_x_str)
+
+        bad_x_float = 1.2
+        assert_raises(np.AxisError, random.permutation, bad_x_float)
+
+        random = Generator(MT19937(self.seed))
+        integer_val = 10
+        desired = [3, 0, 8, 7, 9, 4, 2, 5, 1, 6]
+
+        actual = random.permutation(integer_val)
+        assert_array_equal(actual, desired)
+
+    def test_permutation_custom_axis(self):
+        a = np.arange(16).reshape((4, 4))
+        desired = np.array([[ 0,  3,  1,  2],
+                            [ 4,  7,  5,  6],
+                            [ 8, 11,  9, 10],
+                            [12, 15, 13, 14]])
+        random = Generator(MT19937(self.seed))
+        actual = random.permutation(a, axis=1)
+        assert_array_equal(actual, desired)
+        random = Generator(MT19937(self.seed))
+        actual = random.permutation(a, axis=-1)
+        assert_array_equal(actual, desired)
+
+    def test_permutation_exceptions(self):
+        random = Generator(MT19937(self.seed))
+        arr = np.arange(10)
+        assert_raises(np.AxisError, random.permutation, arr, 1)
+        arr = np.arange(9).reshape((3, 3))
+        assert_raises(np.AxisError, random.permutation, arr, 3)
+        assert_raises(TypeError, random.permutation, arr, slice(1, 2, None))
 
     def test_beta(self):
         random = Generator(MT19937(self.seed))
diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py
index 3b5a279a3..a0edc5c23 100644
--- a/numpy/random/tests/test_randomstate.py
+++ b/numpy/random/tests/test_randomstate.py
@@ -686,6 +686,21 @@ class TestRandomDist(object):
         actual = random.permutation(arr_2d)
         assert_array_equal(actual, np.atleast_2d(desired).T)
 
+        random.seed(self.seed)
+        bad_x_str = "abcd"
+        assert_raises(IndexError, random.permutation, bad_x_str)
+
+        random.seed(self.seed)
+        bad_x_float = 1.2
+        assert_raises(IndexError, random.permutation, bad_x_float)
+
+        integer_val = 10
+        desired = [9, 0, 8, 5, 1, 3, 4, 7, 6, 2]
+
+        random.seed(self.seed)
+        actual = random.permutation(integer_val)
+        assert_array_equal(actual, desired)
+
     def test_beta(self):
         random.seed(self.seed)
         actual = random.beta(.1, .9, size=(3, 2))
diff --git a/numpy/random/tests/test_randomstate_regression.py b/numpy/random/tests/test_randomstate_regression.py
index 29870534a..edf32ea97 100644
--- a/numpy/random/tests/test_randomstate_regression.py
+++ b/numpy/random/tests/test_randomstate_regression.py
@@ -181,3 +181,30 @@ class TestRegression(object):
         assert c.dtype == np.dtype(int)
         c = np.random.choice(10, replace=False, size=2)
         assert c.dtype == np.dtype(int)
+
+    @pytest.mark.skipif(np.iinfo('l').max < 2**32,
+                        reason='Cannot test with 32-bit C long')
+    def test_randint_117(self):
+        # GH 14189
+        random.seed(0)
+        expected = np.array([2357136044, 2546248239, 3071714933, 3626093760,
+                             2588848963, 3684848379, 2340255427, 3638918503,
+                             1819583497, 2678185683], dtype='int64')
+        actual = random.randint(2**32, size=10)
+        assert_array_equal(actual, expected)
+
+    def test_p_zero_stream(self):
+        # Regression test for gh-14522.  Ensure that future versions
+        # generate the same variates as version 1.16.
+        np.random.seed(12345)
+        assert_array_equal(random.binomial(1, [0, 0.25, 0.5, 0.75, 1]),
+                           [0, 0, 0, 1, 1])
+
+    def test_n_zero_stream(self):
+        # Regression test for gh-14522.  Ensure that future versions
+        # generate the same variates as version 1.16.
+        np.random.seed(8675309)
+        expected = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                             [3, 4, 2, 3, 3, 1, 5, 3, 1, 3]])
+        assert_array_equal(random.binomial([[0], [10]], 0.25, size=(2, 10)),
+                           expected)
diff --git a/numpy/random/tests/test_smoke.py b/numpy/random/tests/test_smoke.py
index 84d261e5e..6e641b5f4 100644
--- a/numpy/random/tests/test_smoke.py
+++ b/numpy/random/tests/test_smoke.py
@@ -5,7 +5,7 @@ from functools import partial
 import numpy as np
 import pytest
 from numpy.testing import assert_equal, assert_, assert_array_equal
-from numpy.random import (Generator, MT19937, PCG64, Philox, SFC64, entropy)
+from numpy.random import (Generator, MT19937, PCG64, Philox, SFC64)
 
 @pytest.fixture(scope='module',
                 params=(np.bool, np.int8, np.int16, np.int32, np.int64,
@@ -806,23 +806,3 @@ class TestDefaultRNG(RNG):
             np.random.default_rng(-1)
         with pytest.raises(ValueError):
             np.random.default_rng([12345, -1])
-
-
-class TestEntropy(object):
-    def test_entropy(self):
-        e1 = entropy.random_entropy()
-        e2 = entropy.random_entropy()
-        assert_((e1 != e2))
-        e1 = entropy.random_entropy(10)
-        e2 = entropy.random_entropy(10)
-        assert_((e1 != e2).all())
-        e1 = entropy.random_entropy(10, source='system')
-        e2 = entropy.random_entropy(10, source='system')
-        assert_((e1 != e2).all())
-
-    def test_fallback(self):
-        e1 = entropy.random_entropy(source='fallback')
-        time.sleep(0.1)
-        e2 = entropy.random_entropy(source='fallback')
-        assert_((e1 != e2))
-
diff --git a/numpy/testing/_private/parameterized.py b/numpy/testing/_private/parameterized.py
index a5fa4fb5e..489d8e09a 100644
--- a/numpy/testing/_private/parameterized.py
+++ b/numpy/testing/_private/parameterized.py
@@ -45,11 +45,18 @@ except ImportError:
 
 from unittest import TestCase
 
-PY3 = sys.version_info[0] == 3
 PY2 = sys.version_info[0] == 2
 
 
-if PY3:
+if PY2:
+    from types import InstanceType
+    lzip = zip
+    text_type = unicode
+    bytes_type = str
+    string_types = basestring,
+    def make_method(func, instance, type):
+        return MethodType(func, instance, type)
+else:
     # Python 3 doesn't have an InstanceType, so just use a dummy type.
     class InstanceType():
         pass
@@ -61,14 +68,6 @@ if PY3:
         if instance is None:
             return func
         return MethodType(func, instance)
-else:
-    from types import InstanceType
-    lzip = zip
-    text_type = unicode
-    bytes_type = str
-    string_types = basestring,
-    def make_method(func, instance, type):
-        return MethodType(func, instance, type)
 
 _param = namedtuple("param", "args kwargs")
 
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index 7aa5ef033..8a31fcf15 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -21,7 +21,6 @@ import pprint
 
 from numpy.core import(
      intp, float32, empty, arange, array_repr, ndarray, isnat, array)
-from numpy.lib.utils import deprecate
 
 if sys.version_info[0] >= 3:
     from io import StringIO
@@ -33,7 +32,7 @@ __all__ = [
         'assert_array_equal', 'assert_array_less', 'assert_string_equal',
         'assert_array_almost_equal', 'assert_raises', 'build_err_msg',
         'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal',
-        'raises', 'rand', 'rundocs', 'runstring', 'verbose', 'measure',
+        'raises', 'rundocs', 'runstring', 'verbose', 'measure',
         'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex',
         'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings',
         'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings',
@@ -154,22 +153,6 @@ def gisinf(x):
     return st
 
 
-@deprecate(message="numpy.testing.rand is deprecated in numpy 1.11. "
-                   "Use numpy.random.rand instead.")
-def rand(*args):
-    """Returns an array of random numbers with the given shape.
-
-    This only uses the standard library, so it is useful for testing purposes.
-    """
-    import random
-    from numpy.core import zeros, float64
-    results = zeros(args, float64)
-    f = results.flat
-    for i in range(len(f)):
-        f[i] = random.random()
-    return results
-
-
 if os.name == 'nt':
     # Code "stolen" from enthought/debug/memusage.py
     def GetPerformanceAttributes(object, counter, instance=None,
@@ -703,7 +686,7 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
                          header='', precision=6, equal_nan=True,
                          equal_inf=True):
     __tracebackhide__ = True  # Hide traceback for py.test
-    from numpy.core import array, array2string, isnan, inf, bool_, errstate
+    from numpy.core import array, array2string, isnan, inf, bool_, errstate, all, max, object_
 
     x = array(x, copy=False, subok=True)
     y = array(y, copy=False, subok=True)
@@ -805,17 +788,18 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         # np.ma.masked, which is falsy).
         if cond != True:
             n_mismatch = reduced.size - reduced.sum(dtype=intp)
-            percent_mismatch = 100 * n_mismatch / ox.size
+            n_elements = flagged.size if flagged.ndim != 0 else reduced.size
+            percent_mismatch = 100 * n_mismatch / n_elements
             remarks = [
                 'Mismatched elements: {} / {} ({:.3g}%)'.format(
-                    n_mismatch, ox.size, percent_mismatch)]
+                    n_mismatch, n_elements, percent_mismatch)]
 
             with errstate(invalid='ignore', divide='ignore'):
                 # ignore errors for non-numeric types
                 with contextlib.suppress(TypeError):
                     error = abs(x - y)
-                    max_abs_error = error.max()
-                    if error.dtype == 'object':
+                    max_abs_error = max(error)
+                    if getattr(error, 'dtype', object_) == object_:
                         remarks.append('Max absolute difference: '
                                         + str(max_abs_error))
                     else:
@@ -824,8 +808,13 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
 
                     # note: this definition of relative error matches that one
                     # used by assert_allclose (found in np.isclose)
-                    max_rel_error = (error / abs(y)).max()
-                    if error.dtype == 'object':
+                    # Filter values where the divisor would be zero
+                    nonzero = bool_(y != 0)
+                    if all(~nonzero):
+                        max_rel_error = array(inf)
+                    else:
+                        max_rel_error = max(error[nonzero] / abs(y[nonzero]))
+                    if getattr(error, 'dtype', object_) == object_:
                         remarks.append('Max relative difference: '
                                         + str(max_rel_error))
                     else:
diff --git a/numpy/testing/decorators.py b/numpy/testing/decorators.py
deleted file mode 100644
index bf78be500..000000000
--- a/numpy/testing/decorators.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-Back compatibility decorators module. It will import the appropriate
-set of tools
-
-"""
-from __future__ import division, absolute_import, print_function
-
-import warnings
-
-# 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.decorators is deprecated "
-              "since numpy 1.15.0, import from numpy.testing instead.",
-              DeprecationWarning, stacklevel=2)
-
-from ._private.decorators import *
diff --git a/numpy/testing/noseclasses.py b/numpy/testing/noseclasses.py
deleted file mode 100644
index 5748a9a0f..000000000
--- a/numpy/testing/noseclasses.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""
-Back compatibility noseclasses module. It will import the appropriate
-set of tools
-"""
-from __future__ import division, absolute_import, print_function
-
-import warnings
-
-# 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.noseclasses is deprecated "
-              "since 1.15.0, import from numpy.testing instead",
-              DeprecationWarning, stacklevel=2)
-
-from ._private.noseclasses import *
diff --git a/numpy/testing/nosetester.py b/numpy/testing/nosetester.py
deleted file mode 100644
index 2ac212eee..000000000
--- a/numpy/testing/nosetester.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-Back compatibility nosetester module. It will import the appropriate
-set of tools
-
-"""
-from __future__ import division, absolute_import, print_function
-
-import warnings
-
-# 2018-04-04, numpy 1.15.0
-warnings.warn("Importing from numpy.testing.nosetester is deprecated "
-              "since 1.15.0, import from numpy.testing instead.",
-              DeprecationWarning, stacklevel=2)
-
-from ._private.nosetester import *
-
-__all__ = ['get_package_name', 'run_module_suite', 'NoseTester',
-           '_numpy_tester', 'get_package_name', 'import_nose',
-           'suppress_warnings']
diff --git a/numpy/testing/print_coercion_tables.py b/numpy/testing/print_coercion_tables.py
index 3a359f472..72b22cee1 100755
--- a/numpy/testing/print_coercion_tables.py
+++ b/numpy/testing/print_coercion_tables.py
@@ -70,22 +70,24 @@ def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray,
             print(char, end=' ')
         print()
 
-print("can cast")
-print_cancast_table(np.typecodes['All'])
-print()
-print("In these tables, ValueError is '!', OverflowError is '@', TypeError is '#'")
-print()
-print("scalar + scalar")
-print_coercion_table(np.typecodes['All'], 0, 0, False)
-print()
-print("scalar + neg scalar")
-print_coercion_table(np.typecodes['All'], 0, -1, False)
-print()
-print("array + scalar")
-print_coercion_table(np.typecodes['All'], 0, 0, True)
-print()
-print("array + neg scalar")
-print_coercion_table(np.typecodes['All'], 0, -1, True)
-print()
-print("promote_types")
-print_coercion_table(np.typecodes['All'], 0, 0, False, True)
+
+if __name__ == '__main__':
+    print("can cast")
+    print_cancast_table(np.typecodes['All'])
+    print()
+    print("In these tables, ValueError is '!', OverflowError is '@', TypeError is '#'")
+    print()
+    print("scalar + scalar")
+    print_coercion_table(np.typecodes['All'], 0, 0, False)
+    print()
+    print("scalar + neg scalar")
+    print_coercion_table(np.typecodes['All'], 0, -1, False)
+    print()
+    print("array + scalar")
+    print_coercion_table(np.typecodes['All'], 0, 0, True)
+    print()
+    print("array + neg scalar")
+    print_coercion_table(np.typecodes['All'], 0, -1, True)
+    print()
+    print("promote_types")
+    print_coercion_table(np.typecodes['All'], 0, 0, False, True)
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index 4f1b46d4f..44f93a693 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -564,6 +564,26 @@ class TestAlmostEqual(_GenericTest):
         assert_equal(msgs[4], 'Max absolute difference: 2')
         assert_equal(msgs[5], 'Max relative difference: inf')
 
+    def test_error_message_2(self):
+        """Check the message is formatted correctly when either x or y is a scalar."""
+        x = 2
+        y = np.ones(20)
+        with pytest.raises(AssertionError) as exc_info:
+            self._assert_func(x, y)
+        msgs = str(exc_info.value).split('\n')
+        assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)')
+        assert_equal(msgs[4], 'Max absolute difference: 1.')
+        assert_equal(msgs[5], 'Max relative difference: 1.')
+
+        y = 2
+        x = np.ones(20)
+        with pytest.raises(AssertionError) as exc_info:
+            self._assert_func(x, y)
+        msgs = str(exc_info.value).split('\n')
+        assert_equal(msgs[3], 'Mismatched elements: 20 / 20 (100%)')
+        assert_equal(msgs[4], 'Max absolute difference: 1.')
+        assert_equal(msgs[5], 'Max relative difference: 0.5')
+
     def test_subclass_that_cannot_be_bool(self):
         # While we cannot guarantee testing functions will always work for
         # subclasses, the tests should ideally rely only on subclasses having
@@ -881,6 +901,15 @@ class TestAssertAllclose(object):
         assert_array_less(a, b)
         assert_allclose(a, b)
 
+    def test_report_max_relative_error(self):
+        a = np.array([0, 1])
+        b = np.array([0, 2])
+
+        with pytest.raises(AssertionError) as exc_info:
+            assert_allclose(a, b)
+        msg = str(exc_info.value)
+        assert_('Max relative difference: 0.5' in msg)
+
 
 class TestArrayAlmostEqualNulp(object):
 
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 98f19e348..975f6ad5d 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -7,10 +7,11 @@ from __future__ import division, absolute_import, print_function
 
 import warnings
 
-# 2018-04-04, numpy 1.15.0
+# 2018-04-04, numpy 1.15.0 ImportWarning
+# 2019-09-18, numpy 1.18.0 DeprecationWarning (changed)
 warnings.warn("Importing from numpy.testing.utils is deprecated "
               "since 1.15.0, import from numpy.testing instead.",
-              ImportWarning, stacklevel=2)
+              DeprecationWarning, stacklevel=2)
 
 from ._private.utils import *
 
@@ -19,7 +20,7 @@ __all__ = [
         'assert_array_equal', 'assert_array_less', 'assert_string_equal',
         'assert_array_almost_equal', 'assert_raises', 'build_err_msg',
         'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal',
-        'raises', 'rand', 'rundocs', 'runstring', 'verbose', 'measure',
+        'raises', 'rundocs', 'runstring', 'verbose', 'measure',
         'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex',
         'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings',
         'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings',
diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py
index 807c98652..e3621c0fd 100644
--- a/numpy/tests/test_public_api.py
+++ b/numpy/tests/test_public_api.py
@@ -1,14 +1,22 @@
 from __future__ import division, absolute_import, print_function
 
 import sys
+import subprocess
+import pkgutil
+import types
+import importlib
+import warnings
 
 import numpy as np
+import numpy
 import pytest
+
 try:
     import ctypes
 except ImportError:
     ctypes = None
 
+
 def check_dir(module, module_name=None):
     """Returns a mapping of all objects with the wrong __module__ attribute."""
     if module_name is None:
@@ -26,7 +34,8 @@ def check_dir(module, module_name=None):
     sys.version_info[0] < 3,
     reason="NumPy exposes slightly different functions on Python 2")
 def test_numpy_namespace():
-    # None of these objects are publicly documented.
+    # None of these objects are publicly documented to be part of the main
+    # NumPy namespace (some are useful though, others need to be cleaned up)
     undocumented = {
         'Tester': 'numpy.testing._private.nosetester.NoseTester',
         '_add_newdoc_ufunc': 'numpy.core._multiarray_umath._add_newdoc_ufunc',
@@ -69,6 +78,28 @@ def test_numpy_namespace():
     assert bad_results == whitelist
 
 
+@pytest.mark.parametrize('name', ['testing', 'Tester'])
+def test_import_lazy_import(name):
+    """Make sure we can actually use the modules we lazy load.
+
+    While not exported as part of the public API, it was accessible.  With the
+    use of __getattr__ and __dir__, this isn't always true: an infinite
+    recursion may happen.
+
+    This is the only way I found that would force the failure to appear on the
+    badly implemented code.
+
+    We also test for the presence of the lazily imported modules in dir().
+
+    """
+    exe = (sys.executable, '-c', "import numpy; numpy." + name)
+    result = subprocess.check_output(exe)
+    assert not result
+
+    # Make sure they are still in the __dir__
+    assert name in dir(np)
+
+
 def test_numpy_linalg():
     bad_results = check_dir(np.linalg)
     assert bad_results == {}
@@ -78,6 +109,7 @@ def test_numpy_fft():
     bad_results = check_dir(np.fft)
     assert bad_results == {}
 
+
 @pytest.mark.skipif(ctypes is None,
                     reason="ctypes not available in this python")
 def test_NPY_NO_EXPORT():
@@ -86,3 +118,381 @@ def test_NPY_NO_EXPORT():
     f = getattr(cdll, 'test_not_exported', None)
     assert f is None, ("'test_not_exported' is mistakenly exported, "
                       "NPY_NO_EXPORT does not work")
+
+
+# Historically NumPy has not used leading underscores for private submodules
+# much.  This has resulted in lots of things that look like public modules
+# (i.e. things that can be imported as `import numpy.somesubmodule.somefile`),
+# but were never intended to be public.  The PUBLIC_MODULES list contains
+# modules that are either public because they were meant to be, or because they
+# contain public functions/objects that aren't present in any other namespace
+# for whatever reason and therefore should be treated as public.
+#
+# The PRIVATE_BUT_PRESENT_MODULES list contains modules that look public (lack
+# of underscores) but should not be used.  For many of those modules the
+# current status is fine.  For others it may make sense to work on making them
+# private, to clean up our public API and avoid confusion.
+PUBLIC_MODULES = ['numpy.' + s for s in [
+    "ctypeslib",
+    "distutils",
+    "distutils.cpuinfo",
+    "distutils.exec_command",
+    "distutils.misc_util",
+    "distutils.log",
+    "distutils.system_info",
+    "doc",
+    "doc.basics",
+    "doc.broadcasting",
+    "doc.byteswapping",
+    "doc.constants",
+    "doc.creation",
+    "doc.dispatch",
+    "doc.glossary",
+    "doc.indexing",
+    "doc.internals",
+    "doc.misc",
+    "doc.structured_arrays",
+    "doc.subclassing",
+    "doc.ufuncs",
+    "dual",
+    "f2py",
+    "fft",
+    "lib",
+    "lib.format",  # was this meant to be public?
+    "lib.mixins",
+    "lib.recfunctions",
+    "lib.scimath",
+    "linalg",
+    "ma",
+    "ma.extras",
+    "ma.mrecords",
+    "matlib",
+    "polynomial",
+    "polynomial.chebyshev",
+    "polynomial.hermite",
+    "polynomial.hermite_e",
+    "polynomial.laguerre",
+    "polynomial.legendre",
+    "polynomial.polynomial",
+    "polynomial.polyutils",
+    "random",
+    "testing",
+    "version",
+]]
+
+
+PUBLIC_ALIASED_MODULES = [
+    "numpy.char",
+    "numpy.emath",
+    "numpy.rec",
+]
+
+
+PRIVATE_BUT_PRESENT_MODULES = ['numpy.' + s for s in [
+    "compat",
+    "compat.py3k",
+    "conftest",
+    "core",
+    "core.arrayprint",
+    "core.defchararray",
+    "core.einsumfunc",
+    "core.fromnumeric",
+    "core.function_base",
+    "core.getlimits",
+    "core.machar",
+    "core.memmap",
+    "core.multiarray",
+    "core.numeric",
+    "core.numerictypes",
+    "core.overrides",
+    "core.records",
+    "core.shape_base",
+    "core.umath",
+    "core.umath_tests",
+    "distutils.ccompiler",
+    "distutils.command",
+    "distutils.command.autodist",
+    "distutils.command.bdist_rpm",
+    "distutils.command.build",
+    "distutils.command.build_clib",
+    "distutils.command.build_ext",
+    "distutils.command.build_py",
+    "distutils.command.build_scripts",
+    "distutils.command.build_src",
+    "distutils.command.config",
+    "distutils.command.config_compiler",
+    "distutils.command.develop",
+    "distutils.command.egg_info",
+    "distutils.command.install",
+    "distutils.command.install_clib",
+    "distutils.command.install_data",
+    "distutils.command.install_headers",
+    "distutils.command.sdist",
+    "distutils.compat",
+    "distutils.conv_template",
+    "distutils.core",
+    "distutils.extension",
+    "distutils.fcompiler",
+    "distutils.fcompiler.absoft",
+    "distutils.fcompiler.compaq",
+    "distutils.fcompiler.environment",
+    "distutils.fcompiler.g95",
+    "distutils.fcompiler.gnu",
+    "distutils.fcompiler.hpux",
+    "distutils.fcompiler.ibm",
+    "distutils.fcompiler.intel",
+    "distutils.fcompiler.lahey",
+    "distutils.fcompiler.mips",
+    "distutils.fcompiler.nag",
+    "distutils.fcompiler.none",
+    "distutils.fcompiler.pathf95",
+    "distutils.fcompiler.pg",
+    "distutils.fcompiler.sun",
+    "distutils.fcompiler.vast",
+    "distutils.from_template",
+    "distutils.intelccompiler",
+    "distutils.lib2def",
+    "distutils.line_endings",
+    "distutils.mingw32ccompiler",
+    "distutils.msvccompiler",
+    "distutils.npy_pkg_config",
+    "distutils.numpy_distribution",
+    "distutils.pathccompiler",
+    "distutils.unixccompiler",
+    "f2py.auxfuncs",
+    "f2py.capi_maps",
+    "f2py.cb_rules",
+    "f2py.cfuncs",
+    "f2py.common_rules",
+    "f2py.crackfortran",
+    "f2py.diagnose",
+    "f2py.f2py2e",
+    "f2py.f2py_testing",
+    "f2py.f90mod_rules",
+    "f2py.func2subr",
+    "f2py.rules",
+    "f2py.use_rules",
+    "fft.helper",
+    "lib.arraypad",
+    "lib.arraysetops",
+    "lib.arrayterator",
+    "lib.financial",
+    "lib.function_base",
+    "lib.histograms",
+    "lib.index_tricks",
+    "lib.nanfunctions",
+    "lib.npyio",
+    "lib.polynomial",
+    "lib.shape_base",
+    "lib.stride_tricks",
+    "lib.twodim_base",
+    "lib.type_check",
+    "lib.ufunclike",
+    "lib.user_array",  # note: not in np.lib, but probably should just be deleted
+    "lib.utils",
+    "linalg.lapack_lite",
+    "linalg.linalg",
+    "ma.bench",
+    "ma.core",
+    "ma.testutils",
+    "ma.timer_comparison",
+    "matrixlib",
+    "matrixlib.defmatrix",
+    "random.bit_generator",
+    "random.bounded_integers",
+    "random.common",
+    "random.generator",
+    "random.mt19937",
+    "random.mtrand",
+    "random.pcg64",
+    "random.philox",
+    "random.sfc64",
+    "testing.print_coercion_tables",
+    "testing.utils",
+]]
+
+
+def is_unexpected(name):
+    """Return True if `name` is not a known or deliberately ignored module."""
+    if '._' in name or '.tests' in name or '.setup' in name:
+        return False
+
+    if name in PUBLIC_MODULES:
+        return False
+
+    if name in PUBLIC_ALIASED_MODULES:
+        return False
+
+    if name in PRIVATE_BUT_PRESENT_MODULES:
+        return False
+
+    return True
+
+
+# These are present in a directory with an __init__.py but cannot be imported.
+# code_generators/ isn't installed, but is present for an in-place build.
+SKIP_LIST = [
+    "numpy.core.code_generators",
+    "numpy.core.code_generators.genapi",
+    "numpy.core.code_generators.generate_umath",
+    "numpy.core.code_generators.ufunc_docstrings",
+    "numpy.core.code_generators.generate_numpy_api",
+    "numpy.core.code_generators.generate_ufunc_api",
+    "numpy.core.code_generators.numpy_api",
+    "numpy.core.cversions",
+    "numpy.core.generate_numpy_api",
+    "numpy.distutils.msvc9compiler",
+]
+
+
+def test_all_modules_are_expected():
+    """
+    Test that we don't add anything that looks like a new public module by
+    accident.  Check is based on filenames.
+    """
+
+    modnames = []
+    for _, modname, ispkg in pkgutil.walk_packages(path=np.__path__,
+                                                   prefix=np.__name__ + '.',
+                                                   onerror=None):
+        if is_unexpected(modname) and modname not in SKIP_LIST:
+            # We have a name that is new.  If that's on purpose, add it to
+            # PUBLIC_MODULES.  We don't expect to have to add anything to
+            # PRIVATE_BUT_PRESENT_MODULES.  Use an underscore in the name!
+            modnames.append(modname)
+
+    if modnames:
+        raise AssertionError("Found unexpected modules: {}".format(modnames))
+
+
+# Stuff that clearly shouldn't be in the API and is detected by the next test
+# below
+SKIP_LIST_2 = [
+    'numpy.math',
+    'numpy.distutils.log.sys',
+    'numpy.distutils.system_info.copy',
+    'numpy.distutils.system_info.distutils',
+    'numpy.distutils.system_info.log',
+    'numpy.distutils.system_info.os',
+    'numpy.distutils.system_info.platform',
+    'numpy.distutils.system_info.re',
+    'numpy.distutils.system_info.shutil',
+    'numpy.distutils.system_info.subprocess',
+    'numpy.distutils.system_info.sys',
+    'numpy.distutils.system_info.tempfile',
+    'numpy.distutils.system_info.textwrap',
+    'numpy.distutils.system_info.warnings',
+    'numpy.doc.constants.re',
+    'numpy.doc.constants.textwrap',
+    'numpy.lib.emath',
+    'numpy.lib.math',
+    'numpy.matlib.char',
+    'numpy.matlib.rec',
+    'numpy.matlib.emath',
+    'numpy.matlib.math',
+    'numpy.matlib.linalg',
+    'numpy.matlib.fft',
+    'numpy.matlib.random',
+    'numpy.matlib.ctypeslib',
+    'numpy.matlib.ma'
+]
+
+
+def test_all_modules_are_expected_2():
+    """
+    Method checking all objects. The pkgutil-based method in
+    `test_all_modules_are_expected` does not catch imports into a namespace,
+    only filenames.  So this test is more thorough, and checks things like::
+
+        from .lib import scimath as emath
+
+    To check if something in a module is (effectively) public, one can check if
+    there's anything in that namespace that's a public function/object but is
+    not exposed in a higher-level namespace.  For example for a `numpy.lib`
+    submodule::
+
+        mod = np.lib.mixins
+        for obj in mod.__all__:
+            if obj in np.__all__:
+                continue
+            elif obj in np.lib.__all__:
+                continue
+
+            else:
+                print(obj)
+
+    """
+
+    def find_unexpected_members(mod_name):
+        members = []
+        module = importlib.import_module(mod_name)
+        if hasattr(module, '__all__'):
+            objnames = module.__all__
+        else:
+            objnames = dir(module)
+
+        for objname in objnames:
+            if not objname.startswith('_'):
+                fullobjname = mod_name + '.' + objname
+                if isinstance(getattr(module, objname), types.ModuleType):
+                    if is_unexpected(fullobjname):
+                        if fullobjname not in SKIP_LIST_2:
+                            members.append(fullobjname)
+
+        return members
+
+    unexpected_members = find_unexpected_members("numpy")
+    for modname in PUBLIC_MODULES:
+        unexpected_members.extend(find_unexpected_members(modname))
+
+    if unexpected_members:
+        raise AssertionError("Found unexpected object(s) that look like "
+                             "modules: {}".format(unexpected_members))
+
+
+def test_api_importable():
+    """
+    Check that all submodules listed higher up in this file can be imported
+
+    Note that if a PRIVATE_BUT_PRESENT_MODULES entry goes missing, it may
+    simply need to be removed from the list (deprecation may or may not be
+    needed - apply common sense).
+    """
+    def check_importable(module_name):
+        try:
+            importlib.import_module(module_name)
+        except (ImportError, AttributeError):
+            return False
+
+        return True
+
+    module_names = []
+    for module_name in PUBLIC_MODULES:
+        if not check_importable(module_name):
+            module_names.append(module_name)
+
+    if module_names:
+        raise AssertionError("Modules in the public API that cannot be "
+                             "imported: {}".format(module_names))
+
+    for module_name in PUBLIC_ALIASED_MODULES:
+        try:
+            eval(module_name)
+        except AttributeError:
+            module_names.append(module_name)
+
+    if module_names:
+        raise AssertionError("Modules in the public API that were not "
+                             "found: {}".format(module_names))
+
+    with warnings.catch_warnings(record=True) as w:
+        warnings.filterwarnings('always', category=DeprecationWarning)
+        warnings.filterwarnings('always', category=ImportWarning)
+        for module_name in PRIVATE_BUT_PRESENT_MODULES:
+            if not check_importable(module_name):
+                module_names.append(module_name)
+
+    if module_names:
+        raise AssertionError("Modules that are not really public but looked "
+                             "public and can not be imported: "
+                             "{}".format(module_names))