31 files changed, 2180 insertions, 1365 deletions
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index ffd4971f5..ea472f1b3 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -947,66 +947,6 @@ add_newdoc('numpy.core.multiarray', 'empty',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'empty_like',
-    """
-    empty_like(prototype, dtype=None, order='K', subok=True)
-
-    Return a new array with the same shape and type as a given array.
-
-    Parameters
-    ----------
-    prototype : array_like
-        The shape and data-type of `prototype` define these same attributes
-        of the returned array.
-    dtype : data-type, optional
-        Overrides the data type of the result.
-
-        .. versionadded:: 1.6.0
-    order : {'C', 'F', 'A', or 'K'}, optional
-        Overrides the memory layout of the result. 'C' means C-order,
-        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
-        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
-        as closely as possible.
-
-        .. versionadded:: 1.6.0
-    subok : bool, optional.
-        If True, then the newly created array will use the sub-class
-        type of 'a', otherwise it will be a base-class array. Defaults
-        to True.
-
-    Returns
-    -------
-    out : ndarray
-        Array of uninitialized (arbitrary) data with the same
-        shape and type as `prototype`.
-
-    See Also
-    --------
-    ones_like : Return an array of ones with shape and type of input.
-    zeros_like : Return an array of zeros with shape and type of input.
-    full_like : Return a new array with shape of input filled with value.
-    empty : Return a new uninitialized array.
-
-    Notes
-    -----
-    This function does *not* initialize the returned array; to do that use
-    `zeros_like` or `ones_like` instead.  It may be marginally faster than
-    the functions that do set the array values.
-
-    Examples
-    --------
-    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
-    >>> np.empty_like(a)
-    array([[-1073741821, -1073741821,           3],    #random
-           [          0,           0, -1073741821]])
-    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
-    >>> np.empty_like(a)
-    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
-           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
-
-    """)
-
-
 add_newdoc('numpy.core.multiarray', 'scalar',
     """
     scalar(dtype, obj)
@@ -1284,163 +1224,6 @@ add_newdoc('numpy.core.multiarray', 'frombuffer',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'concatenate',
-    """
-    concatenate((a1, a2, ...), axis=0, out=None)
-
-    Join a sequence of arrays along an existing axis.
-
-    Parameters
-    ----------
-    a1, a2, ... : sequence of array_like
-        The arrays must have the same shape, except in the dimension
-        corresponding to `axis` (the first, by default).
-    axis : int, optional
-        The axis along which the arrays will be joined.  If axis is None,
-        arrays are flattened before use.  Default is 0.
-    out : ndarray, optional
-        If provided, the destination to place the result. The shape must be
-        correct, matching that of what concatenate would have returned if no
-        out argument were specified.
-
-    Returns
-    -------
-    res : ndarray
-        The concatenated array.
-
-    See Also
-    --------
-    ma.concatenate : Concatenate function that preserves input masks.
-    array_split : Split an array into multiple sub-arrays of equal or
-                  near-equal size.
-    split : Split array into a list of multiple sub-arrays of equal size.
-    hsplit : Split array into multiple sub-arrays horizontally (column wise)
-    vsplit : Split array into multiple sub-arrays vertically (row wise)
-    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
-    stack : Stack a sequence of arrays along a new axis.
-    hstack : Stack arrays in sequence horizontally (column wise)
-    vstack : Stack arrays in sequence vertically (row wise)
-    dstack : Stack arrays in sequence depth wise (along third dimension)
-    block : Assemble arrays from blocks.
-
-    Notes
-    -----
-    When one or more of the arrays to be concatenated is a MaskedArray,
-    this function will return a MaskedArray object instead of an ndarray,
-    but the input masks are *not* preserved. In cases where a MaskedArray
-    is expected as input, use the ma.concatenate function from the masked
-    array module instead.
-
-    Examples
-    --------
-    >>> a = np.array([[1, 2], [3, 4]])
-    >>> b = np.array([[5, 6]])
-    >>> np.concatenate((a, b), axis=0)
-    array([[1, 2],
-           [3, 4],
-           [5, 6]])
-    >>> np.concatenate((a, b.T), axis=1)
-    array([[1, 2, 5],
-           [3, 4, 6]])
-    >>> np.concatenate((a, b), axis=None)
-    array([1, 2, 3, 4, 5, 6])
-
-    This function will not preserve masking of MaskedArray inputs.
-
-    >>> a = np.ma.arange(3)
-    >>> a[1] = np.ma.masked
-    >>> b = np.arange(2, 5)
-    >>> a
-    masked_array(data=[0, --, 2],
-                 mask=[False,  True, False],
-           fill_value=999999)
-    >>> b
-    array([2, 3, 4])
-    >>> np.concatenate([a, b])
-    masked_array(data=[0, 1, 2, 2, 3, 4],
-                 mask=False,
-           fill_value=999999)
-    >>> np.ma.concatenate([a, b])
-    masked_array(data=[0, --, 2, 2, 3, 4],
-                 mask=[False,  True, False, False, False, False],
-           fill_value=999999)
-
-    """)
-
-add_newdoc('numpy.core', 'inner',
-    """
-    inner(a, b)
-
-    Inner product of two arrays.
-
-    Ordinary inner product of vectors for 1-D arrays (without complex
-    conjugation), in higher dimensions a sum product over the last axes.
-
-    Parameters
-    ----------
-    a, b : array_like
-        If `a` and `b` are nonscalar, their last dimensions must match.
-
-    Returns
-    -------
-    out : ndarray
-        `out.shape = a.shape[:-1] + b.shape[:-1]`
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` and `b` has different size.
-
-    See Also
-    --------
-    tensordot : Sum products over arbitrary axes.
-    dot : Generalised matrix product, using second last dimension of `b`.
-    einsum : Einstein summation convention.
-
-    Notes
-    -----
-    For vectors (1-D arrays) it computes the ordinary inner-product::
-
-        np.inner(a, b) = sum(a[:]*b[:])
-
-    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
-
-        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
-
-    or explicitly::
-
-        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
-             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
-
-    In addition `a` or `b` may be scalars, in which case::
-
-       np.inner(a,b) = a*b
-
-    Examples
-    --------
-    Ordinary inner product for vectors:
-
-    >>> a = np.array([1,2,3])
-    >>> b = np.array([0,1,0])
-    >>> np.inner(a, b)
-    2
-
-    A multidimensional example:
-
-    >>> a = np.arange(24).reshape((2,3,4))
-    >>> b = np.arange(4)
-    >>> np.inner(a, b)
-    array([[ 14,  38,  62],
-           [ 86, 110, 134]])
-
-    An example where `b` is a scalar:
-
-    >>> np.inner(np.eye(2), 7)
-    array([[ 7.,  0.],
-           [ 0.,  7.]])
-
-    """)
-
 add_newdoc('numpy.core', 'fastCopyAndTranspose',
     """_fastCopyAndTranspose(a)""")
 
@@ -1575,263 +1358,6 @@ add_newdoc('numpy.core.multiarray', 'set_numeric_ops',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'where',
-    """
-    where(condition, [x, y])
-
-    Return elements chosen from `x` or `y` depending on `condition`.
-
-    .. note::
-        When only `condition` is provided, this function is a shorthand for
-        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
-        preferred, as it behaves correctly for subclasses. The rest of this
-        documentation covers only the case where all three arguments are
-        provided.
-
-    Parameters
-    ----------
-    condition : array_like, bool
-        Where True, yield `x`, otherwise yield `y`.
-    x, y : array_like
-        Values from which to choose. `x`, `y` and `condition` need to be
-        broadcastable to some shape.
-
-    Returns
-    -------
-    out : ndarray
-        An array with elements from `x` where `condition` is True, and elements
-        from `y` elsewhere.
-
-    See Also
-    --------
-    choose
-    nonzero : The function that is called when x and y are omitted
-
-    Notes
-    -----
-    If all the arrays are 1-D, `where` is equivalent to::
-
-        [xv if c else yv
-         for c, xv, yv in zip(condition, x, y)]
-
-    Examples
-    --------
-    >>> a = np.arange(10)
-    >>> a
-    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-    >>> np.where(a < 5, a, 10*a)
-    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
-
-    This can be used on multidimensional arrays too:
-
-    >>> np.where([[True, False], [True, True]],
-    ...          [[1, 2], [3, 4]],
-    ...          [[9, 8], [7, 6]])
-    array([[1, 8],
-           [3, 4]])
-
-    The shapes of x, y, and the condition are broadcast together:
-
-    >>> x, y = np.ogrid[:3, :4]
-    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
-    array([[10,  0,  0,  0],
-           [10, 11,  1,  1],
-           [10, 11, 12,  2]])
-
-    >>> a = np.array([[0, 1, 2],
-    ...               [0, 2, 4],
-    ...               [0, 3, 6]])
-    >>> np.where(a < 4, a, -1)  # -1 is broadcast
-    array([[ 0,  1,  2],
-           [ 0,  2, -1],
-           [ 0,  3, -1]])
-    """)
-
-
-add_newdoc('numpy.core.multiarray', 'lexsort',
-    """
-    lexsort(keys, axis=-1)
-
-    Perform an indirect stable sort using a sequence of keys.
-
-    Given multiple sorting keys, which can be interpreted as columns in a
-    spreadsheet, lexsort returns an array of integer indices that describes
-    the sort order by multiple columns. The last key in the sequence is used
-    for the primary sort order, the second-to-last key for the secondary sort
-    order, and so on. The keys argument must be a sequence of objects that
-    can be converted to arrays of the same shape. If a 2D array is provided
-    for the keys argument, it's rows are interpreted as the sorting keys and
-    sorting is according to the last row, second last row etc.
-
-    Parameters
-    ----------
-    keys : (k, N) array or tuple containing k (N,)-shaped sequences
-        The `k` different "columns" to be sorted.  The last column (or row if
-        `keys` is a 2D array) is the primary sort key.
-    axis : int, optional
-        Axis to be indirectly sorted.  By default, sort over the last axis.
-
-    Returns
-    -------
-    indices : (N,) ndarray of ints
-        Array of indices that sort the keys along the specified axis.
-
-    See Also
-    --------
-    argsort : Indirect sort.
-    ndarray.sort : In-place sort.
-    sort : Return a sorted copy of an array.
-
-    Examples
-    --------
-    Sort names: first by surname, then by name.
-
-    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
-    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
-    >>> ind = np.lexsort((first_names, surnames))
-    >>> ind
-    array([1, 2, 0])
-
-    >>> [surnames[i] + ", " + first_names[i] for i in ind]
-    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
-
-    Sort two columns of numbers:
-
-    >>> a = [1,5,1,4,3,4,4] # First column
-    >>> b = [9,4,0,4,0,2,1] # Second column
-    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
-    >>> print(ind)
-    [2 0 4 6 5 3 1]
-
-    >>> [(a[i],b[i]) for i in ind]
-    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
-
-    Note that sorting is first according to the elements of ``a``.
-    Secondary sorting is according to the elements of ``b``.
-
-    A normal ``argsort`` would have yielded:
-
-    >>> [(a[i],b[i]) for i in np.argsort(a)]
-    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
-
-    Structured arrays are sorted lexically by ``argsort``:
-
-    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
-    ...              dtype=np.dtype([('x', int), ('y', int)]))
-
-    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
-    array([2, 0, 4, 6, 5, 3, 1])
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'can_cast',
-    """
-    can_cast(from_, to, casting='safe')
-
-    Returns True if cast between data types can occur according to the
-    casting rule.  If from is a scalar or array scalar, also returns
-    True if the scalar value can be cast without overflow or truncation
-    to an integer.
-
-    Parameters
-    ----------
-    from_ : dtype, dtype specifier, scalar, or array
-        Data type, scalar, or array to cast from.
-    to : dtype or dtype specifier
-        Data type to cast to.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
-        Controls what kind of data casting may occur.
-
-          * 'no' means the data types should not be cast at all.
-          * 'equiv' means only byte-order changes are allowed.
-          * 'safe' means only casts which can preserve values are allowed.
-          * 'same_kind' means only safe casts or casts within a kind,
-            like float64 to float32, are allowed.
-          * 'unsafe' means any data conversions may be done.
-
-    Returns
-    -------
-    out : bool
-        True if cast can occur according to the casting rule.
-
-    Notes
-    -----
-    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
-    casting mode for integer/float dtype and string dtype if the string dtype
-    length is not long enough to store the max integer/float value converted
-    to a string. Previously can_cast in 'safe' mode returned True for
-    integer/float dtype and a string dtype of any length.
-
-    See also
-    --------
-    dtype, result_type
-
-    Examples
-    --------
-    Basic examples
-
-    >>> np.can_cast(np.int32, np.int64)
-    True
-    >>> np.can_cast(np.float64, complex)
-    True
-    >>> np.can_cast(complex, float)
-    False
-
-    >>> np.can_cast('i8', 'f8')
-    True
-    >>> np.can_cast('i8', 'f4')
-    False
-    >>> np.can_cast('i4', 'S4')
-    False
-
-    Casting scalars
-
-    >>> np.can_cast(100, 'i1')
-    True
-    >>> np.can_cast(150, 'i1')
-    False
-    >>> np.can_cast(150, 'u1')
-    True
-
-    >>> np.can_cast(3.5e100, np.float32)
-    False
-    >>> np.can_cast(1000.0, np.float32)
-    True
-
-    Array scalar checks the value, array does not
-
-    >>> np.can_cast(np.array(1000.0), np.float32)
-    True
-    >>> np.can_cast(np.array([1000.0]), np.float32)
-    False
-
-    Using the casting rules
-
-    >>> np.can_cast('i8', 'i8', 'no')
-    True
-    >>> np.can_cast('<i8', '>i8', 'no')
-    False
-
-    >>> np.can_cast('<i8', '>i8', 'equiv')
-    True
-    >>> np.can_cast('<i4', '>i8', 'equiv')
-    False
-
-    >>> np.can_cast('<i4', '>i8', 'safe')
-    True
-    >>> np.can_cast('<i8', '>i4', 'safe')
-    False
-
-    >>> np.can_cast('<i8', '>i4', 'same_kind')
-    True
-    >>> np.can_cast('<i8', '>u4', 'same_kind')
-    False
-
-    >>> np.can_cast('<i8', '>u4', 'unsafe')
-    True
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'promote_types',
     """
     promote_types(type1, type2)
@@ -1892,123 +1418,6 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
 
     """)
 
-add_newdoc('numpy.core.multiarray', 'min_scalar_type',
-    """
-    min_scalar_type(a)
-
-    For scalar ``a``, returns the data type with the smallest size
-    and smallest scalar kind which can hold its value.  For non-scalar
-    array ``a``, returns the vector's dtype unmodified.
-
-    Floating point values are not demoted to integers,
-    and complex values are not demoted to floats.
-
-    Parameters
-    ----------
-    a : scalar or array_like
-        The value whose minimal data type is to be found.
-
-    Returns
-    -------
-    out : dtype
-        The minimal data type.
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    See Also
-    --------
-    result_type, promote_types, dtype, can_cast
-
-    Examples
-    --------
-    >>> np.min_scalar_type(10)
-    dtype('uint8')
-
-    >>> np.min_scalar_type(-260)
-    dtype('int16')
-
-    >>> np.min_scalar_type(3.1)
-    dtype('float16')
-
-    >>> np.min_scalar_type(1e50)
-    dtype('float64')
-
-    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
-    dtype('float64')
-
-    """)
-
-add_newdoc('numpy.core.multiarray', 'result_type',
-    """
-    result_type(*arrays_and_dtypes)
-
-    Returns the type that results from applying the NumPy
-    type promotion rules to the arguments.
-
-    Type promotion in NumPy works similarly to the rules in languages
-    like C++, with some slight differences.  When both scalars and
-    arrays are used, the array's type takes precedence and the actual value
-    of the scalar is taken into account.
-
-    For example, calculating 3*a, where a is an array of 32-bit floats,
-    intuitively should result in a 32-bit float output.  If the 3 is a
-    32-bit integer, the NumPy rules indicate it can't convert losslessly
-    into a 32-bit float, so a 64-bit float should be the result type.
-    By examining the value of the constant, '3', we see that it fits in
-    an 8-bit integer, which can be cast losslessly into the 32-bit float.
-
-    Parameters
-    ----------
-    arrays_and_dtypes : list of arrays and dtypes
-        The operands of some operation whose result type is needed.
-
-    Returns
-    -------
-    out : dtype
-        The result type.
-
-    See also
-    --------
-    dtype, promote_types, min_scalar_type, can_cast
-
-    Notes
-    -----
-    .. versionadded:: 1.6.0
-
-    The specific algorithm used is as follows.
-
-    Categories are determined by first checking which of boolean,
-    integer (int/uint), or floating point (float/complex) the maximum
-    kind of all the arrays and the scalars are.
-
-    If there are only scalars or the maximum category of the scalars
-    is higher than the maximum category of the arrays,
-    the data types are combined with :func:`promote_types`
-    to produce the return value.
-
-    Otherwise, `min_scalar_type` is called on each array, and
-    the resulting data types are all combined with :func:`promote_types`
-    to produce the return value.
-
-    The set of int values is not a subset of the uint values for types
-    with the same number of bits, something not reflected in
-    :func:`min_scalar_type`, but handled as a special case in `result_type`.
-
-    Examples
-    --------
-    >>> np.result_type(3, np.arange(7, dtype='i1'))
-    dtype('int8')
-
-    >>> np.result_type('i4', 'c8')
-    dtype('complex128')
-
-    >>> np.result_type(3.0, -2)
-    dtype('float64')
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'newbuffer',
     """
     newbuffer(size)
@@ -2061,91 +1470,6 @@ add_newdoc('numpy.core.multiarray', 'getbuffer',
 
     """)
 
-add_newdoc('numpy.core', 'dot',
-    """
-    dot(a, b, out=None)
-
-    Dot product of two arrays. Specifically,
-
-    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
-      (without complex conjugation).
-
-    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
-      but using :func:`matmul` or ``a @ b`` is preferred.
-
-    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
-      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
-
-    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
-      the last axis of `a` and `b`.
-
-    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
-      sum product over the last axis of `a` and the second-to-last axis of `b`::
-
-        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
-
-    Parameters
-    ----------
-    a : array_like
-        First argument.
-    b : array_like
-        Second argument.
-    out : ndarray, optional
-        Output argument. This must have the exact kind that would be returned
-        if it was not used. In particular, it must have the right type, must be
-        C-contiguous, and its dtype must be the dtype that would be returned
-        for `dot(a,b)`. This is a performance feature. Therefore, if these
-        conditions are not met, an exception is raised, instead of attempting
-        to be flexible.
-
-    Returns
-    -------
-    output : ndarray
-        Returns the dot product of `a` and `b`.  If `a` and `b` are both
-        scalars or both 1-D arrays then a scalar is returned; otherwise
-        an array is returned.
-        If `out` is given, then it is returned.
-
-    Raises
-    ------
-    ValueError
-        If the last dimension of `a` is not the same size as
-        the second-to-last dimension of `b`.
-
-    See Also
-    --------
-    vdot : Complex-conjugating dot product.
-    tensordot : Sum products over arbitrary axes.
-    einsum : Einstein summation convention.
-    matmul : '@' operator as method with out parameter.
-
-    Examples
-    --------
-    >>> np.dot(3, 4)
-    12
-
-    Neither argument is complex-conjugated:
-
-    >>> np.dot([2j, 3j], [2j, 3j])
-    (-13+0j)
-
-    For 2-D arrays it is the matrix product:
-
-    >>> a = [[1, 0], [0, 1]]
-    >>> b = [[4, 1], [2, 2]]
-    >>> np.dot(a, b)
-    array([[4, 1],
-           [2, 2]])
-
-    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
-    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
-    >>> np.dot(a, b)[2,3,2,1,2,2]
-    499128
-    >>> sum(a[2,3,2,:] * b[1,2,:,2])
-    499128
-
-    """)
-
 add_newdoc('numpy.core', 'matmul',
     """
     matmul(a, b, out=None)
@@ -2269,61 +1593,6 @@ add_newdoc('numpy.core', 'matmul',
 
     """)
 
-add_newdoc('numpy.core', 'vdot',
-    """
-    vdot(a, b)
-
-    Return the dot product of two vectors.
-
-    The vdot(`a`, `b`) function handles complex numbers differently than
-    dot(`a`, `b`).  If the first argument is complex the complex conjugate
-    of the first argument is used for the calculation of the dot product.
-
-    Note that `vdot` handles multidimensional arrays differently than `dot`:
-    it does *not* perform a matrix product, but flattens input arguments
-    to 1-D vectors first. Consequently, it should only be used for vectors.
-
-    Parameters
-    ----------
-    a : array_like
-        If `a` is complex the complex conjugate is taken before calculation
-        of the dot product.
-    b : array_like
-        Second argument to the dot product.
-
-    Returns
-    -------
-    output : ndarray
-        Dot product of `a` and `b`.  Can be an int, float, or
-        complex depending on the types of `a` and `b`.
-
-    See Also
-    --------
-    dot : Return the dot product without using the complex conjugate of the
-          first argument.
-
-    Examples
-    --------
-    >>> a = np.array([1+2j,3+4j])
-    >>> b = np.array([5+6j,7+8j])
-    >>> np.vdot(a, b)
-    (70-8j)
-    >>> np.vdot(b, a)
-    (70+8j)
-
-    Note that higher-dimensional arrays are flattened!
-
-    >>> a = np.array([[1, 4], [5, 6]])
-    >>> b = np.array([[4, 1], [2, 2]])
-    >>> np.vdot(a, b)
-    30
-    >>> np.vdot(b, a)
-    30
-    >>> 1*4 + 4*1 + 5*2 + 6*2
-    30
-
-    """)
-
 add_newdoc('numpy.core.multiarray', 'c_einsum',
     """
     c_einsum(subscripts, *operands, out=None, dtype=None, order='K',
@@ -6795,211 +6064,6 @@ add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('weekmask',
 add_newdoc('numpy.core.multiarray', 'busdaycalendar', ('holidays',
     """A copy of the holiday array indicating additional invalid days."""))
 
-add_newdoc('numpy.core.multiarray', 'is_busday',
-    """
-    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    Calculates which of the given dates are valid days, and which are not.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of bool, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of bool
-        An array with the same shape as ``dates``, containing True for
-        each valid day, and False for each invalid day.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    busday_offset : Applies an offset counted in valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # The weekdays are Friday, Saturday, and Monday
-    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
-    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
-    array([False, False,  True], dtype='bool')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_offset',
-    """
-    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
-
-    First adjusts the date to fall on a valid day according to
-    the ``roll`` rule, then applies offsets to the given dates
-    counted in valid days.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    dates : array_like of datetime64[D]
-        The array of dates to process.
-    offsets : array_like of int
-        The array of offsets, which is broadcast with ``dates``.
-    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
-        How to treat dates that do not fall on a valid day. The default
-        is 'raise'.
-
-          * 'raise' means to raise an exception for an invalid day.
-          * 'nat' means to return a NaT (not-a-time) for an invalid day.
-          * 'forward' and 'following' mean to take the first valid day
-            later in time.
-          * 'backward' and 'preceding' mean to take the first valid day
-            earlier in time.
-          * 'modifiedfollowing' means to take the first valid day
-            later in time unless it is across a Month boundary, in which
-            case to take the first valid day earlier in time.
-          * 'modifiedpreceding' means to take the first valid day
-            earlier in time unless it is across a Month boundary, in which
-            case to take the first valid day later in time.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of datetime64[D], optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of datetime64[D]
-        An array with a shape from broadcasting ``dates`` and ``offsets``
-        together, containing the dates with offsets applied.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_count : Counts how many valid days are in a half-open date range.
-
-    Examples
-    --------
-    >>> # First business day in October 2011 (not accounting for holidays)
-    ... np.busday_offset('2011-10', 0, roll='forward')
-    numpy.datetime64('2011-10-03','D')
-    >>> # Last business day in February 2012 (not accounting for holidays)
-    ... np.busday_offset('2012-03', -1, roll='forward')
-    numpy.datetime64('2012-02-29','D')
-    >>> # Third Wednesday in January 2011
-    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
-    numpy.datetime64('2011-01-19','D')
-    >>> # 2012 Mother's Day in Canada and the U.S.
-    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
-    numpy.datetime64('2012-05-13','D')
-
-    >>> # First business day on or after a date
-    ... np.busday_offset('2011-03-20', 0, roll='forward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 0, roll='forward')
-    numpy.datetime64('2011-03-22','D')
-    >>> # First business day after a date
-    ... np.busday_offset('2011-03-20', 1, roll='backward')
-    numpy.datetime64('2011-03-21','D')
-    >>> np.busday_offset('2011-03-22', 1, roll='backward')
-    numpy.datetime64('2011-03-23','D')
-    """)
-
-add_newdoc('numpy.core.multiarray', 'busday_count',
-    """
-    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
-
-    Counts the number of valid days between `begindates` and
-    `enddates`, not including the day of `enddates`.
-
-    If ``enddates`` specifies a date value that is earlier than the
-    corresponding ``begindates`` date value, the count will be negative.
-
-    .. versionadded:: 1.7.0
-
-    Parameters
-    ----------
-    begindates : array_like of datetime64[D]
-        The array of the first dates for counting.
-    enddates : array_like of datetime64[D]
-        The array of the end dates for counting, which are excluded
-        from the count themselves.
-    weekmask : str or array_like of bool, optional
-        A seven-element array indicating which of Monday through Sunday are
-        valid days. May be specified as a length-seven list or array, like
-        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
-        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
-        weekdays, optionally separated by white space. Valid abbreviations
-        are: Mon Tue Wed Thu Fri Sat Sun
-    holidays : array_like of datetime64[D], optional
-        An array of dates to consider as invalid dates.  They may be
-        specified in any order, and NaT (not-a-time) dates are ignored.
-        This list is saved in a normalized form that is suited for
-        fast calculations of valid days.
-    busdaycal : busdaycalendar, optional
-        A `busdaycalendar` object which specifies the valid days. If this
-        parameter is provided, neither weekmask nor holidays may be
-        provided.
-    out : array of int, optional
-        If provided, this array is filled with the result.
-
-    Returns
-    -------
-    out : array of int
-        An array with a shape from broadcasting ``begindates`` and ``enddates``
-        together, containing the number of valid days between
-        the begin and end dates.
-
-    See Also
-    --------
-    busdaycalendar: An object that specifies a custom set of valid days.
-    is_busday : Returns a boolean array indicating valid days.
-    busday_offset : Applies an offset counted in valid days.
-
-    Examples
-    --------
-    >>> # Number of weekdays in January 2011
-    ... np.busday_count('2011-01', '2011-02')
-    21
-    >>> # Number of weekdays in 2011
-    ...  np.busday_count('2011', '2012')
-    260
-    >>> # Number of Saturdays in 2011
-    ... np.busday_count('2011', '2012', weekmask='Sat')
-    53
-    """)
-
 add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     """
     normalize_axis_index(axis, ndim, msg_prefix=None)
@@ -7051,67 +6115,6 @@ add_newdoc('numpy.core.multiarray', 'normalize_axis_index',
     AxisError: axes_arg: axis -4 is out of bounds for array of dimension 3
     """)
 
-add_newdoc('numpy.core.multiarray', 'datetime_as_string',
-    """
-    datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind')
-
-    Convert an array of datetimes into an array of strings.
-
-    Parameters
-    ----------
-    arr : array_like of datetime64
-        The array of UTC timestamps to format.
-    unit : str
-        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
-    timezone : {'naive', 'UTC', 'local'} or tzinfo
-        Timezone information to use when displaying the datetime. If 'UTC', end
-        with a Z to indicate UTC time. If 'local', convert to the local timezone
-        first, and suffix with a +-#### timezone offset. If a tzinfo object,
-        then do as with 'local', but use the specified timezone.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
-        Casting to allow when changing between datetime units.
-
-    Returns
-    -------
-    str_arr : ndarray
-        An array of strings the same shape as `arr`.
-
-    Examples
-    --------
-    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
-    >>> d
-    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
-           '2002-10-27T07:30'], dtype='datetime64[m]')
-
-    Setting the timezone to UTC shows the same information, but with a Z suffix
-
-    >>> np.datetime_as_string(d, timezone='UTC')
-    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
-           '2002-10-27T07:30Z'], dtype='<U35')
-
-    Note that we picked datetimes that cross a DST boundary. Passing in a
-    ``pytz`` timezone object will print the appropriate offset
-
-    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
-    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
-           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
-
-    Passing in a unit will change the precision
-
-    >>> np.datetime_as_string(d, unit='h')
-    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
-          dtype='<U32')
-    >>> np.datetime_as_string(d, unit='s')
-    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
-           '2002-10-27T07:30:00'], dtype='<U38')
-
-    'casting' can be used to specify whether precision can be changed
-
-    >>> np.datetime_as_string(d, unit='h', casting='safe')
-    TypeError: Cannot create a datetime string as units 'h' from a NumPy
-    datetime with units 'm' according to the rule 'safe'
-    """)
-
 add_newdoc('numpy.core.multiarray', 'datetime_data',
     """
     datetime_data(dtype, /)
diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py
index 26c44eaaf..d115e0fa6 100644
--- a/numpy/core/_dtype.py
+++ b/numpy/core/_dtype.py
@@ -5,9 +5,44 @@ String handling is much easier to do correctly in python.
 """
 from __future__ import division, absolute_import, print_function
 
+import sys
+
 import numpy as np
 
 
+_kind_to_stem = {
+    'u': 'uint',
+    'i': 'int',
+    'c': 'complex',
+    'f': 'float',
+    'b': 'bool',
+    'V': 'void',
+    'O': 'object',
+    'M': 'datetime',
+    'm': 'timedelta'
+}
+if sys.version_info[0] >= 3:
+    _kind_to_stem.update({
+        'S': 'bytes',
+        'U': 'str'
+    })
+else:
+    _kind_to_stem.update({
+        'S': 'string',
+        'U': 'unicode'
+    })
+
+
+def _kind_name(dtype):
+    try:
+        return _kind_to_stem[dtype.kind]
+    except KeyError:
+        raise RuntimeError(
+            "internal dtype error, unknown kind {!r}"
+            .format(dtype.kind)
+        )
+
+
 def __str__(dtype):
     if dtype.fields is not None:
         return _struct_str(dtype, include_align=True)
@@ -122,20 +157,7 @@ def _scalar_str(dtype, short):
 
         # Longer repr, like 'float64'
         else:
-            kindstrs = {
-                'u': "uint",
-                'i': "int",
-                'f': "float",
-                'c': "complex"
-            }
-            try:
-                kindstr = kindstrs[dtype.kind]
-            except KeyError:
-                raise RuntimeError(
-                    "internal dtype repr error, unknown kind {!r}"
-                    .format(dtype.kind)
-                )
-            return "'%s%d'" % (kindstr, 8*dtype.itemsize)
+            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
 
     elif dtype.isbuiltin == 2:
         return dtype.type.__name__
diff --git a/numpy/core/_type_aliases.py b/numpy/core/_type_aliases.py
index 8d629aa07..cce6c0425 100644
--- a/numpy/core/_type_aliases.py
+++ b/numpy/core/_type_aliases.py
@@ -29,6 +29,7 @@ from numpy.compat import unicode
 from numpy._globals import VisibleDeprecationWarning
 from numpy.core._string_helpers import english_lower, english_capitalize
 from numpy.core.multiarray import typeinfo, dtype
+from numpy.core._dtype import _kind_name
 
 
 sctypeDict = {}      # Contains all leaf-node scalar types with aliases
@@ -61,28 +62,6 @@ for k, v in typeinfo.items():
 
 _concrete_types = set(v.type for k, v in _concrete_typeinfo.items())
 
-_kind_to_stem = {
-    'u': 'uint',
-    'i': 'int',
-    'c': 'complex',
-    'f': 'float',
-    'b': 'bool',
-    'V': 'void',
-    'O': 'object',
-    'M': 'datetime',
-    'm': 'timedelta'
-}
-if sys.version_info[0] >= 3:
-    _kind_to_stem.update({
-        'S': 'bytes',
-        'U': 'str'
-    })
-else:
-    _kind_to_stem.update({
-        'S': 'string',
-        'U': 'unicode'
-    })
-
 
 def _bits_of(obj):
     try:
@@ -100,8 +79,9 @@ def _bits_of(obj):
 def bitname(obj):
     """Return a bit-width name for a given type object"""
     bits = _bits_of(obj)
-    char = dtype(obj).kind
-    base = _kind_to_stem[char]
+    dt = dtype(obj)
+    char = dt.kind
+    base = _kind_name(dt)
 
     if base == 'object':
         bits = 0
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index 43c32eac6..c8b998bfc 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -43,3 +43,5 @@
 # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase.
 0x0000000c = a1bc756c5782853ec2e3616cf66869d8
 
+# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject
+0x0000000d = a1bc756c5782853ec2e3616cf66869d8
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 6d0a0add5..0a8c7bbec 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -22,6 +22,7 @@ from .numerictypes import string_, unicode_, integer, object_, bool_, character
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
+from numpy.core.overrides import array_function_dispatch
 from numpy.compat import asbytes, long
 import numpy
 
@@ -95,6 +96,11 @@ def _get_num_chars(a):
     return a.itemsize
 
 
+def _binary_op_dispatcher(x1, x2):
+    return (x1, x2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
 def equal(x1, x2):
     """
     Return (x1 == x2) element-wise.
@@ -119,6 +125,8 @@ def equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '==', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def not_equal(x1, x2):
     """
     Return (x1 != x2) element-wise.
@@ -143,6 +151,8 @@ def not_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '!=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater_equal(x1, x2):
     """
     Return (x1 >= x2) element-wise.
@@ -168,6 +178,8 @@ def greater_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '>=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less_equal(x1, x2):
     """
     Return (x1 <= x2) element-wise.
@@ -192,6 +204,8 @@ def less_equal(x1, x2):
     """
     return compare_chararrays(x1, x2, '<=', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def greater(x1, x2):
     """
     Return (x1 > x2) element-wise.
@@ -216,6 +230,8 @@ def greater(x1, x2):
     """
     return compare_chararrays(x1, x2, '>', True)
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def less(x1, x2):
     """
     Return (x1 < x2) element-wise.
@@ -240,6 +256,12 @@ def less(x1, x2):
     """
     return compare_chararrays(x1, x2, '<', True)
 
+
+def _unary_op_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_unary_op_dispatcher)
 def str_len(a):
     """
     Return len(a) element-wise.
@@ -259,6 +281,8 @@ def str_len(a):
     """
     return _vec_string(a, integer, '__len__')
 
+
+@array_function_dispatch(_binary_op_dispatcher)
 def add(x1, x2):
     """
     Return element-wise string concatenation for two arrays of str or unicode.
@@ -285,6 +309,12 @@ def add(x1, x2):
     dtype = _use_unicode(arr1, arr2)
     return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
 
+
+def _multiply_dispatcher(a, i):
+    return (a,)
+
+
+@array_function_dispatch(_multiply_dispatcher)
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
@@ -313,6 +343,12 @@ def multiply(a, i):
     return _vec_string(
         a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
 
+
+def _mod_dispatcher(a, values):
+    return (a, values)
+
+
+@array_function_dispatch(_mod_dispatcher)
 def mod(a, values):
     """
     Return (a % i), that is pre-Python 2.6 string formatting
@@ -339,6 +375,8 @@ def mod(a, values):
     return _to_string_or_unicode_array(
         _vec_string(a, object_, '__mod__', (values,)))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def capitalize(a):
     """
     Return a copy of `a` with only the first character of each element
@@ -377,6 +415,11 @@ def capitalize(a):
     return _vec_string(a_arr, a_arr.dtype, 'capitalize')
 
 
+def _center_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_center_dispatcher)
 def center(a, width, fillchar=' '):
     """
     Return a copy of `a` with its elements centered in a string of
@@ -413,6 +456,11 @@ def center(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
 
 
+def _count_dispatcher(a, sub, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
 def count(a, sub, start=0, end=None):
     """
     Returns an array with the number of non-overlapping occurrences of
@@ -459,6 +507,11 @@ def count(a, sub, start=0, end=None):
     return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
 
 
+def _code_dispatcher(a, encoding=None, errors=None):
+    return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
 def decode(a, encoding=None, errors=None):
     """
     Calls `str.decode` element-wise.
@@ -505,6 +558,7 @@ def decode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
 
 
+@array_function_dispatch(_code_dispatcher)
 def encode(a, encoding=None, errors=None):
     """
     Calls `str.encode` element-wise.
@@ -540,6 +594,11 @@ def encode(a, encoding=None, errors=None):
         _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
 
 
+def _endswith_dispatcher(a, suffix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_endswith_dispatcher)
 def endswith(a, suffix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -584,6 +643,11 @@ def endswith(a, suffix, start=0, end=None):
         a, bool_, 'endswith', [suffix, start] + _clean_args(end))
 
 
+def _expandtabs_dispatcher(a, tabsize=None):
+    return (a,)
+
+
+@array_function_dispatch(_expandtabs_dispatcher)
 def expandtabs(a, tabsize=8):
     """
     Return a copy of each string element where all tab characters are
@@ -619,6 +683,7 @@ def expandtabs(a, tabsize=8):
         _vec_string(a, object_, 'expandtabs', (tabsize,)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def find(a, sub, start=0, end=None):
     """
     For each element, return the lowest index in the string where
@@ -654,6 +719,7 @@ def find(a, sub, start=0, end=None):
         a, integer, 'find', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def index(a, sub, start=0, end=None):
     """
     Like `find`, but raises `ValueError` when the substring is not found.
@@ -681,6 +747,8 @@ def index(a, sub, start=0, end=None):
     return _vec_string(
         a, integer, 'index', [sub, start] + _clean_args(end))
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalnum(a):
     """
     Returns true for each element if all characters in the string are
@@ -705,6 +773,8 @@ def isalnum(a):
     """
     return _vec_string(a, bool_, 'isalnum')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isalpha(a):
     """
     Returns true for each element if all characters in the string are
@@ -729,6 +799,8 @@ def isalpha(a):
     """
     return _vec_string(a, bool_, 'isalpha')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isdigit(a):
     """
     Returns true for each element if all characters in the string are
@@ -753,6 +825,8 @@ def isdigit(a):
     """
     return _vec_string(a, bool_, 'isdigit')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def islower(a):
     """
     Returns true for each element if all cased characters in the
@@ -778,6 +852,8 @@ def islower(a):
     """
     return _vec_string(a, bool_, 'islower')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isspace(a):
     """
     Returns true for each element if there are only whitespace
@@ -803,6 +879,8 @@ def isspace(a):
     """
     return _vec_string(a, bool_, 'isspace')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def istitle(a):
     """
     Returns true for each element if the element is a titlecased
@@ -827,6 +905,8 @@ def istitle(a):
     """
     return _vec_string(a, bool_, 'istitle')
 
+
+@array_function_dispatch(_unary_op_dispatcher)
 def isupper(a):
     """
     Returns true for each element if all cased characters in the
@@ -852,6 +932,12 @@ def isupper(a):
     """
     return _vec_string(a, bool_, 'isupper')
 
+
+def _join_dispatcher(sep, seq):
+    return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
 def join(sep, seq):
     """
     Return a string which is the concatenation of the strings in the
@@ -877,6 +963,12 @@ def join(sep, seq):
         _vec_string(sep, object_, 'join', (seq,)))
 
 
+
+def _just_dispatcher(a, width, fillchar=None):
+    return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
 def ljust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` left-justified in a
@@ -912,6 +1004,7 @@ def ljust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def lower(a):
     """
     Return an array with the elements converted to lowercase.
@@ -948,6 +1041,11 @@ def lower(a):
     return _vec_string(a_arr, a_arr.dtype, 'lower')
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def lstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading characters
@@ -1005,6 +1103,11 @@ def lstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
 
 
+def _partition_dispatcher(a, sep):
+    return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
 def partition(a, sep):
     """
     Partition each element in `a` around `sep`.
@@ -1040,6 +1143,11 @@ def partition(a, sep):
         _vec_string(a, object_, 'partition', (sep,)))
 
 
+def _replace_dispatcher(a, old, new, count=None):
+    return (a,)
+
+
+@array_function_dispatch(_replace_dispatcher)
 def replace(a, old, new, count=None):
     """
     For each element in `a`, return a copy of the string with all
@@ -1072,6 +1180,7 @@ def replace(a, old, new, count=None):
             a, object_, 'replace', [old, new] + _clean_args(count)))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rfind(a, sub, start=0, end=None):
     """
     For each element in `a`, return the highest index in the string
@@ -1104,6 +1213,7 @@ def rfind(a, sub, start=0, end=None):
         a, integer, 'rfind', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_count_dispatcher)
 def rindex(a, sub, start=0, end=None):
     """
     Like `rfind`, but raises `ValueError` when the substring `sub` is
@@ -1133,6 +1243,7 @@ def rindex(a, sub, start=0, end=None):
         a, integer, 'rindex', [sub, start] + _clean_args(end))
 
 
+@array_function_dispatch(_just_dispatcher)
 def rjust(a, width, fillchar=' '):
     """
     Return an array with the elements of `a` right-justified in a
@@ -1168,6 +1279,7 @@ def rjust(a, width, fillchar=' '):
         a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
 
 
+@array_function_dispatch(_partition_dispatcher)
 def rpartition(a, sep):
     """
     Partition (split) each element around the right-most separator.
@@ -1203,6 +1315,11 @@ def rpartition(a, sep):
         _vec_string(a, object_, 'rpartition', (sep,)))
 
 
+def _split_dispatcher(a, sep=None, maxsplit=None):
+    return (a,)
+
+
+@array_function_dispatch(_split_dispatcher)
 def rsplit(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1240,6 +1357,11 @@ def rsplit(a, sep=None, maxsplit=None):
         a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
 
 
+def _strip_dispatcher(a, chars=None):
+    return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
 def rstrip(a, chars=None):
     """
     For each element in `a`, return a copy with the trailing
@@ -1284,6 +1406,7 @@ def rstrip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
 
 
+@array_function_dispatch(_split_dispatcher)
 def split(a, sep=None, maxsplit=None):
     """
     For each element in `a`, return a list of the words in the
@@ -1318,6 +1441,11 @@ def split(a, sep=None, maxsplit=None):
         a, object_, 'split', [sep] + _clean_args(maxsplit))
 
 
+def _splitlines_dispatcher(a, keepends=None):
+    return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
 def splitlines(a, keepends=None):
     """
     For each element in `a`, return a list of the lines in the
@@ -1347,6 +1475,11 @@ def splitlines(a, keepends=None):
         a, object_, 'splitlines', _clean_args(keepends))
 
 
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+    return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
 def startswith(a, prefix, start=0, end=None):
     """
     Returns a boolean array which is `True` where the string element
@@ -1378,6 +1511,7 @@ def startswith(a, prefix, start=0, end=None):
         a, bool_, 'startswith', [prefix, start] + _clean_args(end))
 
 
+@array_function_dispatch(_strip_dispatcher)
 def strip(a, chars=None):
     """
     For each element in `a`, return a copy with the leading and
@@ -1426,6 +1560,7 @@ def strip(a, chars=None):
     return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def swapcase(a):
     """
     Return element-wise a copy of the string with
@@ -1463,6 +1598,7 @@ def swapcase(a):
     return _vec_string(a_arr, a_arr.dtype, 'swapcase')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def title(a):
     """
     Return element-wise title cased version of string or unicode.
@@ -1502,6 +1638,11 @@ def title(a):
     return _vec_string(a_arr, a_arr.dtype, 'title')
 
 
+def _translate_dispatcher(a, table, deletechars=None):
+    return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
 def translate(a, table, deletechars=None):
     """
     For each element in `a`, return a copy of the string where all
@@ -1538,6 +1679,7 @@ def translate(a, table, deletechars=None):
             a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def upper(a):
     """
     Return an array with the elements converted to uppercase.
@@ -1574,6 +1716,11 @@ def upper(a):
     return _vec_string(a_arr, a_arr.dtype, 'upper')
 
 
+def _zfill_dispatcher(a, width):
+    return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
 def zfill(a, width):
     """
     Return the numeric string left-filled with zeros
@@ -1604,6 +1751,7 @@ def zfill(a, width):
         a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isnumeric(a):
     """
     For each element, return True if there are only numeric
@@ -1635,6 +1783,7 @@ def isnumeric(a):
     return _vec_string(a, bool_, 'isnumeric')
 
 
+@array_function_dispatch(_unary_op_dispatcher)
 def isdecimal(a):
     """
     For each element, return True if there are only decimal
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index b189dae5f..2fdbf3e23 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -1071,10 +1071,10 @@ def argmax(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmax(a)
     5
     >>> np.argmax(a, axis=0)
@@ -1088,7 +1088,7 @@ def argmax(a, axis=None, out=None):
     >>> ind
     (1, 2)
     >>> a[ind]
-    5
+    15
 
     >>> b = np.arange(6)
     >>> b[1] = 5
@@ -1140,10 +1140,10 @@ def argmin(a, axis=None, out=None):
 
     Examples
     --------
-    >>> a = np.arange(6).reshape(2,3)
+    >>> a = np.arange(6).reshape(2,3) + 10
     >>> a
-    array([[0, 1, 2],
-           [3, 4, 5]])
+    array([[10, 11, 12],
+           [13, 14, 15]])
     >>> np.argmin(a)
     0
     >>> np.argmin(a, axis=0)
@@ -1157,12 +1157,12 @@ def argmin(a, axis=None, out=None):
     >>> ind
     (0, 0)
     >>> a[ind]
-    0
+    10
 
-    >>> b = np.arange(6)
-    >>> b[4] = 0
+    >>> b = np.arange(6) + 10
+    >>> b[4] = 10
     >>> b
-    array([0, 1, 2, 3, 0, 5])
+    array([10, 11, 12, 13, 10, 15])
     >>> np.argmin(b)  # Only the first occurrence is returned.
     0
 
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 4b1b3d325..85f8a6c08 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -209,9 +209,32 @@ typedef struct _tagPyUFuncObject {
          * set by nditer object.
          */
         npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each of the core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+
+
 } PyUFuncObject;
 
 #include "arrayobject.h"
+/* Generalized ufunc; 0x0001 reserved for possible use as CORE_ENABLED */
+/* the core dimension's size will be determined by the operands. */
+#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002
+/* the core dimension may be absent */
+#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004
+/* flags inferred during execution */
+#define UFUNC_CORE_DIM_MISSING 0x00040000
 
 #define UFUNC_ERR_IGNORE 0
 #define UFUNC_ERR_WARN   1
@@ -314,22 +337,6 @@ typedef struct _loop1d_info {
                                 &(arg)->first))) \
                 goto fail;} while (0)
 
-
-/* keep in sync with ieee754.c.src */
-#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
-      defined(__NetBSD__) || \
-      defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) || \
-      defined(_AIX) || \
-      defined(_MSC_VER) || \
-      defined(__osf__) && defined(__alpha)
-#else
-#define NO_FLOATING_POINT_SUPPORT
-#endif
-
-
 /*
  * THESE MACROS ARE DEPRECATED.
  * Use npy_set_floatstatus_* in the npymath library.
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 673328397..4dbd3b0fd 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -7,6 +7,8 @@ by importing from the extension module.
 """
 
 from . import _multiarray_umath
+from .overrides import array_function_dispatch
+import numpy as np
 from numpy.core._multiarray_umath import *
 from numpy.core._multiarray_umath import (
     _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
@@ -35,3 +37,1117 @@ __all__ = [
     'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
     'where', 'zeros']
 
+
+def _empty_like_dispatcher(prototype, dtype=None, order=None, subok=None):
+    return (prototype,)
+
+
+@array_function_dispatch(_empty_like_dispatcher)
+def empty_like(prototype, dtype=None, order='K', subok=True):
+    """Return a new array with the same shape and type as a given array.
+
+    Parameters
+    ----------
+    prototype : array_like
+        The shape and data-type of `prototype` define these same attributes
+        of the returned array.
+    dtype : data-type, optional
+        Overrides the data type of the result.
+
+        .. versionadded:: 1.6.0
+    order : {'C', 'F', 'A', or 'K'}, optional
+        Overrides the memory layout of the result. 'C' means C-order,
+        'F' means F-order, 'A' means 'F' if ``prototype`` is Fortran
+        contiguous, 'C' otherwise. 'K' means match the layout of ``prototype``
+        as closely as possible.
+
+        .. versionadded:: 1.6.0
+    subok : bool, optional
+        If True, then the newly created array will use the sub-class
+        type of `prototype`, otherwise it will be a base-class array.
+        Defaults to True.
+
+    Returns
+    -------
+    out : ndarray
+        Array of uninitialized (arbitrary) data with the same
+        shape and type as `prototype`.
+
+    See Also
+    --------
+    ones_like : Return an array of ones with shape and type of input.
+    zeros_like : Return an array of zeros with shape and type of input.
+    full_like : Return a new array with shape of input filled with value.
+    empty : Return a new uninitialized array.
+
+    Notes
+    -----
+    This function does *not* initialize the returned array; to do that use
+    `zeros_like` or `ones_like` instead.  It may be marginally faster than
+    the functions that do set the array values.
+
+    Examples
+    --------
+    >>> a = ([1,2,3], [4,5,6])                         # a is array-like
+    >>> np.empty_like(a)
+    array([[-1073741821, -1073741821,           3],    #random
+           [          0,           0, -1073741821]])
+    >>> a = np.array([[1., 2., 3.],[4.,5.,6.]])
+    >>> np.empty_like(a)
+    array([[ -2.00000715e+000,   1.48219694e-323,  -2.00000572e+000],#random
+           [  4.38791518e-305,  -2.00000715e+000,   4.17269252e-309]])
+
+    """
+    return _multiarray_umath.empty_like(prototype, dtype, order, subok)
+
+
+def _concatenate_dispatcher(arrays, axis=None, out=None):
+    for array in arrays:
+        yield array
+    yield out
+
+
+@array_function_dispatch(_concatenate_dispatcher)
+def concatenate(arrays, axis=0, out=None):
+    """
+    concatenate((a1, a2, ...), axis=0, out=None)
+
+    Join a sequence of arrays along an existing axis.
+
+    Parameters
+    ----------
+    a1, a2, ... : sequence of array_like
+        The arrays must have the same shape, except in the dimension
+        corresponding to `axis` (the first, by default).
+    axis : int, optional
+        The axis along which the arrays will be joined.  If axis is None,
+        arrays are flattened before use.  Default is 0.
+    out : ndarray, optional
+        If provided, the destination to place the result. The shape must be
+        correct, matching that of what concatenate would have returned if no
+        out argument were specified.
+
+    Returns
+    -------
+    res : ndarray
+        The concatenated array.
+
+    See Also
+    --------
+    ma.concatenate : Concatenate function that preserves input masks.
+    array_split : Split an array into multiple sub-arrays of equal or
+                  near-equal size.
+    split : Split array into a list of multiple sub-arrays of equal size.
+    hsplit : Split array into multiple sub-arrays horizontally (column wise)
+    vsplit : Split array into multiple sub-arrays vertically (row wise)
+    dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
+    stack : Stack a sequence of arrays along a new axis.
+    hstack : Stack arrays in sequence horizontally (column wise)
+    vstack : Stack arrays in sequence vertically (row wise)
+    dstack : Stack arrays in sequence depth wise (along third dimension)
+    block : Assemble arrays from blocks.
+
+    Notes
+    -----
+    When one or more of the arrays to be concatenated is a MaskedArray,
+    this function will return a MaskedArray object instead of an ndarray,
+    but the input masks are *not* preserved. In cases where a MaskedArray
+    is expected as input, use the ma.concatenate function from the masked
+    array module instead.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> b = np.array([[5, 6]])
+    >>> np.concatenate((a, b), axis=0)
+    array([[1, 2],
+           [3, 4],
+           [5, 6]])
+    >>> np.concatenate((a, b.T), axis=1)
+    array([[1, 2, 5],
+           [3, 4, 6]])
+    >>> np.concatenate((a, b), axis=None)
+    array([1, 2, 3, 4, 5, 6])
+
+    This function will not preserve masking of MaskedArray inputs.
+
+    >>> a = np.ma.arange(3)
+    >>> a[1] = np.ma.masked
+    >>> b = np.arange(2, 5)
+    >>> a
+    masked_array(data=[0, --, 2],
+                 mask=[False,  True, False],
+           fill_value=999999)
+    >>> b
+    array([2, 3, 4])
+    >>> np.concatenate([a, b])
+    masked_array(data=[0, 1, 2, 2, 3, 4],
+                 mask=False,
+           fill_value=999999)
+    >>> np.ma.concatenate([a, b])
+    masked_array(data=[0, --, 2, 2, 3, 4],
+                 mask=[False,  True, False, False, False, False],
+           fill_value=999999)
+
+    """
+    return _multiarray_umath.concatenate(arrays, axis, out)
+
+
+def _inner_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_inner_dispatcher)
+def inner(a, b):
+    """
+    Inner product of two arrays.
+
+    Ordinary inner product of vectors for 1-D arrays (without complex
+    conjugation), in higher dimensions a sum product over the last axes.
+
+    Parameters
+    ----------
+    a, b : array_like
+        If `a` and `b` are nonscalar, their last dimensions must match.
+
+    Returns
+    -------
+    out : ndarray
+        `out.shape = a.shape[:-1] + b.shape[:-1]`
+
+    Raises
+    ------
+    ValueError
+        If the last dimensions of `a` and `b` have different sizes.
+
+    See Also
+    --------
+    tensordot : Sum products over arbitrary axes.
+    dot : Generalised matrix product, using second last dimension of `b`.
+    einsum : Einstein summation convention.
+
+    Notes
+    -----
+    For vectors (1-D arrays) it computes the ordinary inner-product::
+
+        np.inner(a, b) = sum(a[:]*b[:])
+
+    More generally, if `ndim(a) = r > 0` and `ndim(b) = s > 0`::
+
+        np.inner(a, b) = np.tensordot(a, b, axes=(-1,-1))
+
+    or explicitly::
+
+        np.inner(a, b)[i0,...,ir-1,j0,...,js-1]
+             = sum(a[i0,...,ir-1,:]*b[j0,...,js-1,:])
+
+    In addition `a` or `b` may be scalars, in which case::
+
+       np.inner(a,b) = a*b
+
+    Examples
+    --------
+    Ordinary inner product for vectors:
+
+    >>> a = np.array([1,2,3])
+    >>> b = np.array([0,1,0])
+    >>> np.inner(a, b)
+    2
+
+    A multidimensional example:
+
+    >>> a = np.arange(24).reshape((2,3,4))
+    >>> b = np.arange(4)
+    >>> np.inner(a, b)
+    array([[ 14,  38,  62],
+           [ 86, 110, 134]])
+
+    An example where `b` is a scalar:
+
+    >>> np.inner(np.eye(2), 7)
+    array([[ 7.,  0.],
+           [ 0.,  7.]])
+
+    """
+    return _multiarray_umath.inner(a, b)
+
+
+def _where_dispatcher(condition, x=None, y=None):
+    return (condition, x, y)
+
+
+@array_function_dispatch(_where_dispatcher)
+def where(condition, x=np._NoValue, y=np._NoValue):
+    """
+    where(condition, [x, y])
+
+    Return elements chosen from `x` or `y` depending on `condition`.
+
+    .. note::
+        When only `condition` is provided, this function is a shorthand for
+        ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
+        preferred, as it behaves correctly for subclasses. The rest of this
+        documentation covers only the case where all three arguments are
+        provided.
+
+    Parameters
+    ----------
+    condition : array_like, bool
+        Where True, yield `x`, otherwise yield `y`.
+    x, y : array_like
+        Values from which to choose. `x`, `y` and `condition` need to be
+        broadcastable to some shape.
+
+    Returns
+    -------
+    out : ndarray
+        An array with elements from `x` where `condition` is True, and elements
+        from `y` elsewhere.
+
+    See Also
+    --------
+    choose
+    nonzero : The function that is called when x and y are omitted
+
+    Notes
+    -----
+    If all the arrays are 1-D, `where` is equivalent to::
+
+        [xv if c else yv
+         for c, xv, yv in zip(condition, x, y)]
+
+    Examples
+    --------
+    >>> a = np.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> np.where(a < 5, a, 10*a)
+    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
+
+    This can be used on multidimensional arrays too:
+
+    >>> np.where([[True, False], [True, True]],
+    ...          [[1, 2], [3, 4]],
+    ...          [[9, 8], [7, 6]])
+    array([[1, 8],
+           [3, 4]])
+
+    The shapes of x, y, and the condition are broadcast together:
+
+    >>> x, y = np.ogrid[:3, :4]
+    >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
+    array([[10,  0,  0,  0],
+           [10, 11,  1,  1],
+           [10, 11, 12,  2]])
+
+    >>> a = np.array([[0, 1, 2],
+    ...               [0, 2, 4],
+    ...               [0, 3, 6]])
+    >>> np.where(a < 4, a, -1)  # -1 is broadcast
+    array([[ 0,  1,  2],
+           [ 0,  2, -1],
+           [ 0,  3, -1]])
+    """
+    # _multiarray_umath.where only accepts positional arguments
+    args = tuple(a for a in (x, y) if a is not np._NoValue)
+    return _multiarray_umath.where(condition, *args)
+
+
+def _lexsort_dispatcher(keys, axis=None):
+    if isinstance(keys, tuple):
+        return keys
+    else:
+        return (keys,)
+
+
+@array_function_dispatch(_lexsort_dispatcher)
+def lexsort(keys, axis=-1):
+    """
+    Perform an indirect stable sort using a sequence of keys.
+
+    Given multiple sorting keys, which can be interpreted as columns in a
+    spreadsheet, lexsort returns an array of integer indices that describes
+    the sort order by multiple columns. The last key in the sequence is used
+    for the primary sort order, the second-to-last key for the secondary sort
+    order, and so on. The keys argument must be a sequence of objects that
+    can be converted to arrays of the same shape. If a 2D array is provided
+    for the keys argument, its rows are interpreted as the sorting keys and
+    sorting is according to the last row, second last row etc.
+
+    Parameters
+    ----------
+    keys : (k, N) array or tuple containing k (N,)-shaped sequences
+        The `k` different "columns" to be sorted.  The last column (or row if
+        `keys` is a 2D array) is the primary sort key.
+    axis : int, optional
+        Axis to be indirectly sorted.  By default, sort over the last axis.
+
+    Returns
+    -------
+    indices : (N,) ndarray of ints
+        Array of indices that sort the keys along the specified axis.
+
+    See Also
+    --------
+    argsort : Indirect sort.
+    ndarray.sort : In-place sort.
+    sort : Return a sorted copy of an array.
+
+    Examples
+    --------
+    Sort names: first by surname, then by name.
+
+    >>> surnames =    ('Hertz',    'Galilei', 'Hertz')
+    >>> first_names = ('Heinrich', 'Galileo', 'Gustav')
+    >>> ind = np.lexsort((first_names, surnames))
+    >>> ind
+    array([1, 2, 0])
+
+    >>> [surnames[i] + ", " + first_names[i] for i in ind]
+    ['Galilei, Galileo', 'Hertz, Gustav', 'Hertz, Heinrich']
+
+    Sort two columns of numbers:
+
+    >>> a = [1,5,1,4,3,4,4] # First column
+    >>> b = [9,4,0,4,0,2,1] # Second column
+    >>> ind = np.lexsort((b,a)) # Sort by a, then by b
+    >>> print(ind)
+    [2 0 4 6 5 3 1]
+
+    >>> [(a[i],b[i]) for i in ind]
+    [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]
+
+    Note that sorting is first according to the elements of ``a``.
+    Secondary sorting is according to the elements of ``b``.
+
+    A normal ``argsort`` would have yielded:
+
+    >>> [(a[i],b[i]) for i in np.argsort(a)]
+    [(1, 9), (1, 0), (3, 0), (4, 4), (4, 2), (4, 1), (5, 4)]
+
+    Structured arrays are sorted lexically by ``argsort``:
+
+    >>> x = np.array([(1,9), (5,4), (1,0), (4,4), (3,0), (4,2), (4,1)],
+    ...              dtype=np.dtype([('x', int), ('y', int)]))
+
+    >>> np.argsort(x) # or np.argsort(x, order=('x', 'y'))
+    array([2, 0, 4, 6, 5, 3, 1])
+
+    """
+    return _multiarray_umath.lexsort(keys, axis)
+
+
+def _can_cast_dispatcher(from_, to, casting=None):
+    return (from_,)
+
+
+@array_function_dispatch(_can_cast_dispatcher)
+def can_cast(from_, to, casting='safe'):
+    """
+    Returns True if cast between data types can occur according to the
+    casting rule.  If `from_` is a scalar or array scalar, also returns
+    True if the scalar value can be cast without overflow or truncation
+    to an integer.
+
+    Parameters
+    ----------
+    from_ : dtype, dtype specifier, scalar, or array
+        Data type, scalar, or array to cast from.
+    to : dtype or dtype specifier
+        Data type to cast to.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+        Controls what kind of data casting may occur.
+
+          * 'no' means the data types should not be cast at all.
+          * 'equiv' means only byte-order changes are allowed.
+          * 'safe' means only casts which can preserve values are allowed.
+          * 'same_kind' means only safe casts or casts within a kind,
+            like float64 to float32, are allowed.
+          * 'unsafe' means any data conversions may be done.
+
+    Returns
+    -------
+    out : bool
+        True if cast can occur according to the casting rule.
+
+    Notes
+    -----
+    Starting in NumPy 1.9, can_cast function now returns False in 'safe'
+    casting mode for integer/float dtype and string dtype if the string dtype
+    length is not long enough to store the max integer/float value converted
+    to a string. Previously can_cast in 'safe' mode returned True for
+    integer/float dtype and a string dtype of any length.
+
+    See Also
+    --------
+    dtype, result_type
+
+    Examples
+    --------
+    Basic examples
+
+    >>> np.can_cast(np.int32, np.int64)
+    True
+    >>> np.can_cast(np.float64, complex)
+    True
+    >>> np.can_cast(complex, float)
+    False
+
+    >>> np.can_cast('i8', 'f8')
+    True
+    >>> np.can_cast('i8', 'f4')
+    False
+    >>> np.can_cast('i4', 'S4')
+    False
+
+    Casting scalars
+
+    >>> np.can_cast(100, 'i1')
+    True
+    >>> np.can_cast(150, 'i1')
+    False
+    >>> np.can_cast(150, 'u1')
+    True
+
+    >>> np.can_cast(3.5e100, np.float32)
+    False
+    >>> np.can_cast(1000.0, np.float32)
+    True
+
+    Array scalar checks the value, array does not
+
+    >>> np.can_cast(np.array(1000.0), np.float32)
+    True
+    >>> np.can_cast(np.array([1000.0]), np.float32)
+    False
+
+    Using the casting rules
+
+    >>> np.can_cast('i8', 'i8', 'no')
+    True
+    >>> np.can_cast('<i8', '>i8', 'no')
+    False
+
+    >>> np.can_cast('<i8', '>i8', 'equiv')
+    True
+    >>> np.can_cast('<i4', '>i8', 'equiv')
+    False
+
+    >>> np.can_cast('<i4', '>i8', 'safe')
+    True
+    >>> np.can_cast('<i8', '>i4', 'safe')
+    False
+
+    >>> np.can_cast('<i8', '>i4', 'same_kind')
+    True
+    >>> np.can_cast('<i8', '>u4', 'same_kind')
+    False
+
+    >>> np.can_cast('<i8', '>u4', 'unsafe')
+    True
+
+    """
+    return _multiarray_umath.can_cast(from_, to, casting)
+
+
+def _min_scalar_type_dispatcher(a):
+    return (a,)
+
+
+@array_function_dispatch(_min_scalar_type_dispatcher)
+def min_scalar_type(a):
+    """
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value.  For non-scalar
+    array ``a``, returns the vector's dtype unmodified.
+
+    Floating point values are not demoted to integers,
+    and complex values are not demoted to floats.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    See Also
+    --------
+    result_type, promote_types, dtype, can_cast
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
+
+    """
+    return _multiarray_umath.min_scalar_type(a)
+
+
+def _result_type_dispatcher(*arrays_and_dtypes):
+    return arrays_and_dtypes
+
+
+@array_function_dispatch(_result_type_dispatcher)
+def result_type(*arrays_and_dtypes):
+    """
+    Returns the type that results from applying the NumPy
+    type promotion rules to the arguments.
+
+    Type promotion in NumPy works similarly to the rules in languages
+    like C++, with some slight differences.  When both scalars and
+    arrays are used, the array's type takes precedence and the actual value
+    of the scalar is taken into account.
+
+    For example, calculating 3*a, where a is an array of 32-bit floats,
+    intuitively should result in a 32-bit float output.  If the 3 is a
+    32-bit integer, the NumPy rules indicate it can't convert losslessly
+    into a 32-bit float, so a 64-bit float should be the result type.
+    By examining the value of the constant, '3', we see that it fits in
+    an 8-bit integer, which can be cast losslessly into the 32-bit float.
+
+    Parameters
+    ----------
+    arrays_and_dtypes : list of arrays and dtypes
+        The operands of some operation whose result type is needed.
+
+    Returns
+    -------
+    out : dtype
+        The result type.
+
+    See Also
+    --------
+    dtype, promote_types, min_scalar_type, can_cast
+
+    Notes
+    -----
+    .. versionadded:: 1.6.0
+
+    The specific algorithm used is as follows.
+
+    Categories are determined by first checking which of boolean,
+    integer (int/uint), or floating point (float/complex) the maximum
+    kind of all the arrays and the scalars are.
+
+    If there are only scalars or the maximum category of the scalars
+    is higher than the maximum category of the arrays,
+    the data types are combined with :func:`promote_types`
+    to produce the return value.
+
+    Otherwise, `min_scalar_type` is called on each array, and
+    the resulting data types are all combined with :func:`promote_types`
+    to produce the return value.
+
+    The set of int values is not a subset of the uint values for types
+    with the same number of bits, something not reflected in
+    :func:`min_scalar_type`, but handled as a special case in `result_type`.
+
+    Examples
+    --------
+    >>> np.result_type(3, np.arange(7, dtype='i1'))
+    dtype('int8')
+
+    >>> np.result_type('i4', 'c8')
+    dtype('complex128')
+
+    >>> np.result_type(3.0, -2)
+    dtype('float64')
+
+    """
+    return _multiarray_umath.result_type(*arrays_and_dtypes)
+
+
+def _dot_dispatcher(a, b, out=None):
+    return (a, b, out)
+
+
+@array_function_dispatch(_dot_dispatcher)
+def dot(a, b, out=None):
+    """
+    Dot product of two arrays. Specifically,
+
+    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
+      (without complex conjugation).
+
+    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
+      but using :func:`matmul` or ``a @ b`` is preferred.
+
+    - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
+      and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.
+
+    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
+      the last axis of `a` and `b`.
+
+    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
+      sum product over the last axis of `a` and the second-to-last axis of `b`::
+
+        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
+
+    Parameters
+    ----------
+    a : array_like
+        First argument.
+    b : array_like
+        Second argument.
+    out : ndarray, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a,b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+    Returns
+    -------
+    output : ndarray
+        Returns the dot product of `a` and `b`.  If `a` and `b` are both
+        scalars or both 1-D arrays then a scalar is returned; otherwise
+        an array is returned.
+        If `out` is given, then it is returned.
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `a` is not the same size as
+        the second-to-last dimension of `b`.
+
+    See Also
+    --------
+    vdot : Complex-conjugating dot product.
+    tensordot : Sum products over arbitrary axes.
+    einsum : Einstein summation convention.
+    matmul : '@' operator as method with out parameter.
+
+    Examples
+    --------
+    >>> np.dot(3, 4)
+    12
+
+    Neither argument is complex-conjugated:
+
+    >>> np.dot([2j, 3j], [2j, 3j])
+    (-13+0j)
+
+    For 2-D arrays it is the matrix product:
+
+    >>> a = [[1, 0], [0, 1]]
+    >>> b = [[4, 1], [2, 2]]
+    >>> np.dot(a, b)
+    array([[4, 1],
+           [2, 2]])
+
+    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
+    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
+    >>> np.dot(a, b)[2,3,2,1,2,2]
+    499128
+    >>> sum(a[2,3,2,:] * b[1,2,:,2])
+    499128
+
+    """
+    return _multiarray_umath.dot(a, b, out)
+
+
+def _vdot_dispatcher(a, b):
+    return (a, b)
+
+
+@array_function_dispatch(_vdot_dispatcher)
+def vdot(a, b):
+    """
+    Return the dot product of two vectors.
+
+    The vdot(`a`, `b`) function handles complex numbers differently than
+    dot(`a`, `b`).  If the first argument is complex the complex conjugate
+    of the first argument is used for the calculation of the dot product.
+
+    Note that `vdot` handles multidimensional arrays differently than `dot`:
+    it does *not* perform a matrix product, but flattens input arguments
+    to 1-D vectors first. Consequently, it should only be used for vectors.
+
+    Parameters
+    ----------
+    a : array_like
+        If `a` is complex the complex conjugate is taken before calculation
+        of the dot product.
+    b : array_like
+        Second argument to the dot product.
+
+    Returns
+    -------
+    output : ndarray
+        Dot product of `a` and `b`.  Can be an int, float, or
+        complex depending on the types of `a` and `b`.
+
+    See Also
+    --------
+    dot : Return the dot product without using the complex conjugate of the
+          first argument.
+
+    Examples
+    --------
+    >>> a = np.array([1+2j,3+4j])
+    >>> b = np.array([5+6j,7+8j])
+    >>> np.vdot(a, b)
+    (70-8j)
+    >>> np.vdot(b, a)
+    (70+8j)
+
+    Note that higher-dimensional arrays are flattened!
+
+    >>> a = np.array([[1, 4], [5, 6]])
+    >>> b = np.array([[4, 1], [2, 2]])
+    >>> np.vdot(a, b)
+    30
+    >>> np.vdot(b, a)
+    30
+    >>> 1*4 + 4*1 + 5*2 + 6*2
+    30
+
+    """
+    return _multiarray_umath.vdot(a, b)
+
+
+def _is_busday_dispatcher(
+        dates, weekmask=None, holidays=None, busdaycal=None, out=None):
+    return (dates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_is_busday_dispatcher)
+def is_busday(dates, weekmask=None, holidays=None, busdaycal=None,
+              out=None):
+    """
+    is_busday(dates, weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    Calculates which of the given dates are valid days, and which are not.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of bool, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of bool
+        An array with the same shape as ``dates``, containing True for
+        each valid day, and False for each invalid day.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    busday_offset : Applies an offset counted in valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # The weekdays are Friday, Saturday, and Monday
+    ... np.is_busday(['2011-07-01', '2011-07-02', '2011-07-18'],
+    ...                 holidays=['2011-07-01', '2011-07-04', '2011-07-17'])
+    array([False, False,  True])
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.is_busday(dates, **kwargs)
+
+
+def _busday_offset_dispatcher(dates, offsets, roll=None, weekmask=None,
+                              holidays=None, busdaycal=None, out=None):
+    return (dates, offsets, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_offset_dispatcher)
+def busday_offset(dates, offsets, roll='raise', weekmask=None,
+                  holidays=None, busdaycal=None, out=None):
+    """
+    busday_offset(dates, offsets, roll='raise', weekmask='1111100', holidays=None, busdaycal=None, out=None)
+
+    First adjusts the date to fall on a valid day according to
+    the ``roll`` rule, then applies offsets to the given dates
+    counted in valid days.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    dates : array_like of datetime64[D]
+        The array of dates to process.
+    offsets : array_like of int
+        The array of offsets, which is broadcast with ``dates``.
+    roll : {'raise', 'nat', 'forward', 'following', 'backward', 'preceding', 'modifiedfollowing', 'modifiedpreceding'}, optional
+        How to treat dates that do not fall on a valid day. The default
+        is 'raise'.
+
+          * 'raise' means to raise an exception for an invalid day.
+          * 'nat' means to return a NaT (not-a-time) for an invalid day.
+          * 'forward' and 'following' mean to take the first valid day
+            later in time.
+          * 'backward' and 'preceding' mean to take the first valid day
+            earlier in time.
+          * 'modifiedfollowing' means to take the first valid day
+            later in time unless it is across a Month boundary, in which
+            case to take the first valid day earlier in time.
+          * 'modifiedpreceding' means to take the first valid day
+            earlier in time unless it is across a Month boundary, in which
+            case to take the first valid day later in time.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of datetime64[D], optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of datetime64[D]
+        An array with a shape from broadcasting ``dates`` and ``offsets``
+        together, containing the dates with offsets applied.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_count : Counts how many valid days are in a half-open date range.
+
+    Examples
+    --------
+    >>> # First business day in October 2011 (not accounting for holidays)
+    ... np.busday_offset('2011-10', 0, roll='forward')
+    numpy.datetime64('2011-10-03')
+    >>> # Last business day in February 2012 (not accounting for holidays)
+    ... np.busday_offset('2012-03', -1, roll='forward')
+    numpy.datetime64('2012-02-29')
+    >>> # Third Wednesday in January 2011
+    ... np.busday_offset('2011-01', 2, roll='forward', weekmask='Wed')
+    numpy.datetime64('2011-01-19')
+    >>> # 2012 Mother's Day in Canada and the U.S.
+    ... np.busday_offset('2012-05', 1, roll='forward', weekmask='Sun')
+    numpy.datetime64('2012-05-13')
+
+    >>> # First business day on or after a date
+    ... np.busday_offset('2011-03-20', 0, roll='forward')
+    numpy.datetime64('2011-03-21')
+    >>> np.busday_offset('2011-03-22', 0, roll='forward')
+    numpy.datetime64('2011-03-22')
+    >>> # First business day after a date
+    ... np.busday_offset('2011-03-20', 1, roll='backward')
+    numpy.datetime64('2011-03-21')
+    >>> np.busday_offset('2011-03-22', 1, roll='backward')
+    numpy.datetime64('2011-03-23')
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_offset(dates, offsets, roll, **kwargs)
+
+
+def _busday_count_dispatcher(begindates, enddates, weekmask=None,
+                             holidays=None, busdaycal=None, out=None):
+    return (begindates, enddates, weekmask, holidays, out)
+
+
+@array_function_dispatch(_busday_count_dispatcher)
+def busday_count(begindates, enddates, weekmask=None, holidays=None,
+                 busdaycal=None, out=None):
+    """
+    busday_count(begindates, enddates, weekmask='1111100', holidays=[], busdaycal=None, out=None)
+
+    Counts the number of valid days between `begindates` and
+    `enddates`, not including the day of `enddates`.
+
+    If ``enddates`` specifies a date value that is earlier than the
+    corresponding ``begindates`` date value, the count will be negative.
+
+    .. versionadded:: 1.7.0
+
+    Parameters
+    ----------
+    begindates : array_like of datetime64[D]
+        The array of the first dates for counting.
+    enddates : array_like of datetime64[D]
+        The array of the end dates for counting, which are excluded
+        from the count themselves.
+    weekmask : str or array_like of bool, optional
+        A seven-element array indicating which of Monday through Sunday are
+        valid days. May be specified as a length-seven list or array, like
+        [1,1,1,1,1,0,0]; a length-seven string, like '1111100'; or a string
+        like "Mon Tue Wed Thu Fri", made up of 3-character abbreviations for
+        weekdays, optionally separated by white space. Valid abbreviations
+        are: Mon Tue Wed Thu Fri Sat Sun
+    holidays : array_like of datetime64[D], optional
+        An array of dates to consider as invalid dates.  They may be
+        specified in any order, and NaT (not-a-time) dates are ignored.
+        This list is saved in a normalized form that is suited for
+        fast calculations of valid days.
+    busdaycal : busdaycalendar, optional
+        A `busdaycalendar` object which specifies the valid days. If this
+        parameter is provided, neither weekmask nor holidays may be
+        provided.
+    out : array of int, optional
+        If provided, this array is filled with the result.
+
+    Returns
+    -------
+    out : array of int
+        An array with a shape from broadcasting ``begindates`` and ``enddates``
+        together, containing the number of valid days between
+        the begin and end dates.
+
+    See Also
+    --------
+    busdaycalendar : An object that specifies a custom set of valid days.
+    is_busday : Returns a boolean array indicating valid days.
+    busday_offset : Applies an offset counted in valid days.
+
+    Examples
+    --------
+    >>> # Number of weekdays in January 2011
+    ... np.busday_count('2011-01', '2011-02')
+    21
+    >>> # Number of weekdays in 2011
+    ... np.busday_count('2011', '2012')
+    260
+    >>> # Number of Saturdays in 2011
+    ... np.busday_count('2011', '2012', weekmask='Sat')
+    53
+    """
+    kwargs = {}
+    if weekmask is not None:
+        kwargs['weekmask'] = weekmask
+    if holidays is not None:
+        kwargs['holidays'] = holidays
+    if busdaycal is not None:
+        kwargs['busdaycal'] = busdaycal
+    if out is not None:
+        kwargs['out'] = out
+    return _multiarray_umath.busday_count(begindates, enddates, **kwargs)
+
+
+def _datetime_as_string_dispatcher(
+        arr, unit=None, timezone=None, casting=None):
+    return (arr,)
+
+
+@array_function_dispatch(_datetime_as_string_dispatcher)
+def datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind'):
+    """
+    Convert an array of datetimes into an array of strings.
+
+    Parameters
+    ----------
+    arr : array_like of datetime64
+        The array of UTC timestamps to format.
+    unit : str
+        One of None, 'auto', or a :ref:`datetime unit <arrays.dtypes.dateunits>`.
+    timezone : {'naive', 'UTC', 'local'} or tzinfo
+        Timezone information to use when displaying the datetime. If 'UTC', end
+        with a Z to indicate UTC time. If 'local', convert to the local timezone
+        first, and suffix with a +-#### timezone offset. If a tzinfo object,
+        then do as with 'local', but use the specified timezone.
+    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}
+        Casting to allow when changing between datetime units.
+
+    Returns
+    -------
+    str_arr : ndarray
+        An array of strings the same shape as `arr`.
+
+    Examples
+    --------
+    >>> d = np.arange('2002-10-27T04:30', 4*60, 60, dtype='M8[m]')
+    >>> d
+    array(['2002-10-27T04:30', '2002-10-27T05:30', '2002-10-27T06:30',
+           '2002-10-27T07:30'], dtype='datetime64[m]')
+
+    Setting the timezone to UTC shows the same information, but with a Z suffix
+
+    >>> np.datetime_as_string(d, timezone='UTC')
+    array(['2002-10-27T04:30Z', '2002-10-27T05:30Z', '2002-10-27T06:30Z',
+           '2002-10-27T07:30Z'], dtype='<U35')
+
+    Note that we picked datetimes that cross a DST boundary. Passing in a
+    ``pytz`` timezone object will print the appropriate offset
+
+    >>> np.datetime_as_string(d, timezone=pytz.timezone('US/Eastern'))
+    array(['2002-10-27T00:30-0400', '2002-10-27T01:30-0400',
+           '2002-10-27T01:30-0500', '2002-10-27T02:30-0500'], dtype='<U39')
+
+    Passing in a unit will change the precision
+
+    >>> np.datetime_as_string(d, unit='h')
+    array(['2002-10-27T04', '2002-10-27T05', '2002-10-27T06', '2002-10-27T07'],
+          dtype='<U32')
+    >>> np.datetime_as_string(d, unit='s')
+    array(['2002-10-27T04:30:00', '2002-10-27T05:30:00', '2002-10-27T06:30:00',
+           '2002-10-27T07:30:00'], dtype='<U38')
+
+    'casting' can be used to specify whether precision can be changed
+
+    >>> np.datetime_as_string(d, unit='h', casting='safe')
+    TypeError: Cannot create a datetime string as units 'h' from a NumPy
+    datetime with units 'm' according to the rule 'safe'
+    """
+    return _multiarray_umath.datetime_as_string(arr, unit, timezone, casting)
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 3ff9ceef0..2fb841f7c 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -116,8 +116,8 @@ from ._type_aliases import (
     _concrete_types,
     _concrete_typeinfo,
     _bits_of,
-    _kind_to_stem,
 )
+from ._dtype import _kind_name
 
 # we don't export these for import *, but we do want them accessible
 # as numerictypes.bool, etc.
@@ -181,8 +181,7 @@ def maximum_sctype(t):
     if g is None:
         return t
     t = g
-    bits = _bits_of(t)
-    base = _kind_to_stem[dtype(t).kind]
+    base = _kind_name(dtype(t))
     if base in sctypes:
         return sctypes[base][-1]
     else:
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
index 906292613..5be60cd29 100644
--- a/numpy/core/overrides.py
+++ b/numpy/core/overrides.py
@@ -5,7 +5,7 @@ TODO: rewrite this in C for performance.
 import collections
 import functools
 
-from numpy.core.multiarray import ndarray
+from numpy.core._multiarray_umath import ndarray
 from numpy.compat._inspect import getargspec
 
 
@@ -71,8 +71,8 @@ def array_function_implementation_or_override(
         Function that implements the operation on NumPy array without
         overrides when called like ``implementation(*args, **kwargs)``.
     public_api : function
-        Function exposed by NumPy's public API riginally called like
-        ``public_api(*args, **kwargs`` on which arguments are now being
+        Function exposed by NumPy's public API originally called like
+        ``public_api(*args, **kwargs)`` on which arguments are now being
         checked.
     relevant_args : iterable
         Iterable of arguments to check for __array_function__ methods.
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index bea9ff392..fc15fe59f 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -737,6 +737,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'ucsnarrow.h'),
             join('src', 'common', 'ufunc_override.h'),
             join('src', 'common', 'umathmodule.h'),
+            join('src', 'common', 'numpyos.h'),
             ]
 
     common_src = [
@@ -746,6 +747,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.c'),
             join('src', 'common', 'ufunc_override.c'),
+            join('src', 'common', 'numpyos.c'),
             ]
 
     blas_info = get_info('blas_opt', 0)
@@ -785,7 +787,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'multiarraymodule.h'),
             join('src', 'multiarray', 'nditer_impl.h'),
             join('src', 'multiarray', 'number.h'),
-            join('src', 'multiarray', 'numpyos.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -851,7 +852,6 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'nditer_constr.c'),
             join('src', 'multiarray', 'nditer_pywrap.c'),
             join('src', 'multiarray', 'number.c'),
-            join('src', 'multiarray', 'numpyos.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index e637dbc20..f837df112 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -41,7 +41,8 @@ C_ABI_VERSION = 0x01000009
 # 0x0000000b - 1.13.x
 # 0x0000000c - 1.14.x
 # 0x0000000c - 1.15.x
-C_API_VERSION = 0x0000000c
+# 0x0000000d - 1.16.x
+C_API_VERSION = 0x0000000d
 
 class MismatchCAPIWarning(Warning):
     pass
diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/common/numpyos.c
index 52dcbf3c8..d60b1ca17 100644
--- a/numpy/core/src/multiarray/numpyos.c
+++ b/numpy/core/src/common/numpyos.c
@@ -769,3 +769,31 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value)
     }
     return r;
 }
+
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOLL
+    return strtoll(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoi64(str, endptr, base);
+#else
+    /* ok on 64 bit posix, where long is as wide as long long */
+    return PyOS_strtol(str, endptr, base);
+#endif
+}
+
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOULL
+    return strtoull(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoui64(str, endptr, base);
+#else
+    /* ok on 64 bit posix, where unsigned long is as wide as long long */
+    return PyOS_strtoul(str, endptr, base);
+#endif
+}
+
+
diff --git a/numpy/core/src/multiarray/numpyos.h b/numpy/core/src/common/numpyos.h
index 7ca795a6f..4deed8400 100644
--- a/numpy/core/src/multiarray/numpyos.h
+++ b/numpy/core/src/common/numpyos.h
@@ -31,4 +31,11 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isspace(int c);
 
+/* Convert a string to a signed npy_longlong in an arbitrary base */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base);
+
+/* Convert a string to an unsigned npy_ulonglong in an arbitrary base */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base);
 #endif
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 0e69cfc07..46a3ffb3d 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -150,32 +150,6 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
 
 /**end repeat**/
 
-static npy_longlong
-npy_strtoll(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOLL
-    return strtoll(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoi64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtol(str, endptr, base);
-#endif
-}
-
-static npy_ulonglong
-npy_strtoull(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOULL
-    return strtoull(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoui64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtoul(str, endptr, base);
-#endif
-}
-
 /*
  *****************************************************************************
  **                         GETITEM AND SETITEM                             **
@@ -1796,8 +1770,8 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_datetime, npy_timedelta#
- * #func = (PyOS_strtol, PyOS_strtoul)*4, npy_strtoll, npy_strtoull,
- *         npy_strtoll*2#
+ * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull,
+ *         NumPyOS_strtoll*2#
  * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong,
  *          npy_longlong*2#
  */
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 7f837901c..a8550d958 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -2845,6 +2845,16 @@ convert_pyobject_to_timedelta(PyArray_DatetimeMetaData *meta, PyObject *obj,
         *out = NPY_DATETIME_NAT;
         return 0;
     }
+    else if (PyArray_IsScalar(obj, Integer)) {
+        /* Use the default unit if none was specified */
+        if (meta->base == NPY_FR_ERROR) {
+            meta->base = NPY_DATETIME_DEFAULTUNIT;
+            meta->num = 1;
+        }
+
+        *out = PyLong_AsLongLong(obj);
+        return 0;
+    }
     else {
         PyErr_SetString(PyExc_ValueError,
                 "Could not convert object to NumPy timedelta");
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 3ac71e285..30820737e 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -89,11 +89,19 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
             return NULL;
         }
 
+        if (PyArray_BASE(self) != NULL
+              || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+            PyErr_SetString(PyExc_ValueError,
+                    "cannot resize an array that "
+                    "references or is referenced\n"
+                    "by another array in this way. Use the np.resize function.");
+            return NULL;
+        }
         if (refcheck) {
 #ifdef PYPY_VERSION
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array with refcheck=True on PyPy.\n"
-                    "Use the resize function or refcheck=False");
+                    "Use the np.resize function or refcheck=False");
             return NULL;
 #else
             refcnt = PyArray_REFCOUNT(self);
@@ -102,13 +110,12 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
         else {
             refcnt = 1;
         }
-        if ((refcnt > 2)
-                || (PyArray_BASE(self) != NULL)
-                || (((PyArrayObject_fields *)self)->weakreflist != NULL)) {
+        if (refcnt > 2) {
             PyErr_SetString(PyExc_ValueError,
                     "cannot resize an array that "
                     "references or is referenced\n"
-                    "by another array in this way.  Use the resize function");
+                    "by another array in this way.\n"
+                    "Use the np.resize function or refcheck=False");
             return NULL;
         }
 
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index 8b5eef87a..d960838c8 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -568,13 +568,21 @@ int npy_get_floatstatus() {
 
 /*
  * Functions to set the floating point status word.
- * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
  */
 
 #if (defined(__unix__) || defined(unix)) && !defined(USG)
 #include <sys/param.h>
 #endif
 
+
+/*
+ * Define floating point status functions. We must define
+ * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
+ * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
+ * for all supported platforms.
+ */
+
+
 /* Solaris --------------------------------------------------------*/
 /* --------ignoring SunOS ieee_flags approach, someone else can
 **         deal with that! */
@@ -626,117 +634,94 @@ void npy_set_floatstatus_invalid(void)
     fpsetsticky(FP_X_INV);
 }
 
+#elif defined(_AIX)
+#include <float.h>
+#include <fpxcp.h>
 
-#elif defined(__GLIBC__) || defined(__APPLE__) || \
-      defined(__CYGWIN__) || defined(__MINGW32__) || \
-      (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
-#  include <fenv.h>
-
-int npy_get_floatstatus_barrier(char* param)
+int npy_get_floatstatus_barrier(char *param)
 {
-    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                                FE_UNDERFLOW | FE_INVALID);
+    int fpstatus = fp_read_flag();
     /*
      * By using a volatile, the compiler cannot reorder this call
      */
     if (param != NULL) {
         volatile char NPY_UNUSED(c) = *(char*)param;
     }
-
-    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
+    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
 int npy_clear_floatstatus_barrier(char * param)
 {
-    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    if (fpstatus != 0) {
-        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
-                      FE_UNDERFLOW | FE_INVALID);
-    }
+    fp_swap_flag(0);
 
     return fpstatus;
 }
 
-
 void npy_set_floatstatus_divbyzero(void)
 {
-    feraiseexcept(FE_DIVBYZERO);
+    fp_raise_xcp(FP_DIV_BY_ZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    feraiseexcept(FE_OVERFLOW);
+    fp_raise_xcp(FP_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    feraiseexcept(FE_UNDERFLOW);
+    fp_raise_xcp(FP_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    feraiseexcept(FE_INVALID);
-}
-
-#elif defined(_AIX)
-#include <float.h>
-#include <fpxcp.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
-    int fpstatus = fp_read_flag();
-    /*
-     * By using a volatile, the compiler cannot reorder this call
-     */
-    if (param != NULL) {
-        volatile char NPY_UNUSED(c) = *(char*)param;
-    }
-    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
-           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
-           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
-           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
+    fp_raise_xcp(FP_INVALID);
 }
 
-int npy_clear_floatstatus_barrier(char * param)
-{
-    int fpstatus = npy_get_floatstatus_barrier(param);
-    fp_swap_flag(0);
+#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha))
 
-    return fpstatus;
-}
+/*
+ * By using a volatile floating point value,
+ * the compiler is forced to actually do the requested
+ * operations because of potential concurrency.
+ *
+ * We shouldn't write multiple values to a single
+ * global here, because that would cause
+ * a race condition.
+ */
+static volatile double _npy_floatstatus_x,
+    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
+    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    fp_raise_xcp(FP_DIV_BY_ZERO);
+    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    fp_raise_xcp(FP_OVERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    fp_raise_xcp(FP_UNDERFLOW);
+    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    fp_raise_xcp(FP_INVALID);
+    _npy_floatstatus_inf = NPY_INFINITY;
+    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
 }
 
-#else
-
 /* MS Windows -----------------------------------------------------*/
 #if defined(_MSC_VER)
 
 #include <float.h>
 
-
 int npy_get_floatstatus_barrier(char *param)
 {
     /*
@@ -796,53 +781,61 @@ int npy_clear_floatstatus_barrier(char *param)
     return fpstatus;
 }
 
+#endif
+/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
+
 #else
+/* General C99 <fenv.h> code, should work on most platforms */
+#  include <fenv.h>
 
-int npy_get_floatstatus_barrier(char *NPY_UNUSED(param))
+int npy_get_floatstatus_barrier(char* param)
 {
-    return 0;
+    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                                FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
+
+    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
+           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
+           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
+           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus_barrier(char *param)
+int npy_clear_floatstatus_barrier(char * param)
 {
+    /* testing float status is 50-100 times faster than clearing on x86 */
     int fpstatus = npy_get_floatstatus_barrier(param);
-    return 0;
-}
+    if (fpstatus != 0) {
+        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
+                      FE_UNDERFLOW | FE_INVALID);
+    }
 
-#endif
+    return fpstatus;
+}
 
-/*
- * By using a volatile floating point value,
- * the compiler is forced to actually do the requested
- * operations because of potential concurrency.
- *
- * We shouldn't write multiple values to a single
- * global here, because that would cause
- * a race condition.
- */
-static volatile double _npy_floatstatus_x,
-    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
-    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
 
 void npy_set_floatstatus_divbyzero(void)
 {
-    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
+    feraiseexcept(FE_DIVBYZERO);
 }
 
 void npy_set_floatstatus_overflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
+    feraiseexcept(FE_OVERFLOW);
 }
 
 void npy_set_floatstatus_underflow(void)
 {
-    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
+    feraiseexcept(FE_UNDERFLOW);
 }
 
 void npy_set_floatstatus_invalid(void)
 {
-    _npy_floatstatus_inf = NPY_INFINITY;
-    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
+    feraiseexcept(FE_INVALID);
 }
 
 #endif
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index fcbdbe330..8cb74f177 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -128,6 +128,8 @@ static void
 /**end repeat**/
 
 char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
+/* for use with matrix_multiply code, but different signature */
+char *matmul_signature = "(m?,n),(n,p?)->(m?,p?)";
 
 /**begin repeat
 
@@ -195,6 +197,45 @@ static void
 
 /**end repeat**/
 
+char *cross1d_signature = "(3),(3)->(3)";
+
+/**begin repeat
+
+   #TYPE=LONG,DOUBLE#
+   #typ=npy_long, npy_double#
+*/
+
+/*
+ *  Cross product of the 3-vectors in1[n, :] and in2[n, :] for each outer n:
+ *        out[n, 0] = in1[n, 1]*in2[n, 2] - in1[n, 2]*in2[n, 1]
+ *        out[n, 1] = in1[n, 2]*in2[n, 0] - in1[n, 0]*in2[n, 2]
+ *        out[n, 2] = in1[n, 0]*in2[n, 1] - in1[n, 1]*in2[n, 0]
+ */
+static void
+@TYPE@_cross1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_3
+    npy_intp is1=steps[0], is2=steps[1], os = steps[2];
+    BEGIN_OUTER_LOOP_3
+        @typ@ i1_x = *(@typ@ *)(args[0] + 0*is1);
+        @typ@ i1_y = *(@typ@ *)(args[0] + 1*is1);
+        @typ@ i1_z = *(@typ@ *)(args[0] + 2*is1);
+
+        @typ@ i2_x = *(@typ@ *)(args[1] + 0*is2);
+        @typ@ i2_y = *(@typ@ *)(args[1] + 1*is2);
+        @typ@ i2_z = *(@typ@ *)(args[1] + 2*is2);
+        char *op = args[2];
+
+        *(@typ@ *)op = i1_y * i2_z - i1_z * i2_y;
+        op += os;
+        *(@typ@ *)op = i1_z * i2_x - i1_x * i2_z;
+        op += os;
+        *(@typ@ *)op = i1_x * i2_y - i1_y * i2_x;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
 char *euclidean_pdist_signature = "(n,d)->(p)";
 
 /**begin repeat
@@ -285,17 +326,39 @@ static void
 
 /**end repeat**/
 
+/*  The following lines were generated using a slightly modified
+    version of code_generators/generate_umath.py and adding these
+    lines to defdict:
+
+defdict = {
+'inner1d' :
+    Ufunc(2, 1, None_,
+        r'''inner on the last dimension and broadcast on the rest \n"
+        "     \"(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+'innerwt' :
+    Ufunc(3, 1, None_,
+        r'''inner1d with a weight argument \n"
+        "     \"(i),(i),(i)->()\" \n''',
+        TD('ld'),
+        ),
+}
+
+*/
 
 static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
-static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
+static void *inner1d_data[] = { (void *)NULL, (void *)NULL };
 static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction innerwt_functions[] = { LONG_innerwt, DOUBLE_innerwt };
-static void * innerwt_data[] = { (void *)NULL, (void *)NULL };
+static void *innerwt_data[] = { (void *)NULL, (void *)NULL };
 static char innerwt_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multiply, FLOAT_matrix_multiply, DOUBLE_matrix_multiply };
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG,  NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,  NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
-
+static PyUFuncGenericFunction cross1d_functions[] = { LONG_cross1d, DOUBLE_cross1d };
+static void *cross1d_data[] = { (void *)NULL, (void *)NULL };
+static char cross1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 static PyUFuncGenericFunction euclidean_pdist_functions[] =
                             { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
 static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
@@ -303,7 +366,7 @@ static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
                                              NPY_DOUBLE, NPY_DOUBLE };
 
 static PyUFuncGenericFunction cumsum_functions[] = { LONG_cumsum, DOUBLE_cumsum };
-static void * cumsum_data[] = { (void *)NULL, (void *)NULL };
+static void *cumsum_data[] = { (void *)NULL, (void *)NULL };
 static char cumsum_signatures[] = { NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE };
 
 
@@ -346,6 +409,17 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
+                    matrix_multiply_data, matrix_multiply_signatures,
+                    3, 2, 1, PyUFunc_None, "matmul",
+                    "matmul on last two dimensions, with some being optional\n"
+                    "     \"(m?,n),(n,p?)->(m?,p?)\" \n",
+                    0, matmul_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "matmul", f);
+    Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
                     eucldiean_pdist_data, euclidean_pdist_signatures,
                     2, 1, 1, PyUFunc_None, "euclidean_pdist",
@@ -376,6 +450,16 @@ addUfuncs(PyObject *dictionary) {
     }
     PyDict_SetItemString(dictionary, "inner1d_no_doc", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(cross1d_functions, cross1d_data,
+                    cross1d_signatures, 2, 2, 1, PyUFunc_None, "cross1d",
+                    "cross product on the last dimension and broadcast on the rest \n"\
+                    "     \"(3),(3)->(3)\" \n",
+                    0, cross1d_signature);
+    if (f == NULL) {
+        return -1;
+    }
+    PyDict_SetItemString(dictionary, "cross1d", f);
+    Py_DECREF(f);
 
     return 0;
 }
@@ -385,9 +469,10 @@ static PyObject *
 UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
 {
     int nin, nout, i;
-    PyObject *signature, *sig_str;
-    PyUFuncObject *f = NULL;
-    PyObject *core_num_dims = NULL, *core_dim_ixs = NULL;
+    PyObject *signature=NULL, *sig_str=NULL;
+    PyUFuncObject *f=NULL;
+    PyObject *core_num_dims=NULL, *core_dim_ixs=NULL;
+    PyObject *core_dim_flags=NULL, *core_dim_sizes=NULL;
     int core_enabled;
     int core_num_ixs = 0;
 
@@ -442,7 +527,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
             goto fail;
         }
         for (i = 0; i < core_num_ixs; i++) {
-            PyObject * val = PyLong_FromLong(f->core_dim_ixs[i]);
+            PyObject *val = PyLong_FromLong(f->core_dim_ixs[i]);
             PyTuple_SET_ITEM(core_dim_ixs, i, val);
         }
     }
@@ -450,13 +535,44 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
         Py_INCREF(Py_None);
         core_dim_ixs = Py_None;
     }
+    if (f->core_dim_flags != NULL) {
+        core_dim_flags = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_flags == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_flags[i]);
+            PyTuple_SET_ITEM(core_dim_flags, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_flags = Py_None;
+    }
+    if (f->core_dim_sizes != NULL) {
+        core_dim_sizes = PyTuple_New(f->core_num_dim_ix);
+        if (core_dim_sizes == NULL) {
+            goto fail;
+        }
+        for (i = 0; i < f->core_num_dim_ix; i++) {
+            PyObject *val = PyLong_FromLong(f->core_dim_sizes[i]);
+            PyTuple_SET_ITEM(core_dim_sizes, i, val);
+        }
+    }
+    else {
+        Py_INCREF(Py_None);
+        core_dim_sizes = Py_None;
+    }
     Py_DECREF(f);
-    return Py_BuildValue("iOO", core_enabled, core_num_dims, core_dim_ixs);
+    return Py_BuildValue("iOOOO", core_enabled, core_num_dims,
+                         core_dim_ixs, core_dim_flags, core_dim_sizes);
 
 fail:
     Py_XDECREF(f);
     Py_XDECREF(core_num_dims);
     Py_XDECREF(core_dim_ixs);
+    Py_XDECREF(core_dim_flags);
+    Py_XDECREF(core_dim_sizes);
     return NULL;
 }
 
@@ -464,8 +580,8 @@ static PyMethodDef UMath_TestsMethods[] = {
     {"test_signature",  UMath_Tests_test_signature, METH_VARARGS,
      "Test signature parsing of ufunc. \n"
      "Arguments: nin nout signature \n"
-     "If fails, it returns NULL. Otherwise it will returns 0 for scalar ufunc "
-     "and 1 for generalized ufunc. \n",
+     "If fails, it returns NULL. Otherwise it returns a tuple of ufunc "
+     "internals. \n",
      },
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
@@ -504,6 +620,7 @@ PyMODINIT_FUNC init_umath_tests(void) {
     if (m == NULL) {
         return RETVAL(NULL);
     }
+
     import_array();
     import_ufunc();
 
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 5c0568c12..47f9168e5 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -17,8 +17,6 @@
 
 #include "lowlevel_strided_loops.h"
 #include "numpy/npy_common.h"
-/* for NO_FLOATING_POINT_SUPPORT */
-#include "numpy/ufuncobject.h"
 #include "numpy/npy_math.h"
 #ifdef NPY_HAVE_SSE2_INTRINSICS
 #include <emmintrin.h>
@@ -132,7 +130,6 @@ abs_ptrdiff(char *a, char *b)
  * #func = sqrt, absolute, negative, minimum, maximum#
  * #check = IS_BLOCKABLE_UNARY*3, IS_BLOCKABLE_REDUCE*2 #
  * #name = unary*3, unary_reduce*2#
- * #minmax = 0*3, 1*2#
  */
 
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -146,9 +143,6 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
 static NPY_INLINE int
 run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
 {
-#if @minmax@ && (defined NO_FLOATING_POINT_SUPPORT)
-    return 0;
-#else
 #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
     if (@check@(sizeof(@type@), 16)) {
         sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
@@ -156,7 +150,6 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
     }
 #endif
     return 0;
-#endif
 }
 
 /**end repeat1**/
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 459b0a594..b82c74109 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -46,6 +46,7 @@
 #include "npy_import.h"
 #include "extobj.h"
 #include "common.h"
+#include "numpyos.h"
 
 /********** PRINTF DEBUG TRACING **************/
 #define NPY_UF_DBG_TRACING 0
@@ -480,7 +481,27 @@ _is_alnum_underscore(char ch)
 }
 
 /*
- * Return the ending position of a variable name
+ * Parse a base-10 integer at the start of str; return -1 on failure
+ */
+static npy_intp
+_get_size(const char* str)
+{
+    char *stop;
+    npy_longlong size = NumPyOS_strtoll(str, &stop, 10);
+
+    if (stop == str || _is_alpha_underscore(*stop)) {
+        /* no digits consumed, or a name character directly follows them */
+         return -1;
+    }
+    if (size >= NPY_MAX_INTP || size <= NPY_MIN_INTP) {
+        /* value is at or beyond the limits of npy_intp */
+        return -1;
+    }
+    return size;
+ }
+
+/*
+ * Return the ending position of a variable name including optional modifier
  */
 static int
 _get_end_of_name(const char* str, int offset)
@@ -489,6 +510,9 @@ _get_end_of_name(const char* str, int offset)
     while (_is_alnum_underscore(str[ret])) {
         ret++;
     }
+    if (str[ret] == '?') {
+        ret ++;
+    }
     return ret;
 }
 
@@ -530,7 +554,6 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
                         "_parse_signature with NULL signature");
         return -1;
     }
-
     len = strlen(signature);
     ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
     if (ufunc->core_signature) {
@@ -546,13 +569,22 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
     ufunc->core_enabled = 1;
     ufunc->core_num_dim_ix = 0;
     ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
     ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
-    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
-        || ufunc->core_offsets == NULL) {
+    /* The next three items will be shrunk later */
+    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len);
+    ufunc->core_dim_sizes = PyArray_malloc(sizeof(npy_intp) * len);
+    ufunc->core_dim_flags = PyArray_malloc(sizeof(npy_uint32) * len);
+
+    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL ||
+        ufunc->core_offsets == NULL ||
+        ufunc->core_dim_sizes == NULL ||
+        ufunc->core_dim_flags == NULL) {
         PyErr_NoMemory();
         goto fail;
     }
+    for (i = 0; i < len; i++) {
+        ufunc->core_dim_flags[i] = 0;
+    }
 
     i = _next_non_white_space(signature, 0);
     while (signature[i] != '\0') {
@@ -577,26 +609,70 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         i = _next_non_white_space(signature, i + 1);
         while (signature[i] != ')') {
             /* loop over core dimensions */
-            int j = 0;
-            if (!_is_alpha_underscore(signature[i])) {
-                parse_error = "expect dimension name";
+            int ix, i_end;
+            npy_intp frozen_size;
+            npy_bool can_ignore;
+
+            if (signature[i] == '\0') {
+                parse_error = "unexpected end of signature string";
                 goto fail;
             }
-            while (j < ufunc->core_num_dim_ix) {
-                if (_is_same_name(signature+i, var_names[j])) {
+            /*
+             * Is this a variable or a fixed size dimension?
+             */
+            if (_is_alpha_underscore(signature[i])) {
+                frozen_size = -1;
+            }
+            else {
+                frozen_size = (npy_intp)_get_size(signature + i);
+                if (frozen_size <= 0) {
+                    parse_error = "expect dimension name or non-zero frozen size";
+                    goto fail;
+                }
+            }
+            /* Is this dimension flexible? */
+            i_end = _get_end_of_name(signature, i);
+            can_ignore = (i_end > 0 && signature[i_end - 1] == '?');
+            /*
+             * Determine whether we already saw this dimension name,
+             * get its index, and set its properties
+             */
+            for(ix = 0; ix < ufunc->core_num_dim_ix; ix++) {
+                if (frozen_size > 0 ?
+                    frozen_size == ufunc->core_dim_sizes[ix] :
+                    _is_same_name(signature + i, var_names[ix])) {
                     break;
                 }
-                j++;
             }
-            if (j >= ufunc->core_num_dim_ix) {
-                var_names[j] = signature+i;
+            /*
+             * If a new dimension, store its properties; if old, check consistency.
+             */
+            if (ix == ufunc->core_num_dim_ix) {
                 ufunc->core_num_dim_ix++;
+                var_names[ix] = signature + i;
+                ufunc->core_dim_sizes[ix] = frozen_size;
+                if (frozen_size < 0) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_SIZE_INFERRED;
+                }
+                if (can_ignore) {
+                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_CAN_IGNORE;
+                }
+            } else {
+                if (can_ignore && !(ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? cannot be used, name already seen without ?";
+                    goto fail;
+                }
+                if (!can_ignore && (ufunc->core_dim_flags[ix] &
+                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
+                    parse_error = "? must be used, name already seen with ?";
+                    goto fail;
+                }
             }
-            ufunc->core_dim_ixs[cur_core_dim] = j;
+            ufunc->core_dim_ixs[cur_core_dim] = ix;
             cur_core_dim++;
             nd++;
-            i = _get_end_of_name(signature, i);
-            i = _next_non_white_space(signature, i);
+            i = _next_non_white_space(signature, i_end);
             if (signature[i] != ',' && signature[i] != ')') {
                 parse_error = "expect ',' or ')'";
                 goto fail;
@@ -633,7 +709,14 @@ _parse_signature(PyUFuncObject *ufunc, const char *signature)
         goto fail;
     }
     ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
-            sizeof(int)*cur_core_dim);
+            sizeof(int) * cur_core_dim);
+    ufunc->core_dim_sizes = PyArray_realloc(
+            ufunc->core_dim_sizes,
+            sizeof(npy_intp) * ufunc->core_num_dim_ix);
+    ufunc->core_dim_flags = PyArray_realloc(
+            ufunc->core_dim_flags,
+            sizeof(npy_uint32) * ufunc->core_num_dim_ix);
+
     /* check for trivial core-signature, e.g. "(),()->()" */
     if (cur_core_dim == 0) {
         ufunc->core_enabled = 0;
@@ -1935,6 +2018,72 @@ fail:
 }
 
 /*
+ * Check that operands have enough dimensions. Absent flexible dimensions are
+ * flagged missing and op_core_num_dims reduced. Returns -1 on error, else 0.
+ */
+static int
+_validate_num_dims(PyUFuncObject *ufunc, PyArrayObject **op,
+                   npy_uint32 *core_dim_flags,
+                   int *op_core_num_dims) {
+    int i, j;
+    int nin = ufunc->nin;
+    int nop = ufunc->nargs;
+
+    for (i = 0; i < nop; i++) {
+        if (op[i] != NULL) {
+            int op_ndim = PyArray_NDIM(op[i]);
+
+            if (op_ndim < op_core_num_dims[i]) {
+                int core_offset = ufunc->core_offsets[i];
+                /* Too few dimensions, but some might be flexible (absent) */
+                for (j = core_offset;
+                     j < core_offset + ufunc->core_num_dims[i]; j++) {
+                    int core_dim_index = ufunc->core_dim_ixs[j];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_CAN_IGNORE)) {
+                        int i1, j1, k;
+                        /*
+                         * Found a dimension that can be ignored. Flag that
+                         * it is missing, and unflag that it can be ignored
+                         * (XOR clears the bit, which is known to be set).
+                         */
+                        core_dim_flags[core_dim_index] |= UFUNC_CORE_DIM_MISSING;
+                        core_dim_flags[core_dim_index] ^= UFUNC_CORE_DIM_CAN_IGNORE;
+                        /*
+                         * Reduce the number of core dimensions for all
+                         * operands that use this one (including ours; k
+                         * steps through core_dim_ixs), and check if now OK.
+                         */
+                        for (i1 = 0, k=0; i1 < nop; i1++) {
+                            for (j1 = 0; j1 < ufunc->core_num_dims[i1]; j1++) {
+                                if (ufunc->core_dim_ixs[k++] == core_dim_index) {
+                                    op_core_num_dims[i1]--;
+                                }
+                            }
+                        }
+                        if (op_ndim == op_core_num_dims[i]) {
+                            break;
+                        }
+                    }
+                }
+                if (op_ndim < op_core_num_dims[i]) {
+                    PyErr_Format(PyExc_ValueError,
+                         "%s: %s operand %d does not have enough "
+                         "dimensions (has %d, gufunc core with "
+                         "signature %s requires %d)",
+                         ufunc_get_name_cstr(ufunc),
+                         i < nin ? "Input" : "Output",
+                         i < nin ? i : i - nin, PyArray_NDIM(op[i]),
+                         ufunc->core_signature, op_core_num_dims[i]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/*
  * Check whether any of the outputs of a gufunc has core dimensions.
  */
 static int
@@ -2007,7 +2156,7 @@ _check_keepdims_support(PyUFuncObject *ufunc) {
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
+_parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
                 PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nin = ufunc->nin;
     int nop = ufunc->nargs;
@@ -2037,7 +2186,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
         PyObject *op_axes_tuple, *axis_item;
         int axis, op_axis;
 
-        op_ncore = core_num_dims[iop];
+        op_ncore = op_core_num_dims[iop];
         if (op[iop] != NULL) {
             op_ndim = PyArray_NDIM(op[iop]);
             op_nbroadcast = op_ndim - op_ncore;
@@ -2191,57 +2340,72 @@ _parse_axis_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axis,
  *
  * Returns 0 on success, and -1 on failure
  *
- * The behavior has been changed in NumPy 1.10.0, and the following
+ * The behavior has been changed in NumPy 1.16.0, and the following
  * requirements must be fulfilled or an error will be raised:
  *  * Arguments, both input and output, must have at least as many
  *    dimensions as the corresponding number of core dimensions. In
- *    previous versions, 1's were prepended to the shape as needed.
+ *    versions before 1.10, 1's were prepended to the shape as needed.
  *  * Core dimensions with same labels must have exactly matching sizes.
- *    In previous versions, core dimensions of size 1 would broadcast
+ *    In versions before 1.10, core dimensions of size 1 would broadcast
  *    against other core dimensions with the same label.
  *  * All core dimensions must have their size specified by a passed in
- *    input or output argument. In previous versions, core dimensions in
+ *    input or output argument. In versions before 1.10, core dimensions in
  *    an output argument that were not specified in an input argument,
  *    and whose size could not be inferred from a passed in output
  *    argument, would have their size set to 1.
+ *  * Core dimensions may be fixed, new in NumPy 1.16
  */
 static int
 _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
-                   npy_intp* core_dim_sizes, int **remap_axis) {
+                   int *op_core_num_dims, npy_uint32 *core_dim_flags,
+                   npy_intp *core_dim_sizes, int **remap_axis) {
     int i;
     int nin = ufunc->nin;
     int nout = ufunc->nout;
     int nop = nin + nout;
 
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        core_dim_sizes[i] = -1;
-    }
     for (i = 0; i < nop; ++i) {
         if (op[i] != NULL) {
             int idim;
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = ufunc->core_num_dims[i];
-            int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+            int core_start_dim = PyArray_NDIM(op[i]) - op_core_num_dims[i];
+            int dim_delta = 0;
+
+            /* checked before this routine gets called */
+            assert(core_start_dim >= 0);
+
             /*
              * Make sure every core dimension exactly matches all other core
-             * dimensions with the same label.
+             * dimensions with the same label. Note that flexible dimensions
+             * may have been removed at this point, if so, they are marked
+             * with UFUNC_CORE_DIM_MISSING.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
-                npy_intp op_dim_size = PyArray_DIM(
-                    op[i], REMAP_AXIS(i, core_start_dim+idim));
-
-                if (core_dim_sizes[core_dim_index] == -1) {
+            for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                int core_index = dim_offset + idim;
+                int core_dim_index = ufunc->core_dim_ixs[core_index];
+                npy_intp core_dim_size = core_dim_sizes[core_dim_index];
+                npy_intp op_dim_size;
+
+                /* can only happen if flexible; dimension missing altogether */
+                if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                    op_dim_size = 1;
+                    dim_delta++; /* for indexing in dimensions */
+                }
+                else {
+                    op_dim_size = PyArray_DIM(op[i],
+                             REMAP_AXIS(i, core_start_dim + idim - dim_delta));
+                }
+                if (core_dim_sizes[core_dim_index] < 0) {
                     core_dim_sizes[core_dim_index] = op_dim_size;
                 }
-                else if (op_dim_size != core_dim_sizes[core_dim_index]) {
+                else if (op_dim_size != core_dim_size) {
                     PyErr_Format(PyExc_ValueError,
                             "%s: %s operand %d has a mismatch in its "
                             "core dimension %d, with gufunc "
                             "signature %s (size %zd is different "
                             "from %zd)",
                             ufunc_get_name_cstr(ufunc), i < nin ? "Input" : "Output",
-                            i < nin ? i : i - nin, idim,
+                            i < nin ? i : i - nin, idim - dim_delta,
                             ufunc->core_signature, op_dim_size,
                             core_dim_sizes[core_dim_index]);
                     return -1;
@@ -2253,39 +2417,29 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
     /*
      * Make sure no core dimension is unspecified.
      */
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        if (core_dim_sizes[i] == -1) {
-            break;
-        }
-    }
-    if (i != ufunc->core_num_dim_ix) {
-        /*
-         * There is at least one core dimension missing, find in which
-         * operand it comes up first (it has to be an output operand).
-         */
-        const int missing_core_dim = i;
-        int out_op;
-        for (out_op = nin; out_op < nop; ++out_op) {
-            int first_idx = ufunc->core_offsets[out_op];
-            int last_idx = first_idx + ufunc->core_num_dims[out_op];
-            for (i = first_idx; i < last_idx; ++i) {
-                if (ufunc->core_dim_ixs[i] == missing_core_dim) {
-                    break;
-                }
-            }
-            if (i < last_idx) {
-                /* Change index offsets for error message */
-                out_op -= nin;
-                i -= first_idx;
-                break;
+    for (i = nin; i < nop; ++i) {
+        int idim;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+
+            /* check all cases where the size has not yet been set */
+            if (core_dim_sizes[core_dim_index] < 0) {
+                /*
+                 * Oops, this dimension was never specified
+                 * (can only happen if output op not given)
+                 */
+                PyErr_Format(PyExc_ValueError,
+                        "%s: Output operand %d has core dimension %d "
+                        "unspecified, with gufunc signature %s",
+                        ufunc_get_name_cstr(ufunc), i - nin, idim,
+                        ufunc->core_signature);
+                return -1;
             }
         }
-        PyErr_Format(PyExc_ValueError,
-                     "%s: Output operand %d has core dimension %d "
-                     "unspecified, with gufunc signature %s",
-                     ufunc_get_name_cstr(ufunc), out_op, i, ufunc->core_signature);
-        return -1;
     }
+
     return 0;
 }
 
@@ -2324,6 +2478,26 @@ _get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) {
     }
 }
 
+/*
+ * Copy core_num_dims, core_dim_sizes and core_dim_flags of the ufunc to arrays
+ * that may be changed during execution.  Returns 0 (cannot fail at present).
+ */
+static int
+_initialize_variable_parts(PyUFuncObject *ufunc,
+                           int op_core_num_dims[],
+                           npy_intp core_dim_sizes[],
+                           npy_uint32 core_dim_flags[]) {
+    int i;
+
+    for (i = 0; i < ufunc->nargs; i++) {
+        op_core_num_dims[i] = ufunc->core_num_dims[i];
+    }
+    for (i = 0; i < ufunc->core_num_dim_ix; i++) {
+        core_dim_sizes[i] = ufunc->core_dim_sizes[i];
+        core_dim_flags[i] = ufunc->core_dim_flags[i];
+    }
+    return 0;
+}
 
 static int
 PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
@@ -2340,10 +2514,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     /* Use remapped axes for generalized ufunc */
     int broadcast_ndim, iter_ndim;
-    int core_num_dims_array[NPY_MAXARGS];
-    int *core_num_dims;
+    int op_core_num_dims[NPY_MAXARGS];
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
+    npy_uint32 core_dim_flags[NPY_MAXARGS];
 
     npy_uint32 op_flags[NPY_MAXARGS];
     npy_intp iter_shape[NPY_MAXARGS];
@@ -2398,6 +2572,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         dtypes[i] = NULL;
         arr_prep[i] = NULL;
     }
+    /* Initialize possibly variable parts to the values from the ufunc */
+    retval = _initialize_variable_parts(ufunc, op_core_num_dims,
+                                        core_dim_sizes, core_dim_flags);
+    if (retval < 0) {
+        goto fail;
+    }
 
     NPY_UF_DBG_PRINT("Getting arguments\n");
 
@@ -2429,41 +2609,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         }
     }
     /*
-     * If keepdims is set and true, signal all dimensions will be the same.
+     * If keepdims is set and true, which means all input dimensions are
+     * the same, signal that all output dimensions will be the same too.
      */
     if (keepdims == 1) {
-        int num_dims = ufunc->core_num_dims[0];
-        for (i = 0; i < nop; ++i) {
-            core_num_dims_array[i] = num_dims;
+        int num_dims = op_core_num_dims[0];
+        for (i = nin; i < nop; ++i) {
+            op_core_num_dims[i] = num_dims;
         }
-        core_num_dims = core_num_dims_array;
     }
     else {
         /* keepdims was not set or was false; no adjustment necessary */
-        core_num_dims = ufunc->core_num_dims;
         keepdims = 0;
     }
     /*
      * Check that operands have the minimum dimensions required.
      * (Just checks core; broadcast dimensions are tested by the iterator.)
      */
-    for (i = 0; i < nop; i++) {
-        if (op[i] != NULL && PyArray_NDIM(op[i]) < core_num_dims[i]) {
-            PyErr_Format(PyExc_ValueError,
-                         "%s: %s operand %d does not have enough "
-                         "dimensions (has %d, gufunc core with "
-                         "signature %s requires %d)",
-                         ufunc_name,
-                         i < nin ? "Input" : "Output",
-                         i < nin ? i : i - nin,
-                         PyArray_NDIM(op[i]),
-                         ufunc->core_signature,
-                         core_num_dims[i]);
-            retval = -1;
-            goto fail;
-        }
+    retval = _validate_num_dims(ufunc, op, core_dim_flags,
+                                op_core_num_dims);
+    if (retval < 0) {
+        goto fail;
     }
-
     /*
      * Figure out the number of iteration dimensions, which
      * is the broadcast result of all the input non-core
@@ -2471,30 +2638,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
-        int n = PyArray_NDIM(op[i]) - core_num_dims[i];
+        int n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         if (n > broadcast_ndim) {
             broadcast_ndim = n;
         }
     }
 
-    /*
-     * Figure out the number of iterator creation dimensions,
-     * which is the broadcast dimensions + all the core dimensions of
-     * the outputs, so that the iterator can allocate those output
-     * dimensions following the rules of order='F', for example.
-     */
-    iter_ndim = broadcast_ndim;
-    for (i = nin; i < nop; ++i) {
-        iter_ndim += core_num_dims[i];
-    }
-    if (iter_ndim > NPY_MAXDIMS) {
-        PyErr_Format(PyExc_ValueError,
-                    "too many dimensions for generalized ufunc %s",
-                    ufunc_name);
-        retval = -1;
-        goto fail;
-    }
-
     /* Possibly remap axes. */
     if (axes != NULL || axis != NULL) {
         remap_axis = PyArray_malloc(sizeof(remap_axis[0]) * nop);
@@ -2508,11 +2657,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS;
         }
         if (axis) {
-            retval = _parse_axis_arg(ufunc, core_num_dims, axis, op,
+            retval = _parse_axis_arg(ufunc, op_core_num_dims, axis, op,
                                      broadcast_ndim, remap_axis);
         }
         else {
-            retval = _parse_axes_arg(ufunc, core_num_dims, axes, op,
+            retval = _parse_axes_arg(ufunc, op_core_num_dims, axes, op,
                                      broadcast_ndim, remap_axis);
         }
         if(retval < 0) {
@@ -2521,10 +2670,28 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     }
 
     /* Collect the lengths of the labelled core dimensions */
-    retval = _get_coredim_sizes(ufunc, op, core_dim_sizes, remap_axis);
+    retval = _get_coredim_sizes(ufunc, op, op_core_num_dims, core_dim_flags,
+                                core_dim_sizes, remap_axis);
     if(retval < 0) {
         goto fail;
     }
+    /*
+     * Figure out the number of iterator creation dimensions,
+     * which is the broadcast dimensions + all the core dimensions of
+     * the outputs, so that the iterator can allocate those output
+     * dimensions following the rules of order='F', for example.
+     */
+    iter_ndim = broadcast_ndim;
+    for (i = nin; i < nop; ++i) {
+        iter_ndim += op_core_num_dims[i];
+    }
+    if (iter_ndim > NPY_MAXDIMS) {
+        PyErr_Format(PyExc_ValueError,
+                    "too many dimensions for generalized ufunc %s",
+                    ufunc_name);
+        retval = -1;
+        goto fail;
+    }
 
     /* Fill in the initial part of 'iter_shape' */
     for (idim = 0; idim < broadcast_ndim; ++idim) {
@@ -2537,11 +2704,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         int n;
 
         if (op[i]) {
-            /*
-             * Note that n may be negative if broadcasting
-             * extends into the core dimensions.
-             */
-            n = PyArray_NDIM(op[i]) - core_num_dims[i];
+            n = PyArray_NDIM(op[i]) - op_core_num_dims[i];
         }
         else {
             n = broadcast_ndim;
@@ -2565,24 +2728,49 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         /* Except for when it belongs to this output */
         if (i >= nin) {
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = core_num_dims[i];
+            int num_removed = 0;
             /*
              * Fill in 'iter_shape' and 'op_axes' for the core dimensions
              * of this output. Here, we have to be careful: if keepdims
-             * was used, then this axis is not a real core dimension,
-             * but is being added back for broadcasting, so its size is 1.
+             * was used, then the axes are not real core dimensions, but
+             * are being added back for broadcasting, so their size is 1.
+             * If the axis was removed, we should skip altogether.
              */
-            for (idim = 0; idim < num_dims; ++idim) {
-                iter_shape[j] = keepdims ? 1 : core_dim_sizes[
-                                        ufunc->core_dim_ixs[dim_offset + idim]];
-                op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
-                ++j;
+            if (keepdims) {
+                for (idim = 0; idim < op_core_num_dims[i]; ++idim) {
+                    iter_shape[j] = 1;
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
+                    ++j;
+                }
+            }
+            else {
+                for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
+                    int core_index = dim_offset + idim;
+                    int core_dim_index = ufunc->core_dim_ixs[core_index];
+                    if ((core_dim_flags[core_dim_index] &
+                         UFUNC_CORE_DIM_MISSING)) {
+                        /* skip it */
+                        num_removed++;
+                        continue;
+                    }
+                    iter_shape[j] = core_dim_sizes[ufunc->core_dim_ixs[core_index]];
+                    op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim - num_removed);
+                    ++j;
+                }
             }
         }
 
         op_axes[i] = op_axes_arrays[i];
     }
 
+#if NPY_UF_DBG_TRACING
+    printf("iter shapes:");
+    for (j=0; j < iter_ndim; j++) {
+        printf(" %ld", iter_shape[j]);
+    }
+    printf("\n");
+#endif
+
     /* Get the buffersize and errormask */
     if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) {
         retval = -1;
@@ -2705,8 +2893,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Copy the strides after the first nop */
     idim = nop;
     for (i = 0; i < nop; ++i) {
-        int num_dims = ufunc->core_num_dims[i];
-        int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
         /*
          * Need to use the arrays in the iterator, not op, because
          * a copy with a different-sized type may have been made.
@@ -2714,20 +2900,31 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i];
         npy_intp *shape = PyArray_SHAPE(arr);
         npy_intp *strides = PyArray_STRIDES(arr);
-        for (j = 0; j < num_dims; ++j) {
-            if (core_start_dim + j >= 0) {
-                /*
-                 * Force the stride to zero when the shape is 1, so
-                 * that the broadcasting works right.
-                 */
-                int remapped_axis = REMAP_AXIS(i, core_start_dim + j);
+        /*
+         * Could be negative if flexible dims are used, but not for
+         * keepdims, since those dimensions are allocated in arr.
+         */
+        int core_start_dim = PyArray_NDIM(arr) - op_core_num_dims[i];
+        int num_removed = 0;
+        int dim_offset = ufunc->core_offsets[i];
+
+        for (j = 0; j < ufunc->core_num_dims[i]; ++j) {
+            int core_dim_index = ufunc->core_dim_ixs[dim_offset + j];
+            /*
+             * Force zero stride when the shape is 1 (always the case for
+             * missing dimensions), so that broadcasting works right.
+             */
+            if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
+                num_removed++;
+                inner_strides[idim++] = 0;
+            }
+            else {
+                int remapped_axis = REMAP_AXIS(i, core_start_dim + j - num_removed);
                 if (shape[remapped_axis] != 1) {
                     inner_strides[idim++] = strides[remapped_axis];
                 } else {
                     inner_strides[idim++] = 0;
                 }
-            } else {
-                inner_strides[idim++] = 0;
             }
         }
     }
@@ -4644,7 +4841,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
                                      int unused, const char *signature)
 {
     PyUFuncObject *ufunc;
-
     if (nin + nout > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
                      "Cannot construct a ufunc with more than %d operands "
@@ -4657,11 +4853,9 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     if (ufunc == NULL) {
         return NULL;
     }
+    memset(ufunc, 0, sizeof(PyUFuncObject));
     PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
 
-    ufunc->reserved1 = 0;
-    ufunc->reserved2 = NULL;
-
     ufunc->nin = nin;
     ufunc->nout = nout;
     ufunc->nargs = nin+nout;
@@ -4671,9 +4865,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     ufunc->data = data;
     ufunc->types = types;
     ufunc->ntypes = ntypes;
-    ufunc->ptr = NULL;
-    ufunc->obj = NULL;
-    ufunc->userloops=NULL;
 
     /* Type resolution and inner loop selection functions */
     ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
@@ -4694,15 +4885,6 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     }
     memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);
 
-    ufunc->iter_flags = 0;
-
-    /* generalized ufunc */
-    ufunc->core_enabled = 0;
-    ufunc->core_num_dim_ix = 0;
-    ufunc->core_num_dims = NULL;
-    ufunc->core_dim_ixs = NULL;
-    ufunc->core_offsets = NULL;
-    ufunc->core_signature = NULL;
     if (signature != NULL) {
         if (_parse_signature(ufunc, signature) != 0) {
             Py_DECREF(ufunc);
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index fe0e425fd..e4446e07f 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -257,6 +257,21 @@ class TestDateTime(object):
         arr = np.array([dt, dt]).astype('datetime64')
         assert_equal(arr.dtype, np.dtype('M8[us]'))
 
+    @pytest.mark.parametrize("unit", [
+    # test all date / time units; the special value
+    # "generic" means constructing without any unit
+    ("Y"), ("M"), ("W"), ("D"), ("h"), ("m"),
+    ("s"), ("ms"), ("us"), ("ns"), ("ps"),
+    ("fs"), ("as"), ("generic") ])
+    def test_timedelta_np_int_construction(self, unit):
+        # regression test for gh-7617: np.int64 must act like a Python int
+        if unit != "generic":
+            assert_equal(np.timedelta64(np.int64(123), unit),
+                         np.timedelta64(123, unit))
+        else:
+            assert_equal(np.timedelta64(np.int64(123)),
+                         np.timedelta64(123))
+
     def test_timedelta_scalar_construction(self):
         # Construct with different units
         assert_equal(np.timedelta64(7, 'D'),
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 8cd0f4d92..4b2a38990 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -4829,6 +4829,12 @@ class TestResize(object):
         x_view.resize((0, 10))
         x_view.resize((0, 100))
 
+    def test_check_weakref(self):
+        x = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+        xref = weakref.ref(x)  # resize below should raise while this exists
+        assert_raises(ValueError, x.resize, (5, 1))
+        del xref  # avoid pyflakes unused variable warning.
+
 
 class TestRecord(object):
     def test_field_rename(self):
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 3881d3cb1..b83b8ccff 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -288,27 +288,96 @@ class TestUfunc(object):
         """
         pass
 
+    # from include/numpy/ufuncobject.h
+    size_inferred = 2
+    can_ignore = 4
     def test_signature0(self):
         # the arguments to test_signature are: nin, nout, core_signature
-        # pass
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i),(i)->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i),(i)->()")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (1,  1,  0))
         assert_equal(ixs, (0, 0))
+        assert_equal(flags, (self.size_inferred,))
+        assert_equal(sizes, (-1,))
 
     def test_signature1(self):
         # empty core signature; treat as plain ufunc (with trivial core)
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(),()->()")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(),()->()")
         assert_equal(enabled, 0)
         assert_equal(num_dims, (0,  0,  0))
         assert_equal(ixs, ())
+        assert_equal(flags, ())
+        assert_equal(sizes, ())
 
     def test_signature2(self):
         # more complicated names for variables
-        enabled, num_dims, ixs = umt.test_signature(2, 1, "(i1,i2),(J_1)->(_kAB)")
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(i1,i2),(J_1)->(_kAB)")
         assert_equal(enabled, 1)
         assert_equal(num_dims, (2, 1, 1))
         assert_equal(ixs, (0, 1, 2, 3))
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature3(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, u"(i1, i12),   (J_1)->(i12, i2)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 1, 2))
+        assert_equal(ixs, (0, 1, 2, 1, 3))  # i12 is shared; i1 and i2 differ
+        assert_equal(flags, (self.size_inferred,)*4)
+        assert_equal(sizes, (-1, -1, -1, -1))
+
+    def test_signature4(self):
+        # matrix_multiply signature from _umath_tests; all sizes inferred
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n,k),(k,m)->(n,m)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred,)*3)
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature5(self):
+        # matmul signature from _umath_tests; "?" flags n and m as can_ignore
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            2, 1, "(n?,k),(k,m?)->(n?,m?)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (2, 2, 2))
+        assert_equal(ixs, (0, 1, 1, 2, 0, 2))
+        assert_equal(flags, (self.size_inferred | self.can_ignore,
+                             self.size_inferred,
+                             self.size_inferred | self.can_ignore))
+        assert_equal(sizes, (-1, -1, -1))
+
+    def test_signature6(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            1, 1, "(3)->()")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 0))
+        assert_equal(ixs, (0,))
+        assert_equal(flags, (0,))  # size is fixed by the signature, not inferred
+        assert_equal(sizes, (3,))
+
+    def test_signature7(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3),(03,3),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))  # "3" and "03" share one index
+        assert_equal(flags, (0, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))
+
+    def test_signature8(self):
+        enabled, num_dims, ixs, flags, sizes = umt.test_signature(
+            3, 1, "(3?),(3?,3?),(n)->(9)")
+        assert_equal(enabled, 1)
+        assert_equal(num_dims, (1, 2, 1, 1))
+        assert_equal(ixs, (0, 0, 0, 1, 2))
+        assert_equal(flags, (self.can_ignore, self.size_inferred, 0))
+        assert_equal(sizes, (3, -1, 9))  # the "?" dimension keeps its size 3
 
     def test_signature_failure0(self):
         # in the following calls, a ValueError should be raised because
@@ -874,6 +943,89 @@ class TestUfunc(object):
         w = np.array([], dtype='f8')
         assert_array_equal(umt.innerwt(a, b, w), np.sum(a*b*w, axis=-1))
 
+    def test_cross1d(self):
+        """Test with fixed-sized signature."""
+        a = np.eye(3)
+        assert_array_equal(umt.cross1d(a, a), np.zeros((3, 3)))
+        out = np.zeros((3, 3))
+        result = umt.cross1d(a[0], a, out)
+        assert_(result is out)
+        assert_array_equal(result, np.vstack((np.zeros(3), a[2], -a[1])))
+        assert_raises(ValueError, umt.cross1d, np.eye(4), np.eye(4))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(4.))
+        assert_raises(ValueError, umt.cross1d, a, np.arange(3.), np.zeros((3, 4)))
+
+    def test_can_ignore_signature(self):
+        # Comparing the effects of ? in signature:
+        # matrix_multiply: (m,n),(n,p)->(m,p)    # all must be there.
+        # matmul:        (m?,n),(n,p?)->(m?,p?)  # allow missing m, p.
+        mat = np.arange(12).reshape((2, 3, 2))
+        single_vec = np.arange(2)
+        col_vec = single_vec[:, np.newaxis]
+        col_vec_array = np.arange(8).reshape((2, 2, 2, 1)) + 1
+        # matrix @ single column vector with proper dimension
+        mm_col_vec = umt.matrix_multiply(mat, col_vec)
+        # matmul does the same thing
+        matmul_col_vec = umt.matmul(mat, col_vec)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # matrix @ vector without dimension making it a column vector.
+        # matrix multiply fails -> missing core dim.
+        assert_raises(ValueError, umt.matrix_multiply, mat, single_vec)
+        # matmul mimicker passes, and returns a vector.
+        matmul_col = umt.matmul(mat, single_vec)
+        assert_array_equal(matmul_col, mm_col_vec.squeeze())
+        # Now with a column array: same as for column vector,
+        # broadcasting sensibly.
+        mm_col_vec = umt.matrix_multiply(mat, col_vec_array)
+        matmul_col_vec = umt.matmul(mat, col_vec_array)
+        assert_array_equal(matmul_col_vec, mm_col_vec)
+        # As above, but for row vector
+        single_vec = np.arange(3)
+        row_vec = single_vec[np.newaxis, :]
+        row_vec_array = np.arange(24).reshape((4, 2, 1, 1, 3)) + 1
+        # row vector @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec, mat)
+        matmul_row_vec = umt.matmul(row_vec, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # single row vector @ matrix
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, mat)
+        matmul_row = umt.matmul(single_vec, mat)
+        assert_array_equal(matmul_row, mm_row_vec.squeeze())
+        # row vector array @ matrix
+        mm_row_vec = umt.matrix_multiply(row_vec_array, mat)
+        matmul_row_vec = umt.matmul(row_vec_array, mat)
+        assert_array_equal(matmul_row_vec, mm_row_vec)
+        # Now for vector combinations
+        # row vector @ column vector
+        col_vec = row_vec.T
+        col_vec_array = row_vec_array.swapaxes(-2, -1)
+        mm_row_col_vec = umt.matrix_multiply(row_vec, col_vec)
+        matmul_row_col_vec = umt.matmul(row_vec, col_vec)
+        assert_array_equal(matmul_row_col_vec, mm_row_col_vec)
+        # single row vector @ single col vector
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec)
+        matmul_row_col = umt.matmul(single_vec, single_vec)
+        assert_array_equal(matmul_row_col, mm_row_col_vec.squeeze())
+        # row vector array @ column vector array
+        mm_row_col_array = umt.matrix_multiply(row_vec_array, col_vec_array)
+        matmul_row_col_array = umt.matmul(row_vec_array, col_vec_array)
+        assert_array_equal(matmul_row_col_array, mm_row_col_array)
+        # Finally, check that things are *not* squeezed if one gives an
+        # output.
+        out = np.zeros_like(mm_row_col_array)
+        out = umt.matrix_multiply(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        out[:] = 0
+        out = umt.matmul(row_vec_array, col_vec_array, out=out)
+        assert_array_equal(out, mm_row_col_array)
+        # And check one cannot put missing dimensions back.
+        out = np.zeros_like(mm_row_col_vec)
+        assert_raises(ValueError, umt.matrix_multiply, single_vec, single_vec,
+                      out)
+        # But fine for matmul, since it is just a broadcast.
+        out = umt.matmul(single_vec, single_vec, out)
+        assert_array_equal(out, mm_row_col_vec.squeeze())
+
     def test_matrix_multiply(self):
         self.compare_matrix_multiply_results(np.long)
         self.compare_matrix_multiply_results(np.double)
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index 073e841e8..eba0d9ba1 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -13,7 +13,6 @@ import multiprocessing
 
 import distutils
 from distutils.errors import DistutilsError
-from distutils.msvccompiler import get_build_architecture
 try:
     from threading import local as tlocal
 except ImportError:
@@ -2336,3 +2335,9 @@ def msvc_version(compiler):
         raise ValueError("Compiler instance is not msvc (%s)"\
                          % compiler.compiler_type)
     return compiler._MSVCCompiler__version
+
+def get_build_architecture():
+    # Importing distutils.msvccompiler triggers a warning on non-Windows
+    # systems, so delay the import to here.
+    from distutils.msvccompiler import get_build_architecture
+    return get_build_architecture()
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 2f8c07114..ec62cd7a6 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -738,7 +738,7 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     """
     Find the set difference of two arrays.
 
-    Return the sorted, unique values in `ar1` that are not in `ar2`.
+    Return the unique values in `ar1` that are not in `ar2`.
 
     Parameters
     ----------
@@ -753,7 +753,9 @@ def setdiff1d(ar1, ar2, assume_unique=False):
     Returns
     -------
     setdiff1d : ndarray
-        Sorted 1D array of values in `ar1` that are not in `ar2`.
+        1D array of values in `ar1` that are not in `ar2`. The result
+        is sorted when `assume_unique=False`, but otherwise only sorted
+        if the input is sorted.
 
     See Also
     --------
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index 06bb54bc1..26243d231 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -200,9 +200,6 @@ class nd_grid(object):
             else:
                 return _nx.arange(start, stop, step)
 
-    def __len__(self):
-        return 0
-
 
 class MGridClass(nd_grid):
     """
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 4b61726d2..fef06ba53 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -388,6 +388,13 @@ class TestSetOps(object):
         a = np.array((), np.uint32)
         assert_equal(setdiff1d(a, []).dtype, np.uint32)
 
+    def test_setdiff1d_unique(self):
+        a = np.array([3, 2, 1])
+        b = np.array([7, 5, 2])
+        expected = np.array([3, 1])
+        actual = setdiff1d(a, b, assume_unique=True)
+        assert_equal(actual, expected)
+
     def test_setdiff1d_char_array(self):
         a = np.array(['a', 'b', 'c'])
         b = np.array(['a', 'b', 's'])
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index fa6ad989f..1b5a71d0e 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -249,6 +249,12 @@ class TestHistogram(object):
         assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
         assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
 
+    def test_invalid_range(self):
+        # start of range must be < end of range
+        vals = np.linspace(0.0, 1.0, num=100)
+        with assert_raises_regex(ValueError, "max must be larger than"):
+            np.histogram(vals, range=[0.1, 0.01])
+
     def test_bin_edge_cases(self):
         # Ensure that floating-point computations correctly place edge cases.
         arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
@@ -265,6 +271,13 @@ class TestHistogram(object):
         hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
         assert_equal(hist[-1], 1)
 
+    def test_bin_array_dims(self):
+        # gracefully reject a bins array with more than 1 dimension
+        vals = np.linspace(0.0, 1.0, num=100)
+        bins = np.array([[0, 0.5], [0.6, 1.0]])
+        with assert_raises_regex(ValueError, "must be 1d"):
+            np.histogram(vals, bins=bins)
+
     def test_unsigned_monotonicity_check(self):
         # Ensures ValueError is raised if bins not increasing monotonically
         # when bins contain unsigned values (see #9222)
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 6e4cd225d..a7f5ca7db 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -461,6 +461,26 @@ class TestColumnStack(object):
     def test_non_iterable(self):
         assert_raises(TypeError, column_stack, 1)
 
+    def test_1D_arrays(self):
+        # example from docstring
+        a = np.array((1, 2, 3))
+        b = np.array((2, 3, 4))
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
+    def test_2D_arrays(self):
+        # same as hstack 2D docstring example
+        a = np.array([[1], [2], [3]])
+        b = np.array([[2], [3], [4]])
+        expected = np.array([[1, 2],
+                             [2, 3],
+                             [3, 4]])
+        actual = np.column_stack((a, b))
+        assert_equal(actual, expected)
+
 
 class TestDstack(object):
     def test_non_iterable(self):
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index 320d123e7..0e94c2633 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -1835,6 +1835,14 @@ class TestMultiDot(object):
         assert_almost_equal(multi_dot([A, B, C]), A.dot(B).dot(C))
         assert_almost_equal(multi_dot([A, B, C]), np.dot(A, np.dot(B, C)))
 
+    def test_basic_function_with_two_arguments(self):
+        # separate code path with two arguments
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+
+        assert_almost_equal(multi_dot([A, B]), A.dot(B))
+        assert_almost_equal(multi_dot([A, B]), np.dot(A, B))
+
     def test_basic_function_with_dynamic_programing_optimization(self):
         # multi_dot with four or more arguments uses the dynamic programing
         # optimization and therefore deserve a separate