Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/_iotools.py                  |  39
-rw-r--r--  numpy/lib/arraysetops.py               |   7
-rw-r--r--  numpy/lib/function_base.py             |   4
-rw-r--r--  numpy/lib/index_tricks.py              |   2
-rw-r--r--  numpy/lib/npyio.py                     |   4
-rw-r--r--  numpy/lib/recfunctions.py              |   4
-rw-r--r--  numpy/lib/stride_tricks.py             | 159
-rw-r--r--  numpy/lib/tests/test__iotools.py       |  18
-rw-r--r--  numpy/lib/tests/test_function_base.py  |   2
-rw-r--r--  numpy/lib/tests/test_io.py             |  29
-rw-r--r--  numpy/lib/tests/test_stride_tricks.py  | 100
11 files changed, 287 insertions, 81 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 316704b42..44bd48df7 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -160,7 +160,7 @@ class LineSplitter(object):
     delimiter : str, int, or sequence of ints, optional
         If a string, character used to delimit consecutive fields.
         If an integer or a sequence of integers, width(s) of each field.
-    comment : str, optional
+    comments : str, optional
         Character used to mark the beginning of a comment. Default is '#'.
     autostrip : bool, optional
         Whether to strip each individual field. Default is True.
@@ -271,7 +271,7 @@ class NameValidator(object):
     deletechars : str, optional
         A string combining invalid characters that must be deleted from the
         names.
-    casesensitive : {True, False, 'upper', 'lower'}, optional
+    case_sensitive : {True, False, 'upper', 'lower'}, optional
         * If True, field names are case-sensitive.
         * If False or 'upper', field names are converted to upper case.
         * If 'lower', field names are converted to lower case.
@@ -341,7 +341,7 @@ class NameValidator(object):
         defaultfmt : str, optional
             Default format string, used if validating a given string
             reduces its length to zero.
-        nboutput : integer, optional
+        nbfields : integer, optional
             Final number of validated names, used to expand or shrink the
             initial list of names.
 
@@ -518,12 +518,18 @@ class StringConverter(object):
     """
     #
     _mapper = [(nx.bool_, str2bool, False),
-               (nx.integer, int, -1),
-               (nx.floating, float, nx.nan),
-               (complex, _bytes_to_complex, nx.nan + 0j),
-               (nx.string_, bytes, asbytes('???'))]
+               (nx.integer, int, -1)]
+
+    # On 32-bit systems, we need to make sure that we explicitly include
+    # nx.int64 since ns.integer is nx.int32.
+    if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+        _mapper.append((nx.int64, int, -1))
+
+    _mapper.extend([(nx.floating, float, nx.nan),
+                    (complex, _bytes_to_complex, nx.nan + 0j),
+                    (nx.string_, bytes, asbytes('???'))])
+
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
-    #
 
     @classmethod
     def _getdtype(cls, val):
@@ -677,7 +683,22 @@ class StringConverter(object):
 
     def _strict_call(self, value):
         try:
-            return self.func(value)
+
+            # We check if we can convert the value using the current function
+            new_value = self.func(value)
+
+            # In addition to having to check whether func can convert the
+            # value, we also have to make sure that we don't get overflow
+            # errors for integers.
+            if self.func is int:
+                try:
+                    np.array(value, dtype=self.type)
+                except OverflowError:
+                    raise ValueError
+
+            # We're still here so we can now return the new value
+            return new_value
+
         except ValueError:
             if value.strip() in self.missing_values:
                 if not self._status:
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cb24eb24e..7776d7e76 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -97,10 +97,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     """
     Find the unique elements of an array.
 
-    Returns the sorted unique elements of an array. There are two optional
+    Returns the sorted unique elements of an array. There are three optional
     outputs in addition to the unique elements: the indices of the input array
-    that give the unique values, and the indices of the unique array that
-    reconstruct the input array.
+    that give the unique values, the indices of the unique array that
+    reconstruct the input array, and the number of times each unique value
+    comes up in the input array.
 
     Parameters
     ----------
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2baf83830..d58492a67 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1965,11 +1965,11 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
     ----------
     x : array_like
         A 1-D or 2-D array containing multiple variables and observations.
-        Each row of `m` represents a variable, and each column a single
+        Each row of `x` represents a variable, and each column a single
         observation of all those variables. Also see `rowvar` below.
     y : array_like, optional
         An additional set of variables and observations. `y` has the same
-        shape as `m`.
+        shape as `x`.
     rowvar : int, optional
         If `rowvar` is non-zero (default), then each row represents a
         variable, with observations in the columns. Otherwise, the relationship
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index eb9aad6ad..e97338106 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -480,7 +480,7 @@ class ndenumerate(object):
 
     Parameters
     ----------
-    a : ndarray
+    arr : ndarray
       Input array.
 
     See Also
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0632ba1f8..2b01caed9 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1240,8 +1240,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         The string used to separate values.  By default, any consecutive
         whitespaces act as delimiter.  An integer or sequence of integers
         can also be provided as width(s) of each field.
-    skip_rows : int, optional
-        `skip_rows` was deprecated in numpy 1.5, and will be removed in
+    skiprows : int, optional
+        `skiprows` was deprecated in numpy 1.5, and will be removed in
         numpy 2.0. Please use `skip_header` instead.
     skip_header : int, optional
         The number of lines to skip at the beginning of the file.
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index a61b1749b..4ae1079d2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -268,7 +268,7 @@ def izip_records(seqarrays, fill_value=None, flatten=True):
 
     Parameters
     ----------
-    seqarray : sequence of arrays
+    seqarrays : sequence of arrays
         Sequence of arrays.
     fill_value : {None, integer}
         Value used to pad shorter iterables.
@@ -683,7 +683,7 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
 
     Parameters
     ----------
-    seqarrays : array or sequence
+    arrays : array or sequence
         Sequence of input arrays.
     defaults : dictionary, optional
         Dictionary mapping field names to the corresponding default values.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index b81307a65..a5f247abf 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -9,7 +9,8 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 
-__all__ = ['broadcast_arrays']
+__all__ = ['broadcast_to', 'broadcast_arrays']
+
 
 class DummyArray(object):
     """Dummy object that just exists to hang __array_interface__ dictionaries
@@ -20,6 +21,20 @@ class DummyArray(object):
         self.__array_interface__ = interface
         self.base = base
 
+
+def _maybe_view_as_subclass(original_array, new_array):
+    if type(original_array) is not type(new_array):
+        # if input was an ndarray subclass and subclasses were OK,
+        # then view the result as that subclass.
+        new_array = new_array.view(type=type(original_array))
+        # Since we have done something akin to a view from original_array, we
+        # should let the subclass finalize (if it has it implemented, i.e., is
+        # not None).
+        if new_array.__array_finalize__:
+            new_array.__array_finalize__(original_array)
+    return new_array
+
+
 def as_strided(x, shape=None, strides=None, subok=False):
     """ Make an ndarray from the given array with the given shape and
         strides.
     """
@@ -34,15 +49,80 @@ def as_strided(x, shape=None, strides=None, subok=False):
     # Make sure dtype is correct in case of custom dtype
     if array.dtype.kind == 'V':
         array.dtype = x.dtype
-    if type(x) is not type(array):
-        # if input was an ndarray subclass and subclasses were OK,
-        # then view the result as that subclass.
-        array = array.view(type=type(x))
-        # Since we have done something akin to a view from x, we should let
-        # the subclass finalize (if it has it implemented, i.e., is not None).
-        if array.__array_finalize__:
-            array.__array_finalize__(x)
-    return array
+    return _maybe_view_as_subclass(x, array)
+
+
+def _broadcast_to(array, shape, subok, readonly):
+    shape = tuple(shape) if np.iterable(shape) else (shape,)
+    array = np.array(array, copy=False, subok=subok)
+    if not shape and array.shape:
+        raise ValueError('cannot broadcast a non-scalar to a scalar array')
+    if any(size < 0 for size in shape):
+        raise ValueError('all elements of broadcast shape must be non-'
+                         'negative')
+    broadcast = np.nditer(
+        (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'],
+        op_flags=['readonly'], itershape=shape, order='C').itviews[0]
+    result = _maybe_view_as_subclass(array, broadcast)
+    if not readonly and array.flags.writeable:
+        result.flags.writeable = True
+    return result
+
+
+def broadcast_to(array, shape, subok=False):
+    """Broadcast an array to a new shape.
+
+    Parameters
+    ----------
+    array : array_like
+        The array to broadcast.
+    shape : tuple
+        The shape of the desired array.
+    subok : bool, optional
+        If True, then sub-classes will be passed-through, otherwise
+        the returned array will be forced to be a base-class array (default).
+
+    Returns
+    -------
+    broadcast : array
+        A readonly view on the original array with the given shape. It is
+        typically not contiguous. Furthermore, more than one element of a
+        broadcasted array may refer to a single memory location.
+
+    Raises
+    ------
+    ValueError
+        If the array is not compatible with the new shape according to NumPy's
+        broadcasting rules.
+
+    Examples
+    --------
+    >>> x = np.array([1, 2, 3])
+    >>> np.broadcast_to(x, (3, 3))
+    array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+    """
+    return _broadcast_to(array, shape, subok=subok, readonly=True)
+
+
+def _broadcast_shape(*args):
+    """Returns the shape of the ararys that would result from broadcasting the
+    supplied arrays against each other.
+    """
+    if not args:
+        raise ValueError('must provide at least one argument')
+    if len(args) == 1:
+        # a single argument does not work with np.broadcast
+        return np.asarray(args[0]).shape
+    # use the old-iterator because np.nditer does not handle size 0 arrays
+    # consistently
+    b = np.broadcast(*args[:32])
+    # unfortunately, it cannot handle 32 or more arguments directly
+    for pos in range(32, len(args), 31):
+        b = np.broadcast(b, *args[pos:(pos + 31)])
+    return b.shape
+
 
 def broadcast_arrays(*args, **kwargs):
     """
@@ -87,55 +167,24 @@
            [3, 3, 3]])]
 
     """
+    # nditer is not used here to avoid the limit of 32 arrays.
+    # Otherwise, something like the following one-liner would suffice:
+    # return np.nditer(args, flags=['multi_index', 'zerosize_ok'],
+    #                  order='C').itviews
+
     subok = kwargs.pop('subok', False)
     if kwargs:
         raise TypeError('broadcast_arrays() got an unexpected keyword '
                         'argument {}'.format(kwargs.pop()))
     args = [np.array(_m, copy=False, subok=subok) for _m in args]
-    shapes = [x.shape for x in args]
-    if len(set(shapes)) == 1:
+
+    shape = _broadcast_shape(*args)
+
+    if all(array.shape == shape for array in args):
         # Common case where nothing needs to be broadcasted.
         return args
-    shapes = [list(s) for s in shapes]
-    strides = [list(x.strides) for x in args]
-    nds = [len(s) for s in shapes]
-    biggest = max(nds)
-    # Go through each array and prepend dimensions of length 1 to each of
-    # the shapes in order to make the number of dimensions equal.
-    for i in range(len(args)):
-        diff = biggest - nds[i]
-        if diff > 0:
-            shapes[i] = [1] * diff + shapes[i]
-            strides[i] = [0] * diff + strides[i]
-    # Chech each dimension for compatibility. A dimension length of 1 is
-    # accepted as compatible with any other length.
-    common_shape = []
-    for axis in range(biggest):
-        lengths = [s[axis] for s in shapes]
-        unique = set(lengths + [1])
-        if len(unique) > 2:
-            # There must be at least two non-1 lengths for this axis.
-            raise ValueError("shape mismatch: two or more arrays have "
-                             "incompatible dimensions on axis %r." % (axis,))
-        elif len(unique) == 2:
-            # There is exactly one non-1 length. The common shape will take
-            # this value.
-            unique.remove(1)
-            new_length = unique.pop()
-            common_shape.append(new_length)
-            # For each array, if this axis is being broadcasted from a
-            # length of 1, then set its stride to 0 so that it repeats its
-            # data.
-            for i in range(len(args)):
-                if shapes[i][axis] == 1:
-                    shapes[i][axis] = new_length
-                    strides[i][axis] = 0
-        else:
-            # Every array has a length of 1 on this axis. Strides can be
-            # left alone as nothing is broadcasted.
-            common_shape.append(1)
-
-    # Construct the new arrays.
-    broadcasted = [as_strided(x, shape=sh, strides=st, subok=subok)
-                   for (x, sh, st) in zip(args, shapes, strides)]
-    return broadcasted
+
+    # TODO: consider making the results of broadcast_arrays readonly to match
+    # broadcast_to. This will require a deprecation cycle.
+    return [_broadcast_to(array, shape, subok=subok, readonly=False)
+            for array in args]
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index 060f815d5..e0a917a21 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -152,17 +152,31 @@ class TestStringConverter(TestCase):
 
     def test_upgrade(self):
         "Tests the upgrade method."
+
         converter = StringConverter()
         assert_equal(converter._status, 0)
+
         # test int
         assert_equal(converter.upgrade(asbytes('0')), 0)
         assert_equal(converter._status, 1)
+
+        # On systems where integer defaults to 32-bit, the statuses will be
+        # offset by one, so we check for this here.
+        import numpy.core.numeric as nx
+        status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+
+        # test int > 2**32
+        assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184)
+        assert_equal(converter._status, 1 + status_offset)
+
         # test float
         assert_allclose(converter.upgrade(asbytes('0.')), 0.0)
-        assert_equal(converter._status, 2)
+        assert_equal(converter._status, 2 + status_offset)
+
         # test complex
         assert_equal(converter.upgrade(asbytes('0j')), complex('0j'))
-        assert_equal(converter._status, 3)
+        assert_equal(converter._status, 3 + status_offset)
+
         # test str
         assert_equal(converter.upgrade(asbytes('a')), asbytes('a'))
         assert_equal(converter._status, len(converter._mapper) - 1)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 03521ca4c..a37c527d9 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -668,7 +668,7 @@ class TestVectorize(TestCase):
         args = np.array([0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi])
         r1 = f(args)
         r2 = np.cos(args)
-        assert_array_equal(r1, r2)
+        assert_array_almost_equal(r1, r2)
 
     def test_keywords(self):
         import math
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 7054ab1fe..2598a6cfb 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -18,7 +18,7 @@ from numpy.lib._iotools import (ConverterError, ConverterLockError,
 from numpy.compat import asbytes, asbytes_nested, bytes, asstr
 from nose import SkipTest
 from numpy.ma.testutils import (
-    TestCase, assert_equal, assert_array_equal,
+    TestCase, assert_equal, assert_array_equal, assert_allclose,
     assert_raises, assert_raises_regex, run_module_suite
     )
 from numpy.testing import assert_warns, assert_, build_err_msg
@@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
             l = np.load(c)
             assert_equal(a, l['file_a'])
             assert_equal(b, l['file_b'])
-
+
     def test_BagObj(self):
         a = np.array([[1, 2], [3, 4]], float)
         b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
@@ -1762,6 +1762,31 @@ M 33 21.99
         res = np.genfromtxt(count())
         assert_array_equal(res, np.arange(10))
 
+    def test_auto_dtype_largeint(self):
+        """
+        Regression test for numpy/numpy#5635 whereby large integers could
+        cause OverflowErrors.
+        """
+        "Test the automatic definition of the output dtype"
+
+        # 2**66 = 73786976294838206464 => should convert to float
+        # 2**34 = 17179869184 => should convert to int64
+        # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
+        #                 int64 on 64-bit systems)
+
+        data = TextIO('73786976294838206464 17179869184 1024')
+
+        test = np.ndfromtxt(data, dtype=None)
+
+        assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
+
+        assert test.dtype['f0'] == np.float
+        assert test.dtype['f1'] == np.int64
+        assert test.dtype['f2'] == np.integer
+
+        assert_allclose(test['f0'], 73786976294838206464.)
+        assert_equal(test['f1'], 17179869184)
+        assert_equal(test['f2'], 1024)
 
 def test_gzip_load():
     a = np.random.random((5, 5))
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index bc7e30ca4..ef483921c 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -5,8 +5,9 @@ from numpy.testing import (
     run_module_suite, assert_equal, assert_array_equal,
     assert_raises, assert_
     )
-from numpy.lib.stride_tricks import as_strided, broadcast_arrays
-
+from numpy.lib.stride_tricks import (
+    as_strided, broadcast_arrays, _broadcast_shape, broadcast_to
+)
 
 def assert_shapes_correct(input_shapes, expected_shape):
     # Broadcast a list of arrays with the given input shapes and check the
@@ -217,6 +218,62 @@ def test_same_as_ufunc():
         assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True)
         assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True)
 
+
+def test_broadcast_to_succeeds():
+    data = [
+        [np.array(0), (0,), np.array(0)],
+        [np.array(0), (1,), np.zeros(1)],
+        [np.array(0), (3,), np.zeros(3)],
+        [np.ones(1), (1,), np.ones(1)],
+        [np.ones(1), (2,), np.ones(2)],
+        [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))],
+        [np.arange(3), (3,), np.arange(3)],
+        [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)],
+        [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])],
+        # test if shape is not a tuple
+        [np.ones(0), 0, np.ones(0)],
+        [np.ones(1), 1, np.ones(1)],
+        [np.ones(1), 2, np.ones(2)],
+        # these cases with size 0 are strange, but they reproduce the behavior
+        # of broadcasting with ufuncs (see test_same_as_ufunc above)
+        [np.ones(1), (0,), np.ones(0)],
+        [np.ones((1, 2)), (0, 2), np.ones((0, 2))],
+        [np.ones((2, 1)), (2, 0), np.ones((2, 0))],
+    ]
+    for input_array, shape, expected in data:
+        actual = broadcast_to(input_array, shape)
+        assert_array_equal(expected, actual)
+
+
+def test_broadcast_to_raises():
+    data = [
+        [(0,), ()],
+        [(1,), ()],
+        [(3,), ()],
+        [(3,), (1,)],
+        [(3,), (2,)],
+        [(3,), (4,)],
+        [(1, 2), (2, 1)],
+        [(1, 1), (1,)],
+        [(1,), -1],
+        [(1,), (-1,)],
+        [(1, 2), (-1, 2)],
+    ]
+    for orig_shape, target_shape in data:
+        arr = np.zeros(orig_shape)
+        assert_raises(ValueError, lambda: broadcast_to(arr, target_shape))
+
+
+def test_broadcast_shape():
+    # broadcast_shape is already exercized indirectly by broadcast_arrays
+    assert_raises(ValueError, _broadcast_shape)
+    assert_equal(_broadcast_shape([1, 2]), (2,))
+    assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1))
+    assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4))
+    assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2))
+    assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2))
+
+
 def test_as_strided():
     a = np.array([None])
     a_view = as_strided(a)
@@ -277,6 +334,45 @@ def test_subclasses():
     assert_(type(b_view) is np.ndarray)
     assert_(a_view.shape == b_view.shape)
 
+    # and for broadcast_to
+    shape = (2, 4)
+    a_view = broadcast_to(a, shape)
+    assert_(type(a_view) is np.ndarray)
+    assert_(a_view.shape == shape)
+    a_view = broadcast_to(a, shape, subok=True)
+    assert_(type(a_view) is SimpleSubClass)
+    assert_(a_view.info == 'simple finalized')
+    assert_(a_view.shape == shape)
+
+
+def test_writeable():
+    # broadcast_to should return a readonly array
+    original = np.array([1, 2, 3])
+    result = broadcast_to(original, (2, 3))
+    assert_equal(result.flags.writeable, False)
+    assert_raises(ValueError, result.__setitem__, slice(None), 0)
+
+    # but the result of broadcast_arrays needs to be writeable (for now), to
+    # preserve backwards compatibility
+    for results in [broadcast_arrays(original),
+                    broadcast_arrays(0, original)]:
+        for result in results:
+            assert_equal(result.flags.writeable, True)
+    # keep readonly input readonly
+    original.flags.writeable = False
+    _, result = broadcast_arrays(0, original)
+    assert_equal(result.flags.writeable, False)
+
+
+def test_reference_types():
+    input_array = np.array('a', dtype=object)
+    expected = np.array(['a'] * 3, dtype=object)
+    actual = broadcast_to(input_array, (3,))
+    assert_array_equal(expected, actual)
+
+    actual, _ = broadcast_arrays(input_array, np.ones(3))
+    assert_array_equal(expected, actual)
+
 
 if __name__ == "__main__":
     run_module_suite()
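
A minimal usage sketch of the behaviour added in numpy/lib/stride_tricks.py above, assuming a NumPy build that contains this patch: broadcast_to returns a readonly view whose broadcast dimensions have zero stride, while broadcast_arrays stays writeable for now (see the TODO in the patch).

    import numpy as np
    from numpy.lib.stride_tricks import broadcast_to, broadcast_arrays

    x = np.array([1, 2, 3])

    view = broadcast_to(x, (4, 3))   # no data is copied
    print(view.shape)                # (4, 3)
    print(view.strides)              # (0, 8) on a typical 64-bit build
    print(view.flags.writeable)      # False

    a, b = broadcast_arrays(x, np.zeros((4, 1)))
    print(a.shape, b.shape)          # (4, 3) (4, 3)
    print(a.flags.writeable)         # True, kept for backwards compatibility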
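
The _broadcast_shape helper above folds long argument lists because np.broadcast accepts at most 32 operands per call. Below is a standalone sketch of the same chunking idea, not the private helper itself; the name combined_broadcast_shape is illustrative only.

    import numpy as np

    def combined_broadcast_shape(*arrays):
        # np.broadcast takes at most 32 operands, so fold the remainder in
        # chunks of 31 against the running broadcast object.
        if not arrays:
            raise ValueError('must provide at least one argument')
        if len(arrays) == 1:
            return np.asarray(arrays[0]).shape
        b = np.broadcast(*arrays[:32])
        for pos in range(32, len(arrays), 31):
            b = np.broadcast(b, *arrays[pos:(pos + 31)])
        return b.shape

    print(combined_broadcast_shape(*([np.ones((1, 2))] * 100)))  # (1, 2)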
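
Together, the _iotools and test_io changes make genfromtxt pick a wide enough integer dtype instead of raising OverflowError (numpy/numpy#5635). A hedged end-to-end sketch mirroring test_auto_dtype_largeint, assuming the patched NumPy:

    import numpy as np
    from io import BytesIO

    data = BytesIO(b'73786976294838206464 17179869184 1024')
    test = np.genfromtxt(data, dtype=None)

    # 2**66 overflows int64 and falls through to float,
    # 2**34 overflows a 32-bit int but fits in int64,
    # 1024 stays the platform default integer.
    print(test.dtype)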