Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/_iotools.py                  |  39
-rw-r--r--  numpy/lib/arraysetops.py               |   7
-rw-r--r--  numpy/lib/function_base.py             |   4
-rw-r--r--  numpy/lib/index_tricks.py              |   2
-rw-r--r--  numpy/lib/npyio.py                     |   4
-rw-r--r--  numpy/lib/recfunctions.py              |   4
-rw-r--r--  numpy/lib/stride_tricks.py             | 159
-rw-r--r--  numpy/lib/tests/test__iotools.py       |  18
-rw-r--r--  numpy/lib/tests/test_function_base.py  |   2
-rw-r--r--  numpy/lib/tests/test_io.py             |  29
-rw-r--r--  numpy/lib/tests/test_stride_tricks.py  | 100
11 files changed, 287 insertions, 81 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 316704b42..44bd48df7 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -160,7 +160,7 @@ class LineSplitter(object):
     delimiter : str, int, or sequence of ints, optional
         If a string, character used to delimit consecutive fields.
         If an integer or a sequence of integers, width(s) of each field.
-    comment : str, optional
+    comments : str, optional
         Character used to mark the beginning of a comment. Default is '#'.
     autostrip : bool, optional
         Whether to strip each individual field. Default is True.
@@ -271,7 +271,7 @@ class NameValidator(object):
     deletechars : str, optional
         A string combining invalid characters that must be deleted from the
         names.
-    casesensitive : {True, False, 'upper', 'lower'}, optional
+    case_sensitive : {True, False, 'upper', 'lower'}, optional
         * If True, field names are case-sensitive.
         * If False or 'upper', field names are converted to upper case.
         * If 'lower', field names are converted to lower case.
@@ -341,7 +341,7 @@ class NameValidator(object):
         defaultfmt : str, optional
             Default format string, used if validating a given string
             reduces its length to zero.
-        nboutput : integer, optional
+        nbfields : integer, optional
             Final number of validated names, used to expand or shrink the
             initial list of names.
 
@@ -518,12 +518,18 @@ class StringConverter(object):
     """
     #
     _mapper = [(nx.bool_, str2bool, False),
-               (nx.integer, int, -1),
-               (nx.floating, float, nx.nan),
-               (complex, _bytes_to_complex, nx.nan + 0j),
-               (nx.string_, bytes, asbytes('???'))]
+               (nx.integer, int, -1)]
+
+    # On 32-bit systems, we need to make sure that we explicitly include
+    # nx.int64 since ns.integer is nx.int32.
+    if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+        _mapper.append((nx.int64, int, -1))
+
+    _mapper.extend([(nx.floating, float, nx.nan),
+                    (complex, _bytes_to_complex, nx.nan + 0j),
+                    (nx.string_, bytes, asbytes('???'))])
+
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
-    #
 
     @classmethod
     def _getdtype(cls, val):
@@ -677,7 +683,22 @@ class StringConverter(object):
 
     def _strict_call(self, value):
         try:
-            return self.func(value)
+
+            # We check if we can convert the value using the current function
+            new_value = self.func(value)
+
+            # In addition to having to check whether func can convert the
+            # value, we also have to make sure that we don't get overflow
+            # errors for integers.
+            if self.func is int:
+                try:
+                    np.array(value, dtype=self.type)
+                except OverflowError:
+                    raise ValueError
+
+            # We're still here so we can now return the new value
+            return new_value
+
         except ValueError:
             if value.strip() in self.missing_values:
                 if not self._status:
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cb24eb24e..7776d7e76 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -97,10 +97,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
     """
     Find the unique elements of an array.
 
-    Returns the sorted unique elements of an array. There are two optional
+    Returns the sorted unique elements of an array. There are three optional
     outputs in addition to the unique elements: the indices of the input array
-    that give the unique values, and the indices of the unique array that
-    reconstruct the input array.
+    that give the unique values, the indices of the unique array that
+    reconstruct the input array, and the number of times each unique value
+    comes up in the input array.
 
     Parameters
     ----------
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2baf83830..d58492a67 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1965,11 +1965,11 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
     ----------
     x : array_like
         A 1-D or 2-D array containing multiple variables and observations.
-        Each row of `m` represents a variable, and each column a single
+        Each row of `x` represents a variable, and each column a single
         observation of all those variables. Also see `rowvar` below.
     y : array_like, optional
         An additional set of variables and observations. `y` has the same
-        shape as `m`.
+        shape as `x`.
     rowvar : int, optional
         If `rowvar` is non-zero (default), then each row represents a
         variable, with observations in the columns. Otherwise, the relationship
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index eb9aad6ad..e97338106 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -480,7 +480,7 @@ class ndenumerate(object):
 
     Parameters
     ----------
-    a : ndarray
+    arr : ndarray
       Input array.
 
     See Also
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0632ba1f8..2b01caed9 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1240,8 +1240,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         The string used to separate values.  By default, any consecutive
         whitespaces act as delimiter.  An integer or sequence of integers
         can also be provided as width(s) of each field.
-    skip_rows : int, optional
-        `skip_rows` was deprecated in numpy 1.5, and will be removed in
+    skiprows : int, optional
+        `skiprows` was deprecated in numpy 1.5, and will be removed in
         numpy 2.0. Please use `skip_header` instead.
     skip_header : int, optional
         The number of lines to skip at the beginning of the file.
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index a61b1749b..4ae1079d2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -268,7 +268,7 @@ def izip_records(seqarrays, fill_value=None, flatten=True):
 
     Parameters
     ----------
-    seqarray : sequence of arrays
+    seqarrays : sequence of arrays
         Sequence of arrays.
     fill_value : {None, integer}
         Value used to pad shorter iterables.
@@ -683,7 +683,7 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
 
     Parameters
     ----------
-    seqarrays : array or sequence
+    arrays : array or sequence
         Sequence of input arrays.
     defaults : dictionary, optional
         Dictionary mapping field names to the corresponding default values.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index b81307a65..a5f247abf 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -9,7 +9,8 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 
-__all__ = ['broadcast_arrays']
+__all__ = ['broadcast_to', 'broadcast_arrays']
+
 
 class DummyArray(object):
     """Dummy object that just exists to hang __array_interface__ dictionaries
@@ -20,6 +21,20 @@ class DummyArray(object):
         self.__array_interface__ = interface
         self.base = base
 
+
+def _maybe_view_as_subclass(original_array, new_array):
+    if type(original_array) is not type(new_array):
+        # if input was an ndarray subclass and subclasses were OK,
+        # then view the result as that subclass.
+        new_array = new_array.view(type=type(original_array))
+        # Since we have done something akin to a view from original_array, we
+        # should let the subclass finalize (if it has it implemented, i.e., is
+        # not None).
+        if new_array.__array_finalize__:
+            new_array.__array_finalize__(original_array)
+    return new_array
+
+
 def as_strided(x, shape=None, strides=None, subok=False):
     """ Make an ndarray from the given array with the given shape and
         strides.
     """
@@ -34,15 +49,80 @@ def as_strided(x, shape=None, strides=None, subok=False):
     # Make sure dtype is correct in case of custom dtype
     if array.dtype.kind == 'V':
         array.dtype = x.dtype
-    if type(x) is not type(array):
-        # if input was an ndarray subclass and subclasses were OK,
-        # then view the result as that subclass.
-        array = array.view(type=type(x))
-        # Since we have done something akin to a view from x, we should let
-        # the subclass finalize (if it has it implemented, i.e., is not None).
-        if array.__array_finalize__:
-            array.__array_finalize__(x)
-    return array
+    return _maybe_view_as_subclass(x, array)
+
+
+def _broadcast_to(array, shape, subok, readonly):
+    shape = tuple(shape) if np.iterable(shape) else (shape,)
+    array = np.array(array, copy=False, subok=subok)
+    if not shape and array.shape:
+        raise ValueError('cannot broadcast a non-scalar to a scalar array')
+    if any(size < 0 for size in shape):
+        raise ValueError('all elements of broadcast shape must be non-'
+                         'negative')
+    broadcast = np.nditer(
+        (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'],
+        op_flags=['readonly'], itershape=shape, order='C').itviews[0]
+    result = _maybe_view_as_subclass(array, broadcast)
+    if not readonly and array.flags.writeable:
+        result.flags.writeable = True
+    return result
+
+
+def broadcast_to(array, shape, subok=False):
+    """Broadcast an array to a new shape.
+
+    Parameters
+    ----------
+    array : array_like
+        The array to broadcast.
+    shape : tuple
+        The shape of the desired array.
+    subok : bool, optional
+        If True, then sub-classes will be passed-through, otherwise
+        the returned array will be forced to be a base-class array (default).
+
+    Returns
+    -------
+    broadcast : array
+        A readonly view on the original array with the given shape. It is
+        typically not contiguous. Furthermore, more than one element of a
+        broadcasted array may refer to a single memory location.
+
+    Raises
+    ------
+    ValueError
+        If the array is not compatible with the new shape according to NumPy's
+        broadcasting rules.
+
+    Examples
+    --------
+    >>> x = np.array([1, 2, 3])
+    >>> np.broadcast_to(x, (3, 3))
+    array([[1, 2, 3],
+           [1, 2, 3],
+           [1, 2, 3]])
+    """
+    return _broadcast_to(array, shape, subok=subok, readonly=True)
+
+
+def _broadcast_shape(*args):
+    """Returns the shape of the ararys that would result from broadcasting the
+    supplied arrays against each other.
+    """
+    if not args:
+        raise ValueError('must provide at least one argument')
+    if len(args) == 1:
+        # a single argument does not work with np.broadcast
+        return np.asarray(args[0]).shape
+    # use the old-iterator because np.nditer does not handle size 0 arrays
+    # consistently
+    b = np.broadcast(*args[:32])
+    # unfortunately, it cannot handle 32 or more arguments directly
+    for pos in range(32, len(args), 31):
+        b = np.broadcast(b, *args[pos:(pos + 31)])
+    return b.shape
+
 
 def broadcast_arrays(*args, **kwargs):
     """
@@ -87,55 +167,24 @@
            [3, 3, 3]])]
 
     """
+    # nditer is not used here to avoid the limit of 32 arrays.
+    # Otherwise, something like the following one-liner would suffice:
+    # return np.nditer(args, flags=['multi_index', 'zerosize_ok'],
+    #                  order='C').itviews
+
     subok = kwargs.pop('subok', False)
     if kwargs:
         raise TypeError('broadcast_arrays() got an unexpected keyword '
                         'argument {}'.format(kwargs.pop()))
     args = [np.array(_m, copy=False, subok=subok) for _m in args]
-    shapes = [x.shape for x in args]
-    if len(set(shapes)) == 1:
+
+    shape = _broadcast_shape(*args)
+
+    if all(array.shape == shape for array in args):
         # Common case where nothing needs to be broadcasted.
         return args
-    shapes = [list(s) for s in shapes]
-    strides = [list(x.strides) for x in args]
-    nds = [len(s) for s in shapes]
-    biggest = max(nds)
-    # Go through each array and prepend dimensions of length 1 to each of
-    # the shapes in order to make the number of dimensions equal.
-    for i in range(len(args)):
-        diff = biggest - nds[i]
-        if diff > 0:
-            shapes[i] = [1] * diff + shapes[i]
-            strides[i] = [0] * diff + strides[i]
-    # Chech each dimension for compatibility. A dimension length of 1 is
-    # accepted as compatible with any other length.
-    common_shape = []
-    for axis in range(biggest):
-        lengths = [s[axis] for s in shapes]
-        unique = set(lengths + [1])
-        if len(unique) > 2:
-            # There must be at least two non-1 lengths for this axis.
-            raise ValueError("shape mismatch: two or more arrays have "
-                             "incompatible dimensions on axis %r." % (axis,))
-        elif len(unique) == 2:
-            # There is exactly one non-1 length. The common shape will take
-            # this value.
-            unique.remove(1)
-            new_length = unique.pop()
-            common_shape.append(new_length)
-            # For each array, if this axis is being broadcasted from a
-            # length of 1, then set its stride to 0 so that it repeats its
-            # data.
-            for i in range(len(args)):
-                if shapes[i][axis] == 1:
-                    shapes[i][axis] = new_length
-                    strides[i][axis] = 0
-        else:
-            # Every array has a length of 1 on this axis. Strides can be
-            # left alone as nothing is broadcasted.
-            common_shape.append(1)
-
-    # Construct the new arrays.
-    broadcasted = [as_strided(x, shape=sh, strides=st, subok=subok)
-                   for (x, sh, st) in zip(args, shapes, strides)]
-    return broadcasted
+
+    # TODO: consider making the results of broadcast_arrays readonly to match
+    # broadcast_to. This will require a deprecation cycle.
+    return [_broadcast_to(array, shape, subok=subok, readonly=False)
+            for array in args]
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index 060f815d5..e0a917a21 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -152,17 +152,31 @@ class TestStringConverter(TestCase):
 
     def test_upgrade(self):
         "Tests the upgrade method."
+
         converter = StringConverter()
         assert_equal(converter._status, 0)
+
         # test int
         assert_equal(converter.upgrade(asbytes('0')), 0)
         assert_equal(converter._status, 1)
+
+        # On systems where integer defaults to 32-bit, the statuses will be
+        # offset by one, so we check for this here.
+        import numpy.core.numeric as nx
+        status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+
+        # test int > 2**32
+        assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184)
+        assert_equal(converter._status, 1 + status_offset)
+
         # test float
         assert_allclose(converter.upgrade(asbytes('0.')), 0.0)
-        assert_equal(converter._status, 2)
+        assert_equal(converter._status, 2 + status_offset)
+
         # test complex
         assert_equal(converter.upgrade(asbytes('0j')), complex('0j'))
-        assert_equal(converter._status, 3)
+        assert_equal(converter._status, 3 + status_offset)
+
         # test str
         assert_equal(converter.upgrade(asbytes('a')), asbytes('a'))
         assert_equal(converter._status, len(converter._mapper) - 1)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 03521ca4c..a37c527d9 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -668,7 +668,7 @@ class TestVectorize(TestCase):
         args = np.array([0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi])
         r1 = f(args)
         r2 = np.cos(args)
-        assert_array_equal(r1, r2)
+        assert_array_almost_equal(r1, r2)
 
     def test_keywords(self):
         import math
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 7054ab1fe..2598a6cfb 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -18,7 +18,7 @@ from numpy.lib._iotools import (ConverterError, ConverterLockError,
 from numpy.compat import asbytes, asbytes_nested, bytes, asstr
 from nose import SkipTest
 from numpy.ma.testutils import (
-    TestCase, assert_equal, assert_array_equal,
+    TestCase, assert_equal, assert_array_equal, assert_allclose,
     assert_raises, assert_raises_regex, run_module_suite
     )
 from numpy.testing import assert_warns, assert_, build_err_msg
@@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
             l = np.load(c)
             assert_equal(a, l['file_a'])
             assert_equal(b, l['file_b'])
-
+
     def test_BagObj(self):
         a = np.array([[1, 2], [3, 4]], float)
         b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
@@ -1762,6 +1762,31 @@ M 33 21.99
         res = np.genfromtxt(count())
         assert_array_equal(res, np.arange(10))
 
+    def test_auto_dtype_largeint(self):
+        """
+        Regression test for numpy/numpy#5635 whereby large integers could
+        cause OverflowErrors.
+        """
+        "Test the automatic definition of the output dtype"
+
+        # 2**66 = 73786976294838206464 => should convert to float
+        # 2**34 = 17179869184 => should convert to int64
+        # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
+        #                 int64 on 64-bit systems)
+
+        data = TextIO('73786976294838206464 17179869184 1024')
+
+        test = np.ndfromtxt(data, dtype=None)
+
+        assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
+
+        assert test.dtype['f0'] == np.float
+        assert test.dtype['f1'] == np.int64
+        assert test.dtype['f2'] == np.integer
+
+        assert_allclose(test['f0'], 73786976294838206464.)
+        assert_equal(test['f1'], 17179869184)
+        assert_equal(test['f2'], 1024)
 
 def test_gzip_load():
     a = np.random.random((5, 5))
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index bc7e30ca4..ef483921c 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -5,8 +5,9 @@ from numpy.testing import (
     run_module_suite, assert_equal, assert_array_equal,
     assert_raises, assert_
     )
-from numpy.lib.stride_tricks import as_strided, broadcast_arrays
-
+from numpy.lib.stride_tricks import (
+    as_strided, broadcast_arrays, _broadcast_shape, broadcast_to
+)
 
 def assert_shapes_correct(input_shapes, expected_shape):
     # Broadcast a list of arrays with the given input shapes and check the
@@ -217,6 +218,62 @@ def test_same_as_ufunc():
         assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True)
         assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True)
 
+
+def test_broadcast_to_succeeds():
+    data = [
+        [np.array(0), (0,), np.array(0)],
+        [np.array(0), (1,), np.zeros(1)],
+        [np.array(0), (3,), np.zeros(3)],
+        [np.ones(1), (1,), np.ones(1)],
+        [np.ones(1), (2,), np.ones(2)],
+        [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))],
+        [np.arange(3), (3,), np.arange(3)],
+        [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)],
+        [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])],
+        # test if shape is not a tuple
+        [np.ones(0), 0, np.ones(0)],
+        [np.ones(1), 1, np.ones(1)],
+        [np.ones(1), 2, np.ones(2)],
+        # these cases with size 0 are strange, but they reproduce the behavior
+        # of broadcasting with ufuncs (see test_same_as_ufunc above)
+        [np.ones(1), (0,), np.ones(0)],
+        [np.ones((1, 2)), (0, 2), np.ones((0, 2))],
+        [np.ones((2, 1)), (2, 0), np.ones((2, 0))],
+    ]
+    for input_array, shape, expected in data:
+        actual = broadcast_to(input_array, shape)
+        assert_array_equal(expected, actual)
+
+
+def test_broadcast_to_raises():
+    data = [
+        [(0,), ()],
+        [(1,), ()],
+        [(3,), ()],
+        [(3,), (1,)],
+        [(3,), (2,)],
+        [(3,), (4,)],
+        [(1, 2), (2, 1)],
+        [(1, 1), (1,)],
+        [(1,), -1],
+        [(1,), (-1,)],
+        [(1, 2), (-1, 2)],
+    ]
+    for orig_shape, target_shape in data:
+        arr = np.zeros(orig_shape)
+        assert_raises(ValueError, lambda: broadcast_to(arr, target_shape))
+
+
+def test_broadcast_shape():
+    # broadcast_shape is already exercized indirectly by broadcast_arrays
+    assert_raises(ValueError, _broadcast_shape)
+    assert_equal(_broadcast_shape([1, 2]), (2,))
+    assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1))
+    assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4))
+    assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2))
+    assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2))
+
+
 def test_as_strided():
     a = np.array([None])
     a_view = as_strided(a)
@@ -277,6 +334,45 @@ def test_subclasses():
     assert_(type(b_view) is np.ndarray)
     assert_(a_view.shape == b_view.shape)
 
+    # and for broadcast_to
+    shape = (2, 4)
+    a_view = broadcast_to(a, shape)
+    assert_(type(a_view) is np.ndarray)
+    assert_(a_view.shape == shape)
+    a_view = broadcast_to(a, shape, subok=True)
+    assert_(type(a_view) is SimpleSubClass)
+    assert_(a_view.info == 'simple finalized')
+    assert_(a_view.shape == shape)
+
+
+def test_writeable():
+    # broadcast_to should return a readonly array
+    original = np.array([1, 2, 3])
+    result = broadcast_to(original, (2, 3))
+    assert_equal(result.flags.writeable, False)
+    assert_raises(ValueError, result.__setitem__, slice(None), 0)
+
+    # but the result of broadcast_arrays needs to be writeable (for now), to
+    # preserve backwards compatibility
+    for results in [broadcast_arrays(original),
+                    broadcast_arrays(0, original)]:
+        for result in results:
+            assert_equal(result.flags.writeable, True)
+    # keep readonly input readonly
+    original.flags.writeable = False
+    _, result = broadcast_arrays(0, original)
+    assert_equal(result.flags.writeable, False)
+
+
+def test_reference_types():
+    input_array = np.array('a', dtype=object)
+    expected = np.array(['a'] * 3, dtype=object)
+    actual = broadcast_to(input_array, (3,))
+    assert_array_equal(expected, actual)
+
+    actual, _ = broadcast_arrays(input_array, np.ones(3))
+    assert_array_equal(expected, actual)
+
 
 if __name__ == "__main__":
     run_module_suite()
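
A minimal usage sketch of the behaviour added in numpy/lib/stride_tricks.py above, assuming a NumPy build that contains this patch: broadcast_to returns a readonly view whose broadcast dimensions have zero stride, while broadcast_arrays stays writeable for now (see the TODO in the patch).

    import numpy as np
    from numpy.lib.stride_tricks import broadcast_to, broadcast_arrays

    x = np.array([1, 2, 3])

    view = broadcast_to(x, (4, 3))   # no data is copied
    print(view.shape)                # (4, 3)
    print(view.strides)              # (0, 8) on a typical 64-bit build
    print(view.flags.writeable)      # False

    a, b = broadcast_arrays(x, np.zeros((4, 1)))
    print(a.shape, b.shape)          # (4, 3) (4, 3)
    print(a.flags.writeable)         # True, kept for backwards compatibility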
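
The _broadcast_shape helper above folds long argument lists because np.broadcast accepts at most 32 operands per call. Below is a standalone sketch of the same chunking idea, not the private helper itself; the name combined_broadcast_shape is illustrative only.

    import numpy as np

    def combined_broadcast_shape(*arrays):
        # np.broadcast takes at most 32 operands, so fold the remainder in
        # chunks of 31 against the running broadcast object.
        if not arrays:
            raise ValueError('must provide at least one argument')
        if len(arrays) == 1:
            return np.asarray(arrays[0]).shape
        b = np.broadcast(*arrays[:32])
        for pos in range(32, len(arrays), 31):
            b = np.broadcast(b, *arrays[pos:(pos + 31)])
        return b.shape

    print(combined_broadcast_shape(*([np.ones((1, 2))] * 100)))  # (1, 2)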
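
Together, the _iotools and test_io changes make genfromtxt pick a wide enough integer dtype instead of raising OverflowError (numpy/numpy#5635). A hedged end-to-end sketch mirroring test_auto_dtype_largeint, assuming the patched NumPy:

    import numpy as np
    from io import BytesIO

    data = BytesIO(b'73786976294838206464 17179869184 1024')
    test = np.genfromtxt(data, dtype=None)

    # 2**66 overflows int64 and falls through to float,
    # 2**34 overflows a 32-bit int but fits in int64,
    # 1024 stays the platform default integer.
    print(test.dtype)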