Diffstat (limited to 'numpy/lib')
 numpy/lib/_iotools.py                 |  39
 numpy/lib/arraysetops.py              |   7
 numpy/lib/function_base.py            |   4
 numpy/lib/index_tricks.py             |   2
 numpy/lib/npyio.py                    |   4
 numpy/lib/recfunctions.py             |   4
 numpy/lib/stride_tricks.py            | 159
 numpy/lib/tests/test__iotools.py      |  18
 numpy/lib/tests/test_function_base.py |   2
 numpy/lib/tests/test_io.py            |  29
 numpy/lib/tests/test_stride_tricks.py | 100
 11 files changed, 287 insertions(+), 81 deletions(-)
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 316704b42..44bd48df7 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -160,7 +160,7 @@ class LineSplitter(object):
delimiter : str, int, or sequence of ints, optional
If a string, character used to delimit consecutive fields.
If an integer or a sequence of integers, width(s) of each field.
- comment : str, optional
+ comments : str, optional
Character used to mark the beginning of a comment. Default is '#'.
autostrip : bool, optional
Whether to strip each individual field. Default is True.
@@ -271,7 +271,7 @@ class NameValidator(object):
deletechars : str, optional
A string combining invalid characters that must be deleted from the
names.
- casesensitive : {True, False, 'upper', 'lower'}, optional
+ case_sensitive : {True, False, 'upper', 'lower'}, optional
* If True, field names are case-sensitive.
* If False or 'upper', field names are converted to upper case.
* If 'lower', field names are converted to lower case.
@@ -341,7 +341,7 @@ class NameValidator(object):
defaultfmt : str, optional
Default format string, used if validating a given string
reduces its length to zero.
- nboutput : integer, optional
+ nbfields : integer, optional
Final number of validated names, used to expand or shrink the
initial list of names.
@@ -518,12 +518,18 @@ class StringConverter(object):
"""
#
_mapper = [(nx.bool_, str2bool, False),
- (nx.integer, int, -1),
- (nx.floating, float, nx.nan),
- (complex, _bytes_to_complex, nx.nan + 0j),
- (nx.string_, bytes, asbytes('???'))]
+ (nx.integer, int, -1)]
+
+ # On 32-bit systems, we need to make sure that we explicitly include
+ nx.int64 since nx.integer is nx.int32.
+ if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+ _mapper.append((nx.int64, int, -1))
+
+ _mapper.extend([(nx.floating, float, nx.nan),
+ (complex, _bytes_to_complex, nx.nan + 0j),
+ (nx.string_, bytes, asbytes('???'))])
+
(_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
- #
@classmethod
def _getdtype(cls, val):
@@ -677,7 +683,22 @@ class StringConverter(object):
def _strict_call(self, value):
try:
- return self.func(value)
+
+ # We check if we can convert the value using the current function
+ new_value = self.func(value)
+
+ # In addition to having to check whether func can convert the
+ # value, we also have to make sure that we don't get overflow
+ # errors for integers.
+ if self.func is int:
+ try:
+ np.array(value, dtype=self.type)
+ except OverflowError:
+ raise ValueError
+
+ # We're still here so we can now return the new value
+ return new_value
+
except ValueError:
if value.strip() in self.missing_values:
if not self._status:
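
Note: the overflow guard added above is needed because Python's `int` is arbitrary precision, so `int(value)` happily parses numbers that do not fit the converter's current NumPy integer type; it is the round trip through `np.array(value, dtype=...)` that actually surfaces the problem so the converter can fall through and upgrade to a wider type. A minimal standalone sketch of that idea (not the converter itself; the exact exception raised can differ between NumPy versions, so both are caught):

    import numpy as np

    value = '17179869184'                      # 2**34: too large for int32
    print(int(value))                          # Python ints never overflow
    print(np.iinfo(np.int32).max)              # 2147483647

    try:
        # the same probe the converter performs, with an explicit narrow dtype
        np.array(value, dtype=np.int32)
    except (OverflowError, ValueError):
        print('value does not fit -> treat as a failed conversion and upgrade')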
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cb24eb24e..7776d7e76 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -97,10 +97,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
"""
Find the unique elements of an array.
- Returns the sorted unique elements of an array. There are two optional
+ Returns the sorted unique elements of an array. There are three optional
outputs in addition to the unique elements: the indices of the input array
- that give the unique values, and the indices of the unique array that
- reconstruct the input array.
+ that give the unique values, the indices of the unique array that
+ reconstruct the input array, and the number of times each unique value
+ comes up in the input array.
Parameters
----------
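
For reference, the three optional outputs the reworded docstring describes correspond to the `return_index`, `return_inverse` and `return_counts` flags (the last one was added in NumPy 1.9); a small usage example:

    import numpy as np

    a = np.array([1, 2, 6, 4, 2, 3, 2])
    u, idx, inv, counts = np.unique(a, return_index=True,
                                    return_inverse=True, return_counts=True)
    # u      -> [1 2 3 4 6]   sorted unique values
    # idx    -> indices of the first occurrence of each unique value in `a`
    # inv    -> indices into `u` such that u[inv] reconstructs `a`
    # counts -> number of times each unique value appears in `a`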
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2baf83830..d58492a67 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1965,11 +1965,11 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
----------
x : array_like
A 1-D or 2-D array containing multiple variables and observations.
- Each row of `m` represents a variable, and each column a single
+ Each row of `x` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
y : array_like, optional
An additional set of variables and observations. `y` has the same
- shape as `m`.
+ shape as `x`.
rowvar : int, optional
If `rowvar` is non-zero (default), then each row represents a
variable, with observations in the columns. Otherwise, the relationship
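
The `rowvar` convention described above (rows are variables, columns are observations, by default) is easy to get backwards; a short generic usage sketch, not taken from this change:

    import numpy as np

    x = np.array([[0.0, 1.0, 2.0],    # variable 0: one row per variable
                  [2.0, 1.0, 0.0]])   # variable 1: perfectly anti-correlated
    r = np.corrcoef(x)                # default rowvar: rows are variables
    # r is a 2x2 correlation matrix; r[0, 1] == -1.0 for these data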
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index eb9aad6ad..e97338106 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -480,7 +480,7 @@ class ndenumerate(object):
Parameters
----------
- a : ndarray
+ arr : ndarray
Input array.
See Also
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0632ba1f8..2b01caed9 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1240,8 +1240,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
The string used to separate values. By default, any consecutive
whitespaces act as delimiter. An integer or sequence of integers
can also be provided as width(s) of each field.
- skip_rows : int, optional
- `skip_rows` was deprecated in numpy 1.5, and will be removed in
+ skiprows : int, optional
+ `skiprows` was deprecated in numpy 1.5, and will be removed in
numpy 2.0. Please use `skip_header` instead.
skip_header : int, optional
The number of lines to skip at the beginning of the file.
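
As a quick illustration of the preferred spelling, `skip_header` plays the role the deprecated `skiprows` used to; a generic sketch, not taken from this change:

    import numpy as np
    from io import BytesIO

    data = BytesIO(b"header line to ignore\n1 2 3\n4 5 6\n")
    arr = np.genfromtxt(data, skip_header=1)   # skip the one-line header
    # arr -> array([[1., 2., 3.],
    #               [4., 5., 6.]])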
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index a61b1749b..4ae1079d2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -268,7 +268,7 @@ def izip_records(seqarrays, fill_value=None, flatten=True):
Parameters
----------
- seqarray : sequence of arrays
+ seqarrays : sequence of arrays
Sequence of arrays.
fill_value : {None, integer}
Value used to pad shorter iterables.
@@ -683,7 +683,7 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
Parameters
----------
- seqarrays : array or sequence
+ arrays : array or sequence
Sequence of input arrays.
defaults : dictionary, optional
Dictionary mapping field names to the corresponding default values.
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index b81307a65..a5f247abf 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -9,7 +9,8 @@ from __future__ import division, absolute_import, print_function
import numpy as np
-__all__ = ['broadcast_arrays']
+__all__ = ['broadcast_to', 'broadcast_arrays']
+
class DummyArray(object):
"""Dummy object that just exists to hang __array_interface__ dictionaries
@@ -20,6 +21,20 @@ class DummyArray(object):
self.__array_interface__ = interface
self.base = base
+
+def _maybe_view_as_subclass(original_array, new_array):
+ if type(original_array) is not type(new_array):
+ # if input was an ndarray subclass and subclasses were OK,
+ # then view the result as that subclass.
+ new_array = new_array.view(type=type(original_array))
+ # Since we have done something akin to a view from original_array, we
+ # should let the subclass finalize (if it has it implemented, i.e., is
+ # not None).
+ if new_array.__array_finalize__:
+ new_array.__array_finalize__(original_array)
+ return new_array
+
+
def as_strided(x, shape=None, strides=None, subok=False):
""" Make an ndarray from the given array with the given shape and strides.
"""
@@ -34,15 +49,80 @@ def as_strided(x, shape=None, strides=None, subok=False):
# Make sure dtype is correct in case of custom dtype
if array.dtype.kind == 'V':
array.dtype = x.dtype
- if type(x) is not type(array):
- # if input was an ndarray subclass and subclasses were OK,
- # then view the result as that subclass.
- array = array.view(type=type(x))
- # Since we have done something akin to a view from x, we should let
- # the subclass finalize (if it has it implemented, i.e., is not None).
- if array.__array_finalize__:
- array.__array_finalize__(x)
- return array
+ return _maybe_view_as_subclass(x, array)
+
+
+def _broadcast_to(array, shape, subok, readonly):
+ shape = tuple(shape) if np.iterable(shape) else (shape,)
+ array = np.array(array, copy=False, subok=subok)
+ if not shape and array.shape:
+ raise ValueError('cannot broadcast a non-scalar to a scalar array')
+ if any(size < 0 for size in shape):
+ raise ValueError('all elements of broadcast shape must be non-'
+ 'negative')
+ broadcast = np.nditer(
+ (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'],
+ op_flags=['readonly'], itershape=shape, order='C').itviews[0]
+ result = _maybe_view_as_subclass(array, broadcast)
+ if not readonly and array.flags.writeable:
+ result.flags.writeable = True
+ return result
+
+
+def broadcast_to(array, shape, subok=False):
+ """Broadcast an array to a new shape.
+
+ Parameters
+ ----------
+ array : array_like
+ The array to broadcast.
+ shape : tuple
+ The shape of the desired array.
+ subok : bool, optional
+ If True, then sub-classes will be passed-through, otherwise
+ the returned array will be forced to be a base-class array (default).
+
+ Returns
+ -------
+ broadcast : array
+ A readonly view on the original array with the given shape. It is
+ typically not contiguous. Furthermore, more than one element of a
+ broadcasted array may refer to a single memory location.
+
+ Raises
+ ------
+ ValueError
+ If the array is not compatible with the new shape according to NumPy's
+ broadcasting rules.
+
+ Examples
+ --------
+ >>> x = np.array([1, 2, 3])
+ >>> np.broadcast_to(x, (3, 3))
+ array([[1, 2, 3],
+ [1, 2, 3],
+ [1, 2, 3]])
+ """
+ return _broadcast_to(array, shape, subok=subok, readonly=True)
+
+
+def _broadcast_shape(*args):
+ """Returns the shape of the ararys that would result from broadcasting the
+ supplied arrays against each other.
+ """
+ if not args:
+ raise ValueError('must provide at least one argument')
+ if len(args) == 1:
+ # a single argument does not work with np.broadcast
+ return np.asarray(args[0]).shape
+ # use the old-iterator because np.nditer does not handle size 0 arrays
+ # consistently
+ b = np.broadcast(*args[:32])
+ # unfortunately, it cannot handle 32 or more arguments directly
+ for pos in range(32, len(args), 31):
+ b = np.broadcast(b, *args[pos:(pos + 31)])
+ return b.shape
+
def broadcast_arrays(*args, **kwargs):
"""
@@ -87,55 +167,24 @@ def broadcast_arrays(*args, **kwargs):
[3, 3, 3]])]
"""
+ # nditer is not used here to avoid the limit of 32 arrays.
+ # Otherwise, something like the following one-liner would suffice:
+ # return np.nditer(args, flags=['multi_index', 'zerosize_ok'],
+ # order='C').itviews
+
subok = kwargs.pop('subok', False)
if kwargs:
raise TypeError('broadcast_arrays() got an unexpected keyword '
'argument {}'.format(kwargs.pop()))
args = [np.array(_m, copy=False, subok=subok) for _m in args]
- shapes = [x.shape for x in args]
- if len(set(shapes)) == 1:
+
+ shape = _broadcast_shape(*args)
+
+ if all(array.shape == shape for array in args):
# Common case where nothing needs to be broadcasted.
return args
- shapes = [list(s) for s in shapes]
- strides = [list(x.strides) for x in args]
- nds = [len(s) for s in shapes]
- biggest = max(nds)
- # Go through each array and prepend dimensions of length 1 to each of
- # the shapes in order to make the number of dimensions equal.
- for i in range(len(args)):
- diff = biggest - nds[i]
- if diff > 0:
- shapes[i] = [1] * diff + shapes[i]
- strides[i] = [0] * diff + strides[i]
- # Chech each dimension for compatibility. A dimension length of 1 is
- # accepted as compatible with any other length.
- common_shape = []
- for axis in range(biggest):
- lengths = [s[axis] for s in shapes]
- unique = set(lengths + [1])
- if len(unique) > 2:
- # There must be at least two non-1 lengths for this axis.
- raise ValueError("shape mismatch: two or more arrays have "
- "incompatible dimensions on axis %r." % (axis,))
- elif len(unique) == 2:
- # There is exactly one non-1 length. The common shape will take
- # this value.
- unique.remove(1)
- new_length = unique.pop()
- common_shape.append(new_length)
- # For each array, if this axis is being broadcasted from a
- # length of 1, then set its stride to 0 so that it repeats its
- # data.
- for i in range(len(args)):
- if shapes[i][axis] == 1:
- shapes[i][axis] = new_length
- strides[i][axis] = 0
- else:
- # Every array has a length of 1 on this axis. Strides can be
- # left alone as nothing is broadcasted.
- common_shape.append(1)
-
- # Construct the new arrays.
- broadcasted = [as_strided(x, shape=sh, strides=st, subok=subok)
- for (x, sh, st) in zip(args, shapes, strides)]
- return broadcasted
+
+ # TODO: consider making the results of broadcast_arrays readonly to match
+ # broadcast_to. This will require a deprecation cycle.
+ return [_broadcast_to(array, shape, subok=subok, readonly=False)
+ for array in args]
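
Two properties of the new code are worth spelling out: `broadcast_to` returns a readonly view in which broadcast axes have stride 0, so many elements alias the same memory, and `broadcast_arrays` no longer inherits `np.broadcast`'s 32-argument limit because `_broadcast_shape` chains `np.broadcast` calls. A small sketch, assuming NumPy 1.10 or later where `broadcast_to` is exposed:

    import numpy as np

    x = np.array([1, 2, 3])
    y = np.broadcast_to(x, (4, 3))     # readonly view, nothing is copied
    print(y.strides)                   # (0, 8) on a typical 64-bit build
    print(y.flags.writeable)           # False

    try:
        y[0, 0] = 99
    except ValueError:
        print('broadcast_to views are readonly')

    # more than 32 inputs are fine, because the broadcast shape is computed
    # by chaining np.broadcast internally
    outs = np.broadcast_arrays(*([np.ones((1, 2))] * 100))
    print(len(outs), outs[0].shape)    # 100 (1, 2)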
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index 060f815d5..e0a917a21 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -152,17 +152,31 @@ class TestStringConverter(TestCase):
def test_upgrade(self):
"Tests the upgrade method."
+
converter = StringConverter()
assert_equal(converter._status, 0)
+
# test int
assert_equal(converter.upgrade(asbytes('0')), 0)
assert_equal(converter._status, 1)
+
+ # On systems where integer defaults to 32-bit, the statuses will be
+ # offset by one, so we check for this here.
+ import numpy.core.numeric as nx
+ status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+
+ # test int > 2**32
+ assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184)
+ assert_equal(converter._status, 1 + status_offset)
+
# test float
assert_allclose(converter.upgrade(asbytes('0.')), 0.0)
- assert_equal(converter._status, 2)
+ assert_equal(converter._status, 2 + status_offset)
+
# test complex
assert_equal(converter.upgrade(asbytes('0j')), complex('0j'))
- assert_equal(converter._status, 3)
+ assert_equal(converter._status, 3 + status_offset)
+
# test str
assert_equal(converter.upgrade(asbytes('a')), asbytes('a'))
assert_equal(converter._status, len(converter._mapper) - 1)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 03521ca4c..a37c527d9 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -668,7 +668,7 @@ class TestVectorize(TestCase):
args = np.array([0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi])
r1 = f(args)
r2 = np.cos(args)
- assert_array_equal(r1, r2)
+ assert_array_almost_equal(r1, r2)
def test_keywords(self):
import math
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 7054ab1fe..2598a6cfb 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -18,7 +18,7 @@ from numpy.lib._iotools import (ConverterError, ConverterLockError,
from numpy.compat import asbytes, asbytes_nested, bytes, asstr
from nose import SkipTest
from numpy.ma.testutils import (
- TestCase, assert_equal, assert_array_equal,
+ TestCase, assert_equal, assert_array_equal, assert_allclose,
assert_raises, assert_raises_regex, run_module_suite
)
from numpy.testing import assert_warns, assert_, build_err_msg
@@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
l = np.load(c)
assert_equal(a, l['file_a'])
assert_equal(b, l['file_b'])
-
+
def test_BagObj(self):
a = np.array([[1, 2], [3, 4]], float)
b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
@@ -1762,6 +1762,31 @@ M 33 21.99
res = np.genfromtxt(count())
assert_array_equal(res, np.arange(10))
+ def test_auto_dtype_largeint(self):
+ """
+ Regression test for numpy/numpy#5635 whereby large integers could
+ cause OverflowErrors. Also checks the automatic definition of the
+ output dtype.
+ """
+
+ # 2**66 = 73786976294838206464 => should convert to float
+ # 2**34 = 17179869184 => should convert to int64
+ # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
+ # int64 on 64-bit systems)
+
+ data = TextIO('73786976294838206464 17179869184 1024')
+
+ test = np.ndfromtxt(data, dtype=None)
+
+ assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
+
+ assert test.dtype['f0'] == np.float
+ assert test.dtype['f1'] == np.int64
+ assert test.dtype['f2'] == np.integer
+
+ assert_allclose(test['f0'], 73786976294838206464.)
+ assert_equal(test['f1'], 17179869184)
+ assert_equal(test['f2'], 1024)
def test_gzip_load():
a = np.random.random((5, 5))
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index bc7e30ca4..ef483921c 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -5,8 +5,9 @@ from numpy.testing import (
run_module_suite, assert_equal, assert_array_equal,
assert_raises, assert_
)
-from numpy.lib.stride_tricks import as_strided, broadcast_arrays
-
+from numpy.lib.stride_tricks import (
+ as_strided, broadcast_arrays, _broadcast_shape, broadcast_to
+)
def assert_shapes_correct(input_shapes, expected_shape):
# Broadcast a list of arrays with the given input shapes and check the
@@ -217,6 +218,62 @@ def test_same_as_ufunc():
assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True)
assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True)
+
+def test_broadcast_to_succeeds():
+ data = [
+ [np.array(0), (0,), np.array(0)],
+ [np.array(0), (1,), np.zeros(1)],
+ [np.array(0), (3,), np.zeros(3)],
+ [np.ones(1), (1,), np.ones(1)],
+ [np.ones(1), (2,), np.ones(2)],
+ [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))],
+ [np.arange(3), (3,), np.arange(3)],
+ [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)],
+ [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])],
+ # test if shape is not a tuple
+ [np.ones(0), 0, np.ones(0)],
+ [np.ones(1), 1, np.ones(1)],
+ [np.ones(1), 2, np.ones(2)],
+ # these cases with size 0 are strange, but they reproduce the behavior
+ # of broadcasting with ufuncs (see test_same_as_ufunc above)
+ [np.ones(1), (0,), np.ones(0)],
+ [np.ones((1, 2)), (0, 2), np.ones((0, 2))],
+ [np.ones((2, 1)), (2, 0), np.ones((2, 0))],
+ ]
+ for input_array, shape, expected in data:
+ actual = broadcast_to(input_array, shape)
+ assert_array_equal(expected, actual)
+
+
+def test_broadcast_to_raises():
+ data = [
+ [(0,), ()],
+ [(1,), ()],
+ [(3,), ()],
+ [(3,), (1,)],
+ [(3,), (2,)],
+ [(3,), (4,)],
+ [(1, 2), (2, 1)],
+ [(1, 1), (1,)],
+ [(1,), -1],
+ [(1,), (-1,)],
+ [(1, 2), (-1, 2)],
+ ]
+ for orig_shape, target_shape in data:
+ arr = np.zeros(orig_shape)
+ assert_raises(ValueError, lambda: broadcast_to(arr, target_shape))
+
+
+def test_broadcast_shape():
+ # _broadcast_shape is already exercised indirectly by broadcast_arrays
+ assert_raises(ValueError, _broadcast_shape)
+ assert_equal(_broadcast_shape([1, 2]), (2,))
+ assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1))
+ assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4))
+ assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2))
+ assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2))
+
+
def test_as_strided():
a = np.array([None])
a_view = as_strided(a)
@@ -277,6 +334,45 @@ def test_subclasses():
assert_(type(b_view) is np.ndarray)
assert_(a_view.shape == b_view.shape)
+ # and for broadcast_to
+ shape = (2, 4)
+ a_view = broadcast_to(a, shape)
+ assert_(type(a_view) is np.ndarray)
+ assert_(a_view.shape == shape)
+ a_view = broadcast_to(a, shape, subok=True)
+ assert_(type(a_view) is SimpleSubClass)
+ assert_(a_view.info == 'simple finalized')
+ assert_(a_view.shape == shape)
+
+
+def test_writeable():
+ # broadcast_to should return a readonly array
+ original = np.array([1, 2, 3])
+ result = broadcast_to(original, (2, 3))
+ assert_equal(result.flags.writeable, False)
+ assert_raises(ValueError, result.__setitem__, slice(None), 0)
+
+ # but the result of broadcast_arrays needs to be writeable (for now), to
+ # preserve backwards compatibility
+ for results in [broadcast_arrays(original),
+ broadcast_arrays(0, original)]:
+ for result in results:
+ assert_equal(result.flags.writeable, True)
+ # keep readonly input readonly
+ original.flags.writeable = False
+ _, result = broadcast_arrays(0, original)
+ assert_equal(result.flags.writeable, False)
+
+
+def test_reference_types():
+ input_array = np.array('a', dtype=object)
+ expected = np.array(['a'] * 3, dtype=object)
+ actual = broadcast_to(input_array, (3,))
+ assert_array_equal(expected, actual)
+
+ actual, _ = broadcast_arrays(input_array, np.ones(3))
+ assert_array_equal(expected, actual)
+
if __name__ == "__main__":
run_module_suite()