Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/_iotools.py                  |  39
-rw-r--r--  numpy/lib/arraysetops.py               |   7
-rw-r--r--  numpy/lib/format.py                    |  68
-rw-r--r--  numpy/lib/function_base.py             |  54
-rw-r--r--  numpy/lib/index_tricks.py              |   2
-rw-r--r--  numpy/lib/npyio.py                     | 133
-rw-r--r--  numpy/lib/recfunctions.py              |   4
-rw-r--r--  numpy/lib/shape_base.py                |   7
-rw-r--r--  numpy/lib/stride_tricks.py             | 165
-rw-r--r--  numpy/lib/tests/data/py2-objarr.npy    | bin 0 -> 258 bytes
-rw-r--r--  numpy/lib/tests/data/py2-objarr.npz    | bin 0 -> 366 bytes
-rw-r--r--  numpy/lib/tests/data/py3-objarr.npy    | bin 0 -> 341 bytes
-rw-r--r--  numpy/lib/tests/data/py3-objarr.npz    | bin 0 -> 449 bytes
-rw-r--r--  numpy/lib/tests/test__iotools.py       |  18
-rw-r--r--  numpy/lib/tests/test_format.py         | 127
-rw-r--r--  numpy/lib/tests/test_function_base.py  |  40
-rw-r--r--  numpy/lib/tests/test_io.py             |  77
-rw-r--r--  numpy/lib/tests/test_shape_base.py     |   6
-rw-r--r--  numpy/lib/tests/test_stride_tricks.py  | 123
-rw-r--r--  numpy/lib/tests/test_type_check.py     |   2
-rw-r--r--  numpy/lib/type_check.py                |  56
21 files changed, 739 insertions, 189 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 316704b42..44bd48df7 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -160,7 +160,7 @@ class LineSplitter(object):
delimiter : str, int, or sequence of ints, optional
If a string, character used to delimit consecutive fields.
If an integer or a sequence of integers, width(s) of each field.
- comment : str, optional
+ comments : str, optional
Character used to mark the beginning of a comment. Default is '#'.
autostrip : bool, optional
Whether to strip each individual field. Default is True.
@@ -271,7 +271,7 @@ class NameValidator(object):
deletechars : str, optional
A string combining invalid characters that must be deleted from the
names.
- casesensitive : {True, False, 'upper', 'lower'}, optional
+ case_sensitive : {True, False, 'upper', 'lower'}, optional
* If True, field names are case-sensitive.
* If False or 'upper', field names are converted to upper case.
* If 'lower', field names are converted to lower case.
@@ -341,7 +341,7 @@ class NameValidator(object):
defaultfmt : str, optional
Default format string, used if validating a given string
reduces its length to zero.
- nboutput : integer, optional
+ nbfields : integer, optional
Final number of validated names, used to expand or shrink the
initial list of names.
@@ -518,12 +518,18 @@ class StringConverter(object):
"""
#
_mapper = [(nx.bool_, str2bool, False),
- (nx.integer, int, -1),
- (nx.floating, float, nx.nan),
- (complex, _bytes_to_complex, nx.nan + 0j),
- (nx.string_, bytes, asbytes('???'))]
+ (nx.integer, int, -1)]
+
+ # On 32-bit systems, we need to make sure that we explicitly include
+ # nx.int64 since nx.integer is nx.int32.
+ if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize:
+ _mapper.append((nx.int64, int, -1))
+
+ _mapper.extend([(nx.floating, float, nx.nan),
+ (complex, _bytes_to_complex, nx.nan + 0j),
+ (nx.string_, bytes, asbytes('???'))])
+
(_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
- #
@classmethod
def _getdtype(cls, val):
@@ -677,7 +683,22 @@ class StringConverter(object):
def _strict_call(self, value):
try:
- return self.func(value)
+
+ # We check if we can convert the value using the current function
+ new_value = self.func(value)
+
+ # In addition to having to check whether func can convert the
+ # value, we also have to make sure that we don't get overflow
+ # errors for integers.
+ if self.func is int:
+ try:
+ np.array(value, dtype=self.type)
+ except OverflowError:
+ raise ValueError
+
+ # We're still here so we can now return the new value
+ return new_value
+
except ValueError:
if value.strip() in self.missing_values:
if not self._status:
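As a sketch of how the amended upgrade chain behaves (illustrative only, not part of this patch; the input values are made up), on a platform where the default integer is 32-bit:

    from numpy.lib._iotools import StringConverter
    from numpy.compat import asbytes

    conv = StringConverter()
    conv.upgrade(asbytes('1024'))         # fits the default integer type
    conv.upgrade(asbytes('17179869184'))  # 2**34 overflows int32; the new
                                          # OverflowError check upgrades to int64
    conv.upgrade(asbytes('1e300'))        # not an int at all; upgrades to float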
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cb24eb24e..7776d7e76 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -97,10 +97,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
"""
Find the unique elements of an array.
- Returns the sorted unique elements of an array. There are two optional
+ Returns the sorted unique elements of an array. There are three optional
outputs in addition to the unique elements: the indices of the input array
- that give the unique values, and the indices of the unique array that
- reconstruct the input array.
+ that give the unique values, the indices of the unique array that
+ reconstruct the input array, and the number of times each unique value
+ comes up in the input array.
Parameters
----------
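For example, the new third output can be requested with ``return_counts=True`` (illustrative doctest, not part of the patch):

    >>> np.unique(np.array([1, 1, 4, 4, 4, 7]), return_counts=True)
    (array([1, 4, 7]), array([2, 3, 1]))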
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 4ff0a660f..66a1b356c 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -314,21 +314,19 @@ def _write_array_header(fp, d, version=None):
header = header + ' '*topad + '\n'
header = asbytes(_filter_header(header))
- if len(header) >= (256*256) and version == (1, 0):
- raise ValueError("header does not fit inside %s bytes required by the"
- " 1.0 format" % (256*256))
- if len(header) < (256*256):
- header_len_str = struct.pack('<H', len(header))
+ hlen = len(header)
+ if hlen < 256*256 and version in (None, (1, 0)):
version = (1, 0)
- elif len(header) < (2**32):
- header_len_str = struct.pack('<I', len(header))
+ header_prefix = magic(1, 0) + struct.pack('<H', hlen)
+ elif hlen < 2**32 and version in (None, (2, 0)):
version = (2, 0)
+ header_prefix = magic(2, 0) + struct.pack('<I', hlen)
else:
- raise ValueError("header does not fit inside 4 GiB required by "
- "the 2.0 format")
+ msg = "Header length %s too big for version=%s"
+ msg %= (hlen, version)
+ raise ValueError(msg)
- fp.write(magic(*version))
- fp.write(header_len_str)
+ fp.write(header_prefix)
fp.write(header)
return version
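The rewritten branch amounts to the following selection rule; this is a simplified sketch with a hypothetical helper name, omitting the magic-string and header writes that the real function performs:

    import struct

    def _choose_version(hlen, version=None):
        # version=None means: pick the smallest format that fits the header.
        if hlen < 256*256 and version in (None, (1, 0)):
            return (1, 0), struct.pack('<H', hlen)   # 2-byte unsigned length
        elif hlen < 2**32 and version in (None, (2, 0)):
            return (2, 0), struct.pack('<I', hlen)   # 4-byte unsigned length
        raise ValueError("Header length %s too big for version=%s"
                         % (hlen, version))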
@@ -389,7 +387,7 @@ def read_array_header_1_0(fp):
If the data is invalid.
"""
- _read_array_header(fp, version=(1, 0))
+ return _read_array_header(fp, version=(1, 0))
def read_array_header_2_0(fp):
"""
@@ -422,7 +420,7 @@ def read_array_header_2_0(fp):
If the data is invalid.
"""
- _read_array_header(fp, version=(2, 0))
+ return _read_array_header(fp, version=(2, 0))
def _filter_header(s):
@@ -517,7 +515,7 @@ def _read_array_header(fp, version):
return d['shape'], d['fortran_order'], dtype
-def write_array(fp, array, version=None):
+def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None):
"""
Write an array to an NPY file, including a header.
@@ -535,11 +533,18 @@ def write_array(fp, array, version=None):
version : (int, int) or None, optional
The version number of the format. None means use the oldest
supported version that is able to store the data. Default: None
+ allow_pickle : bool, optional
+ Whether to allow writing pickled data. Default: True
+ pickle_kwargs : dict, optional
+ Additional keyword arguments to pass to pickle.dump, excluding
+ 'protocol'. These are only useful when pickling objects in object
+ arrays on Python 3 to Python 2 compatible format.
Raises
------
ValueError
- If the array cannot be persisted.
+ If the array cannot be persisted. This includes the case of
+ allow_pickle=False and array being an object array.
Various other errors
If the array contains Python objects as part of its dtype, the
process of pickling them may raise various errors if the objects
@@ -561,7 +566,12 @@ def write_array(fp, array, version=None):
# We contain Python objects so we cannot write out the data
# directly. Instead, we will pickle it out with version 2 of the
# pickle protocol.
- pickle.dump(array, fp, protocol=2)
+ if not allow_pickle:
+ raise ValueError("Object arrays cannot be saved when "
+ "allow_pickle=False")
+ if pickle_kwargs is None:
+ pickle_kwargs = {}
+ pickle.dump(array, fp, protocol=2, **pickle_kwargs)
elif array.flags.f_contiguous and not array.flags.c_contiguous:
if isfileobj(fp):
array.T.tofile(fp)
@@ -580,7 +590,7 @@ def write_array(fp, array, version=None):
fp.write(chunk.tobytes('C'))
-def read_array(fp):
+def read_array(fp, allow_pickle=True, pickle_kwargs=None):
"""
Read an array from an NPY file.
@@ -589,6 +599,12 @@ def read_array(fp):
fp : file_like object
If this is not a real file object, then this may take extra memory
and time.
+ allow_pickle : bool, optional
+ Whether to allow reading pickled data. Default: True
+ pickle_kwargs : dict
+ Additional keyword arguments to pass to pickle.load. These are only
+ useful when loading object arrays saved on Python 2 when using
+ Python 3.
Returns
-------
@@ -598,7 +614,8 @@ def read_array(fp):
Raises
------
ValueError
- If the data is invalid.
+ If the data is invalid, or allow_pickle=False and the file contains
+ an object array.
"""
version = read_magic(fp)
@@ -612,7 +629,20 @@ def read_array(fp):
# Now read the actual data.
if dtype.hasobject:
# The array contained Python objects. We need to unpickle the data.
- array = pickle.load(fp)
+ if not allow_pickle:
+ raise ValueError("Object arrays cannot be loaded when "
+ "allow_pickle=False")
+ if pickle_kwargs is None:
+ pickle_kwargs = {}
+ try:
+ array = pickle.load(fp, **pickle_kwargs)
+ except UnicodeError as err:
+ if sys.version_info[0] >= 3:
+ # Friendlier error message
+ raise UnicodeError("Unpickling a python object failed: %r\n"
+ "You may need to pass the encoding= option "
+ "to numpy.load" % (err,))
+ raise
else:
if isfileobj(fp):
# We can use the fast fromfile() function.
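A minimal round-trip sketch of the new `allow_pickle` keyword on `write_array`/`read_array` (illustrative only; uses an in-memory buffer):

    from io import BytesIO
    import numpy as np
    from numpy.lib import format

    buf = BytesIO()
    arr = np.array([{'a': 1}, None], dtype=object)
    format.write_array(buf, arr)     # allow_pickle defaults to True
    buf.seek(0)
    format.read_array(buf)           # unpickles the object array

    try:
        format.write_array(BytesIO(), arr, allow_pickle=False)
    except ValueError:
        pass  # object arrays are rejected when pickling is disallowed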
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2baf83830..d22e8c047 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -906,9 +906,9 @@ def gradient(f, *varargs, **kwargs):
Returns
-------
- gradient : ndarray
- N arrays of the same shape as `f` giving the derivative of `f` with
- respect to each dimension.
+ gradient : list of ndarray
+ Each element of `list` has the same shape as `f` giving the derivative
+ of `f` with respect to each dimension.
Examples
--------
@@ -918,6 +918,10 @@ def gradient(f, *varargs, **kwargs):
>>> np.gradient(x, 2)
array([ 0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ])
+ For two dimensional arrays, the return will be two arrays ordered by
+ axis. In this example the first array stands for the gradient in
+ rows and the second one in columns direction:
+
>>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float))
[array([[ 2., 2., -1.],
[ 2., 2., -1.]]), array([[ 1. , 2.5, 4. ],
@@ -1949,54 +1953,59 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
return (dot(X, X.T.conj()) / fact).squeeze()
-def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
+def corrcoef(x, y=None, rowvar=1, bias=np._NoValue, ddof=np._NoValue):
"""
- Return correlation coefficients.
+ Return Pearson product-moment correlation coefficients.
Please refer to the documentation for `cov` for more detail. The
- relationship between the correlation coefficient matrix, `P`, and the
+ relationship between the correlation coefficient matrix, `R`, and the
covariance matrix, `C`, is
- .. math:: P_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } }
+ .. math:: R_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } }
- The values of `P` are between -1 and 1, inclusive.
+ The values of `R` are between -1 and 1, inclusive.
Parameters
----------
x : array_like
A 1-D or 2-D array containing multiple variables and observations.
- Each row of `m` represents a variable, and each column a single
+ Each row of `x` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
y : array_like, optional
An additional set of variables and observations. `y` has the same
- shape as `m`.
+ shape as `x`.
rowvar : int, optional
If `rowvar` is non-zero (default), then each row represents a
variable, with observations in the columns. Otherwise, the relationship
is transposed: each column represents a variable, while the rows
contain observations.
- bias : int, optional
- Default normalization is by ``(N - 1)``, where ``N`` is the number of
- observations (unbiased estimate). If `bias` is 1, then
- normalization is by ``N``. These values can be overridden by using
- the keyword ``ddof`` in numpy versions >= 1.5.
- ddof : int, optional
- .. versionadded:: 1.5
- If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
- the number of observations; this overrides the value implied by
- ``bias``. The default value is ``None``.
+ bias : _NoValue, optional
+ .. deprecated:: 1.10.0
+ Has no effect; do not use.
+ ddof : _NoValue, optional
+ .. deprecated:: 1.10.0
+ Has no effect; do not use.
Returns
-------
- out : ndarray
+ R : ndarray
The correlation coefficient matrix of the variables.
See Also
--------
cov : Covariance matrix
+ Notes
+ -----
+ This function accepts but discards arguments `bias` and `ddof`. This is
+ for backwards compatibility with previous versions of this function. These
+ arguments had no effect on the return values of the function and can be
+ safely ignored in this and previous versions of numpy.
"""
- c = cov(x, y, rowvar, bias, ddof)
+ if bias is not np._NoValue or ddof is not np._NoValue:
+ warnings.warn('bias and ddof have no effect and are deprecated',
+ DeprecationWarning)
+ c = cov(x, y, rowvar)
try:
d = diag(c)
except ValueError: # scalar covariance
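After this change, passing `bias` or `ddof` only emits a DeprecationWarning and does not alter the result; an illustrative doctest:

    >>> import warnings
    >>> x = np.array([[0., 2.], [1., 1.], [2., 0.]]).T
    >>> with warnings.catch_warnings(record=True) as w:
    ...     warnings.simplefilter('always')
    ...     r = np.corrcoef(x, ddof=-1)   # same result as np.corrcoef(x)
    ...     print(w[0].category.__name__)
    DeprecationWarning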
@@ -3730,6 +3739,7 @@ def insert(arr, obj, values, axis=None):
[3, 5, 3]])
Difference between sequence and scalars:
+
>>> np.insert(a, [1], [[1],[2],[3]], axis=1)
array([[1, 1, 1],
[2, 2, 2],
diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py
index eb9aad6ad..e97338106 100644
--- a/numpy/lib/index_tricks.py
+++ b/numpy/lib/index_tricks.py
@@ -480,7 +480,7 @@ class ndenumerate(object):
Parameters
----------
- a : ndarray
+ arr : ndarray
Input array.
See Also
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0632ba1f8..ec89397a0 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -164,6 +164,12 @@ class NpzFile(object):
f : BagObj instance
An object on which attribute can be performed as an alternative
to getitem access on the `NpzFile` instance itself.
+ allow_pickle : bool, optional
+ Allow loading pickled data. Default: True
+ pickle_kwargs : dict, optional
+ Additional keyword arguments to pass on to pickle.load.
+ These are only useful when loading object arrays saved on
+ Python 2 when using Python 3.
Parameters
----------
@@ -195,12 +201,15 @@ class NpzFile(object):
"""
- def __init__(self, fid, own_fid=False):
+ def __init__(self, fid, own_fid=False, allow_pickle=True,
+ pickle_kwargs=None):
# Import is postponed to here since zipfile depends on gzip, an
# optional component of the so-called standard library.
_zip = zipfile_factory(fid)
self._files = _zip.namelist()
self.files = []
+ self.allow_pickle = allow_pickle
+ self.pickle_kwargs = pickle_kwargs
for x in self._files:
if x.endswith('.npy'):
self.files.append(x[:-4])
@@ -256,7 +265,9 @@ class NpzFile(object):
bytes.close()
if magic == format.MAGIC_PREFIX:
bytes = self.zip.open(key)
- return format.read_array(bytes)
+ return format.read_array(bytes,
+ allow_pickle=self.allow_pickle,
+ pickle_kwargs=self.pickle_kwargs)
else:
return self.zip.read(key)
else:
@@ -289,7 +300,8 @@ class NpzFile(object):
return self.files.__contains__(key)
-def load(file, mmap_mode=None):
+def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
+ encoding='ASCII'):
"""
Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.
@@ -306,6 +318,23 @@ def load(file, mmap_mode=None):
and sliced like any ndarray. Memory mapping is especially useful
for accessing small fragments of large files without reading the
entire file into memory.
+ allow_pickle : bool, optional
+ Allow loading pickled object arrays stored in npy files. Reasons for
+ disallowing pickles include security, as loading pickled data can
+ execute arbitrary code. If pickles are disallowed, loading object
+ arrays will fail.
+ Default: True
+ fix_imports : bool, optional
+ Only useful when loading Python 2 generated pickled files on Python 3,
+ which includes npy/npz files containing object arrays. If `fix_imports`
+ is True, pickle will try to map the old Python 2 names to the new names
+ used in Python 3.
+ encoding : str, optional
+ What encoding to use when reading Python 2 strings. Only useful when
+ loading Python 2 generated pickled files on Python 3, which includes
+ npy/npz files containing object arrays. Values other than 'latin1',
+ 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
+ data. Default: 'ASCII'
Returns
-------
@@ -317,6 +346,8 @@ def load(file, mmap_mode=None):
------
IOError
If the input file does not exist or cannot be read.
+ ValueError
+ The file contains an object array, but allow_pickle=False given.
See Also
--------
@@ -381,6 +412,26 @@ def load(file, mmap_mode=None):
else:
fid = file
+ if encoding not in ('ASCII', 'latin1', 'bytes'):
+ # The 'encoding' value for pickle also affects what encoding
+ # the serialized binary data of Numpy arrays is loaded
+ # in. Pickle does not pass on the encoding information to
+ # Numpy. The unpickling code in numpy.core.multiarray is
+ # written to assume that unicode data appearing where binary
+ # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
+ #
+ # Other encoding values can corrupt binary data, and we
+ # purposefully disallow them. For the same reason, the errors=
+ # argument is not exposed, as values other than 'strict'
+ # can similarly result in silently corrupted numerical data.
+ raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")
+
+ if sys.version_info[0] >= 3:
+ pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)
+ else:
+ # Nothing to do on Python 2
+ pickle_kwargs = {}
+
try:
# Code to distinguish from NumPy binary files and pickles.
_ZIP_PREFIX = asbytes('PK\x03\x04')
@@ -392,17 +443,22 @@ def load(file, mmap_mode=None):
# Transfer file ownership to NpzFile
tmp = own_fid
own_fid = False
- return NpzFile(fid, own_fid=tmp)
+ return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
elif magic == format.MAGIC_PREFIX:
# .npy file
if mmap_mode:
return format.open_memmap(file, mode=mmap_mode)
else:
- return format.read_array(fid)
+ return format.read_array(fid, allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
else:
# Try a pickle
+ if not allow_pickle:
+ raise ValueError("allow_pickle=False, but file does not contain "
+ "non-pickled data")
try:
- return pickle.load(fid)
+ return pickle.load(fid, **pickle_kwargs)
except:
raise IOError(
"Failed to interpret file %s as a pickle" % repr(file))
@@ -411,7 +467,7 @@ def load(file, mmap_mode=None):
fid.close()
-def save(file, arr):
+def save(file, arr, allow_pickle=True, fix_imports=True):
"""
Save an array to a binary file in NumPy ``.npy`` format.
@@ -422,6 +478,19 @@ def save(file, arr):
then the filename is unchanged. If file is a string, a ``.npy``
extension will be appended to the file name if it does not already
have one.
+ allow_pickle : bool, optional
+ Allow saving object arrays using Python pickles. Reasons for disallowing
+ pickles include security (loading pickled data can execute arbitrary
+ code) and portability (pickled objects may not be loadable on different
+ Python installations, for example if the stored objects require libraries
+ that are not available, and not all pickled data is compatible between
+ Python 2 and Python 3).
+ Default: True
+ fix_imports : bool, optional
+ Only useful in forcing objects in object arrays on Python 3 to be
+ pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
+ will try to map the new Python 3 names to the old module names used in
+ Python 2, so that the pickle data stream is readable with Python 2.
arr : array_like
Array data to be saved.
@@ -458,9 +527,16 @@ def save(file, arr):
else:
fid = file
+ if sys.version_info[0] >= 3:
+ pickle_kwargs = dict(fix_imports=fix_imports)
+ else:
+ # Nothing to do on Python 2
+ pickle_kwargs = None
+
try:
arr = np.asanyarray(arr)
- format.write_array(fid, arr)
+ format.write_array(fid, arr, allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
finally:
if own_fid:
fid.close()
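And the matching `np.save` side, as a sketch (hypothetical path):

    import numpy as np

    obj = np.array([None, {'k': 1}], dtype=object)
    np.save('/tmp/objarr.npy', obj)   # allowed: pickling is on by default
    try:
        np.save('/tmp/objarr.npy', obj, allow_pickle=False)
    except ValueError:
        pass  # object arrays cannot be saved without pickle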
@@ -572,7 +648,7 @@ def savez_compressed(file, *args, **kwds):
_savez(file, args, kwds, True)
-def _savez(file, args, kwds, compress):
+def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
# Import is postponed to here since zipfile depends on gzip, an optional
# component of the so-called standard library.
import zipfile
@@ -606,7 +682,9 @@ def _savez(file, args, kwds, compress):
fname = key + '.npy'
fid = open(tmpfile, 'wb')
try:
- format.write_array(fid, np.asanyarray(val))
+ format.write_array(fid, np.asanyarray(val),
+ allow_pickle=allow_pickle,
+ pickle_kwargs=pickle_kwargs)
fid.close()
fid = None
zipf.write(tmpfile, arcname=fname)
@@ -640,7 +718,7 @@ def _getconv(dtype):
elif issubclass(typ, np.floating):
return floatconv
elif issubclass(typ, np.complex):
- return complex
+ return lambda x: complex(asstr(x))
elif issubclass(typ, np.bytes_):
return bytes
else:
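The `asstr` wrapper is needed because on Python 3 the parsed tokens are bytes and `complex()` rejects bytes input. An illustrative doctest (output formatting may vary by version):

    >>> from io import StringIO
    >>> np.loadtxt(StringIO("(1+1j) (1-1j)"), dtype=np.complex)
    array([ 1.+1.j,  1.-1.j])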
@@ -667,8 +745,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
each row will be interpreted as an element of the array. In this
case, the number of columns used must match the number of fields in
the data-type.
- comments : str, optional
- The character used to indicate the start of a comment;
+ comments : str or sequence, optional
+ The characters or list of characters used to indicate the start of a
+ comment;
default: '#'.
delimiter : str, optional
The string used to separate values. By default, this is any
@@ -741,7 +820,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
"""
# Type conversions for Py3 convenience
if comments is not None:
- comments = asbytes(comments)
+ if isinstance(comments, (basestring, bytes)):
+ comments = [asbytes(comments)]
+ else:
+ comments = [asbytes(comment) for comment in comments]
+
+ # Compile regex for comments beforehand
+ comments = (re.escape(comment) for comment in comments)
+ regex_comments = re.compile(asbytes('|').join(comments))
user_converters = converters
if delimiter is not None:
delimiter = asbytes(delimiter)
@@ -813,11 +899,16 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
return tuple(ret)
def split_line(line):
- """Chop off comments, strip, and split at delimiter."""
- if comments is None:
- line = asbytes(line).strip(asbytes('\r\n'))
- else:
- line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
+ """Chop off comments, strip, and split at delimiter.
+
+ Note that although the file is opened as text, this function
+ returns bytes.
+
+ """
+ line = asbytes(line)
+ if comments is not None:
+ line = regex_comments.split(asbytes(line), maxsplit=1)[0]
+ line = line.strip(asbytes('\r\n'))
if line:
return line.split(delimiter)
else:
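With the precompiled alternation regex, each line is cut at the first occurrence of any comment token. An illustrative doctest:

    >>> from io import StringIO
    >>> s = StringIO("# header\n1,2\n@ note\n3,4 // trailing\n")
    >>> np.loadtxt(s, dtype=int, delimiter=',', comments=['#', '@', '//'])
    array([[1, 2],
           [3, 4]])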
@@ -1240,8 +1331,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
The string used to separate values. By default, any consecutive
whitespaces act as delimiter. An integer or sequence of integers
can also be provided as width(s) of each field.
- skip_rows : int, optional
- `skip_rows` was deprecated in numpy 1.5, and will be removed in
+ skiprows : int, optional
+ `skiprows` was deprecated in numpy 1.5, and will be removed in
numpy 2.0. Please use `skip_header` instead.
skip_header : int, optional
The number of lines to skip at the beginning of the file.
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index a61b1749b..4ae1079d2 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -268,7 +268,7 @@ def izip_records(seqarrays, fill_value=None, flatten=True):
Parameters
----------
- seqarray : sequence of arrays
+ seqarrays : sequence of arrays
Sequence of arrays.
fill_value : {None, integer}
Value used to pad shorter iterables.
@@ -683,7 +683,7 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False,
Parameters
----------
- seqarrays : array or sequence
+ arrays : array or sequence
Sequence of input arrays.
defaults : dictionary, optional
Dictionary mapping field names to the corresponding default values.
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 2d18c5bc8..011434dda 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -850,7 +850,12 @@ def tile(A, reps):
except TypeError:
tup = (reps,)
d = len(tup)
- c = _nx.array(A, copy=False, subok=True, ndmin=d)
+ if all(x == 1 for x in tup) and isinstance(A, _nx.ndarray):
+ # Fixes the problem that the function does not make a copy if A is a
+ # numpy array and the repetitions are 1 in all dimensions
+ return _nx.array(A, copy=True, subok=True, ndmin=d)
+ else:
+ c = _nx.array(A, copy=False, subok=True, ndmin=d)
shape = list(c.shape)
n = max(c.size, 1)
if (d < c.ndim):
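An illustrative doctest of the gh-4679 fix: writing to the result of `tile(a, 1)` no longer mutates the input:

    >>> a = np.arange(5)
    >>> b = np.tile(a, 1)
    >>> b += 2
    >>> a
    array([0, 1, 2, 3, 4])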
diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py
index b81307a65..e7649cb60 100644
--- a/numpy/lib/stride_tricks.py
+++ b/numpy/lib/stride_tricks.py
@@ -9,7 +9,8 @@ from __future__ import division, absolute_import, print_function
import numpy as np
-__all__ = ['broadcast_arrays']
+__all__ = ['broadcast_to', 'broadcast_arrays']
+
class DummyArray(object):
"""Dummy object that just exists to hang __array_interface__ dictionaries
@@ -20,6 +21,20 @@ class DummyArray(object):
self.__array_interface__ = interface
self.base = base
+
+def _maybe_view_as_subclass(original_array, new_array):
+ if type(original_array) is not type(new_array):
+ # if input was an ndarray subclass and subclasses were OK,
+ # then view the result as that subclass.
+ new_array = new_array.view(type=type(original_array))
+ # Since we have done something akin to a view from original_array, we
+ # should let the subclass finalize (if it has it implemented, i.e., is
+ # not None).
+ if new_array.__array_finalize__:
+ new_array.__array_finalize__(original_array)
+ return new_array
+
+
def as_strided(x, shape=None, strides=None, subok=False):
""" Make an ndarray from the given array with the given shape and strides.
"""
@@ -31,18 +46,85 @@ def as_strided(x, shape=None, strides=None, subok=False):
if strides is not None:
interface['strides'] = tuple(strides)
array = np.asarray(DummyArray(interface, base=x))
- # Make sure dtype is correct in case of custom dtype
- if array.dtype.kind == 'V':
+
+ if array.dtype.fields is None and x.dtype.fields is not None:
+ # This should only happen if x.dtype is [('', 'Vx')]
array.dtype = x.dtype
- if type(x) is not type(array):
- # if input was an ndarray subclass and subclasses were OK,
- # then view the result as that subclass.
- array = array.view(type=type(x))
- # Since we have done something akin to a view from x, we should let
- # the subclass finalize (if it has it implemented, i.e., is not None).
- if array.__array_finalize__:
- array.__array_finalize__(x)
- return array
+
+ return _maybe_view_as_subclass(x, array)
+
+
+def _broadcast_to(array, shape, subok, readonly):
+ shape = tuple(shape) if np.iterable(shape) else (shape,)
+ array = np.array(array, copy=False, subok=subok)
+ if not shape and array.shape:
+ raise ValueError('cannot broadcast a non-scalar to a scalar array')
+ if any(size < 0 for size in shape):
+ raise ValueError('all elements of broadcast shape must be non-'
+ 'negative')
+ broadcast = np.nditer(
+ (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'],
+ op_flags=['readonly'], itershape=shape, order='C').itviews[0]
+ result = _maybe_view_as_subclass(array, broadcast)
+ if not readonly and array.flags.writeable:
+ result.flags.writeable = True
+ return result
+
+
+def broadcast_to(array, shape, subok=False):
+ """Broadcast an array to a new shape.
+
+ Parameters
+ ----------
+ array : array_like
+ The array to broadcast.
+ shape : tuple
+ The shape of the desired array.
+ subok : bool, optional
+ If True, then sub-classes will be passed-through, otherwise
+ the returned array will be forced to be a base-class array (default).
+
+ Returns
+ -------
+ broadcast : array
+ A readonly view on the original array with the given shape. It is
+ typically not contiguous. Furthermore, more than one element of a
+ broadcasted array may refer to a single memory location.
+
+ Raises
+ ------
+ ValueError
+ If the array is not compatible with the new shape according to NumPy's
+ broadcasting rules.
+
+ Examples
+ --------
+ >>> x = np.array([1, 2, 3])
+ >>> np.broadcast_to(x, (3, 3))
+ array([[1, 2, 3],
+ [1, 2, 3],
+ [1, 2, 3]])
+ """
+ return _broadcast_to(array, shape, subok=subok, readonly=True)
+
+
+def _broadcast_shape(*args):
+ """Returns the shape of the ararys that would result from broadcasting the
+ supplied arrays against each other.
+ """
+ if not args:
+ raise ValueError('must provide at least one argument')
+ if len(args) == 1:
+ # a single argument does not work with np.broadcast
+ return np.asarray(args[0]).shape
+ # use the old-iterator because np.nditer does not handle size 0 arrays
+ # consistently
+ b = np.broadcast(*args[:32])
+ # unfortunately, it cannot handle 32 or more arguments directly
+ for pos in range(32, len(args), 31):
+ b = np.broadcast(b, *args[pos:(pos + 31)])
+ return b.shape
+
def broadcast_arrays(*args, **kwargs):
"""
@@ -87,55 +169,24 @@ def broadcast_arrays(*args, **kwargs):
[3, 3, 3]])]
"""
+ # nditer is not used here to avoid the limit of 32 arrays.
+ # Otherwise, something like the following one-liner would suffice:
+ # return np.nditer(args, flags=['multi_index', 'zerosize_ok'],
+ # order='C').itviews
+
subok = kwargs.pop('subok', False)
if kwargs:
raise TypeError('broadcast_arrays() got an unexpected keyword '
'argument {}'.format(kwargs.pop()))
args = [np.array(_m, copy=False, subok=subok) for _m in args]
- shapes = [x.shape for x in args]
- if len(set(shapes)) == 1:
+
+ shape = _broadcast_shape(*args)
+
+ if all(array.shape == shape for array in args):
# Common case where nothing needs to be broadcasted.
return args
- shapes = [list(s) for s in shapes]
- strides = [list(x.strides) for x in args]
- nds = [len(s) for s in shapes]
- biggest = max(nds)
- # Go through each array and prepend dimensions of length 1 to each of
- # the shapes in order to make the number of dimensions equal.
- for i in range(len(args)):
- diff = biggest - nds[i]
- if diff > 0:
- shapes[i] = [1] * diff + shapes[i]
- strides[i] = [0] * diff + strides[i]
- # Chech each dimension for compatibility. A dimension length of 1 is
- # accepted as compatible with any other length.
- common_shape = []
- for axis in range(biggest):
- lengths = [s[axis] for s in shapes]
- unique = set(lengths + [1])
- if len(unique) > 2:
- # There must be at least two non-1 lengths for this axis.
- raise ValueError("shape mismatch: two or more arrays have "
- "incompatible dimensions on axis %r." % (axis,))
- elif len(unique) == 2:
- # There is exactly one non-1 length. The common shape will take
- # this value.
- unique.remove(1)
- new_length = unique.pop()
- common_shape.append(new_length)
- # For each array, if this axis is being broadcasted from a
- # length of 1, then set its stride to 0 so that it repeats its
- # data.
- for i in range(len(args)):
- if shapes[i][axis] == 1:
- shapes[i][axis] = new_length
- strides[i][axis] = 0
- else:
- # Every array has a length of 1 on this axis. Strides can be
- # left alone as nothing is broadcasted.
- common_shape.append(1)
-
- # Construct the new arrays.
- broadcasted = [as_strided(x, shape=sh, strides=st, subok=subok)
- for (x, sh, st) in zip(args, shapes, strides)]
- return broadcasted
+
+ # TODO: consider making the results of broadcast_arrays readonly to match
+ # broadcast_to. This will require a deprecation cycle.
+ return [_broadcast_to(array, shape, subok=subok, readonly=False)
+ for array in args]
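The resulting semantics, sketched as a doctest: `broadcast_to` hands back a readonly view, while `broadcast_arrays` keeps its results writeable for backwards compatibility:

    >>> x = np.array([1, 2, 3])
    >>> np.broadcast_to(x, (2, 3)).flags.writeable
    False
    >>> a, b = np.broadcast_arrays(x, np.zeros((2, 1)))
    >>> a.flags.writeable
    True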
diff --git a/numpy/lib/tests/data/py2-objarr.npy b/numpy/lib/tests/data/py2-objarr.npy
new file mode 100644
index 000000000..12936c92d
--- /dev/null
+++ b/numpy/lib/tests/data/py2-objarr.npy
Binary files differ
diff --git a/numpy/lib/tests/data/py2-objarr.npz b/numpy/lib/tests/data/py2-objarr.npz
new file mode 100644
index 000000000..68a3b53a1
--- /dev/null
+++ b/numpy/lib/tests/data/py2-objarr.npz
Binary files differ
diff --git a/numpy/lib/tests/data/py3-objarr.npy b/numpy/lib/tests/data/py3-objarr.npy
new file mode 100644
index 000000000..6776074b4
--- /dev/null
+++ b/numpy/lib/tests/data/py3-objarr.npy
Binary files differ
diff --git a/numpy/lib/tests/data/py3-objarr.npz b/numpy/lib/tests/data/py3-objarr.npz
new file mode 100644
index 000000000..05eac0b76
--- /dev/null
+++ b/numpy/lib/tests/data/py3-objarr.npz
Binary files differ
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index 060f815d5..e0a917a21 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -152,17 +152,31 @@ class TestStringConverter(TestCase):
def test_upgrade(self):
"Tests the upgrade method."
+
converter = StringConverter()
assert_equal(converter._status, 0)
+
# test int
assert_equal(converter.upgrade(asbytes('0')), 0)
assert_equal(converter._status, 1)
+
+ # On systems where integer defaults to 32-bit, the statuses will be
+ # offset by one, so we check for this here.
+ import numpy.core.numeric as nx
+ status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize)
+
+ # test int > 2**32
+ assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184)
+ assert_equal(converter._status, 1 + status_offset)
+
# test float
assert_allclose(converter.upgrade(asbytes('0.')), 0.0)
- assert_equal(converter._status, 2)
+ assert_equal(converter._status, 2 + status_offset)
+
# test complex
assert_equal(converter.upgrade(asbytes('0j')), complex('0j'))
- assert_equal(converter._status, 3)
+ assert_equal(converter._status, 3 + status_offset)
+
# test str
assert_equal(converter.upgrade(asbytes('a')), asbytes('a'))
assert_equal(converter._status, len(converter._mapper) - 1)
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index ee77386bc..4f8a65148 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -284,7 +284,7 @@ import warnings
from io import BytesIO
import numpy as np
-from numpy.compat import asbytes, asbytes_nested
+from numpy.compat import asbytes, asbytes_nested, sixu
from numpy.testing import (
run_module_suite, assert_, assert_array_equal, assert_raises, raises,
dec
@@ -534,6 +534,87 @@ def test_python2_python3_interoperability():
assert_array_equal(data, np.ones(2))
+def test_pickle_python2_python3():
+ # Test that loading object arrays saved on Python 2 works both on
+ # Python 2 and Python 3 and vice versa
+ data_dir = os.path.join(os.path.dirname(__file__), 'data')
+
+ if sys.version_info[0] >= 3:
+ xrange = range
+ else:
+ import __builtin__
+ xrange = __builtin__.xrange
+
+ expected = np.array([None, xrange, sixu('\u512a\u826f'),
+ asbytes('\xe4\xb8\x8d\xe8\x89\xaf')],
+ dtype=object)
+
+ for fname in ['py2-objarr.npy', 'py2-objarr.npz',
+ 'py3-objarr.npy', 'py3-objarr.npz']:
+ path = os.path.join(data_dir, fname)
+
+ if (fname.endswith('.npz') and sys.version_info[0] == 2 and
+ sys.version_info[1] < 7):
+ # Reading object arrays directly from zipfile appears to fail
+ # on Py2.6, see cfae0143b4
+ continue
+
+ for encoding in ['bytes', 'latin1']:
+ if (sys.version_info[0] >= 3 and sys.version_info[1] < 4 and
+ encoding == 'bytes'):
+ # The bytes encoding is available starting from Python 3.4
+ continue
+
+ data_f = np.load(path, encoding=encoding)
+ if fname.endswith('.npz'):
+ data = data_f['x']
+ data_f.close()
+ else:
+ data = data_f
+
+ if sys.version_info[0] >= 3:
+ if encoding == 'latin1' and fname.startswith('py2'):
+ assert_(isinstance(data[3], str))
+ assert_array_equal(data[:-1], expected[:-1])
+ # mojibake occurs
+ assert_array_equal(data[-1].encode(encoding), expected[-1])
+ else:
+ assert_(isinstance(data[3], bytes))
+ assert_array_equal(data, expected)
+ else:
+ assert_array_equal(data, expected)
+
+ if sys.version_info[0] >= 3:
+ if fname.startswith('py2'):
+ if fname.endswith('.npz'):
+ data = np.load(path)
+ assert_raises(UnicodeError, data.__getitem__, 'x')
+ data.close()
+ data = np.load(path, fix_imports=False, encoding='latin1')
+ assert_raises(ImportError, data.__getitem__, 'x')
+ data.close()
+ else:
+ assert_raises(UnicodeError, np.load, path)
+ assert_raises(ImportError, np.load, path,
+ encoding='latin1', fix_imports=False)
+
+
+def test_pickle_disallow():
+ data_dir = os.path.join(os.path.dirname(__file__), 'data')
+
+ path = os.path.join(data_dir, 'py2-objarr.npy')
+ assert_raises(ValueError, np.load, path,
+ allow_pickle=False, encoding='latin1')
+
+ path = os.path.join(data_dir, 'py2-objarr.npz')
+ f = np.load(path, allow_pickle=False, encoding='latin1')
+ assert_raises(ValueError, f.__getitem__, 'x')
+
+ path = os.path.join(tempdir, 'pickle-disabled.npy')
+ assert_raises(ValueError, np.save, path, np.array([None], dtype=object),
+ allow_pickle=False)
+
+
def test_version_2_0():
f = BytesIO()
# requires more than 2 byte for header
@@ -629,6 +710,26 @@ malformed_magic = asbytes_nested([
'',
])
+def test_read_magic():
+ s1 = BytesIO()
+ s2 = BytesIO()
+
+ arr = np.ones((3, 6), dtype=float)
+
+ format.write_array(s1, arr, version=(1, 0))
+ format.write_array(s2, arr, version=(2, 0))
+
+ s1.seek(0)
+ s2.seek(0)
+
+ version1 = format.read_magic(s1)
+ version2 = format.read_magic(s2)
+
+ assert_(version1 == (1, 0))
+ assert_(version2 == (2, 0))
+
+ assert_(s1.tell() == format.MAGIC_LEN)
+ assert_(s2.tell() == format.MAGIC_LEN)
def test_read_magic_bad_magic():
for magic in malformed_magic:
@@ -659,6 +760,30 @@ def test_large_header():
assert_raises(ValueError, format.write_array_header_1_0, s, d)
+def test_read_array_header_1_0():
+ s = BytesIO()
+
+ arr = np.ones((3, 6), dtype=float)
+ format.write_array(s, arr, version=(1, 0))
+
+ s.seek(format.MAGIC_LEN)
+ shape, fortran, dtype = format.read_array_header_1_0(s)
+
+ assert_((shape, fortran, dtype) == ((3, 6), False, float))
+
+
+def test_read_array_header_2_0():
+ s = BytesIO()
+
+ arr = np.ones((3, 6), dtype=float)
+ format.write_array(s, arr, version=(2, 0))
+
+ s.seek(format.MAGIC_LEN)
+ shape, fortran, dtype = format.read_array_header_2_0(s)
+
+ assert_((shape, fortran, dtype) == ((3, 6), False, float))
+
+
def test_bad_header():
# header of length less than 2 should fail
s = BytesIO()
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 03521ca4c..cf9fcf5e2 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -8,8 +8,9 @@ from numpy.testing import (
run_module_suite, TestCase, assert_, assert_equal, assert_array_equal,
assert_almost_equal, assert_array_almost_equal, assert_raises,
assert_allclose, assert_array_max_ulp, assert_warns,
- assert_raises_regex, dec
+ assert_raises_regex, dec, clear_and_catch_warnings
)
+import numpy.lib.function_base as nfb
from numpy.random import rand
from numpy.lib import *
from numpy.compat import long
@@ -668,7 +669,7 @@ class TestVectorize(TestCase):
args = np.array([0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi])
r1 = f(args)
r2 = np.cos(args)
- assert_array_equal(r1, r2)
+ assert_array_almost_equal(r1, r2)
def test_keywords(self):
import math
@@ -1305,6 +1306,12 @@ class TestCheckFinite(TestCase):
assert_(a.dtype == np.float64)
+class catch_warn_nfb(clear_and_catch_warnings):
+ """ Context manager to catch, reset warnings in function_base module
+ """
+ class_modules = (nfb,)
+
+
class TestCorrCoef(TestCase):
A = np.array(
[[0.15391142, 0.18045767, 0.14197213],
@@ -1335,8 +1342,26 @@ class TestCorrCoef(TestCase):
assert_almost_equal(corrcoef(self.A, self.B), self.res2)
def test_ddof(self):
- assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1)
- assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2)
+ # ddof raises DeprecationWarning
+ with catch_warn_nfb():
+ warnings.simplefilter("always")
+ assert_warns(DeprecationWarning, corrcoef, self.A, ddof=-1)
+ warnings.simplefilter("ignore")
+ # ddof has no or negligible effect on the function
+ assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1)
+ assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2)
+ assert_almost_equal(corrcoef(self.A, ddof=3), self.res1)
+ assert_almost_equal(corrcoef(self.A, self.B, ddof=3), self.res2)
+
+ def test_bias(self):
+ # bias raises DeprecationWarning
+ with catch_warn_nfb():
+ warnings.simplefilter("always")
+ assert_warns(DeprecationWarning, corrcoef, self.A, self.B, 1, 0)
+ assert_warns(DeprecationWarning, corrcoef, self.A, bias=0)
+ warnings.simplefilter("ignore")
+ # bias has no or negligible effect on the function
+ assert_almost_equal(corrcoef(self.A, bias=1), self.res1)
def test_complex(self):
x = np.array([[1, 2, 3], [1j, 2j, 3j]])
@@ -1356,13 +1381,6 @@ class TestCorrCoef(TestCase):
assert_array_equal(corrcoef(np.array([]).reshape(2, 0)),
np.array([[np.nan, np.nan], [np.nan, np.nan]]))
- def test_wrong_ddof(self):
- x = np.array([[0, 2], [1, 1], [2, 0]]).T
- with warnings.catch_warnings(record=True):
- warnings.simplefilter('always', RuntimeWarning)
- assert_array_equal(corrcoef(x, ddof=5),
- np.array([[np.nan, np.nan], [np.nan, np.nan]]))
-
class TestCov(TestCase):
def test_basic(self):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 7054ab1fe..8a939f85e 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -15,10 +15,10 @@ import numpy as np
import numpy.ma as ma
from numpy.lib._iotools import (ConverterError, ConverterLockError,
ConversionWarning)
-from numpy.compat import asbytes, asbytes_nested, bytes, asstr
+from numpy.compat import asbytes, bytes, unicode
from nose import SkipTest
from numpy.ma.testutils import (
- TestCase, assert_equal, assert_array_equal,
+ TestCase, assert_equal, assert_array_equal, assert_allclose,
assert_raises, assert_raises_regex, run_module_suite
)
from numpy.testing import assert_warns, assert_, build_err_msg
@@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
l = np.load(c)
assert_equal(a, l['file_a'])
assert_equal(b, l['file_b'])
-
+
def test_BagObj(self):
a = np.array([[1, 2], [3, 4]], float)
b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
@@ -553,15 +553,49 @@ class TestLoadTxt(TestCase):
a = np.array([[2, -999], [7, 9]], int)
assert_array_equal(x, a)
- def test_comments(self):
+ def test_comments_unicode(self):
+ c = TextIO()
+ c.write('# comment\n1,2,3,5\n')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments=unicode('#'))
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
+
+ def test_comments_byte(self):
c = TextIO()
c.write('# comment\n1,2,3,5\n')
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',',
- comments='#')
+ comments=b'#')
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
+
+ def test_comments_multiple(self):
+ c = TextIO()
+ c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments=['#', '@', '//'])
+ a = np.array([[1, 2, 3], [4, 5, 6]], int)
+ assert_array_equal(x, a)
+
+ def test_comments_multi_chars(self):
+ c = TextIO()
+ c.write('/* comment\n1,2,3,5\n')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments='/*')
a = np.array([1, 2, 3, 5], int)
assert_array_equal(x, a)
+ # Check that '/*' is not transformed to ['/', '*']
+ c = TextIO()
+ c.write('*/ comment\n1,2,3,5\n')
+ c.seek(0)
+ assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
+ comments='/*')
+
def test_skiprows(self):
c = TextIO()
c.write('comment\n1,2,3,5\n')
@@ -707,6 +741,14 @@ class TestLoadTxt(TestCase):
res = np.loadtxt(c, dtype=dt)
assert_equal(res, tgt, err_msg="%s" % dt)
+ def test_from_complex(self):
+ tgt = (complex(1, 1), complex(1, -1))
+ c = TextIO()
+ c.write("%s %s" % tgt)
+ c.seek(0)
+ res = np.loadtxt(c, dtype=np.complex)
+ assert_equal(res, tgt)
+
def test_universal_newline(self):
f, name = mkstemp()
os.write(f, b'1 21\r3 42\r')
@@ -1762,6 +1804,31 @@ M 33 21.99
res = np.genfromtxt(count())
assert_array_equal(res, np.arange(10))
+ def test_auto_dtype_largeint(self):
+ """
+ Regression test for numpy/numpy#5635 whereby large integers could
+ cause OverflowErrors. Also tests the automatic definition of the
+ output dtype.
+ """
+
+ # 2**66 = 73786976294838206464 => should convert to float
+ # 2**34 = 17179869184 => should convert to int64
+ # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
+ # int64 on 64-bit systems)
+
+ data = TextIO('73786976294838206464 17179869184 1024')
+
+ test = np.ndfromtxt(data, dtype=None)
+
+ assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
+
+ assert test.dtype['f0'] == np.float
+ assert test.dtype['f1'] == np.int64
+ assert test.dtype['f2'] == np.integer
+
+ assert_allclose(test['f0'], 73786976294838206464.)
+ assert_equal(test['f1'], 17179869184)
+ assert_equal(test['f2'], 1024)
def test_gzip_load():
a = np.random.random((5, 5))
diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py
index 23f3edfbe..fb9d7f364 100644
--- a/numpy/lib/tests/test_shape_base.py
+++ b/numpy/lib/tests/test_shape_base.py
@@ -324,6 +324,12 @@ class TestTile(TestCase):
assert_equal(tile(b, (2, 2)), [[1, 2, 1, 2], [3, 4, 3, 4],
[1, 2, 1, 2], [3, 4, 3, 4]])
+ def test_tile_one_repetition_on_array_gh4679(self):
+ a = np.arange(5)
+ b = tile(a, 1)
+ b += 2
+ assert_equal(a, np.arange(5))
+
def test_empty(self):
a = np.array([[[]]])
d = tile(a, (3, 2, 5)).shape
diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py
index bc7e30ca4..e079e0bf4 100644
--- a/numpy/lib/tests/test_stride_tricks.py
+++ b/numpy/lib/tests/test_stride_tricks.py
@@ -5,8 +5,9 @@ from numpy.testing import (
run_module_suite, assert_equal, assert_array_equal,
assert_raises, assert_
)
-from numpy.lib.stride_tricks import as_strided, broadcast_arrays
-
+from numpy.lib.stride_tricks import (
+ as_strided, broadcast_arrays, _broadcast_shape, broadcast_to
+)
def assert_shapes_correct(input_shapes, expected_shape):
# Broadcast a list of arrays with the given input shapes and check the
@@ -217,6 +218,62 @@ def test_same_as_ufunc():
assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True)
assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True)
+
+def test_broadcast_to_succeeds():
+ data = [
+ [np.array(0), (0,), np.array(0)],
+ [np.array(0), (1,), np.zeros(1)],
+ [np.array(0), (3,), np.zeros(3)],
+ [np.ones(1), (1,), np.ones(1)],
+ [np.ones(1), (2,), np.ones(2)],
+ [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))],
+ [np.arange(3), (3,), np.arange(3)],
+ [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)],
+ [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])],
+ # test if shape is not a tuple
+ [np.ones(0), 0, np.ones(0)],
+ [np.ones(1), 1, np.ones(1)],
+ [np.ones(1), 2, np.ones(2)],
+ # these cases with size 0 are strange, but they reproduce the behavior
+ # of broadcasting with ufuncs (see test_same_as_ufunc above)
+ [np.ones(1), (0,), np.ones(0)],
+ [np.ones((1, 2)), (0, 2), np.ones((0, 2))],
+ [np.ones((2, 1)), (2, 0), np.ones((2, 0))],
+ ]
+ for input_array, shape, expected in data:
+ actual = broadcast_to(input_array, shape)
+ assert_array_equal(expected, actual)
+
+
+def test_broadcast_to_raises():
+ data = [
+ [(0,), ()],
+ [(1,), ()],
+ [(3,), ()],
+ [(3,), (1,)],
+ [(3,), (2,)],
+ [(3,), (4,)],
+ [(1, 2), (2, 1)],
+ [(1, 1), (1,)],
+ [(1,), -1],
+ [(1,), (-1,)],
+ [(1, 2), (-1, 2)],
+ ]
+ for orig_shape, target_shape in data:
+ arr = np.zeros(orig_shape)
+ assert_raises(ValueError, lambda: broadcast_to(arr, target_shape))
+
+
+def test_broadcast_shape():
+ # broadcast_shape is already exercised indirectly by broadcast_arrays
+ assert_raises(ValueError, _broadcast_shape)
+ assert_equal(_broadcast_shape([1, 2]), (2,))
+ assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1))
+ assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4))
+ assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2))
+ assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2))
+
+
def test_as_strided():
a = np.array([None])
a_view = as_strided(a)
@@ -233,6 +290,29 @@ def test_as_strided():
expected = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
assert_array_equal(a_view, expected)
+ # Regression test for gh-5081
+ dt = np.dtype([('num', 'i4'), ('obj', 'O')])
+ a = np.empty((4,), dtype=dt)
+ a['num'] = np.arange(1, 5)
+ a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize))
+ expected_num = [[1, 2, 3, 4]] * 3
+ expected_obj = [[None]*4]*3
+ assert_equal(a_view.dtype, dt)
+ assert_array_equal(expected_num, a_view['num'])
+ assert_array_equal(expected_obj, a_view['obj'])
+
+ # Make sure that void types without fields are kept unchanged
+ a = np.empty((4,), dtype='V4')
+ a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize))
+ assert_equal(a.dtype, a_view.dtype)
+
+ # Make sure that the only type that could fail is properly handled
+ dt = np.dtype({'names': [''], 'formats': ['V4']})
+ a = np.empty((4,), dtype=dt)
+ a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize))
+ assert_equal(a.dtype, a_view.dtype)
+
+
class VerySimpleSubClass(np.ndarray):
def __new__(cls, *args, **kwargs):
@@ -277,6 +357,45 @@ def test_subclasses():
assert_(type(b_view) is np.ndarray)
assert_(a_view.shape == b_view.shape)
+ # and for broadcast_to
+ shape = (2, 4)
+ a_view = broadcast_to(a, shape)
+ assert_(type(a_view) is np.ndarray)
+ assert_(a_view.shape == shape)
+ a_view = broadcast_to(a, shape, subok=True)
+ assert_(type(a_view) is SimpleSubClass)
+ assert_(a_view.info == 'simple finalized')
+ assert_(a_view.shape == shape)
+
+
+def test_writeable():
+ # broadcast_to should return a readonly array
+ original = np.array([1, 2, 3])
+ result = broadcast_to(original, (2, 3))
+ assert_equal(result.flags.writeable, False)
+ assert_raises(ValueError, result.__setitem__, slice(None), 0)
+
+ # but the result of broadcast_arrays needs to be writeable (for now), to
+ # preserve backwards compatibility
+ for results in [broadcast_arrays(original),
+ broadcast_arrays(0, original)]:
+ for result in results:
+ assert_equal(result.flags.writeable, True)
+ # keep readonly input readonly
+ original.flags.writeable = False
+ _, result = broadcast_arrays(0, original)
+ assert_equal(result.flags.writeable, False)
+
+
+def test_reference_types():
+ input_array = np.array('a', dtype=object)
+ expected = np.array(['a'] * 3, dtype=object)
+ actual = broadcast_to(input_array, (3,))
+ assert_array_equal(expected, actual)
+
+ actual, _ = broadcast_arrays(input_array, np.ones(3))
+ assert_array_equal(expected, actual)
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py
index 3931f95e5..7afd1206c 100644
--- a/numpy/lib/tests/test_type_check.py
+++ b/numpy/lib/tests/test_type_check.py
@@ -277,6 +277,8 @@ class TestNanToNum(TestCase):
def test_integer(self):
vals = nan_to_num(1)
assert_all(vals == 1)
+ vals = nan_to_num([1])
+ assert_array_equal(vals, np.array([1], np.int))
def test_complex_good(self):
vals = nan_to_num(1+1j)
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index a45d0bd86..99677b394 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -324,12 +324,13 @@ def nan_to_num(x):
Returns
-------
- out : ndarray, float
- Array with the same shape as `x` and dtype of the element in `x` with
- the greatest precision. NaN is replaced by zero, and infinity
- (-infinity) is replaced by the largest (smallest or most negative)
- floating point value that fits in the output dtype. All finite numbers
- are upcast to the output dtype (default float64).
+ out : ndarray
+ New array with the same shape as `x` and dtype of the element in
+ `x` with the greatest precision. If `x` is inexact, then NaN is
+ replaced by zero, and infinity (-infinity) is replaced by the
+ largest (smallest or most negative) floating point value that fits
+ in the output dtype. If `x` is not inexact, then a copy of `x` is
+ returned.
See Also
--------
@@ -354,33 +355,22 @@ def nan_to_num(x):
-1.28000000e+002, 1.28000000e+002])
"""
- try:
- t = x.dtype.type
- except AttributeError:
- t = obj2sctype(type(x))
- if issubclass(t, _nx.complexfloating):
- return nan_to_num(x.real) + 1j * nan_to_num(x.imag)
- else:
- try:
- y = x.copy()
- except AttributeError:
- y = array(x)
- if not issubclass(t, _nx.integer):
- if not y.shape:
- y = array([x])
- scalar = True
- else:
- scalar = False
- are_inf = isposinf(y)
- are_neg_inf = isneginf(y)
- are_nan = isnan(y)
- maxf, minf = _getmaxmin(y.dtype.type)
- y[are_nan] = 0
- y[are_inf] = maxf
- y[are_neg_inf] = minf
- if scalar:
- y = y[0]
- return y
+ x = _nx.array(x, subok=True)
+ xtype = x.dtype.type
+ if not issubclass(xtype, _nx.inexact):
+ return x
+
+ iscomplex = issubclass(xtype, _nx.complexfloating)
+ isscalar = (x.ndim == 0)
+
+ x = x[None] if isscalar else x
+ dest = (x.real, x.imag) if iscomplex else (x,)
+ maxf, minf = _getmaxmin(x.real.dtype)
+ for d in dest:
+ _nx.copyto(d, 0.0, where=isnan(d))
+ _nx.copyto(d, maxf, where=isposinf(d))
+ _nx.copyto(d, minf, where=isneginf(d))
+ return x[0] if isscalar else x
#-----------------------------------------------------------------------------
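An illustrative sketch of the rewritten `nan_to_num` behavior (the replacement values are compared against `np.finfo` rather than spelled out, since their reprs are long):

    >>> y = np.nan_to_num(np.array([np.nan, np.inf, -np.inf]))
    >>> y[0], y[1] == np.finfo(np.float64).max, y[2] == np.finfo(np.float64).min
    (0.0, True, True)
    >>> np.nan_to_num(np.array([1, 2]))   # non-inexact input: copy returned as-is
    array([1, 2])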