diff options
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/_iotools.py | 13 | ||||
-rw-r--r-- | numpy/lib/arraypad.py | 9 | ||||
-rw-r--r-- | numpy/lib/function_base.py | 7 | ||||
-rw-r--r-- | numpy/lib/nanfunctions.py | 2 | ||||
-rw-r--r-- | numpy/lib/npyio.py | 28 | ||||
-rw-r--r-- | numpy/lib/recfunctions.py | 68 | ||||
-rw-r--r-- | numpy/lib/tests/test_arraypad.py | 23 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 11 | ||||
-rw-r--r-- | numpy/lib/tests/test_recfunctions.py | 58 | ||||
-rw-r--r-- | numpy/lib/type_check.py | 10 | ||||
-rw-r--r-- | numpy/lib/utils.py | 87 |
11 files changed, 190 insertions, 126 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 0ebd39b8c..c392929fd 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -121,7 +121,7 @@ def has_nested_fields(ndtype): """ for name in ndtype.names or (): - if ndtype[name].names: + if ndtype[name].names is not None: return True return False @@ -931,28 +931,27 @@ def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs): names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt) ndtype = np.dtype(dict(formats=ndtype, names=names)) else: - nbtypes = len(ndtype) # Explicit names if names is not None: validate = NameValidator(**validationargs) if isinstance(names, basestring): names = names.split(",") # Simple dtype: repeat to match the nb of names - if nbtypes == 0: + if ndtype.names is None: formats = tuple([ndtype.type] * len(names)) names = validate(names, defaultfmt=defaultfmt) ndtype = np.dtype(list(zip(names, formats))) # Structured dtype: just validate the names as needed else: - ndtype.names = validate(names, nbfields=nbtypes, + ndtype.names = validate(names, nbfields=len(ndtype.names), defaultfmt=defaultfmt) # No implicit names - elif (nbtypes > 0): + elif ndtype.names is not None: validate = NameValidator(**validationargs) # Default initial names : should we change the format ? - if ((ndtype.names == tuple("f%i" % i for i in range(nbtypes))) and + if ((ndtype.names == tuple("f%i" % i for i in range(len(ndtype.names)))) and (defaultfmt != "f%i")): - ndtype.names = validate([''] * nbtypes, defaultfmt=defaultfmt) + ndtype.names = validate([''] * len(ndtype.names), defaultfmt=defaultfmt) # Explicit initial names : just validate else: ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt) diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py index f08d425d6..62330e692 100644 --- a/numpy/lib/arraypad.py +++ b/numpy/lib/arraypad.py @@ -323,6 +323,12 @@ def _get_stats(padded, axis, width_pair, length_pair, stat_func): if right_length is None or max_length < right_length: right_length = max_length + if (left_length == 0 or right_length == 0) \ + and stat_func in {np.amax, np.amin}: + # amax and amin can't operate on an emtpy array, + # raise a more descriptive warning here instead of the default one + raise ValueError("stat_length of 0 yields no value for padding") + # Calculate statistic for the left side left_slice = _slice_at_axis( slice(left_index, left_index + left_length), axis) @@ -340,6 +346,7 @@ def _get_stats(padded, axis, width_pair, length_pair, stat_func): right_chunk = padded[right_slice] right_stat = stat_func(right_chunk, axis=axis, keepdims=True) _round_if_needed(right_stat, padded.dtype) + return left_stat, right_stat @@ -835,7 +842,7 @@ def pad(array, pad_width, mode='constant', **kwargs): raise ValueError("unsupported keyword arguments for mode '{}': {}" .format(mode, unsupported_kwargs)) - stat_functions = {"maximum": np.max, "minimum": np.min, + stat_functions = {"maximum": np.amax, "minimum": np.amin, "mean": np.mean, "median": np.median} # Create array with final shape and original values diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 9d380e67d..21532838b 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -316,14 +316,17 @@ def average(a, axis=None, weights=None, returned=False): The weights array can either be 1-D (in which case its length must be the size of `a` along the given axis) or of the same shape as `a`. If `weights=None`, then all data in `a` are assumed to have a - weight equal to one. + weight equal to one. The 1-D calculation is:: + + avg = sum(a * weights) / sum(weights) + + The only constraint on `weights` is that `sum(weights)` must not be 0. returned : bool, optional Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`) is returned, otherwise only the average is returned. If `weights=None`, `sum_of_weights` is equivalent to the number of elements over which the average is taken. - Returns ------- retval, [sum_of_weights] : array_type or double diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 9a03d0b39..6cffab6ac 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1443,7 +1443,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): the variance of the flattened array. dtype : data-type, optional Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as + the default is `float64`; for arrays of float types it is the same as the array type. out : ndarray, optional Alternate output array in which to place the result. It must have diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index b9dc444f8..e57a6dd47 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -506,7 +506,9 @@ def save(file, arr, allow_pickle=True, fix_imports=True): Notes ----- For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. - + + Any data saved to the file is appended to the end of the file. + Examples -------- >>> from tempfile import TemporaryFile @@ -519,6 +521,15 @@ def save(file, arr, allow_pickle=True, fix_imports=True): >>> np.load(outfile) array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + >>> with open('test.npy', 'wb') as f: + ... np.save(f, np.array([1, 2])) + ... np.save(f, np.array([1, 3])) + >>> with open('test.npy', 'rb') as f: + ... a = np.load(f) + ... b = np.load(f) + >>> print(a, b) + # [1 2] [1 3] """ own_fid = False if hasattr(file, 'write'): @@ -1776,12 +1787,13 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, replace_space=replace_space) # Skip the first `skip_header` rows - for i in range(skip_header): - next(fhd) - - # Keep on until we find the first valid values - first_values = None try: + for i in range(skip_header): + next(fhd) + + # Keep on until we find the first valid values + first_values = None + while not first_values: first_line = _decode_line(next(fhd), encoding) if (names is True) and (comments is not None): @@ -2168,7 +2180,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, outputmask = np.array(masks, dtype=mdtype) else: # Overwrite the initial dtype names if needed - if names and dtype.names: + if names and dtype.names is not None: dtype.names = names # Case 1. We have a structured type if len(dtype_flat) > 1: @@ -2218,7 +2230,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # output = np.array(data, dtype) if usemask: - if dtype.names: + if dtype.names is not None: mdtype = [(_, bool) for _ in dtype.names] else: mdtype = bool diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index 6e257bb3f..40060b41a 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -72,7 +72,7 @@ def recursive_fill_fields(input, output): current = input[field] except ValueError: continue - if current.dtype.names: + if current.dtype.names is not None: recursive_fill_fields(current, output[field]) else: output[field][:len(current)] = current @@ -139,11 +139,11 @@ def get_names(adtype): names = adtype.names for name in names: current = adtype[name] - if current.names: + if current.names is not None: listnames.append((name, tuple(get_names(current)))) else: listnames.append(name) - return tuple(listnames) or None + return tuple(listnames) def get_names_flat(adtype): @@ -176,9 +176,9 @@ def get_names_flat(adtype): for name in names: listnames.append(name) current = adtype[name] - if current.names: + if current.names is not None: listnames.extend(get_names_flat(current)) - return tuple(listnames) or None + return tuple(listnames) def flatten_descr(ndtype): @@ -215,8 +215,8 @@ def _zip_dtype(seqarrays, flatten=False): else: for a in seqarrays: current = a.dtype - if current.names and len(current.names) <= 1: - # special case - dtypes of 0 or 1 field are flattened + if current.names is not None and len(current.names) == 1: + # special case - dtypes of 1 field are flattened newdtype.extend(_get_fieldspec(current)) else: newdtype.append(('', current)) @@ -268,7 +268,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,): names = adtype.names for name in names: current = adtype[name] - if current.names: + if current.names is not None: if lastname: parents[name] = [lastname, ] else: @@ -281,7 +281,7 @@ def get_fieldstructure(adtype, lastname=None, parents=None,): elif lastname: lastparent = [lastname, ] parents[name] = lastparent or [] - return parents or None + return parents def _izip_fields_flat(iterable): @@ -435,7 +435,7 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype # Make sure we have named fields - if not seqdtype.names: + if seqdtype.names is None: seqdtype = np.dtype([('', seqdtype)]) if not flatten or _zip_dtype((seqarrays,), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok @@ -653,7 +653,7 @@ def rename_fields(base, namemapper): for name in ndtype.names: newname = namemapper.get(name, name) current = ndtype[name] - if current.names: + if current.names is not None: newdtype.append( (newname, _recursive_rename_fields(current, namemapper)) ) @@ -874,16 +874,35 @@ def _get_fields_and_offsets(dt, offset=0): scalar fields in the dtype "dt", including nested fields, in left to right order. """ + + # counts up elements in subarrays, including nested subarrays, and returns + # base dtype and count + def count_elem(dt): + count = 1 + while dt.shape != (): + for size in dt.shape: + count *= size + dt = dt.base + return dt, count + fields = [] for name in dt.names: field = dt.fields[name] - if field[0].names is None: - count = 1 - for size in field[0].shape: - count *= size - fields.append((field[0], count, field[1] + offset)) + f_dt, f_offset = field[0], field[1] + f_dt, n = count_elem(f_dt) + + if f_dt.names is None: + fields.append((np.dtype((f_dt, (n,))), n, f_offset + offset)) else: - fields.extend(_get_fields_and_offsets(field[0], field[1] + offset)) + subfields = _get_fields_and_offsets(f_dt, f_offset + offset) + size = f_dt.itemsize + + for i in range(n): + if i == 0: + # optimization: avoid list comprehension if no subarray + fields.extend(subfields) + else: + fields.extend([(d, c, o + i*size) for d, c, o in subfields]) return fields @@ -948,6 +967,12 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): fields = _get_fields_and_offsets(arr.dtype) n_fields = len(fields) + if n_fields == 0 and dtype is None: + raise ValueError("arr has no fields. Unable to guess dtype") + elif n_fields == 0: + # too many bugs elsewhere for this to work now + raise NotImplementedError("arr with no fields is not supported") + dts, counts, offsets = zip(*fields) names = ['f{}'.format(n) for n in range(n_fields)] @@ -1039,6 +1064,9 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, if arr.shape == (): raise ValueError('arr must have at least one dimension') n_elem = arr.shape[-1] + if n_elem == 0: + # too many bugs elsewhere for this to work now + raise NotImplementedError("last axis with size 0 is not supported") if dtype is None: if names is None: @@ -1051,7 +1079,11 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, raise ValueError("don't supply both dtype and names") # sanity check of the input dtype fields = _get_fields_and_offsets(dtype) - dts, counts, offsets = zip(*fields) + if len(fields) == 0: + dts, counts, offsets = [], [], [] + else: + dts, counts, offsets = zip(*fields) + if n_elem != sum(counts): raise ValueError('The length of the last dimension of arr must ' 'be equal to the number of fields in dtype') diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index b7630cdcd..b6dd3b31c 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -469,6 +469,29 @@ class TestStatistic(object): ) assert_array_equal(a, b) + @pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning") + @pytest.mark.filterwarnings( + "ignore:invalid value encountered in (true_divide|double_scalars):" + "RuntimeWarning" + ) + @pytest.mark.parametrize("mode", ["mean", "median"]) + def test_zero_stat_length_valid(self, mode): + arr = np.pad([1., 2.], (1, 2), mode, stat_length=0) + expected = np.array([np.nan, 1., 2., np.nan, np.nan]) + assert_equal(arr, expected) + + @pytest.mark.parametrize("mode", ["minimum", "maximum"]) + def test_zero_stat_length_invalid(self, mode): + match = "stat_length of 0 yields no value for padding" + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 0, mode, stat_length=0) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 0, mode, stat_length=(1, 0)) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 1, mode, stat_length=0) + with pytest.raises(ValueError, match=match): + np.pad([1., 2.], 1, mode, stat_length=(1, 0)) + class TestConstant(object): def test_check_constant(self): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 78f9f85f3..6ee17c830 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -1565,6 +1565,13 @@ M 33 21.99 test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype, converters=converters) + # nested but empty fields also aren't supported + ndtype = [('idx', int), ('code', object), ('nest', [])] + with assert_raises_regex(NotImplementedError, + 'Nested fields.* not supported.*'): + test = np.genfromtxt(TextIO(data), delimiter=";", + dtype=ndtype, converters=converters) + def test_userconverters_with_explicit_dtype(self): # Test user_converters w/ explicit (standard) dtype data = TextIO('skip,skip,2001-01-01,1.0,skip') @@ -1681,6 +1688,10 @@ M 33 21.99 test = np.genfromtxt(data) assert_equal(test, np.array([])) + # when skip_header > 0 + test = np.genfromtxt(data, skip_header=1) + assert_equal(test, np.array([])) + def test_fancy_dtype_alt(self): # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 0126ccaf8..0c839d486 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -115,6 +115,14 @@ class TestRecFunctions(object): test = get_names(ndtype) assert_equal(test, ('a', ('b', ('ba', 'bb')))) + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names(ndtype) + assert_equal(test, ('a', ('b', ()))) + + ndtype = np.dtype([]) + test = get_names(ndtype) + assert_equal(test, ()) + def test_get_names_flat(self): # Test get_names_flat ndtype = np.dtype([('A', '|S3'), ('B', float)]) @@ -125,6 +133,14 @@ class TestRecFunctions(object): test = get_names_flat(ndtype) assert_equal(test, ('a', 'b', 'ba', 'bb')) + ndtype = np.dtype([('a', int), ('b', [])]) + test = get_names_flat(ndtype) + assert_equal(test, ('a', 'b')) + + ndtype = np.dtype([]) + test = get_names_flat(ndtype) + assert_equal(test, ()) + def test_get_fieldstructure(self): # Test get_fieldstructure @@ -147,6 +163,11 @@ class TestRecFunctions(object): 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} assert_equal(test, control) + # 0 fields + ndtype = np.dtype([]) + test = get_fieldstructure(ndtype) + assert_equal(test, {}) + def test_find_duplicates(self): # Test find_duplicates a = ma.array([(2, (2., 'B')), (1, (2., 'B')), (2, (2., 'B')), @@ -248,7 +269,8 @@ class TestRecFunctions(object): # including uniform fields with subarrays unpacked d = np.array([(1, [2, 3], [[ 4, 5], [ 6, 7]]), (8, [9, 10], [[11, 12], [13, 14]])], - dtype=[('x0', 'i4'), ('x1', ('i4', 2)), ('x2', ('i4', (2, 2)))]) + dtype=[('x0', 'i4'), ('x1', ('i4', 2)), + ('x2', ('i4', (2, 2)))]) dd = structured_to_unstructured(d) ddd = unstructured_to_structured(dd, d.dtype) assert_(dd.base is d) @@ -262,6 +284,40 @@ class TestRecFunctions(object): assert_equal(res, np.zeros((10, 6), dtype=int)) + # test nested combinations of subarrays and structured arrays, gh-13333 + def subarray(dt, shape): + return np.dtype((dt, shape)) + + def structured(*dts): + return np.dtype([('x{}'.format(i), dt) for i, dt in enumerate(dts)]) + + def inspect(dt, dtype=None): + arr = np.zeros((), dt) + ret = structured_to_unstructured(arr, dtype=dtype) + backarr = unstructured_to_structured(ret, dt) + return ret.shape, ret.dtype, backarr.dtype + + dt = structured(subarray(structured(np.int32, np.int32), 3)) + assert_equal(inspect(dt), ((6,), np.int32, dt)) + + dt = structured(subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((4,), np.int32, dt)) + + dt = structured(np.int32) + assert_equal(inspect(dt), ((1,), np.int32, dt)) + + dt = structured(np.int32, subarray(subarray(np.int32, 2), 2)) + assert_equal(inspect(dt), ((5,), np.int32, dt)) + + dt = structured() + assert_raises(ValueError, structured_to_unstructured, np.zeros(3, dt)) + + # these currently don't work, but we may make it work in the future + assert_raises(NotImplementedError, structured_to_unstructured, + np.zeros(3, dt), dtype=np.int32) + assert_raises(NotImplementedError, unstructured_to_structured, + np.zeros((3,0), dtype=np.int32)) + def test_field_assignment_by_name(self): a = np.ones(2, dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'u1')]) newdt = [('b', 'f4'), ('c', 'u1')] diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py index ac4b03a6c..586824743 100644 --- a/numpy/lib/type_check.py +++ b/numpy/lib/type_check.py @@ -395,19 +395,27 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): in-place (False). The in-place operation only occurs if casting to an array does not require a copy. Default is True. + + .. versionadded:: 1.13 nan : int, float, optional Value to be used to fill NaN values. If no value is passed then NaN values will be replaced with 0.0. + + .. versionadded:: 1.17 posinf : int, float, optional Value to be used to fill positive infinity values. If no value is passed then positive infinity values will be replaced with a very large number. + + .. versionadded:: 1.17 neginf : int, float, optional Value to be used to fill negative infinity values. If no value is passed then negative infinity values will be replaced with a very small (or negative) number. + + .. versionadded:: 1.17 - .. versionadded:: 1.13 + Returns ------- diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index c7dbcc5f9..8bcbd8e86 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -1003,93 +1003,6 @@ def _getmembers(item): if hasattr(item, x)] return members -#----------------------------------------------------------------------------- - -# The following SafeEval class and company are adapted from Michael Spencer's -# ASPN Python Cookbook recipe: https://code.activestate.com/recipes/364469/ -# -# Accordingly it is mostly Copyright 2006 by Michael Spencer. -# The recipe, like most of the other ASPN Python Cookbook recipes was made -# available under the Python license. -# https://en.wikipedia.org/wiki/Python_License - -# It has been modified to: -# * handle unary -/+ -# * support True/False/None -# * raise SyntaxError instead of a custom exception. - -class SafeEval(object): - """ - Object to evaluate constant string expressions. - - This includes strings with lists, dicts and tuples using the abstract - syntax tree created by ``compiler.parse``. - - .. deprecated:: 1.10.0 - - See Also - -------- - safe_eval - - """ - def __init__(self): - # 2014-10-15, 1.10 - warnings.warn("SafeEval is deprecated in 1.10 and will be removed.", - DeprecationWarning, stacklevel=2) - - def visit(self, node): - cls = node.__class__ - meth = getattr(self, 'visit' + cls.__name__, self.default) - return meth(node) - - def default(self, node): - raise SyntaxError("Unsupported source construct: %s" - % node.__class__) - - def visitExpression(self, node): - return self.visit(node.body) - - def visitNum(self, node): - return node.n - - def visitStr(self, node): - return node.s - - def visitBytes(self, node): - return node.s - - def visitDict(self, node,**kw): - return dict([(self.visit(k), self.visit(v)) - for k, v in zip(node.keys, node.values)]) - - def visitTuple(self, node): - return tuple([self.visit(i) for i in node.elts]) - - def visitList(self, node): - return [self.visit(i) for i in node.elts] - - def visitUnaryOp(self, node): - import ast - if isinstance(node.op, ast.UAdd): - return +self.visit(node.operand) - elif isinstance(node.op, ast.USub): - return -self.visit(node.operand) - else: - raise SyntaxError("Unknown unary op: %r" % node.op) - - def visitName(self, node): - if node.id == 'False': - return False - elif node.id == 'True': - return True - elif node.id == 'None': - return None - else: - raise SyntaxError("Unknown name: %s" % node.id) - - def visitNameConstant(self, node): - return node.value - def safe_eval(source): """ |