diff options
author | Stephan Hoyer <shoyer@gmail.com> | 2016-10-17 21:32:40 -0400 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2016-10-17 21:32:40 -0400 |
commit | 0a02bb6f62a5c09103cf748bafe7a622f3dfe98b (patch) | |
tree | 04ccebede3afc55afd08eae732a3ececa3846f69 /numpy/lib/function_base.py | |
parent | 15e82e2181d9ae92b6a1a6fb80454bc3a4aa2cb5 (diff) | |
download | numpy-0a02bb6f62a5c09103cf748bafe7a622f3dfe98b.tar.gz |
ENH: add signature argument to vectorize for vectorizing like generalized ufuncs (#8054)
* ENH: add signature argument to vectorize for generalized ufuncs
Example usage (from the docstring):
Vectorized convolution:
>>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')
>>> convolve(np.eye(4), [1, 2, 1])
array([[ 1., 2., 1., 0., 0., 0.],
[ 0., 1., 2., 1., 0., 0.],
[ 0., 0., 1., 2., 1., 0.],
[ 0., 0., 0., 1., 2., 1.]])
* Use str.format rather than %
* Fix spelling typo
* BUG: fix np.vectorize for size 0 inputs
* DOC: add vectorize to 1.12.0 release notes
* [ci-skip] Remove outdated comment
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 288 |
1 files changed, 253 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index cb1a04f47..98b0413a1 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,9 +1,10 @@ from __future__ import division, absolute_import, print_function -import warnings -import sys import collections import operator +import re +import sys +import warnings import numpy as np import numpy.core.numeric as _nx @@ -24,16 +25,19 @@ from numpy.core.numerictypes import typecodes, number from numpy.lib.twodim_base import diag from .utils import deprecate from numpy.core.multiarray import ( - _insert, add_docstring, digitize, bincount, + _insert, add_docstring, digitize, bincount, interp as compiled_interp, interp_complex as compiled_interp_complex ) from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc from numpy.compat import long from numpy.compat.py3k import basestring -# Force range to be a generator, for np.delete's usage. if sys.version_info[0] < 3: + # Force range to be a generator, for np.delete's usage. range = xrange + import __builtin__ as builtins +else: + import builtins __all__ = [ @@ -1792,7 +1796,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): Returns ------- - y : float or complex (corresponding to fp) or ndarray + y : float or complex (corresponding to fp) or ndarray The interpolated values, same shape as `x`. Raises @@ -1899,7 +1903,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): return interp_func(x, xp, fp, left, right) else: return interp_func(x, xp, fp, left, right).item() - + def angle(z, deg=0): """ Return the angle of the complex argument. @@ -2242,17 +2246,126 @@ def disp(mesg, device=None, linefeed=True): return +# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html +_DIMENSION_NAME = r'\w+' +_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME) +_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST) +_ARGUMENT_LIST = '{0:}(?:,{0:})*'.format(_ARGUMENT) +_SIGNATURE = '^{0:}->{0:}$'.format(_ARGUMENT_LIST) + + +def _parse_gufunc_signature(signature): + """ + Parse string signatures for a generalized universal function. + + Arguments + --------- + signature : string + Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)`` + for ``np.matmul``. + + Returns + ------- + Tuple of input and output core dimensions parsed from the signature, each + of the form List[Tuple[str, ...]]. + """ + if not re.match(_SIGNATURE, signature): + raise ValueError( + 'not a valid gufunc signature: {}'.format(signature)) + return tuple([tuple(re.findall(_DIMENSION_NAME, arg)) + for arg in re.findall(_ARGUMENT, arg_list)] + for arg_list in signature.split('->')) + + +def _update_dim_sizes(dim_sizes, arg, core_dims): + """ + Incrementally check and update core dimension sizes for a single argument. + + Arguments + --------- + dim_sizes : Dict[str, int] + Sizes of existing core dimensions. Will be updated in-place. + arg : ndarray + Argument to examine. + core_dims : Tuple[str, ...] + Core dimensions for this argument. + """ + if not core_dims: + return + + num_core_dims = len(core_dims) + if arg.ndim < num_core_dims: + raise ValueError( + '%d-dimensional argument does not have enough ' + 'dimensions for all core dimensions %r' + % (arg.ndim, core_dims)) + + core_shape = arg.shape[-num_core_dims:] + for dim, size in zip(core_dims, core_shape): + if dim in dim_sizes: + if size != dim_sizes[dim]: + raise ValueError( + 'inconsistent size for core dimension %r: %r vs %r' + % (dim, size, dim_sizes[dim])) + else: + dim_sizes[dim] = size + + +def _parse_input_dimensions(args, input_core_dims): + """ + Parse broadcast and core dimensions for vectorize with a signature. + + Arguments + --------- + args : Tuple[ndarray, ...] + Tuple of input arguments to examine. + input_core_dims : List[Tuple[str, ...]] + List of core dimensions corresponding to each input. + + Returns + ------- + broadcast_shape : Tuple[int, ...] + Common shape to broadcast all non-core dimensions to. + dim_sizes : Dict[str, int] + Common sizes for named core dimensions. + """ + broadcast_args = [] + dim_sizes = {} + for arg, core_dims in zip(args, input_core_dims): + _update_dim_sizes(dim_sizes, arg, core_dims) + ndim = arg.ndim - len(core_dims) + dummy_array = np.lib.stride_tricks.as_strided(0, arg.shape[:ndim]) + broadcast_args.append(dummy_array) + broadcast_shape = np.lib.stride_tricks._broadcast_shape(*broadcast_args) + return broadcast_shape, dim_sizes + + +def _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims): + """Helper for calculating broadcast shapes with core dimensions.""" + return [broadcast_shape + tuple(dim_sizes[dim] for dim in core_dims) + for core_dims in list_of_core_dims] + + +def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes): + """Helper for creating output arrays in vectorize.""" + shapes = _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims) + arrays = tuple(np.empty(shape, dtype=dtype) + for shape, dtype in zip(shapes, dtypes)) + return arrays + + class vectorize(object): """ - vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False) + vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, + signature=None) Generalized function class. - Define a vectorized function which takes a nested sequence - of objects or numpy arrays as inputs and returns a - numpy array as output. The vectorized function evaluates `pyfunc` over - successive tuples of the input arrays like the python map function, - except it uses the broadcasting rules of numpy. + Define a vectorized function which takes a nested sequence of objects or + numpy arrays as inputs and returns an single or tuple of numpy array as + output. The vectorized function evaluates `pyfunc` over successive tuples + of the input arrays like the python map function, except it uses the + broadcasting rules of numpy. The data type of the output of `vectorized` is determined by calling the function with the first element of the input. This can be avoided @@ -2282,6 +2395,15 @@ class vectorize(object): .. versionadded:: 1.7.0 + signature : string, optional + Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for + vectorized matrix-vector multiplication. If provided, ``pyfunc`` will + be called with (and expected to return) arrays with shapes given by the + size of corresponding core dimensions. By default, ``pyfunc`` is + assumed to take scalars as input and output. + + .. versionadded:: 1.12.0 + Returns ------- vectorized : callable @@ -2301,7 +2423,7 @@ class vectorize(object): array([3, 4, 1, 2]) The docstring is taken from the input function to `vectorize` unless it - is specified + is specified: >>> vfunc.__doc__ 'Return a-b if a>b, otherwise return a+b' @@ -2310,7 +2432,7 @@ class vectorize(object): 'Vectorized `myfunc`' The output type is determined by evaluating the first element of the input, - unless it is specified + unless it is specified: >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) @@ -2340,6 +2462,25 @@ class vectorize(object): >>> vpolyval([1, 2, 3], x=[0, 1]) array([3, 6]) + The `signature` argument allows for vectorizing functions that act on + non-scalar arrays of fixed length. For example, you can use it for a + vectorized calculation of Pearson correlation coefficient and its p-value: + + >>> import scipy.stats + >>> pearsonr = np.vectorize(scipy.stats.pearsonr, + ... signature='(n),(n)->(),()') + >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) + (array([ 1., -1.]), array([ 0., 0.])) + + Or for a vectorized convolution: + + >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') + >>> convolve(np.eye(4), [1, 2, 1]) + array([[ 1., 2., 1., 0., 0., 0.], + [ 0., 1., 2., 1., 0., 0.], + [ 0., 0., 1., 2., 1., 0.], + [ 0., 0., 0., 1., 2., 1.]]) + See Also -------- frompyfunc : Takes an arbitrary Python function and returns a ufunc @@ -2359,12 +2500,17 @@ class vectorize(object): The new keyword argument interface and `excluded` argument support further degrades performance. + References + ---------- + .. [1] NumPy Reference, section `Generalized Universal Function API + <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ - def __init__(self, pyfunc, otypes='', doc=None, excluded=None, - cache=False): + def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, + cache=False, signature=None): self.pyfunc = pyfunc self.cache = cache + self.signature = signature self._ufunc = None # Caching to improve default performance if doc is None: @@ -2373,22 +2519,25 @@ class vectorize(object): self.__doc__ = doc if isinstance(otypes, str): - self.otypes = otypes - for char in self.otypes: + for char in otypes: if char not in typecodes['All']: - raise ValueError( - "Invalid otype specified: %s" % (char,)) + raise ValueError("Invalid otype specified: %s" % (char,)) elif iterable(otypes): - self.otypes = ''.join([_nx.dtype(x).char for x in otypes]) - else: - raise ValueError( - "Invalid otype specification") + otypes = ''.join([_nx.dtype(x).char for x in otypes]) + elif otypes is not None: + raise ValueError("Invalid otype specification") + self.otypes = otypes # Excluded variable support if excluded is None: excluded = set() self.excluded = set(excluded) + if signature is not None: + self._in_and_out_core_dims = _parse_gufunc_signature(signature) + else: + self._in_and_out_core_dims = None + def __call__(self, *args, **kwargs): """ Return arrays with the results of `pyfunc` broadcast (vectorized) over @@ -2425,7 +2574,7 @@ class vectorize(object): if not args: raise ValueError('args can not be empty') - if self.otypes: + if self.otypes is not None: otypes = self.otypes nout = len(otypes) @@ -2441,7 +2590,12 @@ class vectorize(object): # the subsequent call when the ufunc is evaluated. # Assumes that ufunc first evaluates the 0th elements in the input # arrays (the input values are not checked to ensure this) - inputs = [asarray(_a).flat[0] for _a in args] + args = [asarray(arg) for arg in args] + if builtins.any(arg.size == 0 for arg in args): + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + + inputs = [arg.flat[0] for arg in args] outputs = func(*inputs) # Performance note: profiling indicates that -- for simple @@ -2477,24 +2631,88 @@ class vectorize(object): def _vectorize_call(self, func, args): """Vectorized call to `func` over positional `args`.""" - if not args: - _res = func() + if self.signature is not None: + res = self._vectorize_call_with_signature(func, args) + elif not args: + res = func() else: ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) # Convert args to object arrays first - inputs = [array(_a, copy=False, subok=True, dtype=object) - for _a in args] + inputs = [array(a, copy=False, subok=True, dtype=object) + for a in args] outputs = ufunc(*inputs) if ufunc.nout == 1: - _res = array(outputs, - copy=False, subok=True, dtype=otypes[0]) + res = array(outputs, copy=False, subok=True, dtype=otypes[0]) else: - _res = tuple([array(_x, copy=False, subok=True, dtype=_t) - for _x, _t in zip(outputs, otypes)]) - return _res + res = tuple([array(x, copy=False, subok=True, dtype=t) + for x, t in zip(outputs, otypes)]) + return res + + def _vectorize_call_with_signature(self, func, args): + """Vectorized call over positional arguments with a signature.""" + input_core_dims, output_core_dims = self._in_and_out_core_dims + + if len(args) != len(input_core_dims): + raise TypeError('wrong number of positional arguments: ' + 'expected %r, got %r' + % (len(input_core_dims), len(args))) + args = tuple(asanyarray(arg) for arg in args) + + broadcast_shape, dim_sizes = _parse_input_dimensions( + args, input_core_dims) + input_shapes = _calculate_shapes(broadcast_shape, dim_sizes, + input_core_dims) + args = [np.broadcast_to(arg, shape, subok=True) + for arg, shape in zip(args, input_shapes)] + + outputs = None + otypes = self.otypes + nout = len(output_core_dims) + + for index in np.ndindex(*broadcast_shape): + results = func(*(arg[index] for arg in args)) + + n_results = len(results) if isinstance(results, tuple) else 1 + + if nout != n_results: + raise ValueError( + 'wrong number of outputs from pyfunc: expected %r, got %r' + % (nout, n_results)) + + if nout == 1: + results = (results,) + + if outputs is None: + for result, core_dims in zip(results, output_core_dims): + _update_dim_sizes(dim_sizes, result, core_dims) + + if otypes is None: + otypes = [asarray(result).dtype for result in results] + + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + for output, result in zip(outputs, results): + output[index] = result + + if outputs is None: + # did not call the function even once + if otypes is None: + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + if builtins.any(dim not in dim_sizes + for dims in output_core_dims + for dim in dims): + raise ValueError('cannot call `vectorize` with a signature ' + 'including new output dimensions on size 0 ' + 'inputs') + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + return outputs[0] if nout == 1 else outputs def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, |