summary | refs | log | tree | commit | diff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- numpy/lib/function_base.py 320
1 files changed, 179 insertions, 141 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2de5c6193..473c8ea23 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,12 +1,13 @@
__docformat__ = "restructuredtext en"
__all__ = ['select', 'piecewise', 'trim_zeros', 'copy', 'iterable',
- 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex',
- 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
- 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
- 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef',
- 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
- 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
- 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc']
+ 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex',
+ 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
+ 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
+ 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov',
+ 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning',
+ 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc',
+ 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp',
+ 'add_newdoc_ufunc']
import warnings
import types
@@ -1698,80 +1699,9 @@ def disp(mesg, device=None, linefeed=True):
device.flush()
return
-# return number of input arguments and
-# number of default arguments
-
-def _get_nargs(obj):
- import re
-
- terr = re.compile(r'.*? takes (exactly|at least) (?P<exargs>(\d+)|(\w+))' +
- r' argument(s|) \((?P<gargs>(\d+)|(\w+)) given\)')
- def _convert_to_int(strval):
- try:
- result = int(strval)
- except ValueError:
- if strval=='zero':
- result = 0
- elif strval=='one':
- result = 1
- elif strval=='two':
- result = 2
- # How high to go? English only?
- else:
- raise
- return result
-
- if not callable(obj):
- raise TypeError(
- "Object is not callable.")
- if sys.version_info[0] >= 3:
- # inspect currently fails for binary extensions
- # like math.cos. So fall back to other methods if
- # it fails.
- import inspect
- try:
- spec = inspect.getargspec(obj)
- nargs = len(spec.args)
- if spec.defaults:
- ndefaults = len(spec.defaults)
- else:
- ndefaults = 0
- if inspect.ismethod(obj):
- nargs -= 1
- return nargs, ndefaults
- except:
- pass
-
- if hasattr(obj,'func_code'):
- fcode = obj.func_code
- nargs = fcode.co_argcount
- if obj.func_defaults is not None:
- ndefaults = len(obj.func_defaults)
- else:
- ndefaults = 0
- if isinstance(obj, types.MethodType):
- nargs -= 1
- return nargs, ndefaults
-
- try:
- obj()
- return 0, 0
- except TypeError, msg:
- m = terr.match(str(msg))
- if m:
- nargs = _convert_to_int(m.group('exargs'))
- ndefaults = _convert_to_int(m.group('gargs'))
- if isinstance(obj, types.MethodType):
- nargs -= 1
- return nargs, ndefaults
-
- raise ValueError(
- "failed to determine the number of arguments for %s" % (obj))
-
-
class vectorize(object):
"""
- vectorize(pyfunc, otypes='', doc=None)
+ vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False)
Generalized function class.
@@ -1794,13 +1724,30 @@ class vectorize(object):
typecode characters or a list of data type specifiers. There should
be one data type specifier for each output.
doc : str, optional
- The docstring for the function. If None, the docstring will be the
- `pyfunc` one.
+ The docstring for the function. If `None`, the docstring will be
+ ``pyfunc.__doc__``.
+ excluded : set, optional
+ Set of strings or integers representing the positional or keyword
+ arguments for which the function will not be vectorized. These will be
+ passed directly to `pyfunc` unmodified.
+
+ .. versionadded:: 1.7.0
+
+ cache : bool, optional
+ If `True`, then cache the result of the first function call, which is used
+ to determine the number of outputs when `otypes` is not provided.
+
+ .. versionadded:: 1.7.0
+
+ Returns
+ -------
+ vectorized : callable
+ Vectorized function.
Examples
--------
>>> def myfunc(a, b):
- ... \"\"\"Return a-b if a>b, otherwise return a+b\"\"\"
+ ... "Return a-b if a>b, otherwise return a+b"
... if a > b:
... return a - b
... else:
@@ -1830,78 +1777,169 @@ class vectorize(object):
>>> type(out[0])
<type 'numpy.float64'>
+ The `excluded` argument can be used to prevent vectorizing over certain
+ arguments. This can be useful for array-like arguments of a fixed length
+ such as the coefficients for a polynomial as in `polyval`:
+
+ >>> def mypolyval(p, x):
+ ... _p = list(p)
+ ... res = _p.pop(0)
+ ... while _p:
+ ... res = res*x + _p.pop(0)
+ ... return res
+ >>> vpolyval = np.vectorize(mypolyval, excluded=['p'])
+ >>> vpolyval(p=[1, 2, 3], x=[0, 1])
+ array([3, 6])
+
+ Positional arguments may also be excluded by specifying their position:
+
+ >>> vpolyval.excluded.add(0)
+ >>> vpolyval([1, 2, 3], x=[0, 1])
+ array([3, 6])
+
+ Notes
+ -----
+ The `vectorize` function is provided primarily for convenience, not for
+ performance. The implementation is essentially a for loop.
+
+ If `otypes` is not specified, then a call to the function with the first
+ element of each argument will be used to determine the number and types of
+ outputs. The result of this call will be cached if `cache` is `True` to
+ prevent calling the function twice on those inputs. However, to implement
+ the cache, the original function must be wrapped, which will slow down
+ subsequent calls, so only do this if your function is expensive.
+
+ The new keyword argument interface and `excluded` argument support further
+ degrade performance.
"""
- def __init__(self, pyfunc, otypes='', doc=None):
- self.thefunc = pyfunc
- self.ufunc = None
- nin, ndefault = _get_nargs(pyfunc)
- if nin == 0 and ndefault == 0:
- self.nin = None
- self.nin_wo_defaults = None
- else:
- self.nin = nin
- self.nin_wo_defaults = nin - ndefault
- self.nout = None
+ def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False):
+ self.pyfunc = pyfunc
+ self.cache = cache
+
if doc is None:
self.__doc__ = pyfunc.__doc__
else:
self.__doc__ = doc
+
if isinstance(otypes, str):
self.otypes = otypes
for char in self.otypes:
if char not in typecodes['All']:
- raise ValueError(
- "invalid otype specified")
+ raise ValueError("Invalid otype specified: %s" % (char,))
elif iterable(otypes):
self.otypes = ''.join([_nx.dtype(x).char for x in otypes])
else:
- raise ValueError(
- "Invalid otype specification")
- self.lastcallargs = 0
-
- def __call__(self, *args):
- # get number of outputs and output types by calling
- # the function on the first entries of args
- nargs = len(args)
- if self.nin:
- if (nargs > self.nin) or (nargs < self.nin_wo_defaults):
- raise ValueError(
- "Invalid number of arguments")
-
- # we need a new ufunc if this is being called with more arguments.
- if (self.lastcallargs != nargs):
- self.lastcallargs = nargs
- self.ufunc = None
- self.nout = None
-
- if self.nout is None or self.otypes == '':
- newargs = []
- for arg in args:
- newargs.append(asarray(arg).flat[0])
- theout = self.thefunc(*newargs)
- if isinstance(theout, tuple):
- self.nout = len(theout)
+ raise ValueError("Invalid otype specification")
+
+ # Excluded variable support
+ if excluded is None:
+ excluded = set()
+ self.excluded = set(excluded)
+
+ if self.otypes and not self.excluded:
+ self._ufunc = None # Caching to improve default performance
+
+ def __call__(self, *args, **kwargs):
+ """
+ Return arrays with the results of `pyfunc` broadcast (vectorized) over
+ `args` and `kwargs` not in `excluded`.
+ """
+ excluded = self.excluded
+ if not kwargs and not excluded:
+ func = self.pyfunc
+ vargs = args
+ else:
+ # The wrapper accepts only positional arguments: we use `names` and
+ # `inds` to mutate `the_args` and `kwargs` to pass to the original
+ # function.
+ nargs = len(args)
+
+ names = [_n for _n in kwargs if _n not in excluded]
+ inds = [_i for _i in range(nargs) if _i not in excluded]
+ the_args = list(args)
+ def func(*vargs):
+ for _n, _i in enumerate(inds):
+ the_args[_i] = vargs[_n]
+ kwargs.update(zip(names, vargs[len(inds):]))
+ return self.pyfunc(*the_args, **kwargs)
+
+ vargs = [args[_i] for _i in inds]
+ vargs.extend([kwargs[_n] for _n in names])
+
+ return self._vectorize_call(func=func, args=vargs)
+
+ def _get_ufunc_and_otypes(self, func, args):
+ """Return (ufunc, otypes)."""
+ # frompyfunc will fail if args is empty
+ assert args
+
+ if self.otypes:
+ otypes = self.otypes
+ nout = len(otypes)
+
+ # Note logic here: We only *use* self._ufunc if func is self.pyfunc
+ # even though we set self._ufunc regardless.
+ if func is self.pyfunc and self._ufunc is not None:
+ ufunc = self._ufunc
+ else:
+ ufunc = self._ufunc = frompyfunc(func, len(args), nout)
+ else:
+ # Get number of outputs and output types by calling the function on
+ # the first entries of args. We also cache the result to prevent
+ # the subsequent call when the ufunc is evaluated.
+ # Assumes that ufunc first evaluates the 0th elements in the input
+ # arrays (the input values are not checked to ensure this)
+ inputs = [asarray(_a).flat[0] for _a in args]
+ outputs = func(*inputs)
+
+ # Performance note: profiling indicates that -- for simple functions
+ # at least -- this wrapping can almost double the execution time.
+ # Hence we make it optional.
+ if self.cache:
+ _cache = [outputs]
+ def _func(*vargs):
+ if _cache:
+ return _cache.pop()
+ else:
+ return func(*vargs)
+ else:
+ _func = func
+
+ if isinstance(outputs, tuple):
+ nout = len(outputs)
else:
- self.nout = 1
- theout = (theout,)
- if self.otypes == '':
- otypes = []
- for k in range(self.nout):
- otypes.append(asarray(theout[k]).dtype.char)
- self.otypes = ''.join(otypes)
-
- # Create ufunc if not already created
- if (self.ufunc is None):
- self.ufunc = frompyfunc(self.thefunc, nargs, self.nout)
-
- # Convert to object arrays first
- newargs = [array(arg,copy=False,subok=True,dtype=object) for arg in args]
- if self.nout == 1:
- _res = array(self.ufunc(*newargs),copy=False,
- subok=True,dtype=self.otypes[0])
+ nout = 1
+ outputs = (outputs,)
+
+ otypes = ''.join([asarray(outputs[_k]).dtype.char
+ for _k in range(nout)])
+
+ # Performance note: profiling indicates that creating the ufunc is
+ # not a significant cost compared with wrapping so it seems not
+ # worth trying to cache this.
+ ufunc = frompyfunc(_func, len(args), nout)
+
+ return ufunc, otypes
+
+ def _vectorize_call(self, func, args):
+ """Vectorized call to `func` over positional `args`."""
+ if not args:
+ _res = func()
else:
- _res = tuple([array(x,copy=False,subok=True,dtype=c) \
- for x, c in zip(self.ufunc(*newargs), self.otypes)])
+ ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
+
+ # Convert args to object arrays first
+ inputs = [array(_a, copy=False, subok=True, dtype=object)
+ for _a in args]
+
+ outputs = ufunc(*inputs)
+
+ if ufunc.nout == 1:
+ _res = array(outputs,
+ copy=False, subok=True, dtype=otypes[0])
+ else:
+ _res = tuple([array(_x, copy=False, subok=True, dtype=_t)
+ for _x, _t in zip(outputs, otypes)])
return _res
def cov(m, y=None, rowvar=1, bias=0, ddof=None):
@@ -2595,7 +2633,7 @@ def i0(x):
References
----------
- .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions," in
+ .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions", in
*National Physical Laboratory Mathematical Tables*, vol. 5, London:
Her Majesty's Stationery Office, 1962.
.. [2] M. Abramowitz and I. A. Stegun, *Handbook of Mathematical