52 files changed, 1198 insertions, 520 deletions
diff --git a/doc/neps/index.rst.tmpl b/doc/neps/index.rst.tmpl index 6c988014f..6cbad8eb2 100644 --- a/doc/neps/index.rst.tmpl +++ b/doc/neps/index.rst.tmpl @@ -15,7 +15,7 @@ Meta-NEPs (NEPs about NEPs or Processes) :maxdepth: 1 {% for nep, tags in neps.items() if tags['Type'] == 'Process' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} nep-template @@ -27,7 +27,7 @@ Accepted NEPs, implementation in progress :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Accepted' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} @@ -38,7 +38,7 @@ Open NEPs (under consideration) :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Draft' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} @@ -50,7 +50,7 @@ Implemented NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Final' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} Deferred NEPs @@ -60,7 +60,7 @@ Deferred NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Deferred' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} Rejected NEPs @@ -70,5 +70,5 @@ Rejected NEPs :maxdepth: 1 {% for nep, tags in neps.items() if tags['Status'] == 'Rejected' %} - NEP {{ nep }} — {{ tags['Title'] }} <{{ tags['Filename'] }}> + {{ tags['Title'] }} <{{ tags['Filename'] }}> {% endfor %} diff --git a/doc/neps/nep-0000.rst b/doc/neps/nep-0000.rst index b451eeff7..a3ec3a42b 100644 --- a/doc/neps/nep-0000.rst +++ b/doc/neps/nep-0000.rst @@ -1,6 +1,6 @@ -=================== -Purpose and Process -=================== +=========================== +NEP 0 — Purpose and Process +=========================== :Author: Jarrod Millman <millman@berkeley.edu> :Status: Active diff --git a/doc/neps/nep-0001-npy-format.rst b/doc/neps/nep-0001-npy-format.rst index 74512128d..4eded02ff 100644 --- a/doc/neps/nep-0001-npy-format.rst +++ b/doc/neps/nep-0001-npy-format.rst @@ -1,6 +1,6 @@ -===================================== -A Simple File Format for NumPy Arrays -===================================== +============================================= +NEP 1 — A Simple File Format for NumPy Arrays +============================================= :Author: Robert Kern <robert.kern@gmail.com> :Status: Final diff --git a/doc/neps/nep-0002-warnfix.rst b/doc/neps/nep-0002-warnfix.rst index 60dc885b2..207dfa3d4 100644 --- a/doc/neps/nep-0002-warnfix.rst +++ b/doc/neps/nep-0002-warnfix.rst @@ -1,6 +1,6 @@ -========================================================================= -A proposal to build numpy without warning with a big set of warning flags -========================================================================= +================================================================================= +NEP 2 — A proposal to build numpy without warning with a big set of warning flags +================================================================================= :Author: David Cournapeau :Contact: david@ar.media.kyoto-u.ac.jp diff --git a/doc/neps/nep-0003-math_config_clean.rst b/doc/neps/nep-0003-math_config_clean.rst index 5af907437..ebd32b124 100644 --- a/doc/neps/nep-0003-math_config_clean.rst +++ b/doc/neps/nep-0003-math_config_clean.rst @@ -1,6 
+1,6 @@ -=========================================================== -Cleaning the math configuration of numpy.core -=========================================================== +===================================================== +NEP 3 — Cleaning the math configuration of numpy.core +===================================================== :Author: David Cournapeau :Contact: david@ar.media.kyoto-u.ac.jp diff --git a/doc/neps/nep-0004-datetime-proposal3.rst b/doc/neps/nep-0004-datetime-proposal3.rst index afeb00d73..b32964e88 100644 --- a/doc/neps/nep-0004-datetime-proposal3.rst +++ b/doc/neps/nep-0004-datetime-proposal3.rst @@ -1,6 +1,6 @@ -==================================================================== - A (third) proposal for implementing some date/time types in NumPy -==================================================================== +========================================================================= +NEP 4 — A (third) proposal for implementing some date/time types in NumPy +========================================================================= :Author: Francesc Alted i Abad :Contact: faltet@pytables.com diff --git a/doc/neps/nep-0005-generalized-ufuncs.rst b/doc/neps/nep-0005-generalized-ufuncs.rst index 54b2b370e..366e26ffd 100644 --- a/doc/neps/nep-0005-generalized-ufuncs.rst +++ b/doc/neps/nep-0005-generalized-ufuncs.rst @@ -1,6 +1,6 @@ -=============================== -Generalized Universal Functions -=============================== +======================================= +NEP 5 — Generalized Universal Functions +======================================= :Status: Final diff --git a/doc/neps/nep-0006-newbugtracker.rst b/doc/neps/nep-0006-newbugtracker.rst index 2b9344ed0..8dc7a1d8e 100644 --- a/doc/neps/nep-0006-newbugtracker.rst +++ b/doc/neps/nep-0006-newbugtracker.rst @@ -1,6 +1,6 @@ -=========================================== -Replacing Trac with a different bug tracker -=========================================== +=================================================== +NEP 6 — Replacing Trac with a different bug tracker +=================================================== :Author: David Cournapeau, Stefan van der Walt :Status: Deferred diff --git a/doc/neps/nep-0007-datetime-proposal.rst b/doc/neps/nep-0007-datetime-proposal.rst index 90894da49..5547a4306 100644 --- a/doc/neps/nep-0007-datetime-proposal.rst +++ b/doc/neps/nep-0007-datetime-proposal.rst @@ -1,6 +1,6 @@ -==================================================================== - A proposal for implementing some date/time types in NumPy -==================================================================== +================================================================== +NEP 7 — A proposal for implementing some date/time types in NumPy +================================================================== :Author: Travis Oliphant :Contact: oliphant@enthought.com diff --git a/doc/neps/nep-0008-groupby_additions.rst b/doc/neps/nep-0008-groupby_additions.rst index fa02f2f9c..3189fcf41 100644 --- a/doc/neps/nep-0008-groupby_additions.rst +++ b/doc/neps/nep-0008-groupby_additions.rst @@ -1,6 +1,6 @@ -==================================================================== - A proposal for adding groupby functionality to NumPy -==================================================================== +============================================================= +NEP 8 — A proposal for adding groupby functionality to NumPy +============================================================= :Author: Travis Oliphant :Contact: 
oliphant@enthought.com diff --git a/doc/neps/nep-0009-structured_array_extensions.rst b/doc/neps/nep-0009-structured_array_extensions.rst index 695d0d516..8b81a308d 100644 --- a/doc/neps/nep-0009-structured_array_extensions.rst +++ b/doc/neps/nep-0009-structured_array_extensions.rst @@ -1,6 +1,6 @@ -=========================== -Structured array extensions -=========================== +=================================== +NEP 9 — Structured array extensions +=================================== :Status: Deferred diff --git a/doc/neps/nep-0010-new-iterator-ufunc.rst b/doc/neps/nep-0010-new-iterator-ufunc.rst index 7b388a974..8601b4a4c 100644 --- a/doc/neps/nep-0010-new-iterator-ufunc.rst +++ b/doc/neps/nep-0010-new-iterator-ufunc.rst @@ -1,6 +1,6 @@ -===================================== -Optimizing Iterator/UFunc Performance -===================================== +============================================== +NEP 10 — Optimizing Iterator/UFunc Performance +============================================== :Author: Mark Wiebe <mwwiebe@gmail.com> :Content-Type: text/x-rst diff --git a/doc/neps/nep-0011-deferred-ufunc-evaluation.rst b/doc/neps/nep-0011-deferred-ufunc-evaluation.rst index 5f5de3518..a7143c6ee 100644 --- a/doc/neps/nep-0011-deferred-ufunc-evaluation.rst +++ b/doc/neps/nep-0011-deferred-ufunc-evaluation.rst @@ -1,6 +1,6 @@ -========================= -Deferred UFunc Evaluation -========================= +================================== +NEP 11 — Deferred UFunc Evaluation +================================== :Author: Mark Wiebe <mwwiebe@gmail.com> :Content-Type: text/x-rst diff --git a/doc/neps/nep-0012-missing-data.rst b/doc/neps/nep-0012-missing-data.rst index 57c45b4b6..dbcf1b579 100644 --- a/doc/neps/nep-0012-missing-data.rst +++ b/doc/neps/nep-0012-missing-data.rst @@ -1,6 +1,6 @@ -=================================== -Missing Data Functionality in NumPy -=================================== +============================================ +NEP 12 — Missing Data Functionality in NumPy +============================================ :Author: Mark Wiebe <mwwiebe@gmail.com> :Copyright: Copyright 2011 by Enthought, Inc diff --git a/doc/neps/nep-0013-ufunc-overrides.rst b/doc/neps/nep-0013-ufunc-overrides.rst index 61e2ceea9..a51ce3927 100644 --- a/doc/neps/nep-0013-ufunc-overrides.rst +++ b/doc/neps/nep-0013-ufunc-overrides.rst @@ -1,6 +1,6 @@ -================================= -A Mechanism for Overriding Ufuncs -================================= +========================================== +NEP 13 — A Mechanism for Overriding Ufuncs +========================================== .. 
currentmodule:: numpy diff --git a/doc/neps/nep-0014-dropping-python2.7-proposal.rst b/doc/neps/nep-0014-dropping-python2.7-proposal.rst index 158b89e1c..3adf3b407 100644 --- a/doc/neps/nep-0014-dropping-python2.7-proposal.rst +++ b/doc/neps/nep-0014-dropping-python2.7-proposal.rst @@ -1,6 +1,6 @@ -==================================== -Plan for dropping Python 2.7 support -==================================== +============================================= +NEP 14 — Plan for dropping Python 2.7 support +============================================= :Status: Accepted :Resolution: https://mail.python.org/pipermail/numpy-discussion/2017-November/077419.html diff --git a/doc/neps/nep-0015-merge-multiarray-umath.rst b/doc/neps/nep-0015-merge-multiarray-umath.rst index 17852220f..5e605a04f 100644 --- a/doc/neps/nep-0015-merge-multiarray-umath.rst +++ b/doc/neps/nep-0015-merge-multiarray-umath.rst @@ -1,6 +1,6 @@ -============================ -Merging multiarray and umath -============================ +===================================== +NEP 15 — Merging multiarray and umath +===================================== :Author: Nathaniel J. Smith <njs@pobox.com> :Status: Draft diff --git a/doc/neps/nep-0017-split-out-maskedarray.rst b/doc/neps/nep-0017-split-out-maskedarray.rst index d6dcc1def..7ef949763 100644 --- a/doc/neps/nep-0017-split-out-maskedarray.rst +++ b/doc/neps/nep-0017-split-out-maskedarray.rst @@ -1,6 +1,6 @@ -======================= -Split Out Masked Arrays -======================= +================================ +NEP 17 — Split Out Masked Arrays +================================ :Author: Stéfan van der Walt <stefanv@berkeley.edu> :Status: Rejected diff --git a/doc/neps/nep-0018-array-function-protocol.rst b/doc/neps/nep-0018-array-function-protocol.rst index 58a2833e6..3e23a2e28 100644 --- a/doc/neps/nep-0018-array-function-protocol.rst +++ b/doc/neps/nep-0018-array-function-protocol.rst @@ -1,6 +1,6 @@ -=========================================================== -A dispatch mechanism for NumPy's high level array functions -=========================================================== +==================================================================== +NEP 18 — A dispatch mechanism for NumPy's high level array functions +==================================================================== :Author: Stephan Hoyer <shoyer@google.com> :Author: Matthew Rocklin <mrocklin@gmail.com> diff --git a/doc/neps/nep-0019-rng-policy.rst b/doc/neps/nep-0019-rng-policy.rst index a2cc80262..fe389e5d5 100644 --- a/doc/neps/nep-0019-rng-policy.rst +++ b/doc/neps/nep-0019-rng-policy.rst @@ -1,6 +1,6 @@ -============================== -Random Number Generator Policy -============================== +======================================= +NEP 19 — Random Number Generator Policy +======================================= :Author: Robert Kern <robert.kern@gmail.com> :Status: Draft @@ -169,14 +169,16 @@ context of small unit tests. The new PRNG subsystem MUST provide a second, legacy distributions class that uses the same implementations of the distribution methods as the current -version of ``numpy.random.RandomState``. The methods of this class will keep -the same strict stream-compatibility guarantees. It is intended that this -class will no longer be modified, except to keep it working when numpy -internals change. All new development should go into the primary distributions -class. 
The purpose of ``RandomState`` will be documented as providing certain -fixed functionality for backwards compatibility and stable numbers for the -limited purpose of unit testing, and not making whole programs reproducible -across numpy versions. +version of ``numpy.random.RandomState``. The methods of this class will have +strict stream-compatibility guarantees, even stricter than the current policy. +It is intended that this class will no longer be modified, except to keep it +working when numpy internals change. All new development should go into the +primary distributions class. Bug fixes that change the stream SHALL NOT be +made to ``RandomState``; instead, buggy distributions should be made to warn +when they are buggy. The purpose of ``RandomState`` will be documented as +providing certain fixed functionality for backwards compatibility and stable +numbers for the limited purpose of unit testing, and not making whole programs +reproducible across numpy versions. This legacy distributions class MUST be accessible under the name ``numpy.random.RandomState`` for backwards compatibility. All current ways of diff --git a/doc/neps/nep-0020-gufunc-signature-enhancement.rst b/doc/neps/nep-0020-gufunc-signature-enhancement.rst index 903ee60cb..be7eecbf3 100644 --- a/doc/neps/nep-0020-gufunc-signature-enhancement.rst +++ b/doc/neps/nep-0020-gufunc-signature-enhancement.rst @@ -1,6 +1,6 @@ -====================================================== -Expansion of Generalized Universal Function Signatures -====================================================== +=============================================================== +NEP 20 — Expansion of Generalized Universal Function Signatures +=============================================================== :Author: Marten van Kerkwijk <mhvk@astro.utoronto.ca> :Status: Draft diff --git a/doc/neps/nep-0021-advanced-indexing.rst b/doc/neps/nep-0021-advanced-indexing.rst index 0279146be..d883a5589 100644 --- a/doc/neps/nep-0021-advanced-indexing.rst +++ b/doc/neps/nep-0021-advanced-indexing.rst @@ -1,6 +1,6 @@ -========================================= -Simplified and explicit advanced indexing -========================================= +================================================== +NEP 21 — Simplified and explicit advanced indexing +================================================== :Author: Sebastian Berg :Author: Stephan Hoyer <shoyer@google.com> diff --git a/doc/neps/nep-0022-ndarray-duck-typing-overview.rst b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst new file mode 100644 index 000000000..04e4a14b7 --- /dev/null +++ b/doc/neps/nep-0022-ndarray-duck-typing-overview.rst @@ -0,0 +1,351 @@ +=========================================================== +NEP 22 — Duck typing for NumPy arrays – high level overview +=========================================================== + +:Author: Stephan Hoyer <shoyer@google.com>, Nathaniel J. Smith <njs@pobox.com> +:Status: Draft +:Type: Informational +:Created: 2018-03-22 + +Abstract +-------- + +We outline a high-level vision for how NumPy will approach handling +“duck arrays”. This is an Informational-class NEP; it doesn’t +prescribe full details for any particular implementation. In brief, we +propose developing a number of new protocols for defining +implementations of multi-dimensional arrays with high-level APIs +matching NumPy. 
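To make the "duck array" idea concrete before the detailed discussion, here is a minimal sketch of the kind of object this NEP has in mind. The class is purely illustrative (every name in it is invented, and nothing here is proposed API): it stores an N-by-N array as just its diagonal, yet "quacks like" an ndarray by exposing the same Python-level attributes::

    import numpy as np

    class DiagonalArray:
        # Hypothetical duck array: compact storage, ndarray-like API.
        def __init__(self, diagonal):
            self._diag = np.asarray(diagonal)

        @property
        def shape(self):
            return (len(self._diag), len(self._diag))

        @property
        def ndim(self):
            return 2

        @property
        def dtype(self):
            return self._diag.dtype

        def __array__(self):
            # Materialize a real ndarray only on demand.
            return np.diag(self._diag)

    a = DiagonalArray([1.0, 2.0, 3.0])
    print(a.shape, a.dtype)     # (3, 3) float64
    print(np.asarray(a).sum())  # 6.0, converted via __array__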
+ + Detailed description -------------------- Traditionally, NumPy’s ``ndarray`` objects have provided two things: a high level API for expressing operations on homogeneously-typed, arbitrary-dimensional, array-structured data, and a concrete implementation of the API based on strided in-RAM storage. The API is powerful, fairly general, and used ubiquitously across the scientific Python stack. The concrete implementation, on the other hand, is suitable for a wide range of uses, but has limitations: as data sets grow and NumPy becomes used in a variety of new environments, there are increasingly cases where the strided in-RAM storage strategy is inappropriate, and users find they need sparse arrays, lazily evaluated arrays (as in dask), compressed arrays (as in blosc), arrays stored in GPU memory, arrays stored in alternative formats such as Arrow, and so forth – yet users still want to work with these arrays using the familiar NumPy APIs, and re-use existing code with minimal (ideally zero) porting overhead. As a working shorthand, we call these “duck arrays”, by analogy with Python’s “duck typing”: a “duck array” is a Python object which “quacks like” a numpy array in the sense that it has the same or similar Python API, but doesn’t share the C-level implementation. + +This NEP doesn’t propose any specific changes to NumPy or other projects; instead, it gives an overview of how we hope to extend NumPy to support a robust ecosystem of projects implementing and relying upon its high level API. + +Terminology +~~~~~~~~~~~ + +“Duck array” works fine as a placeholder for now, but it’s pretty jargony and may confuse new users, so we may want to pick something else for the actual API functions. Unfortunately, “array-like” is already taken for the concept of “anything that can be coerced into an array” (including e.g. list objects), and “anyarray” is already taken for the concept of “something that shares ndarray’s implementation, but has different semantics”, which is the opposite of a duck array (e.g., np.matrix is an “anyarray”, but is not a “duck array”). This is a classic bike-shed, so for now we’re just using “duck array”. Some possible options though include: arrayish, pseudoarray, nominalarray, ersatzarray, arraymimic, ... + + +General approach +~~~~~~~~~~~~~~~~ + +At a high level, duck array support requires working through each of the API functions provided by NumPy, and figuring out how it can be extended to work with duck array objects. In some cases this is easy (e.g., methods/attributes on ndarray itself); in other cases it’s more difficult. Here are some principles we’ve found useful so far: + + +Principle 1: Focus on “full” duck arrays, but don’t rule out “partial” duck arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We can distinguish between two classes: + +* “full” duck arrays, which aspire to fully implement np.ndarray’s + Python-level APIs and work essentially anywhere that np.ndarray + works. + +* “partial” duck arrays, which intentionally implement only a subset + of np.ndarray’s API. + +Full duck arrays are, well, kind of boring. They have exactly the same semantics as ndarray, with differences being restricted to under-the-hood decisions about how the data is actually stored. The kind of people who are excited about making numpy more extensible are also, unsurprisingly, excited about changing or extending numpy’s semantics.
So there’s been a lot of discussion of how to best support partial duck arrays. We've been guilty of this ourselves. + +At this point though, we think the best general strategy is to focus our efforts primarily on supporting full duck arrays, and only worry about partial duck arrays as much as we need to in order to make sure we don't accidentally rule them out for no reason. + +Why focus on full duck arrays? Several reasons: + +First, there are lots of very clear use cases. Potential consumers of the full duck array interface include almost every package that uses numpy (scipy, sklearn, astropy, ...), and in particular packages that provide array-wrapping-classes that handle multiple types of arrays, such as xarray and dask.array. Potential implementers of the full duck array interface include: distributed arrays, sparse arrays, masked arrays, arrays with units (unless they switch to using dtypes), labeled arrays, and so forth. Clear use cases lead to good and relevant APIs. + +Second, the Anna Karenina principle applies here: full duck arrays are all alike, but every partial duck array is partial in its own way: + +* ``xarray.DataArray`` is mostly a duck array, but has incompatible + broadcasting semantics. +* ``xarray.Dataset`` wraps multiple arrays in one object; it still + implements some array interfaces like ``__array_ufunc__``, but + certainly not all of them. +* ``pandas.Series`` has methods with similar behavior to numpy, but + unique null-skipping behavior. +* scipy’s ``LinearOperator``\s support matrix multiplication and nothing else. +* h5py and similar libraries for accessing array storage have objects + that support numpy-like slicing and conversion into a full array, + but not computation. +* Some classes may be similar to ndarray, but without supporting the + full indexing semantics. + +And so forth. + +Despite our best attempts, we haven't found any clear, unique way of slicing up the ndarray API into a hierarchy of related types that captures these distinctions; in fact, it’s unlikely that any single person even understands all the distinctions. And this is important, because we have a *lot* of APIs that we need to add duck array support to (both in numpy and in all the projects that depend on numpy!). By definition, these already work for ``ndarray``, so hopefully getting them to work for full duck arrays shouldn’t be so hard, since by definition full duck arrays act like ``ndarray``. It’d be very cumbersome to have to go through each function and identify the exact subset of the ndarray API that it needs, then figure out which partial array types can/should support it. Once we have things working for full duck arrays, we can go back later and refine the APIs further as needed. Focusing on full duck arrays allows us to start making progress immediately. + +In the future, it might be useful to identify specific use cases for duck arrays and standardize narrower interfaces targeted just at those use cases. For example, it might make sense to have a standard “array loader” interface that file access libraries like h5py, netcdf, pydap, zarr, ... all implement, to make it easy to switch between these libraries. But that’s something that we can do as we go, and it doesn’t necessarily have to involve the NumPy devs at all. For an example of what this might look like, see the documentation for `dask.array.from_array <http://dask.pydata.org/en/latest/array-api.html#dask.array.from_array>`__.
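The "array loader" idea in the last paragraph is easy to sketch. Below is a hypothetical h5py-style object (all names invented for illustration): it supports shape/dtype inspection, slicing, and conversion into a real array, but no computation. Objects of roughly this shape are already what ``dask.array.from_array`` knows how to wrap::

    import numpy as np

    class LazyFileArray:
        # Hypothetical "array loader": inspection and slicing, no math.
        def __init__(self, path, shape, dtype):
            self._path = path              # where the data would live
            self.shape = shape
            self.dtype = np.dtype(dtype)

        def __getitem__(self, index):
            # A real implementation would read only the requested
            # region from disk; zeros stand in for file I/O here.
            return np.zeros(self.shape, self.dtype)[index]

        def __array__(self):
            # Whole-array conversion, used by np.asarray().
            return self[...]

    loader = LazyFileArray('data.h5', (4, 4), 'float64')
    chunk = loader[:2, :2]      # partial read, returns a 2x2 ndarray
    full = np.asarray(loader)   # full conversion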
+ + +Principle 2: Take advantage of duck typing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``ndarray`` has a very large API surface area:: + + In [1]: len(set(dir(np.ndarray)) - set(dir(object))) + Out[1]: 138 + +And this is a huge **under**\estimate, because there are also many +free-standing functions in NumPy and other libraries which currently +use the NumPy C API and thus only work on ``ndarray`` objects. In type +theory, a type is defined by the operations you can perform on an +object; thus, the actual type of ``ndarray`` includes not just its +methods and attributes, but *all* of these functions. For duck arrays +to be successful, they’ll need to implement a large proportion of the +``ndarray`` API – but not all of it. (For example, +``dask.array.Array`` does not provide an equivalent to the +``ndarray.ptp`` method, presumably because no-one has ever noticed or +cared about its absence. But this doesn’t seem to have stopped people +from using dask.) + +This means that realistically, we can’t hope to define the whole duck +array API up front, or that anyone will be able to implement it all in +one go; this will be an incremental process. It also means that even +the so-called “full” duck array interface is somewhat fuzzily defined +at the borders; there are parts of the ``np.ndarray`` API that duck +arrays won’t have to implement, but we aren’t entirely sure what those +are. + +And ultimately, it isn’t really up to the NumPy developers to define +what does or doesn’t qualify as a duck array. If we want scikit-learn +functions to work on dask arrays (for example), then that’s going to +require negotiation between those two projects to discover +incompatibilities, and when an incompatibility is discovered it will +be up to them to negotiate who should change and how. The NumPy +project can provide technical tools and general advice to help resolve +these disagreements, but we can’t force one group or another to take +responsibility for any given bug. + +Therefore, even though we’re focusing on “full” duck arrays, we +*don’t* attempt to define a normative “array ABC” – maybe this will be +useful someday, but right now, it’s not. And as a convenient +side-effect, the lack of a normative definition leaves partial duck +arrays room to experiment. + +But, we do provide some more detailed advice for duck array +implementers and consumers below. + +Principle 3: Focus on protocols +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Historically, numpy has had lots of success at interoperating with +third-party objects by defining *protocols*, like ``__array__`` (asks +an arbitrary object to convert itself into an array), +``__array_interface__`` (a precursor to Python’s buffer protocol), and +``__array_ufunc__`` (allows third-party objects to support ufuncs like +``np.exp``). + +`NEP 16 <https://github.com/numpy/numpy/pull/10706>`_ took a +different approach: we need a duck-array equivalent of +``asarray``, and it proposed to do this by defining a version of +``asarray`` that would let through objects which implemented a new +AbstractArray ABC. As noted above, we now think that trying to define +an ABC is a bad idea for other reasons. But when this NEP was +discussed on the mailing list, we realized that even on its own +merits, this idea is not so great. A better approach is to define a +*method* that can be called on an arbitrary object to ask it to +convert itself into a duck array, and then define a version of +``asarray`` that calls this method. 
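As a rough sketch of this approach, with placeholder names throughout (the actual protocol and its spelling are deferred to the separate NEP referenced below)::

    import numpy as np

    def asduckarray(obj, dtype=None):
        # Hypothetical duck-array counterpart of np.asarray().
        duck = getattr(obj, '__duckarray__', None)
        if duck is not None:
            # Ask the object to convert itself; an object that is
            # already a duck array can simply return itself.
            return duck(dtype=dtype)
        # Fall back to coercing to a plain ndarray.
        return np.asarray(obj, dtype=dtype)

    class SparseThing:
        # Stands in for some third-party duck array class.
        def __duckarray__(self, dtype=None):
            return self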
+ +This is strictly more powerful: if an object is already a duck array, it can simply ``return self``. It allows more correct semantics: NEP 16 assumed that ``asarray(obj, dtype=X)`` is the same as ``asarray(obj).astype(X)``, but this isn’t true. And it supports more use cases: if h5py supported sparse arrays, it might want to provide an object which is not itself a sparse array, but which can be automatically converted into a sparse array. See NEP <XX, to be written> for full details. + +The protocol approach is also more consistent with core Python conventions: for example, see the ``__iter__`` method for coercing objects to iterators, or the ``__index__`` protocol for safe integer coercion. And finally, focusing on protocols leaves the door open for partial duck arrays, which can pick and choose which subset of the protocols they want to participate in, each of which has well-defined semantics. + +Conclusion: protocols are one honking great idea – let’s do more of those. + +Principle 4: Reuse existing methods when possible +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It’s tempting to try to define cleaned-up versions of ndarray methods with a more minimal interface to allow for easier implementation. For example, ``__array_reshape__`` could drop some of the strange arguments accepted by ``reshape`` and ``__array_basic_getitem__`` could drop all the `strange edge cases <http://www.numpy.org/neps/nep-0021-advanced-indexing.html>`__ of NumPy’s advanced indexing. + +But as discussed above, we don’t really know what APIs we need for duck-typing ndarray. We would inevitably end up with a very long list of new special methods. In contrast, existing methods like ``reshape`` and ``__getitem__`` have the advantage of already being widely used/exercised by libraries that use duck arrays, and in practice, any serious duck array type is going to have to implement them anyway. + +Principle 5: Make it easy to do the right thing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Making duck arrays work well is going to be a community effort. Documentation helps, but only goes so far. We want to make it easy to implement duck arrays that do the right thing. + +One way NumPy can help is by providing mixin classes for implementing large groups of related functionality at once. ``NDArrayOperatorsMixin`` is a good example: it allows for implementing arithmetic operators implicitly via the ``__array_ufunc__`` method. It’s not complete, and we’ll want more helpers like that (e.g. for reductions). + +(We initially thought that the importance of these mixins might be an argument for providing an array ABC, since that’s the standard way to do mixins in modern Python. But in discussion around NEP 16 we realized that partial duck arrays also wanted to take advantage of these mixins in some cases, so even if we did have an array ABC then the mixins would still need some sort of separate existence. So never mind that argument.) + +Tentative duck array guidelines +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a general rule, libraries using duck arrays should insist upon the minimum possible requirements, and libraries implementing duck arrays should provide as complete an API as possible. This will ensure maximum compatibility. For example, users should prefer to rely on ``.transpose()`` rather than ``.swapaxes()`` (which can be implemented in terms of transpose), but duck array authors should ideally implement both.
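The mixin approach from Principle 5 already works today. Here is a minimal sketch of a duck array that implements ``__array_ufunc__`` once and inherits every arithmetic operator from ``numpy.lib.mixins.NDArrayOperatorsMixin``. It is simplified: a serious implementation would also handle the ``out`` argument and return ``NotImplemented`` for unrecognized input types::

    import numpy as np
    from numpy.lib.mixins import NDArrayOperatorsMixin

    class WrappedArray(NDArrayOperatorsMixin):
        def __init__(self, value):
            self.value = np.asarray(value)

        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            # Unwrap any WrappedArray inputs, compute, re-wrap the result.
            inputs = tuple(x.value if isinstance(x, WrappedArray) else x
                           for x in inputs)
            return type(self)(getattr(ufunc, method)(*inputs, **kwargs))

        def __repr__(self):
            return 'WrappedArray(%r)' % self.value

    w = WrappedArray([1.0, 2.0])
    print(w + 1)      # operators come from the mixin
    print(np.exp(w))  # ufuncs route through __array_ufunc__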
+ +If you are trying to implement a duck array, then you should strive to implement everything. You certainly need ``.shape``, ``.ndim`` and ``.dtype``, but also your dtype attribute should actually be a ``numpy.dtype`` object, weird fancy indexing edge cases should ideally work, etc. Only details related to NumPy’s specific ``np.ndarray`` implementation (e.g., ``strides``, ``data``, ``view``) are explicitly out of scope. + +A (very) rough sketch of future plans +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The proposals discussed so far – ``__array_ufunc__`` and some kind of ``asarray`` protocol – are clearly necessary but not sufficient for full duck typing support. We expect the need for additional protocols to support (at least) these features: + +* **Concatenating** duck arrays, which would be used internally by other + array combining methods like stack/vstack/hstack. The implementation + of concatenate will need to be negotiated among the list of array + arguments. We expect to use an ``__array_concatenate__`` protocol + like ``__array_ufunc__`` instead of multiple dispatch. +* **Ufunc-like functions** that currently aren’t ufuncs. Many NumPy + functions like median, percentile, sort, where and clip could be + written as generalized ufuncs but currently aren’t. Either these + functions should be written as ufuncs, or we should consider adding + another generic wrapper mechanism that works similarly to ufuncs but + makes fewer guarantees about how the implementation is done. +* **Random number generation** with duck arrays, e.g., + ``np.random.randn()``. For example, we might want to add new APIs + like ``random_like()`` for generating new arrays with a matching + shape *and* type – though we'll need to look at some real examples + of how these functions are used to figure out what would be helpful. +* **Miscellaneous other functions** such as ``np.einsum``, + ``np.zeros_like``, and ``np.broadcast_to`` that don’t fall into any + of the above categories. +* **Checking mutability** on duck arrays, which would imply that they + support assignment with ``__setitem__`` and the out argument to + ufuncs. Many otherwise fine duck arrays are not easily mutable (for + example, because they use some kinds of sparse or compressed + storage, or are in read-only shared memory), and it turns out that + frequently-used code like the default implementation of ``np.mean`` + needs to check this (to decide whether it can re-use temporary + arrays). + +We intentionally do not describe exactly how to add support for these types of duck arrays here. These will be the subject of future NEPs. + + +Copyright +--------- + +This document has been placed in the public domain. diff --git a/doc/neps/tools/build_index.py b/doc/neps/tools/build_index.py index 65225c995..d9c4f690b 100644 --- a/doc/neps/tools/build_index.py +++ b/doc/neps/tools/build_index.py @@ -40,6 +40,10 @@ def nep_metadata(): tags['Title'] = lines[1].strip() tags['Filename'] = source + if not tags['Title'].startswith(f'NEP {nr} — '): + raise RuntimeError( + f'Title for NEP {nr} does not start with "NEP {nr} — " ' + '(note that — here is a special, elongated dash)') if tags['Status'] in ('Accepted', 'Rejected', 'Withdrawn'): if not 'Resolution' in tags: diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst index 8df763b56..3daa4ae97 100644 --- a/doc/release/1.16.0-notes.rst +++ b/doc/release/1.16.0-notes.rst @@ -41,6 +41,12 @@ Even when no elements needed to be drawn, ``np.random.randint`` and distribution.
This has been fixed so that e.g. ``np.random.choice([], 0) == np.array([], dtype=float64)``. +ARM support updated +------------------- +Support for ARM CPUs has been updated to accommodate 32 and 64 bit targets, +and also big and little endian byte ordering. AARCH32 memory alignment issues +have been addressed. + Changes ======= diff --git a/numpy/__init__.py b/numpy/__init__.py index 77b1d924d..b912d2222 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -139,9 +139,7 @@ else: loader = PackageLoader(infunc=True) return loader(*packages, **options) - from . import add_newdocs - __all__ = ['add_newdocs', - 'ModuleDeprecationWarning', + __all__ = ['ModuleDeprecationWarning', 'VisibleDeprecationWarning'] pkgload.__doc__ = PackageLoader.__call__.__doc__ @@ -191,7 +189,7 @@ else: from .testing import Tester # Pytest testing - from numpy.testing._private.pytesttester import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester @@ -214,7 +212,9 @@ else: except AssertionError: msg = ("The current Numpy installation ({!r}) fails to " "pass simple sanity checks. This can be caused for example " - "by incorrect BLAS library being linked in.") + "by incorrect BLAS library being linked in, or by mixing " + "package managers (pip, conda, apt, ...). Search closed " + "numpy issues for similar problems.") raise RuntimeError(msg.format(__file__)) _sanity_check() diff --git a/numpy/testing/_private/pytesttester.py b/numpy/_pytesttester.py index 8c73fafa4..6a1b3274e 100644 --- a/numpy/testing/_private/pytesttester.py +++ b/numpy/_pytesttester.py @@ -5,7 +5,7 @@ This module implements the ``test()`` function for NumPy modules. The usual boiler plate for doing that is to put the following in the module ``__init__.py`` file:: - from numpy.testing import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__).test del PytestTester @@ -23,6 +23,9 @@ whether or not that file is found as follows: In practice, tests run from the numpy repo are run in develop mode. That includes the standard ``python runtests.py`` invocation. +This module is imported by every numpy subpackage, so lies at the top level to +simplify circular import issues. For the same reason, it contains no numpy +imports at module scope, instead importing numpy within function calls. """ from __future__ import division, absolute_import, print_function diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index 4d9cbf5da..9ef30b018 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -59,6 +59,10 @@ del nt from .fromnumeric import amax as max, amin as min, round_ as round from .numeric import absolute as abs +# do this after everything else, to minimize the chance of this misleadingly +# appearing in an import-time traceback +from . import _add_newdocs + __all__ = ['char', 'rec', 'memmap'] __all__ += numeric.__all__ __all__ += fromnumeric.__all__ @@ -100,6 +104,6 @@ del copyreg del sys del _ufunc_reduce -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/add_newdocs.py b/numpy/core/_add_newdocs.py index a882bf1e0..f596e613f 100644 --- a/numpy/add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -10,7 +10,7 @@ NOTE: Many of the methods of ndarray have corresponding functions. 
""" from __future__ import division, absolute_import, print_function -from numpy.lib import add_newdoc +from numpy.core.function_base import add_newdoc ############################################################################### # diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py index 2fac3caf3..163f125c2 100644 --- a/numpy/core/einsumfunc.py +++ b/numpy/core/einsumfunc.py @@ -4,6 +4,8 @@ Implementation of optimized einsum. """ from __future__ import division, absolute_import, print_function +import itertools + from numpy.compat import basestring from numpy.core.multiarray import c_einsum from numpy.core.numeric import asarray, asanyarray, result_type, tensordot, dot @@ -14,6 +16,44 @@ einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' einsum_symbols_set = set(einsum_symbols) +def _flop_count(idx_contraction, inner, num_terms, size_dictionary): + """ + Computes the number of FLOPS in the contraction. + + Parameters + ---------- + idx_contraction : iterable + The indices involved in the contraction + inner : bool + Does this contraction require an inner product? + num_terms : int + The number of terms in a contraction + size_dictionary : dict + The size of each of the indices in idx_contraction + + Returns + ------- + flop_count : int + The total number of FLOPS required for the contraction. + + Examples + -------- + + >>> _flop_count('abc', False, 1, {'a': 2, 'b':3, 'c':5}) + 90 + + >>> _flop_count('abc', True, 2, {'a': 2, 'b':3, 'c':5}) + 270 + + """ + + overall_size = _compute_size_by_dict(idx_contraction, size_dictionary) + op_factor = max(1, num_terms - 1) + if inner: + op_factor += 1 + + return overall_size * op_factor + def _compute_size_by_dict(indices, idx_dict): """ Computes the product of the elements in indices based on the dictionary @@ -139,14 +179,9 @@ def _optimal_path(input_sets, output_set, idx_dict, memory_limit): iter_results = [] # Compute all unique pairs - comb_iter = [] - for x in range(len(input_sets) - iteration): - for y in range(x + 1, len(input_sets) - iteration): - comb_iter.append((x, y)) - for curr in full_results: cost, positions, remaining = curr - for con in comb_iter: + for con in itertools.combinations(range(len(input_sets) - iteration), 2): # Find the contraction cont = _find_contraction(con, remaining, output_set) @@ -157,15 +192,10 @@ def _optimal_path(input_sets, output_set, idx_dict, memory_limit): if new_size > memory_limit: continue - # Find cost - new_cost = _compute_size_by_dict(idx_contract, idx_dict) - if idx_removed: - new_cost *= 2 - # Build (total_cost, positions, indices_remaining) - new_cost += cost + total_cost = cost + _flop_count(idx_contract, idx_removed, len(con), idx_dict) new_pos = positions + [con] - iter_results.append((new_cost, new_pos, new_input_sets)) + iter_results.append((total_cost, new_pos, new_input_sets)) # Update combinatorial list, if we did not find anything return best # path + remaining contractions @@ -183,6 +213,102 @@ def _optimal_path(input_sets, output_set, idx_dict, memory_limit): path = min(full_results, key=lambda x: x[0])[1] return path +def _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, naive_cost): + """Compute the cost (removed size + flops) and resultant indices for + performing the contraction specified by ``positions``. + + Parameters + ---------- + positions : tuple of int + The locations of the proposed tensors to contract. + input_sets : list of sets + The indices found on each tensors. 
+ output_set : set + The output indices of the expression. + idx_dict : dict + Mapping of each index to its size. + memory_limit : int + The total allowed size for an intermediary tensor. + path_cost : int + The contraction cost so far. + naive_cost : int + The cost of the unoptimized expression. + + Returns + ------- + cost : (int, int) + A tuple containing the size of any indices removed, and the flop cost. + positions : tuple of int + The locations of the proposed tensors to contract. + new_input_sets : list of sets + The resulting new list of indices if this proposed contraction is performed. + + """ + + # Find the contraction + contract = _find_contraction(positions, input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(idx_result, idx_dict) + if new_size > memory_limit: + return None + + # Build sort tuple + old_sizes = (_compute_size_by_dict(input_sets[p], idx_dict) for p in positions) + removed_size = sum(old_sizes) - new_size + + # NB: removed_size used to be just the size of any removed indices i.e.: + # helpers.compute_size_by_dict(idx_removed, idx_dict) + cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict) + sort = (-removed_size, cost) + + # Sieve based on total cost as well + if (path_cost + cost) > naive_cost: + return None + + # Add contraction to possible choices + return [sort, positions, new_input_sets] + + +def _update_other_results(results, best): + """Update the positions and provisional input_sets of ``results`` based on + performing the contraction result ``best``. Remove any results involving the + tensors just contracted. + + Parameters + ---------- + results : list + List of contraction results produced by ``_parse_possible_contraction``. + best : list + The best contraction of ``results``, i.e. the one that will be performed. + + Returns + ------- + mod_results : list + The list of modified results, updated with the outcome of the ``best`` contraction.
+ """ + + best_con = best[1] + bx, by = best_con + mod_results = [] + + for cost, (x, y), con_sets in results: + + # Ignore results involving tensors just contracted + if x in best_con or y in best_con: + continue + + # Update the input_sets + del con_sets[by - int(by > x) - int(by > y)] + del con_sets[bx - int(bx > x) - int(bx > y)] + con_sets.insert(-1, best[2][-1]) + + # Update the position indices + mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by) + mod_results.append((cost, mod_con, con_sets)) + + return mod_results def _greedy_path(input_sets, output_set, idx_dict, memory_limit): """ @@ -219,46 +345,68 @@ def _greedy_path(input_sets, output_set, idx_dict, memory_limit): [(0, 2), (0, 1)] """ + # Handle trivial cases that leaked through if len(input_sets) == 1: return [(0,)] + elif len(input_sets) == 2: + return [(0, 1)] + + # Build up a naive cost + contract = _find_contraction(range(len(input_sets)), input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + naive_cost = _flop_count(idx_contract, idx_removed, len(input_sets), idx_dict) + # Initially iterate over all pairs + comb_iter = itertools.combinations(range(len(input_sets)), 2) + known_contractions = [] + + path_cost = 0 path = [] - for iteration in range(len(input_sets) - 1): - iteration_results = [] - comb_iter = [] - # Compute all unique pairs - for x in range(len(input_sets)): - for y in range(x + 1, len(input_sets)): - comb_iter.append((x, y)) + for iteration in range(len(input_sets) - 1): + # Iterate over all pairs on first step, only previously found pairs on subsequent steps for positions in comb_iter: - # Find the contraction - contract = _find_contraction(positions, input_sets, output_set) - idx_result, new_input_sets, idx_removed, idx_contract = contract - - # Sieve the results based on memory_limit - if _compute_size_by_dict(idx_result, idx_dict) > memory_limit: + # Always initially ignore outer products + if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]): continue - # Build sort tuple - removed_size = _compute_size_by_dict(idx_removed, idx_dict) - cost = _compute_size_by_dict(idx_contract, idx_dict) - sort = (-removed_size, cost) + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, path_cost, + naive_cost) + if result is not None: + known_contractions.append(result) - # Add contraction to possible choices - iteration_results.append([sort, positions, new_input_sets]) + # If we do not have a inner contraction, rescan pairs including outer products + if len(known_contractions) == 0: - # If we did not find a new contraction contract remaining - if len(iteration_results) == 0: - path.append(tuple(range(len(input_sets)))) - break + # Then check the outer products + for positions in itertools.combinations(range(len(input_sets)), 2): + result = _parse_possible_contraction(positions, input_sets, output_set, idx_dict, memory_limit, + path_cost, naive_cost) + if result is not None: + known_contractions.append(result) + + # If we still did not find any remaining contractions, default back to einsum like behavior + if len(known_contractions) == 0: + path.append(tuple(range(len(input_sets)))) + break # Sort based on first index - best = min(iteration_results, key=lambda x: x[0]) - path.append(best[1]) + best = min(known_contractions, key=lambda x: x[0]) + + # Now propagate as many unused contractions as possible to next iteration + known_contractions = _update_other_results(known_contractions, best) + + # Next 
iteration only compute contractions with the new tensor + # All other contractions have been accounted for input_sets = best[2] + new_tensor_pos = len(input_sets) - 1 + comb_iter = ((i, new_tensor_pos) for i in range(new_tensor_pos)) + + # Update path and total cost + path.append(best[1]) + path_cost += best[0][1] return path @@ -314,26 +462,27 @@ def _can_dot(inputs, result, idx_removed): if len(inputs) != 2: return False - # Build a few temporaries input_left, input_right = inputs + + for c in set(input_left + input_right): + # can't deal with repeated indices on same input or more than 2 total + nl, nr = input_left.count(c), input_right.count(c) + if (nl > 1) or (nr > 1) or (nl + nr > 2): + return False + + # can't do implicit summation or dimension collapse e.g. + # "ab,bc->c" (implicitly sum over 'a') + # "ab,ca->ca" (take diagonal of 'a') + if nl + nr - 1 == int(c in result): + return False + + # Build a few temporaries set_left = set(input_left) set_right = set(input_right) keep_left = set_left - idx_removed keep_right = set_right - idx_removed rs = len(idx_removed) - # Indices must overlap between the two operands - if not len(set_left & set_right): - return False - - # We cannot have duplicate indices ("ijj, jk -> ik") - if (len(set_left) != len(input_left)) or (len(set_right) != len(input_right)): - return False - - # Cannot handle partial inner ("ij, ji -> i") - if len(keep_left & keep_right): - return False - # At this point we are a DOT, GEMV, or GEMM operation # Handle inner products @@ -698,6 +847,7 @@ def einsum_path(*operands, **kwargs): # Get length of each unique dimension and ensure all dimensions are correct dimension_dict = {} + broadcast_indices = [[] for x in range(len(input_list))] for tnum, term in enumerate(input_list): sh = operands[tnum].shape if len(sh) != len(term): @@ -706,6 +856,11 @@ def einsum_path(*operands, **kwargs): % (input_subscripts[tnum], tnum)) for cnum, char in enumerate(term): dim = sh[cnum] + + # Build out broadcast indices + if dim == 1: + broadcast_indices[tnum].append(char) + if char in dimension_dict.keys(): # For broadcasting cases we always want the largest dim size if dimension_dict[char] == 1: @@ -717,6 +872,9 @@ def einsum_path(*operands, **kwargs): else: dimension_dict[char] = dim + # Convert broadcast inds to sets + broadcast_indices = [set(x) for x in broadcast_indices] + # Compute size of each input array plus the output array size_list = [] for term in input_list + [output_subscript]: @@ -730,20 +888,14 @@ def einsum_path(*operands, **kwargs): # Compute naive cost # This isn't quite right, need to look into exactly how einsum does this - naive_cost = _compute_size_by_dict(indices, dimension_dict) - indices_in_input = input_subscripts.replace(',', '') - mult = max(len(input_list) - 1, 1) - if (len(indices_in_input) - len(set(indices_in_input))): - mult *= 2 - naive_cost *= mult + inner_product = (sum(len(x) for x in input_sets) - len(indices)) > 0 + naive_cost = _flop_count(indices, inner_product, len(input_list), dimension_dict) # Compute the path if (path_type is False) or (len(input_list) in [1, 2]) or (indices == output_set): # Nothing to be optimized, leave it to einsum path = [tuple(range(len(input_list)))] elif path_type == "greedy": - # Maximum memory should be at most out_size for this algorithm - memory_arg = min(memory_arg, max_size) path = _greedy_path(input_sets, output_set, dimension_dict, memory_arg) elif path_type == "optimal": path = _optimal_path(input_sets, output_set, dimension_dict, memory_arg) @@ -762,18 
+914,24 @@ def einsum_path(*operands, **kwargs): contract = _find_contraction(contract_inds, input_sets, output_set) out_inds, input_sets, idx_removed, idx_contract = contract - cost = _compute_size_by_dict(idx_contract, dimension_dict) - if idx_removed: - cost *= 2 + cost = _flop_count(idx_contract, idx_removed, len(contract_inds), dimension_dict) cost_list.append(cost) scale_list.append(len(idx_contract)) size_list.append(_compute_size_by_dict(out_inds, dimension_dict)) + bcast = set() tmp_inputs = [] for x in contract_inds: tmp_inputs.append(input_list.pop(x)) + bcast |= broadcast_indices.pop(x) - do_blas = _can_dot(tmp_inputs, out_inds, idx_removed) + new_bcast_inds = bcast - idx_removed + + # If we're broadcasting, nix blas + if not len(idx_removed & bcast): + do_blas = _can_dot(tmp_inputs, out_inds, idx_removed) + else: + do_blas = False # Last contraction if (cnum - len(path)) == -1: @@ -783,6 +941,7 @@ def einsum_path(*operands, **kwargs): idx_result = "".join([x[1] for x in sorted(sort_result)]) input_list.append(idx_result) + broadcast_indices.append(new_bcast_inds) einsum_str = ",".join(tmp_inputs) + "->" + idx_result contraction = (contract_inds, idx_removed, einsum_str, input_list[:], do_blas) @@ -1200,25 +1359,14 @@ def einsum(*operands, **kwargs): tmp_operands.append(operands.pop(x)) # Do we need to deal with the output? - if specified_out and ((num + 1) == len(contraction_list)): - handle_out = True + handle_out = specified_out and ((num + 1) == len(contraction_list)) - # Handle broadcasting vs BLAS cases + # Call tensordot if still possible if blas: # Checks have already been handled input_str, results_index = einsum_str.split('->') input_left, input_right = input_str.split(',') - if 1 in tmp_operands[0].shape or 1 in tmp_operands[1].shape: - left_dims = {dim: size for dim, size in - zip(input_left, tmp_operands[0].shape)} - right_dims = {dim: size for dim, size in - zip(input_right, tmp_operands[1].shape)} - # If dims do not match we are broadcasting, BLAS off - if any(left_dims[ind] != right_dims[ind] for ind in idx_rm): - blas = False - # Call tensordot if still possible - if blas: tensor_result = input_left + input_right for s in idx_rm: tensor_result = tensor_result.replace(s, "") diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py index 82de1a36e..fb72bada5 100644 --- a/numpy/core/function_base.py +++ b/numpy/core/function_base.py @@ -6,6 +6,7 @@ import operator from . import numeric as _nx from .numeric import (result_type, NaN, shares_memory, MAY_SHARE_BOUNDS, TooHardError,asanyarray) +from numpy.core.multiarray import add_docstring __all__ = ['logspace', 'linspace', 'geomspace'] @@ -356,3 +357,38 @@ def geomspace(start, stop, num=50, endpoint=True, dtype=None): endpoint=endpoint, base=10.0, dtype=dtype) return result.astype(dtype) + + +#always succeed +def add_newdoc(place, obj, doc): + """ + Adds documentation to obj which is in module place. + + If doc is a string add it to obj as a docstring + + If doc is a tuple, then the first element is interpreted as + an attribute of obj and the second as the docstring + (method, docstring) + + If doc is a list, then each element of the list should be a + sequence of length two --> [(method1, docstring1), + (method2, docstring2), ...] + + This routine never raises an error. + + This routine cannot modify read-only docstrings, as appear + in new-style classes or built-in functions. Because this + routine never raises an error the caller must check manually + that the docstrings were changed. 
+ """ + try: + new = getattr(__import__(place, globals(), {}, [obj]), obj) + if isinstance(doc, str): + add_docstring(new, doc.strip()) + elif isinstance(doc, tuple): + add_docstring(getattr(new, doc[0]), doc[1].strip()) + elif isinstance(doc, list): + for val in doc: + add_docstring(getattr(new, val[0]), val[1].strip()) + except Exception: + pass diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h index f2c61a0a1..c712fd3ef 100644 --- a/numpy/core/include/numpy/npy_cpu.h +++ b/numpy/core/include/numpy/npy_cpu.h @@ -63,10 +63,27 @@ #define NPY_CPU_HPPA #elif defined(__alpha__) #define NPY_CPU_ALPHA -#elif defined(__arm__) && defined(__ARMEL__) - #define NPY_CPU_ARMEL -#elif defined(__arm__) && defined(__ARMEB__) - #define NPY_CPU_ARMEB +#elif defined(__arm__) + #if defined(__ARMEB__) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEB_AARCH32 + #elif defined(__ARM_64BIT_STATE) + #define NPY_CPU_ARMEB_AARCH64 + #else + #define NPY_CPU_ARMEB + #endif + #elif defined(__ARMEL__) + #if defined(__ARM_32BIT_STATE) + #define NPY_CPU_ARMEL_AARCH32 + #elif defined(__ARM_64BIT_STATE) + #define NPY_CPU_ARMEL_AARCH64 + #else + #define NPY_CPU_ARMEL + #endif + #else + # error Unknown ARM CPU, please report this to numpy maintainers with \ + information about your platform (OS, CPU and compiler) + #endif #elif defined(__sh__) && defined(__LITTLE_ENDIAN__) #define NPY_CPU_SH_LE #elif defined(__sh__) && defined(__BIG_ENDIAN__) @@ -77,8 +94,6 @@ #define NPY_CPU_MIPSEB #elif defined(__or1k__) #define NPY_CPU_OR1K -#elif defined(__aarch64__) - #define NPY_CPU_AARCH64 #elif defined(__mc68000__) #define NPY_CPU_M68K #elif defined(__arc__) && defined(__LITTLE_ENDIAN__) diff --git a/numpy/core/include/numpy/npy_endian.h b/numpy/core/include/numpy/npy_endian.h index 649bdb0a6..44cdffd14 100644 --- a/numpy/core/include/numpy/npy_endian.h +++ b/numpy/core/include/numpy/npy_endian.h @@ -37,28 +37,31 @@ #define NPY_LITTLE_ENDIAN 1234 #define NPY_BIG_ENDIAN 4321 - #if defined(NPY_CPU_X86) \ - || defined(NPY_CPU_AMD64) \ - || defined(NPY_CPU_IA64) \ - || defined(NPY_CPU_ALPHA) \ - || defined(NPY_CPU_ARMEL) \ - || defined(NPY_CPU_AARCH64) \ - || defined(NPY_CPU_SH_LE) \ - || defined(NPY_CPU_MIPSEL) \ - || defined(NPY_CPU_PPC64LE) \ - || defined(NPY_CPU_ARCEL) \ + #if defined(NPY_CPU_X86) \ + || defined(NPY_CPU_AMD64) \ + || defined(NPY_CPU_IA64) \ + || defined(NPY_CPU_ALPHA) \ + || defined(NPY_CPU_ARMEL) \ + || defined(NPY_CPU_ARMEL_AARCH32) \ + || defined(NPY_CPU_ARMEL_AARCH64) \ + || defined(NPY_CPU_SH_LE) \ + || defined(NPY_CPU_MIPSEL) \ + || defined(NPY_CPU_PPC64LE) \ + || defined(NPY_CPU_ARCEL) \ || defined(NPY_CPU_RISCV64) #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN - #elif defined(NPY_CPU_PPC) \ - || defined(NPY_CPU_SPARC) \ - || defined(NPY_CPU_S390) \ - || defined(NPY_CPU_HPPA) \ - || defined(NPY_CPU_PPC64) \ - || defined(NPY_CPU_ARMEB) \ - || defined(NPY_CPU_SH_BE) \ - || defined(NPY_CPU_MIPSEB) \ - || defined(NPY_CPU_OR1K) \ - || defined(NPY_CPU_M68K) \ + #elif defined(NPY_CPU_PPC) \ + || defined(NPY_CPU_SPARC) \ + || defined(NPY_CPU_S390) \ + || defined(NPY_CPU_HPPA) \ + || defined(NPY_CPU_PPC64) \ + || defined(NPY_CPU_ARMEB) \ + || defined(NPY_CPU_ARMEB_AARCH32) \ + || defined(NPY_CPU_ARMEB_AARCH64) \ + || defined(NPY_CPU_SH_BE) \ + || defined(NPY_CPU_MIPSEB) \ + || defined(NPY_CPU_OR1K) \ + || defined(NPY_CPU_M68K) \ || defined(NPY_CPU_ARCEB) #define NPY_BYTE_ORDER NPY_BIG_ENDIAN #else diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c 
index 46ff78b9c..cdca1d606 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -2084,7 +2084,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) PyArray_TRIVIALLY_ITERABLE_OP_READ, PyArray_TRIVIALLY_ITERABLE_OP_READ) || (PyArray_NDIM(tmp_arr) == 0 && - PyArray_TRIVIALLY_ITERABLE(tmp_arr))) && + PyArray_TRIVIALLY_ITERABLE(ind))) && /* Check if the type is equivalent to INTP */ PyArray_ITEMSIZE(ind) == sizeof(npy_intp) && PyArray_DESCR(ind)->kind == 'i' && diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index 094612b7d..f9c671f77 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -689,21 +689,16 @@ npy_bswap8_unaligned(char * x) #define PyArray_TRIVIALLY_ITERABLE_OP_NOREAD 0 #define PyArray_TRIVIALLY_ITERABLE_OP_READ 1 -#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ - PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ - PyArray_CompareLists(PyArray_DIMS(arr1), \ - PyArray_DIMS(arr2), \ - PyArray_NDIM(arr1)) && \ - (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \ - NPY_ARRAY_F_CONTIGUOUS)) & \ - (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \ - NPY_ARRAY_F_CONTIGUOUS)) \ - ) +#define PyArray_TRIVIALLY_ITERABLE(arr) ( \ + PyArray_NDIM(arr) <= 1 || \ + PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ + PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \ + ) #define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \ - size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \ - PyArray_STRIDE(arr, 0) : \ - PyArray_ITEMSIZE(arr))) + assert(PyArray_TRIVIALLY_ITERABLE(arr)), \ + size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \ + PyArray_STRIDE(arr, 0) : PyArray_ITEMSIZE(arr))) static NPY_INLINE int PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2, @@ -757,15 +752,22 @@ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead); } +#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ + PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ + PyArray_CompareLists(PyArray_DIMS(arr1), \ + PyArray_DIMS(arr2), \ + PyArray_NDIM(arr1)) && \ + (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \ + NPY_ARRAY_F_CONTIGUOUS)) & \ + (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \ + NPY_ARRAY_F_CONTIGUOUS)) \ + ) + #define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \ arr1, arr2, arr1_read, arr2_read)) -#define PyArray_TRIVIALLY_ITERABLE(arr) ( \ - PyArray_NDIM(arr) <= 1 || \ - PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ - PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \ - ) + #define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \ count = PyArray_SIZE(arr); \ data = PyArray_BYTES(arr); \ @@ -774,7 +776,6 @@ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr PyArray_STRIDE(arr, 0) : \ PyArray_ITEMSIZE(arr))); - #define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_TRIVIALLY_ITERABLE(arr1) && \ (PyArray_NDIM(arr2) == 0 || \ diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h index 107b3cb5b..8143e7719 100644 --- a/numpy/core/src/private/npy_config.h +++ b/numpy/core/src/private/npy_config.h @@ -15,7 +15,8 @@ * amd64 is not harmed much by the bloat as the system provides 16 byte * alignment 
by default. */ -#if (defined NPY_CPU_X86 || defined _WIN32) +#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\ + defined NPY_CPU_ARMEB_AARCH32) #define NPY_MAX_COPY_ALIGNMENT 8 #else #define NPY_MAX_COPY_ALIGNMENT 16 #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index b964c568e..a3fd72839 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -552,6 +552,181 @@ ufunc_get_name_cstr(PyUFuncObject *ufunc) { } /* + * Helpers for keyword parsing + */ + +/* + * Find key in a list of pointers to keyword names. + * The list should end with NULL. + * + * Returns either the index into the list (pointing to the final NULL entry + * if no match was found), or -1 on failure. + */ +static npy_intp +locate_key(PyObject **kwnames, PyObject *key) +{ + PyObject **kwname = kwnames; + while (*kwname != NULL && *kwname != key) { + kwname++; + } + /* Slow fallback, just in case */ + if (*kwname == NULL) { + int cmp = 0; + kwname = kwnames; + while (*kwname != NULL && + (cmp = PyObject_RichCompareBool(key, *kwname, + Py_EQ)) == 0) { + kwname++; + } + if (cmp < 0) { + return -1; + } + } + return kwname - kwnames; +} + +/* + * Parse keyword arguments, matching against kwnames. + * + * Arguments beyond kwnames (the va_list) should contain converters and outputs + * for each keyword name (where an output can be NULL to indicate the particular + * keyword should be ignored). + * + * Returns 0 on success, -1 on failure with an error set. + * + * Note that the parser does not clean up on failure, i.e., already parsed keyword + * values may hold new references, which the caller has to remove. + * + * TODO: ufunc is only used for the name in error messages; passing on the + * name instead might be an option. + * + * TODO: instead of having this function ignore keywords for which the + * corresponding output is NULL, the calling routine should prepare the + * correct list. + */ +static int +parse_ufunc_keywords(PyUFuncObject *ufunc, PyObject *kwds, PyObject **kwnames, ...) +{ + va_list va; + PyObject *key, *value; + Py_ssize_t pos = 0; + typedef int converter(PyObject *, void *); + + while (PyDict_Next(kwds, &pos, &key, &value)) { + int i; + converter *convert; + void *output = NULL; + npy_intp index = locate_key(kwnames, key); + if (index < 0) { + return -1; + } + if (kwnames[index]) { + va_start(va, kwnames); + for (i = 0; i <= index; i++) { + convert = va_arg(va, converter *); + output = va_arg(va, void *); + } + va_end(va); + } + if (output) { + if (!convert(value, output)) { + return -1; + } + } + else { +#if PY_VERSION_HEX >= 0x03000000 + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword to ufunc '%s'", + key, ufunc_get_name_cstr(ufunc)); +#else + char *str = PyString_AsString(key); + if (str == NULL) { + PyErr_Clear(); + PyErr_SetString(PyExc_TypeError, "invalid keyword argument"); + } + else { + PyErr_Format(PyExc_TypeError, + "'%s' is an invalid keyword to ufunc '%s'", + str, ufunc_get_name_cstr(ufunc)); + } +#endif + return -1; + } + } + return 0; +} + +/* + * Converters for use in parsing of keyword arguments.
+ */ +NPY_NO_EXPORT int +_subok_converter(PyObject *obj, int *subok) +{ + if (PyBool_Check(obj)) { + *subok = (obj == Py_True); + return NPY_SUCCEED; + } + else { + PyErr_SetString(PyExc_TypeError, + "'subok' must be a boolean"); + return NPY_FAIL; + } +} + +NPY_NO_EXPORT int +_keepdims_converter(PyObject *obj, int *keepdims) +{ + if (PyBool_Check(obj)) { + *keepdims = (obj == Py_True); + return NPY_SUCCEED; + } + else { + PyErr_SetString(PyExc_TypeError, + "'keepdims' must be a boolean"); + return NPY_FAIL; + } +} + +NPY_NO_EXPORT int +_wheremask_converter(PyObject *obj, PyArrayObject **wheremask) +{ + /* + * Optimization: where=True is the same as no where argument. + * This lets us document True as the default. + */ + if (obj == Py_True) { + return NPY_SUCCEED; + } + else { + PyArray_Descr *dtype = PyArray_DescrFromType(NPY_BOOL); + if (dtype == NULL) { + return NPY_FAIL; + } + /* PyArray_FromAny steals reference to dtype, even on failure */ + *wheremask = (PyArrayObject *)PyArray_FromAny(obj, dtype, 0, 0, 0, NULL); + if ((*wheremask) == NULL) { + return NPY_FAIL; + } + return NPY_SUCCEED; + } +} + +NPY_NO_EXPORT int +_new_reference(PyObject *obj, PyObject **out) +{ + Py_INCREF(obj); + *out = obj; + return NPY_SUCCEED; +} + +NPY_NO_EXPORT int +_borrowed_reference(PyObject *obj, PyObject **out) +{ + *out = obj; + return NPY_SUCCEED; +} + +/* * Parses the positional and keyword arguments for a generic ufunc call. * All returned arguments are new references (with optional ones NULL * if not present) @@ -575,12 +750,9 @@ get_ufunc_arguments(PyUFuncObject *ufunc, int nout = ufunc->nout; int nop = ufunc->nargs; PyObject *obj, *context; - PyObject *str_key_obj = NULL; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); - int has_sig = 0; - + PyArray_Descr *dtype = NULL; /* - * Initialize objects so caller knows when outputs and other optional + * Initialize output objects so caller knows when outputs and optional * arguments are set (also means we can safely XDECREF on failure). */ for (i = 0; i < nop; i++) { @@ -646,253 +818,149 @@ get_ufunc_arguments(PyUFuncObject *ufunc, } /* - * Get keyword output and other arguments. - * Raise an error if anything else is present in the - * keyword dictionary. + * If keywords are present, get keyword output and other arguments. + * Raise an error if anything else is present in the keyword dictionary. 
*/ - if (kwds != NULL) { - PyObject *key, *value; - Py_ssize_t pos = 0; - while (PyDict_Next(kwds, &pos, &key, &value)) { - Py_ssize_t length = 0; - char *str = NULL; - int bad_arg = 1; - -#if defined(NPY_PY3K) - Py_XDECREF(str_key_obj); - str_key_obj = PyUnicode_AsASCIIString(key); - if (str_key_obj != NULL) { - key = str_key_obj; - } -#endif - - if (PyBytes_AsStringAndSize(key, &str, &length) < 0) { - PyErr_Clear(); - PyErr_SetString(PyExc_TypeError, "invalid keyword argument"); + if (kwds) { + PyObject *out_kwd = NULL; + PyObject *sig = NULL; + static PyObject *kwnames[13] = {NULL}; + if (kwnames[0] == NULL) { + kwnames[0] = npy_um_str_out; + kwnames[1] = npy_um_str_where; + kwnames[2] = npy_um_str_axes; + kwnames[3] = npy_um_str_axis; + kwnames[4] = npy_um_str_keepdims; + kwnames[5] = npy_um_str_casting; + kwnames[6] = npy_um_str_order; + kwnames[7] = npy_um_str_dtype; + kwnames[8] = npy_um_str_subok; + kwnames[9] = npy_um_str_signature; + kwnames[10] = npy_um_str_sig; + kwnames[11] = npy_um_str_extobj; + kwnames[12] = NULL; /* sentinel */ + } + /* + * Parse using converters to calculate outputs + * (NULL outputs are treated as indicating a keyword is not allowed). + */ + if (parse_ufunc_keywords( + ufunc, kwds, kwnames, + _borrowed_reference, &out_kwd, + _wheremask_converter, out_wheremask, /* new reference */ + _new_reference, out_axes, + _new_reference, out_axis, + _keepdims_converter, out_keepdims, + PyArray_CastingConverter, out_casting, + PyArray_OrderConverter, out_order, + PyArray_DescrConverter2, &dtype, /* new reference */ + _subok_converter, out_subok, + _new_reference, out_typetup, + _borrowed_reference, &sig, + _new_reference, out_extobj) < 0) { + goto fail; + } + /* + * Check that outputs were not passed as positional as well, + * and that they are either None or an array. + */ + if (out_kwd) { /* borrowed reference */ + /* + * Output arrays are generally specified as a tuple of arrays + * and None, but may be a single array or None for ufuncs + * with a single output. 
+ */ + if (nargs > nin) { + PyErr_SetString(PyExc_ValueError, + "cannot specify 'out' as both a " + "positional and keyword argument"); goto fail; } - - switch (str[0]) { - case 'a': - /* possible axes argument for generalized ufunc */ - if (out_axes != NULL && strcmp(str, "axes") == 0) { - if (out_axis != NULL && *out_axis != NULL) { - PyErr_SetString(PyExc_TypeError, - "cannot specify both 'axis' and 'axes'"); - goto fail; - } - Py_INCREF(value); - *out_axes = value; - bad_arg = 0; - } - else if (out_axis != NULL && strcmp(str, "axis") == 0) { - if (out_axes != NULL && *out_axes != NULL) { - PyErr_SetString(PyExc_TypeError, - "cannot specify both 'axis' and 'axes'"); - goto fail; - } - Py_INCREF(value); - *out_axis = value; - bad_arg = 0; - } - break; - case 'c': - /* Provides a policy for allowed casting */ - if (strcmp(str, "casting") == 0) { - if (!PyArray_CastingConverter(value, out_casting)) { - goto fail; - } - bad_arg = 0; - } - break; - case 'd': - /* Another way to specify 'sig' */ - if (strcmp(str, "dtype") == 0) { - /* Allow this parameter to be None */ - PyArray_Descr *dtype; - if (!PyArray_DescrConverter2(value, &dtype)) { - goto fail; - } - if (dtype != NULL) { - if (*out_typetup != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "cannot specify both 'signature' and 'dtype'"); - goto fail; - } - *out_typetup = Py_BuildValue("(N)", dtype); - } - bad_arg = 0; - } - break; - case 'e': - /* - * Overrides the global parameters buffer size, - * error mask, and error object - */ - if (strcmp(str, "extobj") == 0) { - Py_INCREF(value); - *out_extobj = value; - bad_arg = 0; - } - break; - case 'k': - if (out_keepdims != NULL && strcmp(str, "keepdims") == 0) { - if (!PyBool_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "'keepdims' must be a boolean"); - goto fail; - } - *out_keepdims = (value == Py_True); - bad_arg = 0; + if (PyTuple_CheckExact(out_kwd)) { + if (PyTuple_GET_SIZE(out_kwd) != nout) { + PyErr_SetString(PyExc_ValueError, + "The 'out' tuple must have exactly " + "one entry per ufunc output"); + goto fail; + } + /* 'out' must be a tuple of arrays and Nones */ + for(i = 0; i < nout; ++i) { + PyObject *val = PyTuple_GET_ITEM(out_kwd, i); + if (_set_out_array(val, out_op+nin+i) < 0) { + goto fail; } - break; - case 'o': - /* - * Output arrays may be specified as a keyword argument, - * either as a single array or None for single output - * ufuncs, or as a tuple of arrays and Nones. 
- */ - if (strcmp(str, "out") == 0) { - if (nargs > nin) { - PyErr_SetString(PyExc_ValueError, - "cannot specify 'out' as both a " - "positional and keyword argument"); - goto fail; - } - if (PyTuple_CheckExact(value)) { - if (PyTuple_GET_SIZE(value) != nout) { - PyErr_SetString(PyExc_ValueError, - "The 'out' tuple must have exactly " - "one entry per ufunc output"); - goto fail; - } - /* 'out' must be a tuple of arrays and Nones */ - for(i = 0; i < nout; ++i) { - PyObject *val = PyTuple_GET_ITEM(value, i); - if (_set_out_array(val, out_op+nin+i) < 0) { - goto fail; - } - } - } - else if (nout == 1) { - /* Can be an array if it only has one output */ - if (_set_out_array(value, out_op + nin) < 0) { - goto fail; - } - } - else { - /* - * If the deprecated behavior is ever removed, - * keep only the else branch of this if-else - */ - if (PyArray_Check(value) || value == Py_None) { - if (DEPRECATE("passing a single array to the " - "'out' keyword argument of a " - "ufunc with\n" - "more than one output will " - "result in an error in the " - "future") < 0) { - /* The future error message */ - PyErr_SetString(PyExc_TypeError, + } + } + else if (nout == 1) { + /* Can be an array if it only has one output */ + if (_set_out_array(out_kwd, out_op + nin) < 0) { + goto fail; + } + } + else { + /* + * If the deprecated behavior is ever removed, + * keep only the else branch of this if-else + */ + if (PyArray_Check(out_kwd) || out_kwd == Py_None) { + if (DEPRECATE("passing a single array to the " + "'out' keyword argument of a " + "ufunc with\n" + "more than one output will " + "result in an error in the " + "future") < 0) { + /* The future error message */ + PyErr_SetString(PyExc_TypeError, "'out' must be a tuple of arrays"); - goto fail; - } - if (_set_out_array(value, out_op+nin) < 0) { - goto fail; - } - } - else { - PyErr_SetString(PyExc_TypeError, - nout > 1 ? "'out' must be a tuple " - "of arrays" : - "'out' must be an array or a " - "tuple of a single array"); - goto fail; - } - } - bad_arg = 0; + goto fail; } - /* Allows the default output layout to be overridden */ - else if (strcmp(str, "order") == 0) { - if (!PyArray_OrderConverter(value, out_order)) { - goto fail; - } - bad_arg = 0; + if (_set_out_array(out_kwd, out_op+nin) < 0) { + goto fail; } - break; - case 's': - /* Allows a specific function inner loop to be selected */ - if (strcmp(str, "sig") == 0 || - strcmp(str, "signature") == 0) { - if (has_sig == 1) { - PyErr_SetString(PyExc_ValueError, + } + else { + PyErr_SetString(PyExc_TypeError, + nout > 1 ? "'out' must be a tuple " + "of arrays" : + "'out' must be an array or a " + "tuple of a single array"); + goto fail; + } + } + } + /* + * Check we did not get both axis and axes, or multiple ways + * to define a signature. 
+ */ + if (out_axes != NULL && out_axis != NULL && + *out_axes != NULL && *out_axis != NULL) { + PyErr_SetString(PyExc_TypeError, + "cannot specify both 'axis' and 'axes'"); + goto fail; + } + if (sig) { /* borrowed reference */ + if (*out_typetup != NULL) { + PyErr_SetString(PyExc_ValueError, "cannot specify both 'sig' and 'signature'"); - goto fail; - } - if (*out_typetup != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "cannot specify both 'signature' and 'dtype'"); - goto fail; - } - Py_INCREF(value); - *out_typetup = value; - bad_arg = 0; - has_sig = 1; - } - else if (strcmp(str, "subok") == 0) { - if (!PyBool_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "'subok' must be a boolean"); - goto fail; - } - *out_subok = (value == Py_True); - bad_arg = 0; - } - break; - case 'w': - /* - * Provides a boolean array 'where=' mask if - * out_wheremask is supplied. - */ - if (out_wheremask != NULL && strcmp(str, "where") == 0) { - PyArray_Descr *dtype; - dtype = PyArray_DescrFromType(NPY_BOOL); - if (dtype == NULL) { - goto fail; - } - if (value == Py_True) { - /* - * Optimization: where=True is the same as no - * where argument. This lets us document it as a - * default argument - */ - bad_arg = 0; - break; - } - *out_wheremask = (PyArrayObject *)PyArray_FromAny( - value, dtype, - 0, 0, 0, NULL); - if (*out_wheremask == NULL) { - goto fail; - } - bad_arg = 0; - } - break; + goto fail; } - - if (bad_arg) { - char *format = "'%s' is an invalid keyword to ufunc '%s'"; - PyErr_Format(PyExc_TypeError, format, str, ufunc_name); + Py_INCREF(sig); + *out_typetup = sig; + } + if (dtype) { /* new reference */ + if (*out_typetup != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "cannot specify both 'signature' and 'dtype'"); goto fail; } + /* Note: "N" uses the reference */ + *out_typetup = Py_BuildValue("(N)", dtype); } } - Py_XDECREF(str_key_obj); - return 0; fail: - Py_XDECREF(str_key_obj); + Py_XDECREF(dtype); Py_XDECREF(*out_typetup); Py_XDECREF(*out_extobj); if (out_wheremask != NULL) { diff --git a/numpy/core/src/umath/ufunc_object.h b/numpy/core/src/umath/ufunc_object.h index d6fd3837a..5438270f1 100644 --- a/numpy/core/src/umath/ufunc_object.h +++ b/numpy/core/src/umath/ufunc_object.h @@ -10,13 +10,23 @@ ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *args); NPY_NO_EXPORT const char* ufunc_get_name_cstr(PyUFuncObject *ufunc); -/* interned strings (on umath import) */ -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_out; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_subok; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_prepare; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_wrap; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_array_finalize; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_ufunc; -NPY_VISIBILITY_HIDDEN extern PyObject * npy_um_str_pyvals_name; +/* strings from umathmodule.c that are interned on umath import */ +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_out; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_where; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_axes; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_axis; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_keepdims; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_casting; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_order; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_dtype; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_subok; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_signature; +NPY_VISIBILITY_HIDDEN extern PyObject 
*npy_um_str_sig; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_extobj; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_prepare; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_wrap; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_array_finalize; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_ufunc; +NPY_VISIBILITY_HIDDEN extern PyObject *npy_um_str_pyvals_name; #endif diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index 5567b9bbf..9291a5138 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -226,20 +226,40 @@ add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args) ***************************************************************************** */ -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_out = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_subok = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_prepare = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_wrap = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_array_finalize = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_ufunc = NULL; -NPY_VISIBILITY_HIDDEN PyObject * npy_um_str_pyvals_name = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_out = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_where = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_axes = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_axis = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_keepdims = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_casting = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_order = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_dtype = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_subok = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_signature = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_sig = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_extobj = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_prepare = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_wrap = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_array_finalize = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_ufunc = NULL; +NPY_VISIBILITY_HIDDEN PyObject *npy_um_str_pyvals_name = NULL; /* intern some strings used in ufuncs */ static int intern_strings(void) { npy_um_str_out = PyUString_InternFromString("out"); + npy_um_str_where = PyUString_InternFromString("where"); + npy_um_str_axes = PyUString_InternFromString("axes"); + npy_um_str_axis = PyUString_InternFromString("axis"); + npy_um_str_keepdims = PyUString_InternFromString("keepdims"); + npy_um_str_casting = PyUString_InternFromString("casting"); + npy_um_str_order = PyUString_InternFromString("order"); + npy_um_str_dtype = PyUString_InternFromString("dtype"); npy_um_str_subok = PyUString_InternFromString("subok"); + npy_um_str_signature = PyUString_InternFromString("signature"); + npy_um_str_sig = PyUString_InternFromString("sig"); + npy_um_str_extobj = PyUString_InternFromString("extobj"); npy_um_str_array_prepare = PyUString_InternFromString("__array_prepare__"); npy_um_str_array_wrap = PyUString_InternFromString("__array_wrap__"); npy_um_str_array_finalize = PyUString_InternFromString("__array_finalize__"); diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index a72079218..8ce374a75 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -16,7 +16,7 @@ for size, char in zip(sizes, chars): global_size_dict[char] = size -class TestEinSum(object): +class TestEinsum(object): def 
test_einsum_errors(self): for do_opt in [True, False]: # Need enough arguments @@ -614,7 +614,7 @@ class TestEinSum(object): np.einsum(a, [0, 51], b, [51, 2], [0, 2], optimize=False) assert_raises(ValueError, lambda: np.einsum(a, [0, 52], b, [52, 2], [0, 2], optimize=False)) assert_raises(ValueError, lambda: np.einsum(a, [-1, 5], b, [5, 2], [-1, 2], optimize=False)) - + def test_einsum_broadcast(self): # Issue #2455 change in handling ellipsis # remove the 'middle broadcast' error @@ -735,19 +735,22 @@ class TestEinSum(object): res = np.einsum('...ij,...jk->...ik', a, a, out=a) assert res is a - def optimize_compare(self, string): + def optimize_compare(self, subscripts, operands=None): # Tests all paths of the optimization function against # conventional einsum - operands = [string] - terms = string.split('->')[0].split(',') - for term in terms: - dims = [global_size_dict[x] for x in term] - operands.append(np.random.rand(*dims)) - - noopt = np.einsum(*operands, optimize=False) - opt = np.einsum(*operands, optimize='greedy') + if operands is None: + args = [subscripts] + terms = subscripts.split('->')[0].split(',') + for term in terms: + dims = [global_size_dict[x] for x in term] + args.append(np.random.rand(*dims)) + else: + args = [subscripts] + operands + + noopt = np.einsum(*args, optimize=False) + opt = np.einsum(*args, optimize='greedy') assert_almost_equal(opt, noopt) - opt = np.einsum(*operands, optimize='optimal') + opt = np.einsum(*args, optimize='optimal') assert_almost_equal(opt, noopt) def test_hadamard_like_products(self): @@ -833,8 +836,28 @@ class TestEinSum(object): b = np.einsum('bbcdc->d', a) assert_equal(b, [12]) + def test_broadcasting_dot_cases(self): + # Ensures broadcasting cases are not mistaken for GEMM -class TestEinSumPath(object): + a = np.random.rand(1, 5, 4) + b = np.random.rand(4, 6) + c = np.random.rand(5, 6) + d = np.random.rand(10) + + self.optimize_compare('ijk,kl,jl', operands=[a, b, c]) + self.optimize_compare('ijk,kl,jl,i->i', operands=[a, b, c, d]) + + e = np.random.rand(1, 1, 5, 4) + f = np.random.rand(7, 7) + self.optimize_compare('abjk,kl,jl', operands=[e, b, c]) + self.optimize_compare('abjk,kl,jl,ab->ab', operands=[e, b, c, f]) + + # Edge case found in gh-11308 + g = np.arange(64).reshape(2, 4, 8) + self.optimize_compare('obk,ijk->ioj', operands=[g, g]) + + +class TestEinsumPath(object): def build_operands(self, string, size_dict=global_size_dict): # Builds views based off initial operands @@ -880,7 +903,7 @@ class TestEinSumPath(object): long_test1 = self.build_operands('acdf,jbje,gihb,hfac,gfac,gifabc,hfac') path, path_str = np.einsum_path(*long_test1, optimize='greedy') self.assert_path_equal(path, ['einsum_path', - (1, 4), (2, 4), (1, 4), (1, 3), (1, 2), (0, 1)]) + (3, 6), (3, 4), (2, 4), (2, 3), (0, 2), (0, 1)]) path, path_str = np.einsum_path(*long_test1, optimize='optimal') self.assert_path_equal(path, ['einsum_path', @@ -889,10 +912,10 @@ class TestEinSumPath(object): # Long test 2 long_test2 = self.build_operands('chd,bde,agbc,hiad,bdi,cgh,agdb') path, path_str = np.einsum_path(*long_test2, optimize='greedy') self.assert_path_equal(path, ['einsum_path', (3, 4), (0, 3), (3, 4), (1, 3), (1, 2), (0, 1)]) path, path_str = np.einsum_path(*long_test2, optimize='optimal') self.assert_path_equal(path, ['einsum_path', (0, 5), (1, 4), (3, 4), (1, 3), (1, 2), (0, 1)]) @@ -926,7 +949,7 @@ class TestEinSumPath(object): # Edge test4 edge_test4 = self.build_operands('dcc,fce,ea,dbf->ab') path, path_str =
np.einsum_path(*edge_test4, optimize='greedy') - self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 2), (0, 1)]) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) path, path_str = np.einsum_path(*edge_test4, optimize='optimal') self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 2), (0, 1)]) @@ -949,7 +972,7 @@ class TestEinSumPath(object): self.assert_path_equal(path, ['einsum_path', (0, 1, 2, 3)]) path, path_str = np.einsum_path(*path_test, optimize=True) - self.assert_path_equal(path, ['einsum_path', (0, 3), (0, 2), (0, 1)]) + self.assert_path_equal(path, ['einsum_path', (1, 2), (0, 1), (0, 1)]) exp_path = ['einsum_path', (0, 2), (0, 2), (0, 1)] path, path_str = np.einsum_path(*path_test, optimize=exp_path) diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index cbcd3e994..276cd9f93 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -329,6 +329,21 @@ class TestIndexing(object): assert_raises(IndexError, a.__getitem__, ind) assert_raises(IndexError, a.__setitem__, ind, 0) + def test_trivial_fancy_not_possible(self): + # Test that the fast path for trivial assignment is not incorrectly + # used when the index is not contiguous or 1D, see also gh-11467. + a = np.arange(6) + idx = np.arange(6, dtype=np.intp).reshape(2, 1, 3)[:, :, 0] + assert_array_equal(a[idx], idx) + + # This case must not go into the fast path; note that idx is + # a non-contiguous, non-1D array here. + a[idx] = -1 + res = np.arange(6) + res[0] = -1 + res[3] = -1 + assert_array_equal(a, res) + def test_nonbaseclass_values(self): class SubClass(np.ndarray): def __array_finalize__(self, old): diff --git a/numpy/distutils/__init__.py b/numpy/distutils/__init__.py index b794bebd7..8dd326920 100644 --- a/numpy/distutils/__init__.py +++ b/numpy/distutils/__init__.py @@ -17,7 +17,7 @@ try: # Normally numpy is installed if the above import works, but an interrupted # in-place build could also have left a __config__.py. In that case the # next import may still fail, so keep it inside the try block.
- from numpy.testing._private.pytesttester import PytestTester + from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester except ImportError: diff --git a/numpy/f2py/__init__.py b/numpy/f2py/__init__.py index 5075c682d..fbb64f762 100644 --- a/numpy/f2py/__init__.py +++ b/numpy/f2py/__init__.py @@ -69,6 +69,6 @@ def compile(source, f.close() return status -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/fft/__init__.py b/numpy/fft/__init__.py index bbb6ec8c7..44243b483 100644 --- a/numpy/fft/__init__.py +++ b/numpy/fft/__init__.py @@ -6,6 +6,6 @@ from .info import __doc__ from .fftpack import * from .helper import * -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index d764cdc7e..dc40ac67b 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -46,6 +46,6 @@ __all__ += financial.__all__ __all__ += nanfunctions.__all__ __all__ += histograms.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 26ef3e235..9a680dd55 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -27,6 +27,7 @@ from numpy.core.fromnumeric import ( ravel, nonzero, sort, partition, mean, any, sum ) from numpy.core.numerictypes import typecodes, number +from numpy.core.function_base import add_newdoc from numpy.lib.twodim_base import diag from .utils import deprecate from numpy.core.multiarray import ( @@ -3892,41 +3893,6 @@ def trapz(y, x=None, dx=1.0, axis=-1): return ret -#always succeed -def add_newdoc(place, obj, doc): - """ - Adds documentation to obj which is in module place. - - If doc is a string add it to obj as a docstring - - If doc is a tuple, then the first element is interpreted as - an attribute of obj and the second as the docstring - (method, docstring) - - If doc is a list, then each element of the list should be a - sequence of length two --> [(method1, docstring1), - (method2, docstring2), ...] - - This routine never raises an error. - - This routine cannot modify read-only docstrings, as appear - in new-style classes or built-in functions. Because this - routine never raises an error the caller must check manually - that the docstrings were changed. 
- """ - try: - new = getattr(__import__(place, globals(), {}, [obj]), obj) - if isinstance(doc, str): - add_docstring(new, doc.strip()) - elif isinstance(doc, tuple): - add_docstring(getattr(new, doc[0]), doc[1].strip()) - elif isinstance(doc, list): - for val in doc: - add_docstring(getattr(new, val[0]), val[1].strip()) - except Exception: - pass - - # Based on scitools meshgrid def meshgrid(*xi, **kwargs): """ diff --git a/numpy/linalg/__init__.py b/numpy/linalg/__init__.py index 37bd27574..4b696c883 100644 --- a/numpy/linalg/__init__.py +++ b/numpy/linalg/__init__.py @@ -50,6 +50,6 @@ from .info import __doc__ from .linalg import * -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/ma/__init__.py b/numpy/ma/__init__.py index 34f21b8b1..36ceb1f6e 100644 --- a/numpy/ma/__init__.py +++ b/numpy/ma/__init__.py @@ -51,6 +51,6 @@ __all__ = ['core', 'extras'] __all__ += core.__all__ __all__ += extras.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/matrixlib/__init__.py b/numpy/matrixlib/__init__.py index 3ad3a9549..777e0cd33 100644 --- a/numpy/matrixlib/__init__.py +++ b/numpy/matrixlib/__init__.py @@ -7,6 +7,6 @@ from .defmatrix import * __all__ = defmatrix.__all__ -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/polynomial/__init__.py b/numpy/polynomial/__init__.py index c18bebedb..85cee9ce6 100644 --- a/numpy/polynomial/__init__.py +++ b/numpy/polynomial/__init__.py @@ -22,6 +22,6 @@ from .hermite import Hermite from .hermite_e import HermiteE from .laguerre import Laguerre -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/random/__init__.py b/numpy/random/__init__.py index 81cb94cc1..82aefce5f 100644 --- a/numpy/random/__init__.py +++ b/numpy/random/__init__.py @@ -117,6 +117,6 @@ def __RandomState_ctor(): """ return RandomState(seed=0) -from numpy.testing._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester diff --git a/numpy/testing/__init__.py b/numpy/testing/__init__.py index a7c85931c..a8bd4fc15 100644 --- a/numpy/testing/__init__.py +++ b/numpy/testing/__init__.py @@ -17,6 +17,6 @@ from ._private.nosetester import ( __all__ = _private.utils.__all__ + ['TestCase', 'run_module_suite'] -from ._private.pytesttester import PytestTester +from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester |