summaryrefslogtreecommitdiff
path: root/doc/source/reference
diff options
context:
space:
mode:
Diffstat (limited to 'doc/source/reference')
-rw-r--r--doc/source/reference/arrays.nditer.cython.rst147
-rw-r--r--doc/source/reference/arrays.nditer.rst181
-rw-r--r--doc/source/reference/c-api/array.rst2
-rw-r--r--doc/source/reference/c-api/index.rst2
-rw-r--r--doc/source/reference/index.rst2
-rw-r--r--doc/source/reference/maskedarray.baseclass.rst1
-rw-r--r--doc/source/reference/routines.ma.rst1
7 files changed, 173 insertions, 163 deletions
diff --git a/doc/source/reference/arrays.nditer.cython.rst b/doc/source/reference/arrays.nditer.cython.rst
new file mode 100644
index 000000000..2cc7763ed
--- /dev/null
+++ b/doc/source/reference/arrays.nditer.cython.rst
@@ -0,0 +1,147 @@
+Putting the Inner Loop in Cython
+================================
+
+Those who want really good performance out of their low level operations
+should strongly consider directly using the iteration API provided
+in C, but for those who are not comfortable with C or C++, Cython
+is a good middle ground with reasonable performance tradeoffs. For
+the :class:`nditer` object, this means letting the iterator take care
+of broadcasting, dtype conversion, and buffering, while giving the inner
+loop to Cython.
+
+For our example, we'll create a sum of squares function. To start,
+let's implement this function in straightforward Python. We want to
+support an 'axis' parameter similar to the numpy :func:`sum` function,
+so we will need to construct a list for the `op_axes` parameter.
+Here's how this looks.
+
+.. admonition:: Example
+
+ >>> def axis_to_axeslist(axis, ndim):
+ ... if axis is None:
+ ... return [-1] * ndim
+ ... else:
+ ... if type(axis) is not tuple:
+ ... axis = (axis,)
+ ... axeslist = [1] * ndim
+ ... for i in axis:
+ ... axeslist[i] = -1
+ ... ax = 0
+ ... for i in range(ndim):
+ ... if axeslist[i] != -1:
+ ... axeslist[i] = ax
+ ... ax += 1
+ ... return axeslist
+ ...
+ >>> def sum_squares_py(arr, axis=None, out=None):
+ ... axeslist = axis_to_axeslist(axis, arr.ndim)
+ ... it = np.nditer([arr, out], flags=['reduce_ok',
+ ... 'buffered', 'delay_bufalloc'],
+ ... op_flags=[['readonly'], ['readwrite', 'allocate']],
+ ... op_axes=[None, axeslist],
+ ... op_dtypes=['float64', 'float64'])
+ ... with it:
+ ... it.operands[1][...] = 0
+ ... it.reset()
+ ... for x, y in it:
+ ... y[...] += x*x
+ ... return it.operands[1]
+ ...
+ >>> a = np.arange(6).reshape(2,3)
+ >>> sum_squares_py(a)
+ array(55.0)
+ >>> sum_squares_py(a, axis=-1)
+ array([ 5., 50.])
+
+To Cython-ize this function, we replace the inner loop (y[...] += x*x) with
+Cython code that's specialized for the float64 dtype. With the
+'external_loop' flag enabled, the arrays provided to the inner loop will
+always be one-dimensional, so very little checking needs to be done.
+
+Here's the listing of sum_squares.pyx::
+
+ import numpy as np
+ cimport numpy as np
+ cimport cython
+
+ def axis_to_axeslist(axis, ndim):
+ if axis is None:
+ return [-1] * ndim
+ else:
+ if type(axis) is not tuple:
+ axis = (axis,)
+ axeslist = [1] * ndim
+ for i in axis:
+ axeslist[i] = -1
+ ax = 0
+ for i in range(ndim):
+ if axeslist[i] != -1:
+ axeslist[i] = ax
+ ax += 1
+ return axeslist
+
+ @cython.boundscheck(False)
+ def sum_squares_cy(arr, axis=None, out=None):
+ cdef np.ndarray[double] x
+ cdef np.ndarray[double] y
+ cdef int size
+ cdef double value
+
+ axeslist = axis_to_axeslist(axis, arr.ndim)
+ it = np.nditer([arr, out], flags=['reduce_ok', 'external_loop',
+ 'buffered', 'delay_bufalloc'],
+ op_flags=[['readonly'], ['readwrite', 'allocate']],
+ op_axes=[None, axeslist],
+ op_dtypes=['float64', 'float64'])
+ with it:
+ it.operands[1][...] = 0
+ it.reset()
+ for xarr, yarr in it:
+ x = xarr
+ y = yarr
+ size = x.shape[0]
+ for i in range(size):
+ value = x[i]
+ y[i] = y[i] + value * value
+ return it.operands[1]
+
+On this machine, building the .pyx file into a module looked like the
+following, but you may have to find some Cython tutorials to tell you
+the specifics for your system configuration.::
+
+ $ cython sum_squares.pyx
+ $ gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -I/usr/include/python2.7 -fno-strict-aliasing -o sum_squares.so sum_squares.c
+
+Running this from the Python interpreter produces the same answers
+as our native Python/NumPy code did.
+
+.. admonition:: Example
+
+ >>> from sum_squares import sum_squares_cy
+ >>> a = np.arange(6).reshape(2,3)
+ >>> sum_squares_cy(a)
+ array(55.0)
+ >>> sum_squares_cy(a, axis=-1)
+ array([ 5., 50.])
+
+Doing a little timing in IPython shows that the reduced overhead and
+memory allocation of the Cython inner loop is providing a very nice
+speedup over both the straightforward Python code and an expression
+using NumPy's built-in sum function.::
+
+ >>> a = np.random.rand(1000,1000)
+
+ >>> timeit sum_squares_py(a, axis=-1)
+ 10 loops, best of 3: 37.1 ms per loop
+
+ >>> timeit np.sum(a*a, axis=-1)
+ 10 loops, best of 3: 20.9 ms per loop
+
+ >>> timeit sum_squares_cy(a, axis=-1)
+ 100 loops, best of 3: 11.8 ms per loop
+
+ >>> np.all(sum_squares_cy(a, axis=-1) == np.sum(a*a, axis=-1))
+ True
+
+ >>> np.all(sum_squares_py(a, axis=-1) == np.sum(a*a, axis=-1))
+ True
diff --git a/doc/source/reference/arrays.nditer.rst b/doc/source/reference/arrays.nditer.rst
index 7dab09a71..2db12a408 100644
--- a/doc/source/reference/arrays.nditer.rst
+++ b/doc/source/reference/arrays.nditer.rst
@@ -1,5 +1,9 @@
.. currentmodule:: numpy
+.. for doctests
+ The last section on Cython is 'included' at the end of this file. The tests
+ for that section are disabled.
+
.. _arrays.nditer:
*********************
@@ -218,21 +222,21 @@ produce identical results to the ones in the previous section.
>>> it = np.nditer(a, flags=['f_index'])
>>> while not it.finished:
... print("%d <%d>" % (it[0], it.index), end=' ')
- ... it.iternext()
+ ... is_not_finished = it.iternext()
...
0 <0> 1 <2> 2 <4> 3 <1> 4 <3> 5 <5>
>>> it = np.nditer(a, flags=['multi_index'])
>>> while not it.finished:
... print("%d <%s>" % (it[0], it.multi_index), end=' ')
- ... it.iternext()
+ ... is_not_finished = it.iternext()
...
0 <(0, 0)> 1 <(0, 1)> 2 <(0, 2)> 3 <(1, 0)> 4 <(1, 1)> 5 <(1, 2)>
>>> with np.nditer(a, flags=['multi_index'], op_flags=['writeonly']) as it:
... while not it.finished:
... it[0] = it.multi_index[1] - it.multi_index[0]
- ... it.iternext()
+ ... is_not_finished = it.iternext()
...
>>> a
array([[ 0, 1, 2],
@@ -316,12 +320,13 @@ specified as an iterator flag.
... op_dtypes=['complex128']):
... print(np.sqrt(x), end=' ')
...
- 1.73205080757j 1.41421356237j 1j 0j (1+0j) (1.41421356237+0j)
+ 1.7320508075688772j 1.4142135623730951j 1j 0j (1+0j) (1.4142135623730951+0j)
>>> for x in np.nditer(a, flags=['buffered'], op_dtypes=['complex128']):
... print(np.sqrt(x), end=' ')
...
- 1.73205080757j 1.41421356237j 1j 0j (1+0j) (1.41421356237+0j)
+ 1.7320508075688772j 1.4142135623730951j 1j 0j (1+0j) (1.4142135623730951+0j)
+
The iterator uses NumPy's casting rules to determine whether a specific
conversion is permitted. By default, it enforces 'safe' casting. This means,
@@ -405,8 +410,8 @@ which includes the input shapes to help diagnose the problem.
... print("%d:%d" % (x,y), end=' ')
...
Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- ValueError: operands could not be broadcast together with shapes (2) (2,3)
+ ...
+ ValueError: operands could not be broadcast together with shapes (2,) (2,3)
Iterator-Allocated Output Arrays
--------------------------------
@@ -482,9 +487,9 @@ reasons.
>>> square(np.arange(6).reshape(2,3), out=b)
Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- File "<stdin>", line 4, in square
- ValueError: non-broadcastable output operand with shape (3) doesn't match the broadcast shape (2,3)
+ ...
+ ValueError: non-broadcastable output operand with shape (3,) doesn't
+ match the broadcast shape (2,3)
Outer Product Iteration
-----------------------
@@ -550,7 +555,7 @@ For a simple example, consider taking the sum of all elements in an array.
>>> a = np.arange(24).reshape(2,3,4)
>>> b = np.array(0)
- >>> with np.nditer([a, b], flags=['reduce_ok', 'external_loop'],
+ >>> with np.nditer([a, b], flags=['reduce_ok'],
... op_flags=[['readonly'], ['readwrite']]) as it:
... for x,y in it:
... y[...] += x
@@ -568,7 +573,7 @@ sums along the last axis of `a`.
.. admonition:: Example
>>> a = np.arange(24).reshape(2,3,4)
- >>> it = np.nditer([a, None], flags=['reduce_ok', 'external_loop'],
+ >>> it = np.nditer([a, None], flags=['reduce_ok'],
... op_flags=[['readonly'], ['readwrite', 'allocate']],
... op_axes=[None, [0,1,-1]])
>>> with it:
@@ -602,7 +607,7 @@ buffering.
.. admonition:: Example
>>> a = np.arange(24).reshape(2,3,4)
- >>> it = np.nditer([a, None], flags=['reduce_ok', 'external_loop',
+ >>> it = np.nditer([a, None], flags=['reduce_ok',
... 'buffered', 'delay_bufalloc'],
... op_flags=[['readonly'], ['readwrite', 'allocate']],
... op_axes=[None, [0,1,-1]])
@@ -617,150 +622,8 @@ buffering.
array([[ 6, 22, 38],
[54, 70, 86]])
-Putting the Inner Loop in Cython
-================================
-
-Those who want really good performance out of their low level operations
-should strongly consider directly using the iteration API provided
-in C, but for those who are not comfortable with C or C++, Cython
-is a good middle ground with reasonable performance tradeoffs. For
-the :class:`nditer` object, this means letting the iterator take care
-of broadcasting, dtype conversion, and buffering, while giving the inner
-loop to Cython.
-
-For our example, we'll create a sum of squares function. To start,
-let's implement this function in straightforward Python. We want to
-support an 'axis' parameter similar to the numpy :func:`sum` function,
-so we will need to construct a list for the `op_axes` parameter.
-Here's how this looks.
-
-.. admonition:: Example
-
- >>> def axis_to_axeslist(axis, ndim):
- ... if axis is None:
- ... return [-1] * ndim
- ... else:
- ... if type(axis) is not tuple:
- ... axis = (axis,)
- ... axeslist = [1] * ndim
- ... for i in axis:
- ... axeslist[i] = -1
- ... ax = 0
- ... for i in range(ndim):
- ... if axeslist[i] != -1:
- ... axeslist[i] = ax
- ... ax += 1
- ... return axeslist
- ...
- >>> def sum_squares_py(arr, axis=None, out=None):
- ... axeslist = axis_to_axeslist(axis, arr.ndim)
- ... it = np.nditer([arr, out], flags=['reduce_ok', 'external_loop',
- ... 'buffered', 'delay_bufalloc'],
- ... op_flags=[['readonly'], ['readwrite', 'allocate']],
- ... op_axes=[None, axeslist],
- ... op_dtypes=['float64', 'float64'])
- ... with it:
- ... it.operands[1][...] = 0
- ... it.reset()
- ... for x, y in it:
- ... y[...] += x*x
- ... return it.operands[1]
- ...
- >>> a = np.arange(6).reshape(2,3)
- >>> sum_squares_py(a)
- array(55.0)
- >>> sum_squares_py(a, axis=-1)
- array([ 5., 50.])
-
-To Cython-ize this function, we replace the inner loop (y[...] += x*x) with
-Cython code that's specialized for the float64 dtype. With the
-'external_loop' flag enabled, the arrays provided to the inner loop will
-always be one-dimensional, so very little checking needs to be done.
-
-Here's the listing of sum_squares.pyx::
-
- import numpy as np
- cimport numpy as np
- cimport cython
-
- def axis_to_axeslist(axis, ndim):
- if axis is None:
- return [-1] * ndim
- else:
- if type(axis) is not tuple:
- axis = (axis,)
- axeslist = [1] * ndim
- for i in axis:
- axeslist[i] = -1
- ax = 0
- for i in range(ndim):
- if axeslist[i] != -1:
- axeslist[i] = ax
- ax += 1
- return axeslist
-
- @cython.boundscheck(False)
- def sum_squares_cy(arr, axis=None, out=None):
- cdef np.ndarray[double] x
- cdef np.ndarray[double] y
- cdef int size
- cdef double value
-
- axeslist = axis_to_axeslist(axis, arr.ndim)
- it = np.nditer([arr, out], flags=['reduce_ok', 'external_loop',
- 'buffered', 'delay_bufalloc'],
- op_flags=[['readonly'], ['readwrite', 'allocate']],
- op_axes=[None, axeslist],
- op_dtypes=['float64', 'float64'])
- with it:
- it.operands[1][...] = 0
- it.reset()
- for xarr, yarr in it:
- x = xarr
- y = yarr
- size = x.shape[0]
- for i in range(size):
- value = x[i]
- y[i] = y[i] + value * value
- return it.operands[1]
-
-On this machine, building the .pyx file into a module looked like the
-following, but you may have to find some Cython tutorials to tell you
-the specifics for your system configuration.::
-
- $ cython sum_squares.pyx
- $ gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -I/usr/include/python2.7 -fno-strict-aliasing -o sum_squares.so sum_squares.c
-
-Running this from the Python interpreter produces the same answers
-as our native Python/NumPy code did.
-
-.. admonition:: Example
-
- >>> from sum_squares import sum_squares_cy
- >>> a = np.arange(6).reshape(2,3)
- >>> sum_squares_cy(a)
- array(55.0)
- >>> sum_squares_cy(a, axis=-1)
- array([ 5., 50.])
-
-Doing a little timing in IPython shows that the reduced overhead and
-memory allocation of the Cython inner loop is providing a very nice
-speedup over both the straightforward Python code and an expression
-using NumPy's built-in sum function.::
-
- >>> a = np.random.rand(1000,1000)
-
- >>> timeit sum_squares_py(a, axis=-1)
- 10 loops, best of 3: 37.1 ms per loop
-
- >>> timeit np.sum(a*a, axis=-1)
- 10 loops, best of 3: 20.9 ms per loop
-
- >>> timeit sum_squares_cy(a, axis=-1)
- 100 loops, best of 3: 11.8 ms per loop
-
- >>> np.all(sum_squares_cy(a, axis=-1) == np.sum(a*a, axis=-1))
- True
+.. for doctests
+ Include Cython section separately. Those tests are skipped entirely via an
+ entry in RST_SKIPLIST
- >>> np.all(sum_squares_py(a, axis=-1) == np.sum(a*a, axis=-1))
- True
+.. include:: arrays.nditer.cython.rst
diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst
index 46af1b45b..22b15fc57 100644
--- a/doc/source/reference/c-api/array.rst
+++ b/doc/source/reference/c-api/array.rst
@@ -834,7 +834,7 @@ General check of Python Type
.. c:function:: PyArray_IsPythonScalar(op)
Evaluates true if *op* is a builtin Python scalar object (int,
- float, complex, str, unicode, long, bool).
+ float, complex, bytes, str, long, bool).
.. c:function:: PyArray_IsAnyScalar(op)
diff --git a/doc/source/reference/c-api/index.rst b/doc/source/reference/c-api/index.rst
index 56fe8e473..bb1ed154e 100644
--- a/doc/source/reference/c-api/index.rst
+++ b/doc/source/reference/c-api/index.rst
@@ -21,7 +21,7 @@ experience at first. Be assured that the task becomes easier with
practice, and you may be surprised at how simple the C-code can be to
understand. Even if you don't think you can write C-code from scratch,
it is much easier to understand and modify already-written source code
-then create it *de novo*.
+than create it *de novo*.
Python extensions are especially straightforward to understand because
they all have a very similar structure. Admittedly, NumPy is not a
diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst
index 6742d605a..d042a1849 100644
--- a/doc/source/reference/index.rst
+++ b/doc/source/reference/index.rst
@@ -12,7 +12,7 @@ NumPy Reference
This reference manual details functions, modules, and objects
included in NumPy, describing what they are and what they do.
-For learning how to use NumPy, see also :ref:`user`.
+For learning how to use NumPy, see the :ref:`complete documentation <manual>`.
.. toctree::
diff --git a/doc/source/reference/maskedarray.baseclass.rst b/doc/source/reference/maskedarray.baseclass.rst
index 9864f21ea..57bbaa8f8 100644
--- a/doc/source/reference/maskedarray.baseclass.rst
+++ b/doc/source/reference/maskedarray.baseclass.rst
@@ -125,7 +125,6 @@ Conversion
MaskedArray.__float__
MaskedArray.__int__
- MaskedArray.__long__
MaskedArray.view
MaskedArray.astype
diff --git a/doc/source/reference/routines.ma.rst b/doc/source/reference/routines.ma.rst
index 5b2098c7a..346ce2a1b 100644
--- a/doc/source/reference/routines.ma.rst
+++ b/doc/source/reference/routines.ma.rst
@@ -396,6 +396,7 @@ Miscellanea
ma.allequal
ma.allclose
ma.apply_along_axis
+ ma.apply_over_axes
ma.arange
ma.choose
ma.ediff1d