From 6647bf7eaeb915e2d09db8b5c7584ee286962d3b Mon Sep 17 00:00:00 2001
From: Stefan van der Walt
Date: Tue, 5 Aug 2008 09:20:07 +0000
Subject: Merge from documentation editor.

---
 numpy/doc/reference/basics.py            | 132 ++++++++++-
 numpy/doc/reference/broadcasting.py      | 171 +++++++++++++-
 numpy/doc/reference/creation.py          | 127 ++++++++++-
 numpy/doc/reference/glossary.py          | 362 ++++++++++++++++++++++++++++-
 numpy/doc/reference/indexing.py          | 379 ++++++++++++++++++++++++++++++-
 numpy/doc/reference/internals.py         | 157 ++++++++++++-
 numpy/doc/reference/structured_arrays.py | 175 ++++++++++++++-
 numpy/doc/reference/ufuncs.py            | 130 ++++++++++-
 numpy/doc/reference/zen.py               |   9 -
 9 files changed, 1615 insertions(+), 27 deletions(-)
 delete mode 100644 numpy/doc/reference/zen.py
(limited to 'numpy/doc')

diff --git a/numpy/doc/reference/basics.py b/numpy/doc/reference/basics.py
index 6b86f6741..dfb8fe74d 100644
--- a/numpy/doc/reference/basics.py
+++ b/numpy/doc/reference/basics.py
@@ -1,9 +1,137 @@
 """
-
 ============
 Array basics
 ============
 
-Placeholder for array basics documentation.
+Array types and conversions between types
+=========================================
+
+Numpy supports a much greater variety of numerical types than Python does.
+This section shows which are available, and how to modify an array's data-type.
+
+========== =========================================================
+Data type  Description
+========== =========================================================
+bool       Boolean (True or False) stored as a byte
+int        Platform integer (normally either ``int32`` or ``int64``)
+int8       Byte (-128 to 127)
+int16      Integer (-32768 to 32767)
+int32      Integer (-2147483648 to 2147483647)
+int64      Integer (-9223372036854775808 to 9223372036854775807)
+uint8      Unsigned integer (0 to 255)
+uint16     Unsigned integer (0 to 65535)
+uint32     Unsigned integer (0 to 4294967295)
+uint64     Unsigned integer (0 to 18446744073709551615)
+float      Shorthand for ``float64``.
+float32    Single precision float: sign bit, 8 bits exponent,
+           23 bits mantissa
+float64    Double precision float: sign bit, 11 bits exponent,
+           52 bits mantissa
+complex    Shorthand for ``complex128``.
+complex64  Complex number, represented by two 32-bit floats (real
+           and imaginary components)
+complex128 Complex number, represented by two 64-bit floats (real
+           and imaginary components)
+========== =========================================================
+
+Numpy numerical types are instances of ``dtype`` (data-type) objects, each
+having unique characteristics. Once you have imported NumPy using
+
+  ::
+
+  >>> import numpy as np
+
+the dtypes are available as ``np.bool``, ``np.float32``, etc.
+
+Advanced types, not listed in the table above, are explored in
+section `link_here`.
+
+There are 5 basic numerical types representing booleans (bool), integers (int),
+unsigned integers (uint), floating point (float) and complex. Those with numbers
+in their name indicate the bitsize of the type (i.e. how many bits are needed
+to represent a single value in memory). Some types, such as ``int`` and
+``intp``, have differing bitsizes, dependent on the platform (e.g. 32-bit
+vs. 64-bit machines). This should be taken into account when interfacing
+with low-level code (such as C or Fortran) where the raw memory is addressed.
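+
+A quick way to check this in practice is to inspect a dtype's ``itemsize``
+(a small sketch; the values shown assume a typical 64-bit platform and will
+differ elsewhere)::
+
+  >>> np.dtype(np.intp).itemsize   # size in bytes of the pointer-sized integer
+  8
+  >>> np.iinfo(np.int_).max        # largest value of the platform integer
+  9223372036854775807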
+ +Data-types can be used as functions to convert python numbers to array scalars +(see the array scalar section for an explanation), python sequences of numbers +to arrays of that type, or as arguments to the dtype keyword that many numpy +functions or methods accept. Some examples:: + + >>> import numpy as np + >>> x = np.float32(1.0) + >>> x + 1.0 + >>> y = np.int_([1,2,4]) + >>> y + array([1, 2, 4]) + >>> z = np.arange(3, dtype=np.uint8) + array([0, 1, 2], dtype=uint8) + +Array types can also be referred to by character codes, mostly to retain +backward compatibility with older packages such as Numeric. Some +documentation may still refer to these, for example:: + + >>> np.array([1, 2, 3], dtype='f') + array([ 1., 2., 3.], dtype=float32) + +We recommend using dtype objects instead. + +To convert the type of an array, use the .astype() method (preferred) or +the type itself as a function. For example: :: + + >>> z.astype(float) + array([0., 1., 2.]) + >>> np.int8(z) + array([0, 1, 2], dtype=int8) + +Note that, above, we use the *Python* float object as a dtype. NumPy knows +that ``int`` refers to ``np.int``, ``bool`` means ``np.bool`` and +that ``float`` is ``np.float``. The other data-types do not have Python +equivalents. + +To determine the type of an array, look at the dtype attribute:: + + >>> z.dtype + dtype('uint8') + +dtype objects also contain information about the type, such as its bit-width +and its byte-order. See xxx for details. The data type can also be used +indirectly to query properties of the type, such as whether it is an integer:: + + >>> d = np.dtype(int) + >>> d + dtype('int32') + + >>> np.issubdtype(d, int) + True + + >>> np.issubdtype(d, float) + False + + +Array Scalars +============= + +Numpy generally returns elements of arrays as array scalars (a scalar +with an associated dtype). Array scalars differ from Python scalars, but +for the most part they can be used interchangeably (the primary +exception is for versions of Python older than v2.x, where integer array +scalars cannot act as indices for lists and tuples). There are some +exceptions, such as when code requires very specific attributes of a scalar +or when it checks specifically whether a value is a Python scalar. Generally, +problems are easily fixed by explicitly converting array scalars +to Python scalars, using the corresponding Python type function +(e.g., ``int``, ``float``, ``complex``, ``str``, ``unicode``). + +The primary advantage of using array scalars is that +they preserve the array type (Python may not have a matching scalar type +available, e.g. ``int16``). Therefore, the use of array scalars ensures +identical behaviour between arrays and scalars, irrespective of whether the +value is inside an array or not. NumPy scalars also have many of the same +methods arrays do. + +See xxx for details. """ diff --git a/numpy/doc/reference/broadcasting.py b/numpy/doc/reference/broadcasting.py index 797d0edba..95e9b67f9 100644 --- a/numpy/doc/reference/broadcasting.py +++ b/numpy/doc/reference/broadcasting.py @@ -1,9 +1,176 @@ """ - ======================== Broadcasting over arrays ======================== -Placeholder for broadcasting documentation. +The term broadcasting describes how numpy treats arrays with different +shapes during arithmetic operations. Subject to certain constraints, +the smaller array is "broadcast" across the larger array so that they +have compatible shapes. Broadcasting provides a means of vectorizing +array operations so that looping occurs in C instead of Python. 
It does +this without making needless copies of data and usually leads to +efficient algorithm implementations. There are, however, cases where +broadcasting is a bad idea because it leads to inefficient use of memory +that slows computation. + +NumPy operations are usually done element-by-element, which requires two +arrays to have exactly the same shape:: + + >>> a = np.array([1.0, 2.0, 3.0]) + >>> b = np.array([2.0, 2.0, 2.0]) + >>> a * b + array([ 2., 4., 6.]) + +NumPy's broadcasting rule relaxes this constraint when the arrays' +shapes meet certain constraints. The simplest broadcasting example occurs +when an array and a scalar value are combined in an operation: + +>>> a = np.array([1.0, 2.0, 3.0]) +>>> b = 2.0 +>>> a * b +array([ 2., 4., 6.]) + +The result is equivalent to the previous example where ``b`` was an array. +We can think of the scalar ``b`` being *stretched* during the arithmetic +operation into an array with the same shape as ``a``. The new elements in +``b`` are simply copies of the original scalar. The stretching analogy is +only conceptual. NumPy is smart enough to use the original scalar value +without actually making copies, so that broadcasting operations are as +memory and computationally efficient as possible. + +The second example is more effective than the first, since here broadcasting +moves less memory around during the multiplication (``b`` is a scalar, +not an array). + +General Broadcasting Rules +========================== +When operating on two arrays, NumPy compares their shapes element-wise. +It starts with the trailing dimensions, and works its way forward. Two +dimensions are compatible when + +1) they are equal, or +2) one of them is 1 + +If these conditions are not met, a +``ValueError: frames are not aligned`` exception is thrown, indicating that +the arrays have incompatible shapes. The size of the resulting array +is the maximum size along each dimension of the input arrays. + +Arrays do not need to have the same *number* of dimensions. For example, +if you have a ``256x256x3`` array of RGB values, and you want to scale +each color in the image by a different value, you can multiply the image +by a one-dimensional array with 3 values. Lining up the sizes of the +trailing axes of these arrays according to the broadcast rules, shows that +they are compatible:: + + Image (3d array): 256 x 256 x 3 + Scale (1d array): 3 + Result (3d array): 256 x 256 x 3 + +When either of the dimensions compared is one, the larger of the two is +used. In other words, the smaller of two axes is stretched or "copied" +to match the other. 
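+
+The image-scaling case above can be checked directly (a minimal sketch; the
+array contents are placeholders, only the shapes matter here)::
+
+  >>> image = np.ones((256, 256, 3))
+  >>> scale = np.array([2.0, 0.5, 1.0])
+  >>> (image * scale).shape
+  (256, 256, 3)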
+ +In the following example, both the ``A`` and ``B`` arrays have axes with +length one that are expanded to a larger size during the broadcast +operation:: + + A (4d array): 8 x 1 x 6 x 1 + B (3d array): 7 x 1 x 5 + Result (4d array): 8 x 7 x 6 x 5 + +Here are some more examples:: + + A (2d array): 5 x 4 + B (1d array): 1 + Result (2d array): 5 x 4 + + A (2d array): 5 x 4 + B (1d array): 4 + Result (2d array): 5 x 4 + + A (3d array): 15 x 3 x 5 + B (3d array): 15 x 1 x 5 + Result (3d array): 15 x 3 x 5 + + A (3d array): 15 x 3 x 5 + B (2d array): 3 x 5 + Result (3d array): 15 x 3 x 5 + + A (3d array): 15 x 3 x 5 + B (2d array): 3 x 1 + Result (3d array): 15 x 3 x 5 + +Here are examples of shapes that do not broadcast:: + + A (1d array): 3 + B (1d array): 4 # trailing dimensions do not match + + A (2d array): 2 x 1 + B (3d array): 8 x 4 x 3 # second from last dimensions mismatch + +An example of broadcasting in practice:: + + >>> x = np.arange(4) + >>> xx = x.reshape(4,1) + >>> y = np.ones(5) + >>> z = np.ones((3,4)) + + >>> x.shape + (4,) + + >>> y.shape + (5,) + + >>> x + y + : shape mismatch: objects cannot be broadcast to a single shape + + >>> xx.shape + (4, 1) + + >>> y.shape + (5,) + + >>> (xx + y).shape + (4, 5) + + >>> xx + y + array([[ 1., 1., 1., 1., 1.], + [ 2., 2., 2., 2., 2.], + [ 3., 3., 3., 3., 3.], + [ 4., 4., 4., 4., 4.]]) + + >>> x.shape + (4,) + + >>> z.shape + (3, 4) + + >>> (x + z).shape + (3, 4) + + >>> x + z + array([[ 1., 2., 3., 4.], + [ 1., 2., 3., 4.], + [ 1., 2., 3., 4.]]) + +Broadcasting provides a convenient way of taking the outer product (or +any other outer operation) of two arrays. The following example shows an +outer addition operation of two 1-d arrays:: + + >>> a = np.array([0.0, 10.0, 20.0, 30.0]) + >>> b = np.array([1.0, 2.0, 3.0]) + >>> a[:, np.newaxis] + b + array([[ 1., 2., 3.], + [ 11., 12., 13.], + [ 21., 22., 23.], + [ 31., 32., 33.]]) + +Here the ``newaxis`` index operator inserts a new axis into ``a``, +making it a two-dimensional ``4x1`` array. Combining the ``4x1`` array +with ``b``, which has shape ``(3,)``, yields a ``4x3`` array. + +See `this article `_ +for illustrations of broadcasting concepts. """ diff --git a/numpy/doc/reference/creation.py b/numpy/doc/reference/creation.py index 052cd86d7..1e80e5115 100644 --- a/numpy/doc/reference/creation.py +++ b/numpy/doc/reference/creation.py @@ -1,9 +1,132 @@ """ - ============== Array creation ============== -Placeholder for array creation documentation. +Introduction +============ + +There are 5 general mechanisms for creating arrays: + +1) Conversion from other Python structures (e.g., lists, tuples) +2) Intrinsic numpy array array creation objects (e.g., arange, ones, zeros, etc.) +3) Reading arrays from disk, either from standard or custom formats +4) Creating arrays from raw bytes through the use of strings or buffers +5) Use of special library functions (e.g., random) + +This section will not cover means of replicating, joining, or otherwise +expanding or mutating existing arrays. Nor will it cover creating object +arrays or record arrays. Both of those are covered in their own sections. + +Converting Python array-like objects to numpy arrays +==================================================== + +In general, numerical data arranged in an array-like structure in Python can +be converted to arrays through the use of the array() function. The most obvious +examples are lists and tuples. See the documentation for array() for details for +its use. 
Some +objects may support the array-protocol and allow conversion to arrays this +way. A simple way to find out if the object can be converted to a numpy array +using array() is simply to try it interactively and see if it works! (The +Python Way). + +Examples: :: + + >>> x = np.array([2,3,1,0]) + >>> x = np.array([2, 3, 1, 0]) + >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) # note mix of tuple and lists, and types + >>> x = np.array([[ 1.+0.j, 2.+0.j], [ 0.+0.j, 0.+0.j], [ 1.+1.j, 3.+0.j]]) + +Intrinsic numpy array creation +============================== + +Numpy has built-in functions for creating arrays from scratch: + +zeros(shape) will create an array filled with 0 values with the specified +shape. The default dtype is float64. + +``>>> np.zeros((2, 3)) +array([[ 0., 0., 0.], [ 0., 0., 0.]])`` + +ones(shape) will create an array filled with 1 values. It is identical to +zeros in all other respects. + +arange() will create arrays with regularly incrementing values. Check the +docstring for complete information on the various ways it can be used. A few +examples will be given here: :: + + >>> np.arange(10) + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> np.arange(2, 10, dtype=np.float) + array([ 2., 3., 4., 5., 6., 7., 8., 9.]) + >>> np.arange(2, 3, 0.1) + array([ 2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9]) + +Note that there are some subtleties regarding the last usage that the user +should be aware of that are described in the arange docstring. + +indices() will create a set of arrays (stacked as a one-higher dimensioned +array), one per dimension with each representing variation in that dimension. +An examples illustrates much better than a verbal description: :: + + >>> np.indices((3,3)) + array([[[0, 0, 0], [1, 1, 1], [2, 2, 2]], [[0, 1, 2], [0, 1, 2], [0, 1, 2]]]) + +This is particularly useful for evaluating functions of multiple dimensions on +a regular grid. + +Reading arrays from disk +======================== + +This is presumably the most common case of large array creation. The details, +of course, depend greatly on the format of data on disk and so this section +can only give general pointers on how to handle various formats. + +Standard binary formats +----------------------- + +Various fields have standard formats for array data. The following lists the +ones with known python libraries to read them and return numpy arrays (there +may be others for which it is possible to read and convert to numpy arrays so +check the last section as well) + +HDF5: PyTables +FITS: PyFITS +Others? xxx + +Examples of formats that cannot be read directly but for which it is not hard +to convert are libraries like PIL (able to read and write many image formats +such as jpg, png, etc). + +Common ascii formats +-------------------- + +Comma Separated Value files (CSV) are widely used (and an export and import +option for programs like Excel). There are a number of ways of reading these +files in Python. The most convenient ways of reading these are found in pylab +(part of matplotlib) in the xxx function. (list alternatives xxx) + +More generic ascii files can be read using the io package in scipy. xxx a few +more details needed... + +Custom binary formats +--------------------- + +There are a variety of approaches one can use. If the file has a relatively +simple format then one can write a simple I/O library and use the numpy +fromfile() function and .tofile() method to read and write numpy arrays +directly (mind your byteorder though!) 
If a good C or C++ library exists that +read the data, one can wrap that library with a variety of techniques (see +xxx) though that certainly is much more work and requires significantly more +advanced knowledge to interface with C or C++. + +Use of special libraries +------------------------ + +There are libraries that can be used to generate arrays for special purposes +and it isn't possible to enumerate all of them. The most common uses are use +of the many array generation functions in random that can generate arrays of +random values, and some utility functions to generate special matrices (e.g. +diagonal, see xxx) """ diff --git a/numpy/doc/reference/glossary.py b/numpy/doc/reference/glossary.py index c060378d4..6a182adf4 100644 --- a/numpy/doc/reference/glossary.py +++ b/numpy/doc/reference/glossary.py @@ -1,9 +1,367 @@ """ - ================= Glossary ================= -Place-holder for a glossary. +along an axis + Axes are defined for arrays with more than one dimension. A + 2-dimensional array has two corresponding axes: the first running + vertically downwards across rows (axis 0), and the second running + horizontally across columns (axis 1). + + Many operation can take place along one of these axes. For example, + we can sum each row of an array, in which case we operate along + columns, or axis 1:: + + >>> x = np.arange(12).reshape((3,4)) + + >>> x + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + + >>> x.sum(axis=1) + array([ 6, 22, 38]) + +array or ndarray + A homogeneous container of numerical elements. Each element in the + array occupies a fixed amount of memory (hence homogeneous), and + can be a numerical element of a single type (such as float, int + or complex) or a combination (such as ``(float, int, float)``). Each + array has an associated data-type (or ``dtype``), which describes + the numerical type of its elements:: + + >>> x = np.array([1, 2, 3], float) + + >>> x + array([ 1., 2., 3.]) + + >>> x.dtype # floating point number, 64 bits of memory per element + dtype('float64') + + + # More complicated data type: each array element is a combination of + # and integer and a floating point number + >>> np.array([(1, 2.0), (3, 4.0)], dtype=[('x', int), ('y', float)]) + array([(1, 2.0), (3, 4.0)], + dtype=[('x', '>> x = np.array([1, 2, 3]) + >>> x.shape + (3,) + +broadcast + NumPy can do operations on arrays whose shapes are mismatched:: + + >>> x = np.array([1, 2]) + >>> y = np.array([[3], [4]]) + + >>> x + array([1, 2]) + + >>> y + array([[3], + [4]]) + + >>> x + y + array([[4, 5], + [5, 6]]) + + See `doc.broadcasting`_ for more information. + +decorator + An operator that transforms a function. For example, a ``log`` + decorator may be defined to print debugging information upon + function execution:: + + >>> def log(f): + ... def new_logging_func(*args, **kwargs): + ... print "Logging call with parameters:", args, kwargs + ... return f(*args, **kwargs) + ... + ... return new_logging_func + + Now, when we define a function, we can "decorate" it using ``log``:: + + >>> @log + ... def add(a, b): + ... 
return a + b + + Calling ``add`` then yields: + + >>> add(1, 2) + Logging call with parameters: (1, 2) {} + 3 + +dictionary + Resembling a language dictionary, which provides a mapping between + words and descriptions thereof, a Python dictionary is a mapping + between two objects:: + + >>> x = {1: 'one', 'two': [1, 2]} + + Here, `x` is a dictionary mapping keys to values, in this case + the integer 1 to the string "one", and the string "two" to + the list ``[1, 2]``. The values may be accessed using their + corresponding keys:: + + >>> x[1] + 'one' + + >>> x['two'] + [1, 2] + + Note that dictionaries are not stored in any specific order. Also, + most mutable (see *immutable* below) objects, such as lists, may not + be used as keys. + + For more information on dictionaries, read the + `Python tutorial `_. + +immutable + An object that cannot be modified after execution is called + immutable. Two common examples are strings and tuples. + +instance + A class definition gives the blueprint for constructing an object:: + + >>> class House(object): + ... wall_colour = 'white' + + Yet, we have to *build* a house before it exists:: + + >>> h = House() # build a house + + Now, ``h`` is called a ``House`` instance. An instance is therefore + a specific realisation of a class. + +iterable + A sequence that allows "walking" (iterating) over items, typically + using a loop such as:: + + >>> x = [1, 2, 3] + >>> [item**2 for item in x] + [1, 4, 9] + + It is often used in combintion with ``enumerate``:: + + >>> for n, k in enumerate(keys): + ... print "Key %d: %s" % (n, k) + ... + Key 0: a + Key 1: b + Key 2: c + +list + A Python container that can hold any number of objects or items. + The items do not have to be of the same type, and can even be + lists themselves:: + + >>> x = [2, 2.0, "two", [2, 2.0]] + + The list `x` contains 4 items, each which can be accessed individually:: + + >>> x[2] # the string 'two' + 'two' + + >>> x[3] # a list, containing an integer 2 and a float 2.0 + [2, 2.0] + + It is also possible to select more than one item at a time, + using *slicing*:: + + >>> x[0:2] # or, equivalently, x[:2] + [2, 2.0] + + In code, arrays are often conveniently expressed as nested lists:: + + + >>> np.array([[1, 2], [3, 4]]) + array([[1, 2], + [3, 4]]) + + For more information, read the section on lists in the `Python + tutorial `_. For a mapping + type (key-value), see *dictionary*. + +mask + A boolean array, used to select only certain elements for an operation:: + + >>> x = np.arange(5) + >>> x + array([0, 1, 2, 3, 4]) + + >>> mask = (x > 2) + >>> mask + array([False, False, False, True, True], dtype=bool) + + >>> x[mask] = -1 + >>> x + array([ 0, 1, 2, -1, -1]) + +masked array + Array that suppressed values indicated by a mask:: + + >>> x = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) + >>> x + masked_array(data = [-- 2.0 --], + mask = [ True False True], + fill_value=1e+20) + + >>> x + [1, 2, 3] + masked_array(data = [-- 4.0 --], + mask = [ True False True], + fill_value=1e+20) + + Masked arrays are often used when operating on arrays containing + missing or invalid entries. + +matrix + A 2-dimensional ndarray that preserves its two-dimensional nature + throughout operations. It has certain special operations, such as ``*`` + (matrix multiplication) and ``**`` (matrix power), defined:: + + >>> x = np.mat([[1, 2], [3, 4]]) + + >>> x + matrix([[1, 2], + [3, 4]]) + + >>> x**2 + matrix([[ 7, 10], + [15, 22]]) + +method + A function associated with an object. 
For example, each ndarray has a + method called ``repeat``:: + + >>> x = np.array([1, 2, 3]) + + >>> x.repeat(2) + array([1, 1, 2, 2, 3, 3]) + +reference + If ``a`` is a reference to ``b``, then ``(a is b) == True``. Therefore, + ``a`` and ``b`` are different names for the same Python object. + +self + Often seen in method signatures, ``self`` refers to the instance + of the associated class. For example: + + >>> class Paintbrush(object): + ... color = 'blue' + ... + ... def paint(self): + ... print "Painting the city %s!" % self.color + ... + >>> p = Paintbrush() + >>> p.color = 'red' + >>> p.paint() # self refers to 'p' + Painting the city red! + +slice + Used to select only certain elements from a sequence:: + + >>> x = range(5) + >>> x + [0, 1, 2, 3, 4] + + >>> x[1:3] # slice from 1 to 3 (excluding 3 itself) + [1, 2] + + >>> x[1:5:2] # slice from 1 to 5, but skipping every second element + [1, 3] + + >>> x[::-1] # slice a sequence in reverse + [4, 3, 2, 1, 0] + + Arrays may have more than one dimension, each which can be sliced + individually:: + + >>> x = np.array([[1, 2], [3, 4]]) + >>> x + array([[1, 2], + [3, 4]]) + + >>> x[:, 1] + array([2, 4]) + +tuple + A sequence that may contain a variable number of types of any + kind. A tuple is immutable, i.e., once constructed it cannot be + changed. Similar to a list, it can be indexed and sliced:: + + >>> x = (1, 'one', [1, 2]) + + >>> x + (1, 'one', [1, 2]) + + >>> x[0] + 1 + + >>> x[:2] + (1, 'one') + + A useful concept is "tuple unpacking", which allows variables to + be assigned to the contents of a tuple:: + + >>> x, y = (1, 2) + >>> x, y = 1, 2 + + This is often used when a function returns multiple values: + + >>> def return_many(): + ... return 1, 'alpha' + + >>> a, b, c = return_many() + >>> a, b, c + (1, 'alpha', None) + + >>> a + 1 + >>> b + 'alpha' + +ufunc + Universal function. A fast element-wise array operation. Examples include + ``add``, ``sin`` and ``logical_or``. + +view + An array that does not own its data, but refers to another array's + data instead. For example, we may create a view that only shows + every second element of another array:: + + >>> x = np.arange(5) + >>> x + array([0, 1, 2, 3, 4]) + + >>> y = x[::2] + >>> y + array([0, 2, 4]) + + >>> x[0] = 3 # changing x changes y as well, since y is a view on x + >>> y + array([3, 2, 4]) + +wrapper + Python is a high-level (highly abstracted, or English-like) language. + This abstraction comes at a price in execution speed, and sometimes + it becomes necessary to use lower level languages to do fast + computations. A wrapper is code that provides a bridge between + high and the low level languages, allowing, e.g., Python to execute + code written in C or Fortran. + + Examples include ctypes, SWIG and Cython (which wraps C and C++) + and f2py (which wraps Fortran). """ diff --git a/numpy/doc/reference/indexing.py b/numpy/doc/reference/indexing.py index bc13611e8..365edd67a 100644 --- a/numpy/doc/reference/indexing.py +++ b/numpy/doc/reference/indexing.py @@ -1,9 +1,384 @@ """ - ============== Array indexing ============== -Placeholder for array indexing documentation. +Array indexing refers to any use of the square brackets ([]) to index +array values. There are many options to indexing, which give numpy +indexing great power, but with power comes some complexity and the +potential for confusion. This section is just an overview of the +various options and issues related to indexing. 
Aside from single +element indexing, the details on most of these options are to be +found in related sections. + +Assignment vs referencing +========================= + +Most of the following examples show the use of indexing when referencing +data in an array. The examples work just as well when assigning to an +array. See the section at the end for specific examples and explanations +on how assignments work. + +Single element indexing +======================= + +Single element indexing for a 1-D array is what one expects. It work +exactly like that for other standard Python sequences. It is 0-based, +and accepts negative indices for indexing from the end of the array. :: + + >>> x = np.arange(10) + >>> x[2] + 2 + >>> x[-2] + 8 + +Unlike lists and tuples, numpy arrays support multidimensional indexing +for multidimensional arrays. That means that it is not necessary to +separate each dimension's index into its own set of square brackets. :: + + >>> x.shape = (2,5) # now x is 2-dimensional + >>> x[1,3] + 8 + >>> x[1,-1] + 9 + +Note that if one indexes a multidimensional array with fewer indices +than dimensions, one gets a subdimensional array. For example: :: + + >>> x[0] + array([0, 1, 2, 3, 4]) + +That is, each index specified selects the array corresponding to the rest +of the dimensions selected. In the above example, choosing 0 means that +remaining dimension of lenth 5 is being left unspecified, and that what +is returned is an array of that dimensionality and size. It must be noted +that the returned array is not a copy of the original, but points to the +same values in memory as does the original array (a new view of the same +data in other words, see xxx for details). In this case, +the 1-D array at the first position (0) is returned. So using a single +index on the returned array, results in a single element being returned. +That is: :: + + >>> x[0][2] + 2 + +So note that ``x[0,2] = x[0][2]`` though the second case is more inefficient +a new temporary array is created after the first index that is subsequently +indexed by 2. + +Note to those used to IDL or Fortran memory order as it relates to indexing. +Numpy uses C-order indexing. That means that the last index usually (see +xxx for exceptions) represents the most rapidly changing memory location, +unlike Fortran or IDL, where the first index represents the most rapidly +changing location in memory. This difference represents a great potential +for confusion. + +Other indexing options +====================== + +It is possible to slice and stride arrays to extract arrays of the same +number of dimensions, but of different sizes than the original. The slicing +and striding works exactly the same way it does for lists and tuples except +that they can be applied to multiple dimensions as well. A few +examples illustrates best: :: + + >>> x = np.arange(10) + >>> x[2:5] + array([2, 3, 4]) + >>> x[:-7] + array([0, 1, 2]) + >>> x[1:7:2] + array([1,3,5]) + >>> y = np.arange(35).reshape(5,7) + >>> y[1:5:2,::3] + array([[ 7, 10, 13], + [21, 24, 27]]) + +Note that slices of arrays do not copy the internal array data but +also produce new views of the original data (see xxx for more +explanation of this issue). + +It is possible to index arrays with other arrays for the purposes of +selecting lists of values out of arrays into new arrays. There are two +different ways of accomplishing this. One uses one or more arrays of +index values (see xxx for details). 
The other involves giving a boolean +array of the proper shape to indicate the values to be selected. +Index arrays are a very powerful tool that allow one to avoid looping +over individual elements in arrays and thus greatly improve performance +(see xxx for examples) + +It is possible to use special features to effectively increase the +number of dimensions in an array through indexing so the resulting +array aquires the shape needed for use in an expression or with a +specific function. See xxx. + +Index arrays +============ + +Numpy arrays may be indexed with other arrays (or any other sequence-like +object that can be converted to an array, such as lists, with the exception +of tuples; see the end of this document for why this is). The use of index +arrays ranges from simple, straightforward cases to complex, hard-to-understand +cases. For all cases of index arrays, what is returned is a copy of the +original data, not a view as one gets for slices. + +Index arrays must be of integer type. Each value in the array indicates which +value in the array to use in place of the index. To illustrate: :: + + >>> x = np.arange(10,1,-1) + >>> x + array([10, 9, 8, 7, 6, 5, 4, 3, 2]) + >>> x[np.array([3, 3, 1, 8])] + array([7, 7, 9, 2]) + + +The index array consisting of the values 3, 3, 1 and 8 correspondingly create +an array of length 4 (same as the index array) where each index is replaced by +the value the index array has in the array being indexed. + +Negative values are permitted and work as they do with single indices or slices: :: + + >>> x[np.array([3,3,-3,8])] + array([7, 7, 4, 2]) + +It is an error to have index values out of bounds: :: + + >>> x[np.array([3, 3, 20, 8])] + : index 20 out of bounds 0<=index<9 + +Generally speaking, what is returned when index arrays are used is an array with +the same shape as the index array, but with the type and values of the array being +indexed. As an example, we can use a multidimensional index array instead: :: + + >>> x[np.array([[1,1],[2,3]])] + array([[9, 9], + [8, 7]]) + +Indexing Multi-dimensional arrays +================================= + +Things become more complex when multidimensional arrays are indexed, particularly +with multidimensional index arrays. These tend to be more unusal uses, but they +are permitted, and they are useful for some problems. We'll start with the +simplest multidimensional case (using the array y from the previous examples): :: + + >>> y[np.array([0,2,4]), np.array([0,1,2])] + array([ 0, 15, 30]) + +In this case, if the index arrays have a matching shape, and there is an index +array for each dimension of the array being indexed, the resultant array has the +same shape as the index arrays, and the values correspond to the index set for each +position in the index arrays. In this example, the first index value is 0 for both +index arrays, and thus the first value of the resultant array is y[0,0]. The next +value is y[2,1], and the last is y[4,2]. + +If the index arrays do not have the same shape, there is an attempt to broadcast +them to the same shape. Broadcasting won't be discussed here but is discussed in +detail in xxx. If they cannot be broadcast to the same shape, an exception is +raised: :: + + >>> y[np.array([0,2,4]), np.array([0,1])] + : shape mismatch: objects cannot be broadcast to a single shape + +The broadcasting mechanism permits index arrays to be combined with scalars for +other indices. 
The effect is that the scalar value is used for all the corresponding +values of the index arrays: :: + + >>> y[np.array([0,2,4]), 1] + array([ 1, 15, 29]) + +Jumping to the next level of complexity, it is possible to only partially index an array +with index arrays. It takes a bit of thought to understand what happens in such cases. +For example if we just use one index array with y: :: + + >>> y[np.array([0,2,4])] + array([[ 0, 1, 2, 3, 4, 5, 6], + [14, 15, 16, 17, 18, 19, 20], + [28, 29, 30, 31, 32, 33, 34]]) + +What results is the construction of a new array where each value of the index array +selects one row from the array being indexed and the resultant array has the resulting +shape (size of row, number index elements). + +An example of where this may be useful is for a color lookup table where we want to map +the values of an image into RGB triples for display. The lookup table could have a shape +(nlookup, 3). Indexing such an array with an image with shape (ny, nx) with dtype=np.uint8 +(or any integer type so long as values are with the bounds of the lookup table) will +result in an array of shape (ny, nx, 3) where a triple of RGB values is associated with +each pixel location. + +In general, the shape of the resulant array will be the concatenation of the shape of +the index array (or the shape that all the index arrays were broadcast to) with the +shape of any unused dimensions (those not indexed) in the array being indexed. + +Boolean or "mask" index arrays +============================== + +Boolean arrays used as indices are treated in a different manner entirely than index +arrays. Boolean arrays must be of the same shape as the array being indexed, or +broadcastable to the same shape. In the most straightforward case, the boolean array +has the same shape: :: + + >>> b = y>20 + >>> y[b] + array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]) + +The result is a 1-D array containing all the elements in the indexed array corresponding +to all the true elements in the boolean array. As with index arrays, what is returned +is a copy of the data, not a view as one gets with slices. + +With broadcasting, multidimesional arrays may be the result. For example: :: + + >>> b[:,5] # use a 1-D boolean that broadcasts with y + array([False, False, False, True, True], dtype=bool) + >>> y[b[:,5]] + array([[21, 22, 23, 24, 25, 26, 27], + [28, 29, 30, 31, 32, 33, 34]]) + +Here the 4th and 5th rows are selected from the indexed array and combined to make a +2-D array. + +Combining index arrays with slices +================================== + +Index arrays may be combined with slices. For example: :: + + >>> y[np.array([0,2,4]),1:3] + array([[ 1, 2], + [15, 16], + [29, 30]]) + +In effect, the slice is converted to an index array np.array([[1,2]]) (shape (1,2)) that is +broadcast with the index array to produce a resultant array of shape (3,2). + +Likewise, slicing can be combined with broadcasted boolean indices: :: + + >>> y[b[:,5],1:3] + array([[22, 23], + [29, 30]]) + +Structural indexing tools +========================= + +To facilitate easy matching of array shapes with expressions and in +assignments, the np.newaxis object can be used within array indices +to add new dimensions with a size of 1. For example: :: + + >>> y.shape + (5, 7) + >>> y[:,np.newaxis,:].shape + (5, 1, 7) + +Note that there are no new elements in the array, just that the +dimensionality is increased. 
This can be handy to combine two
+arrays in a way that otherwise would require explicitly reshaping
+operations. For example: ::
+
+ >>> x = np.arange(5)
+ >>> x[:,np.newaxis] + x[np.newaxis,:]
+ array([[0, 1, 2, 3, 4],
+        [1, 2, 3, 4, 5],
+        [2, 3, 4, 5, 6],
+        [3, 4, 5, 6, 7],
+        [4, 5, 6, 7, 8]])
+
+The ellipsis syntax may be used to indicate selecting in full any
+remaining unspecified dimensions. For example: ::
+
+ >>> z = np.arange(81).reshape(3,3,3,3)
+ >>> z[1,...,2]
+ array([[29, 32, 35],
+        [38, 41, 44],
+        [47, 50, 53]])
+
+This is equivalent to: ::
+
+ >>> z[1,:,:,2]
+
+Assigning values to indexed arrays
+==================================
+
+As mentioned, one can select a subset of an array to assign to using
+a single index, slices, and index and mask arrays. The value being
+assigned to the indexed array must be shape consistent (the same shape
+or broadcastable to the shape the index produces). For example, it is
+permitted to assign a constant to a slice: ::
+
+ >>> x[2:7] = 1
+
+or an array of the right size: ::
+
+ >>> x[2:7] = np.arange(5)
+
+Note that assignments may result in changes if assigning
+higher types to lower types (like floats to ints) or even
+exceptions (assigning complex to floats or ints): ::
+
+ >>> x[1] = 1.2
+ >>> x[1]
+ 1
+ >>> x[1] = 1.2j
+ <type 'exceptions.TypeError'>: can't convert complex to long; use long(abs(z))
+
+
+Unlike some of the references (such as array and mask indices)
+assignments are always made to the original data in the array
+(indeed, nothing else would make sense!). Note though, that some
+actions may not work as one may naively expect. This particular
+example is often surprising to people: ::
+
+ >>> x[np.array([1, 1, 3, 1])] += 1
+
+People often expect that the 1st location will be incremented by 3.
+In fact, it will only be incremented by 1. The reason is that
+a new array is extracted from the original (as a temporary) containing
+the values at 1, 1, 3, 1, then the value 1 is added to the temporary,
+and then the temporary is assigned back to the original array. Thus
+the value of the array at x[1]+1 is assigned to x[1] three times,
+rather than being incremented 3 times.
+
+Dealing with variable numbers of indices within programs
+=========================================================
+
+The index syntax is very powerful but limiting when dealing with
+a variable number of indices. For example, if you want to write
+a function that can handle arguments with various numbers of
+dimensions without having to write special case code for each
+number of possible dimensions, how can that be done? If one
+supplies to the index a tuple, the tuple will be interpreted
+as a list of indices. For example (using the previous definition
+for the array z): ::
+
+ >>> indices = (1,1,1,1)
+ >>> z[indices]
+ 40
+
+So one can use code to construct tuples of any number of indices
+and then use these within an index.
+
+Slices can be specified within programs by using the slice() function
+in Python. For example: ::
+
+ >>> indices = (1,1,1,slice(0,2)) # same as [1,1,1,0:2]
+ >>> z[indices]
+ array([39, 40])
+
+Likewise, ellipsis can be specified by code by using the Ellipsis object: ::
+
+ >>> indices = (1, Ellipsis, 1) # same as [1,...,1]
+ >>> z[indices]
+ array([[28, 31, 34],
+        [37, 40, 43],
+        [46, 49, 52]])
+
+For this reason it is possible to use the output from the np.where()
+function directly as an index since it always returns a tuple of index arrays.
+
+Because of the special treatment of tuples, they are not automatically converted
+to an array as a list would be.
As an example: :: + + >>> z[[1,1,1,1]] + ... # produces a large array + >>> z[(1,1,1,1)] + 40 # returns a single value """ diff --git a/numpy/doc/reference/internals.py b/numpy/doc/reference/internals.py index 1e1a072cb..a74429368 100644 --- a/numpy/doc/reference/internals.py +++ b/numpy/doc/reference/internals.py @@ -1,9 +1,162 @@ """ - =============== Array Internals =============== -Placeholder for Array Internals documentation. +Internal organization of numpy arrays +===================================== + +It helps to understand a bit about how numpy arrays are handled under the covers to help understand numpy better. This section will not go into great detail. Those wishing to understand the full details are referred to Travis Oliphant's book "Guide to Numpy". + +Numpy arrays consist of two major components, the raw array data (from now on, +referred to as the data buffer), and the information about the raw array data. +The data buffer is typically what people think of as arrays in C or Fortran, +a contiguous (and fixed) block of memory containing fixed sized data items. +Numpy also contains a significant set of data that describes how to interpret +the data in the data buffer. This extra information contains (among other things): + + 1) The basic data element's size in bytes + 2) The start of the data within the data buffer (an offset relative to the + beginning of the data buffer). + 3) The number of dimensions and the size of each dimension + 4) The separation between elements for each dimension (the 'stride'). This + does not have to be a multiple of the element size + 5) The byte order of the data (which may not be the native byte order) + 6) Whether the buffer is read-only + 7) Information (via the dtype object) about the interpretation of the basic + data element. The basic data element may be as simple as a int or a float, + or it may be a compound object (e.g., struct-like), a fixed character field, + or Python object pointers. + 8) Whether the array is to interpreted as C-order or Fortran-order. + +This arrangement allow for very flexible use of arrays. One thing that it allows +is simple changes of the metadata to change the interpretation of the array buffer. +Changing the byteorder of the array is a simple change involving no rearrangement +of the data. The shape of the array can be changed very easily without changing +anything in the data buffer or any data copying at all + +Among other things that are made possible is one can create a new array metadata +object that uses the same data buffer +to create a new view of that data buffer that has a different interpretation +of the buffer (e.g., different shape, offset, byte order, strides, etc) but +shares the same data bytes. Many operations in numpy do just this such as +slices. Other operations, such as transpose, don't move data elements +around in the array, but rather change the information about the shape and strides so that the indexing of the array changes, but the data in the doesn't move. + +Typically these new versions of the array metadata but the same data buffer are +new 'views' into the data buffer. There is a different ndarray object, but it +uses the same data buffer. This is why it is necessary to force copies through +use of the .copy() method if one really wants to make a new and independent +copy of the data buffer. + +New views into arrays mean the the object reference counts for the data buffer +increase. 
Simply doing away with the original array object will not remove the +data buffer if other views of it still exist. + +Multidimensional Array Indexing Order Issues +============================================ + +What is the right way to index +multi-dimensional arrays? Before you jump to conclusions about the one and +true way to index multi-dimensional arrays, it pays to understand why this is +a confusing issue. This section will try to explain in detail how numpy +indexing works and why we adopt the convention we do for images, and when it +may be appropriate to adopt other conventions. + +The first thing to understand is +that there are two conflicting conventions for indexing 2-dimensional arrays. +Matrix notation uses the first index to indicate which row is being selected and +the second index to indicate which column is selected. This is opposite the +geometrically oriented-convention for images where people generally think the +first index represents x position (i.e., column) and the second represents y +position (i.e., row). This alone is the source of much confusion; +matrix-oriented users and image-oriented users expect two different things with +regard to indexing. + +The second issue to understand is how indices correspond +to the order the array is stored in memory. In Fortran the first index is the +most rapidly varying index when moving through the elements of a two +dimensional array as it is stored in memory. If you adopt the matrix +convention for indexing, then this means the matrix is stored one column at a +time (since the first index moves to the next row as it changes). Thus Fortran +is considered a Column-major language. C has just the opposite convention. In +C, the last index changes most rapidly as one moves through the array as +stored in memory. Thus C is a Row-major language. The matrix is stored by +rows. Note that in both cases it presumes that the matrix convention for +indexing is being used, i.e., for both Fortran and C, the first index is the +row. Note this convention implies that the indexing convention is invariant +and that the data order changes to keep that so. + +But that's not the only way +to look at it. Suppose one has large two-dimensional arrays (images or +matrices) stored in data files. Suppose the data are stored by rows rather than +by columns. If we are to preserve our index convention (whether matrix or +image) that means that depending on the language we use, we may be forced to +reorder the data if it is read into memory to preserve our indexing +convention. For example if we read row-ordered data into memory without +reordering, it will match the matrix indexing convention for C, but not for +Fortran. Conversely, it will match the image indexing convention for Fortran, +but not for C. For C, if one is using data stored in row order, and one wants +to preserve the image index convention, the data must be reordered when +reading into memory. + +In the end, which you do for Fortran or C depends on +which is more important, not reordering data or preserving the indexing +convention. For large images, reordering data is potentially expensive, and +often the indexing convention is inverted to avoid that. + +The situation with +numpy makes this issue yet more complicated. The internal machinery of numpy +arrays is flexible enough to accept any ordering of indices. One can simply +reorder indices by manipulating the internal stride information for arrays +without reordering the data at all. 
Numpy will know how to map the new index +order to the data without moving the data. + +So if this is true, why not choose +the index order that matches what you most expect? In particular, why not define +row-ordered images to use the image convention? (This is sometimes referred +to as the Fortran convention vs the C convention, thus the 'C' and 'FORTRAN' +order options for array ordering in numpy.) The drawback of doing this is +potential performance penalties. It's common to access the data sequentially, +either implicitly in array operations or explicitly by looping over rows of an +image. When that is done, then the data will be accessed in non-optimal order. +As the first index is incremented, what is actually happening is that elements +spaced far apart in memory are being sequentially accessed, with usually poor +memory access speeds. For example, for a two dimensional image 'im' defined so +that im[0, 10] represents the value at x=0, y=10. To be consistent with usual +Python behavior then im[0] would represent a column at x=0. Yet that data +would be spread over the whole array since the data are stored in row order. +Despite the flexibility of numpy's indexing, it can't really paper over the fact +basic operations are rendered inefficient because of data order or that getting +contiguous subarrays is still awkward (e.g., im[:,0] for the first row, vs +im[0]), thus one can't use an idiom such as for row in im; for col in im does +work, but doesn't yield contiguous column data. + +As it turns out, numpy is +smart enough when dealing with ufuncs to determine which index is the most +rapidly varying one in memory and uses that for the innermost loop. Thus for +ufuncs there is no large intrinsic advantage to either approach in most cases. +On the other hand, use of .flat with an FORTRAN ordered array will lead to +non-optimal memory access as adjacent elements in the flattened array (iterator, +actually) are not contiguous in memory. + +Indeed, the fact is that Python +indexing on lists and other sequences naturally leads to an outside-to inside +ordering (the first index gets the largest grouping, the next the next largest, +and the last gets the smallest element). Since image data are normally stored +by rows, this corresponds to position within rows being the last item indexed. + +If you do want to use Fortran ordering realize that +there are two approaches to consider: 1) accept that the first index is just not +the most rapidly changing in memory and have all your I/O routines reorder +your data when going from memory to disk or visa versa, or use numpy's +mechanism for mapping the first index to the most rapidly varying data. We +recommend the former if possible. The disadvantage of the latter is that many +of numpy's functions will yield arrays without Fortran ordering unless you are +careful to use the 'order' keyword. Doing this would be highly inconvenient. + +Otherwise we recommend simply learning to reverse the usual order of indices +when accessing elements of an array. Granted, it goes against the grain, but +it is more in line with Python semantics and the natural order of the data. 
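+
+For reference, this is what the 'order' keyword mentioned above looks like in
+practice (a small sketch; both arrays hold the same values and differ only in
+memory layout)::
+
+  >>> c = np.ones((2, 3), order='C')   # row-major layout, the default
+  >>> f = np.ones((2, 3), order='F')   # column-major (Fortran) layout
+  >>> c.flags['C_CONTIGUOUS'], f.flags['F_CONTIGUOUS']
+  (True, True)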
""" diff --git a/numpy/doc/reference/structured_arrays.py b/numpy/doc/reference/structured_arrays.py index 708d2ea2c..7bbd0deda 100644 --- a/numpy/doc/reference/structured_arrays.py +++ b/numpy/doc/reference/structured_arrays.py @@ -1,9 +1,176 @@ """ +===================================== +Structured Arrays (aka Record Arrays) +===================================== -================= -Structured Arrays -================= +Introduction +============ -Placeholder for structured array documentation. +Numpy provides powerful capabilities to create arrays of structs or records. +These arrays permit one to manipulate the data by the structs or by fields of +the struct. A simple example will show what is meant.: :: + + >>> x = np.zeros((2,),dtype=('i4,f4,a10')) + >>> x[:] = [(1,2.,'Hello'),(2,3.,"World")] + >>> x + array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], + dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + +Here we have created a one-dimensional array of length 2. Each element of +this array is a record that contains three items, a 32-bit integer, a 32-bit +float, and a string of length 10 or less. If we index this array at the second +position we get the second record: :: + + >>> x[1] + (2,3.,"World") + +The interesting aspect is that we can reference the different fields of the +array simply by indexing the array with the string representing the name of +the field. In this case the fields have received the default names of 'f0', 'f1' +and 'f2'. + + >>> y = x['f1'] + >>> y + array([ 2., 3.], dtype=float32) + >>> y[:] = 2*y + >>> y + array([ 4., 6.], dtype=float32) + >>> x + array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], + dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + +In these examples, y is a simple float array consisting of the 2nd field +in the record. But it is not a copy of the data in the structured array, +instead it is a view. It shares exactly the same data. Thus when we updated +this array by doubling its values, the structured array shows the +corresponding values as doubled as well. Likewise, if one changes the record, +the field view changes: :: + + >>> x[1] = (-1,-1.,"Master") + >>> x + array([(1, 4.0, 'Hello'), (-1, -1.0, 'Master')], + dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + >>> y + array([ 4., -1.], dtype=float32) + +Defining Structured Arrays +========================== + +The definition of a structured array is all done through the dtype object. +There are a **lot** of different ways one can define the fields of a +record. Some of variants are there to provide backward compatibility with +Numeric or numarray, or another module, and should not be used except for +such purposes. These will be so noted. One defines records by specifying +the structure by 4 general ways, using an argument (as supplied to a dtype +function keyword or a dtype object constructor itself) in the form of a: +1) string, 2) tuple, 3) list, or 4) dictionary. Each of these will be briefly +described. + +1) String argument (as used in the above examples). +In this case, the constructor is expecting a comma +separated list of type specifiers, optionally with extra shape information. +The type specifiers can take 4 different forms: :: + + a) b1, i1, i2, i4, i8, u1, u2, u4, u8, f4, f8, c8, c16, a + (representing bytes, ints, unsigned ints, floats, complex and + fixed length strings of specified byte lengths) + b) int8,...,uint8,...,float32, float64, complex64, complex128 + (this time with bit sizes) + c) older Numeric/numarray type specifications (e.g. Float32). 
+ Don't use these in new code! + d) Single character type specifiers (e.g H for unsigned short ints). + Avoid using these unless you must. Details can be found in the + Numpy book + +These different styles can be mixed within the same string (but why would you +want to do that?). Furthermore, each type specifier can be prefixed +with a repetition number, or a shape. In these cases an array +element is created, i.e., an array within a record. That array +is still referred to as a single field. An example: :: + + >>> x = np.zeros(3, dtype='3int8, float32, (2,3)float64') + >>> x + array([([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), + ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), + ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])], + dtype=[('f0', '|i1', 3), ('f1', '>f4'), ('f2', '>f8', (2, 3))]) + +By using strings to define the record structure, it precludes being +able to name the fields in the original definition. The names can +be changed as shown later, however. + +2) Tuple argument: The only relevant tuple case that applies to record +structures is when a structure is mapped to an existing data type. This +is done by pairing in a tuple, the existing data type with a matching +dtype definition (using any of the variants being described here). As +an example (using a definition using a list, so see 3) for further +details): :: + + >>> x = zeros(3, dtype=('i4',[('r','u1'), ('g','u1'), ('b','u1'), ('a','u1')])) + >>> x + array([0, 0, 0]) + >>> x['r'] + array([0, 0, 0], dtype=uint8) + +In this case, an array is produced that looks and acts like a simple int32 array, +but also has definitions for fields that use only one byte of the int32 (a bit +like Fortran equivalencing). + +3) List argument: In this case the record structure is defined with a list of +tuples. Each tuple has 2 or 3 elements specifying: 1) The name of the field +('' is permitted), 2) the type of the field, and 3) the shape (optional). +For example: + + >>> x = np.zeros(3, dtype=[('x','f4'),('y',np.float32),('value','f4',(2,2))]) + >>> x + array([(0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), + (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), + (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]])], + dtype=[('x', '>f4'), ('y', '>f4'), ('value', '>f4', (2, 2))]) + +4) Dictionary argument: two different forms are permitted. The first consists +of a dictionary with two required keys ('names' and 'formats'), each having an +equal sized list of values. The format list contains any type/shape specifier +allowed in other contexts. The names must be strings. There are two optional +keys: 'offsets' and 'titles'. Each must be a correspondingly matching list to +the required two where offsets contain integer offsets for each field, and +titles are objects containing metadata for each field (these do not have +to be strings), where the value of None is permitted. As an example: :: + + >>> x = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']}) + >>> x + array([(0, 0.0), (0, 0.0), (0, 0.0)], + dtype=[('col1', '>i4'), ('col2', '>f4')]) + +The other dictionary form permitted is a dictionary of name keys with tuple +values specifying type, offset, and an optional title. + + >>> x = np.zeros(3, dtype={'col1':('i1',0,'title 1'), 'col2':('f4',1,'title 2')}) + array([(0, 0.0), (0, 0.0), (0, 0.0)], + dtype=[(('title 1', 'col1'), '|i1'), (('title 2', 'col2'), '>f4')]) + +Accessing and modifying field names +=================================== + +The field names are an attribute of the dtype object defining the record structure. 
+
+Accessing and modifying field names
+===================================
+
+The field names are an attribute of the dtype object defining the record
+structure. For the array ``x`` defined in the titles example above: ::
+
+ >>> x.dtype.names
+ ('col1', 'col2')
+ >>> x.dtype.names = ('x', 'y')
+ >>> x
+ array([(0, 0.0), (0, 0.0), (0, 0.0)],
+       dtype=[(('title 1', 'x'), '|i1'), (('title 2', 'y'), '>f4')])
+ >>> x.dtype.names = ('x', 'y', 'z') # wrong number of names
+ <type 'exceptions.ValueError'>: must replace all names at once with a sequence of length 2
+
+Accessing field titles
+======================
+
+The field titles provide a standard place to put associated info for fields.
+They do not have to be strings: ::
+
+ >>> x.dtype.fields['x'][2]
+ 'title 1'
 """
diff --git a/numpy/doc/reference/ufuncs.py b/numpy/doc/reference/ufuncs.py
index a7f349aa9..4819e5268 100644
--- a/numpy/doc/reference/ufuncs.py
+++ b/numpy/doc/reference/ufuncs.py
@@ -1,9 +1,135 @@
 """
-
 ===================
 Universal Functions
 ===================
 
-Placeholder for ufunc documentation.
+Ufuncs are, generally speaking, mathematical functions or operations that are
+applied element-by-element to the contents of an array. That is, the result
+in each output array element only depends on the value in the corresponding
+input array (or arrays) and on no other array elements. Numpy comes with a
+large suite of ufuncs, and scipy extends that suite substantially. The
+simplest example is the addition operator: ::
+
+ >>> np.array([0,2,3,4]) + np.array([1,1,-1,2])
+ array([1, 3, 2, 6])
+
+The ufunc module lists all the available ufuncs in numpy. Additional ufuncs
+are available in xxx in scipy. Documentation on the specific ufuncs may be
+found in those modules. This documentation is intended to address the more
+general aspects of ufuncs common to most of them. All of the ufuncs that make
+use of Python operators (e.g., +, -, etc.) have equivalent functions defined
+(e.g. add() for +).
+
+Type coercion
+=============
+
+What happens when a binary operator (e.g., +, -, \\*, /, etc.) deals with
+arrays of two different types? What is the type of the result? Typically, the
+result is the higher of the two types. For example: ::
+
+ float32 + float64 -> float64
+ int8 + int32 -> int32
+ int16 + float32 -> float32
+ float32 + complex64 -> complex64
+
+There are some less obvious cases, generally involving mixes of types (e.g.
+uints, ints and floats), where a type of equal bit size cannot hold all the
+information of the other type. Some examples are int32 vs. float32 or uint32
+vs. int32. Generally, the result is a type of higher kind and larger size
+than both (if such a type is available). So: ::
+
+ int32 + float32 -> float64
+ uint32 + int32 -> int64
+
+Finally, the type coercion behavior when expressions involve Python scalars
+is different from that seen for arrays. Since Python has a limited number of
+types, combining a Python int with a dtype=np.int8 array does not coerce to
+the higher type; instead, the type of the array prevails. The rule for Python
+scalars combined with arrays is: if the Python scalar is of a higher 'kind'
+than the array (e.g., float vs. int), the result takes the array-equivalent
+type of the Python scalar; otherwise the result type is that of the array.
+For example: ::
+
+ Python int + int8 -> int8
+ Python float + int8 -> float64
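+
+A brief sketch of these rules in action (the exact promotion rules have been
+refined over numpy versions, so treat this as illustrative): ::
+
+ >>> (np.ones(2, dtype=np.int8) + np.ones(2, dtype=np.float32)).dtype
+ dtype('float32')
+ >>> (np.ones(2, dtype=np.int8) + 1).dtype    # Python int: the array type prevails
+ dtype('int8')
+ >>> (np.ones(2, dtype=np.int8) + 1.0).dtype  # Python float: higher kind wins
+ dtype('float64')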
+
+ufunc methods
+=============
+
+Binary ufuncs support 4 methods. These methods are explained in detail in xxx.
+
+**.reduce(arr)** applies the binary operator to the elements of the array in
+sequence, reducing them to a single result. For example: ::
+
+ >>> np.add.reduce(np.arange(10)) # adds all elements of the array
+ 45
+
+For multidimensional arrays, the first dimension is reduced by default: ::
+
+ >>> np.add.reduce(np.arange(10).reshape(2,5))
+ array([ 5,  7,  9, 11, 13])
+
+The axis keyword can be used to specify a different axis to reduce: ::
+
+ >>> np.add.reduce(np.arange(10).reshape(2,5), axis=1)
+ array([10, 35])
+
+**.accumulate(arr)** applies the binary operator and generates an equivalently
+shaped array that contains the accumulated result at each element of the
+array. A couple of examples: ::
+
+ >>> np.add.accumulate(np.arange(10))
+ array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45])
+ >>> np.multiply.accumulate(np.arange(1,9))
+ array([    1,     2,     6,    24,   120,   720,  5040, 40320])
+
+The behavior for multidimensional arrays is the same as for .reduce(), as is
+the use of the axis keyword.
+
+**.reduceat(arr,indices)** allows one to apply reduce to selected parts of an
+array. It is a difficult method to understand. See the documentation in xxx.
+
+**.outer(arr1,arr2)** generates an outer operation on the two arrays arr1 and
+arr2. It will work on multidimensional arrays (the shape of the result is the
+concatenation of the two input shapes): ::
+
+ >>> np.multiply.outer(np.arange(3), np.arange(4))
+ array([[0, 0, 0, 0],
+        [0, 1, 2, 3],
+        [0, 2, 4, 6]])
+
+Output arguments
+================
+
+All ufuncs accept an optional output array, which must have the expected
+output shape. Beware that if the output array is of a different (and lower)
+type than the result, the results may be silently truncated or otherwise
+corrupted in the downcast to the lower type. This usage is useful when one
+wants to avoid creating large temporary arrays and instead reuse the same
+array memory repeatedly (at the expense of not being able to use the more
+convenient operator notation in expressions). Note that when the output
+argument is used, the ufunc still returns a reference to the result: ::
+
+ >>> x = np.arange(2)
+ >>> np.add(np.arange(2), np.arange(2.), x)
+ array([0, 2])
+ >>> x
+ array([0, 2])
+
+and & or as ufuncs
+==================
+
+Invariably people try to use the python 'and' and 'or' as logical operators
+(and quite understandably). But these are keywords that Python treats
+specially; they cannot be overloaded with array equivalents. Thus, using
+'and' or 'or' with an array results in an error. There are two alternatives:
+
+ 1) use the ufunc functions logical_and() and logical_or().
+ 2) use the bitwise operators & and \\|. The drawback of these is that if
+    the arguments to these operators are not boolean arrays, the result is
+    likely incorrect. On the other hand, most usages of logical_and and
+    logical_or are with boolean arrays. As long as one is careful, this is
+    a convenient way to apply these operators.
 """
diff --git a/numpy/doc/reference/zen.py b/numpy/doc/reference/zen.py
deleted file mode 100644
index bf6873f68..000000000
--- a/numpy/doc/reference/zen.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""
-
-============
-Zen of NumPy
-============
-
-Placehold for Zen of NumPy documentation.
-
-"""
-- cgit v1.2.1