From c43e0e5c0f2e8dc52cbc1eed71bf93aa281df3d7 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Fri, 5 May 2017 12:36:36 -0400 Subject: DOC: update structured array docs to reflect #6053 [ci skip] --- numpy/doc/structured_arrays.py | 663 +++++++++++++++++++++++++++++------------ 1 file changed, 470 insertions(+), 193 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 5289e6d0b..749018f35 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -6,231 +6,508 @@ Structured Arrays Introduction ============ -NumPy provides powerful capabilities to create arrays of structured datatype. -These arrays permit one to manipulate the data by named fields. A simple -example will show what is meant.: :: +Structured arrays are ndarrays whose datatype is a composition of simpler +datatypes organized as a sequence of named :term:`fields `. For example, +:: - >>> x = np.array([(1,2.,'Hello'), (2,3.,"World")], - ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) + >>> x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], + ... dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]) >>> x - array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) + array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], + dtype=[('name', 'S10'), ('age', '>> x[1] - (2,3.,"World") + ('Fido', 3, 27.0) -Conveniently, one can access any field of the array by indexing using the -string that names that field. 
:: +You can access and modify individual fields of a structured array by indexing +with the field name:: - >>> y = x['bar'] - >>> y - array([ 2., 3.], dtype=float32) - >>> y[:] = 2*y - >>> y - array([ 4., 6.], dtype=float32) + >>> x['age'] + array([9, 3], dtype=int32) + >>> x['age'] = 5 >>> x - array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) + array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], + dtype=[('name', 'S10'), ('age', '` reference page, and in +summary they are: + +1. A list of tuples, one tuple per field + + Each tuple has the form ``(fieldname, datatype, shape)`` where shape is + optional. ``fieldname`` is a string (or tuple if titles are used, see + :ref:`Field Titles ` below), ``datatype`` may be any object + convertible to a datatype, and shape (optional) is a tuple of integers + specifying subarray shape. + + >>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) + dtype=[('x', '>> np.dtype([('x', 'f4'),('', 'i4'),('z', 'i8')]) + dtype([('x', '` may be used in a string and separated by + commas. The itemsize and byte offsets of the fields are determined + automatically, and the field names are given the default names ``f0``, + ``f1``, etc. :: + + >>> np.dtype('i8,f4,S3') + dtype([('f0', '>> np.dtype('3int8, float32, (2,3)float64') + dtype([('f0', 'i1', 3), ('f1', '>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4','f4']}) + dtype([('col1', '>> np.dtype({'names': ['col1', 'col2'], + ... 'formats': ['i4','f4'], + ... 'offsets': [0, 4], + ... 'itemsize': 12}) + dtype({'names':['col1','col2'], 'formats':['`) cannot overlap with other fields, + because of the risk of clobbering the internal object pointer and then + dereferencing it. + + The optional 'aligned' value can be set to ``True`` to make the automatic + offset computation use aligned offsets (see :ref:`offsets-and-alignment`), + as if the 'align' keyword argument of :func:`numpy.dtype` had been set to + True. 
+ + The optional 'titles' value should be a list of titles of the same length + as 'names', see :ref:`Field Titles <titles>` below. + +4. A dictionary of field names + + The use of this form of specification is discouraged, but documented here + because older numpy code may use it. The keys of the dictionary are the + field names and the values are tuples specifying type and offset:: + + >>> np.dtype({'col1': ('i1',0), 'col2': ('f4',1)}) + dtype([(('col1'), 'i1'), (('col2'), '>f4')]) + + This form is discouraged because Python dictionaries do not preserve order + in Python versions before Python 3.6, and the order of the fields in a + structured dtype has meaning. :ref:`Field Titles <titles>` may be + specified by using a 3-tuple, see below. + +Manipulating and Displaying Structured Datatypes +------------------------------------------------ + +The list of field names of a structured datatype can be found in the ``names`` +attribute of the dtype object:: + + >>> d = np.dtype([('x', 'i8'), ('y', 'f4')]) + >>> d.names + ('x', 'y') + +The field names may be modified by assigning to the ``names`` attribute using a +sequence of strings of the same length. + +The dtype object also has a dictionary-like attribute, ``fields``, whose keys +are the field names (and :ref:`Field Titles <titles>`, see below) and whose +values are tuples containing the dtype and byte offset of each field. :: + + >>> d.fields + mappingproxy({'x': (dtype('int64'), 0), 'y': (dtype('float32'), 8)}) + +Both the ``names`` and ``fields`` attributes will equal ``None`` for +unstructured arrays. + +The string representation of a structured datatype is shown in the "list of +tuples" form if possible, otherwise numpy falls back to using the more general +dictionary form. + +.. 
_offsets-and-alignment: + +Automatic Byte Offsets and Alignment +------------------------------------ + +Numpy uses one of two methods to automatically determine the field byte offsets +and the overall itemsize of a structured datatype, depending on whether +``align=True`` was specified as a keyword argument to :func:`numpy.dtype`. + +By default (with ``align=False``), numpy will pack the fields together tightly +such that each field starts at the byte offset the previous field ended, and the +fields are contiguous in memory. :: + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2')) + offsets: [0, 1, 2, 6, 7, 15] + itemsize: 17 + +If ``align=True`` is set, numpy will pad the structure in the same way many C +compilers would pad a C-struct. Aligned structures can give a performance +improvement in some cases, at the cost of increased datatype size. Padding +bytes are inserted between fields such that each field's byte offset will be a +multiple of that field's alignment (usually equal to the field's size in bytes +for simple datatypes, see :c:member:`PyArray_Descr.alignment`). +The structure will also have trailing padding added so that its itemsize is a +multiple of the largest field's alignment. :: + + >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) + offsets: [0, 1, 4, 8, 16, 24] + itemsize: 32 + +Note that although almost all modern C compilers pad in this way by default, +padding in C structs is C-implementation-dependent so this memory layout is not +guaranteed to exactly match that of a corresponding struct in a C program. Some +massaging may be needed either on the numpy side or the C side to obtain exact +correspondence. 
+ +If offsets were specified manually using the optional ``offsets`` key in the +dictionary-based dtype specification, setting ``align=True`` will check that +each field's offset is a multiple of its size and that the itemsize is a +multiple of the largest field size, and raise an exception if not. -In these examples, y is a simple float array consisting of the 2nd field -in the structured type. But, rather than being a copy of the data in the structured -array, it is a view, i.e., it shares exactly the same memory locations. -Thus, when we updated this array by doubling its values, the structured -array shows the corresponding values as doubled as well. Likewise, if one -changes the structured array, the field view also changes: :: +If the offsets of the fields and itemsize of a structured array satisfy the +alignment conditions, the array will have the ``ALIGNED`` :ref:`flag +<numpy.ndarray.flags>` set. - >>> x[1] = (-1,-1.,"Master") - >>> x - array([(1, 4.0, 'Hello'), (-1, -1.0, 'Master')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) - >>> y - array([ 4., -1.], dtype=float32) - -Defining Structured Arrays -========================== - -One defines a structured array through the dtype object. There are -**several** alternative ways to define the fields of a record. Some of -these variants provide backward compatibility with Numeric, numarray, or -another module, and should not be used except for such purposes. These -will be so noted. One specifies record structure in -one of four alternative ways, using an argument (as supplied to a dtype -function keyword or a dtype object constructor itself). This -argument must be one of the following: 1) string, 2) tuple, 3) list, or -4) dictionary. Each of these is briefly described below. - -1) String argument. -In this case, the constructor expects a comma-separated list of type -specifiers, optionally with extra shape information. The fields are -given the default names 'f0', 'f1', 'f2' and so on. 
-The type specifiers can take 4 different forms: :: - - a) b1, i1, i2, i4, i8, u1, u2, u4, u8, f2, f4, f8, c8, c16, a - (representing bytes, ints, unsigned ints, floats, complex and - fixed length strings of specified byte lengths) - b) int8,...,uint8,...,float16, float32, float64, complex64, complex128 - (this time with bit sizes) - c) older Numeric/numarray type specifications (e.g. Float32). - Don't use these in new code! - d) Single character type specifiers (e.g H for unsigned short ints). - Avoid using these unless you must. Details can be found in the - NumPy book - -These different styles can be mixed within the same string (but why would you -want to do that?). Furthermore, each type specifier can be prefixed -with a repetition number, or a shape. In these cases an array -element is created, i.e., an array within a record. That array -is still referred to as a single field. An example: :: - - >>> x = np.zeros(3, dtype='3int8, float32, (2,3)float64') - >>> x - array([([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])], - dtype=[('f0', '|i1', 3), ('f1', '>f4'), ('f2', '>f8', (2, 3))]) - -By using strings to define the record structure, it precludes being -able to name the fields in the original definition. The names can -be changed as shown later, however. - -2) Tuple argument: The only relevant tuple case that applies to record -structures is when a structure is mapped to an existing data type. This -is done by pairing in a tuple, the existing data type with a matching -dtype definition (using any of the variants being described here). As -an example (using a definition using a list, so see 3) for further -details): :: - - >>> x = np.zeros(3, dtype=('i4',[('r','u1'), ('g','u1'), ('b','u1'), ('a','u1')])) - >>> x - array([0, 0, 0]) - >>> x['r'] - array([0, 0, 0], dtype=uint8) +.. 
_titles: -In this case, an array is produced that looks and acts like a simple int32 array, -but also has definitions for fields that use only one byte of the int32 (a bit -like Fortran equivalencing). +Field Titles +------------ -3) List argument: In this case the record structure is defined with a list of -tuples. Each tuple has 2 or 3 elements specifying: 1) The name of the field -('' is permitted), 2) the type of the field, and 3) the shape (optional). -For example:: +In addition to field names, fields may also have an associated :term:`title`, +an alternate name, which is sometimes used as an additional description or +mnemonic for the field. The title may be used to index an array, just like a +fieldname. - >>> x = np.zeros(3, dtype=[('x','f4'),('y',np.float32),('value','f4',(2,2))]) - >>> x - array([(0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), - (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), - (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]])], - dtype=[('x', '>f4'), ('y', '>f4'), ('value', '>f4', (2, 2))]) - -4) Dictionary argument: two different forms are permitted. The first consists -of a dictionary with two required keys ('names' and 'formats'), each having an -equal sized list of values. The format list contains any type/shape specifier -allowed in other contexts. The names must be strings. There are two optional -keys: 'offsets' and 'titles'. Each must be a correspondingly matching list to -the required two where offsets contain integer offsets for each field, and -titles are objects containing metadata for each field (these do not have -to be strings), where the value of None is permitted. 
As an example: :: - - >>> x = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']}) - >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[('col1', '>i4'), ('col2', '>f4')]) +To add titles when using the list-of-tuples form of dtype specification, the +fieldname may be be specified as a tuple of two strings (instead of a single +string), which will be the field's title and field name respectively. For +example:: + + >>> np.dtype([(('my title', 'name'), 'f4')]) + +When using the first form of dictionary-based specification, the titles may be +supplied as an extra ``'titles'`` key as described above. When using the second +(discouraged) dictionary-based specification, the title can be supplied by +providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual +2-element tuple:: + + >>> np.dtype({'name': ('i4', 0, 'my title')}) -The other dictionary form permitted is a dictionary of name keys with tuple -values specifying type, offset, and an optional title. :: +The ``dtype.fields`` dictionary will contain :term:`titles` as keys, if any +titles are used. This means effectively that a field with a title will be +represented twice in the fields dictionary. The tuple values for these fields +will also have a third element, the field title. + +Because of this, and because the ``names`` attribute preserves the field order +while the ``fields`` attribute may not, it is recommended to iterate through +the fields of a dtype using the ``names`` attribute of the dtype (which will +not list titles), as in:: - >>> x = np.zeros(3, dtype={'col1':('i1',0,'title 1'), 'col2':('f4',1,'title 2')}) + >>> for name in d.names: + ... print(d.fields[name][:2]) + +Union types +----------- + +Structured datatypes are implemented in numpy to have base type +:class:`numpy.void` by default, but it is possible to interpret other numpy +types as structured types using the ``(base_dtype, dtype)`` form of dtype +specification described in +:ref:`Data Type Objects `. 
Here, ``base_dtype`` is +the desired underlying dtype, and fields and flags will be copied from +``dtype``. This dtype is similar to a 'union' in C. + +Indexing and Assignment to Structured arrays +============================================= + +Assigning data to a Structured Array +------------------------------------ + +There are a number of ways to assign values to a structured array: Using python +tuples, using scalar values, or using other structured arrays. + +Assignment from Python Native Types (Tuples) +``````````````````````````````````````````` + +The simplest way to assign values to a structured array is using python +tuples. Each assigned value should be a tuple (and not a list or array, as +these will trigger numpy's broadcasting rules) of length equal to the number of +fields in the array. The tuple's elements are assigned to the successive fields +of the array, from left to right:: + + >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') + >>> x[1] = (7,8,9) >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[(('title 1', 'col1'), '|i1'), (('title 2', 'col2'), '>f4')]) + array([(1, 2., 3.), (7, 8., 9.)], + dtype=[('f0', '>> x.dtype.names - ('col1', 'col2') - >>> x.dtype.names = ('x', 'y') + >>> x = np.zeros(2, dtype='i8,f4,?,S1') + >>> x[:] = 3 >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[(('title 1', 'x'), '|i1'), (('title 2', 'y'), '>f4')]) - >>> x.dtype.names = ('x', 'y', 'z') # wrong number of names - : must replace all names at once with a sequence of length 2 + array([(3, 3.0, True, b'3'), (3, 3.0, True, b'3')], + dtype=[('f0', '>> x[:] = np.arange(2) + >>> x + array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], + dtype=[('f0', '>> x = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) + >>> y = np.zeros(2, dtype=[('A', 'i4')]) + >>> a = np.zeros(2, dtype='i4') + >>> a[:] = x + ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. 
+ >>> a[:] = y + >>> a + array([0, 0], dtype=int32) + +Assignment from other Structured Arrays +``````````````````````````````````````` + +Assignment between two structured arrays occurs as if the source elements had +been converted to tuples and then assigned to the destination elements. That +is, the first field of the source array is assigned to the first field of the +destination array, and the second field likewise, and so on, regardless of +field names. Structured arrays with a different number of fields cannot be +assigned to each other. Bytes of the destination structure which are not +included in any of the fields are unaffected. :: + + >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'f4'), ('c', 'S3')]) + >>> b = np.ones(3, dtype=[('x', 'f4'), ('y', 'S3'), ('z', 'O')]) + >>> b[:] = a + >>> b + array([(0.0, b'0.0', b''), (0.0, b'0.0', b''), (0.0, b'0.0', b'')], + dtype=[('x', '>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x['foo'] + array([1, 3]) + >>> x['foo'] = 10 + >>> x + array([(10, 2.), (10, 4.)], + dtype=[('foo', '>> y = x['bar'] + >>> y[:] = 10 + >>> x + array([(10, 5.), (10, 5.)], + dtype=[('foo', '>> y.dtype, y.shape, y.strides + (dtype('float32'), (2,), (12,)) -The field titles provide a standard place to put associated info for fields. -They do not have to be strings. 
:: +Accessing Multiple Fields +``````````````````````````` - >>> x.dtype.fields['x'][2] - 'title 1' +One can index a structured array with a multi-field index, where the index is a +list of field names:: -Accessing multiple fields at once -==================================== + >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')]) + >>> a[['a', 'c']] + array([(0, 0.0), (0, 0.0), (0, 0.0)], + dtype={'names':['a','c'], 'formats':['>> a[['a', 'c']] = (2, 3) + >>> a + array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], + dtype=[('a', '>> a[['a', 'c']] = a[['c', 'a']] + +Indexing with an Integer to get a Structured Scalar +``````````````````````````````````````````````````` + +Indexing a single element of a structured array (with an integer index) returns +a structured scalar:: + + >>> x = np.array([(1, 2., 3.)], dtype='i,f,f') + >>> scalar = x[0] + >>> scalar + (1, 2., 3.) + >>> type(scalar) + numpy.void + +Importantly, unlike other numpy scalars, structured scalars are mutable and act +like views into the original array, such that modifying the scalar will modify +the original array. Structured scalars also support access and assignment by +field name:: + + >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> s = x[0] + >>> s['bar'] = 100 + >>> x + array([(1, 100.), (3, 4.)], + dtype=[('foo', '>> x = np.array([(1.5,2.5,(1.0,2.0)),(3.,4.,(4.,5.)),(1.,3.,(2.,6.))], - dtype=[('x','f4'),('y',np.float32),('value','f4',(2,2))]) + >>> scalar = np.array([(1, 2., 3.)], dtype='i,f,f')[0] + >>> scalar[0] + 1 + >>> scalar[1] = 4 -Notice that `x` is created with a list of tuples. :: +Thus, tuples might be though of as the native Python equivalent to numpy's +structured types, much like native python integers are the equivalent to +numpy's integer types. 
Structured scalars may be converted to a tuple by +calling :func:`ndarray.item`:: - >>> x[['x','y']] - array([(1.5, 2.5), (3.0, 4.0), (1.0, 3.0)], - dtype=[('x', '>> x[['x','value']] - array([(1.5, [[1.0, 2.0], [1.0, 2.0]]), (3.0, [[4.0, 5.0], [4.0, 5.0]]), - (1.0, [[2.0, 6.0], [2.0, 6.0]])], - dtype=[('x', '>> scalar.item(), type(scalar.item()) + ((1, 2.0, 3.0), tuple) -The fields are returned in the order they are asked for.:: +Viewing Structured Arrays Containing Objects +-------------------------------------------- - >>> x[['y','x']] - array([(2.5, 1.5), (4.0, 3.0), (3.0, 1.0)], - dtype=[('y', '>> arr = np.zeros((5,), dtype=[('var1','f8'),('var2','f8')]) - >>> arr['var1'] = np.arange(5) + >>> a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'i4')]) + >>> b = np.ones(2, dtype=[('a', 'i4'), ('b', 'i4')]) + >>> a == b + array([False, False], dtype=bool) -If you fill it in row by row, it takes a take a tuple -(but not a list or array!):: +Currently, if the dtypes of two arrays are not equivalent all comparisons will +return ``False``. This behavior is deprecated as of numpy 1.10 and may change +in the future. - >>> arr[0] = (10,20) - >>> arr - array([(10.0, 20.0), (1.0, 0.0), (2.0, 0.0), (3.0, 0.0), (4.0, 0.0)], - dtype=[('var1', '`` operators will always return ``False`` when +comparing structured arrays. Many other pairwise operators are not supported. Record Arrays ============= -For convenience, numpy provides "record arrays" which allow one to access -fields of structured arrays by attribute rather than by index. Record arrays -are structured arrays wrapped using a subclass of ndarray, -:class:`numpy.recarray`, which allows field access by attribute on the array -object, and record arrays also use a special datatype, :class:`numpy.record`, -which allows field access by attribute on the individual elements of the array. 
+As an optional convenience numpy provides an ndarray subclass, +:class:`numpy.recarray`, and associated helper functions in the +:mod:`numpy.rec` submodule, which allows access to fields of structured arrays +by attribute, instead of only by index. Record arrays also use a special +datatype, :class:`numpy.record`, which allows field access by attribute on the +structured scalars obtained from the array. -The simplest way to create a record array is with :func:`numpy.rec.array`: :: +The simplest way to create a record array is with :func:`numpy.rec.array`:: - >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], + >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) >>> recordarr.bar array([ 2., 3.], dtype=float32) >>> recordarr[1:2] - rec.array([(2, 3.0, 'World')], + rec.array([(2, 3.0, 'World')], dtype=[('foo', '>> recordarr[1:2].foo array([2], dtype=int32) @@ -239,27 +516,28 @@ The simplest way to create a record array is with :func:`numpy.rec.array`: :: >>> recordarr[1].baz 'World' -numpy.rec.array can convert a wide variety of arguments into record arrays, -including normal structured arrays: :: +:func:`numpy.rec.array` can convert a wide variety of arguments into record +arrays, including structured arrays:: - >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], ... dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) >>> recordarr = np.rec.array(arr) -The numpy.rec module provides a number of other convenience functions for +The :mod:`numpy.rec` module provides a number of other convenience functions for creating record arrays, see :ref:`record array creation routines `. A record array representation of a structured array can be obtained using the -appropriate :ref:`view`: :: +appropriate :ref:`view`:: - >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], ... 
dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) - >>> recordarr = arr.view(dtype=dtype((np.record, arr.dtype)), + >>> recordarr = arr.view(dtype=dtype((np.record, arr.dtype)), ... type=np.recarray) -For convenience, viewing an ndarray as type `np.recarray` will automatically -convert to `np.record` datatype, so the dtype can be left out of the view: :: +For convenience, viewing an ndarray as type :class:`np.recarray` will +automatically convert to :class:`np.record` datatype, so the dtype can be left +out of the view:: >>> recordarr = arr.view(np.recarray) >>> recordarr.dtype @@ -267,14 +545,14 @@ convert to `np.record` datatype, so the dtype can be left out of the view: :: To get back to a plain ndarray both the dtype and type must be reset. The following view does so, taking into account the unusual case that the -recordarr was not a structured type: :: +recordarr was not a structured type:: >>> arr2 = recordarr.view(recordarr.dtype.fields or recordarr.dtype, np.ndarray) Record array fields accessed by index or by attribute are returned as a record array if the field has a structured type but as a plain ndarray otherwise. :: - >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], + >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) >>> type(recordarr.foo) @@ -283,8 +561,7 @@ array if the field has a structured type but as a plain ndarray otherwise. :: Note that if a field has the same name as an ndarray attribute, the ndarray attribute takes precedence. Such fields will be inaccessible by attribute but -may still be accessed by index. - +will still be accessible by index. 
""" from __future__ import division, absolute_import, print_function -- cgit v1.2.1 From a08da3f34e8c1fe7e8997f254acf1c92eea520f9 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 9 Nov 2017 21:41:17 -0500 Subject: DOC: update structured array docs to reflect #6053, fixups [ci skip] --- numpy/doc/structured_arrays.py | 112 ++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 749018f35..65558a5a0 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -62,7 +62,7 @@ structure. The datatype of a field may be any numpy datatype including other structured datatypes, and it may also be a :term:`sub-array` which behaves like an ndarray of a specified shape. The offsets of the fields are arbitrary, and fields may even overlap. These offsets are usually determined automatically by -numpy, but can also be manually specified. +numpy, but can also be specified. Structured Datatype Creation ---------------------------- @@ -78,8 +78,8 @@ summary they are: Each tuple has the form ``(fieldname, datatype, shape)`` where shape is optional. ``fieldname`` is a string (or tuple if titles are used, see :ref:`Field Titles ` below), ``datatype`` may be any object - convertible to a datatype, and shape (optional) is a tuple of integers - specifying subarray shape. + convertible to a datatype, and ``shape`` is a tuple of integers specifying + subarray shape. >>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) dtype=[('x', '>> def print_offsets(d): ... print("offsets:", [d.fields[name][1] for name in d.names]) @@ -211,9 +211,9 @@ If ``align=True`` is set, numpy will pad the structure in the same way many C compilers would pad a C-struct. Aligned structures can give a performance improvement in some cases, at the cost of increased datatype size. 
Padding bytes are inserted between fields such that each field's byte offset will be a -multiple of that field's alignment (usually equal to the field's size in bytes -for simple datatypes, see :c:member:`PyArray_Descr.alignment`). -The structure will also have trailing padding added so that its itemsize is a +multiple of that field's alignment, which is usually equal to the field's size +in bytes for simple datatypes, see :c:member:`PyArray_Descr.alignment`. The +structure will also have trailing padding added so that its itemsize is a multiple of the largest field's alignment. :: >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) @@ -223,10 +223,10 @@ multiple of the largest field's alignment. :: Note that although almost all modern C compilers pad in this way by default, padding in C structs is C-implementation-dependent so this memory layout is not guaranteed to exactly match that of a corresponding struct in a C program. Some -massaging may be needed either on the numpy side or the C side to obtain exact +work may be needed, either on the numpy side or the C side, to obtain exact correspondence. -If offsets were specified manually using the optional ``offsets`` key in the +If offsets were specified using the optional ``offsets`` key in the dictionary-based dtype specification, setting ``align=True`` will check that each field's offset is a multiple of its size and that the itemsize is a multiple of the largest field size, and raise an exception if not. @@ -242,12 +242,12 @@ Field Titles In addition to field names, fields may also have an associated :term:`title`, an alternate name, which is sometimes used as an additional description or -mnemonic for the field. The title may be used to index an array, just like a -fieldname. +alias for the field. The title may be used to index an array, just like a +field name. 
To add titles when using the list-of-tuples form of dtype specification, the -fieldname may be be specified as a tuple of two strings (instead of a single -string), which will be the field's title and field name respectively. For +field name may be specified as a tuple of two strings instead of a single +string, which will be the field's title and field name respectively. For example:: >>> np.dtype([(('my title', 'name'), 'f4')]) @@ -263,12 +263,11 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual The ``dtype.fields`` dictionary will contain :term:`titles` as keys, if any titles are used. This means effectively that a field with a title will be represented twice in the fields dictionary. The tuple values for these fields -will also have a third element, the field title. - -Because of this, and because the ``names`` attribute preserves the field order -while the ``fields`` attribute may not, it is recommended to iterate through -the fields of a dtype using the ``names`` attribute of the dtype (which will -not list titles), as in:: +will also have a third element, the field title. Because of this, and because +the ``names`` attribute preserves the field order while the ``fields`` +attribute may not, it is recommended to iterate through the fields of a dtype +using the ``names`` attribute of the dtype, which will not list titles, as +in:: >>> for name in d.names: ... print(d.fields[name][:2]) @@ -296,10 +295,10 @@ tuples, using scalar values, or using other structured arrays. Assignment from Python Native Types (Tuples) ``````````````````````````````````````````` -The simplest way to assign values to a structured array is using python -tuples. Each assigned value should be a tuple (and not a list or array, as -these will trigger numpy's broadcasting rules) of length equal to the number of -fields in the array. 
The tuple's elements are assigned to the successive fields +The simplest way to assign values to a structured array is using python tuples. +Each assigned value should be a tuple of length equal to the number of fields +in the array, and not a list or array as these will trigger numpy's +broadcasting rules. The tuple's elements are assigned to the successive fields of the array, from left to right:: >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') @@ -312,8 +311,8 @@ Assignment from Scalars ``````````````````````` A scalar assigned to a structured element will be assigned to all fields. This -happens when a scalar is assigned to a structured array, or when a scalar array -is assigned to a structured array:: +happens when a scalar is assigned to a structured array, or when an +unstructured array is assigned to a structured array:: >>> x = np.zeros(2, dtype='i8,f4,?,S1') >>> x[:] = 3 @@ -325,16 +324,16 @@ is assigned to a structured array:: array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], dtype=[('f0', '>> x = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) - >>> y = np.zeros(2, dtype=[('A', 'i4')]) - >>> a = np.zeros(2, dtype='i4') - >>> a[:] = x + >>> twofield = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) + >>> onefield = np.zeros(2, dtype=[('A', 'i4')]) + >>> nostruct = np.zeros(2, dtype='i4') + >>> nostruct[:] = twofield ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. - >>> a[:] = y - >>> a + >>> nostruct[:] = onefield + >>> nostruct array([0, 0], dtype=int32) Assignment from other Structured Arrays @@ -389,7 +388,7 @@ memory locations and writing to the view will modify the original array. 
:: dtype=[('foo', '>> y.dtype, y.shape, y.strides (dtype('float32'), (2,), (12,)) @@ -410,12 +409,12 @@ list of field names:: dtype=[('a', '>> type(scalar) numpy.void -Importantly, unlike other numpy scalars, structured scalars are mutable and act -like views into the original array, such that modifying the scalar will modify -the original array. Structured scalars also support access and assignment by -field name:: +Unlike other numpy scalars, structured scalars are mutable and act like views +into the original array, such that modifying the scalar will modify the +original array. Structured scalars also support access and assignment by field +name:: >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> s = x[0] @@ -453,7 +452,7 @@ Similarly to tuples, structured scalars can also be indexed with an integer:: 1 >>> scalar[1] = 4 -Thus, tuples might be though of as the native Python equivalent to numpy's +Thus, tuples might be thought of as the native Python equivalent to numpy's structured types, much like native python integers are the equivalent to numpy's integer types. Structured scalars may be converted to a tuple by calling :func:`ndarray.item`:: @@ -464,17 +463,17 @@ calling :func:`ndarray.item`:: Viewing Structured Arrays Containing Objects -------------------------------------------- -In order to prevent clobbering of object pointers in fields of +In order to prevent clobbering object pointers in fields of :class:`numpy.object` type, numpy currently does not allow views of structured arrays containing objects. Structure Comparison -------------------- -If the dtypes of two structured arrays are equivalent, testing the equality of -the arrays will result in a boolean array with the dimension of the original -arrays, with elements set to True where all fields of the corresponding -structures are equal. 
Structured dtypes are equivalent if the field names, +If the dtypes of two void structured arrays are equal, testing the equality of +the arrays will result in a boolean array with the dimensions of the original +arrays, with elements set to ``True`` where all fields of the corresponding +structures are equal. Structured dtypes are equal if the field names, dtypes and titles are the same, ignoring endianness, and the fields are in the same order:: @@ -483,21 +482,22 @@ the same order:: >>> a == b array([False, False], dtype=bool) -Currently, if the dtypes of two arrays are not equivalent all comparisons will -return ``False``. This behavior is deprecated as of numpy 1.10 and may change -in the future. +Currently, if the dtypes of two void structured arrays are not equivalent the +comparison fails, returning the scalar value ``False``. This behavior is +deprecated as of numpy 1.10 and will raise an error or perform elementwise +comparison in the future. -Currently, the ``<`` and ``>`` operators will always return ``False`` when -comparing structured arrays. Many other pairwise operators are not supported. +The ``<`` and ``>`` operators always return ``False`` when comparing void +structured arrays, and arithmetic and bitwise operations are not supported. Record Arrays ============= As an optional convenience numpy provides an ndarray subclass, :class:`numpy.recarray`, and associated helper functions in the -:mod:`numpy.rec` submodule, which allows access to fields of structured arrays -by attribute, instead of only by index. Record arrays also use a special -datatype, :class:`numpy.record`, which allows field access by attribute on the +:mod:`numpy.rec` submodule, that allows access to fields of structured arrays +by attribute instead of only by index. Record arrays also use a special +datatype, :class:`numpy.record`, that allows field access by attribute on the structured scalars obtained from the array. 
The simplest way to create a record array is with :func:`numpy.rec.array`:: -- cgit v1.2.1