From c43e0e5c0f2e8dc52cbc1eed71bf93aa281df3d7 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Fri, 5 May 2017 12:36:36 -0400 Subject: DOC: update structured array docs to reflect #6053 [ci skip] --- numpy/doc/structured_arrays.py | 663 +++++++++++++++++++++++++++++------------ 1 file changed, 470 insertions(+), 193 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 5289e6d0b..749018f35 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -6,231 +6,508 @@ Structured Arrays Introduction ============ -NumPy provides powerful capabilities to create arrays of structured datatype. -These arrays permit one to manipulate the data by named fields. A simple -example will show what is meant.: :: +Structured arrays are ndarrays whose datatype is a composition of simpler +datatypes organized as a sequence of named :term:`fields `. For example, +:: - >>> x = np.array([(1,2.,'Hello'), (2,3.,"World")], - ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) + >>> x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], + ... dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]) >>> x - array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) + array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], + dtype=[('name', 'S10'), ('age', '>> x[1] - (2,3.,"World") + ('Fido', 3, 27.0) -Conveniently, one can access any field of the array by indexing using the -string that names that field. 
:: +You can access and modify individual fields of a structured array by indexing +with the field name:: - >>> y = x['bar'] - >>> y - array([ 2., 3.], dtype=float32) - >>> y[:] = 2*y - >>> y - array([ 4., 6.], dtype=float32) + >>> x['age'] + array([9, 3], dtype=int32) + >>> x['age'] = 5 >>> x - array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) + array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], + dtype=[('name', 'S10'), ('age', '` reference page, and in +summary they are: + +1. A list of tuples, one tuple per field + + Each tuple has the form ``(fieldname, datatype, shape)`` where shape is + optional. ``fieldname`` is a string (or tuple if titles are used, see + :ref:`Field Titles ` below), ``datatype`` may be any object + convertible to a datatype, and shape (optional) is a tuple of integers + specifying subarray shape. + + >>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) + dtype=[('x', '>> np.dtype([('x', 'f4'),('', 'i4'),('z', 'i8')]) + dtype([('x', '` may be used in a string and separated by + commas. The itemsize and byte offsets of the fields are determined + automatically, and the field names are given the default names ``f0``, + ``f1``, etc. :: + + >>> np.dtype('i8,f4,S3') + dtype([('f0', '>> np.dtype('3int8, float32, (2,3)float64') + dtype([('f0', 'i1', 3), ('f1', '>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4','f4']}) + dtype([('col1', '>> np.dtype({'names': ['col1', 'col2'], + ... 'formats': ['i4','f4'], + ... 'offsets': [0, 4], + ... 'itemsize': 12}) + dtype({'names':['col1','col2'], 'formats':['`) cannot overlap with other fields, + because of the risk of clobbering the internal object pointer and then + dereferencing it. + + The optional 'aligned' value can be set to ``True`` to make the automatic + offset computation use aligned offsets (see :ref:`offsets-and-alignment`), + as if the 'align' keyword argument of :func:`numpy.dtype` had been set to + True. 
+ + The optional 'titles' value should be a list of titles of the same length + as 'names', see :ref:`Field Titles <titles>` below. + +4. A dictionary of field names + + The use of this form of specification is discouraged, but documented here + because older numpy code may use it. The keys of the dictionary are the + field names and the values are tuples specifying type and offset:: + + >>> np.dtype({'col1': ('i1',0), 'col2': ('f4',1)}) + dtype([(('col1'), 'i1'), (('col2'), '>f4')]) + + This form is discouraged because Python dictionaries do not preserve order + in Python versions before Python 3.6, and the order of the fields in a + structured dtype has meaning. :ref:`Field Titles <titles>` may be + specified by using a 3-tuple, see below. + +Manipulating and Displaying Structured Datatypes +------------------------------------------------ + +The list of field names of a structured datatype can be found in the ``names`` +attribute of the dtype object:: + + >>> d = np.dtype([('x', 'i8'), ('y', 'f4')]) + >>> d.names + ('x', 'y') + +The field names may be modified by assigning to the ``names`` attribute using a +sequence of strings of the same length. + +The dtype object also has a dictionary-like attribute, ``fields``, whose keys +are the field names (and :ref:`Field Titles <titles>`, see below) and whose +values are tuples containing the dtype and byte offset of each field. :: + + >>> d.fields + mappingproxy({'x': (dtype('int64'), 0), 'y': (dtype('float32'), 8)}) + +Both the ``names`` and ``fields`` attributes will equal ``None`` for +unstructured arrays. + +The string representation of a structured datatype is shown in the "list of +tuples" form if possible, otherwise numpy falls back to using the more general +dictionary form. + +..
_offsets-and-alignment: + +Automatic Byte Offsets and Alignment +------------------------------------ + +Numpy uses one of two methods to automatically determine the field byte offsets +and the overall itemsize of a structured datatype, depending on whether +``align=True`` was specified as a keyword argument to :func:`numpy.dtype`. + +By default (with ``align=False``), numpy will pack the fields together tightly +such that each field starts at the byte offset the previous field ended, and the +fields are contiguous in memory. :: + + >>> def print_offsets(d): + ... print("offsets:", [d.fields[name][1] for name in d.names]) + ... print("itemsize:", d.itemsize) + >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2')) + offsets: [0, 1, 2, 6, 7, 15] + itemsize: 17 + +If ``align=True`` is set, numpy will pad the structure in the same way many C +compilers would pad a C-struct. Aligned structures can give a performance +improvement in some cases, at the cost of increased datatype size. Padding +bytes are inserted between fields such that each field's byte offset will be a +multiple of that field's alignment (usually equal to the field's size in bytes +for simple datatypes, see :c:member:`PyArray_Descr.alignment`). +The structure will also have trailing padding added so that its itemsize is a +multiple of the largest field's alignment. :: + + >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) + offsets: [0, 1, 4, 8, 16, 24] + itemsize: 32 + +Note that although almost all modern C compilers pad in this way by default, +padding in C structs is C-implementation-dependent so this memory layout is not +guaranteed to exactly match that of a corresponding struct in a C program. Some +massaging may be needed either on the numpy side or the C side to obtain exact +correspondence. 
+ +If offsets were specified manually using the optional ``offsets`` key in the +dictionary-based dtype specification, setting ``align=True`` will check that +each field's offset is a multiple of its size and that the itemsize is a +multiple of the largest field size, and raise an exception if not. -In these examples, y is a simple float array consisting of the 2nd field -in the structured type. But, rather than being a copy of the data in the structured -array, it is a view, i.e., it shares exactly the same memory locations. -Thus, when we updated this array by doubling its values, the structured -array shows the corresponding values as doubled as well. Likewise, if one -changes the structured array, the field view also changes: :: +If the offsets of the fields and itemsize of a structured array satisfy the +alignment conditions, the array will have the ``ALIGNED`` :ref:`flag +` set. - >>> x[1] = (-1,-1.,"Master") - >>> x - array([(1, 4.0, 'Hello'), (-1, -1.0, 'Master')], - dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) - >>> y - array([ 4., -1.], dtype=float32) - -Defining Structured Arrays -========================== - -One defines a structured array through the dtype object. There are -**several** alternative ways to define the fields of a record. Some of -these variants provide backward compatibility with Numeric, numarray, or -another module, and should not be used except for such purposes. These -will be so noted. One specifies record structure in -one of four alternative ways, using an argument (as supplied to a dtype -function keyword or a dtype object constructor itself). This -argument must be one of the following: 1) string, 2) tuple, 3) list, or -4) dictionary. Each of these is briefly described below. - -1) String argument. -In this case, the constructor expects a comma-separated list of type -specifiers, optionally with extra shape information. The fields are -given the default names 'f0', 'f1', 'f2' and so on. 
-The type specifiers can take 4 different forms: :: - - a) b1, i1, i2, i4, i8, u1, u2, u4, u8, f2, f4, f8, c8, c16, a - (representing bytes, ints, unsigned ints, floats, complex and - fixed length strings of specified byte lengths) - b) int8,...,uint8,...,float16, float32, float64, complex64, complex128 - (this time with bit sizes) - c) older Numeric/numarray type specifications (e.g. Float32). - Don't use these in new code! - d) Single character type specifiers (e.g H for unsigned short ints). - Avoid using these unless you must. Details can be found in the - NumPy book - -These different styles can be mixed within the same string (but why would you -want to do that?). Furthermore, each type specifier can be prefixed -with a repetition number, or a shape. In these cases an array -element is created, i.e., an array within a record. That array -is still referred to as a single field. An example: :: - - >>> x = np.zeros(3, dtype='3int8, float32, (2,3)float64') - >>> x - array([([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - ([0, 0, 0], 0.0, [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])], - dtype=[('f0', '|i1', 3), ('f1', '>f4'), ('f2', '>f8', (2, 3))]) - -By using strings to define the record structure, it precludes being -able to name the fields in the original definition. The names can -be changed as shown later, however. - -2) Tuple argument: The only relevant tuple case that applies to record -structures is when a structure is mapped to an existing data type. This -is done by pairing in a tuple, the existing data type with a matching -dtype definition (using any of the variants being described here). As -an example (using a definition using a list, so see 3) for further -details): :: - - >>> x = np.zeros(3, dtype=('i4',[('r','u1'), ('g','u1'), ('b','u1'), ('a','u1')])) - >>> x - array([0, 0, 0]) - >>> x['r'] - array([0, 0, 0], dtype=uint8) +.. 
_titles: -In this case, an array is produced that looks and acts like a simple int32 array, -but also has definitions for fields that use only one byte of the int32 (a bit -like Fortran equivalencing). +Field Titles +------------ -3) List argument: In this case the record structure is defined with a list of -tuples. Each tuple has 2 or 3 elements specifying: 1) The name of the field -('' is permitted), 2) the type of the field, and 3) the shape (optional). -For example:: +In addition to field names, fields may also have an associated :term:`title`, +an alternate name, which is sometimes used as an additional description or +mnemonic for the field. The title may be used to index an array, just like a +fieldname. - >>> x = np.zeros(3, dtype=[('x','f4'),('y',np.float32),('value','f4',(2,2))]) - >>> x - array([(0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), - (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]]), - (0.0, 0.0, [[0.0, 0.0], [0.0, 0.0]])], - dtype=[('x', '>f4'), ('y', '>f4'), ('value', '>f4', (2, 2))]) - -4) Dictionary argument: two different forms are permitted. The first consists -of a dictionary with two required keys ('names' and 'formats'), each having an -equal sized list of values. The format list contains any type/shape specifier -allowed in other contexts. The names must be strings. There are two optional -keys: 'offsets' and 'titles'. Each must be a correspondingly matching list to -the required two where offsets contain integer offsets for each field, and -titles are objects containing metadata for each field (these do not have -to be strings), where the value of None is permitted. 
As an example: :: - - >>> x = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']}) - >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[('col1', '>i4'), ('col2', '>f4')]) +To add titles when using the list-of-tuples form of dtype specification, the +fieldname may be be specified as a tuple of two strings (instead of a single +string), which will be the field's title and field name respectively. For +example:: + + >>> np.dtype([(('my title', 'name'), 'f4')]) + +When using the first form of dictionary-based specification, the titles may be +supplied as an extra ``'titles'`` key as described above. When using the second +(discouraged) dictionary-based specification, the title can be supplied by +providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual +2-element tuple:: + + >>> np.dtype({'name': ('i4', 0, 'my title')}) -The other dictionary form permitted is a dictionary of name keys with tuple -values specifying type, offset, and an optional title. :: +The ``dtype.fields`` dictionary will contain :term:`titles` as keys, if any +titles are used. This means effectively that a field with a title will be +represented twice in the fields dictionary. The tuple values for these fields +will also have a third element, the field title. + +Because of this, and because the ``names`` attribute preserves the field order +while the ``fields`` attribute may not, it is recommended to iterate through +the fields of a dtype using the ``names`` attribute of the dtype (which will +not list titles), as in:: - >>> x = np.zeros(3, dtype={'col1':('i1',0,'title 1'), 'col2':('f4',1,'title 2')}) + >>> for name in d.names: + ... print(d.fields[name][:2]) + +Union types +----------- + +Structured datatypes are implemented in numpy to have base type +:class:`numpy.void` by default, but it is possible to interpret other numpy +types as structured types using the ``(base_dtype, dtype)`` form of dtype +specification described in +:ref:`Data Type Objects `. 
Here, ``base_dtype`` is +the desired underlying dtype, and fields and flags will be copied from +``dtype``. This dtype is similar to a 'union' in C. + +Indexing and Assignment to Structured arrays +============================================= + +Assigning data to a Structured Array +------------------------------------ + +There are a number of ways to assign values to a structured array: Using python +tuples, using scalar values, or using other structured arrays. + +Assignment from Python Native Types (Tuples) +``````````````````````````````````````````` + +The simplest way to assign values to a structured array is using python +tuples. Each assigned value should be a tuple (and not a list or array, as +these will trigger numpy's broadcasting rules) of length equal to the number of +fields in the array. The tuple's elements are assigned to the successive fields +of the array, from left to right:: + + >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') + >>> x[1] = (7,8,9) >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[(('title 1', 'col1'), '|i1'), (('title 2', 'col2'), '>f4')]) + array([(1, 2., 3.), (7, 8., 9.)], + dtype=[('f0', '>> x.dtype.names - ('col1', 'col2') - >>> x.dtype.names = ('x', 'y') + >>> x = np.zeros(2, dtype='i8,f4,?,S1') + >>> x[:] = 3 >>> x - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype=[(('title 1', 'x'), '|i1'), (('title 2', 'y'), '>f4')]) - >>> x.dtype.names = ('x', 'y', 'z') # wrong number of names - : must replace all names at once with a sequence of length 2 + array([(3, 3.0, True, b'3'), (3, 3.0, True, b'3')], + dtype=[('f0', '>> x[:] = np.arange(2) + >>> x + array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], + dtype=[('f0', '>> x = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) + >>> y = np.zeros(2, dtype=[('A', 'i4')]) + >>> a = np.zeros(2, dtype='i4') + >>> a[:] = x + ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. 
+ >>> a[:] = y + >>> a + array([0, 0], dtype=int32) + +Assignment from other Structured Arrays +``````````````````````````````````````` + +Assignment between two structured arrays occurs as if the source elements had +been converted to tuples and then assigned to the destination elements. That +is, the first field of the source array is assigned to the first field of the +destination array, and the second field likewise, and so on, regardless of +field names. Structured arrays with a different number of fields cannot be +assigned to each other. Bytes of the destination structure which are not +included in any of the fields are unaffected. :: + + >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'f4'), ('c', 'S3')]) + >>> b = np.ones(3, dtype=[('x', 'f4'), ('y', 'S3'), ('z', 'O')]) + >>> b[:] = a + >>> b + array([(0.0, b'0.0', b''), (0.0, b'0.0', b''), (0.0, b'0.0', b'')], + dtype=[('x', '>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x['foo'] + array([1, 3]) + >>> x['foo'] = 10 + >>> x + array([(10, 2.), (10, 4.)], + dtype=[('foo', '>> y = x['bar'] + >>> y[:] = 10 + >>> x + array([(10, 5.), (10, 5.)], + dtype=[('foo', '>> y.dtype, y.shape, y.strides + (dtype('float32'), (2,), (12,)) -The field titles provide a standard place to put associated info for fields. -They do not have to be strings. 
:: +Accessing Multiple Fields +``````````````````````````` - >>> x.dtype.fields['x'][2] - 'title 1' +One can index a structured array with a multi-field index, where the index is a +list of field names:: -Accessing multiple fields at once -==================================== + >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')]) + >>> a[['a', 'c']] + array([(0, 0.0), (0, 0.0), (0, 0.0)], + dtype={'names':['a','c'], 'formats':['>> a[['a', 'c']] = (2, 3) + >>> a + array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], + dtype=[('a', '>> a[['a', 'c']] = a[['c', 'a']] + +Indexing with an Integer to get a Structured Scalar +``````````````````````````````````````````````````` + +Indexing a single element of a structured array (with an integer index) returns +a structured scalar:: + + >>> x = np.array([(1, 2., 3.)], dtype='i,f,f') + >>> scalar = x[0] + >>> scalar + (1, 2., 3.) + >>> type(scalar) + numpy.void + +Importantly, unlike other numpy scalars, structured scalars are mutable and act +like views into the original array, such that modifying the scalar will modify +the original array. Structured scalars also support access and assignment by +field name:: + + >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> s = x[0] + >>> s['bar'] = 100 + >>> x + array([(1, 100.), (3, 4.)], + dtype=[('foo', '>> x = np.array([(1.5,2.5,(1.0,2.0)),(3.,4.,(4.,5.)),(1.,3.,(2.,6.))], - dtype=[('x','f4'),('y',np.float32),('value','f4',(2,2))]) + >>> scalar = np.array([(1, 2., 3.)], dtype='i,f,f')[0] + >>> scalar[0] + 1 + >>> scalar[1] = 4 -Notice that `x` is created with a list of tuples. :: +Thus, tuples might be though of as the native Python equivalent to numpy's +structured types, much like native python integers are the equivalent to +numpy's integer types. 
Structured scalars may be converted to a tuple by +calling :func:`ndarray.item`:: - >>> x[['x','y']] - array([(1.5, 2.5), (3.0, 4.0), (1.0, 3.0)], - dtype=[('x', '>> x[['x','value']] - array([(1.5, [[1.0, 2.0], [1.0, 2.0]]), (3.0, [[4.0, 5.0], [4.0, 5.0]]), - (1.0, [[2.0, 6.0], [2.0, 6.0]])], - dtype=[('x', '>> scalar.item(), type(scalar.item()) + ((1, 2.0, 3.0), tuple) -The fields are returned in the order they are asked for.:: +Viewing Structured Arrays Containing Objects +-------------------------------------------- - >>> x[['y','x']] - array([(2.5, 1.5), (4.0, 3.0), (3.0, 1.0)], - dtype=[('y', '>> arr = np.zeros((5,), dtype=[('var1','f8'),('var2','f8')]) - >>> arr['var1'] = np.arange(5) + >>> a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'i4')]) + >>> b = np.ones(2, dtype=[('a', 'i4'), ('b', 'i4')]) + >>> a == b + array([False, False], dtype=bool) -If you fill it in row by row, it takes a take a tuple -(but not a list or array!):: +Currently, if the dtypes of two arrays are not equivalent all comparisons will +return ``False``. This behavior is deprecated as of numpy 1.10 and may change +in the future. - >>> arr[0] = (10,20) - >>> arr - array([(10.0, 20.0), (1.0, 0.0), (2.0, 0.0), (3.0, 0.0), (4.0, 0.0)], - dtype=[('var1', '`` operators will always return ``False`` when +comparing structured arrays. Many other pairwise operators are not supported. Record Arrays ============= -For convenience, numpy provides "record arrays" which allow one to access -fields of structured arrays by attribute rather than by index. Record arrays -are structured arrays wrapped using a subclass of ndarray, -:class:`numpy.recarray`, which allows field access by attribute on the array -object, and record arrays also use a special datatype, :class:`numpy.record`, -which allows field access by attribute on the individual elements of the array. 
+As an optional convenience numpy provides an ndarray subclass, +:class:`numpy.recarray`, and associated helper functions in the +:mod:`numpy.rec` submodule, which allows access to fields of structured arrays +by attribute, instead of only by index. Record arrays also use a special +datatype, :class:`numpy.record`, which allows field access by attribute on the +structured scalars obtained from the array. -The simplest way to create a record array is with :func:`numpy.rec.array`: :: +The simplest way to create a record array is with :func:`numpy.rec.array`:: - >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], + >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) >>> recordarr.bar array([ 2., 3.], dtype=float32) >>> recordarr[1:2] - rec.array([(2, 3.0, 'World')], + rec.array([(2, 3.0, 'World')], dtype=[('foo', '>> recordarr[1:2].foo array([2], dtype=int32) @@ -239,27 +516,28 @@ The simplest way to create a record array is with :func:`numpy.rec.array`: :: >>> recordarr[1].baz 'World' -numpy.rec.array can convert a wide variety of arguments into record arrays, -including normal structured arrays: :: +:func:`numpy.rec.array` can convert a wide variety of arguments into record +arrays, including structured arrays:: - >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], ... dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) >>> recordarr = np.rec.array(arr) -The numpy.rec module provides a number of other convenience functions for +The :mod:`numpy.rec` module provides a number of other convenience functions for creating record arrays, see :ref:`record array creation routines `. A record array representation of a structured array can be obtained using the -appropriate :ref:`view`: :: +appropriate :ref:`view`:: - >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], ... 
dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) - >>> recordarr = arr.view(dtype=dtype((np.record, arr.dtype)), + >>> recordarr = arr.view(dtype=np.dtype((np.record, arr.dtype)), ... type=np.recarray) -For convenience, viewing an ndarray as type `np.recarray` will automatically -convert to `np.record` datatype, so the dtype can be left out of the view: :: +For convenience, viewing an ndarray as type :class:`numpy.recarray` will +automatically convert to :class:`numpy.record` datatype, so the dtype can be left +out of the view:: >>> recordarr = arr.view(np.recarray) >>> recordarr.dtype @@ -267,14 +545,14 @@ convert to `np.record` datatype, so the dtype can be left out of the view: :: To get back to a plain ndarray both the dtype and type must be reset. The following view does so, taking into account the unusual case that the -recordarr was not a structured type: :: +recordarr was not a structured type:: >>> arr2 = recordarr.view(recordarr.dtype.fields or recordarr.dtype, np.ndarray) Record array fields accessed by index or by attribute are returned as a record array if the field has a structured type but as a plain ndarray otherwise. :: - >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], + >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) >>> type(recordarr.foo) @@ -283,8 +561,7 @@ array if the field has a structured type but as a plain ndarray otherwise. :: Note that if a field has the same name as an ndarray attribute, the ndarray attribute takes precedence. Such fields will be inaccessible by attribute but -may still be accessed by index. - +will still be accessible by index.
""" from __future__ import division, absolute_import, print_function -- cgit v1.2.1 From a08da3f34e8c1fe7e8997f254acf1c92eea520f9 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 9 Nov 2017 21:41:17 -0500 Subject: DOC: update structured array docs to reflect #6053, fixups [ci skip] --- numpy/doc/structured_arrays.py | 112 ++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 749018f35..65558a5a0 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -62,7 +62,7 @@ structure. The datatype of a field may be any numpy datatype including other structured datatypes, and it may also be a :term:`sub-array` which behaves like an ndarray of a specified shape. The offsets of the fields are arbitrary, and fields may even overlap. These offsets are usually determined automatically by -numpy, but can also be manually specified. +numpy, but can also be specified. Structured Datatype Creation ---------------------------- @@ -78,8 +78,8 @@ summary they are: Each tuple has the form ``(fieldname, datatype, shape)`` where shape is optional. ``fieldname`` is a string (or tuple if titles are used, see :ref:`Field Titles ` below), ``datatype`` may be any object - convertible to a datatype, and shape (optional) is a tuple of integers - specifying subarray shape. + convertible to a datatype, and ``shape`` is a tuple of integers specifying + subarray shape. >>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) dtype=[('x', '>> def print_offsets(d): ... print("offsets:", [d.fields[name][1] for name in d.names]) @@ -211,9 +211,9 @@ If ``align=True`` is set, numpy will pad the structure in the same way many C compilers would pad a C-struct. Aligned structures can give a performance improvement in some cases, at the cost of increased datatype size. 
Padding bytes are inserted between fields such that each field's byte offset will be a -multiple of that field's alignment (usually equal to the field's size in bytes -for simple datatypes, see :c:member:`PyArray_Descr.alignment`). -The structure will also have trailing padding added so that its itemsize is a +multiple of that field's alignment, which is usually equal to the field's size +in bytes for simple datatypes, see :c:member:`PyArray_Descr.alignment`. The +structure will also have trailing padding added so that its itemsize is a multiple of the largest field's alignment. :: >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) @@ -223,10 +223,10 @@ multiple of the largest field's alignment. :: Note that although almost all modern C compilers pad in this way by default, padding in C structs is C-implementation-dependent so this memory layout is not guaranteed to exactly match that of a corresponding struct in a C program. Some -massaging may be needed either on the numpy side or the C side to obtain exact +work may be needed, either on the numpy side or the C side, to obtain exact correspondence. -If offsets were specified manually using the optional ``offsets`` key in the +If offsets were specified using the optional ``offsets`` key in the dictionary-based dtype specification, setting ``align=True`` will check that each field's offset is a multiple of its size and that the itemsize is a multiple of the largest field size, and raise an exception if not. @@ -242,12 +242,12 @@ Field Titles In addition to field names, fields may also have an associated :term:`title`, an alternate name, which is sometimes used as an additional description or -mnemonic for the field. The title may be used to index an array, just like a -fieldname. +alias for the field. The title may be used to index an array, just like a +field name. 
To add titles when using the list-of-tuples form of dtype specification, the -fieldname may be be specified as a tuple of two strings (instead of a single -string), which will be the field's title and field name respectively. For +field name may be specified as a tuple of two strings instead of a single +string, which will be the field's title and field name respectively. For example:: >>> np.dtype([(('my title', 'name'), 'f4')]) @@ -263,12 +263,11 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual The ``dtype.fields`` dictionary will contain :term:`titles` as keys, if any titles are used. This means effectively that a field with a title will be represented twice in the fields dictionary. The tuple values for these fields -will also have a third element, the field title. - -Because of this, and because the ``names`` attribute preserves the field order -while the ``fields`` attribute may not, it is recommended to iterate through -the fields of a dtype using the ``names`` attribute of the dtype (which will -not list titles), as in:: +will also have a third element, the field title. Because of this, and because +the ``names`` attribute preserves the field order while the ``fields`` +attribute may not, it is recommended to iterate through the fields of a dtype +using the ``names`` attribute of the dtype, which will not list titles, as +in:: >>> for name in d.names: ... print(d.fields[name][:2]) @@ -296,10 +295,10 @@ tuples, using scalar values, or using other structured arrays. Assignment from Python Native Types (Tuples) ``````````````````````````````````````````` -The simplest way to assign values to a structured array is using python -tuples. Each assigned value should be a tuple (and not a list or array, as -these will trigger numpy's broadcasting rules) of length equal to the number of -fields in the array.
The tuple's elements are assigned to the successive fields +The simplest way to assign values to a structured array is using python tuples. +Each assigned value should be a tuple of length equal to the number of fields +in the array, and not a list or array as these will trigger numpy's +broadcasting rules. The tuple's elements are assigned to the successive fields of the array, from left to right:: >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') @@ -312,8 +311,8 @@ Assignment from Scalars ``````````````````````` A scalar assigned to a structured element will be assigned to all fields. This -happens when a scalar is assigned to a structured array, or when a scalar array -is assigned to a structured array:: +happens when a scalar is assigned to a structured array, or when an +unstructured array is assigned to a structured array:: >>> x = np.zeros(2, dtype='i8,f4,?,S1') >>> x[:] = 3 @@ -325,16 +324,16 @@ is assigned to a structured array:: array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], dtype=[('f0', '>> x = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) - >>> y = np.zeros(2, dtype=[('A', 'i4')]) - >>> a = np.zeros(2, dtype='i4') - >>> a[:] = x + >>> twofield = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) + >>> onefield = np.zeros(2, dtype=[('A', 'i4')]) + >>> nostruct = np.zeros(2, dtype='i4') + >>> nostruct[:] = twofield ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. - >>> a[:] = y - >>> a + >>> nostruct[:] = onefield + >>> nostruct array([0, 0], dtype=int32) Assignment from other Structured Arrays @@ -389,7 +388,7 @@ memory locations and writing to the view will modify the original array. 
:: dtype=[('foo', '>> y.dtype, y.shape, y.strides (dtype('float32'), (2,), (12,)) @@ -410,12 +409,12 @@ list of field names:: dtype=[('a', '>> type(scalar) numpy.void -Importantly, unlike other numpy scalars, structured scalars are mutable and act -like views into the original array, such that modifying the scalar will modify -the original array. Structured scalars also support access and assignment by -field name:: +Unlike other numpy scalars, structured scalars are mutable and act like views +into the original array, such that modifying the scalar will modify the +original array. Structured scalars also support access and assignment by field +name:: >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> s = x[0] @@ -453,7 +452,7 @@ Similarly to tuples, structured scalars can also be indexed with an integer:: 1 >>> scalar[1] = 4 -Thus, tuples might be though of as the native Python equivalent to numpy's +Thus, tuples might be thought of as the native Python equivalent to numpy's structured types, much like native python integers are the equivalent to numpy's integer types. Structured scalars may be converted to a tuple by calling :func:`ndarray.item`:: @@ -464,17 +463,17 @@ calling :func:`ndarray.item`:: Viewing Structured Arrays Containing Objects -------------------------------------------- -In order to prevent clobbering of object pointers in fields of +In order to prevent clobbering object pointers in fields of :class:`numpy.object` type, numpy currently does not allow views of structured arrays containing objects. Structure Comparison -------------------- -If the dtypes of two structured arrays are equivalent, testing the equality of -the arrays will result in a boolean array with the dimension of the original -arrays, with elements set to True where all fields of the corresponding -structures are equal. 
Structured dtypes are equivalent if the field names, +If the dtypes of two void structured arrays are equal, testing the equality of +the arrays will result in a boolean array with the dimensions of the original +arrays, with elements set to ``True`` where all fields of the corresponding +structures are equal. Structured dtypes are equal if the field names, dtypes and titles are the same, ignoring endianness, and the fields are in the same order:: @@ -483,21 +482,22 @@ the same order:: >>> a == b array([False, False], dtype=bool) -Currently, if the dtypes of two arrays are not equivalent all comparisons will -return ``False``. This behavior is deprecated as of numpy 1.10 and may change -in the future. +Currently, if the dtypes of two void structured arrays are not equivalent the +comparison fails, returning the scalar value ``False``. This behavior is +deprecated as of numpy 1.10 and will raise an error or perform elementwise +comparison in the future. -Currently, the ``<`` and ``>`` operators will always return ``False`` when -comparing structured arrays. Many other pairwise operators are not supported. +The ``<`` and ``>`` operators always return ``False`` when comparing void +structured arrays, and arithmetic and bitwise operations are not supported. Record Arrays ============= As an optional convenience numpy provides an ndarray subclass, :class:`numpy.recarray`, and associated helper functions in the -:mod:`numpy.rec` submodule, which allows access to fields of structured arrays -by attribute, instead of only by index. Record arrays also use a special -datatype, :class:`numpy.record`, which allows field access by attribute on the +:mod:`numpy.rec` submodule, that allows access to fields of structured arrays +by attribute instead of only by index. Record arrays also use a special +datatype, :class:`numpy.record`, that allows field access by attribute on the structured scalars obtained from the array. 
The simplest way to create a record array is with :func:`numpy.rec.array`:: -- cgit v1.2.1 From ac6b1a902b99e340cf7eeeeb7392c91e38db9dd8 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 13 Nov 2017 23:45:45 -0800 Subject: ENH: don't show boolean dtype, as it is implied --- numpy/doc/structured_arrays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 65558a5a0..02581d01b 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -480,7 +480,7 @@ the same order:: >>> a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'i4')]) >>> b = np.ones(2, dtype=[('a', 'i4'), ('b', 'i4')]) >>> a == b - array([False, False], dtype=bool) + array([False, False]) Currently, if the dtypes of two void structured arrays are not equivalent the comparison fails, returning the scalar value ``False``. This behavior is -- cgit v1.2.1 From 4740f15df91a779ccf2e923367a792ddcee5816e Mon Sep 17 00:00:00 2001 From: "luz.paz" Date: Mon, 12 Feb 2018 18:50:27 -0500 Subject: Misc. typos Found via `codespell -q 3 -I ../numpy-whitelist.txt` --- numpy/doc/structured_arrays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 02581d01b..af02e2173 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -246,7 +246,7 @@ alias for the field. The title may be used to index an array, just like a field name. To add titles when using the list-of-tuples form of dtype specification, the -field name may be be specified as a tuple of two strings instead of a single +field name may be specified as a tuple of two strings instead of a single string, which will be the field's title and field name respectively. 
For example:: -- cgit v1.2.1 From 1eef2af85df832b55c856935a820889cdee83581 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 11 Apr 2018 14:23:07 +0300 Subject: formatting fixes --- numpy/doc/structured_arrays.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index af02e2173..ba667da59 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -284,7 +284,7 @@ the desired underlying dtype, and fields and flags will be copied from ``dtype``. This dtype is similar to a 'union' in C. Indexing and Assignment to Structured arrays -============================================= +============================================ Assigning data to a Structured Array ------------------------------------ @@ -293,7 +293,7 @@ There are a number of ways to assign values to a structured array: Using python tuples, using scalar values, or using other structured arrays. Assignment from Python Native Types (Tuples) -``````````````````````````````````````````` +```````````````````````````````````````````` The simplest way to assign values to a structured array is using python tuples. 
Each assigned value should be a tuple of length equal to the number of fields -- cgit v1.2.1 From e08eced7990fbdcecb2bd81d3fc736f69bad6dfd Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 10 Jun 2018 21:54:21 -0400 Subject: MAINT: push back multifield copy->view changes to 1.16 --- numpy/doc/structured_arrays.py | 74 ++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 18 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index ba667da59..ab97c5df6 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -133,10 +133,9 @@ summary they are: Offsets may be chosen such that the fields overlap, though this will mean that assigning to one field may clobber any overlapping field's data. As - an exception, fields of :class:`numpy.object` type .. (see - :ref:`object arrays `) cannot overlap with other fields, - because of the risk of clobbering the internal object pointer and then - dereferencing it. + an exception, fields of :class:`numpy.object` type cannot overlap with + other fields, because of the risk of clobbering the internal object + pointer and then dereferencing it. The optional 'aligned' value can be set to ``True`` to make the automatic offset computation use aligned offsets (see :ref:`offsets-and-alignment`), @@ -235,6 +234,11 @@ If the offsets of the fields and itemsize of a structured array satisfy the alignment conditions, the array will have the ``ALIGNED`` :ref:`flag ` set. +A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an +aligned dtype or array to a packed one and vice versa. It takes either a dtype +or structured ndarray as an argument, and returns a copy with fields re-packed, +with or without padding bytes. + .. _titles: Field Titles @@ -396,27 +400,61 @@ typically a non-structured array, except in the case of nested structures. 
Accessing Multiple Fields ``````````````````````````` -One can index a structured array with a multi-field index, where the index is a -list of field names:: +One can index and assign to a structured array with a multi-field index, where +the index is a list of field names. + +.. warning:: + The behavior of multi-field indexes will change from Numpy 1.15 to Numpy + 1.16. - >>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')]) +In Numpy 1.16, the result of indexing with a multi-field index will be a view +into the original array, as follows:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] - array([(0, 0.0), (0, 0.0), (0, 0.0)], - dtype={'names':['a','c'], 'formats':['>> a[['a','c']].view('i8') # will fail in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12. + + The following is a recommended fix, which will behave identically in Numpy + 1.15 and Numpy 1.16:: + + >>> from numpy.lib.recfunctions import repack_fields + >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 + array([0, 0, 0]) + +Assigning to an array with a multi-field index will behave the same in Numpy +1.15 and Numpy 1.16. In both versions the assignment will modify the original +array:: + >>> a[['a', 'c']] = (2, 3) >>> a array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], dtype=[('a', '>> a[['a', 'c']] = a[['c', 'a']] -- cgit v1.2.1 From f1fba70edd1829c64e3290fa6b1a20d01e9d9674 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Tue, 30 Jan 2018 19:15:54 -0500 Subject: ENH: add multi-field assignment helpers in np.lib.recfunctions Adds helper functions for the copy->view transition for multi-field indexes. Adds `structured_to_unstructured`, `apply_along_fields`, `assign_fields_by_name`, `require_fields`. 
--- numpy/doc/structured_arrays.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index ab97c5df6..42711a7c0 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -443,6 +443,15 @@ behavior since Numpy 1.7. >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 array([0, 0, 0]) + The :module:`numpy.lib.recfunctions` module has other new methods + introduced in numpy 1.16 to help users account for this change. These are + :func:`numpy.lib.recfunctions.structured_to_unstructured`, + :func:`numpy.lib.recfunctions.unstructured_to_structured`, + :func:`numpy.lib.recfunctions.apply_along_fields`, + :func:`numpy.lib.recfunctions.assign_fields_by_name`, and + :func:`numpy.lib.recfunctions.require_fields`. + + Assigning to an array with a multi-field index will behave the same in Numpy 1.15 and Numpy 1.16. In both versions the assignment will modify the original array:: -- cgit v1.2.1 From 845def00c85f9f40cfa64e6dabb4158bebd502f4 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 22 Nov 2018 19:47:58 -0500 Subject: ENH: add back the multifield copy->view change Fixes #10409 Closes #11530 --- numpy/doc/structured_arrays.py | 108 +++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 46 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 42711a7c0..0fcdecf00 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -35,26 +35,24 @@ with the field name:: array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], dtype=[('name', 'S10'), ('age', '>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] @@ -420,41 +419,58 @@ in the order they were indexed. 
Note that unlike for single-field indexing, the view's dtype has the same itemsize as the original array, and has fields at the same offsets as in the original array, and unindexed fields are merely missing. -In Numpy 1.15, indexing an array with a multi-field index returns a copy of -the result above for 1.16, but with fields packed together in memory as if -passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the -behavior since Numpy 1.7. - .. warning:: - The new behavior in Numpy 1.16 leads to extra "padding" bytes at the - location of unindexed fields. You will need to update any code which depends - on the data having a "packed" layout. For instance code such as:: + In Numpy 1.15, indexing an array with a multi-field index returned a copy of + the result above, but with fields packed together in memory as if + passed through :func:`numpy.lib.recfunctions.repack_fields`. + + The new behavior as of Numpy 1.16 leads to extra "padding" bytes at the + location of unindexed fields compared to 1.15. You will need to update any + code which depends on the data having a "packed" layout. For instance code + such as:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) + >>> a[['a','c']].view('i8') # Fails in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12, and similar code has raised ``FutureWarning`` since 1.7. + + In 1.16 a number of functions have been introduced in the + :mod:`numpy.lib.recfunctions` module to help users account for this + change. These are + :func:`numpy.lib.recfunctions.repack_fields`,
+ :func:`numpy.lib.recfunctions.structured_to_unstructured`, + :func:`numpy.lib.recfunctions.unstructured_to_structured`, + :func:`numpy.lib.recfunctions.apply_along_fields`, + :func:`numpy.lib.recfunctions.assign_fields_by_name`, and + :func:`numpy.lib.recfunctions.require_fields`. - >>> a[['a','c']].view('i8') # will fail in Numpy 1.16 - ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + The function :func:`numpy.lib.recfunctions.repack_fields` can always be + used to reproduce the old behavior, as it will return a packed copy of the + structured array. The code above, for example, can be replaced with: - will need to be changed. This code has raised a ``FutureWarning`` since - Numpy 1.12. + >>> repack_fields(a[['a','c']]).view('i8') # supported in 1.16 + array([0, 0, 0]) - The following is a recommended fix, which will behave identically in Numpy - 1.15 and Numpy 1.16:: + Furthermore, numpy now provides a new function + :func:`numpy.lib.recfunctions.structured_to_unstructured` which is a safer + and more efficient alternative for users who wish to convert structured + arrays to unstructured arrays, as the view above is often intended to do. + This function allows safe conversion to an unstructured type taking into + account padding, often avoids a copy, and also casts the datatypes + as needed, unlike the view. Code such as: - >>> from numpy.lib.recfunctions import repack_fields - >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 - array([0, 0, 0]) + >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) + >>> a[['x', 'z']].view('f4') - The :module:`numpy.lib.recfunctions` module has other new methods - introduced in numpy 1.16 to help users account for this change.
These are - :func:`numpy.lib.recfunctions.structured_to_unstructured`, - :func:`numpy.lib.recfunctions.unstructured_to_structured`, - :func:`numpy.lib.recfunctions.apply_along_fields`, - :func:`numpy.lib.recfunctions.assign_fields_by_name`, and - :func:`numpy.lib.recfunctions.require_fields`. + can be made safer by replacing with: + + >>> structured_to_unstructured(a[['x', 'z']]) + array([0, 0, 0]) -Assigning to an array with a multi-field index will behave the same in Numpy -1.15 and Numpy 1.16. In both versions the assignment will modify the original -array:: +Assignment to an array with a multi-field index modifies the original array:: >>> a[['a', 'c']] = (2, 3) >>> a -- cgit v1.2.1 From 2981ed622501ac48c54c4efdf06a84084b70ff66 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sat, 15 Dec 2018 11:46:22 -0500 Subject: DOC: more doc updates for structured arrays [ci skip] --- numpy/doc/structured_arrays.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 0fcdecf00..e92a06124 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -397,6 +397,15 @@ typically a non-structured array, except in the case of nested structures. 
>>> y.dtype, y.shape, y.strides (dtype('float32'), (2,), (12,)) +If the accessed field is a subarray, the dimensions of the subarray +are appended to the shape of the result:: + + >>> x = np.zeros((2,2), dtype=[('a', np.int32), ('b', np.float64, (3,3))]) + >>> x['a'].shape + (2, 2) + >>> x['b'].shape + (2, 2, 3, 3) + Accessing Multiple Fields ``````````````````````````` -- cgit v1.2.1 From db6f50b27f2f705bd7ace8420a7204ff50872296 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 Jan 2019 21:22:46 +0200 Subject: DOC: doctest structured array introduction --- numpy/doc/structured_arrays.py | 106 ++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index e92a06124..da3a74bd6 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -13,8 +13,8 @@ datatypes organized as a sequence of named :term:`fields `. For example, >>> x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], ... 
dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]) >>> x - array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], - dtype=[('name', 'S10'), ('age', '>> x['age'] = 5 >>> x - array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], - dtype=[('name', 'S10'), ('age', '>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) - dtype=[('x', '>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2, 2))]) + dtype([('x', '>> np.dtype([('x', 'f4'),('', 'i4'),('z', 'i8')]) + >>> np.dtype([('x', 'f4'), ('', 'i4'), ('z', 'i8')]) dtype([('x', '>> np.dtype('i8,f4,S3') + >>> np.dtype('i8, f4, S3') dtype([('f0', '>> np.dtype('3int8, float32, (2,3)float64') - dtype([('f0', 'i1', 3), ('f1', '>> np.dtype('3int8, float32, (2, 3)float64') + dtype([('f0', 'i1', (3,)), ('f1', '>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4','f4']}) + >>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4', 'f4']}) dtype([('col1', '>> np.dtype({'names': ['col1', 'col2'], - ... 'formats': ['i4','f4'], + ... 'formats': ['i4', 'f4'], ... 'offsets': [0, 4], ... 'itemsize': 12}) dtype({'names':['col1','col2'], 'formats':['>> np.dtype=({'col1': ('i1',0), 'col2': ('f4',1)}) - dtype([(('col1'), 'i1'), (('col2'), '>f4')]) + >>> np.dtype({'col1': ('i1', 0), 'col2': ('f4', 1)}) + dtype([('col1', 'i1'), ('col2', '>> def print_offsets(d): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) - >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2')) + >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2')) offsets: [0, 1, 2, 6, 7, 15] itemsize: 17 @@ -215,7 +215,7 @@ in bytes for simple datatypes, see :c:member:`PyArray_Descr.alignment`. The structure will also have trailing padding added so that its itemsize is a multiple of the largest field's alignment. 
:: - >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) + >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2', align=True)) offsets: [0, 1, 4, 8, 16, 24] itemsize: 32 @@ -255,6 +255,7 @@ string, which will be the field's title and field name respectively. For example:: >>> np.dtype([(('my title', 'name'), 'f4')]) + dtype([(('my title', 'name'), '>> np.dtype({'name': ('i4', 0, 'my title')}) + dtype([(('my title', 'name'), '>> for name in d.names: ... print(d.fields[name][:2]) + (dtype('int64'), 0) + (dtype('float32'), 8) Union types ----------- @@ -305,8 +309,8 @@ in the array, and not a list or array as these will trigger numpy's broadcasting rules. The tuple's elements are assigned to the successive fields of the array, from left to right:: - >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') - >>> x[1] = (7,8,9) + >>> x = np.array([(1, 2, 3), (4, 5, 6)], dtype='i8, f4, f8') + >>> x[1] = (7, 8, 9) >>> x array([(1, 2., 3.), (7, 8., 9.)], dtype=[('f0', '>> x = np.zeros(2, dtype='i8,f4,?,S1') + >>> x = np.zeros(2, dtype='i8, f4, ?, S1') >>> x[:] = 3 >>> x - array([(3, 3.0, True, b'3'), (3, 3.0, True, b'3')], + array([(3, 3., True, b'3'), (3, 3., True, b'3')], dtype=[('f0', '>> x[:] = np.arange(2) >>> x - array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], + array([(0, 0., False, b'0'), (1, 1., True, b'1')], dtype=[('f0', '>> onefield = np.zeros(2, dtype=[('A', 'i4')]) >>> nostruct = np.zeros(2, dtype='i4') >>> nostruct[:] = twofield + Traceback (most recent call last): + File "", line 1, in ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. >>> nostruct[:] = onefield >>> nostruct @@ -355,7 +361,7 @@ included in any of the fields are unaffected. 
:: >>> b = np.ones(3, dtype=[('x', 'f4'), ('y', 'S3'), ('z', 'O')]) >>> b[:] = a >>> b - array([(0.0, b'0.0', b''), (0.0, b'0.0', b''), (0.0, b'0.0', b'')], + array([(0., b'0.0', b''), (0., b'0.0', b''), (0., b'0.0', b'')], dtype=[('x', '>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> x['foo'] array([1, 3]) >>> x['foo'] = 10 @@ -386,9 +392,9 @@ The resulting array is a view into the original array. It shares the same memory locations and writing to the view will modify the original array. :: >>> y = x['bar'] - >>> y[:] = 10 + >>> y[:] = 11 >>> x - array([(10, 5.), (10, 5.)], + array([(10, 11.), (10, 11.)], dtype=[('foo', '>> x = np.zeros((2,2), dtype=[('a', np.int32), ('b', np.float64, (3,3))]) + >>> x = np.zeros((2, 2), dtype=[('a', np.int32), ('b', np.float64, (3, 3))]) >>> x['a'].shape (2, 2) >>> x['b'].shape @@ -438,8 +444,9 @@ same offsets as in the original array, and unindexed fields are merely missing. code which depends on the data having a "packed" layout. For instance code such as:: - >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) - >>> a[['a','c']].view('i8') # Fails in Numpy 1.16 + >>> a[['a', 'c']].view('i8') # Fails in Numpy 1.16 + Traceback (most recent call last): + File "", line 1, in ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype will need to be changed. This code has raised a ``FutureWarning`` since @@ -459,7 +466,8 @@ same offsets as in the original array, and unindexed fields are merely missing. used to reproduce the old behavior, as it will return a packed copy of the structured array. 
The code above, for example, can be replaced with: - >>> repack_fields(a[['a','c']]).view('i8') # supported in 1.16 + >>> from numpy.lib.recfunctions import repack_fields + >>> repack_fields(a[['a', 'c']]).view('i8') # supported in 1.16 array([0, 0, 0]) Furthermore, numpy now provides a new function @@ -470,12 +478,14 @@ same offsets as in the original array, and unindexed fields are merely missing. account padding, often avoids a copy, and also casts the datatypes as needed, unlike the view. Code such as: - >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) - >>> a[['x', 'z']].view('f4') + >>> b = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) + >>> b[['x', 'z']].view('f4') + array([0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32) can be made safer by replacing with: - >>> structured_to_unstructured(a[['x', 'z']]) + >>> from numpy.lib.recfunctions import structured_to_unstructured + >>> structured_to_unstructured(b[['x', 'z']]) array([0, 0, 0]) @@ -483,8 +493,8 @@ Assignment to an array with a multi-field index modifies the original array:: >>> a[['a', 'c']] = (2, 3) >>> a - array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], - dtype=[('a', '>> x = np.array([(1, 2., 3.)], dtype='i,f,f') + >>> x = np.array([(1, 2., 3.)], dtype='i, f, f') >>> scalar = x[0] >>> scalar (1, 2., 3.) >>> type(scalar) - numpy.void + Unlike other numpy scalars, structured scalars are mutable and act like views into the original array, such that modifying the scalar will modify the original array. 
Structured scalars also support access and assignment by field name:: - >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> s = x[0] >>> s['bar'] = 100 >>> x @@ -519,7 +529,7 @@ name:: Similarly to tuples, structured scalars can also be indexed with an integer:: - >>> scalar = np.array([(1, 2., 3.)], dtype='i,f,f')[0] + >>> scalar = np.array([(1, 2., 3.)], dtype='i, f, f')[0] >>> scalar[0] 1 >>> scalar[1] = 4 @@ -530,7 +540,7 @@ numpy's integer types. Structured scalars may be converted to a tuple by calling :func:`ndarray.item`:: >>> scalar.item(), type(scalar.item()) - ((1, 2.0, 3.0), tuple) + ((1, 4.0, 3.0), ) Viewing Structured Arrays Containing Objects -------------------------------------------- @@ -574,24 +584,24 @@ structured scalars obtained from the array. The simplest way to create a record array is with :func:`numpy.rec.array`:: - >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], + >>> recordarr = np.rec.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) >>> recordarr.bar array([ 2., 3.], dtype=float32) >>> recordarr[1:2] - rec.array([(2, 3.0, 'World')], + rec.array([(2, 3., b'World')], dtype=[('foo', '>> recordarr[1:2].foo array([2], dtype=int32) >>> recordarr.foo[1:2] array([2], dtype=int32) >>> recordarr[1].baz - 'World' + b'World' :func:`numpy.rec.array` can convert a wide variety of arguments into record arrays, including structured arrays:: - >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... 
dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) >>> recordarr = np.rec.array(arr) @@ -602,9 +612,9 @@ creating record arrays, see :ref:`record array creation routines A record array representation of a structured array can be obtained using the appropriate :ref:`view`:: - >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) - >>> recordarr = arr.view(dtype=dtype((np.record, arr.dtype)), + >>> recordarr = arr.view(dtype=np.dtype((np.record, arr.dtype)), ... type=np.recarray) For convenience, viewing an ndarray as type :class:`np.recarray` will @@ -624,12 +634,12 @@ recordarr was not a structured type:: Record array fields accessed by index or by attribute are returned as a record array if the field has a structured type but as a plain ndarray otherwise. :: - >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], + >>> recordarr = np.rec.array([('Hello', (1, 2)), ("World", (3, 4))], ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) >>> type(recordarr.foo) - + >>> type(recordarr.bar) - + Note that if a field has the same name as an ndarray attribute, the ndarray attribute takes precedence. 
Such fields will be inaccessible by attribute but -- cgit v1.2.1 From 62433284d65a3629a199958da2df3a807c60fab4 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 20 Feb 2019 23:46:20 +0200 Subject: DOC: reduce warnings when building, reword, tweak doc building --- numpy/doc/structured_arrays.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index da3a74bd6..c3605b49a 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -57,7 +57,7 @@ A structured datatype can be thought of as a sequence of bytes of a certain length (the structure's :term:`itemsize`) which is interpreted as a collection of fields. Each field has a name, a datatype, and a byte offset within the structure. The datatype of a field may be any numpy datatype including other -structured datatypes, and it may also be a :term:`sub-array` which behaves like +structured datatypes, and it may also be a :term:`subarray` which behaves like an ndarray of a specified shape. The offsets of the fields are arbitrary, and fields may even overlap. These offsets are usually determined automatically by numpy, but can also be specified. @@ -231,7 +231,7 @@ each field's offset is a multiple of its size and that the itemsize is a multiple of the largest field size, and raise an exception if not. If the offsets of the fields and itemsize of a structured array satisfy the -alignment conditions, the array will have the ``ALIGNED`` :ref:`flag +alignment conditions, the array will have the ``ALIGNED`` :attr:`flag ` set. A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an @@ -266,7 +266,7 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual >>> np.dtype({'name': ('i4', 0, 'my title')}) dtype([(('my title', 'name'), '`. 
A record array representation of a structured array can be obtained using the -appropriate :ref:`view`:: +appropriate `view `_:: >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) -- cgit v1.2.1 From 2f41bb26b061821c77aff6982630de937ad9007a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 24 Feb 2019 10:10:47 +0200 Subject: DOC: fixes from review --- numpy/doc/structured_arrays.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index c3605b49a..c0437dc07 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -57,10 +57,10 @@ A structured datatype can be thought of as a sequence of bytes of a certain length (the structure's :term:`itemsize`) which is interpreted as a collection of fields. Each field has a name, a datatype, and a byte offset within the structure. The datatype of a field may be any numpy datatype including other -structured datatypes, and it may also be a :term:`subarray` which behaves like -an ndarray of a specified shape. The offsets of the fields are arbitrary, and -fields may even overlap. These offsets are usually determined automatically by -numpy, but can also be specified. +structured datatypes, and it may also be a :term:`subarray data type` which +behaves like an ndarray of a specified shape. The offsets of the fields are +arbitrary, and fields may even overlap. These offsets are usually determined +automatically by numpy, but can also be specified. Structured Datatype Creation ---------------------------- @@ -266,7 +266,7 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual >>> np.dtype({'name': ('i4', 0, 'my title')}) dtype([(('my title', 'name'), '