From 1f8ce6341159ebb0731c2c262f4576609210d2c8 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Wed, 2 Sep 2020 21:01:35 +0300 Subject: MAINT, DOC: move informational files from numpy.doc.*.py to their *.rst counterparts (#17222) * DOC: redistribute docstring-only content from numpy/doc * DOC: post-transition clean-up * DOC, MAINT: reskip doctests, fix a few easy ones --- numpy/doc/structured_arrays.py | 646 ----------------------------------------- 1 file changed, 646 deletions(-) delete mode 100644 numpy/doc/structured_arrays.py (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py deleted file mode 100644 index 359d4f7f4..000000000 --- a/numpy/doc/structured_arrays.py +++ /dev/null @@ -1,646 +0,0 @@ -""" -================= -Structured Arrays -================= - -Introduction -============ - -Structured arrays are ndarrays whose datatype is a composition of simpler -datatypes organized as a sequence of named :term:`fields `. For example, -:: - - >>> x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], - ... dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]) - >>> x - array([('Rex', 9, 81.), ('Fido', 3, 27.)], - dtype=[('name', 'U10'), ('age', '>> x[1] - ('Fido', 3, 27.0) - -You can access and modify individual fields of a structured array by indexing -with the field name:: - - >>> x['age'] - array([9, 3], dtype=int32) - >>> x['age'] = 5 - >>> x - array([('Rex', 5, 81.), ('Fido', 5, 27.)], - dtype=[('name', 'U10'), ('age', '` reference page, and in -summary they are: - -1. A list of tuples, one tuple per field - - Each tuple has the form ``(fieldname, datatype, shape)`` where shape is - optional. ``fieldname`` is a string (or tuple if titles are used, see - :ref:`Field Titles ` below), ``datatype`` may be any object - convertible to a datatype, and ``shape`` is a tuple of integers specifying - subarray shape. 
- - >>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2, 2))]) - dtype([('x', '>> np.dtype([('x', 'f4'), ('', 'i4'), ('z', 'i8')]) - dtype([('x', '` may be used in a string and separated by - commas. The itemsize and byte offsets of the fields are determined - automatically, and the field names are given the default names ``f0``, - ``f1``, etc. :: - - >>> np.dtype('i8, f4, S3') - dtype([('f0', '>> np.dtype('3int8, float32, (2, 3)float64') - dtype([('f0', 'i1', (3,)), ('f1', '>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4', 'f4']}) - dtype([('col1', '>> np.dtype({'names': ['col1', 'col2'], - ... 'formats': ['i4', 'f4'], - ... 'offsets': [0, 4], - ... 'itemsize': 12}) - dtype({'names':['col1','col2'], 'formats':['` below. - -4. A dictionary of field names - - The use of this form of specification is discouraged, but documented here - because older numpy code may use it. The keys of the dictionary are the - field names and the values are tuples specifying type and offset:: - - >>> np.dtype({'col1': ('i1', 0), 'col2': ('f4', 1)}) - dtype([('col1', 'i1'), ('col2', '` may be - specified by using a 3-tuple, see below. - -Manipulating and Displaying Structured Datatypes ------------------------------------------------- - -The list of field names of a structured datatype can be found in the ``names`` -attribute of the dtype object:: - - >>> d = np.dtype([('x', 'i8'), ('y', 'f4')]) - >>> d.names - ('x', 'y') - -The field names may be modified by assigning to the ``names`` attribute using a -sequence of strings of the same length. - -The dtype object also has a dictionary-like attribute, ``fields``, whose keys -are the field names (and :ref:`Field Titles `, see below) and whose -values are tuples containing the dtype and byte offset of each field. :: - - >>> d.fields - mappingproxy({'x': (dtype('int64'), 0), 'y': (dtype('float32'), 8)}) - -Both the ``names`` and ``fields`` attributes will equal ``None`` for -unstructured arrays. 
The recommended way to test if a dtype is structured is -with ``if dt.names is not None`` rather than ``if dt.names``, to account for dtypes -with 0 fields. - -The string representation of a structured datatype is shown in the "list of -tuples" form if possible, otherwise numpy falls back to using the more general -dictionary form. - -.. _offsets-and-alignment: - -Automatic Byte Offsets and Alignment ------------------------------------- - -Numpy uses one of two methods to automatically determine the field byte offsets -and the overall itemsize of a structured datatype, depending on whether -``align=True`` was specified as a keyword argument to :func:`numpy.dtype`. - -By default (``align=False``), numpy will pack the fields together such that -each field starts at the byte offset the previous field ended, and the fields -are contiguous in memory. :: - - >>> def print_offsets(d): - ... print("offsets:", [d.fields[name][1] for name in d.names]) - ... print("itemsize:", d.itemsize) - >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2')) - offsets: [0, 1, 2, 6, 7, 15] - itemsize: 17 - -If ``align=True`` is set, numpy will pad the structure in the same way many C -compilers would pad a C-struct. Aligned structures can give a performance -improvement in some cases, at the cost of increased datatype size. Padding -bytes are inserted between fields such that each field's byte offset will be a -multiple of that field's alignment, which is usually equal to the field's size -in bytes for simple datatypes, see :c:member:`PyArray_Descr.alignment`. The -structure will also have trailing padding added so that its itemsize is a -multiple of the largest field's alignment. 
:: - - >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2', align=True)) - offsets: [0, 1, 4, 8, 16, 24] - itemsize: 32 - -Note that although almost all modern C compilers pad in this way by default, -padding in C structs is C-implementation-dependent so this memory layout is not -guaranteed to exactly match that of a corresponding struct in a C program. Some -work may be needed, either on the numpy side or the C side, to obtain exact -correspondence. - -If offsets were specified using the optional ``offsets`` key in the -dictionary-based dtype specification, setting ``align=True`` will check that -each field's offset is a multiple of its size and that the itemsize is a -multiple of the largest field size, and raise an exception if not. - -If the offsets of the fields and itemsize of a structured array satisfy the -alignment conditions, the array will have the ``ALIGNED`` :attr:`flag -` set. - -A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an -aligned dtype or array to a packed one and vice versa. It takes either a dtype -or structured ndarray as an argument, and returns a copy with fields re-packed, -with or without padding bytes. - -.. _titles: - -Field Titles ------------- - -In addition to field names, fields may also have an associated :term:`title`, -an alternate name, which is sometimes used as an additional description or -alias for the field. The title may be used to index an array, just like a -field name. - -To add titles when using the list-of-tuples form of dtype specification, the -field name may be specified as a tuple of two strings instead of a single -string, which will be the field's title and field name respectively. For -example:: - - >>> np.dtype([(('my title', 'name'), 'f4')]) - dtype([(('my title', 'name'), '>> np.dtype({'name': ('i4', 0, 'my title')}) - dtype([(('my title', 'name'), '>> for name in d.names: - ... 
print(d.fields[name][:2]) - (dtype('int64'), 0) - (dtype('float32'), 8) - -Union types ------------ - -Structured datatypes are implemented in numpy to have base type -:class:`numpy.void` by default, but it is possible to interpret other numpy -types as structured types using the ``(base_dtype, dtype)`` form of dtype -specification described in -:ref:`Data Type Objects `. Here, ``base_dtype`` is -the desired underlying dtype, and fields and flags will be copied from -``dtype``. This dtype is similar to a 'union' in C. - -Indexing and Assignment to Structured arrays -============================================ - -Assigning data to a Structured Array ------------------------------------- - -There are a number of ways to assign values to a structured array: Using python -tuples, using scalar values, or using other structured arrays. - -Assignment from Python Native Types (Tuples) -```````````````````````````````````````````` - -The simplest way to assign values to a structured array is using python tuples. -Each assigned value should be a tuple of length equal to the number of fields -in the array, and not a list or array as these will trigger numpy's -broadcasting rules. The tuple's elements are assigned to the successive fields -of the array, from left to right:: - - >>> x = np.array([(1, 2, 3), (4, 5, 6)], dtype='i8, f4, f8') - >>> x[1] = (7, 8, 9) - >>> x - array([(1, 2., 3.), (7, 8., 9.)], - dtype=[('f0', '>> x = np.zeros(2, dtype='i8, f4, ?, S1') - >>> x[:] = 3 - >>> x - array([(3, 3., True, b'3'), (3, 3., True, b'3')], - dtype=[('f0', '>> x[:] = np.arange(2) - >>> x - array([(0, 0., False, b'0'), (1, 1., True, b'1')], - dtype=[('f0', '>> twofield = np.zeros(2, dtype=[('A', 'i4'), ('B', 'i4')]) - >>> onefield = np.zeros(2, dtype=[('A', 'i4')]) - >>> nostruct = np.zeros(2, dtype='i4') - >>> nostruct[:] = twofield - Traceback (most recent call last): - ... 
- TypeError: Cannot cast array data from dtype([('A', '>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'f4'), ('c', 'S3')]) - >>> b = np.ones(3, dtype=[('x', 'f4'), ('y', 'S3'), ('z', 'O')]) - >>> b[:] = a - >>> b - array([(0., b'0.0', b''), (0., b'0.0', b''), (0., b'0.0', b'')], - dtype=[('x', '>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) - >>> x['foo'] - array([1, 3]) - >>> x['foo'] = 10 - >>> x - array([(10, 2.), (10, 4.)], - dtype=[('foo', '>> y = x['bar'] - >>> y[:] = 11 - >>> x - array([(10, 11.), (10, 11.)], - dtype=[('foo', '>> y.dtype, y.shape, y.strides - (dtype('float32'), (2,), (12,)) - -If the accessed field is a subarray, the dimensions of the subarray -are appended to the shape of the result:: - - >>> x = np.zeros((2, 2), dtype=[('a', np.int32), ('b', np.float64, (3, 3))]) - >>> x['a'].shape - (2, 2) - >>> x['b'].shape - (2, 2, 3, 3) - -Accessing Multiple Fields -``````````````````````````` - -One can index and assign to a structured array with a multi-field index, where -the index is a list of field names. - -.. warning:: - The behavior of multi-field indexes changed from Numpy 1.15 to Numpy 1.16. - -The result of indexing with a multi-field index is a view into the original -array, as follows:: - - >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) - >>> a[['a', 'c']] - array([(0, 0.), (0, 0.), (0, 0.)], - dtype={'names':['a','c'], 'formats':['>> a[['a', 'c']].view('i8') # Fails in Numpy 1.16 - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype - - will need to be changed. This code has raised a ``FutureWarning`` since - Numpy 1.12, and similar code has raised ``FutureWarning`` since 1.7. - - In 1.16 a number of functions have been introduced in the - :mod:`numpy.lib.recfunctions` module to help users account for this - change. These are - :func:`numpy.lib.recfunctions.repack_fields`, 
- :func:`numpy.lib.recfunctions.structured_to_unstructured`, - :func:`numpy.lib.recfunctions.unstructured_to_structured`, - :func:`numpy.lib.recfunctions.apply_along_fields`, - :func:`numpy.lib.recfunctions.assign_fields_by_name`, and - :func:`numpy.lib.recfunctions.require_fields`. - - The function :func:`numpy.lib.recfunctions.repack_fields` can always be - used to reproduce the old behavior, as it will return a packed copy of the - structured array. The code above, for example, can be replaced with: - - >>> from numpy.lib.recfunctions import repack_fields - >>> repack_fields(a[['a', 'c']]).view('i8') # supported in 1.16 - array([0, 0, 0]) - - Furthermore, numpy now provides a new function - :func:`numpy.lib.recfunctions.structured_to_unstructured` which is a safer - and more efficient alternative for users who wish to convert structured - arrays to unstructured arrays, as the view above is often intended to do. - This function allows safe conversion to an unstructured type taking into - account padding, often avoids a copy, and also casts the datatypes - as needed, unlike the view. Code such as: - - >>> b = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) - >>> b[['x', 'z']].view('f4') - array([0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32) - - can be made safer by replacing with: - - >>> from numpy.lib.recfunctions import structured_to_unstructured - >>> structured_to_unstructured(b[['x', 'z']]) - array([[0., 0.], - [0., 0.], - [0., 0.]], dtype=float32) - - -Assignment to an array with a multi-field index modifies the original array:: - - >>> a[['a', 'c']] = (2, 3) - >>> a - array([(2, 0, 3.), (2, 0, 3.), (2, 0, 3.)], - dtype=[('a', '<i4'), ('b', '<i4'), ('c', '<f4')]) - -This obeys the structured array assignment rules described above. For example, -this means that one can swap the values of two fields using appropriate -multi-field indexes:: - - >>> a[['a', 'c']] = a[['c', 'a']] - -Indexing with an Integer to get a Structured Scalar -``````````````````````````````````````````````````` - -Indexing a single element of a structured array (with an integer index) returns -a structured scalar:: - - >>> x = np.array([(1, 2., 3.)], dtype='i, f, f') - >>> scalar = x[0] - >>> scalar - (1, 2., 3.) 
- >>> type(scalar) - <class 'numpy.void'> - -Unlike other numpy scalars, structured scalars are mutable and act like views -into the original array, such that modifying the scalar will modify the -original array. Structured scalars also support access and assignment by field -name:: - - >>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) - >>> s = x[0] - >>> s['bar'] = 100 - >>> x - array([(1, 100.), (3, 4.)], - dtype=[('foo', '<i8'), ('bar', '<f4')]) - -Structured scalars may also be indexed with an integer:: - - >>> scalar = np.array([(1, 2., 3.)], dtype='i, f, f')[0] - >>> scalar[0] - 1 - >>> scalar[1] = 4 - -Thus, tuples might be thought of as the native Python equivalent to numpy's -structured types, much like native python integers are the equivalent to -numpy's integer types. Structured scalars may be converted to a tuple by -calling :func:`ndarray.item`:: - - >>> scalar.item(), type(scalar.item()) - ((1, 4.0, 3.0), <class 'tuple'>) - -Viewing Structured Arrays Containing Objects --------------------------------------------- - -In order to prevent clobbering object pointers in fields of -:class:`numpy.object` type, numpy currently does not allow views of structured -arrays containing objects. - -Structure Comparison --------------------- - -If the dtypes of two void structured arrays are equal, testing the equality of -the arrays will result in a boolean array with the dimensions of the original -arrays, with elements set to ``True`` where all fields of the corresponding -structures are equal. Structured dtypes are equal if the field names, -dtypes and titles are the same, ignoring endianness, and the fields are in -the same order:: - - >>> a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'i4')]) - >>> b = np.ones(2, dtype=[('a', 'i4'), ('b', 'i4')]) - >>> a == b - array([False, False]) - -Currently, if the dtypes of two void structured arrays are not equivalent the -comparison fails, returning the scalar value ``False``. This behavior is -deprecated as of numpy 1.10 and will raise an error or perform elementwise -comparison in the future. 
- -The ``<`` and ``>`` operators always return ``False`` when comparing void -structured arrays, and arithmetic and bitwise operations are not supported. - -Record Arrays -============= - -As an optional convenience numpy provides an ndarray subclass, -:class:`numpy.recarray`, and associated helper functions in the -:mod:`numpy.rec` submodule, that allows access to fields of structured arrays -by attribute instead of only by index. Record arrays also use a special -datatype, :class:`numpy.record`, that allows field access by attribute on the -structured scalars obtained from the array. - -The simplest way to create a record array is with :func:`numpy.rec.array`:: - - >>> recordarr = np.rec.array([(1, 2., 'Hello'), (2, 3., "World")], - ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) - >>> recordarr.bar - array([2., 3.], dtype=float32) - >>> recordarr[1:2] - rec.array([(2, 3., b'World')], - dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')]) - >>> recordarr[1:2].foo - array([2], dtype=int32) - >>> recordarr.foo[1:2] - array([2], dtype=int32) - >>> recordarr[1].baz - b'World' - -:func:`numpy.rec.array` can convert a wide variety of arguments into record -arrays, including structured arrays:: - - >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], - ... dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) - >>> recordarr = np.rec.array(arr) - -The :mod:`numpy.rec` module provides a number of other convenience functions for -creating record arrays, see :ref:`record array creation routines -<routines.array-creation.rec>`. - -A record array representation of a structured array can be obtained using the -appropriate `view <numpy-ndarray-view>`_:: - - >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], - ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) - >>> recordarr = arr.view(dtype=np.dtype((np.record, arr.dtype)), - ... 
type=np.recarray) - -For convenience, viewing an ndarray as type :class:`np.recarray` will -automatically convert to :class:`np.record` datatype, so the dtype can be left -out of the view:: - - >>> recordarr = arr.view(np.recarray) - >>> recordarr.dtype - dtype((numpy.record, [('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')])) - -To get back to a plain ndarray both the dtype and type must be reset. The -following view does so, taking into account the unusual case that the -recordarr was not a structured type:: - - >>> arr2 = recordarr.view(recordarr.dtype.fields or recordarr.dtype, np.ndarray) - -Record array fields accessed by index or by attribute are returned as a record -array if the field has a structured type but as a plain ndarray otherwise. :: - - >>> recordarr = np.rec.array([('Hello', (1, 2)), ("World", (3, 4))], - ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) - >>> type(recordarr.foo) - <class 'numpy.ndarray'> - >>> type(recordarr.bar) - <class 'numpy.recarray'> - -Note that if a field has the same name as an ndarray attribute, the ndarray -attribute takes precedence. Such fields will be inaccessible by attribute but -will still be accessible by index. - -""" -- cgit v1.2.1