From 845def00c85f9f40cfa64e6dabb4158bebd502f4 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 22 Nov 2018 19:47:58 -0500 Subject: ENH: add back the multifield copy->view change Fixes #10409 Closes #11530 --- numpy/doc/structured_arrays.py | 108 +++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 46 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 42711a7c0..0fcdecf00 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -35,26 +35,24 @@ with the field name:: array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], dtype=[('name', 'S10'), ('age', '>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) >>> a[['a', 'c']] @@ -420,41 +419,58 @@ in the order they were indexed. Note that unlike for single-field indexing, the view's dtype has the same itemsize as the original array, and has fields at the same offsets as in the original array, and unindexed fields are merely missing. -In Numpy 1.15, indexing an array with a multi-field index returns a copy of -the result above for 1.16, but with fields packed together in memory as if -passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the -behavior since Numpy 1.7. - .. warning:: - The new behavior in Numpy 1.16 leads to extra "padding" bytes at the - location of unindexed fields. You will need to update any code which depends - on the data having a "packed" layout. For instance code such as:: + In Numpy 1.15, indexing an array with a multi-field index returned a copy of + the result above, but with fields packed together in memory as if + passed through :func:`numpy.lib.recfunctions.repack_fields`. + + The new behavior as of Numpy 1.16 leads to extra "padding" bytes at the + location of unindexed fields compared to 1.15. You will need to update any + code which depends on the data having a "packed" layout. 
For instance code + such as:: + + >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) + >>> a[['a','c']].view('i8') # Fails in Numpy 1.16 + ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + + will need to be changed. This code has raised a ``FutureWarning`` since + Numpy 1.12, and similar code has raised ``FutureWarning`` since 1.7. + + In 1.16 a number of functions have been introduced in the + :mod:`numpy.lib.recfunctions` module to help users account for this + change. These are + :func:`numpy.lib.recfunctions.repack_fields`, + :func:`numpy.lib.recfunctions.structured_to_unstructured`, + :func:`numpy.lib.recfunctions.unstructured_to_structured`, + :func:`numpy.lib.recfunctions.apply_along_fields`, + :func:`numpy.lib.recfunctions.assign_fields_by_name`, and + :func:`numpy.lib.recfunctions.require_fields`. - >>> a[['a','c']].view('i8') # will fail in Numpy 1.16 - ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype + The function :func:`numpy.lib.recfunctions.repack_fields` can always be + used to reproduce the old behavior, as it will return a packed copy of the + structured array. The code above, for example, can be replaced with: - will need to be changed. This code has raised a ``FutureWarning`` since - Numpy 1.12. + >>> repack_fields(a[['a','c']]).view('i8') # supported in 1.16 + array([0, 0, 0]) - The following is a recommended fix, which will behave identically in Numpy - 1.15 and Numpy 1.16:: + Furthermore, numpy now provides a new function + :func:`numpy.lib.recfunctions.structured_to_unstructured` which is a safer + and more efficient alternative for users who wish to convert structured + arrays to unstructured arrays, as the view above is often intended to do. + This function allows safe conversion to an unstructured type taking into + account padding, often avoids a copy, and also casts the datatypes + as needed, unlike the view. 
Code such as: - >>> from numpy.lib.recfunctions import repack_fields - >>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16 - array([0, 0, 0]) + >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) + >>> a[['x', 'z']].view('f4') - The :module:`numpy.lib.recfunctions` module has other new methods - introduced in numpy 1.16 to help users account for this change. These are - :func:`numpy.lib.recfunctions.structured_to_unstructured`, - :func:`numpy.lib.recfunctions.unstructured_to_structured`, - :func:`numpy.lib.recfunctions.apply_along_fields`, - :func:`numpy.lib.recfunctions.assign_fields_by_name`, and - :func:`numpy.lib.recfunctions.require_fields`. + can be made safer by replacing with: + + >>> structured_to_unstructured(a[['x', 'z']]) + array([0, 0, 0]) -Assigning to an array with a multi-field index will behave the same in Numpy -1.15 and Numpy 1.16. In both versions the assignment will modify the original -array:: +Assignment to an array with a multi-field index modifies the original array:: >>> a[['a', 'c']] = (2, 3) >>> a -- cgit v1.2.1 From 2981ed622501ac48c54c4efdf06a84084b70ff66 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sat, 15 Dec 2018 11:46:22 -0500 Subject: DOC: more doc updates for structured arrays [ci skip] --- numpy/doc/structured_arrays.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 0fcdecf00..e92a06124 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -397,6 +397,15 @@ typically a non-structured array, except in the case of nested structures. 
>>> y.dtype, y.shape, y.strides (dtype('float32'), (2,), (12,)) +If the accessed field is a subarray, the dimensions of the subarray +are appended to the shape of the result:: + + >>> x = np.zeros((2,2), dtype=[('a', np.int32), ('b', np.float64, (3,3))]) + >>> x['a'].shape + (2, 2) + >>> x['b'].shape + (2, 2, 3, 3) + Accessing Multiple Fields ``````````````````````````` -- cgit v1.2.1 From db6f50b27f2f705bd7ace8420a7204ff50872296 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 Jan 2019 21:22:46 +0200 Subject: DOC: doctest structured array introduction --- numpy/doc/structured_arrays.py | 106 ++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index e92a06124..da3a74bd6 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -13,8 +13,8 @@ datatypes organized as a sequence of named :term:`fields `. For example, >>> x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], ... 
dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]) >>> x - array([('Rex', 9, 81.0), ('Fido', 3, 27.0)], - dtype=[('name', 'S10'), ('age', '>> x['age'] = 5 >>> x - array([('Rex', 5, 81.0), ('Fido', 5, 27.0)], - dtype=[('name', 'S10'), ('age', '>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2,2))]) - dtype=[('x', '>> np.dtype([('x', 'f4'), ('y', np.float32), ('z', 'f4', (2, 2))]) + dtype([('x', '>> np.dtype([('x', 'f4'),('', 'i4'),('z', 'i8')]) + >>> np.dtype([('x', 'f4'), ('', 'i4'), ('z', 'i8')]) dtype([('x', '>> np.dtype('i8,f4,S3') + >>> np.dtype('i8, f4, S3') dtype([('f0', '>> np.dtype('3int8, float32, (2,3)float64') - dtype([('f0', 'i1', 3), ('f1', '>> np.dtype('3int8, float32, (2, 3)float64') + dtype([('f0', 'i1', (3,)), ('f1', '>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4','f4']}) + >>> np.dtype({'names': ['col1', 'col2'], 'formats': ['i4', 'f4']}) dtype([('col1', '>> np.dtype({'names': ['col1', 'col2'], - ... 'formats': ['i4','f4'], + ... 'formats': ['i4', 'f4'], ... 'offsets': [0, 4], ... 'itemsize': 12}) dtype({'names':['col1','col2'], 'formats':['>> np.dtype=({'col1': ('i1',0), 'col2': ('f4',1)}) - dtype([(('col1'), 'i1'), (('col2'), '>f4')]) + >>> np.dtype({'col1': ('i1', 0), 'col2': ('f4', 1)}) + dtype([('col1', 'i1'), ('col2', '>> def print_offsets(d): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) - >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2')) + >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2')) offsets: [0, 1, 2, 6, 7, 15] itemsize: 17 @@ -215,7 +215,7 @@ in bytes for simple datatypes, see :c:member:`PyArray_Descr.alignment`. The structure will also have trailing padding added so that its itemsize is a multiple of the largest field's alignment. 
:: - >>> print_offsets(np.dtype('u1,u1,i4,u1,i8,u2', align=True)) + >>> print_offsets(np.dtype('u1, u1, i4, u1, i8, u2', align=True)) offsets: [0, 1, 4, 8, 16, 24] itemsize: 32 @@ -255,6 +255,7 @@ string, which will be the field's title and field name respectively. For example:: >>> np.dtype([(('my title', 'name'), 'f4')]) + dtype([(('my title', 'name'), '>> np.dtype({'name': ('i4', 0, 'my title')}) + dtype([(('my title', 'name'), '>> for name in d.names: ... print(d.fields[name][:2]) + (dtype('int64'), 0) + (dtype('float32'), 8) Union types ----------- @@ -305,8 +309,8 @@ in the array, and not a list or array as these will trigger numpy's broadcasting rules. The tuple's elements are assigned to the successive fields of the array, from left to right:: - >>> x = np.array([(1,2,3),(4,5,6)], dtype='i8,f4,f8') - >>> x[1] = (7,8,9) + >>> x = np.array([(1, 2, 3), (4, 5, 6)], dtype='i8, f4, f8') + >>> x[1] = (7, 8, 9) >>> x array([(1, 2., 3.), (7, 8., 9.)], dtype=[('f0', '>> x = np.zeros(2, dtype='i8,f4,?,S1') + >>> x = np.zeros(2, dtype='i8, f4, ?, S1') >>> x[:] = 3 >>> x - array([(3, 3.0, True, b'3'), (3, 3.0, True, b'3')], + array([(3, 3., True, b'3'), (3, 3., True, b'3')], dtype=[('f0', '>> x[:] = np.arange(2) >>> x - array([(0, 0.0, False, b'0'), (1, 1.0, True, b'1')], + array([(0, 0., False, b'0'), (1, 1., True, b'1')], dtype=[('f0', '>> onefield = np.zeros(2, dtype=[('A', 'i4')]) >>> nostruct = np.zeros(2, dtype='i4') >>> nostruct[:] = twofield + Traceback (most recent call last): + File "", line 1, in ValueError: Can't cast from structure to non-structure, except if the structure only has a single field. >>> nostruct[:] = onefield >>> nostruct @@ -355,7 +361,7 @@ included in any of the fields are unaffected. 
:: >>> b = np.ones(3, dtype=[('x', 'f4'), ('y', 'S3'), ('z', 'O')]) >>> b[:] = a >>> b - array([(0.0, b'0.0', b''), (0.0, b'0.0', b''), (0.0, b'0.0', b'')], + array([(0., b'0.0', b''), (0., b'0.0', b''), (0., b'0.0', b'')], dtype=[('x', '>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> x['foo'] array([1, 3]) >>> x['foo'] = 10 @@ -386,9 +392,9 @@ The resulting array is a view into the original array. It shares the same memory locations and writing to the view will modify the original array. :: >>> y = x['bar'] - >>> y[:] = 10 + >>> y[:] = 11 >>> x - array([(10, 5.), (10, 5.)], + array([(10, 11.), (10, 11.)], dtype=[('foo', '>> x = np.zeros((2,2), dtype=[('a', np.int32), ('b', np.float64, (3,3))]) + >>> x = np.zeros((2, 2), dtype=[('a', np.int32), ('b', np.float64, (3, 3))]) >>> x['a'].shape (2, 2) >>> x['b'].shape @@ -438,8 +444,9 @@ same offsets as in the original array, and unindexed fields are merely missing. code which depends on the data having a "packed" layout. For instance code such as:: - >>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')]) - >>> a[['a','c']].view('i8') # Fails in Numpy 1.16 + >>> a[['a', 'c']].view('i8') # Fails in Numpy 1.16 + Traceback (most recent call last): + File "", line 1, in ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype will need to be changed. This code has raised a ``FutureWarning`` since @@ -459,7 +466,8 @@ same offsets as in the original array, and unindexed fields are merely missing. used to reproduce the old behavior, as it will return a packed copy of the structured array. 
The code above, for example, can be replaced with: - >>> repack_fields(a[['a','c']]).view('i8') # supported in 1.16 + >>> from numpy.lib.recfunctions import repack_fields + >>> repack_fields(a[['a', 'c']]).view('i8') # supported in 1.16 array([0, 0, 0]) Furthermore, numpy now provides a new function @@ -470,12 +478,14 @@ same offsets as in the original array, and unindexed fields are merely missing. account padding, often avoids a copy, and also casts the datatypes as needed, unlike the view. Code such as: - >>> a = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) - >>> a[['x', 'z']].view('f4') + >>> b = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) + >>> b[['x', 'z']].view('f4') + array([0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32) can be made safer by replacing with: - >>> structured_to_unstructured(a[['x', 'z']]) + >>> from numpy.lib.recfunctions import structured_to_unstructured + >>> structured_to_unstructured(b[['x', 'z']]) array([0, 0, 0]) @@ -483,8 +493,8 @@ Assignment to an array with a multi-field index modifies the original array:: >>> a[['a', 'c']] = (2, 3) >>> a - array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)], - dtype=[('a', '>> x = np.array([(1, 2., 3.)], dtype='i,f,f') + >>> x = np.array([(1, 2., 3.)], dtype='i, f, f') >>> scalar = x[0] >>> scalar (1, 2., 3.) >>> type(scalar) - numpy.void + Unlike other numpy scalars, structured scalars are mutable and act like views into the original array, such that modifying the scalar will modify the original array. 
Structured scalars also support access and assignment by field name:: - >>> x = np.array([(1,2),(3,4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) + >>> x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')]) >>> s = x[0] >>> s['bar'] = 100 >>> x @@ -519,7 +529,7 @@ name:: Similarly to tuples, structured scalars can also be indexed with an integer:: - >>> scalar = np.array([(1, 2., 3.)], dtype='i,f,f')[0] + >>> scalar = np.array([(1, 2., 3.)], dtype='i, f, f')[0] >>> scalar[0] 1 >>> scalar[1] = 4 @@ -530,7 +540,7 @@ numpy's integer types. Structured scalars may be converted to a tuple by calling :func:`ndarray.item`:: >>> scalar.item(), type(scalar.item()) - ((1, 2.0, 3.0), tuple) + ((1, 4.0, 3.0), ) Viewing Structured Arrays Containing Objects -------------------------------------------- @@ -574,24 +584,24 @@ structured scalars obtained from the array. The simplest way to create a record array is with :func:`numpy.rec.array`:: - >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], + >>> recordarr = np.rec.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) >>> recordarr.bar array([ 2., 3.], dtype=float32) >>> recordarr[1:2] - rec.array([(2, 3.0, 'World')], + rec.array([(2, 3., b'World')], dtype=[('foo', '>> recordarr[1:2].foo array([2], dtype=int32) >>> recordarr.foo[1:2] array([2], dtype=int32) >>> recordarr[1].baz - 'World' + b'World' :func:`numpy.rec.array` can convert a wide variety of arguments into record arrays, including structured arrays:: - >>> arr = array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... 
dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) >>> recordarr = np.rec.array(arr) @@ -602,9 +612,9 @@ creating record arrays, see :ref:`record array creation routines A record array representation of a structured array can be obtained using the appropriate :ref:`view`:: - >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) - >>> recordarr = arr.view(dtype=dtype((np.record, arr.dtype)), + >>> recordarr = arr.view(dtype=np.dtype((np.record, arr.dtype)), ... type=np.recarray) For convenience, viewing an ndarray as type :class:`np.recarray` will @@ -624,12 +634,12 @@ recordarr was not a structured type:: Record array fields accessed by index or by attribute are returned as a record array if the field has a structured type but as a plain ndarray otherwise. :: - >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], + >>> recordarr = np.rec.array([('Hello', (1, 2)), ("World", (3, 4))], ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) >>> type(recordarr.foo) - + >>> type(recordarr.bar) - + Note that if a field has the same name as an ndarray attribute, the ndarray attribute takes precedence. 
Such fields will be inaccessible by attribute but -- cgit v1.2.1 From 62433284d65a3629a199958da2df3a807c60fab4 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 20 Feb 2019 23:46:20 +0200 Subject: DOC: reduce warnings when building, reword, tweak doc building --- numpy/doc/structured_arrays.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index da3a74bd6..c3605b49a 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -57,7 +57,7 @@ A structured datatype can be thought of as a sequence of bytes of a certain length (the structure's :term:`itemsize`) which is interpreted as a collection of fields. Each field has a name, a datatype, and a byte offset within the structure. The datatype of a field may be any numpy datatype including other -structured datatypes, and it may also be a :term:`sub-array` which behaves like +structured datatypes, and it may also be a :term:`subarray` which behaves like an ndarray of a specified shape. The offsets of the fields are arbitrary, and fields may even overlap. These offsets are usually determined automatically by numpy, but can also be specified. @@ -231,7 +231,7 @@ each field's offset is a multiple of its size and that the itemsize is a multiple of the largest field size, and raise an exception if not. If the offsets of the fields and itemsize of a structured array satisfy the -alignment conditions, the array will have the ``ALIGNED`` :ref:`flag +alignment conditions, the array will have the ``ALIGNED`` :attr:`flag ` set. A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an @@ -266,7 +266,7 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual >>> np.dtype({'name': ('i4', 0, 'my title')}) dtype([(('my title', 'name'), '`. 
A record array representation of a structured array can be obtained using the -appropriate :ref:`view`:: +appropriate `view `_:: >>> arr = np.array([(1, 2., 'Hello'), (2, 3., "World")], ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) -- cgit v1.2.1 From 2f41bb26b061821c77aff6982630de937ad9007a Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 24 Feb 2019 10:10:47 +0200 Subject: DOC: fixes from review --- numpy/doc/structured_arrays.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'numpy/doc/structured_arrays.py') diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index c3605b49a..c0437dc07 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -57,10 +57,10 @@ A structured datatype can be thought of as a sequence of bytes of a certain length (the structure's :term:`itemsize`) which is interpreted as a collection of fields. Each field has a name, a datatype, and a byte offset within the structure. The datatype of a field may be any numpy datatype including other -structured datatypes, and it may also be a :term:`subarray` which behaves like -an ndarray of a specified shape. The offsets of the fields are arbitrary, and -fields may even overlap. These offsets are usually determined automatically by -numpy, but can also be specified. +structured datatypes, and it may also be a :term:`subarray data type` which +behaves like an ndarray of a specified shape. The offsets of the fields are +arbitrary, and fields may even overlap. These offsets are usually determined +automatically by numpy, but can also be specified. Structured Datatype Creation ---------------------------- @@ -266,7 +266,7 @@ providing a 3-element tuple ``(datatype, offset, title)`` instead of the usual >>> np.dtype({'name': ('i4', 0, 'my title')}) dtype([(('my title', 'name'), '