diff options
author | Allan Haldane <allan.haldane@gmail.com> | 2015-01-16 23:53:41 -0500 |
---|---|---|
committer | Allan Haldane <allan.haldane@gmail.com> | 2015-01-22 17:36:43 -0500 |
commit | 1bd0b4e8f176cd80e81b5f50832db5f8ba1ee1e9 (patch) | |
tree | fce876400e049c7927cfe4b62ee4d1ca00a8ed7b /numpy | |
parent | b69035e8ea28bd759b929822aaba544d3c5f8c30 (diff) | |
download | numpy-1bd0b4e8f176cd80e81b5f50832db5f8ba1ee1e9.tar.gz |
DOC: improve record/structured array nomenclature & guide
This update adds a section better describing record arrays in the user
guide (numpy/doc/structured_arrays.py).
It also corrects nomenclature, such that "structured array" refers to
ndarrays with structured dtype, "record array" refers to modified
ndarrays as created by np.rec.array, and "recarray" refers to ndarrays
viewed as np.recarray. See the note at the end of the structured
array user guide.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 9 | ||||
-rw-r--r-- | numpy/core/records.py | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arrayobject.c | 2 | ||||
-rw-r--r-- | numpy/doc/creation.py | 2 | ||||
-rw-r--r-- | numpy/doc/glossary.py | 12 | ||||
-rw-r--r-- | numpy/doc/structured_arrays.py | 129 | ||||
-rw-r--r-- | numpy/lib/npyio.py | 4 |
7 files changed, 126 insertions, 40 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 73efdb6a9..66b889cc9 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -4629,7 +4629,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('view', >>> print x [(1, 20) (3, 4)] - Using a view to convert an array to a record array: + Using a view to convert an array to a recarray: >>> z = x.view(np.recarray) >>> z.a @@ -5875,17 +5875,18 @@ add_newdoc('numpy.core.multiarray', 'dtype', >>> np.dtype(np.int16) dtype('int16') - Record, one field name 'f1', containing int16: + Structured type, one field name 'f1', containing int16: >>> np.dtype([('f1', np.int16)]) dtype([('f1', '<i2')]) - Record, one field named 'f1', in itself containing a record with one field: + Structured type, one field named 'f1', in itself containing a structured + type with one field: >>> np.dtype([('f1', [('f1', np.int16)])]) dtype([('f1', [('f1', '<i2')])]) - Record, two fields: the first field contains an unsigned int, the + Structured type, two fields: the first field contains an unsigned int, the second an int32: >>> np.dtype([('f1', np.uint), ('f2', np.int32)]) diff --git a/numpy/core/records.py b/numpy/core/records.py index bf4d835ea..23680711f 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -3,9 +3,9 @@ Record Arrays ============= Record arrays expose the fields of structured arrays as properties. -Most commonly, ndarrays contain elements of a single type, e.g. floats, integers, -bools etc. However, it is possible for elements to be combinations of these, -such as:: +Most commonly, ndarrays contain elements of a single type, e.g. floats, +integers, bools etc. 
However, it is possible for elements to be combinations +of these using structured types, such as:: >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)]) >>> a @@ -25,7 +25,7 @@ one would a dictionary:: Record arrays allow us to access fields as properties:: - >>> ar = a.view(np.recarray) + >>> ar = np.rec.array(a) >>> ar.x array([1, 1]) diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 3f91b748c..6e48ef381 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -732,7 +732,7 @@ array_might_be_written(PyArrayObject *obj) { const char *msg = "Numpy has detected that you (may be) writing to an array returned\n" - "by numpy.diagonal or by selecting multiple fields in a record\n" + "by numpy.diagonal or by selecting multiple fields in a structured\n" "array. This code will likely break in a future numpy release --\n" "see numpy.diagonal or arrays.indexing reference docs for details.\n" "The quick fix is to make an explicit copy (e.g., do\n" diff --git a/numpy/doc/creation.py b/numpy/doc/creation.py index 7979b51aa..b10d45d48 100644 --- a/numpy/doc/creation.py +++ b/numpy/doc/creation.py @@ -17,7 +17,7 @@ There are 5 general mechanisms for creating arrays: This section will not cover means of replicating, joining, or otherwise expanding or mutating existing arrays. Nor will it cover creating object -arrays or record arrays. Both of those are covered in their own sections. +arrays or structured arrays. Both of those are covered in their own sections. Converting Python array_like Objects to Numpy Arrays ==================================================== diff --git a/numpy/doc/glossary.py b/numpy/doc/glossary.py index 3770f5761..f856a742b 100644 --- a/numpy/doc/glossary.py +++ b/numpy/doc/glossary.py @@ -284,7 +284,12 @@ Glossary ndarray See *array*. 
- + + record array + An `ndarray`_ with `structured data type`_ which has been subclassed as + np.recarray and whose dtype is of type np.record, making the + fields of its data type accessible by attribute. + reference If ``a`` is a reference to ``b``, then ``(a is b) == True``. Therefore, ``a`` and ``b`` are different names for the same Python object. @@ -345,7 +350,10 @@ Glossary >>> x[:, 1] array([2, 4]) - + + structured data type + A data type composed of other datatypes. + tuple A sequence that may contain a variable number of types of any kind. A tuple is immutable, i.e., once constructed it cannot be diff --git a/numpy/doc/structured_arrays.py b/numpy/doc/structured_arrays.py index 0444bdf90..f2329827e 100644 --- a/numpy/doc/structured_arrays.py +++ b/numpy/doc/structured_arrays.py @@ -1,34 +1,33 @@ """ -===================================== -Structured Arrays (and Record Arrays) -===================================== +================= +Structured Arrays +================= Introduction ============ -Numpy provides powerful capabilities to create arrays of structs or records. -These arrays permit one to manipulate the data by the structs or by fields of -the struct. A simple example will show what is meant.: :: +Numpy provides powerful capabilities to create arrays of structured datatype. +These arrays permit one to manipulate the data by named fields. A simple +example will show what is meant: :: - >>> x = np.zeros((2,),dtype=('i4,f4,a10')) - >>> x[:] = [(1,2.,'Hello'),(2,3.,"World")] + >>> x = np.array([(1,2.,'Hello'), (2,3.,"World")], + ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) >>> x array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], - dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) Here we have created a one-dimensional array of length 2. 
Each element of -this array is a record that contains three items, a 32-bit integer, a 32-bit +this array is a structure that contains three items, a 32-bit integer, a 32-bit float, and a string of length 10 or less. If we index this array at the second -position we get the second record: :: +position we get the second structure: :: >>> x[1] (2,3.,"World") Conveniently, one can access any field of the array by indexing using the -string that names that field. In this case the fields have received the -default names 'f0', 'f1' and 'f2'. :: +string that names that field. :: - >>> y = x['f1'] + >>> y = x['bar'] >>> y array([ 2., 3.], dtype=float32) >>> y[:] = 2*y @@ -36,19 +35,19 @@ default names 'f0', 'f1' and 'f2'. :: array([ 4., 6.], dtype=float32) >>> x array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], - dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) In these examples, y is a simple float array consisting of the 2nd field -in the record. But, rather than being a copy of the data in the structured +in the structured type. But, rather than being a copy of the data in the structured array, it is a view, i.e., it shares exactly the same memory locations. Thus, when we updated this array by doubling its values, the structured array shows the corresponding values as doubled as well. Likewise, if one -changes the record, the field view also changes: :: +changes the structured array, the field view also changes: :: >>> x[1] = (-1,-1.,"Master") >>> x array([(1, 4.0, 'Hello'), (-1, -1.0, 'Master')], - dtype=[('f0', '>i4'), ('f1', '>f4'), ('f2', '|S10')]) + dtype=[('foo', '>i4'), ('bar', '>f4'), ('baz', '|S10')]) >>> y array([ 4., -1.], dtype=float32) @@ -65,9 +64,10 @@ function keyword or a dtype object constructor itself). This argument must be one of the following: 1) string, 2) tuple, 3) list, or 4) dictionary. Each of these is briefly described below. -1) String argument (as used in the above examples). 
+1) String argument. In this case, the constructor expects a comma-separated list of type -specifiers, optionally with extra shape information. +specifiers, optionally with extra shape information. The fields are +given the default names 'f0', 'f1', 'f2' and so on. The type specifiers can take 4 different forms: :: a) b1, i1, i2, i4, i8, u1, u2, u4, u8, f2, f4, f8, c8, c16, a<n> @@ -152,7 +152,7 @@ values specifying type, offset, and an optional title. :: Accessing and modifying field names =================================== -The field names are an attribute of the dtype object defining the record structure. +The field names are an attribute of the dtype object defining the structure. For the last example: :: >>> x.dtype.names @@ -213,11 +213,88 @@ If you fill it in row by row, it takes a take a tuple array([(10.0, 20.0), (1.0, 0.0), (2.0, 0.0), (3.0, 0.0), (4.0, 0.0)], dtype=[('var1', '<f8'), ('var2', '<f8')]) -More information -==================================== -You can find some more information on recarrays and structured arrays -(including the difference between the two) `here -<http://www.scipy.org/Cookbook/Recarray>`_. +Record Arrays +============= + +For convenience, numpy provides "record arrays" which allow one to access +fields of structured arrays by attribute rather than by index. Record arrays +are structured arrays wrapped using a subclass of ndarray, +:class:`numpy.recarray`, which allows field access by attribute on the array +object, and record arrays also use a special datatype, :class:`numpy.record`, +which allows field access by attribute on the individual elements of the array. + +The simplest way to create a record array is with :func:`numpy.rec.array`: :: + + >>> recordarr = np.rec.array([(1,2.,'Hello'),(2,3.,"World")], + ... 
dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'S10')]) + >>> recordarr.bar + array([ 2., 3.], dtype=float32) + >>> recordarr[1:2] + rec.array([(2, 3.0, 'World')], + dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')]) + >>> recordarr[1:2].foo + array([2], dtype=int32) + >>> recordarr.foo[1:2] + array([2], dtype=int32) + >>> recordarr[1].baz + 'World' + +numpy.rec.array can convert a wide variety of arguments into record arrays, +including normal structured arrays: :: + + >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + ... dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) + >>> recordarr = np.rec.array(arr) + +The numpy.rec module provides a number of other convenience functions for +creating record arrays, see :ref:`record array creation routines +<routines.array-creation.rec>`. + +A record array representation of a structured array can be obtained using the +appropriate :ref:`view`: :: + + >>> arr = np.array([(1,2.,'Hello'),(2,3.,"World")], + ... dtype=[('foo', 'i4'),('bar', 'f4'), ('baz', 'a10')]) + >>> recordarr = arr.view(dtype=np.dtype((np.record, arr.dtype)), + ... type=np.recarray) + +Record array fields accessed by index or by attribute are returned as a record +array if the field has a structured type but as a plain ndarray otherwise. :: + + >>> recordarr = np.rec.array([('Hello', (1,2)),("World", (3,4))], + ... dtype=[('foo', 'S6'),('bar', [('A', int), ('B', int)])]) + >>> type(recordarr.foo) + <type 'numpy.ndarray'> + >>> type(recordarr.bar) + <class 'numpy.core.records.recarray'> + +Partial Attribute Access +------------------------ + +The differences between record arrays and plain structured arrays induce a +small performance penalty. It is possible to apply one or the other view +independently if desired. To allow field access by attribute only on the array +object it is sufficient to view an array as a recarray: :: + + >>> recarr = arr.view(np.recarray) + +This type of view is commonly used, for example in np.npyio and +np.recfunctions. 
Note that unlike full record arrays the individual elements of +such a view do not have field attributes:: + + >>> recarr[0].foo + AttributeError: 'numpy.void' object has no attribute 'foo' + +To use the np.record dtype only, convert the dtype using the (base_class, +dtype) form described in numpy.dtype. This type of view is rarely used. :: + + >>> arr_records = arr.view(np.dtype((np.record, arr.dtype))) + +In documentation, the term 'structured array' will refer to objects of type +np.ndarray with structured dtype, 'record array' will refer to structured +arrays subclassed as np.recarray and whose dtype is of type np.record, and +'recarray' will refer to arrays subclassed as np.recarray but whose dtype is +not of type np.record. """ from __future__ import division, absolute_import, print_function diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 5f274f27c..bedd0e941 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -656,7 +656,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, generators should return byte strings for Python 3k. dtype : data-type, optional Data-type of the resulting array; default: float. If this is a - record data-type, the resulting array will be 1-dimensional, and + structured data-type, the resulting array will be 1-dimensional, and each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. @@ -680,7 +680,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, The default, None, results in all columns being read. unpack : bool, optional If True, the returned array is transposed, so that arguments may be - unpacked using ``x, y, z = loadtxt(...)``. When used with a record + unpacked using ``x, y, z = loadtxt(...)``. When used with a structured data-type, arrays are returned for each field. Default is False. ndmin : int, optional The returned array will have at least `ndmin` dimensions. |