diff options
Diffstat (limited to 'numpy/lib/arrayterator.py')
-rw-r--r-- | numpy/lib/arrayterator.py | 81 |
1 files changed, 63 insertions, 18 deletions
diff --git a/numpy/lib/arrayterator.py b/numpy/lib/arrayterator.py index 581e0b31e..d4a91b001 100644 --- a/numpy/lib/arrayterator.py +++ b/numpy/lib/arrayterator.py @@ -2,20 +2,9 @@ A buffered iterator for big arrays. This module solves the problem of iterating over a big file-based array -without having to read it into memory. The ``Arrayterator`` class wraps -an array object, and when iterated it will return subarrays with at most -``buf_size`` elements. - -The algorithm works by first finding a "running dimension", along which -the blocks will be extracted. Given an array of dimensions (d1, d2, ..., -dn), eg, if ``buf_size`` is smaller than ``d1`` the first dimension will -be used. If, on the other hand, - - d1 < buf_size < d1*d2 - -the second dimension will be used, and so on. Blocks are extracted along -this dimension, and when the last block is returned the process continues -from the next dimension, until all elements have been read. +without having to read it into memory. The `Arrayterator` class wraps +an array object, and when iterated it will return sub-arrays with at most +a user-specified number of elements. """ @@ -29,13 +18,69 @@ class Arrayterator(object): """ Buffered iterator for big arrays. - This class creates a buffered iterator for reading big arrays in small + `Arrayterator` creates a buffered iterator for reading big arrays in small contiguous blocks. The class is useful for objects stored in the - filesystem. It allows iteration over the object *without* reading + file system. It allows iteration over the object *without* reading everything in memory; instead, small blocks are read and iterated over. - The class can be used with any object that supports multidimensional - slices, like variables from Scientific.IO.NetCDF, pynetcdf and ndarrays. + `Arrayterator` can be used with any object that supports multidimensional + slices. This includes NumPy arrays, but also variables from + Scientific.IO.NetCDF or pynetcdf for example. 
+ + Parameters + ---------- + var : array_like + The object to iterate over. + buf_size : int, optional + The buffer size. If `buf_size` is supplied, the maximum amount of + data that will be read into memory is `buf_size` elements. + Default is None, which will read as many elements as possible + into memory. + + Attributes + ---------- + var + buf_size + start + stop + step + shape + flat + + See Also + -------- + ndenumerate : Multidimensional array iterator. + flatiter : Flat array iterator. + memmap : Create a memory-map to an array stored in a binary file on disk. + + Notes + ----- + The algorithm works by first finding a "running dimension", along which + the blocks will be extracted. Given an array of dimensions + ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the + first dimension will be used. If, on the other hand, + ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on. + Blocks are extracted along this dimension, and when the last block is + returned the process continues from the next dimension, until all + elements have been read. + + Examples + -------- + >>> import numpy as np + >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6) + >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2) + >>> a_itor.shape + (3, 4, 5, 6) + + Now we can iterate over ``a_itor``, and it will return arrays of size + two. Since `buf_size` was smaller than any dimension, the first + dimension will be iterated over first: + + >>> for subarr in a_itor: + ... if not subarr.all(): + ... print subarr, subarr.shape + ... + [[[[0 1]]]] (1, 1, 1, 2) """ |