summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/lib/__init__.py2
-rw-r--r--numpy/lib/_datasource.py443
-rw-r--r--numpy/lib/format.py412
-rw-r--r--numpy/lib/io.py360
-rw-r--r--numpy/lib/src/_compiled_base.c272
-rw-r--r--numpy/lib/tests/test_format.py512
-rw-r--r--numpy/lib/utils.py159
7 files changed, 2153 insertions, 7 deletions
diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py
index e17a0a726..6667dbc07 100644
--- a/numpy/lib/__init__.py
+++ b/numpy/lib/__init__.py
@@ -15,6 +15,7 @@ from getlimits import *
#import convertcode
from utils import *
from arraysetops import *
+from io import *
import math
__all__ = ['emath','math']
@@ -29,6 +30,7 @@ __all__ += machar.__all__
__all__ += getlimits.__all__
__all__ += utils.__all__
__all__ += arraysetops.__all__
+__all__ += io.__all__
def test(level=1, verbosity=1):
from numpy.testing import NumpyTest
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
new file mode 100644
index 000000000..06aae85d8
--- /dev/null
+++ b/numpy/lib/_datasource.py
@@ -0,0 +1,443 @@
+"""A file interface for handling local and remote data files.
+The goal of datasource is to abstract some of the file system operations when
+dealing with data files so the researcher doesn't have to know all the
+low-level details. Through datasource, a researcher can obtain and use a
+file with one function call, regardless of location of the file.
+
+DataSource is meant to augment standard python libraries, not replace them.
+It should work seamlessly with standard file IO operations and the os module.
+
+DataSource files can originate locally or remotely:
+
+- local files : '/home/guido/src/local/data.txt'
+- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
+
+DataSource files can also be compressed or uncompressed. Currently only gzip
+and bz2 are supported.
+
+Example:
+
+ >>> # Create a DataSource, use os.curdir (default) for local storage.
+ >>> ds = datasource.DataSource()
+ >>>
+ >>> # Open a remote file.
+ >>> # DataSource downloads the file, stores it locally in:
+ >>> # './www.google.com/index.html'
+ >>> # opens the file and returns a file object.
+ >>> fp = ds.open('http://www.google.com/index.html')
+ >>>
+ >>> # Use the file as you normally would
+ >>> fp.read()
+ >>> fp.close()
+
+"""
+
+__docformat__ = "restructuredtext en"
+
+import bz2
+import gzip
+import os
+import tempfile
+from shutil import rmtree
+from urllib2 import urlopen, URLError
+from urlparse import urlparse
+
+
+# TODO: .zip support, .tar support?
+_file_openers = {".gz":gzip.open, ".bz2":bz2.BZ2File, None:file}
+
+
+def open(path, mode='r', destpath=os.curdir):
+ """Open ``path`` with ``mode`` and return the file object.
+
+ If ``path`` is an URL, it will be downloaded, stored in the DataSource
+ directory and opened from there.
+
+ *Parameters*:
+
+ path : {string}
+
+ mode : {string}, optional
+
+ destpath : {string}, optional
+ Destination directory where URLs will be downloaded and stored.
+
+ *Returns*:
+
+ file object
+
+ """
+
+ ds = DataSource(destpath)
+ return ds.open(path, mode)
+
+
+class DataSource (object):
+ """A generic data source file (file, http, ftp, ...).
+
+ DataSources could be local files or remote files/URLs. The files may
+ also be compressed or uncompressed. DataSource hides some of the low-level
+ details of downloading the file, allowing you to simply pass in a valid
+ file path (or URL) and obtain a file object.
+
+ *Methods*:
+
+ - exists : test if the file exists locally or remotely
+ - abspath : get absolute path of the file in the DataSource directory
+ - open : open the file
+
+ *Example URL DataSource*::
+
+ # Initialize DataSource with a local directory, default is os.curdir.
+ ds = DataSource('/home/guido')
+
+ # Open remote file.
+ # File will be downloaded and opened from here:
+ # /home/guido/site/xyz.txt
+ ds.open('http://fake.xyz.web/site/xyz.txt')
+
+ *Example using DataSource for temporary files*::
+
+ # Initialize DataSource with 'None' for the local directory.
+ ds = DataSource(None)
+
+ # Open local file.
+ # Opened file exists in a temporary directory like:
+ # /tmp/tmpUnhcvM/foobar.txt
+ # Temporary directories are deleted when the DataSource is deleted.
+ ds.open('/home/guido/foobar.txt')
+
+ *Notes*:
+ BUG : URLs require a scheme string ('http://') to be used.
+ www.google.com will fail.
+
+ >>> repos.exists('www.google.com/index.html')
+ False
+
+ >>> repos.exists('http://www.google.com/index.html')
+ True
+
+ """
+
+ def __init__(self, destpath=os.curdir):
+ """Create a DataSource with a local path at destpath."""
+ if destpath:
+ self._destpath = os.path.abspath(destpath)
+ self._istmpdest = False
+ else:
+ self._destpath = tempfile.mkdtemp()
+ self._istmpdest = True
+
+ def __del__(self):
+ # Remove temp directories
+ if self._istmpdest:
+ rmtree(self._destpath)
+
+ def _iszip(self, filename):
+ """Test if the filename is a zip file by looking at the file extension.
+ """
+ fname, ext = os.path.splitext(filename)
+ return ext in _file_openers.keys()
+
+ def _iswritemode(self, mode):
+ """Test if the given mode will open a file for writing."""
+
+ # Currently only used to test the bz2 files.
+ _writemodes = ("w", "+")
+ for c in mode:
+ if c in _writemodes:
+ return True
+ return False
+
+ def _splitzipext(self, filename):
+ """Split zip extension from filename and return filename.
+
+ *Returns*:
+ base, zip_ext : {tuple}
+
+ """
+
+ if self._iszip(filename):
+ return os.path.splitext(filename)
+ else:
+ return filename, None
+
+ def _possible_names(self, filename):
+ """Return a tuple containing compressed filename variations."""
+ names = [filename]
+ if not self._iszip(filename):
+ for zipext in _file_openers.keys():
+ if zipext:
+ names.append(filename+zipext)
+ return names
+
+ def _isurl(self, path):
+ """Test if path is a net location. Tests the scheme and netloc."""
+
+ # BUG : URLs require a scheme string ('http://') to be used.
+ # www.google.com will fail.
+ # Should we prepend the scheme for those that don't have it and
+ # test that also? Similar to the way we append .gz and test for
+ # for compressed versions of files.
+
+ scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
+ return bool(scheme and netloc)
+
+ def _cache(self, path):
+ """Cache the file specified by path.
+
+ Creates a copy of the file in the datasource cache.
+
+ """
+
+ upath = self.abspath(path)
+
+ # ensure directory exists
+ if not os.path.exists(os.path.dirname(upath)):
+ os.makedirs(os.path.dirname(upath))
+
+ # TODO: Doesn't handle compressed files!
+ if self._isurl(path):
+ try:
+ openedurl = urlopen(path)
+ file(upath, 'w').write(openedurl.read())
+ except URLError:
+ raise URLError("URL not found: ", path)
+ else:
+ try:
+ # TODO: Why not just copy the file with shutils.copyfile?
+ fp = file(path, 'r')
+ file(upath, 'w').write(fp.read())
+ except IOError:
+ raise IOError("File not found: ", path)
+ return upath
+
+ def _findfile(self, path):
+ """Searches for ``path`` and returns full path if found.
+
+ If path is an URL, _findfile will cache a local copy and return
+ the path to the cached file.
+ If path is a local file, _findfile will return a path to that local
+ file.
+
+ The search will include possible compressed versions of the file and
+    return the first occurrence found.
+
+ """
+
+ # Build list of possible local file paths
+ if not self._isurl(path):
+ # Valid local paths
+ filelist = self._possible_names(path)
+ # Paths in self._destpath
+ filelist += self._possible_names(self.abspath(path))
+ else:
+ # Cached URLs in self._destpath
+ filelist = self._possible_names(self.abspath(path))
+ # Remote URLs
+ filelist = filelist + self._possible_names(path)
+
+ for name in filelist:
+ if self.exists(name):
+ if self._isurl(name):
+ name = self._cache(name)
+ return name
+ return None
+
+ def abspath(self, path):
+ """Return absolute path of ``path`` in the DataSource directory.
+
+ If ``path`` is an URL, the ``abspath`` will be either the location
+ the file exists locally or the location it would exist when opened
+ using the ``open`` method.
+
+        The functionality is identical to os.path.abspath.
+
+ *Parameters*:
+
+ path : {string}
+ Can be a local file or a remote URL.
+
+ *Returns*:
+
+ Complete path, rooted in the DataSource destination directory.
+
+ *See Also*:
+
+ `open` : Method that downloads and opens files.
+
+ """
+
+ # TODO: This should be more robust. Handles case where path includes
+ # the destpath, but not other sub-paths. Failing case:
+ # path = /home/guido/datafile.txt
+ # destpath = /home/alex/
+ # upath = self.abspath(path)
+ # upath == '/home/alex/home/guido/datafile.txt'
+
+ # handle case where path includes self._destpath
+ splitpath = path.split(self._destpath, 2)
+ if len(splitpath) > 1:
+ path = splitpath[1]
+ scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
+ return os.path.join(self._destpath, netloc, upath.strip(os.sep))
+
+ def exists(self, path):
+ """Test if ``path`` exists.
+
+ Test if ``path`` exists as (and in this order):
+
+ - a local file.
+        - a remote URL that has been downloaded and stored locally in the
+ DataSource directory.
+ - a remote URL that has not been downloaded, but is valid and
+ accessible.
+
+ *Parameters*:
+
+ path : {string}
+ Can be a local file or a remote URL.
+
+ *Returns*:
+
+ boolean
+
+ *See Also*:
+
+ `abspath`
+
+ *Notes*
+
+        When ``path`` is an URL, ``exists`` will return True if it's either
+ stored locally in the DataSource directory, or is a valid remote
+        URL. DataSource does not discriminate between the two, the file
+ is accessible if it exists in either location.
+
+ """
+
+ # Test local path
+ if os.path.exists(path):
+ return True
+
+ # Test cached url
+ upath = self.abspath(path)
+ if os.path.exists(upath):
+ return True
+
+ # Test remote url
+ if self._isurl(path):
+ try:
+ netfile = urlopen(path)
+ del(netfile)
+ return True
+ except URLError:
+ return False
+ return False
+
+ def open(self, path, mode='r'):
+ """Open ``path`` with ``mode`` and return the file object.
+
+ If ``path`` is an URL, it will be downloaded, stored in the DataSource
+ directory and opened from there.
+
+ *Parameters*:
+
+ path : {string}
+
+ mode : {string}, optional
+
+
+ *Returns*:
+
+ file object
+
+ """
+
+ # TODO: There is no support for opening a file for writing which
+ # doesn't exist yet (creating a file). Should there be?
+
+ # TODO: Add a ``subdir`` parameter for specifying the subdirectory
+ # used to store URLs in self._destpath.
+
+ if self._isurl(path) and self._iswritemode(mode):
+ raise ValueError("URLs are not writeable")
+
+ # NOTE: _findfile will fail on a new file opened for writing.
+ found = self._findfile(path)
+ if found:
+ _fname, ext = self._splitzipext(found)
+ if ext == 'bz2':
+ mode.replace("+", "")
+ return _file_openers[ext](found, mode=mode)
+ else:
+ raise IOError("%s not found." % path)
+
+
+class Repository (DataSource):
+ """A data Repository where multiple DataSource's share a base URL/directory.
+
+ Repository extends DataSource by prepending a base URL (or directory) to
+ all the files it handles. Use a Repository when you will be working with
+    multiple files from one base URL. Initialize the Repository with the
+    base URL, then refer to each file by its filename only.
+
+ *Methods*:
+
+ - exists : test if the file exists locally or remotely
+ - abspath : get absolute path of the file in the DataSource directory
+ - open : open the file
+
+ *Toy example*::
+
+ # Analyze all files in the repository.
+ repos = Repository('/home/user/data/dir/')
+ for filename in filelist:
+ fp = repos.open(filename)
+ fp.analyze()
+ fp.close()
+
+ # Similarly you could use a URL for a repository.
+ repos = Repository('http://www.xyz.edu/data')
+
+ """
+
+ def __init__(self, baseurl, destpath=os.curdir):
+ """Create a Repository with a shared url or directory of baseurl."""
+ DataSource.__init__(self, destpath=destpath)
+ self._baseurl = baseurl
+
+ def __del__(self):
+ DataSource.__del__(self)
+
+ def _fullpath(self, path):
+ """Return complete path for path. Prepends baseurl if necessary."""
+ splitpath = path.split(self._baseurl, 2)
+ if len(splitpath) == 1:
+ result = os.path.join(self._baseurl, path)
+ else:
+ result = path # path contains baseurl already
+ return result
+
+ def _findfile(self, path):
+ """Extend DataSource method to prepend baseurl to ``path``."""
+ return DataSource._findfile(self, self._fullpath(path))
+
+ def abspath(self, path):
+ """Extend DataSource method to prepend baseurl to ``path``."""
+ return DataSource.abspath(self, self._fullpath(path))
+
+ def exists(self, path):
+ """Extend DataSource method to prepend baseurl to ``path``."""
+ return DataSource.exists(self, self._fullpath(path))
+
+ def open(self, path, mode='r'):
+ """Extend DataSource method to prepend baseurl to ``path``."""
+ return DataSource.open(self, self._fullpath(path), mode)
+
+ def listdir(self):
+ '''List files in the source Repository.'''
+ if self._isurl(self._baseurl):
+ raise NotImplementedError, \
+ "Directory listing of URLs, not supported yet."
+ else:
+ return os.listdir(self._baseurl)
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
new file mode 100644
index 000000000..bb58c5c61
--- /dev/null
+++ b/numpy/lib/format.py
@@ -0,0 +1,412 @@
+""" Define a simple format for saving numpy arrays to disk with the full
+information about them.
+
+WARNING: THE FORMAT IS CURRENTLY UNSTABLE. DO NOT STORE CRITICAL DATA WITH IT.
+ While this code is in an SVN branch, the format may change without
+ notice, without backwards compatibility, and without changing the
+ format's version number. When the code moves into the trunk the format
+ will be stabilized, the version number will increment as changes occur,
+ and backwards compatibility with older versions will be maintained.
+
+Format Version 1.0
+------------------
+
+The first 6 bytes are a magic string: exactly "\\x93NUMPY".
+
+The next 1 byte is an unsigned byte: the major version number of the file
+format, e.g. \\x01.
+
+The next 1 byte is an unsigned byte: the minor version number of the file
+format, e.g. \\x00. Note: the version of the file format is not tied to the
+version of the numpy package.
+
+The next 2 bytes form a little-endian unsigned short int: the length of the
+header data HEADER_LEN.
+
+The next HEADER_LEN bytes form the header data describing the array's format. It
+is an ASCII string which contains a Python literal expression of a dictionary.
+It is terminated by a newline ('\\n') and padded with spaces ('\\x20') to make
+the total length of the magic string + 4 + HEADER_LEN be evenly divisible by 16
+for alignment purposes.
+
+The dictionary contains three keys:
+
+ "descr" : dtype.descr
+ An object that can be passed as an argument to the numpy.dtype()
+ constructor to create the array's dtype.
+ "fortran_order" : bool
+ Whether the array data is Fortran-contiguous or not. Since
+ Fortran-contiguous arrays are a common form of non-C-contiguity, we
+ allow them to be written directly to disk for efficiency.
+ "shape" : tuple of int
+ The shape of the array.
+
+For repeatability and readability, this dictionary is formatted using
+pprint.pformat() so the keys are in alphabetic order.
+
+Following the header comes the array data. If the dtype contains Python objects
+(i.e. dtype.hasobject is True), then the data is a Python pickle of the array.
+Otherwise the data is the contiguous (either C- or Fortran-, depending on
+fortran_order) bytes of the array. Consumers can figure out the number of bytes
+by multiplying the number of elements given by the shape (noting that shape=()
+means there is 1 element) by dtype.itemsize.
+"""
+
+import cPickle
+import pprint
+import struct
+
+import numpy
+from numpy.lib.utils import safe_eval
+
+
+MAGIC_PREFIX = '\x93NUMPY'
+MAGIC_LEN = len(MAGIC_PREFIX) + 2
+
+def magic(major, minor):
+ """ Return the magic string for the given file format version.
+
+ Parameters
+ ----------
+ major : int in [0, 255]
+ minor : int in [0, 255]
+
+ Returns
+ -------
+ magic : str
+
+ Raises
+ ------
+ ValueError if the version cannot be formatted.
+ """
+ if major < 0 or major > 255:
+ raise ValueError("major version must be 0 <= major < 256")
+ if minor < 0 or minor > 255:
+ raise ValueError("minor version must be 0 <= minor < 256")
+ return '%s%s%s' % (MAGIC_PREFIX, chr(major), chr(minor))
+
+def read_magic(fp):
+ """ Read the magic string to get the version of the file format.
+
+ Parameters
+ ----------
+ fp : filelike object
+
+ Returns
+ -------
+ major : int
+ minor : int
+ """
+ magic_str = fp.read(MAGIC_LEN)
+ if len(magic_str) != MAGIC_LEN:
+ raise ValueError("could not read %d characters for the magic string; got %r" % (MAGIC_LEN, magic_str))
+ if magic_str[:-2] != MAGIC_PREFIX:
+ raise ValueError("the magic string is not correct; expected %r, got %r" % (MAGIC_PREFIX, magic_str[:-2]))
+ major, minor = map(ord, magic_str[-2:])
+ return major, minor
+
+def dtype_to_descr(dtype):
+ """ Get a serializable descriptor from the dtype.
+
+ The .descr attribute of a dtype object cannot be round-tripped through the
+ dtype() constructor. Simple types, like dtype('float32'), have a descr which
+ looks like a record array with one field with '' as a name. The dtype()
+ constructor interprets this as a request to give a default name. Instead, we
+ construct descriptor that can be passed to dtype().
+ """
+ if dtype.names is not None:
+ # This is a record array. The .descr is fine.
+ # XXX: parts of the record array with an empty name, like padding bytes,
+ # still get fiddled with. This needs to be fixed in the C implementation
+ # of dtype().
+ return dtype.descr
+ else:
+ return dtype.str
+
+def header_data_from_array_1_0(array):
+ """ Get the dictionary of header metadata from a numpy.ndarray.
+
+ Parameters
+ ----------
+ array : numpy.ndarray
+
+ Returns
+ -------
+ d : dict
+ This has the appropriate entries for writing its string representation
+ to the header of the file.
+ """
+ d = {}
+ d['shape'] = array.shape
+ if array.flags.c_contiguous:
+ d['fortran_order'] = False
+ elif array.flags.f_contiguous:
+ d['fortran_order'] = True
+ else:
+ # Totally non-contiguous data. We will have to make it C-contiguous
+ # before writing. Note that we need to test for C_CONTIGUOUS first
+ # because a 1-D array is both C_CONTIGUOUS and F_CONTIGUOUS.
+ d['fortran_order'] = False
+
+ d['descr'] = dtype_to_descr(array.dtype)
+ return d
+
+def write_array_header_1_0(fp, d):
+ """ Write the header for an array using the 1.0 format.
+
+ Parameters
+ ----------
+ fp : filelike object
+ d : dict
+ This has the appropriate entries for writing its string representation
+ to the header of the file.
+ """
+ header = pprint.pformat(d)
+ # Pad the header with spaces and a final newline such that the magic string,
+ # the header-length short and the header are aligned on a 16-byte boundary.
+ # Hopefully, some system, possibly memory-mapping, can take advantage of
+ # our premature optimization.
+ current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline
+ topad = 16 - (current_header_len % 16)
+ header = '%s%s\n' % (header, ' '*topad)
+ if len(header) >= (256*256):
+ raise ValueError("header does not fit inside %s bytes" % (256*256))
+ header_len_str = struct.pack('<H', len(header))
+ fp.write(header_len_str)
+ fp.write(header)
+
+def read_array_header_1_0(fp):
+ """ Read an array header from a filelike object using the 1.0 file format
+ version.
+
+ This will leave the file object located just after the header.
+
+ Parameters
+ ----------
+ fp : filelike object
+
+ Returns
+ -------
+ shape : tuple of int
+ The shape of the array.
+ fortran_order : bool
+ The array data will be written out directly if it is either C-contiguous
+ or Fortran-contiguous. Otherwise, it will be made contiguous before
+ writing it out.
+ dtype : dtype
+
+ Raises
+ ------
+ ValueError if the data is invalid.
+ """
+ # Read an unsigned, little-endian short int which has the length of the
+ # header.
+ hlength_str = fp.read(2)
+ if len(hlength_str) != 2:
+ raise ValueError("EOF at %s before reading array header length" % fp.tell())
+ header_length = struct.unpack('<H', hlength_str)[0]
+ header = fp.read(header_length)
+ if len(header) != header_length:
+ raise ValueError("EOF at %s before reading array header" % fp.tell())
+
+ # The header is a pretty-printed string representation of a literal Python
+ # dictionary with trailing newlines padded to a 16-byte boundary. The keys
+ # are strings.
+ # "shape" : tuple of int
+ # "fortran_order" : bool
+ # "descr" : dtype.descr
+ try:
+ d = safe_eval(header)
+ except SyntaxError, e:
+ raise ValueError("Cannot parse header: %r\nException: %r" % (header, e))
+ if not isinstance(d, dict):
+ raise ValueError("Header is not a dictionary: %r" % d)
+ keys = d.keys()
+ keys.sort()
+ if keys != ['descr', 'fortran_order', 'shape']:
+ raise ValueError("Header does not contain the correct keys: %r" % (keys,))
+
+ # Sanity-check the values.
+ if (not isinstance(d['shape'], tuple) or
+ not numpy.all([isinstance(x, int) for x in d['shape']])):
+ raise ValueError("shape is not valid: %r" % (d['shape'],))
+ if not isinstance(d['fortran_order'], bool):
+ raise ValueError("fortran_order is not a valid bool: %r" % (d['fortran_order'],))
+ try:
+ dtype = numpy.dtype(d['descr'])
+ except TypeError, e:
+ raise ValueError("descr is not a valid dtype descriptor: %r" % (d['descr'],))
+
+ return d['shape'], d['fortran_order'], dtype
+
+def write_array(fp, array, version=(1,0)):
+ """ Write an array to a file, including a header.
+
+ If the array is neither C-contiguous or Fortran-contiguous AND if the
+ filelike object is not a real file object, then this function will have to
+ copy data in memory.
+
+ Parameters
+ ----------
+ fp : filelike object
+ array : numpy.ndarray
+ version : (int, int), optional
+ The version number of the format.
+
+ Raises
+ ------
+ ValueError if the array cannot be persisted.
+ Various other errors from pickling if the array contains Python objects as
+ part of its dtype.
+ """
+ if version != (1, 0):
+ raise ValueError("we only support format version (1,0), not %s" % (version,))
+ fp.write(magic(*version))
+ write_array_header_1_0(fp, header_data_from_array_1_0(array))
+ if array.dtype.hasobject:
+ # We contain Python objects so we cannot write out the data directly.
+ # Instead, we will pickle it out with version 2 of the pickle protocol.
+ cPickle.dump(array, fp, protocol=2)
+ elif array.flags.f_contiguous and not array.flags.c_contiguous:
+ # Use a suboptimal, possibly memory-intensive, but correct way to handle
+ # Fortran-contiguous arrays.
+ fp.write(array.data)
+ else:
+ if isinstance(fp, file):
+ array.tofile(fp)
+ else:
+ # XXX: We could probably chunk this using something like
+ # arrayterator.
+ fp.write(array.tostring('C'))
+
+def read_array(fp):
+ """ Read an array from a file.
+
+ Parameters
+ ----------
+ fp : filelike object
+ If this is not a real file object, then this may take extra memory and
+ time.
+
+ Returns
+ -------
+ array : numpy.ndarray
+
+ Raises
+ ------
+ ValueError if the data is invalid.
+ """
+ version = read_magic(fp)
+ if version != (1, 0):
+ raise ValueError("only support version (1,0) of file format, not %r" % (version,))
+ shape, fortran_order, dtype = read_array_header_1_0(fp)
+ if len(shape) == 0:
+ count = 1
+ else:
+ count = numpy.multiply.reduce(shape)
+
+ # Now read the actual data.
+ if dtype.hasobject:
+ # The array contained Python objects. We need to unpickle the data.
+ array = cPickle.load(fp)
+ else:
+ if isinstance(fp, file):
+ # We can use the fast fromfile() function.
+ array = numpy.fromfile(fp, dtype=dtype, count=count)
+ else:
+ # This is not a real file. We have to read it the memory-intensive way.
+ # XXX: we can probably chunk this to avoid the memory hit.
+ data = fp.read(count * dtype.itemsize)
+ array = numpy.fromstring(data, dtype=dtype, count=count)
+
+ if fortran_order:
+ array.shape = shape[::-1]
+ array = array.transpose()
+ else:
+ array.shape = shape
+
+ return array
+
+
+def open_memmap(filename, mode='r+', dtype=None, shape=None,
+ fortran_order=False, version=(1,0)):
+ """ Open a .npy file as a memory-mapped array.
+
+ Parameters
+ ----------
+ filename : str
+ mode : str, optional
+ The mode to open the file with. In addition to the standard file modes,
+ 'c' is also accepted to mean "copy on write".
+ dtype : dtype, optional
+ shape : tuple of int, optional
+ fortran_order : bool, optional
+ If the mode is a "write" mode, then the file will be created using this
+ dtype, shape, and contiguity.
+ version : tuple of int (major, minor)
+ If the mode is a "write" mode, then this is the version of the file
+ format used to create the file.
+
+ Returns
+ -------
+ marray : numpy.memmap
+
+ Raises
+ ------
+ ValueError if the data or the mode is invalid.
+ IOError if the file is not found or cannot be opened correctly.
+ """
+ if 'w' in mode:
+ # We are creating the file, not reading it.
+ # Check if we ought to create the file.
+ if version != (1, 0):
+ raise ValueError("only support version (1,0) of file format, not %r" % (version,))
+ # Ensure that the given dtype is an authentic dtype object rather than
+ # just something that can be interpreted as a dtype object.
+ dtype = numpy.dtype(dtype)
+ if dtype.hasobject:
+ raise ValueError("the dtype includes Python objects; the array cannot be memory-mapped")
+ d = dict(
+ descr=dtype_to_descr(dtype),
+ fortran_order=fortran_order,
+ shape=shape,
+ )
+ # If we got here, then it should be safe to create the file.
+ fp = open(filename, mode+'b')
+ try:
+ fp.write(magic(*version))
+ write_array_header_1_0(fp, d)
+ offset = fp.tell()
+ finally:
+ fp.close()
+ else:
+ # Read the header of the file first.
+ fp = open(filename, 'rb')
+ try:
+ version = read_magic(fp)
+ if version != (1, 0):
+ raise ValueError("only support version (1,0) of file format, not %r" % (version,))
+ shape, fortran_order, dtype = read_array_header_1_0(fp)
+ if dtype.hasobject:
+ raise ValueError("the dtype includes Python objects; the array cannot be memory-mapped")
+ offset = fp.tell()
+ finally:
+ fp.close()
+
+ if fortran_order:
+ order = 'F'
+ else:
+ order = 'C'
+
+ # We need to change a write-only mode to a read-write mode since we've
+ # already written data to the file.
+ if mode == 'w+':
+ mode = 'r+'
+
+ marray = numpy.memmap(filename, dtype=dtype, shape=shape, order=order,
+ mode=mode, offset=offset)
+
+ return marray
+
+
+
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
new file mode 100644
index 000000000..b4e89c2fc
--- /dev/null
+++ b/numpy/lib/io.py
@@ -0,0 +1,360 @@
+
+__all__ = ['savetxt', 'loadtxt',
+ 'load', 'loads',
+ 'save', 'savez',
+ 'packbits', 'unpackbits',
+ 'DataSource']
+
+import numpy as np
+import format
+import zipfile
+import cStringIO
+import tempfile
+import os
+
+from cPickle import load as _cload, loads
+from _datasource import DataSource
+from _compiled_base import packbits, unpackbits
+
+_file = file
+
+class BagObj(object):
+ """A simple class that converts attribute lookups to
+ getitems on the class passed in.
+ """
+ def __init__(self, obj):
+ self._obj = obj
+ def __getattribute__(self, key):
+ try:
+ return object.__getattribute__(self, '_obj')[key]
+ except KeyError:
+ raise AttributeError, key
+
+class NpzFile(object):
+ """A dictionary-like object with lazy-loading of files in the zipped
+ archive provided on construction.
+
+ The arrays and file strings are lazily loaded on either
+ getitem access using obj['key'] or attribute lookup using obj.f.key
+
+    A list of all files (without .npy extensions) can be obtained
+ with .files and the ZipFile object itself using .zip
+ """
+ def __init__(self, fid):
+ _zip = zipfile.ZipFile(fid)
+ self._files = _zip.namelist()
+ self.files = []
+ for x in self._files:
+ if x.endswith('.npy'):
+ self.files.append(x[:-4])
+ else:
+ self.files.append(x)
+ self.zip = _zip
+ self.f = BagObj(self)
+
+ def __getitem__(self, key):
+ # FIXME: This seems like it will copy strings around
+ # more than is strictly necessary. The zipfile
+ # will read the string and then
+ # the format.read_array will copy the string
+ # to another place in memory.
+ # It would be better if the zipfile could read
+ # (or at least uncompress) the data
+ # directly into the array memory.
+ member = 0
+ if key in self._files:
+ member = 1
+ elif key in self.files:
+ member = 1
+ key += '.npy'
+ if member:
+ bytes = self.zip.read(key)
+ if bytes.startswith(format.MAGIC_PREFIX):
+ value = cStringIO.StringIO(bytes)
+ return format.read_array(value)
+ else:
+ return bytes
+ else:
+ raise KeyError, "%s is not a file in the archive" % key
+
+def load(file, memmap=False):
+ """Load a binary file.
+
+ Read a binary file (either a pickle, or a binary .npy/.npz file) and
+ return the result.
+
+ Parameters
+ ----------
+ file : file-like object or string
+ the file to read. It must support seek and read methods
+ memmap : bool
+ If true, then memory-map the .npy file or unzip the .npz file into
+ a temporary directory and memory-map each component
+ This has no effect for a pickle.
+
+ Returns
+ -------
+ result : array, tuple, dict, etc.
+ data stored in the file.
+ If file contains pickle data, then whatever is stored in the pickle is returned.
+ If the file is .npy file, then an array is returned.
+ If the file is .npz file, then a dictionary-like object is returned which has a
+ filename:array key:value pair for every file in the zip.
+ """
+ if isinstance(file, type("")):
+ fid = _file(file,"rb")
+ else:
+ fid = file
+
+ if memmap:
+ raise NotImplementedError
+
+ # Code to distinguish from NumPy binary files and pickles.
+ _ZIP_PREFIX = 'PK\x03\x04'
+ N = len(format.MAGIC_PREFIX)
+ magic = fid.read(N)
+ fid.seek(-N,1) # back-up
+ if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz)
+ return NpzFile(fid)
+ elif magic == format.MAGIC_PREFIX: # .npy file
+ return format.read_array(fid)
+ else: # Try a pickle
+ try:
+ return _cload(fid)
+ except:
+ raise IOError, \
+ "Failed to interpret file %s as a pickle" % repr(file)
+
+def save(file, arr):
+ """Save an array to a binary file (a string or file-like object).
+
+ If the file is a string, then if it does not have the .npy extension,
+ it is appended and a file open.
+
+ Data is saved to the open file in NumPy-array format
+
+ Examples
+ --------
+ import numpy as np
+ ...
+ np.save('myfile', a)
+ a = np.load('myfile.npy')
+ """
+ if isinstance(file, str):
+ if not file.endswith('.npy'):
+ file = file + '.npy'
+ fid = open(file, "wb")
+ else:
+ fid = file
+
+ arr = np.asanyarray(arr)
+ format.write_array(fid, arr)
+
+def savez(file, *args, **kwds):
+ """Save several arrays into an .npz file format which is a zipped-archive
+ of arrays
+
+ If keyword arguments are given, then filenames are taken from the keywords.
+ If arguments are passed in with no keywords, then stored file names are
+ arr_0, arr_1, etc.
+ """
+
+ if isinstance(file, str):
+ if not file.endswith('.npz'):
+ file = file + '.npz'
+
+ namedict = kwds
+ for i, val in enumerate(args):
+ key = 'arr_%d' % i
+ if key in namedict.keys():
+ raise ValueError, "Cannot use un-named variables and keyword %s" % key
+ namedict[key] = val
+
+ zip = zipfile.ZipFile(file, mode="w")
+
+ # Place to write temporary .npy files
+ # before storing them in the zip
+ direc = tempfile.gettempdir()
+ todel = []
+
+ for key, val in namedict.iteritems():
+ fname = key + '.npy'
+ filename = os.path.join(direc, fname)
+ todel.append(filename)
+ fid = open(filename,'wb')
+ format.write_array(fid, np.asanyarray(val))
+ fid.close()
+ zip.write(filename, arcname=fname)
+
+ zip.close()
+ for name in todel:
+ os.remove(name)
+
+# Adapted from matplotlib
+
+def _getconv(dtype):
+    """Return a callable converting one text token to `dtype`'s scalar kind."""
+    typ = dtype.type
+    if issubclass(typ, np.bool_):
+        # Go through int so the tokens "0"/"1" parse; bool("0") would be True.
+        return lambda x: bool(int(x))
+    if issubclass(typ, np.integer):
+        return int
+    elif issubclass(typ, np.floating):
+        return float
+    elif issubclass(typ, np.complex):
+        return complex
+    else:
+        # Fallback for string/object and any other dtype: keep the raw token.
+        return str
+
+
+def _string_like(obj):
+    """Duck-typed test: 1 if `obj` supports string concatenation, else 0."""
+    try: obj + ''
+    except (TypeError, ValueError): return 0
+    return 1
+
+def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
+            skiprows=0, usecols=None, unpack=False):
+    """
+    Load ASCII data from fname into an array and return the array.
+
+    The data must be regular, same number of values in every row
+
+    fname can be a filename or a file handle.  Support for gzipped files is
+    automatic, if the filename ends in .gz
+
+    See scipy.io.loadmat to read and write matfiles.
+
+    Example usage:
+
+      X = loadtxt('test.dat')  # data in two columns
+      t = X[:,0]
+      y = X[:,1]
+
+    Alternatively, you can do the same with "unpack"; see below
+
+      X = loadtxt('test.dat')    # a matrix of data
+      x = loadtxt('test.dat')    # a single column of data
+
+
+    dtype - the data-type of the resulting array.  If this is a
+    record data-type, the resulting array will be 1-d and each row will
+    be interpreted as an element of the array. The number of columns
+    used must match the number of fields in the data-type in this case.
+
+    comments - the character used to indicate the start of a comment
+    in the file
+
+    delimiter is a string-like character used to separate values in the
+    file. If delimiter is unspecified or none, any whitespace string is
+    a separator.
+
+    converters, if not None, is a dictionary mapping column number to
+    a function that will convert that column to a float.  Eg, if
+    column 0 is a date string: converters={0:datestr2num}
+
+    skiprows is the number of rows from the top to skip
+
+    usecols, if not None, is a sequence of integer column indexes to
+    extract where 0 is the first column, eg usecols=(1,4,5) to extract
+    just the 2nd, 5th and 6th columns
+
+    unpack, if True, will transpose the matrix allowing you to unpack
+    into named arguments on the left hand side
+
+        t,y = loadtxt('test.dat', unpack=True) # for  two column data
+        x,y,z = loadtxt('somefile.dat', usecols=(3,5,7), unpack=True)
+
+    """
+
+    if _string_like(fname):
+        if fname.endswith('.gz'):
+            import gzip
+            fh = gzip.open(fname)
+        else:
+            fh = file(fname)
+    elif hasattr(fname, 'seek'):
+        fh = fname
+    else:
+        raise ValueError('fname must be a string or file handle')
+    X = []
+
+    dtype = np.dtype(dtype)
+    defconv = _getconv(dtype)
+    converterseq = None
+    if converters is None:
+        converters = {}
+    if dtype.names is not None:
+        # Record dtype: one converter per field, in field order.
+        converterseq = [_getconv(dtype.fields[name][0]) \
+                        for name in dtype.names]
+
+    for i,line in enumerate(fh):
+        if i<skiprows: continue
+        # NOTE(review): when `comments` is absent from the line, find()
+        # returns -1 and the slice drops the last character.  That is usually
+        # just the trailing newline, but it silently loses a character on a
+        # final line that has no newline -- confirm intended behavior.
+        line = line[:line.find(comments)].strip()
+        if not len(line): continue
+        vals = line.split(delimiter)
+        if converterseq is None:
+            # Lazily build one converter per column from the first data row.
+            converterseq = [converters.get(j,defconv) \
+                            for j in xrange(len(vals))]
+        if usecols is not None:
+            row = [converterseq[j](vals[j]) for j in usecols]
+        else:
+            row = [converterseq[j](val) for j,val in enumerate(vals)]
+        if dtype.names is not None:
+            # Record arrays want each row as a tuple, not a list.
+            row = tuple(row)
+        X.append(row)
+
+    X = np.array(X, dtype)
+    # NOTE(review): assumes X is 2-d here; a record dtype yields a 1-d array
+    # and this unpacking would raise ValueError -- verify against callers.
+    r,c = X.shape
+    if r==1 or c==1:
+        # Collapse single-row/column results to 1-d.
+        X.shape = max([r,c]),
+    if unpack: return X.T
+    else:  return X
+
+
+# adjust so that fmt can change across columns if desired.
+
+def savetxt(fname, X, fmt='%.18e',delimiter=' '):
+    """
+    Save the data in X to file fname using fmt string to convert the
+    data to strings
+
+    fname can be a filename or a file handle.  If the filename ends in .gz,
+    the file is automatically saved in compressed gzip format.  The load()
+    command understands gzipped files transparently.
+
+    Example usage:
+
+      savetxt('test.out', X)         # X is an array
+      savetxt('test1.out', (x,y,z))  # x,y,z equal sized 1D arrays
+      savetxt('test2.out', x)        # x is 1D
+      savetxt('test3.out', x, fmt='%1.4e')  # use exponential notation
+
+    delimiter is used to separate the fields, eg delimiter ',' for
+    comma-separated values
+    """
+
+    if _string_like(fname):
+        if fname.endswith('.gz'):
+            import gzip
+            fh = gzip.open(fname,'wb')
+        else:
+            fh = file(fname,'w')
+    elif hasattr(fname, 'seek'):
+        fh = fname
+    else:
+        raise ValueError('fname must be a string or file handle')
+
+
+    X = np.asarray(X)
+    # Promote 1-d input to a single column.  Remember the original shape:
+    # asarray may alias the caller's array, so it must be restored below.
+    origShape = None
+    if len(X.shape)==1:
+        origShape = X.shape
+        X.shape = len(X), 1
+    for row in X:
+        fh.write(delimiter.join([fmt%val for val in row]) + '\n')
+
+    # NOTE(review): a file handle opened above is never closed explicitly.
+    if origShape is not None:
+        X.shape = origShape
+
+
+
+
diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c
index 42c0183e8..654e7d95b 100644
--- a/numpy/lib/src/_compiled_base.c
+++ b/numpy/lib/src/_compiled_base.c
@@ -9,6 +9,9 @@ static PyObject *ErrorObject;
goto fail;} \
}
+#define PYSETERROR(message) \
+{ PyErr_SetString(ErrorObject, message); goto fail; }
+
static intp
incr_slot_ (double x, double *bins, intp lbins)
{
@@ -526,6 +529,269 @@ arr_add_docstring(PyObject *dummy, PyObject *args)
return Py_None;
}
+
+static char packbits_doc[] =
+ "out = numpy.packbits(myarray, axis=None)\n\n"
+ " myarray : an integer type array whose elements should be packed to bits\n\n"
+ " This routine packs the elements of a binary-valued dataset into a\n"
+ " NumPy array of type uint8 ('B') whose bits correspond to\n"
+ " the logical (0 or nonzero) value of the input elements.\n"
+ " The dimension over-which bit-packing is done is given by axis.\n"
+ " The shape of the output has the same number of dimensions as the input\n"
+ " (unless axis is None, in which case the output is 1-d).\n"
+ "\n"
+ " Example:\n"
+ " >>> a = array([[[1,0,1],\n"
+ " ... [0,1,0]],\n"
+ " ... [[1,1,0],\n"
+ " ... [0,0,1]]])\n"
+ " >>> b = numpy.packbits(a,axis=-1)\n"
+ " >>> b\n"
+ " array([[[160],[64]],[[192],[32]]], dtype=uint8)\n\n"
+ " Note that 160 = 128 + 32\n"
+ " 192 = 128 + 64\n";
+
+static char unpackbits_doc[] =
+ "out = numpy.unpackbits(myarray, axis=None)\n\n"
+ " myarray - array of uint8 type where each element represents a bit-field\n"
+ " that should be unpacked into a boolean output array\n\n"
+ " The shape of the output array is either 1-d (if axis is None) or\n"
+ " the same shape as the input array with unpacking done along the\n"
+ " axis specified.";
+
+/* PACKBITS
+
+   This function packs binary (0 or 1) 1-bit per pixel arrays
+   into contiguous bytes.
+
+*/
+
+static void
+_packbits(
+     void *In,
+     int element_size, /* in bytes */
+     npy_intp in_N,
+     npy_intp in_stride,
+     void *Out,
+     npy_intp out_N,
+     npy_intp out_stride
+     )
+{
+    char build;
+    int i, index;
+    npy_intp out_Nm1;
+    int maxi, remain, nonzero, j;
+    char *outptr,*inptr;
+
+    outptr = Out; /* pointer to output buffer */
+    inptr = In; /* pointer to input buffer */
+
+    /* Loop through the elements of In */
+    /* Determine whether or not it is nonzero.
+       Yes: set corresponding bit (and adjust build value)
+       No: move on */
+    /* Every 8th value, set the value of build and increment the outptr */
+
+    remain = in_N % 8; /* uneven bits */
+    if (remain == 0) remain = 8;
+    out_Nm1 = out_N - 1;
+    for (index = 0; index < out_N; index++) {
+        build = 0;
+        maxi = (index != out_Nm1 ? 8 : remain);
+        for (i = 0; i < maxi ; i++) {
+            build <<= 1; /* shift bits left one bit */
+            nonzero = 0;
+            for (j = 0; j < element_size; j++) /* determine if this number is non-zero */
+                nonzero += (*(inptr++) != 0);
+            inptr += (in_stride - element_size); /* advance to next input */
+            build += (nonzero != 0); /* add to this bit if the input value is non-zero */
+        }
+        /* Last byte: pad the unused low-order bits with zeros (MSB first). */
+        if (index == out_Nm1) build <<= (8-remain);
+        /* printf("Here: %d %d %d %d\n",build,slice,index,maxi);
+        */
+        *outptr = build;
+        outptr += out_stride;
+    }
+    return;
+}
+
+
+static void
+_unpackbits(
+     void *In,
+     int el_size, /* unused */
+     npy_intp in_N,
+     npy_intp in_stride,
+     void *Out,
+     npy_intp out_N,
+     npy_intp out_stride
+     )
+{
+    unsigned char mask;
+    int i,index;
+    char *inptr, *outptr;
+
+    /* Loop through the elements of out
+    */
+    outptr = Out;
+    inptr = In;
+    for (index = 0; index < in_N; index++) {
+        /* Emit the 8 bits of this input byte, most significant bit first;
+           each output element becomes 0 or 1. */
+        mask = 128;
+        for (i = 0; i < 8 ; i++) {
+            *outptr = ((mask & (unsigned char)(*inptr)) != 0);
+            outptr += out_stride;
+            mask >>= 1;
+        }
+        inptr += in_stride;
+    }
+    return;
+}
+
+static PyObject *
+pack_or_unpack_bits(PyObject *input, int axis, int unpack)
+{
+    /* Shared worker for packbits/unpackbits: validates the input, sets up
+       the output array, and drives _packbits or _unpackbits over every
+       1-d lane along `axis`. */
+    PyArrayObject *inp;
+    PyObject *new=NULL;
+    PyObject *out=NULL;
+    npy_intp outdims[MAX_DIMS];
+    int i;
+    void (*thefunc)(void *, int, npy_intp, npy_intp, void *, npy_intp, npy_intp);
+    PyArrayIterObject *it, *ot;
+
+    inp = (PyArrayObject *)PyArray_FROM_O(input);
+
+    if (inp == NULL) return NULL;
+
+    /* NOTE(review): PYSETERROR jumps to fail, which does not release `inp`;
+       the reference leaks on these two error paths. */
+    if (unpack) {
+        if (PyArray_TYPE(inp) != NPY_UBYTE)
+            PYSETERROR("Expecting an input array of unsigned byte data type");
+    }
+    else {
+        if (!PyArray_ISINTEGER(inp))
+            PYSETERROR("Expecting an input array of integer data type");
+    }
+
+    new = PyArray_CheckAxis(inp, &axis, 0);
+    Py_DECREF(inp);
+    if (new == NULL) return NULL;
+
+    /* Handle empty (zero-size) arrays separately: nothing to pack/unpack.
+       NOTE(review): `new` is not released on this return path. */
+    if (PyArray_SIZE(new) == 0) {
+        return PyArray_Copy((PyArrayObject *)new);
+    }
+
+    if (PyArray_NDIM(new) == 0) {
+        if (unpack) {
+            /* Handle 0-d array by converting it to a 1-d array */
+            PyObject *temp;
+            PyArray_Dims newdim = {NULL, 1};
+            npy_intp shape=1;
+            newdim.ptr = &shape;
+            temp = PyArray_Newshape((PyArrayObject *)new, &newdim, NPY_CORDER);
+            if (temp == NULL) goto fail;
+            Py_DECREF(new);
+            new = temp;
+        }
+        else {
+            /* Packing a 0-d array: the result is a single byte, 1 when any
+               byte of the scalar's storage is non-zero, else 0. */
+            ubyte *optr, *iptr;
+            out = PyArray_New(new->ob_type, 0, NULL, NPY_UBYTE,
+                              NULL, NULL, 0, 0, NULL);
+            if (out == NULL) goto fail;
+            optr = PyArray_DATA(out);
+            iptr = PyArray_DATA(new);
+            *optr = 0;
+            for (i=0; i<PyArray_ITEMSIZE(new); i++) {
+                if (*iptr != 0) {
+                    *optr = 1;
+                    break;
+                }
+                iptr++;
+            }
+            goto finish;
+        }
+    }
+
+
+    /* Setup output shape */
+    for (i=0; i<PyArray_NDIM(new); i++) {
+        outdims[i] = PyArray_DIM(new, i);
+    }
+
+    if (unpack) {
+        /* Multiply axis dimension by 8 */
+        outdims[axis] <<= 3;
+        thefunc = _unpackbits;
+    }
+    else {
+        /* Divide axis dimension by 8, rounding up */
+        /* 8 -> 1, 9 -> 2, 16 -> 2, 17 -> 3 etc.. */
+        outdims[axis] = ((outdims[axis] - 1) >> 3) + 1;
+        thefunc = _packbits;
+    }
+
+    /* Create output array (same Python subtype and memory order as input) */
+    out = PyArray_New(new->ob_type, PyArray_NDIM(new), outdims, PyArray_UBYTE,
+                      NULL, NULL, 0, PyArray_ISFORTRAN(new), NULL);
+    if (out == NULL) goto fail;
+
+    /* Setup iterators to iterate over all but given axis */
+    it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)new, &axis);
+    ot = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)out, &axis);
+    if (it == NULL || ot == NULL) {
+        Py_XDECREF(it);
+        Py_XDECREF(ot);
+        goto fail;
+    }
+
+    /* Process one 1-d lane per iteration; the worker walks along `axis`. */
+    while(PyArray_ITER_NOTDONE(it)) {
+        thefunc(PyArray_ITER_DATA(it), PyArray_ITEMSIZE(new),
+                PyArray_DIM(new, axis), PyArray_STRIDE(new, axis),
+                PyArray_ITER_DATA(ot), PyArray_DIM(out, axis),
+                PyArray_STRIDE(out, axis));
+        PyArray_ITER_NEXT(it);
+        PyArray_ITER_NEXT(ot);
+    }
+    Py_DECREF(it);
+    Py_DECREF(ot);
+
+ finish:
+    Py_DECREF(new);
+    return out;
+
+ fail:
+    Py_XDECREF(new);
+    Py_XDECREF(out);
+    return NULL;
+}
+
+
+
+static PyObject *
+io_pack(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python entry point: packbits(in, axis=None).  An omitted/None axis
+       stays NPY_MAXDIMS, i.e. operate on the flattened array. */
+    PyObject *obj;
+    int axis=NPY_MAXDIMS;
+    static char *kwlist[] = {"in", "axis", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist,
+                                      &obj, PyArray_AxisConverter, &axis))
+        return NULL;
+    return pack_or_unpack_bits(obj, axis, 0);
+}
+
+static PyObject *
+io_unpack(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python entry point: unpackbits(in, axis=None).  An omitted/None axis
+       stays NPY_MAXDIMS, i.e. operate on the flattened array. */
+    PyObject *obj;
+    int axis=NPY_MAXDIMS;
+    static char *kwlist[] = {"in", "axis", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist,
+                                      &obj, PyArray_AxisConverter, &axis))
+        return NULL;
+    return pack_or_unpack_bits(obj, axis, 1);
+}
+
static struct PyMethodDef methods[] = {
{"_insert", (PyCFunction)arr_insert, METH_VARARGS | METH_KEYWORDS,
arr_insert__doc__},
@@ -537,6 +803,10 @@ static struct PyMethodDef methods[] = {
NULL},
{"add_docstring", (PyCFunction)arr_add_docstring, METH_VARARGS,
NULL},
+ {"packbits", (PyCFunction)io_pack, METH_VARARGS | METH_KEYWORDS,
+ packbits_doc},
+ {"unpackbits", (PyCFunction)io_unpack, METH_VARARGS | METH_KEYWORDS,
+ unpackbits_doc},
{NULL, NULL} /* sentinel */
};
@@ -578,7 +848,7 @@ PyMODINIT_FUNC init_compiled_base(void) {
PyDict_SetItemString(d, "__version__", s);
Py_DECREF(s);
- ErrorObject = PyString_FromString("numpy.lib._compiled_base.error");
+ ErrorObject = PyString_FromString("numpy.lib.error");
PyDict_SetItemString(d, "error", ErrorObject);
Py_DECREF(ErrorObject);
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
new file mode 100644
index 000000000..b238950a3
--- /dev/null
+++ b/numpy/lib/tests/test_format.py
@@ -0,0 +1,512 @@
+r''' Test the .npy file format.
+
+Set up:
+
+ >>> import numpy as np
+ >>> from cStringIO import StringIO
+ >>> from numpy.lib import format
+ >>>
+ >>> scalars = [
+ ... np.uint8,
+ ... np.int8,
+ ... np.uint16,
+ ... np.int16,
+ ... np.uint32,
+ ... np.int32,
+ ... np.uint64,
+ ... np.int64,
+ ... np.float32,
+ ... np.float64,
+ ... np.complex64,
+ ... np.complex128,
+ ... object,
+ ... ]
+ >>>
+ >>> basic_arrays = []
+ >>>
+ >>> for scalar in scalars:
+ ... for endian in '<>':
+ ... dtype = np.dtype(scalar).newbyteorder(endian)
+ ... basic = np.arange(15).astype(dtype)
+ ... basic_arrays.extend([
+ ... np.array([], dtype=dtype),
+ ... np.array(10, dtype=dtype),
+ ... basic,
+ ... basic.reshape((3,5)),
+ ... basic.reshape((3,5)).T,
+ ... basic.reshape((3,5))[::-1,::2],
+ ... ])
+ ...
+ >>>
+ >>> Pdescr = [
+ ... ('x', 'i4', (2,)),
+ ... ('y', 'f8', (2, 2)),
+ ... ('z', 'u1')]
+ >>>
+ >>>
+ >>> PbufferT = [
+ ... ([3,2], [[6.,4.],[6.,4.]], 8),
+ ... ([4,3], [[7.,5.],[7.,5.]], 9),
+ ... ]
+ >>>
+ >>>
+ >>> Ndescr = [
+ ... ('x', 'i4', (2,)),
+ ... ('Info', [
+ ... ('value', 'c16'),
+ ... ('y2', 'f8'),
+ ... ('Info2', [
+ ... ('name', 'S2'),
+ ... ('value', 'c16', (2,)),
+ ... ('y3', 'f8', (2,)),
+ ... ('z3', 'u4', (2,))]),
+ ... ('name', 'S2'),
+ ... ('z2', 'b1')]),
+ ... ('color', 'S2'),
+ ... ('info', [
+ ... ('Name', 'U8'),
+ ... ('Value', 'c16')]),
+ ... ('y', 'f8', (2, 2)),
+ ... ('z', 'u1')]
+ >>>
+ >>>
+ >>> NbufferT = [
+ ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8),
+ ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9),
+ ... ]
+ >>>
+ >>>
+ >>> record_arrays = [
+ ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
+ ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
+ ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
+ ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
+ ... ]
+
+Test the magic string writing.
+
+ >>> format.magic(1, 0)
+ '\x93NUMPY\x01\x00'
+ >>> format.magic(0, 0)
+ '\x93NUMPY\x00\x00'
+ >>> format.magic(255, 255)
+ '\x93NUMPY\xff\xff'
+ >>> format.magic(2, 5)
+ '\x93NUMPY\x02\x05'
+
+Test the magic string reading.
+
+ >>> format.read_magic(StringIO(format.magic(1, 0)))
+ (1, 0)
+ >>> format.read_magic(StringIO(format.magic(0, 0)))
+ (0, 0)
+ >>> format.read_magic(StringIO(format.magic(255, 255)))
+ (255, 255)
+ >>> format.read_magic(StringIO(format.magic(2, 5)))
+ (2, 5)
+
+Test the header writing.
+
+ >>> for arr in basic_arrays + record_arrays:
+ ... f = StringIO()
+ ... format.write_array_header_1_0(f, arr)
+ ... print repr(f.getvalue())
+ ...
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (0,)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': ()} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (15,)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 5)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': True, 'shape': (5, 3)} \n"
+ "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 3)} \n"
+ "v\x00{'descr': [('x', '<i4', (2,)), ('y', '<f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
+ "\x16\x02{'descr': [('x', '<i4', (2,)),\n ('Info',\n [('value', '<c16'),\n ('y2', '<f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '<c16', (2,)),\n ('y3', '<f8', (2,)),\n ('z3', '<u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '<U8'), ('Value', '<c16')]),\n ('y', '<f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
+ "v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
+ "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
+'''
+
+
+from cStringIO import StringIO
+import os
+import shutil
+import tempfile
+
+from nose.tools import raises
+
+import numpy as np
+from numpy.testing import assert_array_equal
+
+from numpy.lib import format
+
+
+tempdir = None
+
+# Module-level setup.
+def setup_module():
+    """Create the scratch directory used by the memmap round-trip tests."""
+    global tempdir
+    tempdir = tempfile.mkdtemp()
+
+def teardown_module():
+    """Remove the scratch directory created in setup_module."""
+    global tempdir
+    if tempdir is not None and os.path.isdir(tempdir):
+        shutil.rmtree(tempdir)
+    tempdir = None
+
+
+# Generate some basic arrays to test with.
+scalars = [
+ np.uint8,
+ np.int8,
+ np.uint16,
+ np.int16,
+ np.uint32,
+ np.int32,
+ np.uint64,
+ np.int64,
+ np.float32,
+ np.float64,
+ np.complex64,
+ np.complex128,
+ object,
+]
+basic_arrays = []
+for scalar in scalars:
+ for endian in '<>':
+ dtype = np.dtype(scalar).newbyteorder(endian)
+ basic = np.arange(15).astype(dtype)
+ basic_arrays.extend([
+ # Empty
+ np.array([], dtype=dtype),
+ # Rank-0
+ np.array(10, dtype=dtype),
+ # 1-D
+ basic,
+ # 2-D C-contiguous
+ basic.reshape((3,5)),
+ # 2-D F-contiguous
+ basic.reshape((3,5)).T,
+ # 2-D non-contiguous
+ basic.reshape((3,5))[::-1,::2],
+ ])
+
+# More complicated record arrays.
+# This is the structure of the table used for plain objects:
+#
+# +-+-+-+
+# |x|y|z|
+# +-+-+-+
+
+# Structure of a plain array description:
+Pdescr = [
+ ('x', 'i4', (2,)),
+ ('y', 'f8', (2, 2)),
+ ('z', 'u1')]
+
+# A plain list of tuples with values for testing:
+PbufferT = [
+ # x y z
+ ([3,2], [[6.,4.],[6.,4.]], 8),
+ ([4,3], [[7.,5.],[7.,5.]], 9),
+ ]
+
+
+# This is the structure of the table used for nested objects (DON'T PANIC!):
+#
+# +-+---------------------------------+-----+----------+-+-+
+# |x|Info |color|info |y|z|
+# | +-----+--+----------------+----+--+ +----+-----+ | |
+# | |value|y2|Info2 |name|z2| |Name|Value| | |
+# | | | +----+-----+--+--+ | | | | | | |
+# | | | |name|value|y3|z3| | | | | | | |
+# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+
+#
+
+# The corresponding nested array description:
+Ndescr = [
+ ('x', 'i4', (2,)),
+ ('Info', [
+ ('value', 'c16'),
+ ('y2', 'f8'),
+ ('Info2', [
+ ('name', 'S2'),
+ ('value', 'c16', (2,)),
+ ('y3', 'f8', (2,)),
+ ('z3', 'u4', (2,))]),
+ ('name', 'S2'),
+ ('z2', 'b1')]),
+ ('color', 'S2'),
+ ('info', [
+ ('Name', 'U8'),
+ ('Value', 'c16')]),
+ ('y', 'f8', (2, 2)),
+ ('z', 'u1')]
+
+NbufferT = [
+ # x Info color info y z
+ # value y2 Info2 name z2 Name Value
+ # name value y3 z3
+ ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8),
+ ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9),
+ ]
+
+record_arrays = [
+ np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
+ np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
+ np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
+ np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
+]
+
+def roundtrip(arr):
+    """Write `arr` in .npy format to an in-memory buffer and read it back."""
+    f = StringIO()
+    format.write_array(f, arr)
+    f2 = StringIO(f.getvalue())
+    arr2 = format.read_array(f2)
+    return arr2
+
+def assert_equal(o1, o2):
+    # Minimal equality check, usable as a yield-ed generator-test callable.
+    assert o1 == o2
+
+
+def test_roundtrip():
+    # Generator test: every array must survive a write/read round trip.
+    for arr in basic_arrays + record_arrays:
+        arr2 = roundtrip(arr)
+        yield assert_array_equal, arr, arr2
+
+def test_memmap_roundtrip():
+    # For each mmap-able array: bytes written through open_memmap must match
+    # those produced by write_array, and reading the normal file back through
+    # open_memmap must reproduce the array.
+    for arr in basic_arrays + record_arrays:
+        if arr.dtype.hasobject:
+            # Skip these since they can't be mmap'ed.
+            continue
+        # Write it out normally and through mmap.
+        nfn = os.path.join(tempdir, 'normal.npy')
+        mfn = os.path.join(tempdir, 'memmap.npy')
+        fp = open(nfn, 'wb')
+        try:
+            format.write_array(fp, arr)
+        finally:
+            fp.close()
+
+        fortran_order = (arr.flags.f_contiguous and not arr.flags.c_contiguous)
+        ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype,
+                                shape=arr.shape, fortran_order=fortran_order)
+        ma[...] = arr
+        # Deleting the memmap flushes and closes it before the file is read.
+        del ma
+
+        # Check that both of these files' contents are the same.
+        fp = open(nfn, 'rb')
+        normal_bytes = fp.read()
+        fp.close()
+        fp = open(mfn, 'rb')
+        memmap_bytes = fp.read()
+        fp.close()
+        yield assert_equal, normal_bytes, memmap_bytes
+
+        # Check that reading the file using memmap works.
+        ma = format.open_memmap(nfn, mode='r')
+        yield assert_array_equal, ma, arr
+        del ma
+
+
+def test_write_version_1_0():
+    # write_array must accept the default and (1, 0) versions, and reject
+    # every other version tuple with ValueError.
+    f = StringIO()
+    arr = np.arange(1)
+    # These should pass.
+    format.write_array(f, arr, version=(1, 0))
+    format.write_array(f, arr)
+
+    # These should all fail.
+    bad_versions = [
+        (1, 1),
+        (0, 0),
+        (0, 1),
+        (2, 0),
+        (2, 2),
+        (255, 255),
+    ]
+    for version in bad_versions:
+        try:
+            format.write_array(f, arr, version=version)
+        except ValueError:
+            pass
+        else:
+            raise AssertionError("we should have raised a ValueError for the bad version %r" % (version,))
+
+
+# Magic strings with a well-formed prefix but an unsupported version number.
+bad_version_magic = [
+    '\x93NUMPY\x01\x01',
+    '\x93NUMPY\x00\x00',
+    '\x93NUMPY\x00\x01',
+    '\x93NUMPY\x02\x00',
+    '\x93NUMPY\x02\x02',
+    '\x93NUMPY\xff\xff',
+]
+# Magic strings that are corrupted or truncated and must never parse.
+malformed_magic = [
+    '\x92NUMPY\x01\x00',
+    '\x00NUMPY\x01\x00',
+    '\x93numpy\x01\x00',
+    '\x93MATLB\x01\x00',
+    '\x93NUMPY\x01',
+    '\x93NUMPY',
+    '',
+]
+
+def test_read_magic_bad_magic():
+    # read_magic must raise ValueError on every malformed magic string.
+    for magic in malformed_magic:
+        f = StringIO(magic)
+        yield raises(ValueError)(format.read_magic), f
+
+def test_read_version_1_0_bad_magic():
+    # read_array must reject both unsupported versions and malformed magic.
+    for magic in bad_version_magic + malformed_magic:
+        f = StringIO(magic)
+        yield raises(ValueError)(format.read_array), f
+
+
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 95dd4f581..048ffafc0 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -1,3 +1,4 @@
+import compiler
import os
import sys
import inspect
@@ -7,9 +8,10 @@ from numpy.core.multiarray import dtype as _dtype
from numpy.core import product, ndarray
__all__ = ['issubclass_', 'get_numpy_include', 'issubsctype',
- 'issubdtype', 'deprecate', 'get_numarray_include',
+ 'issubdtype', 'deprecate', 'deprecate_with_doc',
+ 'get_numarray_include',
'get_include', 'info', 'source', 'who',
- 'byte_bounds', 'may_share_memory']
+ 'byte_bounds', 'may_share_memory', 'safe_eval']
def issubclass_(arg1, arg2):
try:
@@ -82,15 +84,32 @@ else:
func.__name__ = name
return func
-def deprecate(func, oldname, newname):
+def deprecate(func, oldname=None, newname=None):
+    """Deprecate old functions.
+    Issues a DeprecationWarning, adds warning to oldname's docstring,
+    rebinds oldname.__name__ and returns new function object.
+
+    Example:
+      oldfunc = deprecate(newfunc, 'oldfunc', 'newfunc')
+
+    """
+
     import warnings
+    if oldname is None:
+        oldname = func.func_name
+    if newname is None:
+        str1 = "%s is deprecated" % (oldname,)
+        depdoc = "%s is DEPRECATED!" % (oldname,)
+    else:
+        # No trailing comma here: a comma would turn str1 into a 1-tuple,
+        # and warnings.warn would then show the tuple repr as the message.
+        str1 = "%s is deprecated, use %s" % (oldname, newname)
+        depdoc = '%s is DEPRECATED! -- use %s instead' % (oldname, newname,)
+
     def newfunc(*args,**kwds):
-        warnings.warn("%s is deprecated, use %s" % (oldname, newname),
-                      DeprecationWarning)
+        warnings.warn(str1, DeprecationWarning)
         return func(*args, **kwds)
+
     newfunc = _set_function_name(newfunc, oldname)
     doc = func.__doc__
-    depdoc = '%s is DEPRECATED in numpy: use %s instead' % (oldname, newname,)
     if doc is None:
         doc = depdoc
     else:
@@ -104,6 +123,24 @@ def deprecate(func, oldname, newname):
newfunc.__dict__.update(d)
return newfunc
+def deprecate_with_doc(somestr):
+    """Decorator to deprecate functions and provide detailed documentation
+    with 'somestr' that is added to the function's docstring.
+
+    Example:
+      depmsg = 'function numpy.lib.foo has been merged into numpy.lib.io.foobar'
+      @deprecate_with_doc(depmsg)
+      def foo():
+          pass
+
+    """
+
+    def _decorator(func):
+        # deprecate() always leaves a non-None __doc__ on the wrapper
+        # (it falls back to its own depdoc), so += is safe here.
+        newfunc = deprecate(func)
+        newfunc.__doc__ += "\n" + somestr
+        return newfunc
+    return _decorator
+
+
get_numpy_include = deprecate(get_include, 'get_numpy_include', 'get_include')
@@ -430,3 +467,113 @@ def source(object, output=sys.stdout):
print >> output, inspect.getsource(object)
except:
print >> output, "Not available for this object."
+
+#-----------------------------------------------------------------------------
+
+# The following SafeEval class and company are adapted from Michael Spencer's
+# ASPN Python Cookbook recipe:
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469
+# Accordingly it is mostly Copyright 2006 by Michael Spencer.
+# The recipe, like most of the other ASPN Python Cookbook recipes was made
+# available under the Python license.
+# http://www.python.org/license
+
+# It has been modified to:
+# * handle unary -/+
+# * support True/False/None
+# * raise SyntaxError instead of a custom exception.
+
+class SafeEval(object):
+    """Visitor over a ``compiler``-module AST that evaluates literal
+    expressions only.
+
+    Any node type without a matching visit* method below (function calls,
+    attribute access, subscripting, ...) falls through to default(), which
+    raises SyntaxError -- that is what makes the evaluation "safe".
+    """
+
+    def visit(self, node, **kw):
+        # Dispatch on the node's class name, e.g. Const -> visitConst.
+        cls = node.__class__
+        meth = getattr(self,'visit'+cls.__name__,self.default)
+        return meth(node, **kw)
+
+    def default(self, node, **kw):
+        # Reached for every construct that is not a plain literal.
+        raise SyntaxError("Unsupported source construct: %s" % node.__class__)
+
+    def visitExpression(self, node, **kw):
+        # Expression wraps the top-level expression; evaluate and return
+        # its first child (presumably its only child -- the compiler
+        # module's Expression node carries a single body).
+        for child in node.getChildNodes():
+            return self.visit(child, **kw)
+
+    def visitConst(self, node, **kw):
+        # Numeric and string literals evaluate to their stored value.
+        return node.value
+
+    def visitDict(self, node,**kw):
+        # Recursively evaluate each key/value pair of a dict display.
+        return dict([(self.visit(k),self.visit(v)) for k,v in node.items])
+
+    def visitTuple(self, node, **kw):
+        return tuple([self.visit(i) for i in node.nodes])
+
+    def visitList(self, node, **kw):
+        return [self.visit(i) for i in node.nodes]
+
+    def visitUnaryAdd(self, node, **kw):
+        # Supports literals like +3.
+        return +self.visit(node.getChildNodes()[0])
+
+    def visitUnarySub(self, node, **kw):
+        # Supports literals like -3.
+        return -self.visit(node.getChildNodes()[0])
+
+    def visitName(self, node, **kw):
+        # Only the three constant names are allowed; any other bare name
+        # (builtins, variables) is rejected.
+        if node.name == 'False':
+            return False
+        elif node.name == 'True':
+            return True
+        elif node.name == 'None':
+            return None
+        else:
+            raise SyntaxError("Unknown name: %s" % node.name)
+
+def safe_eval(source):
+    """ Evaluate a string containing a Python literal expression without
+    allowing the execution of arbitrary non-literal code.
+
+    Parameters
+    ----------
+    source : str
+
+    Returns
+    -------
+    obj : object
+
+    Raises
+    ------
+    SyntaxError if the code is invalid Python expression syntax or if it
+    contains non-literal code.
+
+    Examples
+    --------
+    >>> from numpy.lib.utils import safe_eval
+    >>> safe_eval('1')
+    1
+    >>> safe_eval('[1, 2, 3]')
+    [1, 2, 3]
+    >>> safe_eval('{"foo": ("bar", 10.0)}')
+    {'foo': ('bar', 10.0)}
+    >>> safe_eval('import os')
+    Traceback (most recent call last):
+      ...
+    SyntaxError: invalid syntax
+    >>> safe_eval('open("/home/user/.ssh/id_dsa").read()')
+    Traceback (most recent call last):
+      ...
+    SyntaxError: Unsupported source construct: compiler.ast.CallFunc
+    >>> safe_eval('dict')
+    Traceback (most recent call last):
+      ...
+    SyntaxError: Unknown name: dict
+    """
+    # Both compiler.parse and the walker already raise SyntaxError on bad
+    # input, so no try/except re-raise wrappers are needed around them.
+    walker = SafeEval()
+    ast = compiler.parse(source, "eval")
+    return walker.visit(ast)
+
+#-----------------------------------------------------------------------------
+
+