diff options
-rw-r--r-- | numpy/lib/__init__.py | 2 | ||||
-rw-r--r-- | numpy/lib/_datasource.py | 443 | ||||
-rw-r--r-- | numpy/lib/format.py | 412 | ||||
-rw-r--r-- | numpy/lib/io.py | 360 | ||||
-rw-r--r-- | numpy/lib/src/_compiled_base.c | 272 | ||||
-rw-r--r-- | numpy/lib/tests/test_format.py | 512 | ||||
-rw-r--r-- | numpy/lib/utils.py | 159 |
7 files changed, 2153 insertions, 7 deletions
diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index e17a0a726..6667dbc07 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -15,6 +15,7 @@ from getlimits import * #import convertcode from utils import * from arraysetops import * +from io import * import math __all__ = ['emath','math'] @@ -29,6 +30,7 @@ __all__ += machar.__all__ __all__ += getlimits.__all__ __all__ += utils.__all__ __all__ += arraysetops.__all__ +__all__ += io.__all__ def test(level=1, verbosity=1): from numpy.testing import NumpyTest diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py new file mode 100644 index 000000000..06aae85d8 --- /dev/null +++ b/numpy/lib/_datasource.py @@ -0,0 +1,443 @@ +"""A file interface for handling local and remote data files. +The goal of datasource is to abstract some of the file system operations when +dealing with data files so the researcher doesn't have to know all the +low-level details. Through datasource, a researcher can obtain and use a +file with one function call, regardless of location of the file. + +DataSource is meant to augment standard python libraries, not replace them. +It should work seemlessly with standard file IO operations and the os module. + +DataSource files can originate locally or remotely: + +- local files : '/home/guido/src/local/data.txt' +- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt' + +DataSource files can also be compressed or uncompressed. Currently only gzip +and bz2 are supported. + +Example: + + >>> # Create a DataSource, use os.curdir (default) for local storage. + >>> ds = datasource.DataSource() + >>> + >>> # Open a remote file. + >>> # DataSource downloads the file, stores it locally in: + >>> # './www.google.com/index.html' + >>> # opens the file and returns a file object. 
+ >>> fp = ds.open('http://www.google.com/index.html') + >>> + >>> # Use the file as you normally would + >>> fp.read() + >>> fp.close() + +""" + +__docformat__ = "restructuredtext en" + +import bz2 +import gzip +import os +import tempfile +from shutil import rmtree +from urllib2 import urlopen, URLError +from urlparse import urlparse + + +# TODO: .zip support, .tar support? +_file_openers = {".gz":gzip.open, ".bz2":bz2.BZ2File, None:file} + + +def open(path, mode='r', destpath=os.curdir): + """Open ``path`` with ``mode`` and return the file object. + + If ``path`` is an URL, it will be downloaded, stored in the DataSource + directory and opened from there. + + *Parameters*: + + path : {string} + + mode : {string}, optional + + destpath : {string}, optional + Destination directory where URLs will be downloaded and stored. + + *Returns*: + + file object + + """ + + ds = DataSource(destpath) + return ds.open(path, mode) + + +class DataSource (object): + """A generic data source file (file, http, ftp, ...). + + DataSources could be local files or remote files/URLs. The files may + also be compressed or uncompressed. DataSource hides some of the low-level + details of downloading the file, allowing you to simply pass in a valid + file path (or URL) and obtain a file object. + + *Methods*: + + - exists : test if the file exists locally or remotely + - abspath : get absolute path of the file in the DataSource directory + - open : open the file + + *Example URL DataSource*:: + + # Initialize DataSource with a local directory, default is os.curdir. + ds = DataSource('/home/guido') + + # Open remote file. + # File will be downloaded and opened from here: + # /home/guido/site/xyz.txt + ds.open('http://fake.xyz.web/site/xyz.txt') + + *Example using DataSource for temporary files*:: + + # Initialize DataSource with 'None' for the local directory. + ds = DataSource(None) + + # Open local file. 
+ # Opened file exists in a temporary directory like: + # /tmp/tmpUnhcvM/foobar.txt + # Temporary directories are deleted when the DataSource is deleted. + ds.open('/home/guido/foobar.txt') + + *Notes*: + BUG : URLs require a scheme string ('http://') to be used. + www.google.com will fail. + + >>> repos.exists('www.google.com/index.html') + False + + >>> repos.exists('http://www.google.com/index.html') + True + + """ + + def __init__(self, destpath=os.curdir): + """Create a DataSource with a local path at destpath.""" + if destpath: + self._destpath = os.path.abspath(destpath) + self._istmpdest = False + else: + self._destpath = tempfile.mkdtemp() + self._istmpdest = True + + def __del__(self): + # Remove temp directories + if self._istmpdest: + rmtree(self._destpath) + + def _iszip(self, filename): + """Test if the filename is a zip file by looking at the file extension. + """ + fname, ext = os.path.splitext(filename) + return ext in _file_openers.keys() + + def _iswritemode(self, mode): + """Test if the given mode will open a file for writing.""" + + # Currently only used to test the bz2 files. + _writemodes = ("w", "+") + for c in mode: + if c in _writemodes: + return True + return False + + def _splitzipext(self, filename): + """Split zip extension from filename and return filename. + + *Returns*: + base, zip_ext : {tuple} + + """ + + if self._iszip(filename): + return os.path.splitext(filename) + else: + return filename, None + + def _possible_names(self, filename): + """Return a tuple containing compressed filename variations.""" + names = [filename] + if not self._iszip(filename): + for zipext in _file_openers.keys(): + if zipext: + names.append(filename+zipext) + return names + + def _isurl(self, path): + """Test if path is a net location. Tests the scheme and netloc.""" + + # BUG : URLs require a scheme string ('http://') to be used. + # www.google.com will fail. + # Should we prepend the scheme for those that don't have it and + # test that also? 
Similar to the way we append .gz and test for + # for compressed versions of files. + + scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path) + return bool(scheme and netloc) + + def _cache(self, path): + """Cache the file specified by path. + + Creates a copy of the file in the datasource cache. + + """ + + upath = self.abspath(path) + + # ensure directory exists + if not os.path.exists(os.path.dirname(upath)): + os.makedirs(os.path.dirname(upath)) + + # TODO: Doesn't handle compressed files! + if self._isurl(path): + try: + openedurl = urlopen(path) + file(upath, 'w').write(openedurl.read()) + except URLError: + raise URLError("URL not found: ", path) + else: + try: + # TODO: Why not just copy the file with shutils.copyfile? + fp = file(path, 'r') + file(upath, 'w').write(fp.read()) + except IOError: + raise IOError("File not found: ", path) + return upath + + def _findfile(self, path): + """Searches for ``path`` and returns full path if found. + + If path is an URL, _findfile will cache a local copy and return + the path to the cached file. + If path is a local file, _findfile will return a path to that local + file. + + The search will include possible compressed versions of the file and + return the first occurence found. + + """ + + # Build list of possible local file paths + if not self._isurl(path): + # Valid local paths + filelist = self._possible_names(path) + # Paths in self._destpath + filelist += self._possible_names(self.abspath(path)) + else: + # Cached URLs in self._destpath + filelist = self._possible_names(self.abspath(path)) + # Remote URLs + filelist = filelist + self._possible_names(path) + + for name in filelist: + if self.exists(name): + if self._isurl(name): + name = self._cache(name) + return name + return None + + def abspath(self, path): + """Return absolute path of ``path`` in the DataSource directory. 
+ + If ``path`` is an URL, the ``abspath`` will be either the location + the file exists locally or the location it would exist when opened + using the ``open`` method. + + The functionality is idential to os.path.abspath. + + *Parameters*: + + path : {string} + Can be a local file or a remote URL. + + *Returns*: + + Complete path, rooted in the DataSource destination directory. + + *See Also*: + + `open` : Method that downloads and opens files. + + """ + + # TODO: This should be more robust. Handles case where path includes + # the destpath, but not other sub-paths. Failing case: + # path = /home/guido/datafile.txt + # destpath = /home/alex/ + # upath = self.abspath(path) + # upath == '/home/alex/home/guido/datafile.txt' + + # handle case where path includes self._destpath + splitpath = path.split(self._destpath, 2) + if len(splitpath) > 1: + path = splitpath[1] + scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path) + return os.path.join(self._destpath, netloc, upath.strip(os.sep)) + + def exists(self, path): + """Test if ``path`` exists. + + Test if ``path`` exists as (and in this order): + + - a local file. + - a remote URL that have been downloaded and stored locally in the + DataSource directory. + - a remote URL that has not been downloaded, but is valid and + accessible. + + *Parameters*: + + path : {string} + Can be a local file or a remote URL. + + *Returns*: + + boolean + + *See Also*: + + `abspath` + + *Notes* + + When ``path`` is an URL, ``exist`` will return True if it's either + stored locally in the DataSource directory, or is a valid remote + URL. DataSource does not discriminate between to two, the file + is accessible if it exists in either location. 
+ + """ + + # Test local path + if os.path.exists(path): + return True + + # Test cached url + upath = self.abspath(path) + if os.path.exists(upath): + return True + + # Test remote url + if self._isurl(path): + try: + netfile = urlopen(path) + del(netfile) + return True + except URLError: + return False + return False + + def open(self, path, mode='r'): + """Open ``path`` with ``mode`` and return the file object. + + If ``path`` is an URL, it will be downloaded, stored in the DataSource + directory and opened from there. + + *Parameters*: + + path : {string} + + mode : {string}, optional + + + *Returns*: + + file object + + """ + + # TODO: There is no support for opening a file for writing which + # doesn't exist yet (creating a file). Should there be? + + # TODO: Add a ``subdir`` parameter for specifying the subdirectory + # used to store URLs in self._destpath. + + if self._isurl(path) and self._iswritemode(mode): + raise ValueError("URLs are not writeable") + + # NOTE: _findfile will fail on a new file opened for writing. + found = self._findfile(path) + if found: + _fname, ext = self._splitzipext(found) + if ext == 'bz2': + mode.replace("+", "") + return _file_openers[ext](found, mode=mode) + else: + raise IOError("%s not found." % path) + + +class Repository (DataSource): + """A data Repository where multiple DataSource's share a base URL/directory. + + Repository extends DataSource by prepending a base URL (or directory) to + all the files it handles. Use a Repository when you will be working with + multiple files from one base URL. Initialize the Respository with the + base URL, then refer to each file by it's filename only. + + *Methods*: + + - exists : test if the file exists locally or remotely + - abspath : get absolute path of the file in the DataSource directory + - open : open the file + + *Toy example*:: + + # Analyze all files in the repository. 
+ repos = Repository('/home/user/data/dir/') + for filename in filelist: + fp = repos.open(filename) + fp.analyze() + fp.close() + + # Similarly you could use a URL for a repository. + repos = Repository('http://www.xyz.edu/data') + + """ + + def __init__(self, baseurl, destpath=os.curdir): + """Create a Repository with a shared url or directory of baseurl.""" + DataSource.__init__(self, destpath=destpath) + self._baseurl = baseurl + + def __del__(self): + DataSource.__del__(self) + + def _fullpath(self, path): + """Return complete path for path. Prepends baseurl if necessary.""" + splitpath = path.split(self._baseurl, 2) + if len(splitpath) == 1: + result = os.path.join(self._baseurl, path) + else: + result = path # path contains baseurl already + return result + + def _findfile(self, path): + """Extend DataSource method to prepend baseurl to ``path``.""" + return DataSource._findfile(self, self._fullpath(path)) + + def abspath(self, path): + """Extend DataSource method to prepend baseurl to ``path``.""" + return DataSource.abspath(self, self._fullpath(path)) + + def exists(self, path): + """Extend DataSource method to prepend baseurl to ``path``.""" + return DataSource.exists(self, self._fullpath(path)) + + def open(self, path, mode='r'): + """Extend DataSource method to prepend baseurl to ``path``.""" + return DataSource.open(self, self._fullpath(path), mode) + + def listdir(self): + '''List files in the source Repository.''' + if self._isurl(self._baseurl): + raise NotImplementedError, \ + "Directory listing of URLs, not supported yet." + else: + return os.listdir(self._baseurl) diff --git a/numpy/lib/format.py b/numpy/lib/format.py new file mode 100644 index 000000000..bb58c5c61 --- /dev/null +++ b/numpy/lib/format.py @@ -0,0 +1,412 @@ +""" Define a simple format for saving numpy arrays to disk with the full +information about them. + +WARNING: THE FORMAT IS CURRENTLY UNSTABLE. DO NOT STORE CRITICAL DATA WITH IT. 
+ While this code is in an SVN branch, the format may change without + notice, without backwards compatibility, and without changing the + format's version number. When the code moves into the trunk the format + will be stabilized, the version number will increment as changes occur, + and backwards compatibility with older versions will be maintained. + +Format Version 1.0 +------------------ + +The first 6 bytes are a magic string: exactly "\\x93NUMPY". + +The next 1 byte is an unsigned byte: the major version number of the file +format, e.g. \\x01. + +The next 1 byte is an unsigned byte: the minor version number of the file +format, e.g. \\x00. Note: the version of the file format is not tied to the +version of the numpy package. + +The next 2 bytes form a little-endian unsigned short int: the length of the +header data HEADER_LEN. + +The next HEADER_LEN bytes form the header data describing the array's format. It +is an ASCII string which contains a Python literal expression of a dictionary. +It is terminated by a newline ('\\n') and padded with spaces ('\\x20') to make +the total length of the magic string + 4 + HEADER_LEN be evenly divisible by 16 +for alignment purposes. + +The dictionary contains three keys: + + "descr" : dtype.descr + An object that can be passed as an argument to the numpy.dtype() + constructor to create the array's dtype. + "fortran_order" : bool + Whether the array data is Fortran-contiguous or not. Since + Fortran-contiguous arrays are a common form of non-C-contiguity, we + allow them to be written directly to disk for efficiency. + "shape" : tuple of int + The shape of the array. + +For repeatability and readability, this dictionary is formatted using +pprint.pformat() so the keys are in alphabetic order. + +Following the header comes the array data. If the dtype contains Python objects +(i.e. dtype.hasobject is True), then the data is a Python pickle of the array. 
+Otherwise the data is the contiguous (either C- or Fortran-, depending on +fortran_order) bytes of the array. Consumers can figure out the number of bytes +by multiplying the number of elements given by the shape (noting that shape=() +means there is 1 element) by dtype.itemsize. +""" + +import cPickle +import pprint +import struct + +import numpy +from numpy.lib.utils import safe_eval + + +MAGIC_PREFIX = '\x93NUMPY' +MAGIC_LEN = len(MAGIC_PREFIX) + 2 + +def magic(major, minor): + """ Return the magic string for the given file format version. + + Parameters + ---------- + major : int in [0, 255] + minor : int in [0, 255] + + Returns + ------- + magic : str + + Raises + ------ + ValueError if the version cannot be formatted. + """ + if major < 0 or major > 255: + raise ValueError("major version must be 0 <= major < 256") + if minor < 0 or minor > 255: + raise ValueError("minor version must be 0 <= minor < 256") + return '%s%s%s' % (MAGIC_PREFIX, chr(major), chr(minor)) + +def read_magic(fp): + """ Read the magic string to get the version of the file format. + + Parameters + ---------- + fp : filelike object + + Returns + ------- + major : int + minor : int + """ + magic_str = fp.read(MAGIC_LEN) + if len(magic_str) != MAGIC_LEN: + raise ValueError("could not read %d characters for the magic string; got %r" % (MAGIC_LEN, magic_str)) + if magic_str[:-2] != MAGIC_PREFIX: + raise ValueError("the magic string is not correct; expected %r, got %r" % (MAGIC_PREFIX, magic_str[:-2])) + major, minor = map(ord, magic_str[-2:]) + return major, minor + +def dtype_to_descr(dtype): + """ Get a serializable descriptor from the dtype. + + The .descr attribute of a dtype object cannot be round-tripped through the + dtype() constructor. Simple types, like dtype('float32'), have a descr which + looks like a record array with one field with '' as a name. The dtype() + constructor interprets this as a request to give a default name. 
Instead, we + construct descriptor that can be passed to dtype(). + """ + if dtype.names is not None: + # This is a record array. The .descr is fine. + # XXX: parts of the record array with an empty name, like padding bytes, + # still get fiddled with. This needs to be fixed in the C implementation + # of dtype(). + return dtype.descr + else: + return dtype.str + +def header_data_from_array_1_0(array): + """ Get the dictionary of header metadata from a numpy.ndarray. + + Parameters + ---------- + array : numpy.ndarray + + Returns + ------- + d : dict + This has the appropriate entries for writing its string representation + to the header of the file. + """ + d = {} + d['shape'] = array.shape + if array.flags.c_contiguous: + d['fortran_order'] = False + elif array.flags.f_contiguous: + d['fortran_order'] = True + else: + # Totally non-contiguous data. We will have to make it C-contiguous + # before writing. Note that we need to test for C_CONTIGUOUS first + # because a 1-D array is both C_CONTIGUOUS and F_CONTIGUOUS. + d['fortran_order'] = False + + d['descr'] = dtype_to_descr(array.dtype) + return d + +def write_array_header_1_0(fp, d): + """ Write the header for an array using the 1.0 format. + + Parameters + ---------- + fp : filelike object + d : dict + This has the appropriate entries for writing its string representation + to the header of the file. + """ + header = pprint.pformat(d) + # Pad the header with spaces and a final newline such that the magic string, + # the header-length short and the header are aligned on a 16-byte boundary. + # Hopefully, some system, possibly memory-mapping, can take advantage of + # our premature optimization. 
+ current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline + topad = 16 - (current_header_len % 16) + header = '%s%s\n' % (header, ' '*topad) + if len(header) >= (256*256): + raise ValueError("header does not fit inside %s bytes" % (256*256)) + header_len_str = struct.pack('<H', len(header)) + fp.write(header_len_str) + fp.write(header) + +def read_array_header_1_0(fp): + """ Read an array header from a filelike object using the 1.0 file format + version. + + This will leave the file object located just after the header. + + Parameters + ---------- + fp : filelike object + + Returns + ------- + shape : tuple of int + The shape of the array. + fortran_order : bool + The array data will be written out directly if it is either C-contiguous + or Fortran-contiguous. Otherwise, it will be made contiguous before + writing it out. + dtype : dtype + + Raises + ------ + ValueError if the data is invalid. + """ + # Read an unsigned, little-endian short int which has the length of the + # header. + hlength_str = fp.read(2) + if len(hlength_str) != 2: + raise ValueError("EOF at %s before reading array header length" % fp.tell()) + header_length = struct.unpack('<H', hlength_str)[0] + header = fp.read(header_length) + if len(header) != header_length: + raise ValueError("EOF at %s before reading array header" % fp.tell()) + + # The header is a pretty-printed string representation of a literal Python + # dictionary with trailing newlines padded to a 16-byte boundary. The keys + # are strings. 
+ # "shape" : tuple of int + # "fortran_order" : bool + # "descr" : dtype.descr + try: + d = safe_eval(header) + except SyntaxError, e: + raise ValueError("Cannot parse header: %r\nException: %r" % (header, e)) + if not isinstance(d, dict): + raise ValueError("Header is not a dictionary: %r" % d) + keys = d.keys() + keys.sort() + if keys != ['descr', 'fortran_order', 'shape']: + raise ValueError("Header does not contain the correct keys: %r" % (keys,)) + + # Sanity-check the values. + if (not isinstance(d['shape'], tuple) or + not numpy.all([isinstance(x, int) for x in d['shape']])): + raise ValueError("shape is not valid: %r" % (d['shape'],)) + if not isinstance(d['fortran_order'], bool): + raise ValueError("fortran_order is not a valid bool: %r" % (d['fortran_order'],)) + try: + dtype = numpy.dtype(d['descr']) + except TypeError, e: + raise ValueError("descr is not a valid dtype descriptor: %r" % (d['descr'],)) + + return d['shape'], d['fortran_order'], dtype + +def write_array(fp, array, version=(1,0)): + """ Write an array to a file, including a header. + + If the array is neither C-contiguous or Fortran-contiguous AND if the + filelike object is not a real file object, then this function will have to + copy data in memory. + + Parameters + ---------- + fp : filelike object + array : numpy.ndarray + version : (int, int), optional + The version number of the format. + + Raises + ------ + ValueError if the array cannot be persisted. + Various other errors from pickling if the array contains Python objects as + part of its dtype. + """ + if version != (1, 0): + raise ValueError("we only support format version (1,0), not %s" % (version,)) + fp.write(magic(*version)) + write_array_header_1_0(fp, header_data_from_array_1_0(array)) + if array.dtype.hasobject: + # We contain Python objects so we cannot write out the data directly. + # Instead, we will pickle it out with version 2 of the pickle protocol. 
+ cPickle.dump(array, fp, protocol=2) + elif array.flags.f_contiguous and not array.flags.c_contiguous: + # Use a suboptimal, possibly memory-intensive, but correct way to handle + # Fortran-contiguous arrays. + fp.write(array.data) + else: + if isinstance(fp, file): + array.tofile(fp) + else: + # XXX: We could probably chunk this using something like + # arrayterator. + fp.write(array.tostring('C')) + +def read_array(fp): + """ Read an array from a file. + + Parameters + ---------- + fp : filelike object + If this is not a real file object, then this may take extra memory and + time. + + Returns + ------- + array : numpy.ndarray + + Raises + ------ + ValueError if the data is invalid. + """ + version = read_magic(fp) + if version != (1, 0): + raise ValueError("only support version (1,0) of file format, not %r" % (version,)) + shape, fortran_order, dtype = read_array_header_1_0(fp) + if len(shape) == 0: + count = 1 + else: + count = numpy.multiply.reduce(shape) + + # Now read the actual data. + if dtype.hasobject: + # The array contained Python objects. We need to unpickle the data. + array = cPickle.load(fp) + else: + if isinstance(fp, file): + # We can use the fast fromfile() function. + array = numpy.fromfile(fp, dtype=dtype, count=count) + else: + # This is not a real file. We have to read it the memory-intensive way. + # XXX: we can probably chunk this to avoid the memory hit. + data = fp.read(count * dtype.itemsize) + array = numpy.fromstring(data, dtype=dtype, count=count) + + if fortran_order: + array.shape = shape[::-1] + array = array.transpose() + else: + array.shape = shape + + return array + + +def open_memmap(filename, mode='r+', dtype=None, shape=None, + fortran_order=False, version=(1,0)): + """ Open a .npy file as a memory-mapped array. + + Parameters + ---------- + filename : str + mode : str, optional + The mode to open the file with. In addition to the standard file modes, + 'c' is also accepted to mean "copy on write". 
+ dtype : dtype, optional + shape : tuple of int, optional + fortran_order : bool, optional + If the mode is a "write" mode, then the file will be created using this + dtype, shape, and contiguity. + version : tuple of int (major, minor) + If the mode is a "write" mode, then this is the version of the file + format used to create the file. + + Returns + ------- + marray : numpy.memmap + + Raises + ------ + ValueError if the data or the mode is invalid. + IOError if the file is not found or cannot be opened correctly. + """ + if 'w' in mode: + # We are creating the file, not reading it. + # Check if we ought to create the file. + if version != (1, 0): + raise ValueError("only support version (1,0) of file format, not %r" % (version,)) + # Ensure that the given dtype is an authentic dtype object rather than + # just something that can be interpreted as a dtype object. + dtype = numpy.dtype(dtype) + if dtype.hasobject: + raise ValueError("the dtype includes Python objects; the array cannot be memory-mapped") + d = dict( + descr=dtype_to_descr(dtype), + fortran_order=fortran_order, + shape=shape, + ) + # If we got here, then it should be safe to create the file. + fp = open(filename, mode+'b') + try: + fp.write(magic(*version)) + write_array_header_1_0(fp, d) + offset = fp.tell() + finally: + fp.close() + else: + # Read the header of the file first. + fp = open(filename, 'rb') + try: + version = read_magic(fp) + if version != (1, 0): + raise ValueError("only support version (1,0) of file format, not %r" % (version,)) + shape, fortran_order, dtype = read_array_header_1_0(fp) + if dtype.hasobject: + raise ValueError("the dtype includes Python objects; the array cannot be memory-mapped") + offset = fp.tell() + finally: + fp.close() + + if fortran_order: + order = 'F' + else: + order = 'C' + + # We need to change a write-only mode to a read-write mode since we've + # already written data to the file. 
+ if mode == 'w+': + mode = 'r+' + + marray = numpy.memmap(filename, dtype=dtype, shape=shape, order=order, + mode=mode, offset=offset) + + return marray + + + diff --git a/numpy/lib/io.py b/numpy/lib/io.py new file mode 100644 index 000000000..b4e89c2fc --- /dev/null +++ b/numpy/lib/io.py @@ -0,0 +1,360 @@ + +__all__ = ['savetxt', 'loadtxt', + 'load', 'loads', + 'save', 'savez', + 'packbits', 'unpackbits', + 'DataSource'] + +import numpy as np +import format +import zipfile +import cStringIO +import tempfile +import os + +from cPickle import load as _cload, loads +from _datasource import DataSource +from _compiled_base import packbits, unpackbits + +_file = file + +class BagObj(object): + """A simple class that converts attribute lookups to + getitems on the class passed in. + """ + def __init__(self, obj): + self._obj = obj + def __getattribute__(self, key): + try: + return object.__getattribute__(self, '_obj')[key] + except KeyError: + raise AttributeError, key + +class NpzFile(object): + """A dictionary-like object with lazy-loading of files in the zipped + archive provided on construction. + + The arrays and file strings are lazily loaded on either + getitem access using obj['key'] or attribute lookup using obj.f.key + + A list of all files (without .npy) extensions can be obtained + with .files and the ZipFile object itself using .zip + """ + def __init__(self, fid): + _zip = zipfile.ZipFile(fid) + self._files = _zip.namelist() + self.files = [] + for x in self._files: + if x.endswith('.npy'): + self.files.append(x[:-4]) + else: + self.files.append(x) + self.zip = _zip + self.f = BagObj(self) + + def __getitem__(self, key): + # FIXME: This seems like it will copy strings around + # more than is strictly necessary. The zipfile + # will read the string and then + # the format.read_array will copy the string + # to another place in memory. + # It would be better if the zipfile could read + # (or at least uncompress) the data + # directly into the array memory. 
+ member = 0 + if key in self._files: + member = 1 + elif key in self.files: + member = 1 + key += '.npy' + if member: + bytes = self.zip.read(key) + if bytes.startswith(format.MAGIC_PREFIX): + value = cStringIO.StringIO(bytes) + return format.read_array(value) + else: + return bytes + else: + raise KeyError, "%s is not a file in the archive" % key + +def load(file, memmap=False): + """Load a binary file. + + Read a binary file (either a pickle, or a binary .npy/.npz file) and + return the result. + + Parameters + ---------- + file : file-like object or string + the file to read. It must support seek and read methods + memmap : bool + If true, then memory-map the .npy file or unzip the .npz file into + a temporary directory and memory-map each component + This has no effect for a pickle. + + Returns + ------- + result : array, tuple, dict, etc. + data stored in the file. + If file contains pickle data, then whatever is stored in the pickle is returned. + If the file is .npy file, then an array is returned. + If the file is .npz file, then a dictionary-like object is returned which has a + filename:array key:value pair for every file in the zip. + """ + if isinstance(file, type("")): + fid = _file(file,"rb") + else: + fid = file + + if memmap: + raise NotImplementedError + + # Code to distinguish from NumPy binary files and pickles. + _ZIP_PREFIX = 'PK\x03\x04' + N = len(format.MAGIC_PREFIX) + magic = fid.read(N) + fid.seek(-N,1) # back-up + if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz) + return NpzFile(fid) + elif magic == format.MAGIC_PREFIX: # .npy file + return format.read_array(fid) + else: # Try a pickle + try: + return _cload(fid) + except: + raise IOError, \ + "Failed to interpret file %s as a pickle" % repr(file) + +def save(file, arr): + """Save an array to a binary file (a string or file-like object). + + If the file is a string, then if it does not have the .npy extension, + it is appended and a file open. 
+ + Data is saved to the open file in NumPy-array format + + Examples + -------- + import numpy as np + ... + np.save('myfile', a) + a = np.load('myfile.npy') + """ + if isinstance(file, str): + if not file.endswith('.npy'): + file = file + '.npy' + fid = open(file, "wb") + else: + fid = file + + arr = np.asanyarray(arr) + format.write_array(fid, arr) + +def savez(file, *args, **kwds): + """Save several arrays into an .npz file format which is a zipped-archive + of arrays + + If keyword arguments are given, then filenames are taken from the keywords. + If arguments are passed in with no keywords, then stored file names are + arr_0, arr_1, etc. + """ + + if isinstance(file, str): + if not file.endswith('.npz'): + file = file + '.npz' + + namedict = kwds + for i, val in enumerate(args): + key = 'arr_%d' % i + if key in namedict.keys(): + raise ValueError, "Cannot use un-named variables and keyword %s" % key + namedict[key] = val + + zip = zipfile.ZipFile(file, mode="w") + + # Place to write temporary .npy files + # before storing them in the zip + direc = tempfile.gettempdir() + todel = [] + + for key, val in namedict.iteritems(): + fname = key + '.npy' + filename = os.path.join(direc, fname) + todel.append(filename) + fid = open(filename,'wb') + format.write_array(fid, np.asanyarray(val)) + fid.close() + zip.write(filename, arcname=fname) + + zip.close() + for name in todel: + os.remove(name) + +# Adapted from matplotlib + +def _getconv(dtype): + typ = dtype.type + if issubclass(typ, np.bool_): + return lambda x: bool(int(x)) + if issubclass(typ, np.integer): + return int + elif issubclass(typ, np.floating): + return float + elif issubclass(typ, np.complex): + return complex + else: + return str + + +def _string_like(obj): + try: obj + '' + except (TypeError, ValueError): return 0 + return 1 + +def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, + skiprows=0, usecols=None, unpack=False): + """ + Load ASCII data from fname into an array and 
return the array. + + The data must be regular, same number of values in every row + + fname can be a filename or a file handle. Support for gzipped files is + automatic, if the filename ends in .gz + + See scipy.io.loadmat to read and write matfiles. + + Example usage: + + X = loadtxt('test.dat') # data in two columns + t = X[:,0] + y = X[:,1] + + Alternatively, you can do the same with "unpack"; see below + + X = loadtxt('test.dat') # a matrix of data + x = loadtxt('test.dat') # a single column of data + + + dtype - the data-type of the resulting array. If this is a + record data-type, the the resulting array will be 1-d and each row will + be interpreted as an element of the array. The number of columns + used must match the number of fields in the data-type in this case. + + comments - the character used to indicate the start of a comment + in the file + + delimiter is a string-like character used to seperate values in the + file. If delimiter is unspecified or none, any whitespace string is + a separator. + + converters, if not None, is a dictionary mapping column number to + a function that will convert that column to a float. 
Eg, if + column 0 is a date string: converters={0:datestr2num} + + skiprows is the number of rows from the top to skip + + usecols, if not None, is a sequence of integer column indexes to + extract where 0 is the first column, eg usecols=(1,4,5) to extract + just the 2nd, 5th and 6th columns + + unpack, if True, will transpose the matrix allowing you to unpack + into named arguments on the left hand side + + t,y = load('test.dat', unpack=True) # for two column data + x,y,z = load('somefile.dat', usecols=(3,5,7), unpack=True) + + """ + + if _string_like(fname): + if fname.endswith('.gz'): + import gzip + fh = gzip.open(fname) + else: + fh = file(fname) + elif hasattr(fname, 'seek'): + fh = fname + else: + raise ValueError('fname must be a string or file handle') + X = [] + + dtype = np.dtype(dtype) + defconv = _getconv(dtype) + converterseq = None + if converters is None: + converters = {} + if dtype.names is not None: + converterseq = [_getconv(dtype.fields[name][0]) \ + for name in dtype.names] + + for i,line in enumerate(fh): + if i<skiprows: continue + line = line[:line.find(comments)].strip() + if not len(line): continue + vals = line.split(delimiter) + if converterseq is None: + converterseq = [converters.get(j,defconv) \ + for j in xrange(len(vals))] + if usecols is not None: + row = [converterseq[j](vals[j]) for j in usecols] + else: + row = [converterseq[j](val) for j,val in enumerate(vals)] + if dtype.names is not None: + row = tuple(row) + X.append(row) + + X = np.array(X, dtype) + r,c = X.shape + if r==1 or c==1: + X.shape = max([r,c]), + if unpack: return X.T + else: return X + + +# adjust so that fmt can change across columns if desired. + +def savetxt(fname, X, fmt='%.18e',delimiter=' '): + """ + Save the data in X to file fname using fmt string to convert the + data to strings + + fname can be a filename or a file handle. If the filename ends in .gz, + the file is automatically saved in compressed gzip format. 
The load() + command understands gzipped files transparently. + + Example usage: + + save('test.out', X) # X is an array + save('test1.out', (x,y,z)) # x,y,z equal sized 1D arrays + save('test2.out', x) # x is 1D + save('test3.out', x, fmt='%1.4e') # use exponential notation + + delimiter is used to separate the fields, eg delimiter ',' for + comma-separated values + """ + + if _string_like(fname): + if fname.endswith('.gz'): + import gzip + fh = gzip.open(fname,'wb') + else: + fh = file(fname,'w') + elif hasattr(fname, 'seek'): + fh = fname + else: + raise ValueError('fname must be a string or file handle') + + + X = np.asarray(X) + origShape = None + if len(X.shape)==1: + origShape = X.shape + X.shape = len(X), 1 + for row in X: + fh.write(delimiter.join([fmt%val for val in row]) + '\n') + + if origShape is not None: + X.shape = origShape + + + + diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c index 42c0183e8..654e7d95b 100644 --- a/numpy/lib/src/_compiled_base.c +++ b/numpy/lib/src/_compiled_base.c @@ -9,6 +9,9 @@ static PyObject *ErrorObject; goto fail;} \ } +#define PYSETERROR(message) \ +{ PyErr_SetString(ErrorObject, message); goto fail; } + static intp incr_slot_ (double x, double *bins, intp lbins) { @@ -526,6 +529,269 @@ arr_add_docstring(PyObject *dummy, PyObject *args) return Py_None; } + +static char packbits_doc[] = + "out = numpy.packbits(myarray, axis=None)\n\n" + " myarray : an integer type array whose elements should be packed to bits\n\n" + " This routine packs the elements of a binary-valued dataset into a\n" + " NumPy array of type uint8 ('B') whose bits correspond to\n" + " the logical (0 or nonzero) value of the input elements.\n" + " The dimension over-which bit-packing is done is given by axis.\n" + " The shape of the output has the same number of dimensions as the input\n" + " (unless axis is None, in which case the output is 1-d).\n" + "\n" + " Example:\n" + " >>> a = array([[[1,0,1],\n" + " ... 
[0,1,0]],\n" + " ... [[1,1,0],\n" + " ... [0,0,1]]])\n" + " >>> b = numpy.packbits(a,axis=-1)\n" + " >>> b\n" + " array([[[160],[64]],[[192],[32]]], dtype=uint8)\n\n" + " Note that 160 = 128 + 32\n" + " 192 = 128 + 64\n"; + +static char unpackbits_doc[] = + "out = numpy.unpackbits(myarray, axis=None)\n\n" + " myarray - array of uint8 type where each element represents a bit-field\n" + " that should be unpacked into a boolean output array\n\n" + " The shape of the output array is either 1-d (if axis is None) or\n" + " the same shape as the input array with unpacking done along the\n" + " axis specified."; + +/* PACKBITS + + This function packs binary (0 or 1) 1-bit per pixel arrays + into contiguous bytes. + +*/ + +static void +_packbits( + void *In, + int element_size, /* in bytes */ + npy_intp in_N, + npy_intp in_stride, + void *Out, + npy_intp out_N, + npy_intp out_stride + ) +{ + char build; + int i, index; + npy_intp out_Nm1; + int maxi, remain, nonzero, j; + char *outptr,*inptr; + + outptr = Out; /* pointer to output buffer */ + inptr = In; /* pointer to input buffer */ + + /* Loop through the elements of In */ + /* Determine whether or not it is nonzero. + Yes: set correspdoning bit (and adjust build value) + No: move on + /* Every 8th value, set the value of build and increment the outptr */ + + remain = in_N % 8; /* uneven bits */ + if (remain == 0) remain = 8; + out_Nm1 = out_N - 1; + for (index = 0; index < out_N; index++) { + build = 0; + maxi = (index != out_Nm1 ? 
8 : remain); + for (i = 0; i < maxi ; i++) { + build <<= 1; /* shift bits left one bit */ + nonzero = 0; + for (j = 0; j < element_size; j++) /* determine if this number is non-zero */ + nonzero += (*(inptr++) != 0); + inptr += (in_stride - element_size); /* advance to next input */ + build += (nonzero != 0); /* add to this bit if the input value is non-zero */ + } + if (index == out_Nm1) build <<= (8-remain); + /* printf("Here: %d %d %d %d\n",build,slice,index,maxi); + */ + *outptr = build; + outptr += out_stride; + } + return; +} + + +static void +_unpackbits( + void *In, + int el_size, /* unused */ + npy_intp in_N, + npy_intp in_stride, + void *Out, + npy_intp out_N, + npy_intp out_stride + ) +{ + unsigned char mask; + int i,index; + char *inptr, *outptr; + + /* Loop through the elements of out + */ + outptr = Out; + inptr = In; + for (index = 0; index < in_N; index++) { + mask = 128; + for (i = 0; i < 8 ; i++) { + *outptr = ((mask & (unsigned char)(*inptr)) != 0); + outptr += out_stride; + mask >>= 1; + } + inptr += in_stride; + } + return; +} + +static PyObject * +pack_or_unpack_bits(PyObject *input, int axis, int unpack) +{ + PyArrayObject *inp; + PyObject *new=NULL; + PyObject *out=NULL; + npy_intp outdims[MAX_DIMS]; + int i; + void (*thefunc)(void *, int, npy_intp, npy_intp, void *, npy_intp, npy_intp); + PyArrayIterObject *it, *ot; + + inp = (PyArrayObject *)PyArray_FROM_O(input); + + if (inp == NULL) return NULL; + + if (unpack) { + if (PyArray_TYPE(inp) != NPY_UBYTE) + PYSETERROR("Expecting an input array of unsigned byte data type"); + } + else { + if (!PyArray_ISINTEGER(inp)) + PYSETERROR("Expecting an input array of integer data type"); + } + + new = PyArray_CheckAxis(inp, &axis, 0); + Py_DECREF(inp); + if (new == NULL) return NULL; + + /* Handle zero-dim array separately */ + if (PyArray_SIZE(new) == 0) { + return PyArray_Copy((PyArrayObject *)new); + } + + if (PyArray_NDIM(new) == 0) { + if (unpack) { + /* Handle 0-d array by converting it to a 1-d 
array */ + PyObject *temp; + PyArray_Dims newdim = {NULL, 1}; + npy_intp shape=1; + newdim.ptr = &shape; + temp = PyArray_Newshape((PyArrayObject *)new, &newdim, NPY_CORDER); + if (temp == NULL) goto fail; + Py_DECREF(new); + new = temp; + } + else { + ubyte *optr, *iptr; + out = PyArray_New(new->ob_type, 0, NULL, NPY_UBYTE, + NULL, NULL, 0, 0, NULL); + if (out == NULL) goto fail; + optr = PyArray_DATA(out); + iptr = PyArray_DATA(new); + *optr = 0; + for (i=0; i<PyArray_ITEMSIZE(new); i++) { + if (*iptr != 0) { + *optr = 1; + break; + } + iptr++; + } + goto finish; + } + } + + + /* Setup output shape */ + for (i=0; i<PyArray_NDIM(new); i++) { + outdims[i] = PyArray_DIM(new, i); + } + + if (unpack) { + /* Multiply axis dimension by 8 */ + outdims[axis] <<= 3; + thefunc = _unpackbits; + } + else { + /* Divide axis dimension by 8 */ + /* 8 -> 1, 9 -> 2, 16 -> 2, 17 -> 3 etc.. */ + outdims[axis] = ((outdims[axis] - 1) >> 3) + 1; + thefunc = _packbits; + } + + /* Create output array */ + out = PyArray_New(new->ob_type, PyArray_NDIM(new), outdims, PyArray_UBYTE, + NULL, NULL, 0, PyArray_ISFORTRAN(new), NULL); + if (out == NULL) goto fail; + + /* Setup iterators to iterate over all but given axis */ + it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)new, &axis); + ot = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)out, &axis); + if (it == NULL || ot == NULL) { + Py_XDECREF(it); + Py_XDECREF(ot); + goto fail; + } + + while(PyArray_ITER_NOTDONE(it)) { + thefunc(PyArray_ITER_DATA(it), PyArray_ITEMSIZE(new), + PyArray_DIM(new, axis), PyArray_STRIDE(new, axis), + PyArray_ITER_DATA(ot), PyArray_DIM(out, axis), + PyArray_STRIDE(out, axis)); + PyArray_ITER_NEXT(it); + PyArray_ITER_NEXT(ot); + } + Py_DECREF(it); + Py_DECREF(ot); + + finish: + Py_DECREF(new); + return out; + + fail: + Py_XDECREF(new); + Py_XDECREF(out); + return NULL; +} + + + +static PyObject * +io_pack(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *obj; + int 
axis=NPY_MAXDIMS; + static char *kwlist[] = {"in", "axis", NULL}; + + if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist, + &obj, PyArray_AxisConverter, &axis)) + return NULL; + return pack_or_unpack_bits(obj, axis, 0); +} + +static PyObject * +io_unpack(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *obj; + int axis=NPY_MAXDIMS; + static char *kwlist[] = {"in", "axis", NULL}; + + if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist, + &obj, PyArray_AxisConverter, &axis)) + return NULL; + return pack_or_unpack_bits(obj, axis, 1); +} + static struct PyMethodDef methods[] = { {"_insert", (PyCFunction)arr_insert, METH_VARARGS | METH_KEYWORDS, arr_insert__doc__}, @@ -537,6 +803,10 @@ static struct PyMethodDef methods[] = { NULL}, {"add_docstring", (PyCFunction)arr_add_docstring, METH_VARARGS, NULL}, + {"packbits", (PyCFunction)io_pack, METH_VARARGS | METH_KEYWORDS, + packbits_doc}, + {"unpackbits", (PyCFunction)io_unpack, METH_VARARGS | METH_KEYWORDS, + unpackbits_doc}, {NULL, NULL} /* sentinel */ }; @@ -578,7 +848,7 @@ PyMODINIT_FUNC init_compiled_base(void) { PyDict_SetItemString(d, "__version__", s); Py_DECREF(s); - ErrorObject = PyString_FromString("numpy.lib._compiled_base.error"); + ErrorObject = PyString_FromString("numpy.lib.error"); PyDict_SetItemString(d, "error", ErrorObject); Py_DECREF(ErrorObject); diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py new file mode 100644 index 000000000..b238950a3 --- /dev/null +++ b/numpy/lib/tests/test_format.py @@ -0,0 +1,512 @@ +r''' Test the .npy file format. + +Set up: + + >>> import numpy as np + >>> from cStringIO import StringIO + >>> from numpy.lib import format + >>> + >>> scalars = [ + ... np.uint8, + ... np.int8, + ... np.uint16, + ... np.int16, + ... np.uint32, + ... np.int32, + ... np.uint64, + ... np.int64, + ... np.float32, + ... np.float64, + ... np.complex64, + ... np.complex128, + ... object, + ... 
] + >>> + >>> basic_arrays = [] + >>> + >>> for scalar in scalars: + ... for endian in '<>': + ... dtype = np.dtype(scalar).newbyteorder(endian) + ... basic = np.arange(15).astype(dtype) + ... basic_arrays.extend([ + ... np.array([], dtype=dtype), + ... np.array(10, dtype=dtype), + ... basic, + ... basic.reshape((3,5)), + ... basic.reshape((3,5)).T, + ... basic.reshape((3,5))[::-1,::2], + ... ]) + ... + >>> + >>> Pdescr = [ + ... ('x', 'i4', (2,)), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> PbufferT = [ + ... ([3,2], [[6.,4.],[6.,4.]], 8), + ... ([4,3], [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> Ndescr = [ + ... ('x', 'i4', (2,)), + ... ('Info', [ + ... ('value', 'c16'), + ... ('y2', 'f8'), + ... ('Info2', [ + ... ('name', 'S2'), + ... ('value', 'c16', (2,)), + ... ('y3', 'f8', (2,)), + ... ('z3', 'u4', (2,))]), + ... ('name', 'S2'), + ... ('z2', 'b1')]), + ... ('color', 'S2'), + ... ('info', [ + ... ('Name', 'U8'), + ... ('Value', 'c16')]), + ... ('y', 'f8', (2, 2)), + ... ('z', 'u1')] + >>> + >>> + >>> NbufferT = [ + ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8), + ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9), + ... ] + >>> + >>> + >>> record_arrays = [ + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + ... ] + +Test the magic string writing. + + >>> format.magic(1, 0) + '\x93NUMPY\x01\x00' + >>> format.magic(0, 0) + '\x93NUMPY\x00\x00' + >>> format.magic(255, 255) + '\x93NUMPY\xff\xff' + >>> format.magic(2, 5) + '\x93NUMPY\x02\x05' + +Test the magic string reading. 
+ + >>> format.read_magic(StringIO(format.magic(1, 0))) + (1, 0) + >>> format.read_magic(StringIO(format.magic(0, 0))) + (0, 0) + >>> format.read_magic(StringIO(format.magic(255, 255))) + (255, 255) + >>> format.read_magic(StringIO(format.magic(2, 5))) + (2, 5) + +Test the header writing. + + >>> for arr in basic_arrays + record_arrays: + ... f = StringIO() + ... format.write_array_header_1_0(f, arr) + ... print repr(f.getvalue()) + ... + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|i1', 'fortran_order': True, 
'shape': (5, 3)} \n" + "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<u2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': 
(3, 5)} \n" + "F\x00{'descr': '<u4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<u8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} 
\n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<i8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<f4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': ()} \n" + 
"F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<f8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<c8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '<c16', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (0,)} \n" 
+ "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|O4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 3)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (0,)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': ()} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (15,)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 5)} \n" + "F\x00{'descr': '|O4', 'fortran_order': True, 'shape': (5, 3)} \n" + "F\x00{'descr': '|O4', 'fortran_order': False, 'shape': (3, 3)} \n" + "v\x00{'descr': [('x', '<i4', (2,)), ('y', '<f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "\x16\x02{'descr': [('x', '<i4', (2,)),\n ('Info',\n [('value', '<c16'),\n ('y2', '<f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '<c16', (2,)),\n ('y3', '<f8', (2,)),\n ('z3', '<u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '<U8'), ('Value', '<c16')]),\n ('y', '<f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" + "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', 
'>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n" +''' + + +from cStringIO import StringIO +import os +import shutil +import tempfile + +from nose.tools import raises + +import numpy as np +from numpy.testing import assert_array_equal + +from numpy.lib import format + + +tempdir = None + +# Module-level setup. +def setup_module(): + global tempdir + tempdir = tempfile.mkdtemp() + +def teardown_module(): + global tempdir + if tempdir is not None and os.path.isdir(tempdir): + shutil.rmtree(tempdir) + tempdir = None + + +# Generate some basic arrays to test with. +scalars = [ + np.uint8, + np.int8, + np.uint16, + np.int16, + np.uint32, + np.int32, + np.uint64, + np.int64, + np.float32, + np.float64, + np.complex64, + np.complex128, + object, +] +basic_arrays = [] +for scalar in scalars: + for endian in '<>': + dtype = np.dtype(scalar).newbyteorder(endian) + basic = np.arange(15).astype(dtype) + basic_arrays.extend([ + # Empty + np.array([], dtype=dtype), + # Rank-0 + np.array(10, dtype=dtype), + # 1-D + basic, + # 2-D C-contiguous + basic.reshape((3,5)), + # 2-D F-contiguous + basic.reshape((3,5)).T, + # 2-D non-contiguous + basic.reshape((3,5))[::-1,::2], + ]) + +# More complicated record arrays. 
+# This is the structure of the table used for plain objects: +# +# +-+-+-+ +# |x|y|z| +# +-+-+-+ + +# Structure of a plain array description: +Pdescr = [ + ('x', 'i4', (2,)), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +# A plain list of tuples with values for testing: +PbufferT = [ + # x y z + ([3,2], [[6.,4.],[6.,4.]], 8), + ([4,3], [[7.,5.],[7.,5.]], 9), + ] + + +# This is the structure of the table used for nested objects (DON'T PANIC!): +# +# +-+---------------------------------+-----+----------+-+-+ +# |x|Info |color|info |y|z| +# | +-----+--+----------------+----+--+ +----+-----+ | | +# | |value|y2|Info2 |name|z2| |Name|Value| | | +# | | | +----+-----+--+--+ | | | | | | | +# | | | |name|value|y3|z3| | | | | | | | +# +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+ +# + +# The corresponding nested array description: +Ndescr = [ + ('x', 'i4', (2,)), + ('Info', [ + ('value', 'c16'), + ('y2', 'f8'), + ('Info2', [ + ('name', 'S2'), + ('value', 'c16', (2,)), + ('y3', 'f8', (2,)), + ('z3', 'u4', (2,))]), + ('name', 'S2'), + ('z2', 'b1')]), + ('color', 'S2'), + ('info', [ + ('Name', 'U8'), + ('Value', 'c16')]), + ('y', 'f8', (2, 2)), + ('z', 'u1')] + +NbufferT = [ + # x Info color info y z + # value y2 Info2 name z2 Name Value + # name value y3 z3 + ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8), + ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9), + ] + +record_arrays = [ + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), + np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), + np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), +] + +def roundtrip(arr): + f = StringIO() + format.write_array(f, arr) + f2 = StringIO(f.getvalue()) + arr2 = format.read_array(f2) + return arr2 + +def assert_equal(o1, o2): + assert o1 == o2 + + +def test_roundtrip(): + for arr 
in basic_arrays + record_arrays: + arr2 = roundtrip(arr) + yield assert_array_equal, arr, arr2 + +def test_memmap_roundtrip(): + for arr in basic_arrays + record_arrays: + if arr.dtype.hasobject: + # Skip these since they can't be mmap'ed. + continue + # Write it out normally and through mmap. + nfn = os.path.join(tempdir, 'normal.npy') + mfn = os.path.join(tempdir, 'memmap.npy') + fp = open(nfn, 'wb') + try: + format.write_array(fp, arr) + finally: + fp.close() + + fortran_order = (arr.flags.f_contiguous and not arr.flags.c_contiguous) + ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype, + shape=arr.shape, fortran_order=fortran_order) + ma[...] = arr + del ma + + # Check that both of these files' contents are the same. + fp = open(nfn, 'rb') + normal_bytes = fp.read() + fp.close() + fp = open(mfn, 'rb') + memmap_bytes = fp.read() + fp.close() + yield assert_equal, normal_bytes, memmap_bytes + + # Check that reading the file using memmap works. + ma = format.open_memmap(nfn, mode='r') + yield assert_array_equal, ma, arr + del ma + + +def test_write_version_1_0(): + f = StringIO() + arr = np.arange(1) + # These should pass. + format.write_array(f, arr, version=(1, 0)) + format.write_array(f, arr) + + # These should all fail. 
+ bad_versions = [ + (1, 1), + (0, 0), + (0, 1), + (2, 0), + (2, 2), + (255, 255), + ] + for version in bad_versions: + try: + format.write_array(f, arr, version=version) + except ValueError: + pass + else: + raise AssertionError("we should have raised a ValueError for the bad version %r" % (version,)) + + +bad_version_magic = [ + '\x93NUMPY\x01\x01', + '\x93NUMPY\x00\x00', + '\x93NUMPY\x00\x01', + '\x93NUMPY\x02\x00', + '\x93NUMPY\x02\x02', + '\x93NUMPY\xff\xff', +] +malformed_magic = [ + '\x92NUMPY\x01\x00', + '\x00NUMPY\x01\x00', + '\x93numpy\x01\x00', + '\x93MATLB\x01\x00', + '\x93NUMPY\x01', + '\x93NUMPY', + '', +] + +def test_read_magic_bad_magic(): + for magic in malformed_magic: + f = StringIO(magic) + yield raises(ValueError)(format.read_magic), f + +def test_read_version_1_0_bad_magic(): + for magic in bad_version_magic + malformed_magic: + f = StringIO(magic) + yield raises(ValueError)(format.read_array), f + + diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index 95dd4f581..048ffafc0 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -1,3 +1,4 @@ +import compiler import os import sys import inspect @@ -7,9 +8,10 @@ from numpy.core.multiarray import dtype as _dtype from numpy.core import product, ndarray __all__ = ['issubclass_', 'get_numpy_include', 'issubsctype', - 'issubdtype', 'deprecate', 'get_numarray_include', + 'issubdtype', 'deprecate', 'deprecate_with_doc', + 'get_numarray_include', 'get_include', 'info', 'source', 'who', - 'byte_bounds', 'may_share_memory'] + 'byte_bounds', 'may_share_memory', 'safe_eval'] def issubclass_(arg1, arg2): try: @@ -82,15 +84,32 @@ else: func.__name__ = name return func -def deprecate(func, oldname, newname): +def deprecate(func, oldname=None, newname=None): + """Deprecate old functions. + Issues a DeprecationWarning, adds warning to oldname's docstring, + rebinds oldname.__name__ and returns new function object. 
+
+    Example:
+    oldfunc = deprecate(newfunc, 'oldfunc', 'newfunc')
+
+    """
+    import warnings
+    if oldname is None:
+        oldname = func.func_name
+    if newname is None:
+        str1 = "%s is deprecated" % (oldname,)
+        depdoc = "%s is DEPRECATED!" % (oldname,)
+    else:
+        str1 = "%s is deprecated, use %s" % (oldname, newname)
+        depdoc = '%s is DEPRECATED! -- use %s instead' % (oldname, newname,)
+
     def newfunc(*args,**kwds):
-        warnings.warn("%s is deprecated, use %s" % (oldname, newname),
-                      DeprecationWarning)
+        warnings.warn(str1, DeprecationWarning)
         return func(*args, **kwds)
+
+    newfunc = _set_function_name(newfunc, oldname)
     doc = func.__doc__
-    depdoc = '%s is DEPRECATED in numpy: use %s instead' % (oldname, newname,)
     if doc is None:
         doc = depdoc
     else:
@@ -104,6 +123,24 @@ def deprecate(func, oldname, newname):
     newfunc.__dict__.update(d)
     return newfunc
 
+def deprecate_with_doc(somestr):
+    """Decorator to deprecate functions and provide detailed documentation
+    with 'somestr' that is added to the functions docstring.
+
+    Example:
+    depmsg = 'function numpy.lib.foo has been merged into numpy.lib.io.foobar'
+    @deprecate_with_doc(depmsg)
+    def foo():
+        pass
+
+    """
+
+    def _decorator(func):
+        newfunc = deprecate(func)
+        newfunc.__doc__ += "\n" + somestr
+        return newfunc
+    return _decorator
+
 get_numpy_include = deprecate(get_include, 'get_numpy_include', 'get_include')
 
 @@ -430,3 +467,113 @@ def source(object, output=sys.stdout):
         print >> output, inspect.getsource(object)
     except:
         print >> output, "Not available for this object."
+
+#-----------------------------------------------------------------------------
+
+# The following SafeEval class and company are adapted from Michael Spencer's
+# ASPN Python Cookbook recipe:
+#   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/364469
+# Accordingly it is mostly Copyright 2006 by Michael Spencer.
+# The recipe, like most of the other ASPN Python Cookbook recipes was made
+# available under the Python license.
+# http://www.python.org/license + +# It has been modified to: +# * handle unary -/+ +# * support True/False/None +# * raise SyntaxError instead of a custom exception. + +class SafeEval(object): + + def visit(self, node, **kw): + cls = node.__class__ + meth = getattr(self,'visit'+cls.__name__,self.default) + return meth(node, **kw) + + def default(self, node, **kw): + raise SyntaxError("Unsupported source construct: %s" % node.__class__) + + def visitExpression(self, node, **kw): + for child in node.getChildNodes(): + return self.visit(child, **kw) + + def visitConst(self, node, **kw): + return node.value + + def visitDict(self, node,**kw): + return dict([(self.visit(k),self.visit(v)) for k,v in node.items]) + + def visitTuple(self, node, **kw): + return tuple([self.visit(i) for i in node.nodes]) + + def visitList(self, node, **kw): + return [self.visit(i) for i in node.nodes] + + def visitUnaryAdd(self, node, **kw): + return +self.visit(node.getChildNodes()[0]) + + def visitUnarySub(self, node, **kw): + return -self.visit(node.getChildNodes()[0]) + + def visitName(self, node, **kw): + if node.name == 'False': + return False + elif node.name == 'True': + return True + elif node.name == 'None': + return None + else: + raise SyntaxError("Unknown name: %s" % node.name) + +def safe_eval(source): + """ Evaluate a string containing a Python literal expression without + allowing the execution of arbitrary non-literal code. + + Parameters + ---------- + source : str + + Returns + ------- + obj : object + + Raises + ------ + SyntaxError if the code is invalid Python expression syntax or if it + contains non-literal code. + + Examples + -------- + >>> from numpy.lib.utils import safe_eval + >>> safe_eval('1') + 1 + >>> safe_eval('[1, 2, 3]') + [1, 2, 3] + >>> safe_eval('{"foo": ("bar", 10.0)}') + {'foo': ('bar', 10.0)} + >>> safe_eval('import os') + Traceback (most recent call last): + ... 
+ SyntaxError: invalid syntax + >>> safe_eval('open("/home/user/.ssh/id_dsa").read()') + Traceback (most recent call last): + ... + SyntaxError: Unsupported source construct: compiler.ast.CallFunc + >>> safe_eval('dict') + Traceback (most recent call last): + ... + SyntaxError: Unknown name: dict + """ + walker = SafeEval() + try: + ast = compiler.parse(source, "eval") + except SyntaxError, err: + raise + try: + return walker.visit(ast) + except SyntaxError, err: + raise + +#----------------------------------------------------------------------------- + + |