summaryrefslogtreecommitdiff
path: root/numpy/lib/_datasource.py
diff options
context:
space:
mode:
authorPauli Virtanen <pav@iki.fi>2009-10-02 19:33:33 +0000
committerPauli Virtanen <pav@iki.fi>2009-10-02 19:33:33 +0000
commit474d013a3b38c5909a7381cfa0cc2c8203807cfa (patch)
treeaf895af917b636c1a0ddcf94a7134052a6d6e55e /numpy/lib/_datasource.py
parentf1e3392d6d8813ed146ce1675f65a880634f727b (diff)
downloadnumpy-474d013a3b38c5909a7381cfa0cc2c8203807cfa.tar.gz
Docstring update: lib
Diffstat (limited to 'numpy/lib/_datasource.py')
-rw-r--r--numpy/lib/_datasource.py288
1 files changed, 167 insertions, 121 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 0fe594ac3..4bfbf0ac4 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -15,7 +15,7 @@ DataSource files can originate locally or remotely:
DataSource files can also be compressed or uncompressed. Currently only gzip
and bz2 are supported.
-Example:
+Example::
>>> # Create a DataSource, use os.curdir (default) for local storage.
>>> ds = datasource.DataSource()
@@ -43,6 +43,28 @@ from shutil import rmtree
# TODO: .zip support, .tar support?
class _FileOpeners(object):
+ """
+ Container for different methods to open (un-)compressed files.
+
+ `_FileOpeners` contains a dictionary that holds one method for each
+ supported file format. Item lookup (``__getitem__``) is implemented so that
+ an instance of `_FileOpeners` itself can be indexed with the keys of that
+ dictionary. Currently uncompressed files as well as files
+ compressed with ``gzip`` or ``bz2`` compression are supported.
+
+ Notes
+ -----
+ `_file_openers`, an instance of `_FileOpeners`, is made available for
+ use in the `_datasource` module.
+
+ Examples
+ --------
+ >>> np.lib._datasource._file_openers.keys()
+ [None, '.bz2', '.gz']
+ >>> np.lib._datasource._file_openers['.gz'] is gzip.open
+ True
+
+ """
def __init__(self):
self._loaded = False
self._file_openers = {None: open}
@@ -62,6 +84,21 @@ class _FileOpeners(object):
self._loaded = True
def keys(self):
+ """
+ Return the keys of currently supported file openers.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ keys : list
+ The keys are None for uncompressed files and the file extension
+ strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression
+ methods.
+
+ """
self._load()
return self._file_openers.keys()
def __getitem__(self, key):
@@ -71,23 +108,34 @@ class _FileOpeners(object):
_file_openers = _FileOpeners()
def open(path, mode='r', destpath=os.curdir):
- """Open ``path`` with ``mode`` and return the file object.
-
- If ``path`` is an URL, it will be downloaded, stored in the DataSource
- directory and opened from there.
-
- *Parameters*:
-
- path : {string}
-
- mode : {string}, optional
-
- destpath : {string}, optional
- Destination directory where URLs will be downloaded and stored.
-
- *Returns*:
-
- file object
+ """
+ Open `path` with `mode` and return the file object.
+
+ If `path` is a URL, it will be downloaded, stored in the `destpath`
+ directory of the `DataSource`, and opened from there.
+
+ Parameters
+ ----------
+ path : str
+ Local file path or URL to open.
+ mode : str, optional
+ Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
+ append. Available modes depend on the type of object specified by path.
+ Default is 'r'.
+ destpath : str, optional
+ Path to the directory where the source file gets downloaded to for use.
+ If `destpath` is None, a temporary directory will be created. The
+ default path is the current directory.
+
+ Returns
+ -------
+ out : file object
+ The opened file.
+
+ Notes
+ -----
+ This is a convenience function that instantiates a `DataSource` and
+ returns the file object from ``DataSource.open(path)``.
"""
@@ -96,49 +144,52 @@ def open(path, mode='r', destpath=os.curdir):
class DataSource (object):
- """A generic data source file (file, http, ftp, ...).
+ """
+ DataSource(destpath='.')
+
+ A generic data source file (file, http, ftp, ...).
- DataSources could be local files or remote files/URLs. The files may
- also be compressed or uncompressed. DataSource hides some of the low-level
+ DataSources can be local files or remote files/URLs. The files may
+ also be compressed or uncompressed. DataSource hides some of the low-level
details of downloading the file, allowing you to simply pass in a valid
file path (or URL) and obtain a file object.
- *Methods*:
-
- - exists : test if the file exists locally or remotely
- - abspath : get absolute path of the file in the DataSource directory
- - open : open the file
-
- *Example URL DataSource*::
+ Parameters
+ ----------
+ destpath : str or None, optional
+ Path to the directory where the source file gets downloaded to for use.
+ If `destpath` is None, a temporary directory will be created.
+ The default path is the current directory.
- # Initialize DataSource with a local directory, default is os.curdir.
- ds = DataSource('/home/guido')
+ Notes
+ -----
+ URLs require a scheme string (``http://``) to be used, without it they
+ will fail::
- # Open remote file.
- # File will be downloaded and opened from here:
- # /home/guido/site/xyz.txt
- ds.open('http://fake.xyz.web/site/xyz.txt')
+ >>> repos = DataSource()
+ >>> repos.exists('www.google.com/index.html')
+ False
+ >>> repos.exists('http://www.google.com/index.html')
+ True
- *Example using DataSource for temporary files*::
+ Temporary directories are deleted when the DataSource is deleted.
- # Initialize DataSource with 'None' for the local directory.
- ds = DataSource(None)
+ Examples
+ --------
- # Open local file.
- # Opened file exists in a temporary directory like:
- # /tmp/tmpUnhcvM/foobar.txt
- # Temporary directories are deleted when the DataSource is deleted.
- ds.open('/home/guido/foobar.txt')
+ ::
- *Notes*:
- BUG : URLs require a scheme string ('http://') to be used.
- www.google.com will fail.
+ >>> ds = DataSource('/home/guido')
+ >>> urlname = 'http://www.google.com/index.html'
+ >>> gfile = ds.open('http://www.google.com/index.html') # open remote file
+ >>> ds.abspath(urlname)
+ '/home/guido/www.google.com/index.html'
- >>> repos.exists('www.google.com/index.html')
- False
-
- >>> repos.exists('http://www.google.com/index.html')
- True
+ >>> ds = DataSource(None) # use with temporary file
+ >>> ds.open('/home/guido/foobar.txt')
+ <open file '/home/guido/foobar.txt', mode 'r' at 0x91d4430>
+ >>> ds.abspath('/home/guido/foobar.txt')
+ '/tmp/tmpy4pgsP/home/guido/foobar.txt'
"""
@@ -278,25 +329,23 @@ class DataSource (object):
"""
Return absolute path of file in the DataSource directory.
- If `path` is an URL, the ``abspath`` will be either the location
+ If `path` is an URL, then `abspath` will return either the location
the file exists locally or the location it would exist when opened
- using the ``open`` method.
-
- The functionality is idential to os.path.abspath.
+ using the `open` method.
Parameters
----------
- path : string
+ path : str
Can be a local file or a remote URL.
Returns
-------
- out : string
- Complete path, rooted in the DataSource destination directory.
+ out : str
+ Complete path, including the `DataSource` destination directory.
- See Also
- --------
- open
+ Notes
+ -----
+ The functionality is based on `os.path.abspath`.
"""
# We do this here to reduce the 'import numpy' initial import time.
@@ -340,14 +389,13 @@ class DataSource (object):
Test if `path` exists as (and in this order):
- a local file.
- - a remote URL that have been downloaded and stored locally in the
- DataSource directory.
- - a remote URL that has not been downloaded, but is valid and
- accessible.
+ - a remote URL that has been downloaded and stored locally in the
+ `DataSource` directory.
+ - a remote URL that has not been downloaded, but is valid and accessible.
Parameters
----------
- path : string
+ path : str
Can be a local file or a remote URL.
Returns
@@ -355,16 +403,12 @@ class DataSource (object):
out : bool
True if `path` exists.
- See Also
- --------
- abspath
-
Notes
-----
- When `path` is an URL, ``exist`` will return True if it's either stored
- locally in the DataSource directory, or is a valid remote URL. DataSource
- does not discriminate between to two, the file is accessible if it exists
- in either location.
+ When `path` is a URL, `exists` will return True if it's either stored
+ locally in the `DataSource` directory, or is a valid remote URL.
+ `DataSource` does not discriminate between the two; the file is accessible
+ if it exists in either location.
"""
# We import this here because importing urllib2 is slow and
@@ -394,17 +438,17 @@ class DataSource (object):
"""
Open and return file-like object.
- If ``path`` is an URL, it will be downloaded, stored in the DataSource
+ If `path` is an URL, it will be downloaded, stored in the `DataSource`
directory and opened from there.
Parameters
----------
- path : string
- Local file path or URL to open
+ path : str
+ Local file path or URL to open.
mode : {'r', 'w', 'a'}, optional
Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
append. Available modes depend on the type of object specified by
- `path`.
+ `path`. Default is 'r'.
Returns
-------
@@ -435,30 +479,39 @@ class DataSource (object):
class Repository (DataSource):
"""
- A data Repository where multiple DataSource's share a base URL/directory.
+ Repository(baseurl, destpath='.')
- Repository extends DataSource by prepending a base URL (or directory) to
- all the files it handles. Use a Repository when you will be working with
- multiple files from one base URL. Initialize the Respository with the
+ A data repository where multiple DataSources share a base URL/directory.
+
+ `Repository` extends `DataSource` by prepending a base URL (or directory)
+ to all the files it handles. Use `Repository` when you will be working
+ with multiple files from one base URL. Initialize `Repository` with the
base URL, then refer to each file by its filename only.
- *Methods*:
+ Parameters
+ ----------
+ baseurl : str
+ Path to the local directory or remote location that contains the
+ data files.
+ destpath : str or None, optional
+ Path to the directory where the source file gets downloaded to for use.
+ If `destpath` is None, a temporary directory will be created.
+ The default path is the current directory.
- - exists : test if the file exists locally or remotely
- - abspath : get absolute path of the file in the DataSource directory
- - open : open the file
+ Examples
+ --------
+ To analyze all files in the repository, do something like this
+ (note: this is not self-contained code)::
- *Toy example*::
+ >>> repos = np.lib._datasource.Repository('/home/user/data/dir/')
+ >>> for filename in filelist:
+ ... fp = repos.open(filename)
+ ... fp.analyze()
+ ... fp.close()
- # Analyze all files in the repository.
- repos = Repository('/home/user/data/dir/')
- for filename in filelist:
- fp = repos.open(filename)
- fp.analyze()
- fp.close()
+ Similarly you could use a URL for a repository::
- # Similarly you could use a URL for a repository.
- repos = Repository('http://www.xyz.edu/data')
+ >>> repos = np.lib._datasource.Repository('http://www.xyz.edu/data')
"""
@@ -487,25 +540,20 @@ class Repository (DataSource):
"""
Return absolute path of file in the Repository directory.
- If `path` is an URL, the ``abspath`` will be either the location
+ If `path` is an URL, then `abspath` will return either the location
the file exists locally or the location it would exist when opened
- using the ``open`` method.
-
- The functionality is idential to os.path.abspath.
+ using the `open` method.
Parameters
----------
- path : string
- Can be a local file or a remote URL.
+ path : str
+ Can be a local file or a remote URL. This may, but does not have
+ to, include the `baseurl` with which the `Repository` was initialized.
Returns
-------
- out : string
- Complete path, rooted in the DataSource destination directory.
-
- See Also
- --------
- open
+ out : str
+ Complete path, including the `DataSource` destination directory.
"""
return DataSource.abspath(self, self._fullpath(path))
@@ -517,31 +565,28 @@ class Repository (DataSource):
Test if `path` exists as (and in this order):
- a local file.
- - a remote URL that have been downloaded and stored locally in the
- DataSource directory.
+ - a remote URL that has been downloaded and stored locally in the
+ `DataSource` directory.
- a remote URL that has not been downloaded, but is valid and
accessible.
Parameters
----------
- path : string
- Can be a local file or a remote URL.
+ path : str
+ Can be a local file or a remote URL. This may, but does not have
+ to, include the `baseurl` with which the `Repository` was initialized.
Returns
-------
out : bool
True if `path` exists.
- See Also
- --------
- abspath
-
Notes
-----
- When `path` is an URL, ``exist`` will return True if it's either stored
- locally in the DataSource directory, or is a valid remote URL. DataSource
- does not discriminate between to two, the file is accessible if it exists
- in either location.
+ When `path` is a URL, `exists` will return True if it's either stored
+ locally in the `DataSource` directory, or is a valid remote URL.
+ `DataSource` does not discriminate between the two; the file is accessible
+ if it exists in either location.
"""
return DataSource.exists(self, self._fullpath(path))
@@ -555,12 +600,13 @@ class Repository (DataSource):
Parameters
----------
- path : string
- Local file path or URL to open
+ path : str
+ Local file path or URL to open. This may, but does not have to,
+ include the `baseurl` with which the `Repository` was initialized.
mode : {'r', 'w', 'a'}, optional
Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
append. Available modes depend on the type of object specified by
- `path`.
+ `path`. Default is 'r'.
Returns
-------