summaryrefslogtreecommitdiff
path: root/numpy/ma/mrecords.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/ma/mrecords.py')
-rw-r--r--numpy/ma/mrecords.py318
1 files changed, 190 insertions, 128 deletions
diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py
index 644383925..382bcc972 100644
--- a/numpy/ma/mrecords.py
+++ b/numpy/ma/mrecords.py
@@ -5,19 +5,15 @@ where fields can be accessed as attributes.
Note that :class:`numpy.ma.MaskedArray` already supports structured datatypes
and the masking of individual fields.
-:author: Pierre Gerard-Marchant
+.. moduleauthor:: Pierre Gerard-Marchant
"""
from __future__ import division, absolute_import, print_function
-#!!!: * We should make sure that no field is called '_mask','mask','_fieldmask',
-#!!!: or whatever restricted keywords.
-#!!!: An idea would be to no bother in the first place, and then rename the
-#!!!: invalid fields with a trailing underscore...
-#!!!: Maybe we could just overload the parser function ?
-
-
-__author__ = "Pierre GF Gerard-Marchant"
+# We should make sure that no field is called '_mask', 'mask', '_fieldmask',
+# or any other restricted keyword. An idea would be to not bother in the
+# first place, and then rename the invalid fields with a trailing
+# underscore. Maybe we could just overload the parser function?
import sys
import warnings
@@ -36,20 +32,27 @@ _byteorderconv = np.core.records._byteorderconv
_typestr = ntypes._typestr
import numpy.ma as ma
-from numpy.ma import MAError, MaskedArray, masked, nomask, masked_array, \
- getdata, getmaskarray, filled
+from numpy.ma import (
+ MAError, MaskedArray, masked, nomask, masked_array, getdata,
+ getmaskarray, filled
+ )
_check_fill_value = ma.core._check_fill_value
-__all__ = ['MaskedRecords', 'mrecarray',
- 'fromarrays', 'fromrecords', 'fromtextfile', 'addfield',
- ]
+__all__ = [
+ 'MaskedRecords', 'mrecarray', 'fromarrays', 'fromrecords',
+ 'fromtextfile', 'addfield',
+ ]
reserved_fields = ['_data', '_mask', '_fieldmask', 'dtype']
+
def _getformats(data):
- "Returns the formats of each array of arraylist as a comma-separated string."
+ """
+ Returns the formats of arrays in arraylist as a comma-separated string.
+
+ """
if hasattr(data, 'dtype'):
return ",".join([desc[1] for desc in data.dtype.descr])
@@ -62,10 +65,14 @@ def _getformats(data):
formats += ','
return formats[:-1]
+
def _checknames(descr, names=None):
- """Checks that the field names of the descriptor ``descr`` are not some
-reserved keywords. If this is the case, a default 'f%i' is substituted.
-If the argument `names` is not None, updates the field names to valid names.
+ """
+ Checks that the field names of ``descr`` are not reserved keywords.
+
+ If this is the case, a default 'f%i' is substituted. If the argument
+ `names` is not None, updates the field names to valid names.
+
"""
ndescr = len(descr)
default_names = ['f%i' % i for i in range(ndescr)]
@@ -103,29 +110,33 @@ def _get_fieldmask(self):
class MaskedRecords(MaskedArray, object):
"""
-*IVariables*:
- _data : {recarray}
+ Attributes
+ ----------
+ _data : recarray
Underlying data, as a record array.
- _mask : {boolean array}
- Mask of the records. A record is masked when all its fields are masked.
- _fieldmask : {boolean recarray}
- Record array of booleans, setting the mask of each individual field of each record.
- _fill_value : {record}
+ _mask : boolean array
+ Mask of the records. A record is masked when all its fields are
+ masked.
+ _fieldmask : boolean recarray
+ Record array of booleans, setting the mask of each individual field
+ of each record.
+ _fill_value : record
Filling values for each field.
+
"""
- #............................................
+
def __new__(cls, shape, dtype=None, buf=None, offset=0, strides=None,
formats=None, names=None, titles=None,
byteorder=None, aligned=False,
mask=nomask, hard_mask=False, fill_value=None, keep_mask=True,
copy=False,
**options):
- #
+
self = recarray.__new__(cls, shape, dtype=dtype, buf=buf, offset=offset,
strides=strides, formats=formats, names=names,
titles=titles, byteorder=byteorder,
aligned=aligned,)
- #
+
mdtype = ma.make_mask_descr(self.dtype)
if mask is nomask or not np.size(mask):
if not keep_mask:
@@ -154,9 +165,9 @@ class MaskedRecords(MaskedArray, object):
dtype=mdtype)
self._mask = _mask
return self
- #......................................................
+
def __array_finalize__(self, obj):
- # Make sure we have a _fieldmask by default ..
+ # Make sure we have a _fieldmask by default
_mask = getattr(obj, '_mask', None)
if _mask is None:
objmask = getattr(obj, '_mask', nomask)
@@ -175,19 +186,29 @@ class MaskedRecords(MaskedArray, object):
_dict['_baseclass'] = recarray
return
-
def _getdata(self):
- "Returns the data as a recarray."
+ """
+ Returns the data as a recarray.
+
+ """
return ndarray.view(self, recarray)
+
_data = property(fget=_getdata)
def _getfieldmask(self):
- "Alias to mask"
+ """
+ Alias to mask.
+
+ """
return self._mask
+
_fieldmask = property(fget=_getfieldmask)
def __len__(self):
- "Returns the length"
+ """
+ Returns the length
+
+ """
# We have more than one record
if self.ndim:
return len(self._data)
@@ -197,20 +218,21 @@ class MaskedRecords(MaskedArray, object):
def __getattribute__(self, attr):
try:
return object.__getattribute__(self, attr)
- except AttributeError: # attr must be a fieldname
+ except AttributeError:
+ # attr must be a fieldname
pass
fielddict = ndarray.__getattribute__(self, 'dtype').fields
try:
res = fielddict[attr][:2]
except (TypeError, KeyError):
raise AttributeError("record array has no attribute %s" % attr)
- # So far, so good...
+ # So far, so good
_localdict = ndarray.__getattribute__(self, '__dict__')
_data = ndarray.view(self, _localdict['_baseclass'])
obj = _data.getfield(*res)
if obj.dtype.fields:
- raise NotImplementedError("MaskedRecords is currently limited to"\
- "simple records...")
+ raise NotImplementedError("MaskedRecords is currently limited to"
+ "simple records.")
# Get some special attributes
# Reset the object's mask
hasmasked = False
@@ -238,9 +260,11 @@ class MaskedRecords(MaskedArray, object):
obj = obj.item()
return obj
-
def __setattr__(self, attr, val):
- "Sets the attribute attr to the value val."
+ """
+ Sets the attribute attr to the value val.
+
+ """
# Should we call __setmask__ first ?
if attr in ['mask', 'fieldmask']:
self.__setmask__(val)
@@ -260,14 +284,15 @@ class MaskedRecords(MaskedArray, object):
exctype, value = sys.exc_info()[:2]
raise exctype(value)
else:
- # Get the list of names ......
+ # Get the list of names
fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
# Check the attribute
if attr not in fielddict:
return ret
- if newattr: # We just added this one
- try: # or this setattr worked on an internal
- # attribute.
+ if newattr:
+ # We just added this one or this setattr worked on an
+ # internal attribute.
+ try:
object.__delattr__(self, attr)
except:
return ret
@@ -276,7 +301,7 @@ class MaskedRecords(MaskedArray, object):
res = fielddict[attr][:2]
except (TypeError, KeyError):
raise AttributeError("record array has no attribute %s" % attr)
- #
+
if val is masked:
_fill_value = _localdict['_fill_value']
if _fill_value is not None:
@@ -291,19 +316,22 @@ class MaskedRecords(MaskedArray, object):
_localdict['_mask'].__setitem__(attr, mval)
return obj
-
def __getitem__(self, indx):
- """Returns all the fields sharing the same fieldname base.
-The fieldname base is either `_data` or `_mask`."""
+ """
+ Returns all the fields sharing the same fieldname base.
+
+ The fieldname base is either `_data` or `_mask`.
+
+ """
_localdict = self.__dict__
_mask = ndarray.__getattribute__(self, '_mask')
_data = ndarray.view(self, _localdict['_baseclass'])
- # We want a field ........
+ # We want a field
if isinstance(indx, basestring):
- #!!!: Make sure _sharedmask is True to propagate back to _fieldmask
- #!!!: Don't use _set_mask, there are some copies being made...
- #!!!: ...that break propagation
- #!!!: Don't force the mask to nomask, that wrecks easy masking
+ # Make sure _sharedmask is True to propagate back to _fieldmask.
+ # Don't use _set_mask, there are some copies being made that
+ # break propagation. Don't force the mask to nomask, that wrecks
+ # easy masking.
obj = _data[indx].view(MaskedArray)
obj._mask = _mask[indx]
obj._sharedmask = True
@@ -314,21 +342,26 @@ The fieldname base is either `_data` or `_mask`."""
if not obj.ndim and obj._mask:
return masked
return obj
- # We want some elements ..
- # First, the data ........
+ # We want some elements.
+ # First, the data.
obj = np.array(_data[indx], copy=False).view(mrecarray)
obj._mask = np.array(_mask[indx], copy=False).view(recarray)
return obj
- #....
+
def __setitem__(self, indx, value):
- "Sets the given record to value."
+ """
+ Sets the given record to value.
+
+ """
MaskedArray.__setitem__(self, indx, value)
if isinstance(indx, basestring):
self._mask[indx] = ma.getmaskarray(value)
-
def __str__(self):
- "Calculates the string representation."
+ """
+ Calculates the string representation.
+
+ """
if self.size > 1:
mstr = ["(%s)" % ",".join([str(i) for i in s])
for s in zip(*[getattr(self, f) for f in self.dtype.names])]
@@ -337,9 +370,12 @@ The fieldname base is either `_data` or `_mask`."""
mstr = ["%s" % ",".join([str(i) for i in s])
for s in zip([getattr(self, f) for f in self.dtype.names])]
return "(%s)" % ", ".join(mstr)
- #
+
def __repr__(self):
- "Calculates the repr representation."
+ """
+ Calculates the repr representation.
+
+ """
_names = self.dtype.names
fmt = "%%%is : %%s" % (max([len(n) for n in _names]) + 4,)
reprstr = [fmt % (f, getattr(self, f)) for f in self.dtype.names]
@@ -347,16 +383,19 @@ The fieldname base is either `_data` or `_mask`."""
reprstr.extend([fmt % (' fill_value', self.fill_value),
' )'])
return str("\n".join(reprstr))
-# #......................................................
+
def view(self, dtype=None, type=None):
- """Returns a view of the mrecarray."""
- # OK, basic copy-paste from MaskedArray.view...
+ """
+ Returns a view of the mrecarray.
+
+ """
+ # OK, basic copy-paste from MaskedArray.view.
if dtype is None:
if type is None:
output = ndarray.view(self)
else:
output = ndarray.view(self, type)
- # Here again...
+ # Here again.
elif type is None:
try:
if issubclass(dtype, ndarray):
@@ -368,8 +407,8 @@ The fieldname base is either `_data` or `_mask`."""
except TypeError:
dtype = np.dtype(dtype)
# we need to revert to MaskedArray, but keeping the possibility
- # ...of subclasses (eg, TimeSeriesRecords), so we'll force a type
- # ...set to the first parent
+ # of subclasses (eg, TimeSeriesRecords), so we'll force a type
+ # set to the first parent
if dtype.fields is None:
basetype = self.__class__.__bases__[0]
output = self.__array__().view(dtype, basetype)
@@ -387,27 +426,35 @@ The fieldname base is either `_data` or `_mask`."""
return output
def harden_mask(self):
- "Forces the mask to hard"
+ """
+ Forces the mask to hard.
+
+ """
self._hardmask = True
+
def soften_mask(self):
- "Forces the mask to soft"
+ """
+ Forces the mask to soft.
+
+ """
self._hardmask = False
def copy(self):
- """Returns a copy of the masked record."""
- _localdict = self.__dict__
+ """
+ Returns a copy of the masked record.
+
+ """
copied = self._data.copy().view(type(self))
copied._mask = self._mask.copy()
return copied
def tolist(self, fill_value=None):
- """Copy the data portion of the array to a hierarchical python
- list and returns that list.
+ """
+ Return the data portion of the array as a list.
- Data items are converted to the nearest compatible Python
- type. Masked values are converted to fill_value. If
- fill_value is None, the corresponding entries in the output
- list will be ``None``.
+ Data items are converted to the nearest compatible Python type.
+ Masked values are converted to fill_value. If fill_value is None,
+ the corresponding entries in the output list will be ``None``.
"""
if fill_value is not None:
@@ -416,10 +463,11 @@ The fieldname base is either `_data` or `_mask`."""
mask = narray(self._mask.tolist())
result[mask] = None
return result.tolist()
- #--------------------------------------------
- # Pickling
+
def __getstate__(self):
- """Return the internal state of the masked array, for pickling purposes.
+ """Return the internal state of the masked array.
+
+ This is for pickling.
"""
state = (1,
@@ -431,11 +479,13 @@ The fieldname base is either `_data` or `_mask`."""
self._fill_value,
)
return state
- #
+
def __setstate__(self, state):
- """Restore the internal state of the masked array, for pickling purposes.
- ``state`` is typically the output of the ``__getstate__`` output, and is a
- 5-tuple:
+ """
+ Restore the internal state of the masked array.
+
+ This is for pickling. ``state`` is typically the output of
+ ``__getstate__``, and is a 5-tuple:
- class name
- a tuple giving the shape of the data
@@ -449,9 +499,10 @@ The fieldname base is either `_data` or `_mask`."""
mdtype = dtype([(k, bool_) for (k, _) in self.dtype.descr])
self.__dict__['_mask'].__setstate__((shp, mdtype, isf, msk))
self.fill_value = flv
- #
+
def __reduce__(self):
- """Return a 3-tuple for pickling a MaskedArray.
+ """
+ Return a 3-tuple for pickling a MaskedArray.
"""
return (_mrreconstruct,
@@ -459,27 +510,27 @@ The fieldname base is either `_data` or `_mask`."""
self.__getstate__())
def _mrreconstruct(subtype, baseclass, baseshape, basetype,):
- """Internal function that builds a new MaskedArray from the
- information stored in a pickle.
+ """
+ Build a new MaskedArray from the information stored in a pickle.
"""
_data = ndarray.__new__(baseclass, baseshape, basetype).view(subtype)
-# _data._mask = ndarray.__new__(ndarray, baseshape, 'b1')
-# return _data
_mask = ndarray.__new__(ndarray, baseshape, 'b1')
return subtype.__new__(subtype, _data, mask=_mask, dtype=basetype,)
-
mrecarray = MaskedRecords
-#####---------------------------------------------------------------------------
-#---- --- Constructors ---
-#####---------------------------------------------------------------------------
+
+###############################################################################
+# Constructors #
+###############################################################################
+
def fromarrays(arraylist, dtype=None, shape=None, formats=None,
names=None, titles=None, aligned=False, byteorder=None,
fill_value=None):
- """Creates a mrecarray from a (flat) list of masked arrays.
+ """
+ Creates a mrecarray from a (flat) list of masked arrays.
Parameters
----------
@@ -504,6 +555,7 @@ def fromarrays(arraylist, dtype=None, shape=None, formats=None,
Notes
-----
Lists of tuples should be preferred over lists of lists for faster processing.
+
"""
datalist = [getdata(x) for x in arraylist]
masklist = [np.atleast_1d(getmaskarray(x)) for x in arraylist]
@@ -517,11 +569,11 @@ def fromarrays(arraylist, dtype=None, shape=None, formats=None,
return _array
-#..............................................................................
def fromrecords(reclist, dtype=None, shape=None, formats=None, names=None,
titles=None, aligned=False, byteorder=None,
fill_value=None, mask=nomask):
- """Creates a MaskedRecords from a list of records.
+ """
+ Creates a MaskedRecords from a list of records.
Parameters
----------
@@ -548,14 +600,11 @@ def fromrecords(reclist, dtype=None, shape=None, formats=None, names=None,
Notes
-----
Lists of tuples should be preferred over lists of lists for faster processing.
+
"""
# Grab the initial _fieldmask, if needed:
_mask = getattr(reclist, '_mask', None)
- # Get the list of records.....
- try:
- nfields = len(reclist[0])
- except TypeError:
- nfields = len(reclist[0].dtype)
+ # Get the list of records.
if isinstance(reclist, ndarray):
# Make sure we don't have some hidden mask
if isinstance(reclist, MaskedArray):
@@ -584,19 +633,24 @@ def fromrecords(reclist, dtype=None, shape=None, formats=None, names=None,
mrec._mask[:] = _mask
return mrec
+
def _guessvartypes(arr):
- """Tries to guess the dtypes of the str_ ndarray `arr`, by testing element-wise
-conversion. Returns a list of dtypes.
-The array is first converted to ndarray. If the array is 2D, the test is performed
-on the first line. An exception is raised if the file is 3D or more.
+ """
+ Tries to guess the dtypes of the str_ ndarray `arr`.
+
+ Guesses by testing element-wise conversion. Returns a list of dtypes.
+ The array is first converted to ndarray. If the array is 2D, the test
+ is performed on the first row. An exception is raised if the array is
+ 3D or more.
+
"""
vartypes = []
arr = np.asarray(arr)
- if len(arr.shape) == 2 :
+ if len(arr.shape) == 2:
arr = arr[0]
elif len(arr.shape) > 2:
raise ValueError("The array should be 2D at most!")
- # Start the conversion loop .......
+ # Start the conversion loop.
for f in arr:
try:
int(f)
@@ -605,7 +659,7 @@ on the first line. An exception is raised if the file is 3D or more.
float(f)
except ValueError:
try:
- val = complex(f)
+ complex(f)
except ValueError:
vartypes.append(arr.dtype)
else:
@@ -616,9 +670,13 @@ on the first line. An exception is raised if the file is 3D or more.
vartypes.append(np.dtype(int))
return vartypes
+
def openfile(fname):
- "Opens the file handle of file `fname`"
- # A file handle ...................
+ """
+ Opens the file handle of file `fname`.
+
+ """
+ # A file handle
if hasattr(fname, 'readline'):
return fname
# Try to open the file and guess its type
@@ -635,7 +693,8 @@ def openfile(fname):
def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
varnames=None, vartypes=None):
- """Creates a mrecarray from data stored in the file `filename`.
+ """
+ Creates a mrecarray from data stored in the file `fname`.
Parameters
----------
@@ -657,12 +716,12 @@ def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
Ultra simple: the varnames are in the header, one line"""
- # Try to open the file ......................
- f = openfile(fname)
+ # Try to open the file.
+ ftext = openfile(fname)
# Get the first non-empty line as the varnames
while True:
- line = f.readline()
+ line = ftext.readline()
firstline = line[:line.find(commentchar)].strip()
_varnames = firstline.split(delimitor)
if len(_varnames) > 1:
@@ -670,13 +729,13 @@ def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
if varnames is None:
varnames = _varnames
- # Get the data ..............................
- _variables = masked_array([line.strip().split(delimitor) for line in f
- if line[0] != commentchar and len(line) > 1])
+ # Get the data.
+ _variables = masked_array([line.strip().split(delimitor) for line in ftext
+ if line[0] != commentchar and len(line) > 1])
(_, nfields) = _variables.shape
- f.close()
+ ftext.close()
- # Try to guess the dtype ....................
+ # Try to guess the dtype.
if vartypes is None:
vartypes = _guessvartypes(_variables[0])
else:
@@ -687,11 +746,11 @@ def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
warnings.warn(msg % (len(vartypes), nfields))
vartypes = _guessvartypes(_variables[0])
- # Construct the descriptor ..................
+ # Construct the descriptor.
mdescr = [(n, f) for (n, f) in zip(varnames, vartypes)]
mfillv = [ma.default_fill_value(f) for f in vartypes]
- # Get the data and the mask .................
+ # Get the data and the mask.
# We just need a list of masked_arrays. It's easier to create it like that:
_mask = (_variables.T == missingchar)
_datalist = [masked_array(a, mask=m, dtype=t, fill_value=f)
@@ -699,18 +758,21 @@ def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
return fromarrays(_datalist, dtype=mdescr)
-#....................................................................
+
def addfield(mrecord, newfield, newfieldname=None):
- """Adds a new field to the masked record array, using `newfield` as data
-and `newfieldname` as name. If `newfieldname` is None, the new field name is
-set to 'fi', where `i` is the number of existing fields.
+ """Adds a new field to the masked record array.
+
+ Uses `newfield` as data and `newfieldname` as name. If `newfieldname`
+ is None, the new field name is set to 'fi', where `i` is the number of
+ existing fields.
+
"""
_data = mrecord._data
_mask = mrecord._mask
if newfieldname is None or newfieldname in reserved_fields:
newfieldname = 'f%i' % len(_data.dtype)
newfield = ma.array(newfield)
- # Get the new data ............
+ # Get the new data.
# Create a new empty recarray
newdtype = np.dtype(_data.dtype.descr + [(newfieldname, newfield.dtype)])
newdata = recarray(_data.shape, newdtype)
@@ -720,7 +782,7 @@ set to 'fi', where `i` is the number of existing fields.
# Add the new field
newdata.setfield(newfield._data, *newdata.dtype.fields[newfieldname])
newdata = newdata.view(MaskedRecords)
- # Get the new mask .............
+ # Get the new mask
# Create a new empty recarray
newmdtype = np.dtype([(n, bool_) for n in newdtype.names])
newmask = recarray(_data.shape, newmdtype)