diff options
field | value | date |
---|---|---|
author | Charles Harris <charlesr.harris@gmail.com> | 2016-03-19 12:32:14 -0600 |
committer | Charles Harris <charlesr.harris@gmail.com> | 2016-03-19 12:32:14 -0600 |
commit | a7c86008d94bba06914a1415049ff1fde9c9c451 (patch) | |
tree | 5da24d8dabba4a5497b0b2535fc7c1a9731e5bea | |
parent | 89ae05693b9bdfda78a6138e606be7b5531b1ab9 (diff) | |
parent | ed083cffe0c2dd6ebf42d418dddc71848b67800e (diff) | |
download | numpy-a7c86008d94bba06914a1415049ff1fde9c9c451.tar.gz | |
Merge pull request #7363 from seberg/masked-silence-2
ENH: Make no unshare mask future warnings less noisy
mode | file | lines changed |
---|---|---|
-rw-r--r-- | doc/release/1.11.0-notes.rst | 29 |
-rw-r--r-- | numpy/ma/core.py | 33 |
-rw-r--r-- | numpy/ma/extras.py | 6 |

3 files changed, 47 insertions, 21 deletions
diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst index c9b7a6d3e..2aa6daad1 100644 --- a/doc/release/1.11.0-notes.rst +++ b/doc/release/1.11.0-notes.rst @@ -41,8 +41,6 @@ Future Changes The following changes are scheduled for Numpy 1.12.0. * Support for Python 2.6, 3.2, and 3.3 will be dropped. -* Slicing a ``MaskedArray`` will return views of both data **and** mask. - Currently the mask is returned as a copy. * Relaxed stride checking will become the default. See the 1.8.0 release notes for a more extended discussion of what this change implies. * The behavior of the datetime64 "not a time" (NaT) value will be changed @@ -70,6 +68,33 @@ In a future release the following changes will be made. change. That differs from the current behavior where arrays that are f_contiguous but not c_contiguous can be viewed as a dtype type of different size causing the first dimension to change. +* Currently, taking a view of a masked array produces a confusing result. + For example, if we write ``masked_view = masked_original[:]``, then + ``masked_view``'s data array will be a view of ``masked_original``'s data + array, so modifications to one array's data will also affect the other: + ``masked_view[0] = 123; assert masked_original[0] == 123``. But currently, + the *mask* array is copied during assignment operations. While + mask is *initially* a view it is considered to be *shared*. + The first assignment to the masked array will thus cause an implicit + copy, so that changes of one array's mask will not affect the other: + ``masked_view[0] = np.ma.masked; assert masked_original[0] is not + np.ma.masked``. + A similar situation happens when explicitly constructing a masked + array using ``MaskedArray(data, mask)`` -- the returned array will have + a view of ``data`` and "shares" the ``mask``. In the future, these cases + will be normalized so that the data and mask arrays are treated the + same way, and modifications to either will propagate between views. 
+ The mask will not be copied during assignment operations and instead + the original mask will be modified as well. In 1.11, numpy will issue a + ``MaskedArrayFutureWarning`` warning whenever user code modifies the mask + of a view and this may cause values to propagate to another array. + To silence these warnings, and make your code robust against the + upcoming changes, you have two options: if you want to keep the current + behavior, call ``masked_view.unshare_mask()`` before modifying the mask. + If you want to get the future behavior early, use + ``masked_view._sharedmask = False``. However, note that setting + the ``_sharedmask`` attribute will break following explicit calls to + ``masked_view.unshare_mask()``. Compatibility notes diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 709542bc4..41e9d48b7 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -3123,7 +3123,6 @@ class MaskedArray(ndarray): Return the item described by i, as a masked array. """ - dout = self.data[indx] # We could directly use ndarray.__getitem__ on self. # But then we would have to modify __array_finalize__ to prevent the @@ -3194,19 +3193,6 @@ class MaskedArray(ndarray): locations. """ - # 2016.01.15 -- v1.11.0 - self._oldsharedmask = getattr(self, "_oldsharedmask", False) - self._oldsharedmask = self._oldsharedmask or self._sharedmask - if (self._mask is not nomask) and self._oldsharedmask: - warnings.warn( - "Currently, slicing will try to return a view of the data, but" - " will return a copy of the mask. In the future, it will try" - " to return both as views. 
This means that using" - " `__setitem__` will propagate values back through all masks" - " that are present.", - MaskedArrayFutureWarning - ) - if self is masked: raise MaskError('Cannot alter the masked element.') _data = self._data @@ -3248,10 +3234,24 @@ class MaskedArray(ndarray): _mask[indx] = mval elif not self._hardmask: # Unshare the mask if necessary to avoid propagation + # We want to remove the unshare logic from this place in the + # future. Note that _sharedmask has lots of false positives. if not self._isfield: - _oldsharedmask = self._oldsharedmask + if self._sharedmask and not ( + # If no one else holds a reference (we have two + # references (_mask and self._mask) -- add one for + # getrefcount) and the array owns its own data + # copying the mask should do nothing. + (sys.getrefcount(_mask) == 3) and _mask.flags.owndata): + # 2016.01.15 -- v1.11.0 + warnings.warn( + "setting an item on a masked array which has a shared " + "mask will not copy the mask and also change the " + "original mask array in the future.\n" + "Check the NumPy 1.11 release notes for more " + "information.", + MaskedArrayFutureWarning, stacklevel=2) self.unshare_mask() - self._oldsharedmask = _oldsharedmask _mask = self._mask # Set the data, then the mask _data[indx] = dval @@ -3457,7 +3457,6 @@ class MaskedArray(ndarray): if self._sharedmask: self._mask = self._mask.copy() self._sharedmask = False - self._oldsharedmask = False return self sharedmask = property(fget=lambda self: self._sharedmask, diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 9855b4e76..6e091652e 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -689,6 +689,7 @@ def median(a, axis=None, out=None, overwrite_input=False): # insert indices of low and high median ind.insert(axis, h - 1) low = asorted[ind] + low._sharedmask = False ind[axis] = h high = asorted[ind] # duplicate high if odd number of elements so mean does nothing @@ -1173,9 +1174,9 @@ def _covhelper(x, y=None, rowvar=True, 
allow_masked=True): # Define some common mask common_mask = np.logical_or(xmask, ymask) if common_mask is not nomask: - x.unshare_mask() - y.unshare_mask() xmask = x._mask = y._mask = ymask = common_mask + x._sharedmask = False + y._sharedmask = False x = ma.concatenate((x, y), axis) xnotmask = np.logical_not(np.concatenate((xmask, ymask), axis)).astype(int) x -= x.mean(axis=rowvar)[tup] @@ -1326,6 +1327,7 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, allow_masked=True, _denom = ma.sqrt(ma.multiply.outer(diag, diag)) else: _denom = diagflat(diag) + _denom._sharedmask = False # We know return is always a copy n = x.shape[1 - rowvar] if rowvar: for i in range(n - 1): |