summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST.in1
-rw-r--r--benchmarks/benchmarks/bench_app.py2
-rw-r--r--benchmarks/benchmarks/common.py12
-rw-r--r--doc/release/1.11.0-notes.rst7
-rw-r--r--numpy/core/fromnumeric.py2
-rw-r--r--numpy/core/tests/test_numeric.py10
-rw-r--r--numpy/distutils/system_info.py49
-rw-r--r--numpy/ma/core.py55
-rw-r--r--numpy/ma/tests/test_core.py36
-rw-r--r--numpy/tests/test_scripts.py18
10 files changed, 159 insertions, 33 deletions
diff --git a/MANIFEST.in b/MANIFEST.in
index 56d40efbf..3695dfe57 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -11,6 +11,7 @@ include numpy/random/mtrand/generate_mtrand_c.py
recursive-include numpy/random/mtrand *.pyx *.pxd
# Add build support that should go in sdist, but not go in bdist/be installed
recursive-include numpy/_build_utils *
+recursive-include numpy/linalg/lapack_lite *.c *.h
# Add sdist files whose use depends on local configuration.
include numpy/core/src/multiarray/cblasfuncs.c
include numpy/core/src/multiarray/python_xerbla.c
diff --git a/benchmarks/benchmarks/bench_app.py b/benchmarks/benchmarks/bench_app.py
index 0e2aca64b..ccf6e4c4a 100644
--- a/benchmarks/benchmarks/bench_app.py
+++ b/benchmarks/benchmarks/bench_app.py
@@ -4,6 +4,8 @@ from .common import Benchmark
import numpy as np
+from six.moves import xrange
+
class LaplaceInplace(Benchmark):
params = ['inplace', 'normal']
diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py
index c99b0afb8..e98396bed 100644
--- a/benchmarks/benchmarks/common.py
+++ b/benchmarks/benchmarks/common.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, print_function
+
import numpy
import random
@@ -26,7 +28,7 @@ TYPES1 = [
# values which will be used to construct our sample data matrices
# replicate 10 times to speed up initial imports of this helper
# and generate some redundancy
-values = [random.uniform(0, 100) for x in range(nx*ny/10)]*10
+values = [random.uniform(0, 100) for x in range(nx*ny//10)]*10
squares = {t: numpy.array(values,
dtype=getattr(numpy, t)).reshape((nx, ny))
@@ -34,16 +36,16 @@ squares = {t: numpy.array(values,
# adjust complex ones to have non-degenerated imagery part -- use
# original data transposed for that
-for t, v in squares.iteritems():
+for t, v in squares.items():
if t.startswith('complex'):
v += v.T*1j
# smaller squares
-squares_ = {t: s[:nxs, :nys] for t, s in squares.iteritems()}
+squares_ = {t: s[:nxs, :nys] for t, s in squares.items()}
# vectors
-vectors = {t: s[0] for t, s in squares.iteritems()}
+vectors = {t: s[0] for t, s in squares.items()}
-indexes = range(nx)
+indexes = list(range(nx))
# so we do not have all items
indexes.pop(5)
indexes.pop(95)
diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst
index 7c2ed2133..fac868ca3 100644
--- a/doc/release/1.11.0-notes.rst
+++ b/doc/release/1.11.0-notes.rst
@@ -91,6 +91,13 @@ The function now internally calls the generic ``npy_amergesort``
when the type does not implement a merge-sort kind of ``argsort``
method.
+Memory and speed improvements for masked arrays
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Creating a masked array with ``mask=True`` (resp. ``mask=False``) now uses
+``np.ones`` (resp. ``np.zeros``) to create the mask, which is faster and avoids
+a big memory peak. Another optimization was done to avoid a memory peak and
+useless computations when printing a masked array.
+
Changes
=======
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 0fc572cb6..197513294 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -1134,7 +1134,7 @@ def resize(a, new_shape):
a = ravel(a)
Na = len(a)
if not Na:
- return mu.zeros(new_shape, a.dtype.char)
+ return mu.zeros(new_shape, a.dtype)
total_size = um.multiply.reduce(new_shape)
n_copies = int(total_size / Na)
extra = total_size % Na
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index f5c22392a..43dad42f1 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -30,7 +30,15 @@ class TestResize(TestCase):
def test_zeroresize(self):
A = np.array([[1, 2], [3, 4]])
Ar = np.resize(A, (0,))
- assert_equal(Ar, np.array([]))
+ assert_array_equal(Ar, np.array([]))
+ assert_equal(A.dtype, Ar.dtype)
+
+ def test_reshape_from_zero(self):
+ # See also gh-6740
+ A = np.zeros(0, dtype=[('a', np.float32, 1)])
+ Ar = np.resize(A, (2, 1))
+ assert_array_equal(Ar, np.zeros((2, 1), Ar.dtype))
+ assert_equal(A.dtype, Ar.dtype)
class TestNonarrayArgs(TestCase):
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 7ea8b8c62..94436243e 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -1678,33 +1678,60 @@ class blas_info(system_info):
info = self.check_libs(lib_dirs, blas_libs, [])
if info is None:
return
- if platform.system() != 'Windows' and self.has_cblas():
+ if platform.system() == 'Windows':
# The check for windows is needed because has_cblas uses the
# same compiler that was used to compile Python and msvc is
# often not installed when mingw is being used. This rough
# treatment is not desirable, but windows is tricky.
- info['language'] = 'c'
- info['define_macros'] = [('HAVE_CBLAS', None)]
- else:
info['language'] = 'f77' # XXX: is it generally true?
+ else:
+ lib = self.has_cblas(info)
+ if lib is not None:
+ info['language'] = 'c'
+ info['libraries'] = [lib]
+ info['define_macros'] = [('HAVE_CBLAS', None)]
self.set_info(**info)
- def has_cblas(self):
- # primitive cblas check by looking for the header
+ def has_cblas(self, info):
+ # primitive cblas check by looking for the header and trying to link
+ # cblas or blas
res = False
c = distutils.ccompiler.new_compiler()
tmpdir = tempfile.mkdtemp()
- s = """#include <cblas.h>"""
+ s = """#include <cblas.h>
+ int main(int argc, const char *argv[])
+ {
+ double a[4] = {1,2,3,4};
+ double b[4] = {5,6,7,8};
+ return cblas_ddot(4, a, 1, b, 1) > 10;
+ }"""
src = os.path.join(tmpdir, 'source.c')
try:
with open(src, 'wt') as f:
f.write(s)
+
try:
- c.compile([src], output_dir=tmpdir,
- include_dirs=self.get_include_dirs())
- res = True
+ # check we can compile (find headers)
+ obj = c.compile([src], output_dir=tmpdir,
+ include_dirs=self.get_include_dirs())
+
+ # check we can link (find library)
+ # some systems have separate cblas and blas libs. First
+ # check for cblas lib, and if not present check for blas lib.
+ try:
+ c.link_executable(obj, os.path.join(tmpdir, "a.out"),
+ libraries=["cblas"],
+ library_dirs=info['library_dirs'],
+ extra_postargs=info.get('extra_link_args', []))
+ res = "cblas"
+ except distutils.ccompiler.LinkError:
+ c.link_executable(obj, os.path.join(tmpdir, "a.out"),
+ libraries=["blas"],
+ library_dirs=info['library_dirs'],
+ extra_postargs=info.get('extra_link_args', []))
+ res = "blas"
except distutils.ccompiler.CompileError:
- res = False
+ res = None
finally:
shutil.rmtree(tmpdir)
return res
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index b9f7da092..25e542cd6 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -1248,7 +1248,7 @@ def _recursive_make_descr(datatype, newtype=bool_):
# Is this some kind of composite a la (np.float,2)
elif datatype.subdtype:
mdescr = list(datatype.subdtype)
- mdescr[0] = newtype
+ mdescr[0] = _recursive_make_descr(datatype.subdtype[0], newtype)
return tuple(mdescr)
else:
return newtype
@@ -2684,6 +2684,8 @@ class MaskedArray(ndarray):
_defaultmask = nomask
_defaulthardmask = False
_baseclass = ndarray
+ # Maximum number of elements per axis used when printing an array.
+ _print_width = 100
def __new__(cls, data=None, mask=nomask, dtype=None, copy=False,
subok=True, ndmin=0, fill_value=None,
@@ -2756,13 +2758,19 @@ class MaskedArray(ndarray):
_data._sharedmask = True
else:
# Case 2. : With a mask in input.
- # Read the mask with the current mdtype
- try:
- mask = np.array(mask, copy=copy, dtype=mdtype)
- # Or assume it's a sequence of bool/int
- except TypeError:
- mask = np.array([tuple([m] * len(mdtype)) for m in mask],
- dtype=mdtype)
+ # If mask is boolean, create an array of True or False
+ if mask is True and mdtype == MaskType:
+ mask = np.ones(_data.shape, dtype=mdtype)
+ elif mask is False and mdtype == MaskType:
+ mask = np.zeros(_data.shape, dtype=mdtype)
+ else:
+ # Read the mask with the current mdtype
+ try:
+ mask = np.array(mask, copy=copy, dtype=mdtype)
+ # Or assume it's a sequence of bool/int
+ except TypeError:
+ mask = np.array([tuple([m] * len(mdtype)) for m in mask],
+ dtype=mdtype)
# Make sure the mask and the data have the same shape
if mask.shape != _data.shape:
(nd, nm) = (_data.size, mask.size)
@@ -3695,7 +3703,7 @@ class MaskedArray(ndarray):
if m is nomask:
res = self._data
else:
- if m.shape == ():
+ if m.shape == () and m.itemsize==len(m.dtype):
if m.dtype.names:
m = m.view((bool, len(m.dtype)))
if m.any():
@@ -3710,8 +3718,19 @@ class MaskedArray(ndarray):
# convert to object array to make filled work
names = self.dtype.names
if names is None:
- res = self._data.astype("O")
- res.view(ndarray)[m] = f
+ data = self._data
+ mask = m
+ # For big arrays, to avoid a costly conversion to the
+ # object dtype, extract the corners before the conversion.
+ for axis in range(self.ndim):
+ if data.shape[axis] > self._print_width:
+ ind = self._print_width // 2
+ arr = np.split(data, (ind, -ind), axis=axis)
+ data = np.concatenate((arr[0], arr[2]), axis=axis)
+ arr = np.split(mask, (ind, -ind), axis=axis)
+ mask = np.concatenate((arr[0], arr[2]), axis=axis)
+ res = data.astype("O")
+ res.view(ndarray)[mask] = f
else:
rdtype = _recursive_make_descr(self.dtype, "O")
res = self._data.astype(rdtype)
@@ -4690,7 +4709,7 @@ class MaskedArray(ndarray):
See Also
--------
numpy.ma.dot : equivalent function
-
+
"""
return dot(self, b, out=out, strict=strict)
@@ -5850,6 +5869,18 @@ class mvoid(MaskedArray):
"""
m = self._mask
+ if isinstance(m[indx], ndarray):
+ # Can happen when indx is a multi-dimensional field:
+ # A = ma.masked_array(data=[([0,1],)], mask=[([True,
+ # False],)], dtype=[("A", ">i2", (2,))])
+ # x = A[0]; y = x["A"]; then y.mask["A"].size==2
+ # and we can not say masked/unmasked.
+ # The result is no longer mvoid!
+ # See also issue #6724.
+ return masked_array(
+ data=self._data[indx], mask=m[indx],
+ fill_value=self._fill_value[indx],
+ hard_mask=self._hardmask)
if m is not nomask and m[indx]:
return masked
return self._data[indx]
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index e5fdfddb1..cecdedf26 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -191,6 +191,15 @@ class TestMaskedArray(TestCase):
dma_3 = MaskedArray(dma_1, mask=[1, 0, 0, 0] * 6)
fail_if_equal(dma_3.mask, dma_1.mask)
+ x = array([1, 2, 3], mask=True)
+ assert_equal(x._mask, [True, True, True])
+ x = array([1, 2, 3], mask=False)
+ assert_equal(x._mask, [False, False, False])
+ y = array([1, 2, 3], mask=x._mask, copy=False)
+ assert_(np.may_share_memory(x.mask, y.mask))
+ y = array([1, 2, 3], mask=x._mask, copy=True)
+ assert_(not np.may_share_memory(x.mask, y.mask))
+
def test_creation_with_list_of_maskedarrays(self):
# Tests creaating a masked array from alist of masked arrays.
x = array(np.arange(5), mask=[1, 0, 0, 0, 0])
@@ -599,6 +608,13 @@ class TestMaskedArray(TestCase):
control = np.array([(0, 1), (2, 0)], dtype=a['B'].dtype)
assert_equal(test, control)
+ # test if mask gets set correctly (see #6760)
+ Z = numpy.ma.zeros(2, numpy.dtype([("A", "(2,2)i1,(2,2)i1", (2,2))]))
+ assert_equal(Z.data.dtype, numpy.dtype([('A', [('f0', 'i1', (2, 2)),
+ ('f1', 'i1', (2, 2))], (2, 2))]))
+ assert_equal(Z.mask.dtype, numpy.dtype([('A', [('f0', '?', (2, 2)),
+ ('f1', '?', (2, 2))], (2, 2))]))
+
def test_filled_w_f_order(self):
# Test filled w/ F-contiguous array
a = array(np.array([(0, 1, 2), (4, 5, 6)], order='F'),
@@ -625,6 +641,18 @@ class TestMaskedArray(TestCase):
control = "[(--, (2, --)) (4, (--, 6.0))]"
assert_equal(str(test), control)
+ # Test 0-d array with multi-dimensional dtype
+ t_2d0 = masked_array(data = (0, [[0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0]],
+ 0.0),
+ mask = (False, [[True, False, True],
+ [False, False, True]],
+ False),
+ dtype = "int, (2,3)float, float")
+ control = "(0, [[--, 0.0, --], [0.0, 0.0, --]], 0.0)"
+ assert_equal(str(t_2d0), control)
+
+
def test_flatten_structured_array(self):
# Test flatten_structured_array on arrays
# On ndarray
@@ -691,6 +719,14 @@ class TestMaskedArray(TestCase):
self.assertTrue(f['a'] is masked)
assert_equal(f[1], 4)
+ # exotic dtype
+ A = masked_array(data=[([0,1],)],
+ mask=[([True, False],)],
+ dtype=[("A", ">i2", (2,))])
+ assert_equal(A[0]["A"], A["A"][0])
+ assert_equal(A[0]["A"], masked_array(data=[0, 1],
+ mask=[True, False], dtype=">i2"))
+
def test_mvoid_iter(self):
# Test iteration on __getitem__
ndtype = [('a', int), ('b', int)]
diff --git a/numpy/tests/test_scripts.py b/numpy/tests/test_scripts.py
index c7bb125b3..552383d77 100644
--- a/numpy/tests/test_scripts.py
+++ b/numpy/tests/test_scripts.py
@@ -12,6 +12,7 @@ import numpy as np
from numpy.compat.py3k import basestring, asbytes
from nose.tools import assert_equal
from numpy.testing.decorators import skipif
+from numpy.testing import assert_
skipif_inplace = skipif(isfile(pathjoin(dirname(np.__file__), '..', 'setup.py')))
@@ -63,7 +64,18 @@ def test_f2py():
if sys.platform == 'win32':
f2py_cmd = r"%s\Scripts\f2py.py" % dirname(sys.executable)
code, stdout, stderr = run_command([sys.executable, f2py_cmd, '-v'])
+ assert_equal(stdout.strip(), asbytes('2'))
else:
- f2py_cmd = 'f2py' + basename(sys.executable)[6:]
- code, stdout, stderr = run_command([f2py_cmd, '-v'])
- assert_equal(stdout.strip(), asbytes('2'))
+ # unclear what f2py cmd was installed as, check plain (f2py) and
+ # current python version specific one (f2py3.4)
+ f2py_cmds = ['f2py', 'f2py' + basename(sys.executable)[6:]]
+ success = False
+ for f2py_cmd in f2py_cmds:
+ try:
+ code, stdout, stderr = run_command([f2py_cmd, '-v'])
+ assert_equal(stdout.strip(), asbytes('2'))
+ success = True
+ break
+ except FileNotFoundError:
+ pass
+ assert_(success, "wasn't able to find f2py or %s on commandline" % f2py_cmds[1])