-rw-r--r--  doc/release/1.8.0-notes.rst           11
-rw-r--r--  numpy/__init__.py                     15
-rw-r--r--  numpy/core/src/multiarray/mapping.c    1
-rw-r--r--  numpy/core/src/umath/loops.c.src      17
-rw-r--r--  numpy/core/src/umath/simd.inc.src    320
-rw-r--r--  numpy/core/tests/test_indexing.py    112
-rw-r--r--  numpy/core/tests/test_scalarmath.py   32
-rw-r--r--  numpy/core/tests/test_umath.py        21
-rw-r--r--  numpy/numarray/__init__.py             6
-rw-r--r--  numpy/oldnumeric/__init__.py           6
-rw-r--r--  numpy/testing/nosetester.py            2
-rw-r--r--  numpy/testing/utils.py                 2
12 files changed, 498 insertions, 47 deletions
diff --git a/doc/release/1.8.0-notes.rst b/doc/release/1.8.0-notes.rst
index 624707219..127226054 100644
--- a/doc/release/1.8.0-notes.rst
+++ b/doc/release/1.8.0-notes.rst
@@ -149,17 +149,18 @@ advantage of compiler builtins to avoid expensive calls to libc.
This improves performance of these operations by about a factor of two on gnu
libc systems.
-Performance improvements to `sqrt` and `abs`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The `sqrt` and `abs` functions for unit stride elementary operations have been
+Performance improvements to base math, `sqrt`, `absolute` and `minimum/maximum`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The base math (add, subtract, divide, multiply), `sqrt`, `absolute` and
+`minimum/maximum` functions for unit stride elementary operations have been
improved to make use of SSE2 CPU SIMD instructions.
This improves performance of these operations up to 4x/2x for float32/float64
depending on the location of the data in the CPU caches. The performance gain
is greatest for in-place operations.
In order to use the improved functions the SSE2 instruction set must be enabled
at compile time. It is enabled by default on x86_64 systems. On x86_32 with a
-capable CPU it must be enabled by passing the appropriate flag to CFLAGS build
-variable (-msse2 with gcc).
+capable CPU it must be enabled by passing the appropriate flag to the CFLAGS
+build variable (-msse2 with gcc).
Changes
=======
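
As a rough illustration of the case the note above targets (unit-stride data,
with in-place operation as the best case), the snippet below exercises the
contiguous float32 path; this is only a sketch, and the measured speedup will
depend on the CPU, the SSE2 build flags and where the data sits in the caches.

    import numpy as np
    from timeit import timeit

    a = np.ones(10000, dtype=np.float32)   # contiguous, i.e. unit stride
    b = np.ones(10000, dtype=np.float32)

    # in-place add on contiguous float32 data is the best case for the new loops
    t = timeit(lambda: np.add(a, b, out=a), number=10000)
    print("in-place float32 add:", t)
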
diff --git a/numpy/__init__.py b/numpy/__init__.py
index bbbecd6f6..b4be4d707 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -108,6 +108,19 @@ from __future__ import division, absolute_import, print_function
import sys
+
+class ModuleDeprecationWarning(DeprecationWarning):
+ """Module deprecation warning.
+
+ The nose tester turns ordinary Deprecation warnings into test failures.
+ That makes it hard to deprecate whole modules, because they get
+ imported by default. So this is a special Deprecation warning that the
+ nose tester will let pass without making tests fail.
+
+ """
+ pass
+
+
# We first need to detect if we're being called as part of the numpy setup
# procedure itself in a reliable manner.
try:
@@ -138,7 +151,7 @@ else:
return loader(*packages, **options)
from . import add_newdocs
- __all__ = ['add_newdocs']
+ __all__ = ['add_newdocs', 'ModuleDeprecationWarning']
pkgload.__doc__ = PackageLoader.__call__.__doc__
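
Because ModuleDeprecationWarning subclasses DeprecationWarning, existing user
filters on DeprecationWarning still catch it; only the nose tester singles the
subclass out (see the nosetester.py change below). A minimal usage sketch,
assuming a fresh interpreter where the deprecated subpackage has not been
imported yet:

    import warnings
    import numpy as np

    with warnings.catch_warnings():
        # silence the module-level warning emitted at import time
        warnings.simplefilter("ignore", np.ModuleDeprecationWarning)
        import numpy.oldnumeric
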
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 21874f8a9..a92955848 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1821,6 +1821,7 @@ PyArray_MapIterBind(PyArrayMapIterObject *mit, PyArrayObject *arr)
for (i = 0; i < n; i++) {
mit->iteraxes[i] = i;
}
+ Py_DECREF(sub);
goto finish;
}
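
The added Py_DECREF releases an intermediate subspace array and closes a
reference-count leak in PyArray_MapIterBind. From Python such a leak shows up
as a refcount on the indexed array that keeps growing across repeated
fancy-indexing calls. The probe below is a generic sketch of that idea (which
index expressions actually hit the leaked path depends on the index); the
refcount assertions added to test_indexing.py below formalize the same check.

    import sys
    import numpy as np

    a = np.arange(12).reshape(3, 4)
    before = sys.getrefcount(a)
    for _ in range(100):
        a[np.array([0, 1]), :]          # fancy index, result discarded
    after = sys.getrefcount(a)
    # with the leak fixed the counts match; a leak shows up as after > before
    print(before, after)
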
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index c0287b8c8..068ecde7c 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1322,6 +1322,9 @@ NPY_NO_EXPORT void
*((@type@ *)iop1) = io1;
}
else {
+ if (run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) {
+ return;
+ }
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
@@ -1418,6 +1421,9 @@ NPY_NO_EXPORT void
{
/* */
if (IS_BINARY_REDUCE) {
+ if (run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
+ return;
+ }
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
@@ -1488,6 +1494,11 @@ NPY_NO_EXPORT void
NPY_NO_EXPORT void
@TYPE@_square(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+ char * margs[] = {args[0], args[0], args[1]};
+ npy_intp msteps[] = {steps[0], steps[0], steps[1]};
+ if (run_binary_simd_multiply_@TYPE@(margs, dimensions, msteps)) {
+ return;
+ }
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = in1*in1;
@@ -1497,6 +1508,12 @@ NPY_NO_EXPORT void
NPY_NO_EXPORT void
@TYPE@_reciprocal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+ @type@ one = 1.@c@;
+ char * margs[] = {(char*)&one, args[0], args[1]};
+ npy_intp msteps[] = {0, steps[0], steps[1]};
+ if (run_binary_simd_divide_@TYPE@(margs, dimensions, msteps)) {
+ return;
+ }
UNARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
*((@type@ *)op1) = 1/in1;
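
The two hooks above reuse the binary SIMD kernels for unary ufuncs by
rewriting square(x) as multiply(x, x) and reciprocal(x) as divide(1, x) with a
zero-stride scalar operand. The equivalence is observable from Python, and it
is what the new TestBaseMath test further down leans on; a small hand check:

    import numpy as np

    x = np.arange(1, 17, dtype=np.float32)
    assert np.allclose(np.square(x), np.multiply(x, x))
    assert np.allclose(np.reciprocal(x), np.divide(1, x))
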
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 916473a0b..746943097 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -17,9 +17,14 @@
#include "lowlevel_strided_loops.h"
#include "npy_config.h"
+/* for NO_FLOATING_POINT_SUPPORT */
+#include "numpy/ufuncobject.h"
#include <assert.h>
#include <stdlib.h>
+int PyUFunc_getfperr(void);
+void PyUFunc_clearfperr(void);
+
/*
* stride is equal to element size and input and destination are equal or
* don't overlap within one register
@@ -29,21 +34,41 @@
(npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \
((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0))))
+#define IS_BLOCKABLE_REDUCE(esize, vsize) \
+ (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize))
+
+#define IS_BLOCKABLE_BINARY(esize, vsize) \
+ (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \
+ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \
+ npy_is_aligned(args[0], (esize)) && \
+ (abs(args[2] - args[0]) >= (vsize) || abs(args[2] - args[0]) == 0) && \
+ (abs(args[2] - args[1]) >= (vsize) || abs(args[2] - args[1]) == 0))
+
+#define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \
+ (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \
+ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \
+ ((abs(args[2] - args[1]) >= (vsize)) || (abs(args[2] - args[1]) == 0)) && \
+ abs(args[2] - args[0]) >= (esize))
+
+#define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \
+ (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \
+ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \
+ ((abs(args[2] - args[0]) >= (vsize)) || (abs(args[2] - args[0]) == 0)) && \
+ abs(args[2] - args[1]) >= (esize))
/* align var to alignment */
-#define UNARY_LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
+#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\
alignment, n);\
for(i = 0; i < peel; i++)
-#define UNARY_LOOP_BLOCKED(type, vsize)\
+#define LOOP_BLOCKED(type, vsize)\
for(; i < npy_blocked_end(peel, sizeof(type), vsize, n);\
i += (vsize / sizeof(type)))
-#define UNARY_LOOP_BLOCKED_END\
+#define LOOP_BLOCKED_END\
for (; i < n; i++)
-
/*
* Dispatcher functions
* decide whether the operation can be vectorized and run it
@@ -58,28 +83,80 @@
*/
/**begin repeat1
- * #func = sqrt, absolute#
+ * #func = sqrt, absolute, minimum, maximum#
+ * #check = IS_BLOCKABLE_UNARY, IS_BLOCKABLE_UNARY, IS_BLOCKABLE_REDUCE, IS_BLOCKABLE_REDUCE#
+ * #name = unary, unary, unary_reduce, unary_reduce#
*/
-#if @vector@
+#if @vector@ && defined HAVE_EMMINTRIN_H
/* prototypes */
static void
-sse2_@func@_@TYPE@(@type@ * op, const @type@ * ip, const npy_intp n);
+sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
#endif
static NPY_INLINE int
-run_unary_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
+run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
{
#if @vector@ && defined HAVE_EMMINTRIN_H
- if (IS_BLOCKABLE_UNARY(sizeof(@type@), 16)) {
+ if (@check@(sizeof(@type@), 16)) {
sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
return 1;
}
#endif
return 0;
}
+
+/**end repeat1**/
+
+/**begin repeat1
+ * Arithmetic
+ * # kind = add, subtract, multiply, divide#
+ * # OP = +, -, *, /#
+ */
+
+#if @vector@ && defined HAVE_EMMINTRIN_H
+
+/* prototypes */
+static void
+sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2,
+ npy_intp n);
+static void
+sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2,
+ npy_intp n);
+static void
+sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2,
+ npy_intp n);
+
+#endif
+
+static NPY_INLINE int
+run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if @vector@ && defined HAVE_EMMINTRIN_H
+ @type@ * ip1 = (@type@ *)args[0];
+ @type@ * ip2 = (@type@ *)args[1];
+ @type@ * op = (@type@ *)args[2];
+ npy_intp n = dimensions[0];
+ /* argument one scalar */
+ if (IS_BLOCKABLE_BINARY_SCALAR1(sizeof(@type@), 16)) {
+ sse2_binary_scalar1_@kind@_@TYPE@(op, ip1, ip2, n);
+ return 1;
+ }
+ /* argument two scalar */
+ else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), 16)) {
+ sse2_binary_scalar2_@kind@_@TYPE@(op, ip1, ip2, n);
+ return 1;
+ }
+ else if (IS_BLOCKABLE_BINARY(sizeof(@type@), 16)) {
+ sse2_binary_@kind@_@TYPE@(op, ip1, ip2, n);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
/**end repeat1**/
/**end repeat**/
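
The renamed LOOP_BLOCK_ALIGN_VAR / LOOP_BLOCKED / LOOP_BLOCKED_END macros used
by these kernels implement a standard peel/body/tail decomposition: a scalar
loop up to the first 16-byte-aligned output element, a vectorized body over
whole 16-byte blocks, and a scalar loop over the remainder. The sketch below
shows only that index arithmetic in Python (blocked_add is a made-up name; the
real loop bodies use SSE intrinsics, not numpy slicing):

    import numpy as np

    def blocked_add(op, ip1, ip2, vsize=16):
        esize = op.itemsize
        n = op.size
        # scalar peel: elements until the output address is 16-byte aligned
        peel = min(((-op.ctypes.data) % vsize) // esize, n)
        op[:peel] = ip1[:peel] + ip2[:peel]
        # vector body: whole 16-byte blocks only
        per_block = vsize // esize
        end = peel + ((n - peel) // per_block) * per_block
        op[peel:end] = ip1[peel:end] + ip2[peel:end]
        # scalar tail
        op[end:] = ip1[end:] + ip2[end:]
        return op

    out = np.empty(37, dtype=np.float32)
    blocked_add(out, np.arange(37, dtype=np.float32), np.ones(37, dtype=np.float32))
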
@@ -89,6 +166,34 @@ run_unary_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
* Vectorized operations
*/
+#ifdef HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+
+/**begin repeat
+* horizontal reductions on a vector
+* # VOP = min, max#
+*/
+
+static NPY_INLINE npy_float sse2_horizontal_@VOP@___m128(__m128 v)
+{
+ npy_float r;
+ __m128 tmp = _mm_movehl_ps(v, v); /* c d ... */
+ __m128 m = _mm_@VOP@_ps(v, tmp); /* m(ac) m(bd) ... */
+ tmp = _mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1));/* m(bd) m(bd) ... */
+ _mm_store_ss(&r, _mm_@VOP@_ps(tmp, m)); /* m(acbd) ... */
+ return r;
+}
+
+static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v)
+{
+ npy_double r;
+ __m128d tmp = _mm_unpackhi_pd(v, v); /* b b */
+ _mm_store_sd(&r, _mm_@VOP@_pd(tmp, v)); /* m(ab) m(bb) */
+ return r;
+}
+
+/**end repeat**/
+
/**begin repeat
* #type = npy_float, npy_double#
* #TYPE = FLOAT, DOUBLE#
@@ -97,40 +202,160 @@ run_unary_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
* #vtype = __m128, __m128d#
* #vpre = _mm, _mm#
* #vsuf = ps, pd#
+ * #nan = NPY_NANF, NPY_NAN#
*/
-#ifdef HAVE_EMMINTRIN_H
-#include <emmintrin.h>
+/**begin repeat1
+* Arithmetic
+* # kind = add, subtract, multiply, divide#
+* # OP = +, -, *, /#
+* # VOP = add, sub, mul, div#
+*/
static void
-sse2_sqrt_@TYPE@(@type@ * op, const @type@ * ip, const npy_intp n)
+sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n)
+{
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16)
+ op[i] = ip1[i] @OP@ ip2[i];
+ /* lots of specializations, to squeeze out max performance */
+ if (npy_is_aligned(&ip1[i], 16) && npy_is_aligned(&ip2[i], 16)) {
+ if (ip1 == ip2) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]);
+ @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ }
+ else if (npy_is_aligned(&ip1[i], 16)) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]);
+ @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else if (npy_is_aligned(&ip2[i], 16)) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]);
+ @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else {
+ if (ip1 == ip2) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]);
+ @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ }
+ LOOP_BLOCKED_END {
+ op[i] = ip1[i] @OP@ ip2[i];
+ }
+}
+
+
+static void
+sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n)
+{
+ const @vtype@ a = @vpre@_set1_@vsuf@(ip1[0]);
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16)
+ op[i] = ip1[0] @OP@ ip2[i];
+ if (npy_is_aligned(&ip2[i], 16)) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ LOOP_BLOCKED_END {
+ op[i] = ip1[0] @OP@ ip2[i];
+ }
+}
+
+
+static void
+sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n)
+{
+ const @vtype@ b = @vpre@_set1_@vsuf@(ip2[0]);
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16)
+ op[i] = ip1[i] @OP@ ip2[0];
+ if (npy_is_aligned(&ip1[i], 16)) {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ else {
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]);
+ @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b);
+ @vpre@_store_@vsuf@(&op[i], c);
+ }
+ }
+ LOOP_BLOCKED_END {
+ op[i] = ip1[i] @OP@ ip2[0];
+ }
+}
+
+/**end repeat1**/
+
+static void
+sse2_sqrt_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n)
{
/* align output to 16 bytes */
- UNARY_LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) {
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) {
op[i] = @scalarf@(ip[i]);
}
assert(npy_is_aligned(&op[i], 16));
if (npy_is_aligned(&ip[i], 16)) {
- UNARY_LOOP_BLOCKED(@type@, 16) {
+ LOOP_BLOCKED(@type@, 16) {
@vtype@ d = @vpre@_load_@vsuf@(&ip[i]);
@vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d));
}
}
else {
- UNARY_LOOP_BLOCKED(@type@, 16) {
+ LOOP_BLOCKED(@type@, 16) {
@vtype@ d = @vpre@_loadu_@vsuf@(&ip[i]);
@vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d));
}
}
- UNARY_LOOP_BLOCKED_END {
+ LOOP_BLOCKED_END {
op[i] = @scalarf@(ip[i]);
}
}
static void
-sse2_absolute_@TYPE@(@type@ * op, const @type@ * ip, const npy_intp n)
+sse2_absolute_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n)
{
/*
* get 0x7FFFFFFF mask (everything but signbit set)
@@ -140,34 +365,87 @@ sse2_absolute_@TYPE@(@type@ * op, const @type@ * ip, const npy_intp n)
const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@);
/* align output to 16 bytes */
- UNARY_LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) {
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) {
const @type@ tmp = ip[i] > 0 ? ip[i]: -ip[i];
/* add 0 to clear -0.0 */
op[i] = tmp + 0;
}
assert(npy_is_aligned(&op[i], 16));
if (npy_is_aligned(&ip[i], 16)) {
- UNARY_LOOP_BLOCKED(@type@, 16) {
+ LOOP_BLOCKED(@type@, 16) {
@vtype@ a = @vpre@_load_@vsuf@(&ip[i]);
@vpre@_store_@vsuf@(&op[i], @vpre@_andnot_@vsuf@(mask, a));
}
}
else {
- UNARY_LOOP_BLOCKED(@type@, 16) {
+ LOOP_BLOCKED(@type@, 16) {
@vtype@ a = @vpre@_loadu_@vsuf@(&ip[i]);
@vpre@_store_@vsuf@(&op[i], @vpre@_andnot_@vsuf@(mask, a));
}
}
- UNARY_LOOP_BLOCKED_END {
+ LOOP_BLOCKED_END {
const @type@ tmp = ip[i] > 0 ? ip[i]: -ip[i];
/* add 0 to clear -0.0 */
op[i] = tmp + 0;
}
}
+
+/**begin repeat1
+ * #kind = maximum, minimum#
+ * #VOP = max, min#
+ * #OP = >=, <=#
+ **/
+/* arguments swapped: unary reduce has input and output swapped compared to plain unary */
+static void
+sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
+{
+ LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
+ *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+ }
+ assert(npy_is_aligned(&ip[i], 16));
+ if (i + 2 * 16 / sizeof(@type@) <= n) {
+ /* load the first elements */
+ @vtype@ c = @vpre@_load_@vsuf@((@type@*)&ip[i]);
+#ifdef NO_FLOATING_POINT_SUPPORT
+ @vtype@ cnan = @vpre@_cmpneq_@vsuf@(c, c);
+#else
+ /* minps/minpd will set invalid flag if nan is encountered */
+ PyUFunc_clearfperr();
#endif
+ i += 16 / sizeof(@type@);
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ v = @vpre@_load_@vsuf@((@type@*)&ip[i]);
+ c = @vpre@_@VOP@_@vsuf@(c, v);
+#ifdef NO_FLOATING_POINT_SUPPORT
+ /* check for nan; breaking out of the loop would slow the non-nan case */
+ cnan = @vpre@_or_@vsuf@(@vpre@_cmpneq_@vsuf@(v, v), cnan);
+ }
+
+ if (@vpre@_movemask_@vsuf@(cnan)) {
+ *op = @nan@;
+ return;
+ }
+#else
+ }
+#endif
+ {
+ @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c);
+ if (PyUFunc_getfperr() & UFUNC_FPE_INVALID)
+ *op = @nan@;
+ else
+ *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
+ }
+ }
+ LOOP_BLOCKED_END {
+ *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+ }
+}
+/**end repeat1**/
/**end repeat**/
+#endif /* HAVE_EMMINTRIN_H */
+
#endif
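
The sse2_horizontal_min/max helpers above reduce one SSE register by folding
its upper half onto its lower half and repeating. The same halving idea,
written out with numpy for one register's worth of float32 lanes:

    import numpy as np

    v = np.array([3.0, 1.0, 4.0, 2.0], dtype=np.float32)   # lanes a b c d
    m = np.minimum(v[:2], v[2:])    # movehl + minps -> [min(a,c), min(b,d)]
    r = np.minimum(m[0], m[1])      # shuffle + minps -> min over all lanes
    assert r == v.min()
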
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index 6d498b81f..fa44900c7 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -19,6 +19,8 @@ class TestIndexing(TestCase):
a = np.array([1, 2, 3])
assert_equal(a[()], a)
assert_(a[()].base is a)
+ a = np.array(0)
+ assert_(isinstance(a[()], np.int_))
def test_empty_fancy_index(self):
# Empty list index creates an empty array
@@ -153,15 +155,32 @@ class TestMultiIndexingAutomated(TestCase):
# Some simpler indices that still cover a bit more
self.simple_indices = [Ellipsis, None, -1, [1], np.array([True]), 'skip']
# Very simple ones to fill the rest:
- self.fill_indices = [slice(None,None), 'skip']
+ self.fill_indices = [slice(None,None), 0]
def _get_multi_index(self, arr, indices):
- """Mimic multi dimensional indexing. Returns the indexed array and a
- flag no_copy. If no_copy is True, np.may_share_memory(arr, arr[indicies])
- should be True (though this may be wrong for 0-d arrays sometimes.
- If this function raises an error it should most of the time match the
- real error as long as there is exactly one error in the index.
+ """Mimic multi dimensional indexing.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Array to be indexed.
+ indices : tuple of index objects
+
+ Returns
+ -------
+ out : ndarray
+ An array equivalent to the indexing operation (but always a copy).
+ `arr[indices]` should be identical.
+ no_copy : bool
+ Whether the indexing operation requires a copy. If this is `True`,
+ `np.may_share_memory(arr, arr[indices])` should be `True` (with
+ some exceptions for scalars and possibly 0-d arrays).
+
+ Notes
+ -----
+ While this function mimics the error behaviour of normal indexing in many
+ cases, the errors it raises are not guaranteed to match in general.
"""
in_indices = list(indices)
indices = []
@@ -264,6 +283,10 @@ class TestMultiIndexingAutomated(TestCase):
+ arr.shape[ax+indx.ndim:]))
indx = flat_indx
else:
+ # This could be changed; a 0-d boolean index can
+ # make sense (even outside the 0-d indexed array case).
+ # Note that originally this could be interpreted as an
+ # integer in the full integer special case.
raise IndexError
if len(indices) > 0 and indices[-1][0] == 'f' and ax != ellipsis_pos:
# NOTE: There could still have been a 0-sized Ellipsis
@@ -359,9 +382,14 @@ class TestMultiIndexingAutomated(TestCase):
def _check_multi_index(self, arr, index):
- """Check mult index getting and simple setting. Input array
- must be a reshaped arange for __setitem__ check for non-view
- arrays to work. It then relies on .flat to work.
+ """Check a multi index item getting and simple setting.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Array to be indexed, must be a reshaped arange.
+ index : tuple of indexing objects
+ Index being tested.
"""
# Test item getting
try:
@@ -371,6 +399,33 @@ class TestMultiIndexingAutomated(TestCase):
assert_raises(Exception, arr.__setitem__, index, 0)
return
+ self._compare_index_result(arr, index, mimic_get, no_copy)
+
+
+ def _check_single_index(self, arr, index):
+ """Check a single index item getting and simple setting.
+
+ Parameters
+ ----------
+ arr : ndarray
+ Array to be indexed, must be an arange.
+ index : indexing object
+ Index being tested. Must be a single index and not a tuple
+ of indexing objects (see also `_check_multi_index`).
+ """
+ try:
+ mimic_get, no_copy = self._get_multi_index(arr, (index,))
+ except Exception as e:
+ assert_raises(Exception, arr.__getitem__, index)
+ assert_raises(Exception, arr.__setitem__, index, 0)
+ return
+
+ self._compare_index_result(arr, index, mimic_get, no_copy)
+
+
+ def _compare_index_result(self, arr, index, mimic_get, no_copy):
+ """Compare mimicked result to indexing result.
+ """
arr = arr.copy()
indexed_arr = arr[index]
assert_array_equal(indexed_arr, mimic_get)
@@ -378,8 +433,13 @@ class TestMultiIndexingAutomated(TestCase):
# (then its not a view, and that does not matter)
if indexed_arr.size != 0 and indexed_arr.ndim != 0:
assert_(np.may_share_memory(indexed_arr, arr) == no_copy)
+ # Check reference count of the original array
+ if no_copy:
+ # refcount increases by one:
+ assert_equal(sys.getrefcount(arr), 3)
+ else:
+ assert_equal(sys.getrefcount(arr), 2)
- sys.stdout.flush()
# Test non-broadcast setitem:
b = arr.copy()
b[index] = mimic_get + 1000
@@ -411,16 +471,20 @@ class TestMultiIndexingAutomated(TestCase):
def test_multidim(self):
- # Check all combinations of all inner 3x3 arrays. Since test None
- # we also test the Ellipsis OK.
- tocheck = [self.simple_indices, self.complex_indices] + [self.simple_indices]*2
- for simple_pos in [0,2,3]:
- tocheck = [self.fill_indices, self.complex_indices, self.fill_indices, self.fill_indices]
- tocheck[simple_pos] = self.simple_indices
- for index in product(*tocheck):
- index = tuple(i for i in index if i != 'skip')
- self._check_multi_index(self.a, index)
- self._check_multi_index(self.b, index)
+ # Automatically test combinations with complex indexes on 2nd (or 1st)
+ # spot and the simple ones in one other spot.
+ with warnings.catch_warnings():
+ # This is so that np.array(True) is not accepted in a full integer
+ # index, when running the file separately.
+ warnings.filterwarnings('error', '', DeprecationWarning)
+ for simple_pos in [0,2,3]:
+ tocheck = [self.fill_indices, self.complex_indices,
+ self.fill_indices, self.fill_indices]
+ tocheck[simple_pos] = self.simple_indices
+ for index in product(*tocheck):
+ index = tuple(i for i in index if i != 'skip')
+ self._check_multi_index(self.a, index)
+ self._check_multi_index(self.b, index)
# Check very simple item getting:
self._check_multi_index(self.a, (0,0,0,0))
self._check_multi_index(self.b, (0,0,0,0))
@@ -431,5 +495,13 @@ class TestMultiIndexingAutomated(TestCase):
assert_raises(IndexError, self.a.__setitem__, (0,0,[1],0,0), 0)
+ def test_1d(self):
+ a = np.arange(10)
+ with warnings.catch_warnings():
+ warnings.filterwarnings('error', '', DeprecationWarning)
+ for index in self.complex_indices:
+ self._check_single_index(a, index)
+
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index 201861279..952a89999 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -2,6 +2,7 @@ from __future__ import division, absolute_import, print_function
import sys
from numpy.testing import *
+from numpy.testing.utils import gen_alignment_data
import numpy as np
types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc,
@@ -44,6 +45,37 @@ class TestTypes(TestCase):
assert_equal(a,b)
+class TestBaseMath(TestCase):
+ def test_blocked(self):
+ # test alignment offsets for simd instructions
+ for dt in [np.float32, np.float64]:
+ for out, inp1, inp2, msg in gen_alignment_data(dtype=dt,
+ type='binary',
+ max_size=12):
+ exp1 = np.ones_like(inp1)
+ inp1[...] = np.ones_like(inp1)
+ inp2[...] = np.zeros_like(inp2)
+ assert_almost_equal(np.add(inp1, inp2), exp1, err_msg=msg)
+ assert_almost_equal(np.add(inp1, 1), exp1 + 1, err_msg=msg)
+ assert_almost_equal(np.add(1, inp2), exp1, err_msg=msg)
+
+ np.add(inp1, inp2, out=out)
+ assert_almost_equal(out, exp1, err_msg=msg)
+
+ inp2[...] += np.arange(inp2.size, dtype=dt) + 1
+ assert_almost_equal(np.square(inp2),
+ np.multiply(inp2, inp2), err_msg=msg)
+ assert_almost_equal(np.reciprocal(inp2),
+ np.divide(1, inp2), err_msg=msg)
+
+ inp1[...] = np.ones_like(inp1)
+ inp2[...] = np.zeros_like(inp2)
+ np.add(inp1, 1, out=out)
+ assert_almost_equal(out, exp1 + 1, err_msg=msg)
+ np.add(1, inp2, out=out)
+ assert_almost_equal(out, exp1, err_msg=msg)
+
+
class TestPower(TestCase):
def test_small_types(self):
for t in [np.int8, np.int16, np.float16]:
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 6bbb15e6b..c58a0d3f5 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -687,6 +687,27 @@ class TestSign(TestCase):
np.seterr(**olderr)
+class TestMinMax(TestCase):
+ def test_minmax_blocked(self):
+ "simd tests on max/min"
+ for dt in [np.float32, np.float64]:
+ for out, inp, msg in gen_alignment_data(dtype=dt, type='unary',
+ max_size=17):
+ for i in range(inp.size):
+ inp[:] = np.arange(inp.size, dtype=dt)
+ inp[i] = np.nan
+ self.assertTrue(np.isnan(inp.max()),
+ msg=repr(inp) + '\n' + msg)
+ self.assertTrue(np.isnan(inp.min()),
+ msg=repr(inp) + '\n' + msg)
+
+ inp[i] = 1e10
+ assert_equal(inp.max(), 1e10, err_msg=msg)
+ inp[i] = -1e10
+ assert_equal(inp.min(), -1e10, err_msg=msg)
+
+
+
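
The test above pins down the semantics the new SSE2 reduce loops must keep: a
nan anywhere in the block propagates to the result, exactly as in the scalar
loop. The asserted behaviour in isolation, for one odd-sized array that forces
a peel, vector blocks and a tail:

    import numpy as np

    x = np.arange(17, dtype=np.float32)
    x[5] = np.nan
    assert np.isnan(x.max()) and np.isnan(x.min())

    x[5] = 1e10
    assert x.max() == 1e10 and x.min() == 0.0
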
class TestAbsolute(TestCase):
def test_abs_blocked(self):
"simd tests on abs"
diff --git a/numpy/numarray/__init__.py b/numpy/numarray/__init__.py
index 706ec1c33..468324ced 100644
--- a/numpy/numarray/__init__.py
+++ b/numpy/numarray/__init__.py
@@ -1,5 +1,8 @@
from __future__ import division, absolute_import, print_function
+import warnings
+from numpy import ModuleDeprecationWarning
+
from .util import *
from .numerictypes import *
from .functions import *
@@ -14,6 +17,9 @@ from . import ufuncs
from . import compat
from . import session
+_msg = "The numarray module will be dropped in Numpy 1.9"
+warnings.warn(_msg, ModuleDeprecationWarning)
+
__all__ = ['session', 'numerictypes']
__all__ += util.__all__
__all__ += numerictypes.__all__
diff --git a/numpy/oldnumeric/__init__.py b/numpy/oldnumeric/__init__.py
index cf34b8300..86cdf55f7 100644
--- a/numpy/oldnumeric/__init__.py
+++ b/numpy/oldnumeric/__init__.py
@@ -3,8 +3,14 @@
"""
from __future__ import division, absolute_import, print_function
+import warnings
+
from numpy import *
+_msg = "The oldnumeric module will be dropped in Numpy 1.9"
+warnings.warn(_msg, ModuleDeprecationWarning)
+
+
def _move_axis_to_0(a, axis):
if axis == 0:
return a
diff --git a/numpy/testing/nosetester.py b/numpy/testing/nosetester.py
index e3f96b9a9..f1ebd2265 100644
--- a/numpy/testing/nosetester.py
+++ b/numpy/testing/nosetester.py
@@ -11,6 +11,7 @@ import sys
import warnings
import numpy.testing.utils
from numpy.compat import basestring
+from numpy import ModuleDeprecationWarning
def get_package_name(filepath):
"""
@@ -378,6 +379,7 @@ class NoseTester(object):
warnings.filterwarnings('ignore', message='Not importing directory')
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
+ warnings.filterwarnings("ignore", category=ModuleDeprecationWarning)
try:
from .noseclasses import NumpyTestProgram
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 2d8d57c5f..f58997d58 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -26,8 +26,10 @@ __all__ = ['assert_equal', 'assert_almost_equal','assert_approx_equal',
'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings',
'assert_allclose']
+
verbose = 0
+
def assert_(val, msg='') :
"""
Assert that works in release mode.