summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/source/reference/routines.testing.rst1
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src2
-rw-r--r--numpy/core/src/umath/loops.c.src55
-rw-r--r--numpy/lib/twodim_base.py19
4 files changed, 69 insertions, 8 deletions
diff --git a/doc/source/reference/routines.testing.rst b/doc/source/reference/routines.testing.rst
index c0bcdaaeb..834d8bbe3 100644
--- a/doc/source/reference/routines.testing.rst
+++ b/doc/source/reference/routines.testing.rst
@@ -25,6 +25,7 @@ Asserts
assert_array_less
assert_equal
assert_raises
+ assert_raises_regex
assert_warns
assert_string_equal
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index 02920014b..4a75e3293 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -793,7 +793,7 @@ NPY_NO_EXPORT PyArray_StridedUnaryOp *
#endif
-static void
+static NPY_GCC_OPT_3 void
@prefix@_cast_@name1@_to_@name2@(
char *dst, npy_intp dst_stride,
char *src, npy_intp src_stride,
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 3f5048592..ee7e7652d 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -52,6 +52,19 @@
&& (steps[0] == steps[2])\
&& (steps[0] == 0))
+/* binary loop with contiguous inputs and output */
+#define IS_BINARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \
+ steps[1] == sizeof(tin) && \
+ steps[2] == sizeof(tout))
+/* binary loop with contiguous second input and output; first input is a scalar */
+#define IS_BINARY_CONT_S1(tin, tout) (steps[0] == 0 && \
+ steps[1] == sizeof(tin) && \
+ steps[2] == sizeof(tout))
+/* binary loop with contiguous first input and output; second input is a scalar */
+#define IS_BINARY_CONT_S2(tin, tout) (steps[0] == sizeof(tin) && \
+ steps[1] == 0 && \
+ steps[2] == sizeof(tout))
+
#define OUTPUT_LOOP\
char *op1 = args[1];\
npy_intp os1 = steps[1];\
@@ -803,13 +816,45 @@ NPY_NO_EXPORT void
* #OP = ==, !=, >, >=, <, <=, &&, ||#
*/
-NPY_NO_EXPORT void
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
- BINARY_LOOP {
- const @type@ in1 = *(@type@ *)ip1;
- const @type@ in2 = *(@type@ *)ip2;
- *((npy_bool *)op1) = in1 @OP@ in2;
+ /*
+ * gcc vectorization of this is not good (PR60575) but manual integer
+ * vectorization is too tedious to be worthwhile
+ */
+ if (IS_BINARY_CONT(@type@, npy_bool)) {
+ npy_intp i, n = dimensions[0];
+ @type@ * a = (@type@ *)args[0], * b = (@type@ *)args[1];
+ npy_bool * o = (npy_bool *)args[2];
+ for (i = 0; i < n; i++) {
+ o[i] = a[i] @OP@ b[i];
+ }
+ }
+ else if (IS_BINARY_CONT_S1(@type@, npy_bool)) {
+ npy_intp i, n = dimensions[0];
+ @type@ a = *(@type@ *)args[0];
+ @type@ * b = (@type@ *)args[1];
+ npy_bool * o = (npy_bool *)args[2];
+ for (i = 0; i < n; i++) {
+ o[i] = a @OP@ b[i];
+ }
+ }
+ else if (IS_BINARY_CONT_S2(@type@, npy_bool)) {
+ npy_intp i, n = dimensions[0];
+ @type@ * a = (@type@ *)args[0];
+ @type@ b = *(@type@*)args[1];
+ npy_bool * o = (npy_bool *)args[2];
+ for (i = 0; i < n; i++) {
+ o[i] = a[i] @OP@ b;
+ }
+ }
+ else {
+ BINARY_LOOP {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ *((npy_bool *)op1) = in1 @OP@ in2;
+ }
}
}
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 5a0c0e7ee..20b5cdd67 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -11,10 +11,24 @@ __all__ = ['diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'rot90', 'tri',
from numpy.core.numeric import (
asanyarray, subtract, arange, zeros, greater_equal, multiply, ones,
- asarray, where, dtype as np_dtype, less
+ asarray, where, dtype as np_dtype, less, int8, int16, int32, int64
)
+from numpy.core import iinfo
+i1 = iinfo(int8)
+i2 = iinfo(int16)
+i4 = iinfo(int32)
+def _min_int(low, high):
+ """ get the smallest of int8/int16/int32 that holds both low and high, else int64 """
+ if high <= i1.max and low >= i1.min:
+ return int8
+ if high <= i2.max and low >= i2.min:
+ return int16
+ if high <= i4.max and low >= i4.min:
+ return int32
+ return int64
+
def fliplr(m):
"""
@@ -396,7 +410,8 @@ def tri(N, M=None, k=0, dtype=float):
if M is None:
M = N
- m = greater_equal.outer(arange(N), arange(-k, M-k))
+ m = greater_equal.outer(arange(N, dtype=_min_int(0, N)),
+ arange(-k, M-k, dtype=_min_int(-k, M - k)))
# Avoid making a copy if the requested type is already bool
if np_dtype(dtype) != np_dtype(bool):