diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 2 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 55 | ||||
-rw-r--r-- | numpy/lib/twodim_base.py | 19 |
3 files changed, 68 insertions, 8 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 02920014b..4a75e3293 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -793,7 +793,7 @@ NPY_NO_EXPORT PyArray_StridedUnaryOp * #endif -static void +static NPY_GCC_OPT_3 void @prefix@_cast_@name1@_to_@name2@( char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 3f5048592..ee7e7652d 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -52,6 +52,19 @@ && (steps[0] == steps[2])\ && (steps[0] == 0)) +/* binary loop input and output contiguous */ +#define IS_BINARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \ + steps[1] == sizeof(tin) && \ + steps[2] == sizeof(tout)) +/* binary loop input and output contiguous with first scalar */ +#define IS_BINARY_CONT_S1(tin, tout) (steps[0] == 0 && \ + steps[1] == sizeof(tin) && \ + steps[2] == sizeof(tout)) +/* binary loop input and output contiguous with second scalar */ +#define IS_BINARY_CONT_S2(tin, tout) (steps[0] == sizeof(tin) && \ + steps[1] == 0 && \ + steps[2] == sizeof(tout)) + #define OUTPUT_LOOP\ char *op1 = args[1];\ npy_intp os1 = steps[1];\ @@ -803,13 +816,45 @@ NPY_NO_EXPORT void * #OP = ==, !=, >, >=, <, <=, &&, ||# */ -NPY_NO_EXPORT void +NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { - BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - *((npy_bool *)op1) = in1 @OP@ in2; + /* + * gcc vectorization of this is not good (PR60575) but manual integer + * vectorization is too tedious to be worthwhile + */ + if (IS_BINARY_CONT(@type@, npy_bool)) { + npy_intp i, n = dimensions[0]; + @type@ * a = (@type@ *)args[0], * b = (@type@ *)args[1]; + npy_bool * o = (npy_bool 
*)args[2]; + for (i = 0; i < n; i++) { + o[i] = a[i] @OP@ b[i]; + } + } + else if (IS_BINARY_CONT_S1(@type@, npy_bool)) { + npy_intp i, n = dimensions[0]; + @type@ a = *(@type@ *)args[0]; + @type@ * b = (@type@ *)args[1]; + npy_bool * o = (npy_bool *)args[2]; + for (i = 0; i < n; i++) { + o[i] = a @OP@ b[i]; + } + } + else if (IS_BINARY_CONT_S2(@type@, npy_bool)) { + npy_intp i, n = dimensions[0]; + @type@ * a = (@type@ *)args[0]; + @type@ b = *(@type@*)args[1]; + npy_bool * o = (npy_bool *)args[2]; + for (i = 0; i < n; i++) { + o[i] = a[i] @OP@ b; + } + } + else { + BINARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((npy_bool *)op1) = in1 @OP@ in2; + } } } diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 5a0c0e7ee..20b5cdd67 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -11,10 +11,24 @@ __all__ = ['diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'rot90', 'tri', from numpy.core.numeric import ( asanyarray, subtract, arange, zeros, greater_equal, multiply, ones, - asarray, where, dtype as np_dtype, less + asarray, where, dtype as np_dtype, less, int8, int16, int32, int64 ) +from numpy.core import iinfo +i1 = iinfo(int8) +i2 = iinfo(int16) +i4 = iinfo(int32) +def _min_int(low, high): + """ get small int that fits the range """ + if high <= i1.max and low >= i1.min: + return int8 + if high <= i2.max and low >= i2.min: + return int16 + if high <= i4.max and low >= i4.min: + return int32 + return int64 + def fliplr(m): """ @@ -396,7 +410,8 @@ def tri(N, M=None, k=0, dtype=float): if M is None: M = N - m = greater_equal.outer(arange(N), arange(-k, M-k)) + m = greater_equal.outer(arange(N, dtype=_min_int(0, N)), + arange(-k, M-k, dtype=_min_int(-k, M - k))) # Avoid making a copy if the requested type is already bool if np_dtype(dtype) != np_dtype(bool): |