author     Matti Picus <matti.picus@gmail.com>  2022-05-07 20:02:19 +0300
committer  GitHub <noreply@github.com>  2022-05-07 20:02:19 +0300
commit     37cb0f8463345648add0e1e98fd146c966146981
tree       d2771e0f43118ef11abc5858bd124f18ce67d6c8
parent     e3a9e1a4188a27359950029be4d82d87c4a618b2
parent     c2af30347d99d8bfcac23a7002b9229b829b9030
Merge pull request #21188 from seberg/scalar-math-rewrite
MAINT,ENH: Rewrite scalar math logic
 benchmarks/benchmarks/bench_scalar.py              |   34
 doc/release/upcoming_changes/21188.performance.rst |    8
 numpy/core/setup_common.py                         |    2
 numpy/core/src/multiarray/abstractdtypes.c         |    3
 numpy/core/src/multiarray/array_coercion.c         |   10
 numpy/core/src/multiarray/array_coercion.h         |    3
 numpy/core/src/multiarray/scalarapi.c              |    8
 numpy/core/src/umath/scalarmath.c.src              | 1254
 numpy/core/tests/test_scalarmath.py                |  179
 9 files changed, 1010 insertions(+), 491 deletions(-)
diff --git a/benchmarks/benchmarks/bench_scalar.py b/benchmarks/benchmarks/bench_scalar.py
index 219e48bed..650daa89d 100644
--- a/benchmarks/benchmarks/bench_scalar.py
+++ b/benchmarks/benchmarks/bench_scalar.py
@@ -10,6 +10,8 @@ class ScalarMath(Benchmark):
param_names = ["type"]
def setup(self, typename):
self.num = np.dtype(typename).type(2)
+ self.int32 = np.int32(2)
+ self.int32arr = np.array(2, dtype=np.int32)
def time_addition(self, typename):
n = self.num
@@ -31,3 +33,35 @@ class ScalarMath(Benchmark):
n = self.num
res = abs(abs(abs(abs(abs(abs(abs(abs(abs(abs(n))))))))))
+ def time_add_int32_other(self, typename):
+        # Some mixed cases are fast and some are slow; this documents these
+        # differences. (At the time of writing, it was fast if the type of
+        # the result is one of the inputs.)
+ int32 = self.int32
+ other = self.num
+ int32 + other
+ int32 + other
+ int32 + other
+ int32 + other
+ int32 + other
+
+ def time_add_int32arr_and_other(self, typename):
+ # `arr + scalar` hits the normal ufunc (array) paths.
+ int32 = self.int32arr
+ other = self.num
+ int32 + other
+ int32 + other
+ int32 + other
+ int32 + other
+ int32 + other
+
+ def time_add_other_and_int32arr(self, typename):
+        # `scalar + arr` at some point hit scalar code paths in some cases;
+        # those paths could be optimized more easily.
+ int32 = self.int32arr
+ other = self.num
+ other + int32
+ other + int32
+ other + int32
+ other + int32
+ other + int32
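As a rough illustration of what these benchmarks measure, the scalar and array paths can also be compared with plain `timeit` (an illustrative sketch, not part of the benchmark suite; timings vary by machine):

    import timeit
    import numpy as np

    scalar = np.int32(2)
    other = np.float64(2)
    arr = np.array(2, dtype=np.int32)

    # Scalar path: may hit the rewritten scalar-math fast paths.
    print(timeit.timeit(lambda: scalar + other, number=100_000))
    # Array path: always goes through the full ufunc machinery.
    print(timeit.timeit(lambda: arr + other, number=100_000))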
diff --git a/doc/release/upcoming_changes/21188.performance.rst b/doc/release/upcoming_changes/21188.performance.rst
new file mode 100644
index 000000000..ce497572a
--- /dev/null
+++ b/doc/release/upcoming_changes/21188.performance.rst
@@ -0,0 +1,8 @@
+Faster operations on NumPy scalars
+----------------------------------
+Many operations on NumPy scalars are now significantly faster, although
+rare operations (e.g. with 0-D arrays rather than scalars) may be slower
+in some cases.
+However, even with these improvements, users who want the best performance
+for their scalars may want to convert a known NumPy scalar into a Python
+one using `scalar.item()`.
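A minimal example of the suggested conversion (illustrative only):

    import numpy as np

    x = np.float64(3.0)
    y = x.item()   # a plain Python float; fastest for tight scalar loops
    assert type(y) is float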
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 1143872b1..44fa0a651 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -183,6 +183,8 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
'attribute_optimize_unroll_loops'),
('__attribute__((optimize("O3")))',
'attribute_optimize_opt_3'),
+ ('__attribute__((optimize("O2")))',
+ 'attribute_optimize_opt_2'),
('__attribute__((nonnull (1)))',
'attribute_nonnull'),
('__attribute__((target ("avx")))',
diff --git a/numpy/core/src/multiarray/abstractdtypes.c b/numpy/core/src/multiarray/abstractdtypes.c
index cc1d7fad8..b0345c46b 100644
--- a/numpy/core/src/multiarray/abstractdtypes.c
+++ b/numpy/core/src/multiarray/abstractdtypes.c
@@ -259,6 +259,7 @@ NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyIntAbstractDType = {{{
.tp_name = "numpy._IntegerAbstractDType",
},},
.flags = NPY_DT_ABSTRACT,
+ .type_num = -1,
.dt_slots = &pyintabstractdtype_slots,
};
@@ -276,6 +277,7 @@ NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyFloatAbstractDType = {{{
.tp_name = "numpy._FloatAbstractDType",
},},
.flags = NPY_DT_ABSTRACT,
+ .type_num = -1,
.dt_slots = &pyfloatabstractdtype_slots,
};
@@ -293,5 +295,6 @@ NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyComplexAbstractDType = {{{
.tp_name = "numpy._ComplexAbstractDType",
},},
.flags = NPY_DT_ABSTRACT,
+ .type_num = -1,
.dt_slots = &pycomplexabstractdtype_slots,
};
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index ff77d6883..562e4f008 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -230,6 +230,16 @@ npy_discover_dtype_from_pytype(PyTypeObject *pytype)
return (PyArray_DTypeMeta *)DType;
}
+/*
+ * Note: This function never fails, but will return `NULL` for unknown scalars
+ * and `None` for known array-likes (e.g. tuple, list, ndarray).
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_DiscoverDTypeFromScalarType(PyTypeObject *pytype)
+{
+ return (PyObject *)npy_discover_dtype_from_pytype(pytype);
+}
+
/**
* Find the correct DType class for the given python type. If flags is NULL
diff --git a/numpy/core/src/multiarray/array_coercion.h b/numpy/core/src/multiarray/array_coercion.h
index f2482cecc..63d543cf7 100644
--- a/numpy/core/src/multiarray/array_coercion.h
+++ b/numpy/core/src/multiarray/array_coercion.h
@@ -19,6 +19,9 @@ NPY_NO_EXPORT int
_PyArray_MapPyTypeToDType(
PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef);
+NPY_NO_EXPORT PyObject *
+PyArray_DiscoverDTypeFromScalarType(PyTypeObject *pytype);
+
NPY_NO_EXPORT int
PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value);
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index 8ed91d26c..40f1da2c4 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -312,6 +312,14 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
NPY_NO_EXPORT PyObject *
PyArray_ScalarFromObject(PyObject *object)
{
+ if (DEPRECATE(
+ "PyArray_ScalarFromObject() is deprecated and scheduled for "
+ "removal. If you are using this (undocumented) function, "
+            "please notify the NumPy developers to look for solutions. "
+ "(Deprecated in NumPy 1.23)") < 0) {
+ return NULL;
+ }
+
PyObject *ret = NULL;
if (PyArray_IsZeroDim(object)) {
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index 402e6b561..f273cdd91 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -26,6 +26,13 @@
#include "binop_override.h"
#include "npy_longdouble.h"
+#include "array_coercion.h"
+#include "common.h"
+#include "can_cast_table.h"
+
+/* TODO: Used for some functions, should possibly move these to npy_math.h */
+#include "loops.h"
+
/* Basic operations:
*
* BINARY:
@@ -45,23 +52,22 @@
* #name = byte, short, int, long, longlong#
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
*/
-static void
+static NPY_INLINE int
@name@_ctype_add(@type@ a, @type@ b, @type@ *out) {
*out = a + b;
if ((*out^a) >= 0 || (*out^b) >= 0) {
- return;
+ return 0;
}
- npy_set_floatstatus_overflow();
- return;
+ return NPY_FPE_OVERFLOW;
}
-static void
+
+static NPY_INLINE int
@name@_ctype_subtract(@type@ a, @type@ b, @type@ *out) {
*out = a - b;
if ((*out^a) >= 0 || (*out^~b) >= 0) {
- return;
+ return 0;
}
- npy_set_floatstatus_overflow();
- return;
+ return NPY_FPE_OVERFLOW;
}
/**end repeat**/
@@ -69,23 +75,22 @@ static void
* #name = ubyte, ushort, uint, ulong, ulonglong#
* #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
*/
-static void
+static NPY_INLINE int
@name@_ctype_add(@type@ a, @type@ b, @type@ *out) {
*out = a + b;
if (*out >= a && *out >= b) {
- return;
+ return 0;
}
- npy_set_floatstatus_overflow();
- return;
+ return NPY_FPE_OVERFLOW;
}
-static void
+
+static NPY_INLINE int
@name@_ctype_subtract(@type@ a, @type@ b, @type@ *out) {
*out = a - b;
if (a >= b) {
- return;
+ return 0;
}
- npy_set_floatstatus_overflow();
- return;
+ return NPY_FPE_OVERFLOW;
}
/**end repeat**/
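A small Python sketch, assuming 8-bit wrapping arithmetic, of why the sign test above detects signed overflow (overflow occurred exactly when the result's sign differs from the signs of both inputs):

    def add8_with_overflow(a, b):
        # Simulate npy_byte addition with the same XOR sign test as above.
        out = ((a + b + 128) % 256) - 128       # wrap into the int8 range
        # No overflow iff the result shares a sign with a or with b.
        overflowed = not ((out ^ a) >= 0 or (out ^ b) >= 0)
        return out, overflowed

    add8_with_overflow(100, 100)    # (-56, True): 200 does not fit in int8
    add8_with_overflow(100, -100)   # (0, False)
    # For the unsigned variants the check is simply `out >= a and out >= b`
    # (addition) and `a >= b` (subtraction).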
@@ -108,18 +113,19 @@ static void
* #neg = (1,0)*4#
*/
#if NPY_SIZEOF_@SIZE@ > NPY_SIZEOF_@SIZENAME@
-static void
+static NPY_INLINE int
@name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) {
@big@ temp;
temp = ((@big@) a) * ((@big@) b);
*out = (@type@) temp;
#if @neg@
- if (temp > NPY_MAX_@NAME@ || temp < NPY_MIN_@NAME@)
+ if (temp > NPY_MAX_@NAME@ || temp < NPY_MIN_@NAME@) {
#else
- if (temp > NPY_MAX_@NAME@)
+ if (temp > NPY_MAX_@NAME@) {
#endif
- npy_set_floatstatus_overflow();
- return;
+ return NPY_FPE_OVERFLOW;
+ }
+ return 0;
}
#endif
/**end repeat**/
@@ -133,12 +139,12 @@ static void
* #SIZE = INT*2, LONG*2, LONGLONG*2#
*/
#if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_@SIZE@
-static void
+static NPY_INLINE int
@name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) {
if (npy_mul_with_overflow_@name@(out, a, b)) {
- npy_set_floatstatus_overflow();
+ return NPY_FPE_OVERFLOW;
}
- return;
+ return 0;
}
#endif
/**end repeat**/
@@ -151,16 +157,16 @@ static void
* npy_long, npy_ulong, npy_longlong, npy_ulonglong#
* #neg = (1,0)*5#
*/
-static void
+static NPY_INLINE int
@name@_ctype_divide(@type@ a, @type@ b, @type@ *out) {
if (b == 0) {
- npy_set_floatstatus_divbyzero();
*out = 0;
+ return NPY_FPE_DIVIDEBYZERO;
}
#if @neg@
else if (b == -1 && a < 0 && a == -a) {
- npy_set_floatstatus_overflow();
*out = a / b;
+ return NPY_FPE_OVERFLOW;
}
#endif
else {
@@ -174,17 +180,20 @@ static void
#else
*out = a / b;
#endif
+ return 0;
}
}
#define @name@_ctype_floor_divide @name@_ctype_divide
-static void
+static NPY_INLINE int
@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) {
if (a == 0 || b == 0) {
- if (b == 0) npy_set_floatstatus_divbyzero();
*out = 0;
- return;
+ if (b == 0) {
+ return NPY_FPE_DIVIDEBYZERO;
+ }
+ return 0;
}
#if @neg@
else if ((a > 0) == (b > 0)) {
@@ -198,6 +207,7 @@ static void
#else
*out = a % b;
#endif
+ return 0;
}
/**end repeat**/
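The mixed-sign fix-up above exists because C integer division truncates toward zero while NumPy follows Python's floor semantics. A Python sketch of the correction (hypothetical helper names):

    def c_style_div(a, b):
        # Truncate toward zero, like C's `/` operator.
        q = abs(a) // abs(b)
        return q if (a >= 0) == (b >= 0) else -q

    def floor_div(a, b):
        q = c_style_div(a, b)
        if a % b != 0 and (a < 0) != (b < 0):
            q -= 1          # round toward -inf when signs differ
        return q

    c_style_div(-7, 2)   # -3: the raw C result
    floor_div(-7, 2)     # -4: matches Python's -7 // 2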
@@ -205,10 +215,15 @@ static void
*
* #name = byte, ubyte, short, ushort, int, uint, long,
* ulong, longlong, ulonglong#
- * #otyp = npy_float*4, npy_double*6#
*/
-#define @name@_ctype_true_divide(a, b, out) \
- *(out) = ((@otyp@) (a)) / ((@otyp@) (b));
+
+static NPY_INLINE int
+@name@_ctype_true_divide(npy_@name@ a, npy_@name@ b, npy_double *out)
+{
+ *out = (npy_double)a / (npy_double)b;
+ return 0;
+}
+
/**end repeat**/
/* b will always be positive in this call */
@@ -221,17 +236,17 @@ static void
* #upc = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG#
*/
-static void
+static NPY_INLINE int
@name@_ctype_power(@type@ a, @type@ b, @type@ *out) {
@type@ tmp;
if (b == 0) {
*out = 1;
- return;
+ return 0;
}
if (a == 1) {
*out = 1;
- return;
+ return 0;
}
tmp = b & 1 ? a : 1;
@@ -244,6 +259,7 @@ static void
b >>= 1;
}
*out = tmp;
+ return 0;
}
/**end repeat**/
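The loop above is exponentiation by squaring; the same idea as a Python sketch:

    def int_pow(a, b):
        # Mirrors the ctype_power loop: consume the exponent bit by bit.
        out = a if b & 1 else 1
        b >>= 1
        while b:
            a *= a           # square the base each round
            if b & 1:
                out *= a     # fold in the contribution of the set bit
            b >>= 1
        return out

    int_pow(3, 5)   # 243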
@@ -261,12 +277,28 @@ static void
* #op = &, ^, |#
*/
-#define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2)
+static NPY_INLINE int
+@name@_ctype_@oper@(@type@ arg1, @type@ arg2, @type@ *out)
+{
+ *out = arg1 @op@ arg2;
+ return 0;
+}
/**end repeat1**/
-#define @name@_ctype_lshift(arg1, arg2, out) *(out) = npy_lshift@suffix@(arg1, arg2)
-#define @name@_ctype_rshift(arg1, arg2, out) *(out) = npy_rshift@suffix@(arg1, arg2)
+static NPY_INLINE int
+@name@_ctype_lshift(@type@ arg1, @type@ arg2, @type@ *out)
+{
+ *out = npy_lshift@suffix@(arg1, arg2);
+ return 0;
+}
+
+static NPY_INLINE int
+@name@_ctype_rshift(@type@ arg1, @type@ arg2, @type@ *out)
+{
+ *out = npy_rshift@suffix@(arg1, arg2);
+ return 0;
+}
/**end repeat**/
@@ -275,135 +307,162 @@ static void
* #type = npy_float, npy_double, npy_longdouble#
* #c = f, , l#
*/
-#define @name@_ctype_add(a, b, outp) *(outp) = (a) + (b)
-#define @name@_ctype_subtract(a, b, outp) *(outp) = (a) - (b)
-#define @name@_ctype_multiply(a, b, outp) *(outp) = (a) * (b)
-#define @name@_ctype_divide(a, b, outp) *(outp) = (a) / (b)
+
+/**begin repeat1
+ * #OP = +, -, *, /#
+ * #oper = add, subtract, multiply, divide#
+ */
+
+static NPY_INLINE int
+@name@_ctype_@oper@(@type@ a, @type@ b, @type@ *out)
+{
+ *out = a @OP@ b;
+ return 0;
+}
+
+/**end repeat1**/
+
#define @name@_ctype_true_divide @name@_ctype_divide
-static void
+static NPY_INLINE int
@name@_ctype_floor_divide(@type@ a, @type@ b, @type@ *out) {
*out = npy_floor_divide@c@(a, b);
+ return 0;
}
-static void
+static NPY_INLINE int
@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) {
*out = npy_remainder@c@(a, b);
+ return 0;
}
-static void
+static NPY_INLINE int
@name@_ctype_divmod(@type@ a, @type@ b, @type@ *out1, @type@ *out2) {
*out1 = npy_divmod@c@(a, b, out2);
+ return 0;
}
/**end repeat**/
-#define half_ctype_add(a, b, outp) *(outp) = \
- npy_float_to_half(npy_half_to_float(a) + npy_half_to_float(b))
-#define half_ctype_subtract(a, b, outp) *(outp) = \
- npy_float_to_half(npy_half_to_float(a) - npy_half_to_float(b))
-#define half_ctype_multiply(a, b, outp) *(outp) = \
- npy_float_to_half(npy_half_to_float(a) * npy_half_to_float(b))
-#define half_ctype_divide(a, b, outp) *(outp) = \
- npy_float_to_half(npy_half_to_float(a) / npy_half_to_float(b))
+/**begin repeat
+ * #OP = +, -, *, /#
+ * #oper = add, subtract, multiply, divide#
+ */
+
+static NPY_INLINE int
+half_ctype_@oper@(npy_half a, npy_half b, npy_half *out)
+{
+ float res = npy_half_to_float(a) @OP@ npy_half_to_float(b);
+ *out = npy_float_to_half(res);
+ return 0;
+}
+
+/**end repeat**/
#define half_ctype_true_divide half_ctype_divide
-static void
-half_ctype_floor_divide(npy_half a, npy_half b, npy_half *out) {
+static NPY_INLINE int
+half_ctype_floor_divide(npy_half a, npy_half b, npy_half *out)
+{
npy_half mod;
if (!b) {
- *out = a / b;
- } else {
+ float res = npy_half_to_float(a) / npy_half_to_float(b);
+ *out = npy_float_to_half(res);
+ }
+ else {
*out = npy_half_divmod(a, b, &mod);
}
+ return 0;
}
-static void
-half_ctype_remainder(npy_half a, npy_half b, npy_half *out) {
+static NPY_INLINE int
+half_ctype_remainder(npy_half a, npy_half b, npy_half *out)
+{
npy_half_divmod(a, b, out);
+ return 0;
}
-static void
-half_ctype_divmod(npy_half a, npy_half b, npy_half *out1, npy_half *out2) {
+static NPY_INLINE int
+half_ctype_divmod(npy_half a, npy_half b, npy_half *out1, npy_half *out2)
+{
*out1 = npy_half_divmod(a, b, out2);
+ return 0;
}
/**begin repeat
* #name = cfloat, cdouble, clongdouble#
+ * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
+ * #TYPE = CFLOAT, CDOUBLE, CLONGDOUBLE#
* #rname = float, double, longdouble#
* #rtype = npy_float, npy_double, npy_longdouble#
* #c = f,,l#
*/
-#define @name@_ctype_add(a, b, outp) do{ \
- (outp)->real = (a).real + (b).real; \
- (outp)->imag = (a).imag + (b).imag; \
- } while(0)
-#define @name@_ctype_subtract(a, b, outp) do{ \
- (outp)->real = (a).real - (b).real; \
- (outp)->imag = (a).imag - (b).imag; \
- } while(0)
-#define @name@_ctype_multiply(a, b, outp) do{ \
- (outp)->real = (a).real * (b).real - (a).imag * (b).imag; \
- (outp)->imag = (a).real * (b).imag + (a).imag * (b).real; \
- } while(0)
-/* Algorithm identical to that in loops.c.src, for consistency */
-#define @name@_ctype_divide(a, b, outp) do{ \
- @rtype@ in1r = (a).real; \
- @rtype@ in1i = (a).imag; \
- @rtype@ in2r = (b).real; \
- @rtype@ in2i = (b).imag; \
- @rtype@ in2r_abs = npy_fabs@c@(in2r); \
- @rtype@ in2i_abs = npy_fabs@c@(in2i); \
- if (in2r_abs >= in2i_abs) { \
- if (in2r_abs == 0 && in2i_abs == 0) { \
- /* divide by zero should yield a complex inf or nan */ \
- (outp)->real = in1r/in2r_abs; \
- (outp)->imag = in1i/in2i_abs; \
- } \
- else { \
- @rtype@ rat = in2i/in2r; \
- @rtype@ scl = 1.0@c@/(in2r + in2i*rat); \
- (outp)->real = (in1r + in1i*rat)*scl; \
- (outp)->imag = (in1i - in1r*rat)*scl; \
- } \
- } \
- else { \
- @rtype@ rat = in2r/in2i; \
- @rtype@ scl = 1.0@c@/(in2i + in2r*rat); \
- (outp)->real = (in1r*rat + in1i)*scl; \
- (outp)->imag = (in1i*rat - in1r)*scl; \
- } \
- } while(0)
+static NPY_INLINE int
+@name@_ctype_add(@type@ a, @type@ b, @type@ *out)
+{
+ out->real = a.real + b.real;
+ out->imag = a.imag + b.imag;
+ return 0;
+}
+
+static NPY_INLINE int
+@name@_ctype_subtract(@type@ a, @type@ b, @type@ *out)
+{
+ out->real = a.real - b.real;
+ out->imag = a.imag - b.imag;
+ return 0;
+}
+
+
+/*
+ * TODO: Mark as  to work around FPEs not being issued on clang 12.
+ * This should be removed when possible.
+ */
+static NPY_INLINE int
+@name@_ctype_multiply( @type@ a, @type@ b, @type@ *out)
+{
+ out->real = a.real * b.real - a.imag * b.imag;
+ out->imag = a.real * b.imag + a.imag * b.real;
+ return 0;
+}
+
+/* Use the ufunc loop directly to avoid duplicating the complicated logic */
+static NPY_INLINE int
+@name@_ctype_divide(@type@ a, @type@ b, @type@ *out)
+{
+ char *args[3] = {(char *)&a, (char *)&b, (char *)out};
+ npy_intp steps[3];
+ npy_intp size = 1;
+ @TYPE@_divide(args, &size, steps, NULL);
+ return 0;
+}
#define @name@_ctype_true_divide @name@_ctype_divide
-#define @name@_ctype_floor_divide(a, b, outp) do { \
- @rname@_ctype_floor_divide( \
- ((a).real*(b).real + (a).imag*(b).imag), \
- ((b).real*(b).real + (b).imag*(b).imag), \
- &((outp)->real)); \
- (outp)->imag = 0; \
- } while(0)
/**end repeat**/
/**begin repeat
* #name = byte, ubyte, short, ushort, int, uint, long, ulong,
- * longlong, ulonglong, cfloat, cdouble, clongdouble#
+ * longlong, ulonglong#
*/
-#define @name@_ctype_divmod(a, b, out, out2) { \
- @name@_ctype_floor_divide(a, b, out); \
- @name@_ctype_remainder(a, b, out2); \
- }
+
+static NPY_INLINE int
+@name@_ctype_divmod(npy_@name@ a, npy_@name@ b, npy_@name@ *out, npy_@name@ *out2)
+{
+ int res = @name@_ctype_floor_divide(a, b, out);
+ res |= @name@_ctype_remainder(a, b, out2);
+ return res;
+}
+
/**end repeat**/
@@ -413,20 +472,22 @@ half_ctype_divmod(npy_half a, npy_half b, npy_half *out1, npy_half *out2) {
* #c = f,,l#
*/
-static void
+static NPY_INLINE int
@name@_ctype_power(@type@ a, @type@ b, @type@ *out)
{
*out = npy_pow@c@(a, b);
+ return 0;
}
/**end repeat**/
-static void
+static NPY_INLINE int
half_ctype_power(npy_half a, npy_half b, npy_half *out)
{
const npy_float af = npy_half_to_float(a);
const npy_float bf = npy_half_to_float(b);
const npy_float outf = npy_powf(af,bf);
*out = npy_float_to_half(outf);
+ return 0;
}
/**begin repeat
@@ -438,20 +499,23 @@ half_ctype_power(npy_half a, npy_half b, npy_half *out)
* npy_float, npy_double, npy_longdouble#
* #uns = (0,1)*5,0*3#
*/
-static void
+static NPY_INLINE int
@name@_ctype_negative(@type@ a, @type@ *out)
{
+ *out = -a;
#if @uns@
- npy_set_floatstatus_overflow();
+ return NPY_FPE_OVERFLOW;
+#else
+ return 0;
#endif
- *out = -a;
}
/**end repeat**/
-static void
+static NPY_INLINE int
half_ctype_negative(npy_half a, npy_half *out)
{
*out = a^0x8000u;
+ return 0;
}
@@ -459,11 +523,12 @@ half_ctype_negative(npy_half a, npy_half *out)
* #name = cfloat, cdouble, clongdouble#
* #type = npy_cfloat, npy_cdouble, npy_clongdouble#
*/
-static void
+static NPY_INLINE int
@name@_ctype_negative(@type@ a, @type@ *out)
{
out->real = -a.real;
out->imag = -a.imag;
+ return 0;
}
/**end repeat**/
@@ -475,10 +540,11 @@ static void
* npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_half, npy_float, npy_double, npy_longdouble#
*/
-static void
+static NPY_INLINE int
@name@_ctype_positive(@type@ a, @type@ *out)
{
*out = a;
+ return 0;
}
/**end repeat**/
@@ -487,17 +553,19 @@ static void
* #type = npy_cfloat, npy_cdouble, npy_clongdouble#
* #c = f,,l#
*/
-static void
+static NPY_INLINE int
@name@_ctype_positive(@type@ a, @type@ *out)
{
out->real = a.real;
out->imag = a.imag;
+ return 0;
}
-static void
+static NPY_INLINE int
@name@_ctype_power(@type@ a, @type@ b, @type@ *out)
{
*out = npy_cpow@c@(a, b);
+ return 0;
}
/**end repeat**/
@@ -515,10 +583,11 @@ static void
* #name = byte, short, int, long, longlong#
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
*/
-static void
+static NPY_INLINE int
@name@_ctype_absolute(@type@ a, @type@ *out)
{
*out = (a < 0 ? -a : a);
+ return 0;
}
/**end repeat**/
@@ -527,17 +596,19 @@ static void
* #type = npy_float, npy_double, npy_longdouble#
* #c = f,,l#
*/
-static void
+static NPY_INLINE int
@name@_ctype_absolute(@type@ a, @type@ *out)
{
*out = npy_fabs@c@(a);
+ return 0;
}
/**end repeat**/
-static void
+static NPY_INLINE int
half_ctype_absolute(npy_half a, npy_half *out)
{
*out = a&0x7fffu;
+ return 0;
}
/**begin repeat
@@ -546,10 +617,11 @@ half_ctype_absolute(npy_half a, npy_half *out)
* #rtype = npy_float, npy_double, npy_longdouble#
* #c = f,,l#
*/
-static void
+static NPY_INLINE int
@name@_ctype_absolute(@type@ a, @rtype@ *out)
{
*out = npy_cabs@c@(a);
+ return 0;
}
/**end repeat**/
@@ -558,196 +630,400 @@ static void
* ulong, longlong, ulonglong#
*/
-#define @name@_ctype_invert(a, out) *(out) = ~a;
+static NPY_INLINE int
+@name@_ctype_invert(npy_@name@ a, npy_@name@ *out)
+{
+ *out = ~a;
+ return 0;
+}
/**end repeat**/
/*** END OF BASIC CODE **/
-/* The general strategy for commutative binary operators is to
+/*
+ * How binary operators work
+ * -------------------------
+ *
+ * All binary (numeric) operators use the larger of the two types, with the
+ * exception of unsigned int and signed int mixed cases which must promote
+ * to a larger type.
+ *
+ * The strategy employed for all binary operations is that we coerce the
+ * other scalar if it is safe to do so. E.g. for `float64 + float32` the
+ * `float64` can convert the `float32` and do the operation as
+ * `float64 + float64`.
+ * OTOH, for `float32 + float64` this is not safe, and we should defer to
+ * `float64`.
+ *
+ * So we have multiple possible paths:
+ * - The other scalar is a subclass. In principle *both* inputs could be
+ *   different subclasses. In this case it would make sense to defer, but
+ *   Python's `int` does not try this either, so we do not here:
+ *
+ * class A(int): pass
+ * class B(int):
+ * def __add__(self, other): return "b"
+ * __radd__ = __add__
+ *
+ *      A(1) + B(1)  # returns 2
+ *      B(1) + A(1)  # returns "b"
+ *
+ * - The other scalar can be converted: All is good, we do the operation
+ * - The other scalar cannot be converted, there are two possibilities:
+ * - The reverse should work, so we return NotImplemented to defer.
+ * (If self is a subclass, this will end up in the "unknown" path.)
+ * - Neither works (e.g. `uint8 + int8`): We currently use the array path.
+ * - The other object is unknown. It could be either a scalar, an array,
+ *   or an array-like (including a list!). Because NumPy scalars pretend to
+ *   be arrays, we _normally_ fall into the array fallback path here
+ *   (through the generic scalar path).
+ *   First we check whether we should defer, though.
+ *
+ * The last possibility is awkward and leads to very confusing situations.
+ * The problem is that usually we should defer (return NotImplemented)
+ * in that path.
+ * If the other object is a NumPy array (or array-like) it will know what to
+ * do. If NumPy knows that it is a scalar (not generic `object`), then it
+ * would make sense to try and use the "array path" (i.e. deal with it
+ * using the ufunc machinery).
+ *
+ * But this overlooks two things that currently work:
+ *
+ * 1. `np.float64(3) * [1, 2, 3]` happily returns an array result.
+ * 2. `np.int32(3) * decimal.Decimal(3)` works! (see below)
+ *
+ * The first must work, because scalars pretend to be arrays, which means
+ * they inherit the greedy "convert the other object to an array" logic.
+ * This may be a questionable choice, but it is fine.
+ * (As of now, it is not negotiable, since NumPy often converts 0-D arrays
+ * to scalars.)
+ *
+ * The second one is more confusing. It also works by using the ufunc
+ * machinery (array path), and it works because:
+ *
+ * np.add(np.int32(3), decimal.Decimal(3))
+ *
+ * Will convert the `int32` to an int32 array, and the decimal to an object
+ * array. It then *casts* the `int32` array to an object array.
+ * The casting step CONVERTS the integer to a Python integer. The ufunc object
+ * loop will then call back into Python scalar logic.
*
- * 1) Convert the types to the common type if both are scalars (0 return)
- * 2) If both are not scalars use ufunc machinery (-2 return)
- * 3) If both are scalars but cannot be cast to the right type
- * return NotImplemented (-1 return)
+ * The above would be recursive, if it were not for the conversion of the
+ * int32 to a Python integer!
+ * This leads us to the EXCEEDINGLY IMPORTANT special case:
*
- * 4) Perform the function on the C-type.
- * 5) If an error condition occurred, check to see
- * what the current error-handling is and handle the error.
+ * WARNING: longdouble and clongdouble do NOT convert to a Python scalar
+ * when cast to object. Thus they MUST NEVER take the array-path.
+ * However, they STILL should defer at least for
+ * `np.longdouble(3) + array`.
*
- * 6) Construct and return the output scalar.
+ *
+ * As a general note, in the above we defer exactly when we know that deferring
+ * will work. `longdouble` uses the "simple" logic of generally deferring
+ * though, because it would otherwise easily run into an infinite recursion.
+ *
+ *
+ * The future?!
+ * ------------
+ *
+ * This is very tricky and it would be nice to formalize away that "recursive"
+ * path we currently use. I (seberg) currently have no great idea on this;
+ * this is more brainstorming!
+ *
+ * If both are scalars (known to NumPy), they have a DType and we may be able
+ * to do the ufunc promotion to make sure there is no risk of recursion.
+ *
+ * In principle, always deferring would probably be clean. But we likely
+ * cannot do that? There is also the issue that it is nice to allow adding
+ * a DType for an existing Python scalar (which will not know about NumPy
+ * scalars).
+ * The DType/ufunc machinery teaches NumPy how arrays will work with that
+ * Python scalar, but the DType may need to help us decide whether we should
+ * defer (return NotImplemented) or try using the ufunc machinery (or a
+ * simplified ufunc-like machinery limited to scalars).
*/
+
+/*
+ * Enum used to describe the space of possibilities when converting the second
+ * argument to a binary operation.
+ */
+typedef enum {
+ /* An error occurred (should not really happen/be possible) */
+ CONVERSION_ERROR = -1,
+ /* A known NumPy scalar, but of higher precision: we defer */
+ DEFER_TO_OTHER_KNOWN_SCALAR,
+ /* Conversion was successful (known scalar of less precision) */
+ CONVERSION_SUCCESS,
+ /*
+     * The other object is an unknown scalar or array-like; we (typically)
+     * use the generic path, which normally ends up in the ufunc machinery.
+ */
+ OTHER_IS_UNKNOWN_OBJECT,
+ /*
+ * Promotion necessary
+ */
+ PROMOTION_REQUIRED,
+ /*
+     * The other object may be a subclass; conversion was successful. We do
+     * not special-case this, as Python's `int` does not either.
+ */
+ OTHER_IS_SUBCLASS,
+} conversion_result;
+
/**begin repeat
* #name = byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong,
- * half, float, longdouble,
+ * half, float, double, longdouble,
* cfloat, cdouble, clongdouble#
- * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
- * npy_long, npy_ulong, npy_longlong, npy_ulonglong,
- * npy_half, npy_float, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble#
* #Name = Byte, UByte, Short, UShort, Int, UInt,
* Long, ULong, LongLong, ULongLong,
- * Half, Float, LongDouble,
+ * Half, Float, Double, LongDouble,
* CFloat, CDouble, CLongDouble#
- * #TYPE = NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT,
- * NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG,
- * NPY_HALF, NPY_FLOAT, NPY_LONGDOUBLE,
- * NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE#
+ * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ * LONG, ULONG, LONGLONG, ULONGLONG,
+ * HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ * CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
+ * npy_long, npy_ulong, npy_longlong, npy_ulonglong,
+ * npy_half, npy_float, npy_double, npy_longdouble,
+ * npy_cfloat, npy_cdouble, npy_clongdouble#
*/
-static int
-_@name@_convert_to_ctype(PyObject *a, @type@ *arg1)
-{
- PyObject *temp;
-
- if (PyArray_IsScalar(a, @Name@)) {
- *arg1 = PyArrayScalar_VAL(a, @Name@);
- return 0;
- }
- else if (PyArray_IsScalar(a, Generic)) {
- PyArray_Descr *descr1;
+#define IS_@TYPE@ 1
- if (!PyArray_IsScalar(a, Number)) {
- return -1;
- }
- descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a));
- if (PyArray_CanCastSafely(descr1->type_num, @TYPE@)) {
- PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@);
- Py_DECREF(descr1);
- return 0;
- }
- else {
- Py_DECREF(descr1);
- return -1;
- }
- }
- else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) {
- return -2;
- }
- else if ((temp = PyArray_ScalarFromObject(a)) != NULL) {
- int retval = _@name@_convert_to_ctype(temp, arg1);
+#define IS_SAFE(FROM, TO) _npy_can_cast_safely_table[FROM][TO]
- Py_DECREF(temp);
- return retval;
- }
- return -2;
-}
+/*
+ * TODO: This whole thing is awkward, and we should create a helper header to
+ * define inline functions that convert single elements for all numeric
+ * types. That could then also be used to define all cast loops.
+ * (Even if that may get more complex for SIMD at some point.)
+ * For now, half casts could be optimized because of that.
+ */
-/**end repeat**/
+#if defined(IS_HALF)
+ #define CONVERT_TO_RESULT(value) \
+ *result = npy_float_to_half((float)(value))
+#elif defined(IS_CFLOAT) || defined(IS_CDOUBLE) || defined(IS_CLONGDOUBLE)
+ #define CONVERT_TO_RESULT(value) \
+ result->real = value; \
+ result->imag = 0
+#else
+ #define CONVERT_TO_RESULT(value) *result = value
+#endif
-/* Same as above but added exact checks against known python types for speed */
+#define GET_VALUE_OR_DEFER(OTHER, Other, value) \
+ case NPY_##OTHER: \
+ if (IS_SAFE(NPY_##OTHER, NPY_@TYPE@)) { \
+ assert(Py_TYPE(value) == &Py##Other##ArrType_Type); \
+ CONVERT_TO_RESULT(PyArrayScalar_VAL(value, Other)); \
+ ret = CONVERSION_SUCCESS; \
+ } \
+ else if (IS_SAFE(NPY_@TYPE@, NPY_##OTHER)) { \
+ /*
+ * If self can cast safely to other, this is clear:
+ * we should definitely defer.
+ */ \
+ ret = DEFER_TO_OTHER_KNOWN_SCALAR; \
+ } \
+ else { \
+ /* Otherwise, we must promote */ \
+ ret = PROMOTION_REQUIRED; \
+ } \
+ break;
-/**begin repeat
- * #name = double#
- * #type = npy_double#
- * #Name = Double#
- * #TYPE = NPY_DOUBLE#
- * #PYCHECKEXACT = PyFloat_CheckExact#
- * #PYEXTRACTCTYPE = PyFloat_AS_DOUBLE#
+/*
+ * Complex to complex (and rejecting complex to real) is a bit different:
*/
-static int
-_@name@_convert_to_ctype(PyObject *a, @type@ *arg1)
-{
- PyObject *temp;
-
- if (@PYCHECKEXACT@(a)){
- *arg1 = @PYEXTRACTCTYPE@(a);
- return 0;
- }
-
- if (PyArray_IsScalar(a, @Name@)) {
- *arg1 = PyArrayScalar_VAL(a, @Name@);
- return 0;
- }
- else if (PyArray_IsScalar(a, Generic)) {
- PyArray_Descr *descr1;
+#if defined(IS_CFLOAT) || defined(IS_CDOUBLE) || defined(IS_CLONGDOUBLE)
+
+#define GET_CVALUE_OR_DEFER(OTHER, Other, value) \
+ case NPY_##OTHER: \
+ if (IS_SAFE(NPY_##OTHER, NPY_@TYPE@)) { \
+ assert(Py_TYPE(value) == &Py##Other##ArrType_Type); \
+ result->real = PyArrayScalar_VAL(value, Other).real; \
+ result->imag = PyArrayScalar_VAL(value, Other).imag; \
+            ret = CONVERSION_SUCCESS;                                 \
+ } \
+ else if (IS_SAFE(NPY_@TYPE@, NPY_##OTHER)) { \
+ ret = DEFER_TO_OTHER_KNOWN_SCALAR; \
+ } \
+ else { \
+ ret = PROMOTION_REQUIRED; \
+ } \
+ break;
- if (!PyArray_IsScalar(a, Number)) {
- return -1;
- }
- descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a));
- if (PyArray_CanCastSafely(descr1->type_num, @TYPE@)) {
- PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@);
- Py_DECREF(descr1);
- return 0;
- }
- else {
- Py_DECREF(descr1);
- return -1;
- }
- }
- else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) {
- return -2;
- }
- else if ((temp = PyArray_ScalarFromObject(a)) != NULL) {
- int retval = _@name@_convert_to_ctype(temp, arg1);
+#else
- Py_DECREF(temp);
- return retval;
- }
- return -2;
-}
+/* Getting a complex value to real is never safe: */
+#define GET_CVALUE_OR_DEFER(OTHER, Other, value) \
+ case NPY_##OTHER: \
+ if (IS_SAFE(NPY_@TYPE@, NPY_##OTHER)) { \
+ ret = DEFER_TO_OTHER_KNOWN_SCALAR; \
+ } \
+ else { \
+ ret = PROMOTION_REQUIRED; \
+ } \
+ break;
-/**end repeat**/
+#endif
-/**begin repeat
- * #name = byte, ubyte, short, ushort, int, uint,
- * long, ulong, longlong, ulonglong,
- * half, float, double, cfloat, cdouble#
- * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
- * npy_long, npy_ulong, npy_longlong, npy_ulonglong,
- * npy_half, npy_float, npy_double, npy_cfloat, npy_cdouble#
+/**
+ * Convert the value to our own type and store the result.
+ *
+ * @param value The value to convert (if compatible)
+ * @param result The result value (output)
+ * @returns A `conversion_result` value indicating what we did with `value`
+ *    or what type of object it is.
*/
-static int
-_@name@_convert2_to_ctypes(PyObject *a, @type@ *arg1,
- PyObject *b, @type@ *arg2)
+static NPY_INLINE conversion_result
+convert_to_@name@(PyObject *value, @type@ *result)
{
- int ret;
- ret = _@name@_convert_to_ctype(a, arg1);
- if (ret < 0) {
- return ret;
+ if (Py_TYPE(value) == &Py@Name@ArrType_Type) {
+ *result = PyArrayScalar_VAL(value, @Name@);
+ return CONVERSION_SUCCESS;
}
- ret = _@name@_convert_to_ctype(b, arg2);
- if (ret < 0) {
- return ret;
+    /* A subclass of the same scalar type: extract the value directly. */
+ if (PyArray_IsScalar(value, @Name@)) {
+ *result = PyArrayScalar_VAL(value, @Name@);
+ /*
+         * In principle special, asymmetric handling could be possible for
+         * subclasses. But in practice we do not even bother with it later.
+ */
+ return OTHER_IS_SUBCLASS;
}
- return 0;
-}
-/**end repeat**/
-/**begin repeat
- * #name = longdouble, clongdouble#
- * #type = npy_longdouble, npy_clongdouble#
- */
+ /*
+ * Then we check for the basic Python types float, int, and complex.
+ * (this is a bit tedious to do right for complex).
+ */
+ if (PyBool_Check(value)) {
+ CONVERT_TO_RESULT(value == Py_True);
+ return CONVERSION_SUCCESS;
+ }
-static int
-_@name@_convert2_to_ctypes(PyObject *a, @type@ *arg1,
- PyObject *b, @type@ *arg2)
-{
- int ret;
- ret = _@name@_convert_to_ctype(a, arg1);
- if (ret == -2) {
- ret = -3;
+ if (IS_SAFE(NPY_DOUBLE, NPY_@TYPE@) && PyFloat_CheckExact(value)) {
+ CONVERT_TO_RESULT(PyFloat_AS_DOUBLE(value));
+ return CONVERSION_SUCCESS;
}
- if (ret < 0) {
- return ret;
+
+ if (IS_SAFE(NPY_LONG, NPY_@TYPE@) && PyLong_CheckExact(value)) {
+ int overflow;
+ long val = PyLong_AsLongAndOverflow(value, &overflow);
+ if (overflow) {
+ return OTHER_IS_UNKNOWN_OBJECT; /* handle as if arbitrary object */
+ }
+ if (error_converting(val)) {
+ return CONVERSION_ERROR; /* should not be possible */
+ }
+ CONVERT_TO_RESULT(val);
+ return CONVERSION_SUCCESS;
}
- ret = _@name@_convert_to_ctype(b, arg2);
- if (ret == -2) {
- ret = -3;
+
+#if defined(IS_CFLOAT) || defined(IS_CDOUBLE) || defined(IS_CLONGDOUBLE)
+ if (IS_SAFE(NPY_CDOUBLE, NPY_@TYPE@) && PyComplex_CheckExact(value)) {
+ Py_complex val = PyComplex_AsCComplex(value);
+ if (error_converting(val.real)) {
+ return CONVERSION_ERROR; /* should not be possible */
+ }
+ result->real = val.real;
+ result->imag = val.imag;
+ return CONVERSION_SUCCESS;
}
- if (ret < 0) {
- return ret;
+#endif /* defined(IS_CFLOAT) || ... */
+
+ PyObject *dtype = PyArray_DiscoverDTypeFromScalarType(Py_TYPE(value));
+ if (dtype == Py_None) {
+ Py_DECREF(dtype);
+ /* Signal that this is an array or array-like: Defer to array logic */
+ return OTHER_IS_UNKNOWN_OBJECT;
}
- return 0;
+ else if (dtype == NULL) {
+ /*
+         * The input is an unknown Python object. This should probably defer,
+         * but currently only does so for float128.
+ * For all other cases, we defer to the array logic. If the object
+ * is indeed not an array-like, this will end up converting the NumPy
+ * scalar to a Python scalar and then try again.
+ * The logic is that the ufunc casts the input to object, which does
+ * the conversion.
+ */
+ return OTHER_IS_UNKNOWN_OBJECT;
+ }
+ /*
+ * Otherwise, we have a clear NumPy scalar, find if it is a compatible
+ * builtin scalar.
+ * Each `GET_VALUE_OR_DEFER` represents a case clause for its type number,
+ * extracting the value if it is safe and otherwise deferring.
+ * (Safety is known at compile time, so the switch statement should be
+ * simplified by the compiler accordingly.)
+ * If we have a scalar that is not listed or not safe, we defer to it.
+ *
+ * We should probably defer more aggressively, but that is too big a change,
+ * since it would disable `np.float64(1.) * [1, 2, 3, 4]`.
+ */
+ int ret; /* set by the GET_VALUE_OR_DEFER macro */
+ switch (((PyArray_DTypeMeta *)dtype)->type_num) {
+ GET_VALUE_OR_DEFER(BOOL, Bool, value);
+ /* UInts */
+ GET_VALUE_OR_DEFER(UBYTE, UByte, value);
+ GET_VALUE_OR_DEFER(USHORT, UShort, value);
+ GET_VALUE_OR_DEFER(UINT, UInt, value);
+ GET_VALUE_OR_DEFER(ULONG, ULong, value);
+ GET_VALUE_OR_DEFER(ULONGLONG, ULongLong, value);
+ /* Ints */
+ GET_VALUE_OR_DEFER(BYTE, Byte, value);
+ GET_VALUE_OR_DEFER(SHORT, Short, value);
+ GET_VALUE_OR_DEFER(INT, Int, value);
+ GET_VALUE_OR_DEFER(LONG, Long, value);
+ GET_VALUE_OR_DEFER(LONGLONG, LongLong, value);
+ /* Floats */
+ case NPY_HALF:
+ if (IS_SAFE(NPY_HALF, NPY_@TYPE@)) {
+ assert(Py_TYPE(value) == &PyHalfArrType_Type);
+ CONVERT_TO_RESULT(npy_half_to_float(PyArrayScalar_VAL(value, Half)));
+            ret = CONVERSION_SUCCESS;
+ }
+ else if (IS_SAFE(NPY_@TYPE@, NPY_HALF)) {
+ ret = DEFER_TO_OTHER_KNOWN_SCALAR;
+ }
+ else {
+ ret = PROMOTION_REQUIRED;
+ }
+ break;
+ GET_VALUE_OR_DEFER(FLOAT, Float, value);
+ GET_VALUE_OR_DEFER(DOUBLE, Double, value);
+ GET_VALUE_OR_DEFER(LONGDOUBLE, LongDouble, value);
+ /* Complex: We should still defer, but the code won't work... */
+ GET_CVALUE_OR_DEFER(CFLOAT, CFloat, value);
+ GET_CVALUE_OR_DEFER(CDOUBLE, CDouble, value);
+ GET_CVALUE_OR_DEFER(CLONGDOUBLE, CLongDouble, value);
+ default:
+ /*
+ * If there is no match, this is an unknown scalar object. It
+ * would make sense to defer generously here, but it should also
+ * always be safe to use the array path.
+             * The issue is that the other scalar may or may not be designed
+             * to deal with NumPy scalars. Without knowing that, we cannot
+             * defer (which could potentially be much faster).
+ * TODO: We could add a DType flag to allow opting in to deferring!
+ */
+ ret = OTHER_IS_UNKNOWN_OBJECT;
+ }
+ Py_DECREF(dtype);
+ return ret;
}
+#undef IS_SAFE
+#undef CONVERT_TO_RESULT
+#undef GET_VALUE_OR_DEFER
+#undef GET_CVALUE_OR_DEFER
+#undef IS_@TYPE@
+
/**end repeat**/
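The Python `int` and `float` fast paths above are visible from the Python side (a sketch of the expected behavior, not an exhaustive specification):

    import numpy as np

    type(np.int64(1) + 5)        # np.int64: the int fits in a C long -> fast path
    type(np.float64(1.0) + 0.5)  # np.float64: a Python float is always a safe double
    # A Python int that overflows a C long skips the fast path and is
    # handled as an arbitrary object by the generic/array logic.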
@@ -756,102 +1032,145 @@ _@name@_convert2_to_ctypes(PyObject *a, @type@ *arg1,
* #name = (byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong)*12,
* (half, float, double, longdouble,
- * cfloat, cdouble, clongdouble)*5,
- * (half, float, double, longdouble)*2#
+ * cfloat, cdouble, clongdouble)*4,
+ * (half, float, double, longdouble)*3#
* #Name = (Byte, UByte, Short, UShort, Int, UInt,
* Long, ULong,LongLong,ULongLong)*12,
* (Half, Float, Double, LongDouble,
- * CFloat, CDouble, CLongDouble)*5,
- * (Half, Float, Double, LongDouble)*2#
+ * CFloat, CDouble, CLongDouble)*4,
+ * (Half, Float, Double, LongDouble)*3#
* #type = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong)*12,
* (npy_half, npy_float, npy_double, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble)*5,
- * (npy_half, npy_float, npy_double, npy_longdouble)*2#
+ * npy_cfloat, npy_cdouble, npy_clongdouble)*4,
+ * (npy_half, npy_float, npy_double, npy_longdouble)*3#
*
* #oper = add*10, subtract*10, multiply*10, remainder*10,
* divmod*10, floor_divide*10, lshift*10, rshift*10, and*10,
* or*10, xor*10, true_divide*10,
- * add*7, subtract*7, multiply*7, floor_divide*7, true_divide*7,
- * divmod*4, remainder*4#
+ * add*7, subtract*7, multiply*7, true_divide*7,
+ * floor_divide*4, divmod*4, remainder*4#
*
- * #fperr = 1*60,0*50,1*10,
- * 1*35,
- * 1*8#
+ * #fperr = 0*110, 1*10,
+ * 1*28, 1*12#
* #twoout = 0*40,1*10,0*70,
- * 0*35,
- * 1*4,0*4#
+ * 0*28,
+ * 0*4,1*4,0*4#
* #otype = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong)*11,
- * npy_float*4, npy_double*6,
+ * npy_double*10,
* (npy_half, npy_float, npy_double, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble)*5,
- * (npy_half, npy_float, npy_double, npy_longdouble)*2#
+ * npy_cfloat, npy_cdouble, npy_clongdouble)*4,
+ * (npy_half, npy_float, npy_double, npy_longdouble)*3#
* #OName = (Byte, UByte, Short, UShort, Int, UInt,
* Long, ULong, LongLong, ULongLong)*11,
- * Float*4, Double*6,
+ * Double*10,
* (Half, Float, Double, LongDouble,
- * CFloat, CDouble, CLongDouble)*5,
- * (Half, Float, Double, LongDouble)*2#
+ * CFloat, CDouble, CLongDouble)*4,
+ * (Half, Float, Double, LongDouble)*3#
*/
+#define IS_@name@
static PyObject *
@name@_@oper@(PyObject *a, PyObject *b)
{
PyObject *ret;
- @type@ arg1, arg2;
- @otype@ out;
+ @type@ arg1, arg2, other_val;
-#if @twoout@
- @otype@ out2;
- PyObject *obj;
-#endif
-
-#if @fperr@
- int retstatus;
- int first;
-#endif
+ /*
+ * Check if this operation may be considered forward. Note `is_forward`
+     * does not imply that we can defer to a subclass `b`; we need to check
+     * `BINOP_IS_FORWARD` for that (it takes into account that both may be
+     * identical classes).
+ */
+ int is_forward = (Py_TYPE(a)->tp_as_number != NULL
+ && (void *)(Py_TYPE(a)->tp_as_number->nb_@oper@) == (void*)(@name@_@oper@));
- BINOP_GIVE_UP_IF_NEEDED(a, b, nb_@oper@, @name@_@oper@);
+ /*
+ * Extract the other value (if it is compatible). Otherwise, decide
+ * how to deal with it. This is somewhat complicated.
+ *
+ * Note: This pattern is used multiple times below.
+ */
+ PyObject *other = is_forward ? b : a;
- switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) {
- case 0:
- break;
- case -1:
- /* one of them can't be cast safely must be mixed-types*/
- return PyArray_Type.tp_as_number->nb_@oper@(a,b);
- case -2:
- /* use default handling */
- if (PyErr_Occurred()) {
- return NULL;
- }
+ conversion_result res = convert_to_@name@(other, &other_val);
+ switch (res) {
+ case CONVERSION_ERROR:
+ return NULL; /* an error occurred (should never happen) */
+ case DEFER_TO_OTHER_KNOWN_SCALAR:
+ /*
+             * Defer to the other object; this is normally a forward
+             * operation. However, it could be backward if the operation is
+             * undefined in the forward direction. An example is the complex
+             * remainder: `complex % bool` will defer even though complex
+             * would normally handle the operation.
+ */
+ Py_RETURN_NOTIMPLEMENTED;
+ case CONVERSION_SUCCESS:
+ break; /* successfully extracted value we can proceed */
+ case OTHER_IS_UNKNOWN_OBJECT:
+#if defined(IS_longdouble) || defined(IS_clongdouble)
+ Py_RETURN_NOTIMPLEMENTED;
+#endif
+ BINOP_GIVE_UP_IF_NEEDED(a, b, nb_@oper@, @name@_@oper@);
+ case PROMOTION_REQUIRED:
+ /*
+             * Either an array-like, an unknown scalar, or a case requiring
+             * promotion.
+ *
+ * TODO: We could special case the promotion case here for much
+ * better speed and to deal with integer overflow warnings
+ * correctly. (e.g. `uint8 * int8` cannot warn).
+ */
return PyGenericArrType_Type.tp_as_number->nb_@oper@(a,b);
- case -3:
+ case OTHER_IS_SUBCLASS:
/*
- * special case for longdouble and clongdouble
- * because they have a recursive getitem in their dtype
+             * Success converting. We _could_ in principle defer in cases
+             * where the other subclass does not inherit the behavior. In
+             * practice not even Python's `int` attempts this, so we also punt.
*/
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
+ break;
}
#if @fperr@
- npy_clear_floatstatus_barrier((char*)&out);
+ npy_clear_floatstatus_barrier((char*)&arg1);
+#endif
+ if (is_forward) {
+ arg1 = PyArrayScalar_VAL(a, @Name@);
+ arg2 = other_val;
+ }
+ else {
+ arg1 = other_val;
+ arg2 = PyArrayScalar_VAL(b, @Name@);
+ }
+
+ /*
+ * Prepare the actual calculation.
+ */
+ @otype@ out;
+#if @twoout@
+ @otype@ out2;
+ PyObject *obj;
#endif
+
+
/*
* here we do the actual calculation with arg1 and arg2
* as a function call.
+ * Note that `retstatus` is the "floating point error" value for integer
+     * functions. Float functions always return 0; the floating point
+     * status is then fetched with the `npy_get_floatstatus_barrier` below.
*/
#if @twoout@
- @name@_ctype_@oper@(arg1, arg2, (@otype@ *)&out, &out2);
+ int retstatus = @name@_ctype_@oper@(arg1, arg2, &out, &out2);
#else
- @name@_ctype_@oper@(arg1, arg2, (@otype@ *)&out);
+ int retstatus = @name@_ctype_@oper@(arg1, arg2, &out);
#endif
#if @fperr@
/* Check status flag. If it is set, then look up what to do */
- retstatus = npy_get_floatstatus_barrier((char*)&out);
+ retstatus |= npy_get_floatstatus_barrier((char*)&out);
+#endif
if (retstatus) {
int bufsize, errmask;
PyObject *errobj;
@@ -860,14 +1179,13 @@ static PyObject *
&errobj) < 0) {
return NULL;
}
- first = 1;
+ int first = 1;
if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) {
Py_XDECREF(errobj);
return NULL;
}
Py_XDECREF(errobj);
}
-#endif
#if @twoout@
@@ -899,6 +1217,9 @@ static PyObject *
return ret;
}
+
+#undef IS_@name@
+
/**end repeat**/
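Since integer `retstatus` values are now funneled through the same `PyUFunc_handlefperr` machinery as floating point flags, scalar integer overflow respects `np.errstate` (expected behavior, matching the tests added below):

    import numpy as np

    np.int8(127) + np.int8(1)        # RuntimeWarning: overflow encountered
    with np.errstate(over="raise"):
        np.int8(127) + np.int8(1)    # raises FloatingPointError instead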
#define _IS_ZERO(x) (x == 0)
@@ -920,17 +1241,6 @@ static PyObject *
* Half, Float, Double, LongDouble,
* CFloat, CDouble, CLongDouble#
*
- * #oname = float*4, double*6, half, float, double, longdouble,
- * cfloat, cdouble, clongdouble#
- *
- * #otype = npy_float*4, npy_double*6, npy_half, npy_float,
- * npy_double, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble#
- *
- * #OName = Float*4, Double*6, Half, Float,
- * Double, LongDouble,
- * CFloat, CDouble, CLongDouble#
- *
* #isint = 1*10,0*7#
* #isuint = (0,1)*5,0*7#
* #cmplx = 0*14,1*3#
@@ -938,46 +1248,71 @@ static PyObject *
* #zero = 0*10, NPY_HALF_ZERO, 0*6#
* #one = 1*10, NPY_HALF_ONE, 1*6#
*/
+#define IS_@name@
static PyObject *
@name@_power(PyObject *a, PyObject *b, PyObject *modulo)
{
+ if (modulo != Py_None) {
+ /* modular exponentiation is not implemented (gh-8804) */
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+
PyObject *ret;
- @type@ arg1, arg2, out;
+ @type@ arg1, arg2, other_val;
- BINOP_GIVE_UP_IF_NEEDED(a, b, nb_power, @name@_power);
+ int is_forward = (Py_TYPE(a)->tp_as_number != NULL
+ && (void *)(Py_TYPE(a)->tp_as_number->nb_power) == (void*)(@name@_power));
- switch(_@name@_convert2_to_ctypes(a, &arg1, b, &arg2)) {
- case 0:
- break;
- case -1:
- /* can't cast both safely mixed-types? */
- return PyArray_Type.tp_as_number->nb_power(a,b,modulo);
- case -2:
- /* use default handling */
- if (PyErr_Occurred()) {
- return NULL;
- }
- return PyGenericArrType_Type.tp_as_number->nb_power(a,b,modulo);
- case -3:
- default:
+ /*
+ * Extract the other value (if it is compatible). See the generic
+ * (non power) version above for detailed notes.
+ */
+ PyObject *other = is_forward ? b : a;
+
+    conversion_result res = convert_to_@name@(other, &other_val);
+ switch (res) {
+ case CONVERSION_ERROR:
+ return NULL; /* an error occurred (should never happen) */
+ case DEFER_TO_OTHER_KNOWN_SCALAR:
+ Py_RETURN_NOTIMPLEMENTED;
+ case CONVERSION_SUCCESS:
+ break; /* successfully extracted value we can proceed */
+ case OTHER_IS_UNKNOWN_OBJECT:
+#if defined(IS_longdouble) || defined(IS_clongdouble)
+ Py_RETURN_NOTIMPLEMENTED;
+#endif
+ BINOP_GIVE_UP_IF_NEEDED(a, b, nb_power, @name@_power);
+ case PROMOTION_REQUIRED:
+ return PyGenericArrType_Type.tp_as_number->nb_power(a, b, modulo);
+ case OTHER_IS_SUBCLASS:
/*
- * special case for longdouble and clongdouble
- * because they have a recursive getitem in their dtype
+             * Success converting. We _could_ in principle defer in cases
+             * where the other subclass does not inherit the behavior. In
+             * practice not even Python's `int` attempts this, so we also punt.
*/
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
-
- if (modulo != Py_None) {
- /* modular exponentiation is not implemented (gh-8804) */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
+ break;
}
#if !@isint@
- npy_clear_floatstatus_barrier((char*)&out);
+ npy_clear_floatstatus_barrier((char*)&arg1);
#endif
+
+ if (is_forward) {
+ arg1 = PyArrayScalar_VAL(a, @Name@);
+ arg2 = other_val;
+ }
+ else {
+ arg1 = other_val;
+ arg2 = PyArrayScalar_VAL(b, @Name@);
+ }
+
+ /*
+ * Prepare the actual calculation:
+ */
+ @type@ out;
+
/*
* here we do the actual calculation with arg1 and arg2
* as a function call.
@@ -989,11 +1324,12 @@ static PyObject *
return NULL;
}
#endif
- @name@_ctype_power(arg1, arg2, &out);
+ int retstatus = @name@_ctype_power(arg1, arg2, &out);
#if !@isint@
/* Check status flag. If it is set, then look up what to do */
- int retstatus = npy_get_floatstatus_barrier((char*)&out);
+ retstatus |= npy_get_floatstatus_barrier((char*)&out);
+#endif
if (retstatus) {
int bufsize, errmask;
PyObject *errobj;
@@ -1009,7 +1345,6 @@ static PyObject *
}
Py_XDECREF(errobj);
}
-#endif
ret = PyArrayScalar_New(@Name@);
if (ret == NULL) {
@@ -1021,78 +1356,28 @@ static PyObject *
}
+#undef IS_@name@
/**end repeat**/
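With the early `modulo` check, ternary `pow` now returns `NotImplemented` up front, which Python surfaces as a `TypeError` (gh-8804):

    import numpy as np

    pow(np.int64(2), np.int64(10))     # np.int64(1024)
    pow(np.int64(2), np.int64(10), 7)  # TypeError: ternary pow is unsupported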
#undef _IS_ZERO
/**begin repeat
*
- * #name = cfloat, cdouble#
- *
- */
-
-/**begin repeat1
- *
- * #oper = divmod, remainder#
- *
- */
-
-#define @name@_@oper@ NULL
-
-/**end repeat1**/
-
-/**end repeat**/
-
-/**begin repeat
- *
- * #oper = divmod, remainder#
+ * #name = (cfloat, cdouble, clongdouble)*3#
+ * #oper = floor_divide*3, divmod*3, remainder*3#
*
*/
/*
-Complex numbers do not support remainder operations. Unfortunately,
-the type inference for long doubles is complicated, and if a remainder
-operation is not defined - if the relevant field is left NULL - then
-operations between long doubles and objects lead to an infinite recursion
-instead of a TypeError. This should ensure that once everything gets
-converted to complex long doubles you correctly get a reasonably
-informative TypeError. This fixes the last part of bug gh-18548.
-*/
-
+ * Complex numbers do not support remainder, so we manually make sure that
+ * the operation is not defined. This is/was especially important for
+ * longdoubles due to their tendency to recurse for some operations, see
+ * gh-18548. (We need to define the slots to avoid inheriting them.)
+ */
static PyObject *
-clongdouble_@oper@(PyObject *a, PyObject *b)
+@name@_@oper@(PyObject *NPY_UNUSED(a), PyObject *NPY_UNUSED(b))
{
- npy_clongdouble arg1, arg2;
-
- BINOP_GIVE_UP_IF_NEEDED(a, b, nb_@oper@, clongdouble_@oper@);
-
- switch(_clongdouble_convert2_to_ctypes(a, &arg1, b, &arg2)) {
- case 0:
- break;
- case -1:
- /* one of them can't be cast safely must be mixed-types*/
- return PyArray_Type.tp_as_number->nb_@oper@(a,b);
- case -2:
- /* use default handling */
- if (PyErr_Occurred()) {
- return NULL;
- }
- return PyGenericArrType_Type.tp_as_number->nb_@oper@(a,b);
- case -3:
- /*
- * special case for longdouble and clongdouble
- * because they have a recursive getitem in their dtype
- */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
-
- /*
- * here we do the actual calculation with arg1 and arg2
- * as a function call.
- */
- PyErr_SetString(PyExc_TypeError, "complex long doubles do not support remainder");
- return NULL;
+ Py_RETURN_NOTIMPLEMENTED;
}
/**end repeat**/
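From Python, these slots now yield a plain `TypeError` instead of the old recursion hazard (gh-18548):

    import numpy as np

    np.complex128(1 + 2j) % np.complex128(1)  # TypeError: unsupported operand
    np.clongdouble(3) % 1.0                   # TypeError as well, no recursion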
@@ -1124,6 +1409,14 @@ clongdouble_@oper@(PyObject *a, PyObject *b)
* byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong#
*
+ * #Name = (Byte, UByte, Short, UShort, Int, UInt,
+ * Long, ULong, LongLong, ULongLong,
+ * Half, Float, Double, LongDouble,
+ * CFloat, CDouble, CLongDouble)*3,
+ *
+ * Byte, UByte, Short, UShort, Int, UInt,
+ * Long, ULong, LongLong, ULongLong#
+ *
* #type = (npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_half, npy_float, npy_double, npy_longdouble,
@@ -1161,32 +1454,19 @@ clongdouble_@oper@(PyObject *a, PyObject *b)
static PyObject *
@name@_@oper@(PyObject *a)
{
- @type@ arg1;
+ @type@ val;
@otype@ out;
PyObject *ret;
- switch(_@name@_convert_to_ctype(a, &arg1)) {
- case 0:
- break;
- case -1:
- /* can't cast both safely use different add function */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- case -2:
- /* use default handling */
- if (PyErr_Occurred()) {
- return NULL;
- }
- return PyGenericArrType_Type.tp_as_number->nb_@oper@(a);
- }
+
+ val = PyArrayScalar_VAL(a, @Name@);
+
+ @name@_ctype_@oper@(val, &out);
/*
- * here we do the actual calculation with arg1 and arg2
- * make it a function call.
+ * TODO: Complex absolute should check floating point flags.
*/
- @name@_ctype_@oper@(arg1, &out);
-
ret = PyArrayScalar_New(@OName@);
PyArrayScalar_ASSIGN(ret, @OName@, out);
@@ -1210,6 +1490,10 @@ static PyObject *
* uint, long, ulong, longlong, ulonglong,
* half, float, double, longdouble,
* cfloat, cdouble, clongdouble#
+ * #Name = Byte, UByte, Short, UShort, Int, UInt,
+ * Long, ULong, LongLong, ULongLong,
+ * Half, Float, Double, LongDouble,
+ * CFloat, CDouble, CLongDouble#
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
* npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_half, npy_float, npy_double, npy_longdouble,
@@ -1221,24 +1505,14 @@ static int
@name@_bool(PyObject *a)
{
int ret;
- @type@ arg1;
+ @type@ val;
- if (_@name@_convert_to_ctype(a, &arg1) < 0) {
- if (PyErr_Occurred()) {
- return -1;
- }
- return PyGenericArrType_Type.tp_as_number->nb_bool(a);
- }
-
- /*
- * here we do the actual calculation with arg1 and arg2
- * make it a function call.
- */
+ val = PyArrayScalar_VAL(a, @Name@);
#if @simp@
- ret = @nonzero@(arg1);
+ ret = @nonzero@(val);
#else
- ret = (@nonzero@(arg1.real) || @nonzero@(arg1.imag));
+ ret = (@nonzero@(val.real) || @nonzero@(val.imag));
#endif
return ret;
@@ -1321,7 +1595,7 @@ static PyObject *
* #to_ctype = , , , , , , , , , , npy_half_to_double, , , , , , #
* #func = PyFloat_FromDouble*17#
*/
-static NPY_INLINE PyObject *
+static PyObject *
@name@_float(PyObject *obj)
{
#if @cmplx@
@@ -1353,36 +1627,50 @@ static NPY_INLINE PyObject *
* long, ulong, longlong, ulonglong,
* half, float, double, longdouble,
* cfloat, cdouble, clongdouble#
+ * #Name = Byte, UByte, Short, UShort, Int, UInt,
+ * Long, ULong, LongLong, ULongLong,
+ * Half, Float, Double, LongDouble,
+ * CFloat, CDouble, CLongDouble#
* #simp = def*10, def_half, def*3, cmplx*3#
*/
+#define IS_@name@
+
static PyObject*
@name@_richcompare(PyObject *self, PyObject *other, int cmp_op)
{
npy_@name@ arg1, arg2;
- int out=0;
-
- RICHCMP_GIVE_UP_IF_NEEDED(self, other);
+ int out = 0;
- switch(_@name@_convert2_to_ctypes(self, &arg1, other, &arg2)) {
- case 0:
- break;
- case -1:
- /* can't cast both safely use different add function */
- case -2:
- /* use ufunc */
- if (PyErr_Occurred()) {
- return NULL;
- }
- return PyGenericArrType_Type.tp_richcompare(self, other, cmp_op);
- case -3:
- /*
- * special case for longdouble and clongdouble
- * because they have a recursive getitem in their dtype
- */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
+ /*
+ * Extract the other value (if it is compatible).
+ */
+    conversion_result res = convert_to_@name@(other, &arg2);
+ switch (res) {
+ case CONVERSION_ERROR:
+ return NULL; /* an error occurred (should never happen) */
+ case DEFER_TO_OTHER_KNOWN_SCALAR:
+ Py_RETURN_NOTIMPLEMENTED;
+ case CONVERSION_SUCCESS:
+ break; /* successfully extracted value we can proceed */
+ case OTHER_IS_UNKNOWN_OBJECT:
+#if defined(IS_longdouble) || defined(IS_clongdouble)
+ Py_RETURN_NOTIMPLEMENTED;
+#endif
+ RICHCMP_GIVE_UP_IF_NEEDED(self, other);
+ case PROMOTION_REQUIRED:
+ return PyGenericArrType_Type.tp_richcompare(self, other, cmp_op);
+ case OTHER_IS_SUBCLASS:
+ /*
+             * Success converting. We _could_ in principle defer in cases
+             * where the other subclass does not inherit the behavior. In
+             * practice not even Python's `int` attempts this, so we also punt.
+ * (This is also even trickier for richcompare, though.)
+ */
+ break;
}
+ arg1 = PyArrayScalar_VAL(self, @Name@);
+
/* here we do the actual calculation with arg1 and arg2 */
switch (cmp_op) {
case Py_EQ:
@@ -1412,6 +1700,8 @@ static PyObject*
PyArrayScalar_RETURN_FALSE;
}
}
+
+#undef IS_@name@
/**end repeat**/
/**begin repeat
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index f190a83ba..a2b801760 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -8,6 +8,7 @@ from numpy.compat import _pep440
import pytest
from hypothesis import given, settings
from hypothesis.strategies import sampled_from
+from hypothesis.extra import numpy as hynp
import numpy as np
from numpy.testing import (
@@ -24,6 +25,14 @@ types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc,
floating_types = np.floating.__subclasses__()
complex_floating_types = np.complexfloating.__subclasses__()
+objecty_things = [object(), None]
+
+reasonable_operators_for_scalars = [
+ operator.lt, operator.le, operator.eq, operator.ne, operator.ge,
+ operator.gt, operator.add, operator.floordiv, operator.mod,
+ operator.mul, operator.pow, operator.sub, operator.truediv,
+]
+
# This compares scalarmath against ufuncs.
@@ -66,6 +75,41 @@ class TestTypes:
np.add(1, 1)
+@pytest.mark.slow
+@settings(max_examples=10000, deadline=2000)
+@given(sampled_from(reasonable_operators_for_scalars),
+ hynp.arrays(dtype=hynp.scalar_dtypes(), shape=()),
+ hynp.arrays(dtype=hynp.scalar_dtypes(), shape=()))
+def test_array_scalar_ufunc_equivalence(op, arr1, arr2):
+ """
+ This is a thorough test attempting to cover important promotion paths
+ and ensuring that arrays and scalars stay as aligned as possible.
+    However, if it causes trouble, it may simply be removed.
+ """
+ scalar1 = arr1[()]
+ scalar2 = arr2[()]
+ assert isinstance(scalar1, np.generic)
+ assert isinstance(scalar2, np.generic)
+
+ if arr1.dtype.kind == "c" or arr2.dtype.kind == "c":
+ comp_ops = {operator.ge, operator.gt, operator.le, operator.lt}
+ if op in comp_ops and (np.isnan(scalar1) or np.isnan(scalar2)):
+ pytest.xfail("complex comp ufuncs use sort-order, scalars do not.")
+
+ # ignore fpe's since they may just mismatch for integers anyway.
+ with warnings.catch_warnings(), np.errstate(all="ignore"):
+        # Comparison DeprecationWarnings are turned into errors here (2022-03):
+ warnings.simplefilter("error", DeprecationWarning)
+ try:
+ res = op(arr1, arr2)
+ except Exception as e:
+ with pytest.raises(type(e)):
+ op(scalar1, scalar2)
+ else:
+ scalar_res = op(scalar1, scalar2)
+ assert_array_equal(scalar_res, res)
+
+
class TestBaseMath:
def test_blocked(self):
# test alignments offsets for simd instructions
@@ -778,15 +822,6 @@ def recursionlimit(n):
sys.setrecursionlimit(o)
-objecty_things = [object(), None]
-reasonable_operators_for_scalars = [
- operator.lt, operator.le, operator.eq, operator.ne, operator.ge,
- operator.gt, operator.add, operator.floordiv, operator.mod,
- operator.mul, operator.matmul, operator.pow, operator.sub,
- operator.truediv,
-]
-
-
@given(sampled_from(objecty_things),
sampled_from(reasonable_operators_for_scalars),
sampled_from(types))
@@ -843,3 +878,129 @@ def test_clongdouble_inf_loop(op):
op(None, np.longdouble(3))
except TypeError:
pass
+
+
+@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
+@pytest.mark.parametrize("operation", [
+ lambda min, max: max + max,
+ lambda min, max: min - max,
+ lambda min, max: max * max], ids=["+", "-", "*"])
+def test_scalar_integer_operation_overflow(dtype, operation):
+ st = np.dtype(dtype).type
+ min = st(np.iinfo(dtype).min)
+ max = st(np.iinfo(dtype).max)
+
+ with pytest.warns(RuntimeWarning, match="overflow encountered"):
+ operation(min, max)
+
+
+@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+@pytest.mark.parametrize("operation", [
+ lambda min, neg_1: abs(min),
+ lambda min, neg_1: min * neg_1,
+ lambda min, neg_1: min // neg_1], ids=["abs", "*", "//"])
+def test_scalar_signed_integer_overflow(dtype, operation):
+ # The minimum signed integer can "overflow" for some additional operations
+ st = np.dtype(dtype).type
+ min = st(np.iinfo(dtype).min)
+ neg_1 = st(-1)
+
+ with pytest.warns(RuntimeWarning, match="overflow encountered"):
+ operation(min, neg_1)
+
+
+@pytest.mark.parametrize("dtype", np.typecodes["UnsignedInteger"])
+@pytest.mark.xfail # TODO: the check is quite simply missing!
+def test_scalar_unsigned_integer_overflow(dtype):
+ val = np.dtype(dtype).type(8)
+ with pytest.warns(RuntimeWarning, match="overflow encountered"):
+ -val
+
+
+@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
+@pytest.mark.parametrize("operation", [
+ lambda val, zero: val // zero,
+ lambda val, zero: val % zero, ], ids=["//", "%"])
+def test_scalar_integer_operation_divbyzero(dtype, operation):
+ st = np.dtype(dtype).type
+ val = st(100)
+ zero = st(0)
+
+ with pytest.warns(RuntimeWarning, match="divide by zero"):
+ operation(val, zero)
+
+
+ops_with_names = [
+ ("__lt__", "__gt__", operator.lt, True),
+ ("__le__", "__ge__", operator.le, True),
+ ("__eq__", "__eq__", operator.eq, True),
+ # Note __op__ and __rop__ may be identical here:
+ ("__ne__", "__ne__", operator.ne, True),
+ ("__gt__", "__lt__", operator.gt, True),
+ ("__ge__", "__le__", operator.ge, True),
+ ("__floordiv__", "__rfloordiv__", operator.floordiv, False),
+ ("__truediv__", "__rtruediv__", operator.truediv, False),
+ ("__add__", "__radd__", operator.add, False),
+ ("__mod__", "__rmod__", operator.mod, False),
+ ("__mul__", "__rmul__", operator.mul, False),
+ ("__pow__", "__rpow__", operator.pow, False),
+ ("__sub__", "__rsub__", operator.sub, False),
+]
+
+
+@pytest.mark.parametrize(["__op__", "__rop__", "op", "cmp"], ops_with_names)
+@pytest.mark.parametrize("sctype", [np.float32, np.float64, np.longdouble])
+def test_subclass_deferral(sctype, __op__, __rop__, op, cmp):
+ """
+ This test covers scalar subclass deferral. Note that this is exceedingly
+ complicated, especially since it tends to fall back to the array paths and
+ these additionally add the "array priority" mechanism.
+
+ The behaviour was modified subtly in 1.22 (to make it closer to how Python
+    scalars work). Due to its complexity, and the fact that subclassing
+    NumPy scalars is probably a bad idea to begin with, there is probably
+    room for adjustments here.
+ """
+ class myf_simple1(sctype):
+ pass
+
+ class myf_simple2(sctype):
+ pass
+
+ def defer(self, other):
+ return NotImplemented
+
+ def op_func(self, other):
+ return __op__
+
+ def rop_func(self, other):
+ return __rop__
+
+ myf_op = type("myf_op", (sctype,), {__op__: op_func, __rop__: rop_func})
+
+ # inheritance has to override, or this is correctly lost:
+ res = op(myf_simple1(1), myf_simple2(2))
+ assert type(res) == sctype or type(res) == np.bool_
+ assert op(myf_simple1(1), myf_simple2(2)) == op(1, 2) # inherited
+
+ # Two independent subclasses do not really define an order. This could
+ # be attempted, but we do not since Python's `int` does neither:
+ assert op(myf_op(1), myf_simple1(2)) == __op__
+ assert op(myf_simple1(1), myf_op(2)) == op(1, 2) # inherited
+
+
+@pytest.mark.parametrize(["__op__", "__rop__", "op", "cmp"], ops_with_names)
+@pytest.mark.parametrize("pytype", [float, int, complex])
+def test_pyscalar_subclasses(pytype, __op__, __rop__, op, cmp):
+ def op_func(self, other):
+ return __op__
+
+ def rop_func(self, other):
+ return __rop__
+
+ myf = type("myf", (pytype,),
+ {__op__: op_func, __rop__: rop_func, "__array_ufunc__": None})
+
+ # Just like normally, we should never presume we can modify the float.
+ assert op(myf(1), np.float64(2)) == __op__
+ assert op(np.float64(1), myf(2)) == __rop__