Diffstat (limited to 'numpy')
-rw-r--r--  numpy/core/src/umath/loops_arithmetic.dispatch.c.src  124
1 file changed, 61 insertions, 63 deletions
diff --git a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
index a52bb36b7..19e05f2b5 100644
--- a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
@@ -34,6 +34,7 @@
* uword −qsign = EOR(−nsign, −dsign);
* q = TRUNC((n − (−dsign ) + (−nsign))/d) − (−qsign);
********************************************************************************/
+
#if NPY_SIMD
/**begin repeat
* Signed types
@@ -49,34 +50,28 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
const int vstep = npyv_nlanes_@sfx@;
const npyv_@sfx@x3 divisor = npyv_divisor_@sfx@(scalar);
- if (scalar == (npyv_lanetype_@sfx@)-1) {
+ if (scalar == -1) {
npyv_b@len@ noverflow = npyv_cvt_b@len@_@sfx@(npyv_setall_@sfx@(-1));
npyv_@sfx@ vzero = npyv_zero_@sfx@();
for (; len >= vstep; len -= vstep, src += vstep, dst += vstep) {
- npyv_@sfx@ a = npyv_load_@sfx@(src);
- npyv_b@len@ greater_min = npyv_cmpgt_@sfx@(a, npyv_setall_@sfx@(NPY_MIN_INT@len@));
- noverflow = npyv_and_b@len@(noverflow, greater_min);
- npyv_@sfx@ neg = npyv_ifsub_@sfx@(greater_min, vzero, a, vzero);
-
+ npyv_@sfx@ a = npyv_load_@sfx@(src);
+ npyv_b@len@ gt_min = npyv_cmpgt_@sfx@(a, npyv_setall_@sfx@(NPY_MIN_INT@len@));
+ noverflow = npyv_and_b@len@(noverflow, gt_min);
+ npyv_@sfx@ neg = npyv_ifsub_@sfx@(gt_min, vzero, a, vzero);
npyv_store_@sfx@(dst, neg);
}
- npy_uint64 tobits = npyv_tobits_b@len@(noverflow);
- #if npyv_nlanes_@sfx@ == 64
- int raise = (~tobits) != 0;
- #else
- int raise = tobits != (1ULL << vstep)-1;
- #endif
+ int raise_err = npyv_tobits_b@len@(npyv_not_b@len@(noverflow)) != 0;
for (; len > 0; --len, ++src, ++dst) {
npyv_lanetype_@sfx@ a = *src;
if (a == NPY_MIN_INT@len@) {
- raise = NPY_TRUE;
+ raise_err = 1;
*dst = 0;
} else {
*dst = -a;
}
}
- if (raise) {
+ if (raise_err) {
npy_set_floatstatus_divbyzero();
}
} else {
@@ -89,20 +84,19 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
npyv_@sfx@ to_ninf = npyv_xor_@sfx@(nsign_a, nsign_d);
npyv_@sfx@ trunc = npyv_divc_@sfx@(npyv_add_@sfx@(a, diff_sign), divisor);
npyv_@sfx@ floor = npyv_sub_@sfx@(trunc, to_ninf);
-
npyv_store_@sfx@(dst, floor);
}
for (; len > 0; --len, ++src, ++dst) {
const npyv_lanetype_@sfx@ a = *src;
- *dst = a / scalar;
- /* Negative quotients needs to be rounded down */
- if (((a > 0) != (scalar > 0)) && (*dst * scalar != a)) {
- *dst = *dst - 1;
+ npyv_lanetype_@sfx@ r = a / scalar;
+ // Negative quotients need to be rounded down
+ if (((a > 0) != (scalar > 0)) && ((r * scalar) != a)) {
+ r--;
}
+ *dst = r;
}
}
-
npyv_cleanup();
}
/**end repeat**/
@@ -131,7 +125,6 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
const npyv_lanetype_@sfx@ a = *src;
*dst = a / scalar;
}
-
npyv_cleanup();
}
/**end repeat**/
@@ -143,8 +136,8 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
/**begin repeat
* Signed types
- * #type = byte, short, int, long, longlong#
- * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
+ * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
+ * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
*/
#undef TO_SIMD_SFX
#if 0
@@ -159,42 +152,47 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
#if NPY_BITSOF_@TYPE@ == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON))
#undef TO_SIMD_SFX
#endif
+
+NPY_FINLINE @type@ floor_div_@TYPE@(const @type@ n, const @type@ d)
+{
+ /*
+ * FIXME: On x86 at least, dividing the smallest representable integer
+ * by -1 causes a SIFGPE (division overflow). We treat this case here
+ * (to avoid a SIGFPE crash at python level), but a good solution would
+ * be to treat integer division problems separately from FPU exceptions
+ * (i.e. a different approach than npy_set_floatstatus_divbyzero()).
+ */
+ if (NPY_UNLIKELY(d == 0 || (n == NPY_MIN_@TYPE@ && d == -1))) {
+ npy_set_floatstatus_divbyzero();
+ return 0;
+ }
+ @type@ r = n / d;
+ // Negative quotients need to be rounded down
+ if (((n > 0) != (d > 0)) && ((r * d) != n)) {
+ r--;
+ }
+ return r;
+}
+
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divide)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE) {
- BINARY_REDUCE_LOOP(npy_@type@) {
- const npy_@type@ d = *(npy_@type@ *)ip2;
- if (NPY_UNLIKELY(d == 0 || (io1 == (npy_@type@)NPY_MIN_@TYPE@ && d == (npy_@type@)-1))) {
- npy_set_floatstatus_divbyzero();
- io1 = 0;
- } else {
- io1 /= d;
- }
+ BINARY_REDUCE_LOOP(@type@) {
+ io1 = floor_div_@TYPE@(io1, *(@type@*)ip2);
}
- *((npy_@type@ *)iop1) = io1;
+ *((@type@ *)iop1) = io1;
}
#if NPY_SIMD && defined(TO_SIMD_SFX)
// for contiguous block of memory, divisor is a scalar and not 0
- else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(npy_@type@), NPY_SIMD_WIDTH) &&
- (*(npy_@type@ *)args[1]) != 0) {
+ else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), NPY_SIMD_WIDTH) &&
+ (*(@type@ *)args[1]) != 0) {
TO_SIMD_SFX(simd_divide_by_scalar_contig)(args, dimensions[0]);
}
#endif
else {
BINARY_LOOP {
- const npy_@type@ in1 = *(npy_@type@ *)ip1;
- const npy_@type@ in2 = *(npy_@type@ *)ip2;
- if (NPY_UNLIKELY(in2 == 0 || (in1 == (npy_@type@)NPY_MIN_@TYPE@ && in2 == (npy_@type@)-1))) {
- npy_set_floatstatus_divbyzero();
- *((npy_@type@ *)op1) = 0;
- } else{
- *((npy_@type@ *)op1) = in1 / in2;
- /* Negative quotients needs to be rounded down */
- if (((in1 > 0) != (in2 > 0)) && (*((npy_@type@ *)op1) * in2 != in1)) {
- *((npy_@type@ *)op1) = *((npy_@type@ *)op1) - 1;
- }
- }
+ *((@type@ *)op1) = floor_div_@TYPE@(*(@type@*)ip1, *(@type@*)ip2);
}
}
}
@@ -202,16 +200,16 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divide)
/**begin repeat
* Unsigned types
- * #type = byte, short, int, long, longlong#
- * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
+ * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
+ * #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG#
+ * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG#
*/
-
#undef TO_SIMD_SFX
#if 0
/**begin repeat1
* #len = 8, 16, 32, 64#
*/
-#elif NPY_BITSOF_@TYPE@ == @len@
+#elif NPY_BITSOF_@STYPE@ == @len@
#define TO_SIMD_SFX(X) X##_u@len@
/**end repeat1**/
#endif
@@ -223,40 +221,40 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divide)
* Power10(VSX4) is an exception here since it has native support for integer vector division,
* note neither infrastructure nor NPYV has supported VSX4 yet.
*/
-#if NPY_BITSOF_@TYPE@ == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON))
+#if NPY_BITSOF_@STYPE@ == 64 && !defined(NPY_HAVE_VSX4) && (defined(NPY_HAVE_VSX) || defined(NPY_HAVE_NEON))
#undef TO_SIMD_SFX
#endif
-NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(U@TYPE@_divide)
+NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divide)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
if (IS_BINARY_REDUCE) {
- BINARY_REDUCE_LOOP(npy_u@type@) {
- const npy_u@type@ d = *(npy_u@type@ *)ip2;
- if (NPY_UNLIKELY(d == 0 || (io1 == (npy_u@type@)NPY_MIN_@TYPE@ && d == (npy_u@type@)-1))) {
+ BINARY_REDUCE_LOOP(@type@) {
+ const @type@ d = *(@type@ *)ip2;
+ if (NPY_UNLIKELY(d == 0)) {
npy_set_floatstatus_divbyzero();
io1 = 0;
} else {
io1 /= d;
}
}
- *((npy_u@type@ *)iop1) = io1;
+ *((@type@ *)iop1) = io1;
}
#if NPY_SIMD && defined(TO_SIMD_SFX)
// for contiguous block of memory, divisor is a scalar and not 0
- else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(npy_u@type@), NPY_SIMD_WIDTH) &&
- (*(npy_u@type@ *)args[1]) != 0) {
+ else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), NPY_SIMD_WIDTH) &&
+ (*(@type@ *)args[1]) != 0) {
TO_SIMD_SFX(simd_divide_by_scalar_contig)(args, dimensions[0]);
}
#endif
else {
BINARY_LOOP {
- const npy_u@type@ in1 = *(npy_u@type@ *)ip1;
- const npy_u@type@ in2 = *(npy_u@type@ *)ip2;
- if (NPY_UNLIKELY(in2 == 0 || (in1 == (npy_u@type@)NPY_MIN_@TYPE@ && in2 == (npy_u@type@)-1))) {
+ const @type@ in1 = *(@type@ *)ip1;
+ const @type@ in2 = *(@type@ *)ip2;
+ if (NPY_UNLIKELY(in2 == 0)) {
npy_set_floatstatus_divbyzero();
- *((npy_u@type@ *)op1) = 0;
+ *((@type@ *)op1) = 0;
} else{
- *((npy_u@type@ *)op1) = in1 / in2;
+ *((@type@ *)op1) = in1 / in2;
}
}
}
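
A note on the floor-division identity quoted in the file's header comment, q = TRUNC((n − (−dsign) + (−nsign))/d) − (−qsign): it can be checked in plain scalar C. The sketch below is illustrative only and not part of the patch; floor_div_identity is a hypothetical name, and the patch evaluates the same correction per SIMD lane through npyv_divc_@sfx@.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* -dsign = SRL(d, N-1)          -> 1 iff d < 0
 * -nsign = (n < -dsign)         -> 1 iff n < 0 (or n < 1 when d < 0)
 * -qsign = EOR(-nsign, -dsign)  -> 1 iff the truncated quotient must be
 *                                  pulled down to the floor
 */
static int32_t floor_div_identity(int32_t n, int32_t d)
{
    int32_t dsign = (int32_t)((uint32_t)d >> 31);  /* the comment's -dsign */
    int32_t nsign = (n < dsign);                   /* the comment's -nsign */
    int32_t qsign = nsign ^ dsign;                 /* the comment's -qsign */
    return (n - dsign + nsign) / d - qsign;
}

int main(void)
{
    /* Compare against truncate-then-fix-up, the patch's scalar tail loop. */
    for (int32_t n = -50; n <= 50; n++) {
        for (int32_t d = -7; d <= 7; d++) {
            if (d == 0) {
                continue;
            }
            int32_t r = n / d;                 /* C truncates toward zero */
            if (((n > 0) != (d > 0)) && (r * d != n)) {
                r--;                           /* round down when inexact */
            }
            assert(floor_div_identity(n, d) == r);
        }
    }
    printf("identity matches floor division on the tested range\n");
    return 0;
}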
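
Similarly, the scalar helper the patch factors out, floor_div_@TYPE@, can be exercised standalone. A minimal sketch for a concrete 32-bit lane type follows; floor_div_i32 and the err flag are illustrative stand-ins, since the patch reports errors through npy_set_floatstatus_divbyzero() instead.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

static int32_t floor_div_i32(int32_t n, int32_t d, int *err)
{
    /* Division by zero and INT32_MIN / -1 (which overflows and raises
     * SIGFPE on x86) both flag an error and yield 0, mirroring the
     * guarded branch in floor_div_@TYPE@. */
    if (d == 0 || (n == INT32_MIN && d == -1)) {
        *err = 1;
        return 0;
    }
    int32_t r = n / d;                          /* truncates toward zero */
    /* Negative quotients need to be rounded down. */
    if (((n > 0) != (d > 0)) && (r * d != n)) {
        r--;
    }
    return r;
}

int main(void)
{
    int err = 0;
    printf("%d\n", floor_div_i32(-7, 2, &err));           /* -4, not -3 */
    printf("%d\n", floor_div_i32(7, -2, &err));           /* -4 */
    printf("%d\n", floor_div_i32(-8, 2, &err));           /* -4, exact */
    printf("%d\n", floor_div_i32(INT32_MIN, -1, &err));   /* 0, err set */
    printf("err = %d\n", err);
    return 0;
}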