4 files changed, 81 insertions, 67 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index 6a78ff3c2..3f3532d36 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -162,6 +162,28 @@ NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b);
 NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b);
 NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b);
 
+NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b);
+NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b);
+/* Signed variants: an out-of-range count right-shifts negative a to -1. */
+NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b);
+NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b);
+NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
+NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
+
 /*
  * C99 double math funcs
  */
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index fa820baac..18b6d1434 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -716,3 +716,44 @@ npy_@func@@c@(@type@ a, @type@ b)
     return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b);
 }
 /**end repeat**/
+
+/* Unlike LCM and GCD, the shifts need byte and short variants, as the result
+ * depends on the type's width; counts outside [0, width) never reach << or >>.
+ */
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c = hh,h,,l,ll#
+ */
+/**begin repeat1
+ *
+ * #u         = u,#
+ * #is_signed = 0,1#
+ */
+/* a << b, except that any count outside [0, bits in a) yields 0 */
+NPY_INPLACE npy_@u@@type@
+npy_lshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+    const size_t nbits = sizeof(a) * CHAR_BIT;
+    if (NPY_LIKELY((size_t)b < nbits)) {
+        return a << b;
+    }
+    return 0;
+}
+/* a >> b; a count outside [0, bits in a) yields -1 for negative a, else 0 */
+NPY_INPLACE npy_@u@@type@
+npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+    const size_t nbits = sizeof(a) * CHAR_BIT;
+    if (NPY_LIKELY((size_t)b < nbits)) {
+        return a >> b;
+    }
+#if @is_signed@
+    if (a < 0) {
+        return (npy_@u@@type@)-1;  /* preserve the sign bit */
+    }
+#endif
+    return 0;
+}
+/**end repeat1**/
+/**end repeat**/
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index eccb9d82c..3d0b41318 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -699,6 +699,7 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
  * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double,
  *          npy_double, npy_double, npy_double, npy_double#
  * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
+ * #c = hh,uhh,h,uh,,u,l,ul,ll,ull#
  */
 
 #define @TYPE@_floor_divide @TYPE@_divide
@@ -808,46 +809,30 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
  *       which is undefined in C.
  */
 
+#define INT_left_shift_needs_clear_floatstatus
+#define UINT_left_shift_needs_clear_floatstatus
+
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 @TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
                   void *NPY_UNUSED(func))
 {
-    BINARY_LOOP_FAST(@type@, @type@,
-        if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) {
-            *out = in1 << in2;
-        }
-        else {
-            *out = 0;
-        }
-    );
+    BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2));
+
+#ifdef @TYPE@_left_shift_needs_clear_floatstatus
+    /* For some reason, our macOS CI sets an "invalid" flag here, but only
+     * for some types (INT and UINT, per the defines above). */
+    npy_clear_floatstatus_barrier((char*)dimensions);
+#endif
 }
 
+#undef INT_left_shift_needs_clear_floatstatus
+#undef UINT_left_shift_needs_clear_floatstatus
+
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 @TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
                    void *NPY_UNUSED(func))
 {
-#if @SIGNED@
-    BINARY_LOOP_FAST(@type@, @type@, {
-        if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) {
-            *out = in1 >> in2;
-        }
-        else if (in1 < 0) {
-            *out = (@type@)-1;  /* shift right preserves the sign bit */
-        }
-        else {
-            *out = 0;
-        }
-    });
-#else
-    BINARY_LOOP_FAST(@type@, @type@, {
-        if (NPY_LIKELY(in2 < sizeof(@type@) * CHAR_BIT)) {
-            *out = in1 >> in2;
-        }
-        else {
-            *out = 0;
-        }
-    });
-#endif
+    BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
 }
 
 
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index b691115e7..df440e095 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -251,7 +251,7 @@ static void
  *         long, ulong, longlong, ulonglong#
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong#
- * #issigned = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
+ * #suffix = hh,uhh,h,uh,,u,l,ul,ll,ull#
  */
 
 /**begin repeat1
@@ -263,42 +263,8 @@ static void
 
 /**end repeat1**/
 
-/* Note: these need to be kept in sync with the shift ufuncs */
-
-#define @name@_ctype_lshift(arg1, arg2, out)                  \
-    do {                                                      \
-        if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \
-            *(out) = (arg1) << (arg2);                        \
-        }                                                     \
-        else {                                                \
-            *(out) = 0;                                       \
-        }                                                     \
-    } while (0)
-
-#if @issigned@
-    #define @name@_ctype_rshift(arg1, arg2, out)                  \
-        do {                                                      \
-            if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \
-                *(out) = (arg1) >> (arg2);                        \
-            }                                                     \
-            else if ((arg1) < 0) {                                \
-                *(out) = -1;                                      \
-            }                                                     \
-            else {                                                \
-                *(out) = 0;                                       \
-            }                                                     \
-        } while (0)
-#else
-    #define @name@_ctype_rshift(arg1, arg2, out)                  \
-        do {                                                      \
-            if (NPY_LIKELY((arg2) < sizeof(@type@) * CHAR_BIT)) { \
-                *(out) = (arg1) >> (arg2);                        \
-            }                                                     \
-            else {                                                \
-                *(out) = 0;                                       \
-            }                                                     \
-        } while (0)
-#endif
+#define @name@_ctype_lshift(arg1, arg2, out) (*(out) = npy_lshift@suffix@(arg1, arg2))
+#define @name@_ctype_rshift(arg1, arg2, out) (*(out) = npy_rshift@suffix@(arg1, arg2))
 
 /**end repeat**/