summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/include/numpy/npy_math.h22
-rw-r--r--numpy/core/setup.py6
-rw-r--r--numpy/core/setup_common.py39
-rw-r--r--numpy/core/src/npymath/npy_math_internal.h.src41
-rw-r--r--numpy/core/src/umath/loops.c.src49
-rw-r--r--numpy/core/src/umath/scalarmath.c.src13
-rw-r--r--numpy/core/tests/test_scalarmath.py28
7 files changed, 188 insertions, 10 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index 126b861bf..69e690f28 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -177,6 +177,28 @@ NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b);
NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b);
NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b);
+NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b);
+NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b);
+NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b);
+NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b);
+NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b);
+NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b);
+
+NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b);
+NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b);
+NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b);
+NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b);
+NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
+NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
+NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
+
/*
* avx function has a common API for both sin & cos. This enum is used to
* distinguish between the two
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 5ac7752cc..63b515b18 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -463,6 +463,12 @@ def configuration(parent_package='',top_path=None):
rep = check_long_double_representation(config_cmd)
moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1))
+ if check_for_right_shift_internal_compiler_error(config_cmd):
+ moredefs.append('NPY_DO_NOT_OPTIMIZE_LONG_right_shift')
+ moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONG_right_shift')
+ moredefs.append('NPY_DO_NOT_OPTIMIZE_LONGLONG_right_shift')
+ moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONGLONG_right_shift')
+
# Py3K check
if sys.version_info[0] >= 3:
moredefs.append(('NPY_PY3K', 1))
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a3f7acd6d..84b78b585 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -5,6 +5,7 @@ import sys
import warnings
import copy
import binascii
+import textwrap
from numpy.distutils.misc_util import mingw32
@@ -415,3 +416,41 @@ def long_double_representation(lines):
else:
# We never detected the after_sequence
raise ValueError("Could not lock sequences (%s)" % saw)
+
+
+def check_for_right_shift_internal_compiler_error(cmd):
+ """
+ On our arm CI, this fails with an internal compilation error
+
+ The failure looks like the following, and can be reproduced on ARM64 GCC 5.4:
+
+ <source>: In function 'right_shift':
+ <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349
+ ip1[i] = ip1[i] >> in2;
+ ^
+ Please submit a full bug report,
+ with preprocessed source if appropriate.
+ See <http://gcc.gnu.org/bugs.html> for instructions.
+ Compiler returned: 1
+
+ This function returns True if this compiler bug is present, and we need to
+ turn off optimization for the function
+ """
+ cmd._check_compiler()
+ has_optimize = cmd.try_compile(textwrap.dedent("""\
+ __attribute__((optimize("O3"))) void right_shift() {}
+ """), None, None)
+ if not has_optimize:
+ return False
+
+ no_err = cmd.try_compile(textwrap.dedent("""\
+ typedef long the_type; /* fails also for unsigned and long long */
+ __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) {
+ for (int i = 0; i < n; i++) {
+ if (in2 < (the_type)sizeof(the_type) * 8) {
+ ip1[i] = ip1[i] >> in2;
+ }
+ }
+ }
+ """), None, None)
+ return not no_err
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index fa820baac..18b6d1434 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -716,3 +716,44 @@ npy_@func@@c@(@type@ a, @type@ b)
return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b);
}
/**end repeat**/
+
+/* Unlike LCM and GCD, we need byte and short variants for the shift operators,
+ * since the result is dependent on the width of the type
+ */
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c = hh,h,,l,ll#
+ */
+/**begin repeat1
+ *
+ * #u = u,#
+ * #is_signed = 0,1#
+ */
+NPY_INPLACE npy_@u@@type@
+npy_lshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+ if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) {
+ return a << b;
+ }
+ else {
+ return 0;
+ }
+}
+NPY_INPLACE npy_@u@@type@
+npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
+{
+ if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) {
+ return a >> b;
+ }
+#if @is_signed@
+ else if (a < 0) {
+ return (npy_@u@@type@)-1; /* preserve the sign bit */
+ }
+#endif
+ else {
+ return 0;
+ }
+}
+/**end repeat1**/
+/**end repeat**/
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 2028a5712..5443223ab 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -699,6 +699,7 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
* #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double,
* npy_double, npy_double, npy_double, npy_double#
* #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
+ * #c = hh,uhh,h,uh,,u,l,ul,ll,ull#
*/
#define @TYPE@_floor_divide @TYPE@_divide
@@ -776,16 +777,15 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
/**begin repeat2
* Arithmetic
- * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor,
- * left_shift, right_shift#
- * #OP = +, -,*, &, |, ^, <<, >>#
+ * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor#
+ * #OP = +, -, *, &, |, ^#
*/
#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
@TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
- if(IS_BINARY_REDUCE) {
+ if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
io1 @OP@= *(@type@ *)ip2;
}
@@ -799,6 +799,47 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
/**end repeat2**/
+/*
+ * Arithmetic bit shift operations.
+ *
+ * Intel hardware masks bit shift values, so large shifts wrap around
+ * and can produce surprising results. The special handling ensures that
+ * behavior is independent of compiler or hardware.
+ * TODO: We could implement consistent behavior for negative shifts,
+ * which is undefined in C.
+ */
+
+#define INT_left_shift_needs_clear_floatstatus
+#define UINT_left_shift_needs_clear_floatstatus
+
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+@TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
+ void *NPY_UNUSED(func))
+{
+ BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2));
+
+#ifdef @TYPE@_left_shift_needs_clear_floatstatus
+ // For some reason, our macOS CI sets an "invalid" flag here, but only
+ // for some types.
+ npy_clear_floatstatus_barrier((char*)dimensions);
+#endif
+}
+
+#undef INT_left_shift_needs_clear_floatstatus
+#undef UINT_left_shift_needs_clear_floatstatus
+
+NPY_NO_EXPORT
+#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
+NPY_GCC_OPT_3
+#endif
+void
+@TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps,
+ void *NPY_UNUSED(func))
+{
+ BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
+}
+
+
/**begin repeat2
* #kind = equal, not_equal, greater, greater_equal, less, less_equal,
* logical_and, logical_or#
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index a7987acda..df440e095 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -246,25 +246,26 @@ static void
/**end repeat**/
-
-/* QUESTION: Should we check for overflow / underflow in (l,r)shift? */
-
/**begin repeat
* #name = byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong#
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong#
+ * #suffix = hh,uhh,h,uh,,u,l,ul,ll,ull#
*/
/**begin repeat1
- * #oper = and, xor, or, lshift, rshift#
- * #op = &, ^, |, <<, >>#
+ * #oper = and, xor, or#
+ * #op = &, ^, |#
*/
#define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2)
/**end repeat1**/
+#define @name@_ctype_lshift(arg1, arg2, out) *(out) = npy_lshift@suffix@(arg1, arg2)
+#define @name@_ctype_rshift(arg1, arg2, out) *(out) = npy_rshift@suffix@(arg1, arg2)
+
/**end repeat**/
/**begin repeat
@@ -570,7 +571,7 @@ static void
* 1) Convert the types to the common type if both are scalars (0 return)
* 2) If both are not scalars use ufunc machinery (-2 return)
* 3) If both are scalars but cannot be cast to the right type
- * return NotImplmented (-1 return)
+ * return NotImplemented (-1 return)
*
* 4) Perform the function on the C-type.
* 5) If an error condition occurred, check to see
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index ebba457e3..854df5590 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -664,3 +664,31 @@ class TestAbs(object):
def test_numpy_abs(self):
self._test_abs_func(np.abs)
+
+
+class TestBitShifts(object):
+
+ @pytest.mark.parametrize('type_code', np.typecodes['AllInteger'])
+ @pytest.mark.parametrize('op',
+ [operator.rshift, operator.lshift], ids=['>>', '<<'])
+ def test_shift_all_bits(self, type_code, op):
+ """ Shifts where the shift amount is the width of the type or wider """
+ # gh-2449
+ dt = np.dtype(type_code)
+ nbits = dt.itemsize * 8
+ for val in [5, -5]:
+ for shift in [nbits, nbits + 4]:
+ val_scl = dt.type(val)
+ shift_scl = dt.type(shift)
+ res_scl = op(val_scl, shift_scl)
+ if val_scl < 0 and op is operator.rshift:
+ # sign bit is preserved
+ assert_equal(res_scl, -1)
+ else:
+ assert_equal(res_scl, 0)
+
+ # Result on scalars should be the same as on arrays
+ val_arr = np.array([val]*32, dtype=dt)
+ shift_arr = np.array([shift]*32, dtype=dt)
+ res_arr = op(val_arr, shift_arr)
+ assert_equal(res_arr, res_scl)