diff options
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/include/numpy/npy_math.h | 22 | ||||
-rw-r--r-- | numpy/core/setup.py | 6 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 39 | ||||
-rw-r--r-- | numpy/core/src/npymath/npy_math_internal.h.src | 41 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 49 | ||||
-rw-r--r-- | numpy/core/src/umath/scalarmath.c.src | 13 | ||||
-rw-r--r-- | numpy/core/tests/test_scalarmath.py | 28 |
7 files changed, 188 insertions, 10 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 126b861bf..69e690f28 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -177,6 +177,28 @@ NPY_INPLACE npy_long npy_lcml(npy_long a, npy_long b); NPY_INPLACE npy_longlong npy_gcdll(npy_longlong a, npy_longlong b); NPY_INPLACE npy_longlong npy_lcmll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_ubyte npy_rshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ubyte npy_lshiftuhh(npy_ubyte a, npy_ubyte b); +NPY_INPLACE npy_ushort npy_rshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_ushort npy_lshiftuh(npy_ushort a, npy_ushort b); +NPY_INPLACE npy_uint npy_rshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_uint npy_lshiftu(npy_uint a, npy_uint b); +NPY_INPLACE npy_ulong npy_rshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulong npy_lshiftul(npy_ulong a, npy_ulong b); +NPY_INPLACE npy_ulonglong npy_rshiftull(npy_ulonglong a, npy_ulonglong b); +NPY_INPLACE npy_ulonglong npy_lshiftull(npy_ulonglong a, npy_ulonglong b); + +NPY_INPLACE npy_byte npy_rshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_byte npy_lshifthh(npy_byte a, npy_byte b); +NPY_INPLACE npy_short npy_rshifth(npy_short a, npy_short b); +NPY_INPLACE npy_short npy_lshifth(npy_short a, npy_short b); +NPY_INPLACE npy_int npy_rshift(npy_int a, npy_int b); +NPY_INPLACE npy_int npy_lshift(npy_int a, npy_int b); +NPY_INPLACE npy_long npy_rshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b); +NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b); +NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b); + /* * avx function has a common API for both sin & cos. This enum is used to * distinguish between the two diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 5ac7752cc..63b515b18 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -463,6 +463,12 @@ def configuration(parent_package='',top_path=None): rep = check_long_double_representation(config_cmd) moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) + if check_for_right_shift_internal_compiler_error(config_cmd): + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_LONGLONG_right_shift') + moredefs.append('NPY_DO_NOT_OPTIMIZE_ULONGLONG_right_shift') + # Py3K check if sys.version_info[0] >= 3: moredefs.append(('NPY_PY3K', 1)) diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index a3f7acd6d..84b78b585 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -5,6 +5,7 @@ import sys import warnings import copy import binascii +import textwrap from numpy.distutils.misc_util import mingw32 @@ -415,3 +416,41 @@ def long_double_representation(lines): else: # We never detected the after_sequence raise ValueError("Could not lock sequences (%s)" % saw) + + +def check_for_right_shift_internal_compiler_error(cmd): + """ + On our arm CI, this fails with an internal compilation error + + The failure looks like the following, and can be reproduced on ARM64 GCC 5.4: + + <source>: In function 'right_shift': + <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349 + ip1[i] = ip1[i] >> in2; + ^ + Please submit a full bug report, + with preprocessed source if appropriate. + See <http://gcc.gnu.org/bugs.html> for instructions. + Compiler returned: 1 + + This function returns True if this compiler bug is present, and we need to + turn off optimization for the function + """ + cmd._check_compiler() + has_optimize = cmd.try_compile(textwrap.dedent("""\ + __attribute__((optimize("O3"))) void right_shift() {} + """), None, None) + if not has_optimize: + return False + + no_err = cmd.try_compile(textwrap.dedent("""\ + typedef long the_type; /* fails also for unsigned and long long */ + __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) { + for (int i = 0; i < n; i++) { + if (in2 < (the_type)sizeof(the_type) * 8) { + ip1[i] = ip1[i] >> in2; + } + } + } + """), None, None) + return not no_err diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index fa820baac..18b6d1434 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -716,3 +716,44 @@ npy_@func@@c@(@type@ a, @type@ b) return npy_@func@u@c@(a < 0 ? -a : a, b < 0 ? -b : b); } /**end repeat**/ + +/* Unlike LCM and GCD, we need byte and short variants for the shift operators, + * since the result is dependent on the width of the type + */ +/**begin repeat + * + * #type = byte, short, int, long, longlong# + * #c = hh,h,,l,ll# + */ +/**begin repeat1 + * + * #u = u,# + * #is_signed = 0,1# + */ +NPY_INPLACE npy_@u@@type@ +npy_lshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) +{ + if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { + return a << b; + } + else { + return 0; + } +} +NPY_INPLACE npy_@u@@type@ +npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) +{ + if (NPY_LIKELY((size_t)b < sizeof(a) * CHAR_BIT)) { + return a >> b; + } +#if @is_signed@ + else if (a < 0) { + return (npy_@u@@type@)-1; /* preserve the sign bit */ + } +#endif + else { + return 0; + } +} +/**end repeat1**/ +/**end repeat**/ diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 2028a5712..5443223ab 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -699,6 +699,7 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double, * npy_double, npy_double, npy_double, npy_double# * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# + * #c = hh,uhh,h,uh,,u,l,ul,ll,ull# */ #define @TYPE@_floor_divide @TYPE@_divide @@ -776,16 +777,15 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void /**begin repeat2 * Arithmetic - * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor, - * left_shift, right_shift# - * #OP = +, -,*, &, |, ^, <<, >># + * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor# + * #OP = +, -, *, &, |, ^# */ #if @CHK@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void @TYPE@_@kind@@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { - if(IS_BINARY_REDUCE) { + if (IS_BINARY_REDUCE) { BINARY_REDUCE_LOOP(@type@) { io1 @OP@= *(@type@ *)ip2; } @@ -799,6 +799,47 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void /**end repeat2**/ +/* + * Arithmetic bit shift operations. + * + * Intel hardware masks bit shift values, so large shifts wrap around + * and can produce surprising results. The special handling ensures that + * behavior is independent of compiler or hardware. + * TODO: We could implement consistent behavior for negative shifts, + * which is undefined in C. + */ + +#define INT_left_shift_needs_clear_floatstatus +#define UINT_left_shift_needs_clear_floatstatus + +NPY_NO_EXPORT NPY_GCC_OPT_3 void +@TYPE@_left_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2)); + +#ifdef @TYPE@_left_shift_needs_clear_floatstatus + // For some reason, our macOS CI sets an "invalid" flag here, but only + // for some types. + npy_clear_floatstatus_barrier((char*)dimensions); +#endif +} + +#undef INT_left_shift_needs_clear_floatstatus +#undef UINT_left_shift_needs_clear_floatstatus + +NPY_NO_EXPORT +#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift +NPY_GCC_OPT_3 +#endif +void +@TYPE@_right_shift@isa@(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2)); +} + + /**begin repeat2 * #kind = equal, not_equal, greater, greater_equal, less, less_equal, * logical_and, logical_or# diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index a7987acda..df440e095 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -246,25 +246,26 @@ static void /**end repeat**/ - -/* QUESTION: Should we check for overflow / underflow in (l,r)shift? */ - /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, * long, ulong, longlong, ulonglong# * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #suffix = hh,uhh,h,uh,,u,l,ul,ll,ull# */ /**begin repeat1 - * #oper = and, xor, or, lshift, rshift# - * #op = &, ^, |, <<, >># + * #oper = and, xor, or# + * #op = &, ^, |# */ #define @name@_ctype_@oper@(arg1, arg2, out) *(out) = (arg1) @op@ (arg2) /**end repeat1**/ +#define @name@_ctype_lshift(arg1, arg2, out) *(out) = npy_lshift@suffix@(arg1, arg2) +#define @name@_ctype_rshift(arg1, arg2, out) *(out) = npy_rshift@suffix@(arg1, arg2) + /**end repeat**/ /**begin repeat @@ -570,7 +571,7 @@ static void * 1) Convert the types to the common type if both are scalars (0 return) * 2) If both are not scalars use ufunc machinery (-2 return) * 3) If both are scalars but cannot be cast to the right type - * return NotImplmented (-1 return) + * return NotImplemented (-1 return) * * 4) Perform the function on the C-type. * 5) If an error condition occurred, check to see diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index ebba457e3..854df5590 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -664,3 +664,31 @@ class TestAbs(object): def test_numpy_abs(self): self._test_abs_func(np.abs) + + +class TestBitShifts(object): + + @pytest.mark.parametrize('type_code', np.typecodes['AllInteger']) + @pytest.mark.parametrize('op', + [operator.rshift, operator.lshift], ids=['>>', '<<']) + def test_shift_all_bits(self, type_code, op): + """ Shifts where the shift amount is the width of the type or wider """ + # gh-2449 + dt = np.dtype(type_code) + nbits = dt.itemsize * 8 + for val in [5, -5]: + for shift in [nbits, nbits + 4]: + val_scl = dt.type(val) + shift_scl = dt.type(shift) + res_scl = op(val_scl, shift_scl) + if val_scl < 0 and op is operator.rshift: + # sign bit is preserved + assert_equal(res_scl, -1) + else: + assert_equal(res_scl, 0) + + # Result on scalars should be the same as on arrays + val_arr = np.array([val]*32, dtype=dt) + shift_arr = np.array([shift]*32, dtype=dt) + res_arr = op(val_arr, shift_arr) + assert_equal(res_arr, res_scl) |