-rw-r--r--  numpy/__init__.py                          |  9
-rw-r--r--  numpy/core/setup.py                        | 36
-rw-r--r--  numpy/core/setup_common.py                 | 53
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c  | 96
-rw-r--r--  numpy/core/src/umath/loops.c.src           | 12
-rw-r--r--  numpy/core/tests/test_nditer.py            | 29
6 files changed, 132 insertions(+), 103 deletions(-)
diff --git a/numpy/__init__.py b/numpy/__init__.py
index 3260046d6..22c90677e 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -85,10 +85,11 @@ __version__
 
 Viewing documentation using IPython
 -----------------------------------
-Start IPython with the NumPy profile (``ipython -p numpy``), which will
-import `numpy` under the alias ``np``. Then, use the ``cpaste`` command to
-paste examples into the shell. To see which functions are available in
-`numpy`, type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
+
+Start IPython and import `numpy` usually under the alias ``np``: `import
+numpy as np`. Then, directly past or use the ``%cpaste`` magic to paste
+examples into the shell. To see which functions are available in `numpy`,
+type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
 ``np.*cos*?<ENTER>`` (where ``<ENTER>`` refers to the ENTER key) to narrow
 down the list. To view the docstring for a function, use
 ``np.cos?<ENTER>`` (to view the docstring) and ``np.cos??<ENTER>`` (to view
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 17dc8438e..10b8c093e 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -177,6 +177,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
         else:
             return 1
 
+    # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
+    # support on Windows-based platforms
+    def check_gh14787(fn):
+        if fn == 'attribute_target_avx512f':
+            if (sys.platform in ('win32', 'cygwin') and
+                    config.check_compiler_gcc() and
+                    not config.check_gcc_version_at_least(8, 4)):
+                ext.extra_compile_args.extend(
+                        ['-ffixed-xmm%s' % n for n in range(16, 32)])
+
     #use_msvc = config.check_decl("_MSC_VER")
     if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False):
         raise SystemError("One of the required function to build numpy is not"
@@ -227,19 +237,19 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
     for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES:
         if config.check_gcc_function_attribute(dec, fn):
             moredefs.append((fname2def(fn), 1))
-            if fn == 'attribute_target_avx512f':
-                # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
-                # support on Windows-based platforms
-                if (sys.platform in ('win32', 'cygwin') and
-                        config.check_compiler_gcc() and
-                        not config.check_gcc_version_at_least(8, 4)):
-                    ext.extra_compile_args.extend(
-                            ['-ffixed-xmm%s' % n for n in range(16, 32)])
-
-    for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
-        if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
-                                                               header):
-            moredefs.append((fname2def(fn), 1))
+            check_gh14787(fn)
+
+    platform = sysconfig.get_platform()
+    if ("x86_64" in platform):
+        for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
+            if config.check_gcc_function_attribute(dec, fn):
+                moredefs.append((fname2def(fn), 1))
+                check_gh14787(fn)
+        for dec, fn, code, header in (
+                OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX):
+            if config.check_gcc_function_attribute_with_intrinsics(
+                    dec, fn, code, header):
+                moredefs.append((fname2def(fn), 1))
 
     for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
         if config.check_gcc_variable_attribute(fn):
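The GH-14787 workaround only fires for the avx512f attribute check, on win32/cygwin, with a GCC older than 8.4; it then pins xmm16 through xmm31 so the buggy register allocator never touches them. A rough standalone sketch of that gate (the numpy.distutils compiler checks are stubbed out here as plain arguments, purely for illustration):

    import sys

    def gh14787_extra_args(fn, is_gcc, gcc_version):
        # ``is_gcc`` and ``gcc_version`` stand in for the
        # config.check_compiler_gcc() / check_gcc_version_at_least()
        # calls that setup.py actually makes.
        if fn != 'attribute_target_avx512f':
            return []
        if sys.platform not in ('win32', 'cygwin'):
            return []
        if is_gcc and gcc_version < (8, 4):
            # Keep xmm16..xmm31 out of the allocator's hands (GCC < 8.4 bug).
            return ['-ffixed-xmm%d' % n for n in range(16, 32)]
        return []

    # On win32 with e.g. GCC 7.3 this yields the 16 '-ffixed-xmm*' flags;
    # on other platforms or newer compilers it yields nothing.
    print(gh14787_extra_args('attribute_target_avx512f', True, (7, 3)))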
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a8497fe75..55daa8648 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -209,16 +209,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
                                 'attribute_optimize_opt_2'),
                                 ('__attribute__((nonnull (1)))',
                                 'attribute_nonnull'),
-                                ('__attribute__((target ("avx")))',
-                                'attribute_target_avx'),
-                                ('__attribute__((target ("avx2")))',
-                                'attribute_target_avx2'),
-                                ('__attribute__((target ("avx512f")))',
-                                'attribute_target_avx512f'),
-                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
-                                'attribute_target_avx512_skx'),
                                 ]
 
+OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
+                                'attribute_target_avx'),
+                                ('__attribute__((target ("avx2")))',
+                                'attribute_target_avx2'),
+                                ('__attribute__((target ("avx512f")))',
+                                'attribute_target_avx512f'),
+                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+                                'attribute_target_avx512_skx'),
+                                ]
+
 # function attributes with intrinsics
 # To ensure your compiler can compile avx intrinsics with just the attributes
 # gcc 4.8.4 support attributes but not with intrisics
@@ -227,23 +229,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
 # The _mm512_castps_si512 instruction is specific check for AVX-512F support
 # in gcc-4.9 which is missing a subset of intrinsics. See
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
-                                'attribute_target_avx2_with_intrinsics',
-                                '__m256 temp = _mm256_set1_ps(1.0); temp = \
-                                _mm256_fmadd_ps(temp, temp, temp)',
-                                'immintrin.h'),
-                                ('__attribute__((target("avx512f")))',
-                                'attribute_target_avx512f_with_intrinsics',
-                                '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
-                                'immintrin.h'),
-                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
-                                'attribute_target_avx512_skx_with_intrinsics',
-                                '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
-                                __m512i unused_temp = \
-                                    _mm512_castps_si512(_mm512_set1_ps(1.0));\
-                                _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
-                                'immintrin.h'),
-                                ]
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
+                                ('__attribute__((target("avx2,fma")))',
+                                'attribute_target_avx2_with_intrinsics',
+                                '__m256 temp = _mm256_set1_ps(1.0); temp = \
+                                _mm256_fmadd_ps(temp, temp, temp)',
+                                'immintrin.h'),
+                                ('__attribute__((target("avx512f")))',
+                                'attribute_target_avx512f_with_intrinsics',
+                                '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
+                                'immintrin.h'),
+                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+                                'attribute_target_avx512_skx_with_intrinsics',
+                                '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
+                                __m512i unused_temp = \
+                                    _mm512_castps_si512(_mm512_set1_ps(1.0));\
+                                _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
+                                'immintrin.h'),
+                                ]
 
 def fname2def(name):
     return "HAVE_%s" % name.upper()
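The practical effect of splitting out the *_AVX lists is that the AVX attribute probes only run on x86_64 builds, and each probe that succeeds still becomes a HAVE_* macro via fname2def. A small illustration of that mapping, using sysconfig.get_platform() the same way setup.py now does (the real build additionally runs the compiler checks first, which are omitted here):

    import sysconfig

    def fname2def(name):
        # Same helper as in setup_common.py.
        return "HAVE_%s" % name.upper()

    OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [
        ('__attribute__((target ("avx")))', 'attribute_target_avx'),
        ('__attribute__((target ("avx2")))', 'attribute_target_avx2'),
        ('__attribute__((target ("avx512f")))', 'attribute_target_avx512f'),
    ]

    # Only x86_64 platforms even look at the AVX attribute list.
    if "x86_64" in sysconfig.get_platform():
        for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
            # setup.py first verifies each entry with
            # config.check_gcc_function_attribute(dec, fn); here we only
            # show the macro a successful probe would define.
            print(fname2def(fn))    # e.g. HAVE_ATTRIBUTE_TARGET_AVX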
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index b6acce570..248397196 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -1410,9 +1410,9 @@ check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
 static int
 npyiter_check_reduce_ok_and_set_flags(
         NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
-        int dim) {
+        int iop, int maskop, int dim) {
     /* If it's writeable, this means a reduction */
-    if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
+    if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
         if (!(flags & NPY_ITER_REDUCE_OK)) {
             PyErr_Format(PyExc_ValueError,
                     "output operand requires a reduction along dimension %d, "
@@ -1420,17 +1420,35 @@ npyiter_check_reduce_ok_and_set_flags(
                     "does not match the expected output shape.", dim);
             return 0;
         }
-        if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
+        if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
             PyErr_SetString(PyExc_ValueError,
                     "output operand requires a reduction, but is flagged as "
                     "write-only, not read-write");
             return 0;
         }
+        /*
+         * The ARRAYMASK can't be a reduction, because
+         * it would be possible to write back to the
+         * array once when the ARRAYMASK says 'True',
+         * then have the reduction on the ARRAYMASK
+         * later flip to 'False', indicating that the
+         * write back should never have been done,
+         * and violating the strict masking semantics
+         */
+        if (iop == maskop) {
+            PyErr_SetString(PyExc_ValueError,
+                    "output operand requires a "
+                    "reduction, but is flagged as "
+                    "the ARRAYMASK operand which "
+                    "is not permitted to be the "
+                    "result of a reduction");
+            return 0;
+        }
 
         NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is"
                          "occurring\n");
         NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
-        *op_itflags |= NPY_OP_ITFLAG_REDUCE;
+        op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
     }
     return 1;
 }
@@ -1613,42 +1631,9 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
                         goto operand_different_than_broadcast;
                     }
                     /* If it's writeable, this means a reduction */
-                    if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
-                        if (!(flags & NPY_ITER_REDUCE_OK)) {
-                            PyErr_SetString(PyExc_ValueError,
-                                    "output operand requires a "
-                                    "reduction, but reduction is "
-                                    "not enabled");
-                            return 0;
-                        }
-                        if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
-                            PyErr_SetString(PyExc_ValueError,
-                                    "output operand requires a "
-                                    "reduction, but is flagged as "
-                                    "write-only, not read-write");
-                            return 0;
-                        }
-                        /*
-                         * The ARRAYMASK can't be a reduction, because
-                         * it would be possible to write back to the
-                         * array once when the ARRAYMASK says 'True',
-                         * then have the reduction on the ARRAYMASK
-                         * later flip to 'False', indicating that the
-                         * write back should never have been done,
-                         * and violating the strict masking semantics
-                         */
-                        if (iop == maskop) {
-                            PyErr_SetString(PyExc_ValueError,
-                                    "output operand requires a "
-                                    "reduction, but is flagged as "
-                                    "the ARRAYMASK operand which "
-                                    "is not permitted to be the "
-                                    "result of a reduction");
-                            return 0;
-                        }
-
-                        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
-                        op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
+                    if (!npyiter_check_reduce_ok_and_set_flags(
+                            iter, flags, op_itflags, iop, maskop, idim)) {
+                        return 0;
                     }
                 }
                 else {
@@ -1697,7 +1682,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
                             goto operand_different_than_broadcast;
                         }
                         if (!npyiter_check_reduce_ok_and_set_flags(
-                                iter, flags, &op_itflags[iop], i)) {
+                                iter, flags, op_itflags, iop, maskop, i)) {
                             return 0;
                         }
                     }
@@ -1707,8 +1692,14 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
                     }
                     else {
                         strides[iop] = 0;
+                        /*
+                         * If deleting this axis produces a reduction, but
+                         * reduction wasn't enabled, throw an error.
+                         * NOTE: We currently always allow new-axis if the iteration
+                         * size is 1 (thus allowing broadcasting sometimes).
+                         */
                         if (!npyiter_check_reduce_ok_and_set_flags(
-                                iter, flags, &op_itflags[iop], i)) {
+                                iter, flags, op_itflags, iop, maskop, i)) {
                             return 0;
                         }
                     }
@@ -2545,6 +2536,11 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
             i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
             i = npyiter_get_op_axis(op_axes[i], &reduction_axis);
 
+            /*
+             * If i < 0, this is a new axis (the operand does not have it)
+             * so we can ignore it here. The iterator setup will have
+             * ensured already that a potential reduction/broadcast is valid
+             */
             if (i >= 0) {
                 NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                   "for iterator dimension %d to %d\n", (int)i,
@@ -2575,22 +2571,6 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                     stride *= shape[i];
                 }
             }
-            else {
-                if (shape == NULL) {
-                    /*
-                     * If deleting this axis produces a reduction, but
-                     * reduction wasn't enabled, throw an error.
-                     * NOTE: We currently always allow new-axis if the iteration
-                     * size is 1 (thus allowing broadcasting sometimes).
-                     */
-                    if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
-                        if (!npyiter_check_reduce_ok_and_set_flags(
-                                iter, flags, op_itflags, i)) {
-                            return NULL;
-                        }
-                    }
-                }
-            }
         }
     }
     else {
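From Python, the consolidated helper is what now raises when the ARRAYMASK operand would itself have to be reduced. A small reproduction in the spirit of the new test added below (shapes chosen for illustration): the (1, 4) mask would need a reduction along the first axis to cover every row of the (3, 4) output, which the iterator refuses.

    import numpy as np

    arr = np.zeros((3, 4))
    mask = np.zeros((1, 4), dtype=bool)   # would have to be reduced over axis 0

    try:
        np.nditer((mask, arr),
                  flags=["reduce_ok"],
                  op_flags=[["arraymask", "readwrite", "allocate"],
                            ["writeonly", "writemasked"]])
    except ValueError as exc:
        print("rejected:", exc)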
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e5104db81..fe5aa9374 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -571,7 +571,6 @@ NPY_NO_EXPORT void
 
 /**begin repeat1
  * #isa = , _avx2#
- * #ISA = , AVX2#
  * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
  * #ATTR = , NPY_GCC_TARGET_AVX2#
  */
@@ -658,6 +657,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
 #define INT_left_shift_needs_clear_floatstatus
 #define UINT_left_shift_needs_clear_floatstatus
 
+#if @CHK@
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 @TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions,
                        npy_intp const *steps, void *NPY_UNUSED(func))
@@ -670,10 +670,12 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
     npy_clear_floatstatus_barrier((char*)dimensions);
 #endif
 }
+#endif
 
 #undef INT_left_shift_needs_clear_floatstatus
 #undef UINT_left_shift_needs_clear_floatstatus
 
+#if @CHK@
 NPY_NO_EXPORT
 #ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
 NPY_GCC_OPT_3
@@ -684,7 +686,7 @@ void
 {
     BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
 }
-
+#endif
 
 /**begin repeat2
  * #kind = logical_and, logical_or#
@@ -1448,7 +1450,10 @@ NPY_NO_EXPORT void
 /**begin repeat2
  * #ISA = , _avx512_skx#
  * #isa = simd, avx512_skx#
+ * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)#
  **/
+
+#if @CHK@
 NPY_NO_EXPORT void
 @TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
@@ -1460,6 +1465,7 @@ NPY_NO_EXPORT void
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
 }
+#endif
 /**end repeat2**/
 
 /**end repeat1**/
@@ -2289,7 +2295,7 @@ NPY_NO_EXPORT void
     }
 }
 
-#if @SIMD@
+#if @SIMD@ && defined(HAVE_ATTRIBUTE_TARGET_AVX512F)
 /**begin repeat1
  * arithmetic
  * #kind = conjugate, square, absolute#
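These guards work through the .c.src template expansion: each name declared in a /**begin repeat*/ block is substituted once per listed value, so the second instantiation of every loop ends up wrapped in #if defined(HAVE_ATTRIBUTE_TARGET_AVX2) and compiles to nothing when the build did not detect the attribute. A toy Python substitution, far simpler than numpy's real conv_template.py, just to show the expansion:

    TEMPLATE = """\
    #if @CHK@
    NPY_NO_EXPORT void
    INT_left_shift@isa@(char **args)
    {
        /* loop body */
    }
    #endif
    """

    # One dict per repeat value: the baseline loop and the AVX2 variant.
    SUBSTITUTIONS = [
        {"@isa@": "", "@CHK@": "1"},
        {"@isa@": "_avx2", "@CHK@": "defined(HAVE_ATTRIBUTE_TARGET_AVX2)"},
    ]

    for subs in SUBSTITUTIONS:
        expanded = TEMPLATE
        for key, value in subs.items():
            expanded = expanded.replace(key, value)
        print(expanded)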
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index b43bc50e9..08f44568c 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -2728,6 +2728,7 @@ def test_iter_writemasked_badinput():
                  op_dtypes=['f4', None],
                  casting='same_kind')
 
+
 def _is_buffered(iterator):
     try:
         iterator.itviews
@@ -2803,6 +2804,34 @@ def test_iter_writemasked(a):
     # were copied back
     assert_equal(a, np.broadcast_to([3, 3, 2.5] * reps, shape))
 
+
+@pytest.mark.parametrize(["mask", "mask_axes"], [
+        # Allocated operand (only broadcasts with -1)
+        (None, [-1, 0]),
+        # Reduction along the first dimension (with and without op_axes)
+        (np.zeros((1, 4), dtype="bool"), [0, 1]),
+        (np.zeros((1, 4), dtype="bool"), None),
+        # Test 0-D and -1 op_axes
+        (np.zeros(4, dtype="bool"), [-1, 0]),
+        (np.zeros((), dtype="bool"), [-1, -1]),
+        (np.zeros((), dtype="bool"), None)])
+def test_iter_writemasked_broadcast_error(mask, mask_axes):
+    # This assumes that a readwrite mask makes sense. This is likely not the
+    # case and should simply be deprecated.
+    arr = np.zeros((3, 4))
+    itflags = ["reduce_ok"]
+    mask_flags = ["arraymask", "readwrite", "allocate"]
+    a_flags = ["writeonly", "writemasked"]
+    if mask_axes is None:
+        op_axes = None
+    else:
+        op_axes = [mask_axes, [0, 1]]
+
+    with assert_raises(ValueError):
+        np.nditer((mask, arr), flags=itflags, op_flags=[mask_flags, a_flags],
+                  op_axes=op_axes)
+
+
 def test_iter_writemasked_decref():
     # force casting (to make it interesting) by using a structured dtype.
     arr = np.arange(10000).astype(">i,O")
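For context on the semantics the new test protects: the arraymask is only enforced at buffer copy-back time, so masking an output visibly requires buffering plus a cast. A short usage sketch in the spirit of the existing test_iter_writemasked (values chosen for illustration):

    import numpy as np

    a = np.array([1.0, 1.0, 1.0])
    mask = np.array([True, True, False])

    # Buffering plus the f8 -> i8 cast forces a copy back from the buffer,
    # and only elements selected by the arraymask are written to ``a``.
    it = np.nditer([a, mask], flags=["buffered"],
                   op_flags=[["readwrite", "writemasked"],
                             ["readonly", "arraymask"]],
                   op_dtypes=["i8", None], casting="unsafe")
    with it:
        for x, m in it:
            x[...] = 3

    print(a)    # [3. 3. 1.] -- the unmasked element is left untouched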
