-rw-r--r--  numpy/__init__.py                          |  9
-rw-r--r--  numpy/core/setup.py                        | 36
-rw-r--r--  numpy/core/setup_common.py                 | 53
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c  | 96
-rw-r--r--  numpy/core/src/umath/loops.c.src           | 12
-rw-r--r--  numpy/core/tests/test_nditer.py            | 29
6 files changed, 132 insertions, 103 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py
index 3260046d6..22c90677e 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -85,10 +85,11 @@ __version__
Viewing documentation using IPython
-----------------------------------
-Start IPython with the NumPy profile (``ipython -p numpy``), which will
-import `numpy` under the alias ``np``. Then, use the ``cpaste`` command to
-paste examples into the shell. To see which functions are available in
-`numpy`, type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
+
+Start IPython and import `numpy`, usually under the alias ``np``: `import
+numpy as np`. Then, directly paste or use the ``%cpaste`` magic to paste
+examples into the shell. To see which functions are available in `numpy`,
+type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
``np.*cos*?<ENTER>`` (where ``<ENTER>`` refers to the ENTER key) to narrow
down the list. To view the docstring for a function, use
``np.cos?<ENTER>`` (to view the docstring) and ``np.cos??<ENTER>`` (to view
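
As a quick illustration of the workflow the revised paragraph describes, a
short IPython session (output omitted) might look like this:

    $ ipython
    In [1]: import numpy as np   # the conventional alias
    In [2]: np.*cos*?            # wildcard search narrows the list: np.cos, np.arccos, ...
    In [3]: np.cos?              # view the docstring for np.cos
    In [4]: np.cos??             # docstring plus source, when available
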
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 17dc8438e..10b8c093e 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -177,6 +177,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
else:
return 1
+ # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
+ # support on Windows-based platforms
+ def check_gh14787(fn):
+ if fn == 'attribute_target_avx512f':
+ if (sys.platform in ('win32', 'cygwin') and
+ config.check_compiler_gcc() and
+ not config.check_gcc_version_at_least(8, 4)):
+ ext.extra_compile_args.extend(
+ ['-ffixed-xmm%s' % n for n in range(16, 32)])
+
#use_msvc = config.check_decl("_MSC_VER")
if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False):
raise SystemError("One of the required function to build numpy is not"
@@ -227,19 +237,19 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES:
if config.check_gcc_function_attribute(dec, fn):
moredefs.append((fname2def(fn), 1))
- if fn == 'attribute_target_avx512f':
- # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
- # support on Windows-based platforms
- if (sys.platform in ('win32', 'cygwin') and
- config.check_compiler_gcc() and
- not config.check_gcc_version_at_least(8, 4)):
- ext.extra_compile_args.extend(
- ['-ffixed-xmm%s' % n for n in range(16, 32)])
-
- for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
- if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
- header):
- moredefs.append((fname2def(fn), 1))
+ check_gh14787(fn)
+
+ platform = sysconfig.get_platform()
+ if ("x86_64" in platform):
+ for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
+ if config.check_gcc_function_attribute(dec, fn):
+ moredefs.append((fname2def(fn), 1))
+ check_gh14787(fn)
+ for dec, fn, code, header in (
+ OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX):
+ if config.check_gcc_function_attribute_with_intrinsics(
+ dec, fn, code, header):
+ moredefs.append((fname2def(fn), 1))
for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
if config.check_gcc_variable_attribute(fn):
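
For reference, here is a rough standalone sketch of what the hoisted
check_gh14787 helper decides. The config.check_compiler_gcc() and
config.check_gcc_version_at_least() probes belong to numpy.distutils and are
replaced by plain boolean parameters purely for illustration:

    import sys

    def gh14787_extra_args(fn, compiler_is_gcc, gcc_at_least_8_4):
        # GH-14787: GCC < 8.4 mishandles the extended AVX512 registers on
        # Windows-based targets, so pin xmm16-xmm31 with -ffixed-xmm{16..31}
        # whenever the avx512f target attribute is being enabled there.
        if (fn == 'attribute_target_avx512f'
                and sys.platform in ('win32', 'cygwin')
                and compiler_is_gcc and not gcc_at_least_8_4):
            return ['-ffixed-xmm%s' % n for n in range(16, 32)]
        return []

    # e.g. gh14787_extra_args('attribute_target_avx512f', True, False)
    # -> ['-ffixed-xmm16', ..., '-ffixed-xmm31']
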
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a8497fe75..55daa8648 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -209,16 +209,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
'attribute_optimize_opt_2'),
('__attribute__((nonnull (1)))',
'attribute_nonnull'),
- ('__attribute__((target ("avx")))',
- 'attribute_target_avx'),
- ('__attribute__((target ("avx2")))',
- 'attribute_target_avx2'),
- ('__attribute__((target ("avx512f")))',
- 'attribute_target_avx512f'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx'),
]
+OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
+ 'attribute_target_avx'),
+ ('__attribute__((target ("avx2")))',
+ 'attribute_target_avx2'),
+ ('__attribute__((target ("avx512f")))',
+ 'attribute_target_avx512f'),
+ ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+ 'attribute_target_avx512_skx'),
+ ]
+
# function attributes with intrinsics
# To ensure your compiler can compile avx intrinsics with just the attributes
# gcc 4.8.4 support attributes but not with intrisics
@@ -227,23 +229,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
# The _mm512_castps_si512 instruction is specific check for AVX-512F support
# in gcc-4.9 which is missing a subset of intrinsics. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
- 'attribute_target_avx2_with_intrinsics',
- '__m256 temp = _mm256_set1_ps(1.0); temp = \
- _mm256_fmadd_ps(temp, temp, temp)',
- 'immintrin.h'),
- ('__attribute__((target("avx512f")))',
- 'attribute_target_avx512f_with_intrinsics',
- '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
- 'immintrin.h'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx_with_intrinsics',
- '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
- __m512i unused_temp = \
- _mm512_castps_si512(_mm512_set1_ps(1.0));\
- _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
- 'immintrin.h'),
- ]
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
+ ('__attribute__((target("avx2,fma")))',
+ 'attribute_target_avx2_with_intrinsics',
+ '__m256 temp = _mm256_set1_ps(1.0); temp = \
+ _mm256_fmadd_ps(temp, temp, temp)',
+ 'immintrin.h'),
+ ('__attribute__((target("avx512f")))',
+ 'attribute_target_avx512f_with_intrinsics',
+ '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
+ 'immintrin.h'),
+ ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+ 'attribute_target_avx512_skx_with_intrinsics',
+ '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
+ __m512i unused_temp = \
+ _mm512_castps_si512(_mm512_set1_ps(1.0));\
+ _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
+ 'immintrin.h'),
+ ]
def fname2def(name):
return "HAVE_%s" % name.upper()
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index b6acce570..248397196 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -1410,9 +1410,9 @@ check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
static int
npyiter_check_reduce_ok_and_set_flags(
NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
- int dim) {
+ int iop, int maskop, int dim) {
/* If it's writeable, this means a reduction */
- if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
+ if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
if (!(flags & NPY_ITER_REDUCE_OK)) {
PyErr_Format(PyExc_ValueError,
"output operand requires a reduction along dimension %d, "
@@ -1420,17 +1420,35 @@ npyiter_check_reduce_ok_and_set_flags(
"does not match the expected output shape.", dim);
return 0;
}
- if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
+ if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
PyErr_SetString(PyExc_ValueError,
"output operand requires a reduction, but is flagged as "
"write-only, not read-write");
return 0;
}
+ /*
+ * The ARRAYMASK can't be a reduction, because
+ * it would be possible to write back to the
+ * array once when the ARRAYMASK says 'True',
+ * then have the reduction on the ARRAYMASK
+ * later flip to 'False', indicating that the
+ * write back should never have been done,
+ * and violating the strict masking semantics
+ */
+ if (iop == maskop) {
+ PyErr_SetString(PyExc_ValueError,
+ "output operand requires a "
+ "reduction, but is flagged as "
+ "the ARRAYMASK operand which "
+ "is not permitted to be the "
+ "result of a reduction");
+ return 0;
+ }
NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is"
"occurring\n");
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
- *op_itflags |= NPY_OP_ITFLAG_REDUCE;
+ op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
}
return 1;
}
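
With the reduction checks consolidated in this helper, every call site now
reports the same ARRAYMASK error. A minimal reproducer, essentially a
trimmed-down variant of the test added to test_nditer.py further below (so
it assumes this change is applied):

    import numpy as np

    arr = np.zeros((3, 4))
    mask = np.zeros((1, 4), dtype=bool)  # broadcasts over axis 0, so the mask
                                         # operand would itself need a reduction

    try:
        np.nditer((mask, arr), flags=["reduce_ok"],
                  op_flags=[["arraymask", "readwrite", "allocate"],
                            ["writeonly", "writemasked"]])
    except ValueError as exc:
        print(exc)  # reduction on the ARRAYMASK operand is rejected
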
@@ -1613,42 +1631,9 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
goto operand_different_than_broadcast;
}
/* If it's writeable, this means a reduction */
- if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
- if (!(flags & NPY_ITER_REDUCE_OK)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a "
- "reduction, but reduction is "
- "not enabled");
- return 0;
- }
- if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a "
- "reduction, but is flagged as "
- "write-only, not read-write");
- return 0;
- }
- /*
- * The ARRAYMASK can't be a reduction, because
- * it would be possible to write back to the
- * array once when the ARRAYMASK says 'True',
- * then have the reduction on the ARRAYMASK
- * later flip to 'False', indicating that the
- * write back should never have been done,
- * and violating the strict masking semantics
- */
- if (iop == maskop) {
- PyErr_SetString(PyExc_ValueError,
- "output operand requires a "
- "reduction, but is flagged as "
- "the ARRAYMASK operand which "
- "is not permitted to be the "
- "result of a reduction");
- return 0;
- }
-
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
- op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
+ if (!npyiter_check_reduce_ok_and_set_flags(
+ iter, flags, op_itflags, iop, maskop, idim)) {
+ return 0;
}
}
else {
@@ -1697,7 +1682,7 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
goto operand_different_than_broadcast;
}
if (!npyiter_check_reduce_ok_and_set_flags(
- iter, flags, &op_itflags[iop], i)) {
+ iter, flags, op_itflags, iop, maskop, i)) {
return 0;
}
}
@@ -1707,8 +1692,14 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf
}
else {
strides[iop] = 0;
+ /*
+ * If deleting this axis produces a reduction, but
+ * reduction wasn't enabled, throw an error.
+ * NOTE: We currently always allow new-axis if the iteration
+ * size is 1 (thus allowing broadcasting sometimes).
+ */
if (!npyiter_check_reduce_ok_and_set_flags(
- iter, flags, &op_itflags[iop], i)) {
+ iter, flags, op_itflags, iop, maskop, i)) {
return 0;
}
}
@@ -2545,6 +2536,11 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
i = npyiter_get_op_axis(op_axes[i], &reduction_axis);
+ /*
+ * If i < 0, this is a new axis (the operand does not have it)
+ * so we can ignore it here. The iterator setup will have
+ * ensured already that a potential reduction/broadcast is valid.
+ */
if (i >= 0) {
NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
"for iterator dimension %d to %d\n", (int)i,
@@ -2575,22 +2571,6 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
stride *= shape[i];
}
}
- else {
- if (shape == NULL) {
- /*
- * If deleting this axis produces a reduction, but
- * reduction wasn't enabled, throw an error.
- * NOTE: We currently always allow new-axis if the iteration
- * size is 1 (thus allowing broadcasting sometimes).
- */
- if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
- if (!npyiter_check_reduce_ok_and_set_flags(
- iter, flags, op_itflags, i)) {
- return NULL;
- }
- }
- }
- }
}
}
else {
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e5104db81..fe5aa9374 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -571,7 +571,6 @@ NPY_NO_EXPORT void
/**begin repeat1
* #isa = , _avx2#
- * #ISA = , AVX2#
* #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
* #ATTR = , NPY_GCC_TARGET_AVX2#
*/
@@ -658,6 +657,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
#define INT_left_shift_needs_clear_floatstatus
#define UINT_left_shift_needs_clear_floatstatus
+#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
@@ -670,10 +670,12 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
npy_clear_floatstatus_barrier((char*)dimensions);
#endif
}
+#endif
#undef INT_left_shift_needs_clear_floatstatus
#undef UINT_left_shift_needs_clear_floatstatus
+#if @CHK@
NPY_NO_EXPORT
#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
NPY_GCC_OPT_3
@@ -684,7 +686,7 @@ void
{
BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
}
-
+#endif
/**begin repeat2
* #kind = logical_and, logical_or#
@@ -1448,7 +1450,10 @@ NPY_NO_EXPORT void
/**begin repeat2
* #ISA = , _avx512_skx#
* #isa = simd, avx512_skx#
+ * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)#
**/
+
+#if @CHK@
NPY_NO_EXPORT void
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
@@ -1460,6 +1465,7 @@ NPY_NO_EXPORT void
}
npy_clear_floatstatus_barrier((char*)dimensions);
}
+#endif
/**end repeat2**/
/**end repeat1**/
@@ -2289,7 +2295,7 @@ NPY_NO_EXPORT void
}
}
-#if @SIMD@
+#if @SIMD@ && defined(HAVE_ATTRIBUTE_TARGET_AVX512F)
/**begin repeat1
* arithmetic
* #kind = conjugate, square, absolute#
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index b43bc50e9..08f44568c 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -2728,6 +2728,7 @@ def test_iter_writemasked_badinput():
op_dtypes=['f4', None],
casting='same_kind')
+
def _is_buffered(iterator):
try:
iterator.itviews
@@ -2803,6 +2804,34 @@ def test_iter_writemasked(a):
# were copied back
assert_equal(a, np.broadcast_to([3, 3, 2.5] * reps, shape))
+
+@pytest.mark.parametrize(["mask", "mask_axes"], [
+ # Allocated operand (only broadcasts with -1)
+ (None, [-1, 0]),
+ # Reduction along the first dimension (with and without op_axes)
+ (np.zeros((1, 4), dtype="bool"), [0, 1]),
+ (np.zeros((1, 4), dtype="bool"), None),
+ # Test 0-D and -1 op_axes
+ (np.zeros(4, dtype="bool"), [-1, 0]),
+ (np.zeros((), dtype="bool"), [-1, -1]),
+ (np.zeros((), dtype="bool"), None)])
+def test_iter_writemasked_broadcast_error(mask, mask_axes):
+ # This assumes that a readwrite mask makes sense. This is likely not the
+ # case and should simply be deprecated.
+ arr = np.zeros((3, 4))
+ itflags = ["reduce_ok"]
+ mask_flags = ["arraymask", "readwrite", "allocate"]
+ a_flags = ["writeonly", "writemasked"]
+ if mask_axes is None:
+ op_axes = None
+ else:
+ op_axes = [mask_axes, [0, 1]]
+
+ with assert_raises(ValueError):
+ np.nditer((mask, arr), flags=itflags, op_flags=[mask_flags, a_flags],
+ op_axes=op_axes)
+
+
def test_iter_writemasked_decref():
# force casting (to make it interesting) by using a structured dtype.
arr = np.arange(10000).astype(">i,O")