summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/upcoming_changes/15844.new_feature.rst4
-rw-r--r--doc/source/reference/simd/generated_tables/cpu_features.inc30
-rw-r--r--numpy/core/_add_newdocs.py4
-rw-r--r--numpy/core/include/numpy/npy_common.h6
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src4
-rw-r--r--numpy/core/src/common/npy_cpu_features.c.src20
-rw-r--r--numpy/core/src/common/npy_cpu_features.h6
-rw-r--r--numpy/core/src/common/simd/avx2/math.h12
-rw-r--r--numpy/core/src/common/simd/avx512/math.h12
-rw-r--r--numpy/core/src/common/simd/neon/math.h59
-rw-r--r--numpy/core/src/common/simd/sse/math.h55
-rw-r--r--numpy/core/src/common/simd/vsx/math.h12
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src6
-rw-r--r--numpy/core/tests/test_cpu_features.py4
-rw-r--r--numpy/core/tests/test_simd.py31
-rw-r--r--numpy/distutils/ccompiler_opt.py7
-rw-r--r--numpy/distutils/checks/cpu_vsx4.c14
-rw-r--r--numpy/distutils/checks/extra_vsx4_mma.c21
-rw-r--r--numpy/distutils/command/build.py4
-rw-r--r--numpy/distutils/tests/test_ccompiler_opt.py12
-rwxr-xr-xnumpy/f2py/crackfortran.py22
-rw-r--r--numpy/f2py/tests/src/crackfortran/accesstype.f9013
-rw-r--r--numpy/f2py/tests/test_crackfortran.py9
-rw-r--r--numpy/ma/core.py59
24 files changed, 354 insertions, 72 deletions
diff --git a/doc/release/upcoming_changes/15844.new_feature.rst b/doc/release/upcoming_changes/15844.new_feature.rst
new file mode 100644
index 000000000..f2746807b
--- /dev/null
+++ b/doc/release/upcoming_changes/15844.new_feature.rst
@@ -0,0 +1,4 @@
+f2py supports reading access type attributes from derived type statements
+-------------------------------------------------------------------------
+As a result, one does not need to use `public` or `private` statements to
+specify derived type access properties.
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
index 17d1b4951..7782172d2 100644
--- a/doc/source/reference/simd/generated_tables/cpu_features.inc
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -36,26 +36,28 @@ On IBM/POWER big-endian
.. table::
:align: left
- ======== ================
- Name Implies
- ======== ================
- ``VSX``
- ``VSX2`` ``VSX``
- ``VSX3`` ``VSX`` ``VSX2``
- ======== ================
+ ======== =========================
+ Name Implies
+ ======== =========================
+ ``VSX``
+ ``VSX2`` ``VSX``
+ ``VSX3`` ``VSX`` ``VSX2``
+ ``VSX4`` ``VSX`` ``VSX2`` ``VSX3``
+ ======== =========================
On IBM/POWER little-endian
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table::
:align: left
- ======== ================
- Name Implies
- ======== ================
- ``VSX`` ``VSX2``
- ``VSX2`` ``VSX``
- ``VSX3`` ``VSX`` ``VSX2``
- ======== ================
+ ======== =========================
+ Name Implies
+ ======== =========================
+ ``VSX`` ``VSX2``
+ ``VSX2`` ``VSX``
+ ``VSX3`` ``VSX`` ``VSX2``
+ ``VSX4`` ``VSX`` ``VSX2`` ``VSX3``
+ ======== =========================
On ARMv7/A32
~~~~~~~~~~~~
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 7081f9a59..1bbacad45 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -2943,7 +2943,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('any',
add_newdoc('numpy.core.multiarray', 'ndarray', ('argmax',
"""
- a.argmax(axis=None, out=None)
+ a.argmax(axis=None, out=None, *, keepdims=False)
Return indices of the maximum values along the given axis.
@@ -2958,7 +2958,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('argmax',
add_newdoc('numpy.core.multiarray', 'ndarray', ('argmin',
"""
- a.argmin(axis=None, out=None)
+ a.argmin(axis=None, out=None, *, keepdims=False)
Return indices of the minimum values along the given axis.
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 88794ca07..1d6234e20 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -180,12 +180,6 @@
defined(__MINGW32__) || defined(__MINGW64__)
#include <io.h>
-/* mingw based on 3.4.5 has lseek but not ftell/fseek */
-#if defined(__MINGW32__) || defined(__MINGW64__)
-extern int __cdecl _fseeki64(FILE *, long long, int);
-extern long long __cdecl _ftelli64(FILE *);
-#endif
-
#define npy_fseek _fseeki64
#define npy_ftell _ftelli64
#define npy_lseek _lseeki64
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index 84de9a059..fabec069c 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -381,7 +381,7 @@ SIMD_IMPL_INTRIN_1(sumup_@sfx@, @esfx@, v@sfx@)
***************************/
#if @fp_only@
/**begin repeat1
- * #intrin = sqrt, recip, abs, square, ceil, trunc#
+ * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor#
*/
SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@)
/**end repeat1**/
@@ -615,7 +615,7 @@ SIMD_INTRIN_DEF(sumup_@sfx@)
***************************/
#if @fp_only@
/**begin repeat1
- * #intrin = sqrt, recip, abs, square, ceil, trunc#
+ * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor#
*/
SIMD_INTRIN_DEF(@intrin@_@sfx@)
/**end repeat1**/
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 1385220f9..ff4f9f60a 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -61,7 +61,7 @@ npy_cpu_features_dict(void)
* AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI,
* AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
* AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
- * VSX, VSX2, VSX3,
+ * VSX, VSX2, VSX3, VSX4,
* VX, VXE, VXE2,
* NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
*/
@@ -474,9 +474,15 @@ npy__cpu_init_features(void)
#ifndef AT_HWCAP2
#define AT_HWCAP2 26
#endif
+ #ifndef PPC_FEATURE2_ARCH_2_07
+ #define PPC_FEATURE2_ARCH_2_07 0x80000000
+ #endif
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+ #ifndef PPC_FEATURE2_ARCH_3_1
+ #define PPC_FEATURE2_ARCH_3_1 0x00040000
+ #endif
#endif
static void
@@ -489,15 +495,18 @@ npy__cpu_init_features(void)
return;
hwcap = getauxval(AT_HWCAP2);
- if (hwcap & PPC_FEATURE2_ARCH_3_00)
+ if (hwcap & PPC_FEATURE2_ARCH_3_1)
{
npy__cpu_have[NPY_CPU_FEATURE_VSX] =
npy__cpu_have[NPY_CPU_FEATURE_VSX2] =
- npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX3] =
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
return;
}
- npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0;
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0;
// TODO: AIX, FreeBSD
#else
npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1;
@@ -507,6 +516,9 @@ npy__cpu_init_features(void)
#ifdef NPY_HAVE_VSX3
npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
#endif
+ #ifdef NPY_HAVE_VSX4
+ npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1;
+ #endif
#endif
}
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index 1f52a445d..3d5f2e75c 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -65,6 +65,8 @@ enum npy_cpu_features
NPY_CPU_FEATURE_VSX2 = 201,
// POWER9
NPY_CPU_FEATURE_VSX3 = 202,
+ // POWER10
+ NPY_CPU_FEATURE_VSX4 = 203,
// ARM
NPY_CPU_FEATURE_NEON = 300,
@@ -167,8 +169,8 @@ npy_cpu_baseline_list(void);
* On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...]
* On armhf: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
* On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM']
- * On ppc64: ['VSX', 'VSX2', 'VSX3']
- * On ppc64le: ['VSX3']
+ * On ppc64: ['VSX', 'VSX2', 'VSX3', 'VSX4']
+ * On ppc64le: ['VSX3', 'VSX4']
* On s390x: ['VX', 'VXE', VXE2]
* On any other arch or if the optimization is disabled: []
*/
diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h
index ec15e50e1..deaf4ad11 100644
--- a/numpy/core/src/common/simd/avx2/math.h
+++ b/numpy/core/src/common/simd/avx2/math.h
@@ -42,7 +42,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_max_f64 _mm256_max_pd
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
{
__m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q);
@@ -76,7 +76,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b)
#define npyv_min_f64 _mm256_min_pd
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b)
{
__m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q);
@@ -105,6 +105,10 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b));
}
+// round to nearest integer even
+#define npyv_rint_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_NEAREST_INT)
+#define npyv_rint_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_NEAREST_INT)
+
// ceil
#define npyv_ceil_f32 _mm256_ceil_ps
#define npyv_ceil_f64 _mm256_ceil_pd
@@ -113,4 +117,8 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
#define npyv_trunc_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_ZERO)
#define npyv_trunc_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_ZERO)
+// floor
+#define npyv_floor_f32 _mm256_floor_ps
+#define npyv_floor_f64 _mm256_floor_pd
+
#endif // _NPY_SIMD_AVX2_MATH_H
diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h
index f30e50ad0..5a6cb6dcd 100644
--- a/numpy/core/src/common/simd/avx512/math.h
+++ b/numpy/core/src/common/simd/avx512/math.h
@@ -51,7 +51,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_max_f64 _mm512_max_pd
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
{
__mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q);
@@ -84,7 +84,7 @@ NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
#define npyv_min_f64 _mm512_min_pd
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b)
{
__mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q);
@@ -112,6 +112,10 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b)
#define npyv_min_u64 _mm512_min_epu64
#define npyv_min_s64 _mm512_min_epi64
+// round to nearest integer even
+#define npyv_rint_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEAREST_INT)
+#define npyv_rint_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEAREST_INT)
+
// ceil
#define npyv_ceil_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_POS_INF)
#define npyv_ceil_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_POS_INF)
@@ -120,4 +124,8 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b)
#define npyv_trunc_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_ZERO)
#define npyv_trunc_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_ZERO)
+// floor
+#define npyv_floor_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEG_INF)
+#define npyv_floor_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEG_INF)
+
#endif // _NPY_SIMD_AVX512_MATH_H
diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h
index 19e5cd846..4607d6f27 100644
--- a/numpy/core/src/common/simd/neon/math.h
+++ b/numpy/core/src/common/simd/neon/math.h
@@ -153,6 +153,33 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return vbslq_s64(npyv_cmplt_s64(a, b), a, b);
}
+// round to nearest integer even
+NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a)
+{
+#ifdef NPY_HAVE_ASIMD
+ return vrndnq_f32(a);
+#else
+ // ARMv7 NEON only supports fp to int truncate conversion.
+    // A magic trick of adding 1.5 * 2**23 is used for rounding
+    // to nearest even and then subtracting this magic number to
+    // get the integer.
+ const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f));
+ const npyv_f32 magic = vdupq_n_f32(12582912.0f); // 1.5 * 2**23
+ npyv_f32 round = vsubq_f32(vaddq_f32(a, magic), magic);
+ npyv_b32 overflow = vcleq_f32(vabsq_f32(a), vreinterpretq_f32_u32(vdupq_n_u32(0x4b000000)));
+ round = vbslq_f32(overflow, round, a);
+ // signed zero
+ round = vreinterpretq_f32_s32(vorrq_s32(
+ vreinterpretq_s32_f32(round),
+ vandq_s32(vreinterpretq_s32_f32(a), szero)
+ ));
+ return round;
+#endif
+}
+#if NPY_SIMD_F64
+ #define npyv_rint_f64 vrndnq_f64
+#endif // NPY_SIMD_F64
+
// ceil
#ifdef NPY_HAVE_ASIMD
#define npyv_ceil_f32 vrndpq_f32
@@ -223,4 +250,36 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
#define npyv_trunc_f64 vrndq_f64
#endif // NPY_SIMD_F64
+// floor
+#ifdef NPY_HAVE_ASIMD
+ #define npyv_floor_f32 vrndmq_f32
+#else
+ NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a)
+ {
+ const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f));
+ const npyv_u32 one = vreinterpretq_u32_f32(vdupq_n_f32(1.0f));
+ const npyv_s32 max_int = vdupq_n_s32(0x7fffffff);
+
+ npyv_s32 roundi = vcvtq_s32_f32(a);
+ npyv_f32 round = vcvtq_f32_s32(roundi);
+ npyv_f32 floor = vsubq_f32(round, vreinterpretq_f32_u32(
+ vandq_u32(vcgtq_f32(round, a), one)
+ ));
+ // respect signed zero
+ npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32(
+ vreinterpretq_s32_f32(floor),
+ vandq_s32(vreinterpretq_s32_f32(a), szero)
+ ));
+ npyv_u32 nnan = npyv_notnan_f32(a);
+ npyv_u32 overflow = vorrq_u32(
+ vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int)
+ );
+
+ return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a);
+ }
+#endif // NPY_HAVE_ASIMD
+#if NPY_SIMD_F64
+ #define npyv_floor_f64 vrndmq_f64
+#endif // NPY_SIMD_F64
+
#endif // _NPY_SIMD_NEON_MATH_H
diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h
index 5daf7711e..e4b77b671 100644
--- a/numpy/core/src/common/simd/sse/math.h
+++ b/numpy/core/src/common/simd/sse/math.h
@@ -42,7 +42,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_max_f64 _mm_max_pd
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
{
__m128 nn = _mm_cmpord_ps(b, b);
@@ -95,7 +95,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b)
#define npyv_min_f64 _mm_min_pd
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b)
{
__m128 nn = _mm_cmpord_ps(b, b);
@@ -143,6 +143,38 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return npyv_select_s64(npyv_cmplt_s64(a, b), a, b);
}
+// round to nearest integer even
+NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a)
+{
+#ifdef NPY_HAVE_SSE41
+ return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT);
+#else
+ const npyv_f32 szero = _mm_set1_ps(-0.0f);
+ __m128i roundi = _mm_cvtps_epi32(a);
+ __m128i overflow = _mm_cmpeq_epi32(roundi, _mm_castps_si128(szero));
+ __m128 r = _mm_cvtepi32_ps(roundi);
+ // respect sign of zero
+ r = _mm_or_ps(r, _mm_and_ps(a, szero));
+ return npyv_select_f32(overflow, a, r);
+#endif
+}
+
+// round to nearest integer even
+NPY_FINLINE npyv_f64 npyv_rint_f64(npyv_f64 a)
+{
+#ifdef NPY_HAVE_SSE41
+ return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT);
+#else
+ const npyv_f64 szero = _mm_set1_pd(-0.0);
+ const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000);
+ npyv_f64 sign_two52 = _mm_or_pd(two_power_52, _mm_and_pd(a, szero));
+ // round by add magic number 2^52
+ npyv_f64 round = _mm_sub_pd(_mm_add_pd(a, sign_two52), sign_two52);
+ // respect signed zero, e.g. -0.5 -> -0.0
+ return _mm_or_pd(round, _mm_and_pd(a, szero));
+#endif
+}
+
// ceil
#ifdef NPY_HAVE_SSE41
#define npyv_ceil_f32 _mm_ceil_ps
@@ -202,4 +234,23 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
}
#endif
+// floor
+#ifdef NPY_HAVE_SSE41
+ #define npyv_floor_f32 _mm_floor_ps
+ #define npyv_floor_f64 _mm_floor_pd
+#else
+ NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a)
+ {
+ const npyv_f32 one = _mm_set1_ps(1.0f);
+ npyv_f32 round = npyv_rint_f32(a);
+ return _mm_sub_ps(round, _mm_and_ps(_mm_cmpgt_ps(round, a), one));
+ }
+ NPY_FINLINE npyv_f64 npyv_floor_f64(npyv_f64 a)
+ {
+ const npyv_f64 one = _mm_set1_pd(1.0);
+ npyv_f64 round = npyv_rint_f64(a);
+ return _mm_sub_pd(round, _mm_and_pd(_mm_cmpgt_pd(round, a), one));
+ }
+#endif // NPY_HAVE_SSE41
+
#endif // _NPY_SIMD_SSE_MATH_H
diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h
index d138cae8a..444bc9e54 100644
--- a/numpy/core/src/common/simd/vsx/math.h
+++ b/numpy/core/src/common/simd/vsx/math.h
@@ -38,7 +38,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_max_f64 vec_max
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
#define npyv_maxp_f32 vec_max
#define npyv_maxp_f64 vec_max
// Maximum, integer operations
@@ -56,7 +56,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_min_f64 vec_min
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
#define npyv_minp_f32 vec_min
#define npyv_minp_f64 vec_min
// Minimum, integer operations
@@ -69,6 +69,10 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_min_u64 vec_min
#define npyv_min_s64 vec_min
+// round to nearest integer even
+#define npyv_rint_f32 vec_rint
+#define npyv_rint_f64 vec_rint
+
// ceil
#define npyv_ceil_f32 vec_ceil
#define npyv_ceil_f64 vec_ceil
@@ -77,4 +81,8 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_trunc_f32 vec_trunc
#define npyv_trunc_f64 vec_trunc
+// floor
+#define npyv_floor_f32 vec_floor
+#define npyv_floor_f64 vec_floor
+
#endif // _NPY_SIMD_VSX_MATH_H
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 71808cc48..71401c60e 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -2849,7 +2849,7 @@ static int
#define LT(a,b) ((a) < (b) || ((b) != (b) && (a) ==(a)))
static int
-@TYPE@_compare(@type@ *pa, @type@ *pb)
+@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap))
{
const @type@ a = *pa;
const @type@ b = *pb;
@@ -2869,7 +2869,7 @@ static int
static int
-C@TYPE@_compare(@type@ *pa, @type@ *pb)
+C@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap))
{
const @type@ ar = pa[0];
const @type@ ai = pa[1];
@@ -2924,7 +2924,7 @@ C@TYPE@_compare(@type@ *pa, @type@ *pb)
*/
static int
-@TYPE@_compare(@type@ *pa, @type@ *pb)
+@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap))
{
const @type@ a = *pa;
const @type@ b = *pb;
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 706cf7a7e..1a76897e2 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -140,8 +140,8 @@ class Test_X86_Features(AbstractTest):
is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE)
@pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power")
class Test_POWER_Features(AbstractTest):
- features = ["VSX", "VSX2", "VSX3"]
- features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00")
+ features = ["VSX", "VSX2", "VSX3", "VSX4"]
+ features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00", VSX4="ARCH_3_1")
def load_flags(self):
self.load_flags_auxv()
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 12a67c44d..605baefe6 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -330,16 +330,18 @@ class _SIMD_FP(_Test_Utility):
square = self.square(vdata)
assert square == data_square
- @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil),
- ("self.trunc", math.trunc)])
+ @pytest.mark.parametrize("intrin, func", [("ceil", math.ceil),
+ ("trunc", math.trunc), ("floor", math.floor), ("rint", round)])
def test_rounding(self, intrin, func):
"""
Test intrinsics:
+ npyv_rint_##SFX
npyv_ceil_##SFX
npyv_trunc_##SFX
+    npyv_floor_##SFX
"""
intrin_name = intrin
- intrin = eval(intrin)
+ intrin = getattr(self, intrin)
pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
# special cases
round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf))
@@ -347,20 +349,25 @@ class _SIMD_FP(_Test_Utility):
data_round = [desired]*self.nlanes
_round = intrin(self.setall(case))
assert _round == pytest.approx(data_round, nan_ok=True)
+
for x in range(0, 2**20, 256**2):
for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15):
- data = [x*w+a for a in range(self.nlanes)]
- vdata = self.load(data)
+ data = self.load([(x+a)*w for a in range(self.nlanes)])
data_round = [func(x) for x in data]
- _round = intrin(vdata)
+ _round = intrin(data)
assert _round == data_round
+
# signed zero
- if "ceil" in intrin_name or "trunc" in intrin_name:
- for w in (-0.25, -0.30, -0.45):
- _round = self._to_unsigned(intrin(self.setall(w)))
- data_round = self._to_unsigned(self.setall(-0.0))
- assert _round == data_round
-
+ if intrin_name == "floor":
+ data_szero = (-0.0,)
+ else:
+ data_szero = (-0.0, -0.25, -0.30, -0.45, -0.5)
+
+ for w in data_szero:
+ _round = self._to_unsigned(intrin(self.setall(w)))
+ data_round = self._to_unsigned(self.setall(-0.0))
+ assert _round == data_round
+
def test_max(self):
"""
Test intrinsics:
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index f1d024b94..854584998 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -294,6 +294,9 @@ class _Config:
VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
## Power9/ISA 3.00
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
+ ## Power10/ISA 3.1
+ VSX4 = dict(interest=4, implies="VSX3", implies_detect=False,
+ extra_checks="VSX4_MMA"),
# IBM/Z
## VX(z13) support
VX = dict(interest=1, headers="vecintrin.h"),
@@ -471,12 +474,16 @@ class _Config:
),
VSX3 = dict(
flags="-mcpu=power9 -mtune=power9", implies_detect=False
+ ),
+ VSX4 = dict(
+ flags="-mcpu=power10 -mtune=power10", implies_detect=False
)
)
if self.cc_is_clang:
partial["VSX"]["flags"] = "-maltivec -mvsx"
partial["VSX2"]["flags"] = "-mpower8-vector"
partial["VSX3"]["flags"] = "-mpower9-vector"
+ partial["VSX4"]["flags"] = "-mpower10-vector"
return partial
diff --git a/numpy/distutils/checks/cpu_vsx4.c b/numpy/distutils/checks/cpu_vsx4.c
new file mode 100644
index 000000000..a6acc7384
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vsx4.c
@@ -0,0 +1,14 @@
+#ifndef __VSX__
+ #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned int v_uint32x4;
+
+int main(void)
+{
+ v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16};
+ v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2};
+ v_uint32x4 v3 = vec_mod(v1, v2);
+ return (int)vec_extractm(v3);
+}
diff --git a/numpy/distutils/checks/extra_vsx4_mma.c b/numpy/distutils/checks/extra_vsx4_mma.c
new file mode 100644
index 000000000..a70b2a9f6
--- /dev/null
+++ b/numpy/distutils/checks/extra_vsx4_mma.c
@@ -0,0 +1,21 @@
+#ifndef __VSX__
+ #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector float fv4sf_t;
+typedef __vector unsigned char vec_t;
+
+int main(void)
+{
+ __vector_quad acc0;
+ float a[4] = {0,1,2,3};
+ float b[4] = {0,1,2,3};
+ vec_t *va = (vec_t *) a;
+ vec_t *vb = (vec_t *) b;
+ __builtin_mma_xvf32ger(&acc0, va[0], vb[0]);
+ fv4sf_t result[4];
+ __builtin_mma_disassemble_acc((void *)result, &acc0);
+ fv4sf_t c0 = result[0];
+ return (int)((float*)&c0)[0];
+}
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index dc1ab3b9b..80830d559 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,8 +47,8 @@ class build(old_build):
- not part of dispatch-able features(--cpu-dispatch)
- not supported by compiler or platform
"""
- self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \
- " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2"
+ self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F " \
+ "AVX512_SKX VSX VSX2 VSX3 VSX4 NEON ASIMD VX VXE VXE2"
def finalize_options(self):
build_scripts = self.build_scripts
diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py
index 6f9970c75..1ca8bc09b 100644
--- a/numpy/distutils/tests/test_ccompiler_opt.py
+++ b/numpy/distutils/tests/test_ccompiler_opt.py
@@ -405,7 +405,7 @@ class _Test_CCompilerOpt:
# in msvc, avx512_knl avx512_knm aren't supported
x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*",
armhf=".* asimd asimdhp asimddp .*",
- ppc64="vsx vsx2 vsx3.*",
+ ppc64="vsx vsx2 vsx3 vsx4.*",
s390x="vx vxe vxe2.*"
)
# min
@@ -544,13 +544,13 @@ class _Test_CCompilerOpt:
"""
/*@targets
sse sse2 sse41 avx avx2 avx512f
- vsx vsx2 vsx3
+ vsx vsx2 vsx3 vsx4
neon neon_fp16 asimdhp asimddp
vx vxe vxe2
*/
""",
baseline="avx vsx2 asimd vx vxe",
- x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3",
+ x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx4 vsx3",
s390x="vxe2"
)
# test skipping non-dispatch features
@@ -558,7 +558,7 @@ class _Test_CCompilerOpt:
"""
/*@targets
sse41 avx avx2 avx512f
- vsx2 vsx3
+ vsx2 vsx3 vsx4
asimd asimdhp asimddp
vx vxe vxe2
*/
@@ -571,13 +571,13 @@ class _Test_CCompilerOpt:
"""
/*@targets
sse2 sse41 avx2 avx512f
- vsx2 vsx3
+ vsx2 vsx3 vsx4
neon asimdhp asimddp
vx vxe vxe2
*/
""",
baseline="",
- trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c",
+ trap_files=".*(avx2|avx512f|vsx3|vsx4|asimddp|vxe2).c",
x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon",
s390x="vxe vx"
)
diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py
index aacd2c676..0374ae8d7 100755
--- a/numpy/f2py/crackfortran.py
+++ b/numpy/f2py/crackfortran.py
@@ -892,6 +892,9 @@ def appenddecl(decl, decl2, force=1):
selectpattern = re.compile(
r'\s*(?P<this>(@\(@.*?@\)@|\*[\d*]+|\*\s*@\(@.*?@\)@|))(?P<after>.*)\Z', re.I)
+typedefpattern = re.compile(
+ r'(?:,(?P<attributes>[\w(),]+))?(::)?(?P<name>\b[a-z$_][\w$]*\b)'
+ r'(?:\((?P<params>[\w,]*)\))?\Z', re.I)
nameargspattern = re.compile(
r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I)
operatorpattern = re.compile(
@@ -914,6 +917,17 @@ def _is_intent_callback(vdecl):
return 0
+def _resolvetypedefpattern(line):
+ line = ''.join(line.split()) # removes whitespace
+ m1 = typedefpattern.match(line)
+ print(line, m1)
+ if m1:
+ attrs = m1.group('attributes')
+ attrs = [a.lower() for a in attrs.split(',')] if attrs else []
+ return m1.group('name'), attrs, m1.group('params')
+ return None, [], None
+
+
def _resolvenameargspattern(line):
line = markouterparen(line)
m1 = nameargspattern.match(line)
@@ -962,7 +976,13 @@ def analyzeline(m, case, line):
block = 'python module'
elif re.match(r'abstract\s*interface', block, re.I):
block = 'abstract interface'
- name, args, result, bind = _resolvenameargspattern(m.group('after'))
+ if block == 'type':
+ name, attrs, _ = _resolvetypedefpattern(m.group('after'))
+ groupcache[groupcounter]['vars'][name] = dict(attrspec = attrs)
+ args = []
+ result = None
+ else:
+ name, args, result, _ = _resolvenameargspattern(m.group('after'))
if name is None:
if block == 'block data':
name = '_BLOCK_DATA_'
diff --git a/numpy/f2py/tests/src/crackfortran/accesstype.f90 b/numpy/f2py/tests/src/crackfortran/accesstype.f90
new file mode 100644
index 000000000..e2cbd445d
--- /dev/null
+++ b/numpy/f2py/tests/src/crackfortran/accesstype.f90
@@ -0,0 +1,13 @@
+module foo
+ public
+ type, private, bind(c) :: a
+ integer :: i
+ end type a
+ type, bind(c) :: b_
+ integer :: j
+ end type b_
+ public :: b_
+ type :: c
+ integer :: k
+ end type c
+end module foo
diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py
index e33e12d62..ea618bf33 100644
--- a/numpy/f2py/tests/test_crackfortran.py
+++ b/numpy/f2py/tests/test_crackfortran.py
@@ -44,6 +44,15 @@ class TestPublicPrivate:
assert "private" not in mod["vars"]["seta"]["attrspec"]
assert "public" in mod["vars"]["seta"]["attrspec"]
+ def test_access_type(self, tmp_path):
+ fpath = util.getpath("tests", "src", "crackfortran", "accesstype.f90")
+ mod = crackfortran.crackfortran([str(fpath)])
+ assert len(mod) == 1
+ tt = mod[0]['vars']
+ assert set(tt['a']['attrspec']) == {'private', 'bind(c)'}
+ assert set(tt['b_']['attrspec']) == {'public', 'bind(c)'}
+ assert set(tt['c']['attrspec']) == {'public'}
+
class TestModuleProcedure():
def test_moduleOperators(self, tmp_path):
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index e0e5403a9..9c9dfac68 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -3542,15 +3542,17 @@ class MaskedArray(ndarray):
def harden_mask(self):
"""
- Force the mask to hard.
+ Force the mask to hard, preventing unmasking by assignment.
Whether the mask of a masked array is hard or soft is determined by
its `~ma.MaskedArray.hardmask` property. `harden_mask` sets
- `~ma.MaskedArray.hardmask` to ``True``.
+ `~ma.MaskedArray.hardmask` to ``True`` (and returns the modified
+ self).
See Also
--------
ma.MaskedArray.hardmask
+ ma.MaskedArray.soften_mask
"""
self._hardmask = True
@@ -3558,15 +3560,17 @@ class MaskedArray(ndarray):
def soften_mask(self):
"""
- Force the mask to soft.
+ Force the mask to soft (default), allowing unmasking by assignment.
Whether the mask of a masked array is hard or soft is determined by
its `~ma.MaskedArray.hardmask` property. `soften_mask` sets
- `~ma.MaskedArray.hardmask` to ``False``.
+ `~ma.MaskedArray.hardmask` to ``False`` (and returns the modified
+ self).
See Also
--------
ma.MaskedArray.hardmask
+ ma.MaskedArray.harden_mask
"""
self._hardmask = False
@@ -3574,16 +3578,55 @@ class MaskedArray(ndarray):
@property
def hardmask(self):
- """ Hardness of the mask """
+ """
+ Specifies whether values can be unmasked through assignments.
+
+ By default, assigning definite values to masked array entries will
+ unmask them. When `hardmask` is ``True``, the mask will not change
+ through assignments.
+
+ See Also
+ --------
+ ma.MaskedArray.harden_mask
+ ma.MaskedArray.soften_mask
+
+ Examples
+ --------
+ >>> x = np.arange(10)
+ >>> m = np.ma.masked_array(x, x>5)
+ >>> assert not m.hardmask
+
+ Since `m` has a soft mask, assigning an element value unmasks that
+ element:
+
+ >>> m[8] = 42
+ >>> m
+ masked_array(data=[0, 1, 2, 3, 4, 5, --, --, 42, --],
+ mask=[False, False, False, False, False, False,
+ True, True, False, True],
+ fill_value=999999)
+
+ After hardening, the mask is not affected by assignments:
+
+ >>> hardened = np.ma.harden_mask(m)
+ >>> assert m.hardmask and hardened is m
+ >>> m[:] = 23
+ >>> m
+ masked_array(data=[23, 23, 23, 23, 23, 23, --, --, 23, --],
+ mask=[False, False, False, False, False, False,
+ True, True, False, True],
+ fill_value=999999)
+
+ """
return self._hardmask
def unshare_mask(self):
"""
- Copy the mask and set the sharedmask flag to False.
+ Copy the mask and set the `sharedmask` flag to ``False``.
Whether the mask is shared between masked arrays can be seen from
- the `sharedmask` property. `unshare_mask` ensures the mask is not shared.
- A copy of the mask is only made if it was shared.
+ the `sharedmask` property. `unshare_mask` ensures the mask is not
+ shared. A copy of the mask is only made if it was shared.
See Also
--------