diff options
24 files changed, 354 insertions, 72 deletions
diff --git a/doc/release/upcoming_changes/15844.new_feature.rst b/doc/release/upcoming_changes/15844.new_feature.rst new file mode 100644 index 000000000..f2746807b --- /dev/null +++ b/doc/release/upcoming_changes/15844.new_feature.rst @@ -0,0 +1,4 @@ +f2py supports reading access type attributes from derived type statements +------------------------------------------------------------------------- +As a result, one does not need to use `public` or `private` statements to +specify derived type access properties. diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc index 17d1b4951..7782172d2 100644 --- a/doc/source/reference/simd/generated_tables/cpu_features.inc +++ b/doc/source/reference/simd/generated_tables/cpu_features.inc @@ -36,26 +36,28 @@ On IBM/POWER big-endian .. table:: :align: left - ======== ================ - Name Implies - ======== ================ - ``VSX`` - ``VSX2`` ``VSX`` - ``VSX3`` ``VSX`` ``VSX2`` - ======== ================ + ======== ========================= + Name Implies + ======== ========================= + ``VSX`` + ``VSX2`` ``VSX`` + ``VSX3`` ``VSX`` ``VSX2`` + ``VSX4`` ``VSX`` ``VSX2`` ``VSX3`` + ======== ========================= On IBM/POWER little-endian ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
table:: :align: left - ======== ================ - Name Implies - ======== ================ - ``VSX`` ``VSX2`` - ``VSX2`` ``VSX`` - ``VSX3`` ``VSX`` ``VSX2`` - ======== ================ + ======== ========================= + Name Implies + ======== ========================= + ``VSX`` ``VSX2`` + ``VSX2`` ``VSX`` + ``VSX3`` ``VSX`` ``VSX2`` + ``VSX4`` ``VSX`` ``VSX2`` ``VSX3`` + ======== ========================= On ARMv7/A32 ~~~~~~~~~~~~ diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 7081f9a59..1bbacad45 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -2943,7 +2943,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('any', add_newdoc('numpy.core.multiarray', 'ndarray', ('argmax', """ - a.argmax(axis=None, out=None) + a.argmax(axis=None, out=None, *, keepdims=False) Return indices of the maximum values along the given axis. @@ -2958,7 +2958,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('argmax', add_newdoc('numpy.core.multiarray', 'ndarray', ('argmin', """ - a.argmin(axis=None, out=None) + a.argmin(axis=None, out=None, *, keepdims=False) Return indices of the minimum values along the given axis. 
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 88794ca07..1d6234e20 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -180,12 +180,6 @@ defined(__MINGW32__) || defined(__MINGW64__) #include <io.h> -/* mingw based on 3.4.5 has lseek but not ftell/fseek */ -#if defined(__MINGW32__) || defined(__MINGW64__) -extern int __cdecl _fseeki64(FILE *, long long, int); -extern long long __cdecl _ftelli64(FILE *); -#endif - #define npy_fseek _fseeki64 #define npy_ftell _ftelli64 #define npy_lseek _lseeki64 diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index 84de9a059..fabec069c 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -381,7 +381,7 @@ SIMD_IMPL_INTRIN_1(sumup_@sfx@, @esfx@, v@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square, ceil, trunc# + * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor# */ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@) /**end repeat1**/ @@ -615,7 +615,7 @@ SIMD_INTRIN_DEF(sumup_@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square, ceil, trunc# + * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor# */ SIMD_INTRIN_DEF(@intrin@_@sfx@) /**end repeat1**/ diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src index 1385220f9..ff4f9f60a 100644 --- a/numpy/core/src/common/npy_cpu_features.c.src +++ b/numpy/core/src/common/npy_cpu_features.c.src @@ -61,7 +61,7 @@ npy_cpu_features_dict(void) * AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI, * AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG, * AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL, - * VSX, VSX2, VSX3, + * VSX, VSX2, VSX3, VSX4, * VX, VXE, VXE2, * NEON, NEON_FP16, NEON_VFPV4, 
ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM# */ @@ -474,9 +474,15 @@ npy__cpu_init_features(void) #ifndef AT_HWCAP2 #define AT_HWCAP2 26 #endif + #ifndef PPC_FEATURE2_ARCH_2_07 + #define PPC_FEATURE2_ARCH_2_07 0x80000000 + #endif #ifndef PPC_FEATURE2_ARCH_3_00 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif + #ifndef PPC_FEATURE2_ARCH_3_1 + #define PPC_FEATURE2_ARCH_3_1 0x00040000 + #endif #endif static void @@ -489,15 +495,18 @@ npy__cpu_init_features(void) return; hwcap = getauxval(AT_HWCAP2); - if (hwcap & PPC_FEATURE2_ARCH_3_00) + if (hwcap & PPC_FEATURE2_ARCH_3_1) { npy__cpu_have[NPY_CPU_FEATURE_VSX] = npy__cpu_have[NPY_CPU_FEATURE_VSX2] = - npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1; + npy__cpu_have[NPY_CPU_FEATURE_VSX3] = + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1; return; } - npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1; + npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; + npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0; + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0; // TODO: AIX, FreeBSD #else npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1; @@ -507,6 +516,9 @@ npy__cpu_init_features(void) #ifdef NPY_HAVE_VSX3 npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1; #endif + #ifdef NPY_HAVE_VSX4 + npy__cpu_have[NPY_CPU_FEATURE_VSX4] = 1; + #endif #endif } diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h index 1f52a445d..3d5f2e75c 100644 --- a/numpy/core/src/common/npy_cpu_features.h +++ b/numpy/core/src/common/npy_cpu_features.h @@ -65,6 +65,8 @@ enum npy_cpu_features NPY_CPU_FEATURE_VSX2 = 201, // POWER9 NPY_CPU_FEATURE_VSX3 = 202, + // POWER10 + NPY_CPU_FEATURE_VSX4 = 203, // ARM NPY_CPU_FEATURE_NEON = 300, @@ -167,8 +169,8 @@ npy_cpu_baseline_list(void); * On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...] 
* On armhf: ['NEON', 'NEON_FP16', 'NEON_VPFV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] - * On ppc64: ['VSX', 'VSX2', 'VSX3'] - * On ppc64le: ['VSX3'] + * On ppc64: ['VSX', 'VSX2', 'VSX3', 'VSX4'] + * On ppc64le: ['VSX3', 'VSX4'] * On s390x: ['VX', 'VXE', VXE2] * On any other arch or if the optimization is disabled: [] */ diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h index ec15e50e1..deaf4ad11 100644 --- a/numpy/core/src/common/simd/avx2/math.h +++ b/numpy/core/src/common/simd/avx2/math.h @@ -42,7 +42,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm256_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q); @@ -76,7 +76,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) #define npyv_min_f64 _mm256_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. 
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q); @@ -105,6 +105,10 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b)); } +// round to nearest integer even +#define npyv_rint_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_NEAREST_INT) +#define npyv_rint_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_NEAREST_INT) + // ceil #define npyv_ceil_f32 _mm256_ceil_ps #define npyv_ceil_f64 _mm256_ceil_pd @@ -113,4 +117,8 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) #define npyv_trunc_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_ZERO) #define npyv_trunc_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_ZERO) +// floor +#define npyv_floor_f32 _mm256_floor_ps +#define npyv_floor_f64 _mm256_floor_pd + #endif // _NPY_SIMD_AVX2_MATH_H diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h index f30e50ad0..5a6cb6dcd 100644 --- a/numpy/core/src/common/simd/avx512/math.h +++ b/numpy/core/src/common/simd/avx512/math.h @@ -51,7 +51,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm512_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q); @@ -84,7 +84,7 @@ NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b) #define npyv_min_f64 _mm512_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. 
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q); @@ -112,6 +112,10 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b) #define npyv_min_u64 _mm512_min_epu64 #define npyv_min_s64 _mm512_min_epi64 +// round to nearest integer even +#define npyv_rint_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEAREST_INT) +#define npyv_rint_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEAREST_INT) + // ceil #define npyv_ceil_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_POS_INF) #define npyv_ceil_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_POS_INF) @@ -120,4 +124,8 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b) #define npyv_trunc_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_ZERO) #define npyv_trunc_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_ZERO) +// floor +#define npyv_floor_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEG_INF) +#define npyv_floor_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEG_INF) + #endif // _NPY_SIMD_AVX512_MATH_H diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h index 19e5cd846..4607d6f27 100644 --- a/numpy/core/src/common/simd/neon/math.h +++ b/numpy/core/src/common/simd/neon/math.h @@ -153,6 +153,33 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return vbslq_s64(npyv_cmplt_s64(a, b), a, b); } +// round to nearest integer even +NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) +{ +#ifdef NPY_HAVE_ASIMD + return vrndnq_f32(a); +#else + // ARMv7 NEON only supports fp to int truncate conversion. + // a magic trick of adding 1.5 * 2**23 is used for rounding + // to nearest even and then subtract this magic number to get + // the integer. 
+ const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_f32 magic = vdupq_n_f32(12582912.0f); // 1.5 * 2**23 + npyv_f32 round = vsubq_f32(vaddq_f32(a, magic), magic); + npyv_b32 overflow = vcleq_f32(vabsq_f32(a), vreinterpretq_f32_u32(vdupq_n_u32(0x4b000000))); + round = vbslq_f32(overflow, round, a); + // signed zero + round = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(round), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + return round; +#endif +} +#if NPY_SIMD_F64 + #define npyv_rint_f64 vrndnq_f64 +#endif // NPY_SIMD_F64 + // ceil #ifdef NPY_HAVE_ASIMD #define npyv_ceil_f32 vrndpq_f32 @@ -223,4 +250,36 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) #define npyv_trunc_f64 vrndq_f64 #endif // NPY_SIMD_F64 +// floor +#ifdef NPY_HAVE_ASIMD + #define npyv_floor_f32 vrndmq_f32 +#else + NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a) + { + const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_u32 one = vreinterpretq_u32_f32(vdupq_n_f32(1.0f)); + const npyv_s32 max_int = vdupq_n_s32(0x7fffffff); + + npyv_s32 roundi = vcvtq_s32_f32(a); + npyv_f32 round = vcvtq_f32_s32(roundi); + npyv_f32 floor = vsubq_f32(round, vreinterpretq_f32_u32( + vandq_u32(vcgtq_f32(round, a), one) + )); + // respect signed zero + npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(floor), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + npyv_u32 nnan = npyv_notnan_f32(a); + npyv_u32 overflow = vorrq_u32( + vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int) + ); + + return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a); + } +#endif // NPY_HAVE_ASIMD +#if NPY_SIMD_F64 + #define npyv_floor_f64 vrndmq_f64 +#endif // NPY_SIMD_F64 + #endif // _NPY_SIMD_NEON_MATH_H diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h index 5daf7711e..e4b77b671 100644 --- a/numpy/core/src/common/simd/sse/math.h +++ b/numpy/core/src/common/simd/sse/math.h @@ -42,7 +42,7 @@ 
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __m128 nn = _mm_cmpord_ps(b, b); @@ -95,7 +95,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) #define npyv_min_f64 _mm_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __m128 nn = _mm_cmpord_ps(b, b); @@ -143,6 +143,38 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return npyv_select_s64(npyv_cmplt_s64(a, b), a, b); } +// round to nearest integer even +NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) +{ +#ifdef NPY_HAVE_SSE41 + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); +#else + const npyv_f32 szero = _mm_set1_ps(-0.0f); + __m128i roundi = _mm_cvtps_epi32(a); + __m128i overflow = _mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)); + __m128 r = _mm_cvtepi32_ps(roundi); + // respect sign of zero + r = _mm_or_ps(r, _mm_and_ps(a, szero)); + return npyv_select_f32(overflow, a, r); +#endif +} + +// round to nearest integer even +NPY_FINLINE npyv_f64 npyv_rint_f64(npyv_f64 a) +{ +#ifdef NPY_HAVE_SSE41 + return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT); +#else + const npyv_f64 szero = _mm_set1_pd(-0.0); + const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000); + npyv_f64 sign_two52 = _mm_or_pd(two_power_52, _mm_and_pd(a, szero)); + // round by add magic number 2^52 + npyv_f64 round = _mm_sub_pd(_mm_add_pd(a, 
sign_two52), sign_two52); + // respect signed zero, e.g. -0.5 -> -0.0 + return _mm_or_pd(round, _mm_and_pd(a, szero)); +#endif +} + // ceil #ifdef NPY_HAVE_SSE41 #define npyv_ceil_f32 _mm_ceil_ps @@ -202,4 +234,23 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) } #endif +// floor +#ifdef NPY_HAVE_SSE41 + #define npyv_floor_f32 _mm_floor_ps + #define npyv_floor_f64 _mm_floor_pd +#else + NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a) + { + const npyv_f32 one = _mm_set1_ps(1.0f); + npyv_f32 round = npyv_rint_f32(a); + return _mm_sub_ps(round, _mm_and_ps(_mm_cmpgt_ps(round, a), one)); + } + NPY_FINLINE npyv_f64 npyv_floor_f64(npyv_f64 a) + { + const npyv_f64 one = _mm_set1_pd(1.0); + npyv_f64 round = npyv_rint_f64(a); + return _mm_sub_pd(round, _mm_and_pd(_mm_cmpgt_pd(round, a), one)); + } +#endif // NPY_HAVE_SSE41 + #endif // _NPY_SIMD_SSE_MATH_H diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h index d138cae8a..444bc9e54 100644 --- a/numpy/core/src/common/simd/vsx/math.h +++ b/numpy/core/src/common/simd/vsx/math.h @@ -38,7 +38,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 vec_max // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. #define npyv_maxp_f32 vec_max #define npyv_maxp_f64 vec_max // Maximum, integer operations @@ -56,7 +56,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_min_f64 vec_min // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. 
#define npyv_minp_f32 vec_min #define npyv_minp_f64 vec_min // Minimum, integer operations @@ -69,6 +69,10 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_min_u64 vec_min #define npyv_min_s64 vec_min +// round to nearest int even +#define npyv_rint_f32 vec_rint +#define npyv_rint_f64 vec_rint + // ceil #define npyv_ceil_f32 vec_ceil #define npyv_ceil_f64 vec_ceil @@ -77,4 +81,8 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_trunc_f32 vec_trunc #define npyv_trunc_f64 vec_trunc +// floor +#define npyv_floor_f32 vec_floor +#define npyv_floor_f64 vec_floor + #endif // _NPY_SIMD_VSX_MATH_H diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 71808cc48..71401c60e 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -2849,7 +2849,7 @@ static int #define LT(a,b) ((a) < (b) || ((b) != (b) && (a) ==(a))) static int -@TYPE@_compare(@type@ *pa, @type@ *pb) +@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ a = *pa; const @type@ b = *pb; @@ -2869,7 +2869,7 @@ static int static int -C@TYPE@_compare(@type@ *pa, @type@ *pb) +C@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ ar = pa[0]; const @type@ ai = pa[1]; @@ -2924,7 +2924,7 @@ C@TYPE@_compare(@type@ *pa, @type@ *pb) */ static int -@TYPE@_compare(@type@ *pa, @type@ *pb) +@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ a = *pa; const @type@ b = *pb; diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py index 706cf7a7e..1a76897e2 100644 --- a/numpy/core/tests/test_cpu_features.py +++ b/numpy/core/tests/test_cpu_features.py @@ -140,8 +140,8 @@ class Test_X86_Features(AbstractTest): is_power = re.match("^(powerpc|ppc)64", machine, re.IGNORECASE) @pytest.mark.skipif(not is_linux or not is_power, reason="Only for Linux and Power") class 
Test_POWER_Features(AbstractTest): - features = ["VSX", "VSX2", "VSX3"] - features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00") + features = ["VSX", "VSX2", "VSX3", "VSX4"] + features_map = dict(VSX2="ARCH_2_07", VSX3="ARCH_3_00", VSX4="ARCH_3_1") def load_flags(self): self.load_flags_auxv() diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index 12a67c44d..605baefe6 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -330,16 +330,18 @@ class _SIMD_FP(_Test_Utility): square = self.square(vdata) assert square == data_square - @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil), - ("self.trunc", math.trunc)]) + @pytest.mark.parametrize("intrin, func", [("ceil", math.ceil), + ("trunc", math.trunc), ("floor", math.floor), ("rint", round)]) def test_rounding(self, intrin, func): """ Test intrinsics: + npyv_rint_##SFX npyv_ceil_##SFX npyv_trunc_##SFX + npyv_floor_##SFX """ intrin_name = intrin - intrin = eval(intrin) + intrin = getattr(self, intrin) pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() # special cases round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf)) @@ -347,20 +349,25 @@ class _SIMD_FP(_Test_Utility): data_round = [desired]*self.nlanes _round = intrin(self.setall(case)) assert _round == pytest.approx(data_round, nan_ok=True) + for x in range(0, 2**20, 256**2): for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15): - data = [x*w+a for a in range(self.nlanes)] - vdata = self.load(data) + data = self.load([(x+a)*w for a in range(self.nlanes)]) data_round = [func(x) for x in data] - _round = intrin(vdata) + _round = intrin(data) assert _round == data_round + # signed zero - if "ceil" in intrin_name or "trunc" in intrin_name: - for w in (-0.25, -0.30, -0.45): - _round = self._to_unsigned(intrin(self.setall(w))) - data_round = self._to_unsigned(self.setall(-0.0)) - assert _round == data_round - + if intrin_name == "floor": + data_szero = (-0.0,) + else: + data_szero = (-0.0, 
-0.25, -0.30, -0.45, -0.5) + + for w in data_szero: + _round = self._to_unsigned(intrin(self.setall(w))) + data_round = self._to_unsigned(self.setall(-0.0)) + assert _round == data_round + def test_max(self): """ Test intrinsics: diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index f1d024b94..854584998 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -294,6 +294,9 @@ class _Config: VSX2 = dict(interest=2, implies="VSX", implies_detect=False), ## Power9/ISA 3.00 VSX3 = dict(interest=3, implies="VSX2", implies_detect=False), + ## Power10/ISA 3.1 + VSX4 = dict(interest=4, implies="VSX3", implies_detect=False, + extra_checks="VSX4_MMA"), # IBM/Z ## VX(z13) support VX = dict(interest=1, headers="vecintrin.h"), @@ -471,12 +474,16 @@ class _Config: ), VSX3 = dict( flags="-mcpu=power9 -mtune=power9", implies_detect=False + ), + VSX4 = dict( + flags="-mcpu=power10 -mtune=power10", implies_detect=False ) ) if self.cc_is_clang: partial["VSX"]["flags"] = "-maltivec -mvsx" partial["VSX2"]["flags"] = "-mpower8-vector" partial["VSX3"]["flags"] = "-mpower9-vector" + partial["VSX4"]["flags"] = "-mpower10-vector" return partial diff --git a/numpy/distutils/checks/cpu_vsx4.c b/numpy/distutils/checks/cpu_vsx4.c new file mode 100644 index 000000000..a6acc7384 --- /dev/null +++ b/numpy/distutils/checks/cpu_vsx4.c @@ -0,0 +1,14 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +typedef __vector unsigned int v_uint32x4; + +int main(void) +{ + v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16}; + v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2}; + v_uint32x4 v3 = vec_mod(v1, v2); + return (int)vec_extractm(v3); +} diff --git a/numpy/distutils/checks/extra_vsx4_mma.c b/numpy/distutils/checks/extra_vsx4_mma.c new file mode 100644 index 000000000..a70b2a9f6 --- /dev/null +++ b/numpy/distutils/checks/extra_vsx4_mma.c @@ -0,0 +1,21 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include 
<altivec.h> + +typedef __vector float fv4sf_t; +typedef __vector unsigned char vec_t; + +int main(void) +{ + __vector_quad acc0; + float a[4] = {0,1,2,3}; + float b[4] = {0,1,2,3}; + vec_t *va = (vec_t *) a; + vec_t *vb = (vec_t *) b; + __builtin_mma_xvf32ger(&acc0, va[0], vb[0]); + fv4sf_t result[4]; + __builtin_mma_disassemble_acc((void *)result, &acc0); + fv4sf_t c0 = result[0]; + return (int)((float*)&c0)[0]; +} diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index dc1ab3b9b..80830d559 100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -47,8 +47,8 @@ class build(old_build): - not part of dispatch-able features(--cpu-dispatch) - not supported by compiler or platform """ - self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F" \ - " AVX512_SKX VSX VSX2 VSX3 NEON ASIMD VX VXE VXE2" + self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F " \ + "AVX512_SKX VSX VSX2 VSX3 VSX4 NEON ASIMD VX VXE VXE2" def finalize_options(self): build_scripts = self.build_scripts diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py index 6f9970c75..1ca8bc09b 100644 --- a/numpy/distutils/tests/test_ccompiler_opt.py +++ b/numpy/distutils/tests/test_ccompiler_opt.py @@ -405,7 +405,7 @@ class _Test_CCompilerOpt: # in msvc, avx512_knl avx512_knm aren't supported x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*", armhf=".* asimd asimdhp asimddp .*", - ppc64="vsx vsx2 vsx3.*", + ppc64="vsx vsx2 vsx3 vsx4.*", s390x="vx vxe vxe2.*" ) # min @@ -544,13 +544,13 @@ class _Test_CCompilerOpt: """ /*@targets sse sse2 sse41 avx avx2 avx512f - vsx vsx2 vsx3 + vsx vsx2 vsx3 vsx4 neon neon_fp16 asimdhp asimddp vx vxe vxe2 */ """, baseline="avx vsx2 asimd vx vxe", - x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3", + x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx4 vsx3", s390x="vxe2" ) # test skipping non-dispatch features @@ -558,7 +558,7 @@ 
class _Test_CCompilerOpt: """ /*@targets sse41 avx avx2 avx512f - vsx2 vsx3 + vsx2 vsx3 vsx4 asimd asimdhp asimddp vx vxe vxe2 */ @@ -571,13 +571,13 @@ class _Test_CCompilerOpt: """ /*@targets sse2 sse41 avx2 avx512f - vsx2 vsx3 + vsx2 vsx3 vsx4 neon asimdhp asimddp vx vxe vxe2 */ """, baseline="", - trap_files=".*(avx2|avx512f|vsx3|asimddp|vxe2).c", + trap_files=".*(avx2|avx512f|vsx3|vsx4|asimddp|vxe2).c", x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon", s390x="vxe vx" ) diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index aacd2c676..0374ae8d7 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -892,6 +892,9 @@ def appenddecl(decl, decl2, force=1): selectpattern = re.compile( r'\s*(?P<this>(@\(@.*?@\)@|\*[\d*]+|\*\s*@\(@.*?@\)@|))(?P<after>.*)\Z', re.I) +typedefpattern = re.compile( + r'(?:,(?P<attributes>[\w(),]+))?(::)?(?P<name>\b[a-z$_][\w$]*\b)' + r'(?:\((?P<params>[\w,]*)\))?\Z', re.I) nameargspattern = re.compile( r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I) operatorpattern = re.compile( @@ -914,6 +917,16 @@ def _is_intent_callback(vdecl): return 0 +def _resolvetypedefpattern(line): + line = ''.join(line.split()) # removes whitespace + m1 = typedefpattern.match(line) + if m1: + attrs = m1.group('attributes') + attrs = [a.lower() for a in attrs.split(',')] if attrs else [] + return m1.group('name'), attrs, m1.group('params') + return None, [], None + + def _resolvenameargspattern(line): line = markouterparen(line) m1 = nameargspattern.match(line) @@ -962,7 +976,13 @@ def analyzeline(m, case, line): block = 'python module' elif re.match(r'abstract\s*interface', block, re.I): block = 'abstract interface' - name, args, result, bind = _resolvenameargspattern(m.group('after')) + if block == 'type': + name, attrs, _ = _resolvetypedefpattern(m.group('after')) + 
groupcache[groupcounter]['vars'][name] = dict(attrspec = attrs) + args = [] + result = None + else: + name, args, result, _ = _resolvenameargspattern(m.group('after')) if name is None: if block == 'block data': name = '_BLOCK_DATA_' diff --git a/numpy/f2py/tests/src/crackfortran/accesstype.f90 b/numpy/f2py/tests/src/crackfortran/accesstype.f90 new file mode 100644 index 000000000..e2cbd445d --- /dev/null +++ b/numpy/f2py/tests/src/crackfortran/accesstype.f90 @@ -0,0 +1,13 @@ +module foo + public + type, private, bind(c) :: a + integer :: i + end type a + type, bind(c) :: b_ + integer :: j + end type b_ + public :: b_ + type :: c + integer :: k + end type c +end module foo diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index e33e12d62..ea618bf33 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -44,6 +44,15 @@ class TestPublicPrivate: assert "private" not in mod["vars"]["seta"]["attrspec"] assert "public" in mod["vars"]["seta"]["attrspec"] + def test_access_type(self, tmp_path): + fpath = util.getpath("tests", "src", "crackfortran", "accesstype.f90") + mod = crackfortran.crackfortran([str(fpath)]) + assert len(mod) == 1 + tt = mod[0]['vars'] + assert set(tt['a']['attrspec']) == {'private', 'bind(c)'} + assert set(tt['b_']['attrspec']) == {'public', 'bind(c)'} + assert set(tt['c']['attrspec']) == {'public'} + class TestModuleProcedure(): def test_moduleOperators(self, tmp_path): diff --git a/numpy/ma/core.py b/numpy/ma/core.py index e0e5403a9..9c9dfac68 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -3542,15 +3542,17 @@ class MaskedArray(ndarray): def harden_mask(self): """ - Force the mask to hard. + Force the mask to hard, preventing unmasking by assignment. Whether the mask of a masked array is hard or soft is determined by its `~ma.MaskedArray.hardmask` property. `harden_mask` sets - `~ma.MaskedArray.hardmask` to ``True``. 
+ `~ma.MaskedArray.hardmask` to ``True`` (and returns the modified + self). See Also -------- ma.MaskedArray.hardmask + ma.MaskedArray.soften_mask """ self._hardmask = True @@ -3558,15 +3560,17 @@ class MaskedArray(ndarray): def soften_mask(self): """ - Force the mask to soft. + Force the mask to soft (default), allowing unmasking by assignment. Whether the mask of a masked array is hard or soft is determined by its `~ma.MaskedArray.hardmask` property. `soften_mask` sets - `~ma.MaskedArray.hardmask` to ``False``. + `~ma.MaskedArray.hardmask` to ``False`` (and returns the modified + self). See Also -------- ma.MaskedArray.hardmask + ma.MaskedArray.harden_mask """ self._hardmask = False @@ -3574,16 +3578,55 @@ class MaskedArray(ndarray): @property def hardmask(self): - """ Hardness of the mask """ + """ + Specifies whether values can be unmasked through assignments. + + By default, assigning definite values to masked array entries will + unmask them. When `hardmask` is ``True``, the mask will not change + through assignments. + + See Also + -------- + ma.MaskedArray.harden_mask + ma.MaskedArray.soften_mask + + Examples + -------- + >>> x = np.arange(10) + >>> m = np.ma.masked_array(x, x>5) + >>> assert not m.hardmask + + Since `m` has a soft mask, assigning an element value unmasks that + element: + + >>> m[8] = 42 + >>> m + masked_array(data=[0, 1, 2, 3, 4, 5, --, --, 42, --], + mask=[False, False, False, False, False, False, + True, True, False, True], + fill_value=999999) + + After hardening, the mask is not affected by assignments: + + >>> hardened = np.ma.harden_mask(m) + >>> assert m.hardmask and hardened is m + >>> m[:] = 23 + >>> m + masked_array(data=[23, 23, 23, 23, 23, 23, --, --, 23, --], + mask=[False, False, False, False, False, False, + True, True, False, True], + fill_value=999999) + + """ return self._hardmask def unshare_mask(self): """ - Copy the mask and set the sharedmask flag to False. + Copy the mask and set the `sharedmask` flag to ``False``. 
Whether the mask is shared between masked arrays can be seen from - the `sharedmask` property. `unshare_mask` ensures the mask is not shared. - A copy of the mask is only made if it was shared. + the `sharedmask` property. `unshare_mask` ensures the mask is not + shared. A copy of the mask is only made if it was shared. See Also -------- |