diff options
32 files changed, 2062 insertions, 1010 deletions
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b17b478ab..6a05c5830 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -123,4 +123,5 @@ jobs: - uses: actions/upload-artifact@v2 with: + name: ${{ matrix.python }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }} path: ./wheelhouse/*.whl @@ -364,6 +364,8 @@ Nicholas A. Del Grosso <delgrosso@bio.lmu.de> nickdg <delgrosso@bio.lmu.de> Nicholas McKibben <nicholas.bgp@gmail.com> Nick Minkyu Lee <mknicklee@protonmail.com> fivemok <9394929+fivemok@users.noreply.github.com> Oliver Eberle <oliver_eberle@web.de> +Omid Rajaei <rajaei.net@gmail.com> +Omid Rajaei <rajaei.net@gmail.com> <89868505+rajaeinet@users.noreply.github.com> Ondřej Čertík <ondrej.certik@gmail.com> Óscar Villellas Guillén <oscar.villellas@continuum.io> Panos Mavrogiorgos <pmav99@users.noreply.github.com> diff --git a/doc/changelog/1.22.1-changelog.rst b/doc/changelog/1.22.1-changelog.rst new file mode 100644 index 000000000..3b401c1d2 --- /dev/null +++ b/doc/changelog/1.22.1-changelog.rst @@ -0,0 +1,47 @@ + +Contributors +============ + +A total of 14 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Arryan Singh +* Bas van Beek +* Charles Harris +* Denis Laxalde +* Isuru Fernando +* Kevin Sheppard +* Matthew Barber +* Matti Picus +* Melissa Weber Mendonça +* Mukulika Pahari +* Omid Rajaei + +* Pearu Peterson +* Ralf Gommers +* Sebastian Berg + +Pull requests merged +==================== + +A total of 20 pull requests were merged for this release. + +* `#20702 <https://github.com/numpy/numpy/pull/20702>`__: MAINT, DOC: Post 1.22.0 release fixes. +* `#20703 <https://github.com/numpy/numpy/pull/20703>`__: DOC, BUG: Use pngs instead of svgs. +* `#20704 <https://github.com/numpy/numpy/pull/20704>`__: DOC:Fixed the link on user-guide landing page +* `#20714 <https://github.com/numpy/numpy/pull/20714>`__: BUG: Restore vc141 support +* `#20724 <https://github.com/numpy/numpy/pull/20724>`__: BUG: Fix array dimensions solver for multidimensional arguments... +* `#20725 <https://github.com/numpy/numpy/pull/20725>`__: TYP: change type annotation for `__array_namespace__` to ModuleType +* `#20726 <https://github.com/numpy/numpy/pull/20726>`__: TYP, MAINT: Allow `ndindex` to accept integer tuples +* `#20757 <https://github.com/numpy/numpy/pull/20757>`__: BUG: Relax dtype identity check in reductions +* `#20763 <https://github.com/numpy/numpy/pull/20763>`__: TYP: Allow time manipulation functions to accept `date` and `timedelta`... +* `#20768 <https://github.com/numpy/numpy/pull/20768>`__: TYP: Relax the type of `ndarray.__array_finalize__` +* `#20795 <https://github.com/numpy/numpy/pull/20795>`__: MAINT: Raise RuntimeError if setuptools version is too recent. +* `#20796 <https://github.com/numpy/numpy/pull/20796>`__: BUG, DOC: Fixes SciPy docs build warnings +* `#20797 <https://github.com/numpy/numpy/pull/20797>`__: DOC: fix OpenBLAS version in release note +* `#20798 <https://github.com/numpy/numpy/pull/20798>`__: PERF: Optimize array check for bounded 0,1 values +* `#20805 <https://github.com/numpy/numpy/pull/20805>`__: BUG: Fix that reduce-likes honor out always (and live in the... +* `#20806 <https://github.com/numpy/numpy/pull/20806>`__: BUG: ``array_api.argsort(descending=True)`` respects relative... +* `#20807 <https://github.com/numpy/numpy/pull/20807>`__: BUG: Allow integer inputs for pow-related functions in `array_api` +* `#20814 <https://github.com/numpy/numpy/pull/20814>`__: DOC: Refer to NumPy, not pandas, in main page +* `#20815 <https://github.com/numpy/numpy/pull/20815>`__: DOC: Update Copyright to 2022 [License] +* `#20819 <https://github.com/numpy/numpy/pull/20819>`__: BUG: Return correctly shaped inverse indices in array_api set... diff --git a/doc/release/upcoming_changes/15844.new_feature.rst b/doc/release/upcoming_changes/15844.new_feature.rst new file mode 100644 index 000000000..f2746807b --- /dev/null +++ b/doc/release/upcoming_changes/15844.new_feature.rst @@ -0,0 +1,4 @@ +f2py supports reading access type attributes from derived type statements +------------------------------------------------------------------------- +As a result, one does not need to use `public` or `private` statements to +specify derived type access properties. diff --git a/doc/source/release.rst b/doc/source/release.rst index eb6437cf0..fe8027304 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -6,6 +6,7 @@ Release notes :maxdepth: 3 1.23.0 <release/1.23.0-notes> + 1.22.1 <release/1.22.1-notes> 1.22.0 <release/1.22.0-notes> 1.21.5 <release/1.21.5-notes> 1.21.4 <release/1.21.4-notes> diff --git a/doc/source/release/1.22.1-notes.rst b/doc/source/release/1.22.1-notes.rst new file mode 100644 index 000000000..0012f199e --- /dev/null +++ b/doc/source/release/1.22.1-notes.rst @@ -0,0 +1,63 @@ +.. currentmodule:: numpy + +========================== +NumPy 1.22.1 Release Notes +========================== + +The NumPy 1.22.1 is a maintenance release that fixes bugs discovered after the +1.22.0 release. Notable fixes are: + +- Fix f2PY docstring problems (SciPy) +- Fix reduction type problems (AstroPy) +- Fix various typing bugs. + +The Python versions supported for this release are 3.8-3.10. + + +Contributors +============ + +A total of 14 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Arryan Singh +* Bas van Beek +* Charles Harris +* Denis Laxalde +* Isuru Fernando +* Kevin Sheppard +* Matthew Barber +* Matti Picus +* Melissa Weber Mendonça +* Mukulika Pahari +* Omid Rajaei + +* Pearu Peterson +* Ralf Gommers +* Sebastian Berg + + +Pull requests merged +==================== + +A total of 20 pull requests were merged for this release. + +* `#20702 <https://github.com/numpy/numpy/pull/20702>`__: MAINT, DOC: Post 1.22.0 release fixes. +* `#20703 <https://github.com/numpy/numpy/pull/20703>`__: DOC, BUG: Use pngs instead of svgs. +* `#20704 <https://github.com/numpy/numpy/pull/20704>`__: DOC: Fixed the link on user-guide landing page +* `#20714 <https://github.com/numpy/numpy/pull/20714>`__: BUG: Restore vc141 support +* `#20724 <https://github.com/numpy/numpy/pull/20724>`__: BUG: Fix array dimensions solver for multidimensional arguments... +* `#20725 <https://github.com/numpy/numpy/pull/20725>`__: TYP: change type annotation for ``__array_namespace__`` to ModuleType +* `#20726 <https://github.com/numpy/numpy/pull/20726>`__: TYP, MAINT: Allow ``ndindex`` to accept integer tuples +* `#20757 <https://github.com/numpy/numpy/pull/20757>`__: BUG: Relax dtype identity check in reductions +* `#20763 <https://github.com/numpy/numpy/pull/20763>`__: TYP: Allow time manipulation functions to accept ``date`` and ``timedelta``... +* `#20768 <https://github.com/numpy/numpy/pull/20768>`__: TYP: Relax the type of ``ndarray.__array_finalize__`` +* `#20795 <https://github.com/numpy/numpy/pull/20795>`__: MAINT: Raise RuntimeError if setuptools version is too recent. +* `#20796 <https://github.com/numpy/numpy/pull/20796>`__: BUG, DOC: Fixes SciPy docs build warnings +* `#20797 <https://github.com/numpy/numpy/pull/20797>`__: DOC: fix OpenBLAS version in release note +* `#20798 <https://github.com/numpy/numpy/pull/20798>`__: PERF: Optimize array check for bounded 0,1 values +* `#20805 <https://github.com/numpy/numpy/pull/20805>`__: BUG: Fix that reduce-likes honor out always (and live in the... +* `#20806 <https://github.com/numpy/numpy/pull/20806>`__: BUG: ``array_api.argsort(descending=True)`` respects relative... +* `#20807 <https://github.com/numpy/numpy/pull/20807>`__: BUG: Allow integer inputs for pow-related functions in ``array_api`` +* `#20814 <https://github.com/numpy/numpy/pull/20814>`__: DOC: Refer to NumPy, not pandas, in main page +* `#20815 <https://github.com/numpy/numpy/pull/20815>`__: DOC: Update Copyright to 2022 [License] +* `#20819 <https://github.com/numpy/numpy/pull/20819>`__: BUG: Return correctly shaped inverse indices in array_api set... diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 8e92e0f42..451a29a02 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -1451,13 +1451,13 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]): def size(self) -> int: ... @property def real( - self: NDArray[_SupportsReal[_ScalarType]], # type: ignore[type-var] + self: ndarray[_ShapeType, dtype[_SupportsReal[_ScalarType]]], # type: ignore[type-var] ) -> ndarray[_ShapeType, _dtype[_ScalarType]]: ... @real.setter def real(self, value: ArrayLike) -> None: ... @property def imag( - self: NDArray[_SupportsImag[_ScalarType]], # type: ignore[type-var] + self: ndarray[_ShapeType, dtype[_SupportsImag[_ScalarType]]], # type: ignore[type-var] ) -> ndarray[_ShapeType, _dtype[_ScalarType]]: ... @imag.setter def imag(self, value: ArrayLike) -> None: ... diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 88794ca07..1d6234e20 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -180,12 +180,6 @@ defined(__MINGW32__) || defined(__MINGW64__) #include <io.h> -/* mingw based on 3.4.5 has lseek but not ftell/fseek */ -#if defined(__MINGW32__) || defined(__MINGW64__) -extern int __cdecl _fseeki64(FILE *, long long, int); -extern long long __cdecl _ftelli64(FILE *); -#endif - #define npy_fseek _fseeki64 #define npy_ftell _ftelli64 #define npy_lseek _lseeki64 diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 1d7050bbe..bb0633100 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -173,7 +173,11 @@ typedef struct _tagPyUFuncObject { * but this was never implemented. (This is also why the above * selector is called the "legacy" selector.) */ - vectorcallfunc vectorcall; + #ifndef Py_LIMITED_API + vectorcallfunc vectorcall; + #else + void *vectorcall; + #endif /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */ void *_always_null_previously_masked_innerloop_selector; diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 22cac1e9a..3519ed6cf 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -948,8 +948,8 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'npy_sort.h.src'), join('src', 'npysort', 'quicksort.c.src'), join('src', 'npysort', 'mergesort.c.src'), - join('src', 'npysort', 'timsort.c.src'), - join('src', 'npysort', 'heapsort.c.src'), + join('src', 'npysort', 'timsort.cpp'), + join('src', 'npysort', 'heapsort.cpp'), join('src', 'npysort', 'radixsort.cpp'), join('src', 'common', 'npy_partition.h.src'), join('src', 'npysort', 'selection.c.src'), diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index 84de9a059..fabec069c 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -381,7 +381,7 @@ SIMD_IMPL_INTRIN_1(sumup_@sfx@, @esfx@, v@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square, ceil, trunc# + * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor# */ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@) /**end repeat1**/ @@ -615,7 +615,7 @@ SIMD_INTRIN_DEF(sumup_@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square, ceil, trunc# + * #intrin = sqrt, recip, abs, square, rint, ceil, trunc, floor# */ SIMD_INTRIN_DEF(@intrin@_@sfx@) /**end repeat1**/ diff --git a/numpy/core/src/common/numpy_tag.h b/numpy/core/src/common/numpy_tag.h index 60e9b02cd..ee0c36cac 100644 --- a/numpy/core/src/common/numpy_tag.h +++ b/numpy/core/src/common/numpy_tag.h @@ -220,6 +220,40 @@ struct timedelta_tag : date_tag { } }; +struct string_tag { + using type = npy_char; + static constexpr NPY_TYPES type_value = NPY_STRING; + static int less(type const* a, type const* b, size_t len) { + return STRING_LT(a, b, len); + } + static int less_equal(type const* a, type const* b, size_t len) { + return !less(b, a, len); + } + static void swap(type* a, type* b, size_t len) { + STRING_SWAP(a, b, len); + } + static void copy(type * a, type const* b, size_t len) { + STRING_COPY(a, b, len); + } +}; + +struct unicode_tag { + using type = npy_ucs4; + static constexpr NPY_TYPES type_value = NPY_UNICODE; + static int less(type const* a, type const* b, size_t len) { + return UNICODE_LT(a, b, len); + } + static int less_equal(type const* a, type const* b, size_t len) { + return !less(b, a, len); + } + static void swap(type* a, type* b, size_t len) { + UNICODE_SWAP(a, b, len); + } + static void copy(type * a, type const* b, size_t len) { + UNICODE_COPY(a, b, len); + } +}; + } // namespace npy #endif diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h index ec15e50e1..deaf4ad11 100644 --- a/numpy/core/src/common/simd/avx2/math.h +++ b/numpy/core/src/common/simd/avx2/math.h @@ -42,7 +42,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm256_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q); @@ -76,7 +76,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) #define npyv_min_f64 _mm256_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q); @@ -105,6 +105,10 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b)); } +// round to nearest intger even +#define npyv_rint_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_NEAREST_INT) +#define npyv_rint_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_NEAREST_INT) + // ceil #define npyv_ceil_f32 _mm256_ceil_ps #define npyv_ceil_f64 _mm256_ceil_pd @@ -113,4 +117,8 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) #define npyv_trunc_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_ZERO) #define npyv_trunc_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_ZERO) +// floor +#define npyv_floor_f32 _mm256_floor_ps +#define npyv_floor_f64 _mm256_floor_pd + #endif // _NPY_SIMD_AVX2_MATH_H diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h index f30e50ad0..5a6cb6dcd 100644 --- a/numpy/core/src/common/simd/avx512/math.h +++ b/numpy/core/src/common/simd/avx512/math.h @@ -51,7 +51,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm512_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q); @@ -84,7 +84,7 @@ NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b) #define npyv_min_f64 _mm512_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q); @@ -112,6 +112,10 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b) #define npyv_min_u64 _mm512_min_epu64 #define npyv_min_s64 _mm512_min_epi64 +// round to nearest integer even +#define npyv_rint_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEAREST_INT) +#define npyv_rint_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEAREST_INT) + // ceil #define npyv_ceil_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_POS_INF) #define npyv_ceil_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_POS_INF) @@ -120,4 +124,8 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b) #define npyv_trunc_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_ZERO) #define npyv_trunc_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_ZERO) +// floor +#define npyv_floor_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_NEG_INF) +#define npyv_floor_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_NEG_INF) + #endif // _NPY_SIMD_AVX512_MATH_H diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h index 19e5cd846..4607d6f27 100644 --- a/numpy/core/src/common/simd/neon/math.h +++ b/numpy/core/src/common/simd/neon/math.h @@ -153,6 +153,33 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return vbslq_s64(npyv_cmplt_s64(a, b), a, b); } +// round to nearest integer even +NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) +{ +#ifdef NPY_HAVE_ASIMD + return vrndnq_f32(a); +#else + // ARMv7 NEON only supports fp to int truncate conversion. + // a magic trick of adding 1.5 * 2**23 is used for rounding + // to nearest even and then substract this magic number to get + // the integer. + const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_f32 magic = vdupq_n_f32(12582912.0f); // 1.5 * 2**23 + npyv_f32 round = vsubq_f32(vaddq_f32(a, magic), magic); + npyv_b32 overflow = vcleq_f32(vabsq_f32(a), vreinterpretq_f32_u32(vdupq_n_u32(0x4b000000))); + round = vbslq_f32(overflow, round, a); + // signed zero + round = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(round), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + return round; +#endif +} +#if NPY_SIMD_F64 + #define npyv_rint_f64 vrndnq_f64 +#endif // NPY_SIMD_F64 + // ceil #ifdef NPY_HAVE_ASIMD #define npyv_ceil_f32 vrndpq_f32 @@ -223,4 +250,36 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) #define npyv_trunc_f64 vrndq_f64 #endif // NPY_SIMD_F64 +// floor +#ifdef NPY_HAVE_ASIMD + #define npyv_floor_f32 vrndmq_f32 +#else + NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a) + { + const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_u32 one = vreinterpretq_u32_f32(vdupq_n_f32(1.0f)); + const npyv_s32 max_int = vdupq_n_s32(0x7fffffff); + + npyv_s32 roundi = vcvtq_s32_f32(a); + npyv_f32 round = vcvtq_f32_s32(roundi); + npyv_f32 floor = vsubq_f32(round, vreinterpretq_f32_u32( + vandq_u32(vcgtq_f32(round, a), one) + )); + // respect signed zero + npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(floor), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + npyv_u32 nnan = npyv_notnan_f32(a); + npyv_u32 overflow = vorrq_u32( + vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int) + ); + + return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a); + } +#endif // NPY_HAVE_ASIMD +#if NPY_SIMD_F64 + #define npyv_floor_f64 vrndmq_f64 +#endif // NPY_SIMD_F64 + #endif // _NPY_SIMD_NEON_MATH_H diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h index 5daf7711e..e4b77b671 100644 --- a/numpy/core/src/common/simd/sse/math.h +++ b/numpy/core/src/common/simd/sse/math.h @@ -42,7 +42,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 _mm_max_pd // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) { __m128 nn = _mm_cmpord_ps(b, b); @@ -95,7 +95,7 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) #define npyv_min_f64 _mm_min_pd // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) { __m128 nn = _mm_cmpord_ps(b, b); @@ -143,6 +143,38 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return npyv_select_s64(npyv_cmplt_s64(a, b), a, b); } +// round to nearest integer even +NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) +{ +#ifdef NPY_HAVE_SSE41 + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); +#else + const npyv_f32 szero = _mm_set1_ps(-0.0f); + __m128i roundi = _mm_cvtps_epi32(a); + __m128i overflow = _mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)); + __m128 r = _mm_cvtepi32_ps(roundi); + // respect sign of zero + r = _mm_or_ps(r, _mm_and_ps(a, szero)); + return npyv_select_f32(overflow, a, r); +#endif +} + +// round to nearest integer even +NPY_FINLINE npyv_f64 npyv_rint_f64(npyv_f64 a) +{ +#ifdef NPY_HAVE_SSE41 + return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT); +#else + const npyv_f64 szero = _mm_set1_pd(-0.0); + const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000); + npyv_f64 sign_two52 = _mm_or_pd(two_power_52, _mm_and_pd(a, szero)); + // round by add magic number 2^52 + npyv_f64 round = _mm_sub_pd(_mm_add_pd(a, sign_two52), sign_two52); + // respect signed zero, e.g. -0.5 -> -0.0 + return _mm_or_pd(round, _mm_and_pd(a, szero)); +#endif +} + // ceil #ifdef NPY_HAVE_SSE41 #define npyv_ceil_f32 _mm_ceil_ps @@ -202,4 +234,23 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) } #endif +// floor +#ifdef NPY_HAVE_SSE41 + #define npyv_floor_f32 _mm_floor_ps + #define npyv_floor_f64 _mm_floor_pd +#else + NPY_FINLINE npyv_f32 npyv_floor_f32(npyv_f32 a) + { + const npyv_f32 one = _mm_set1_ps(1.0f); + npyv_f32 round = npyv_rint_f32(a); + return _mm_sub_ps(round, _mm_and_ps(_mm_cmpgt_ps(round, a), one)); + } + NPY_FINLINE npyv_f64 npyv_floor_f64(npyv_f64 a) + { + const npyv_f64 one = _mm_set1_pd(1.0); + npyv_f64 round = npyv_rint_f64(a); + return _mm_sub_pd(round, _mm_and_pd(_mm_cmpgt_pd(round, a), one)); + } +#endif // NPY_HAVE_SSE41 + #endif // _NPY_SIMD_SSE_MATH_H diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h index d138cae8a..444bc9e54 100644 --- a/numpy/core/src/common/simd/vsx/math.h +++ b/numpy/core/src/common/simd/vsx/math.h @@ -38,7 +38,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_max_f64 vec_max // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. #define npyv_maxp_f32 vec_max #define npyv_maxp_f64 vec_max // Maximum, integer operations @@ -56,7 +56,7 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_min_f64 vec_min // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. #define npyv_minp_f32 vec_min #define npyv_minp_f64 vec_min // Minimum, integer operations @@ -69,6 +69,10 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_min_u64 vec_min #define npyv_min_s64 vec_min +// round to nearest int even +#define npyv_rint_f32 vec_rint +#define npyv_rint_f64 vec_rint + // ceil #define npyv_ceil_f32 vec_ceil #define npyv_ceil_f64 vec_ceil @@ -77,4 +81,8 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_trunc_f32 vec_trunc #define npyv_trunc_f64 vec_trunc +// floor +#define npyv_floor_f32 vec_floor +#define npyv_floor_f64 vec_floor + #endif // _NPY_SIMD_VSX_MATH_H diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 71808cc48..71401c60e 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -2849,7 +2849,7 @@ static int #define LT(a,b) ((a) < (b) || ((b) != (b) && (a) ==(a))) static int -@TYPE@_compare(@type@ *pa, @type@ *pb) +@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ a = *pa; const @type@ b = *pb; @@ -2869,7 +2869,7 @@ static int static int -C@TYPE@_compare(@type@ *pa, @type@ *pb) +C@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ ar = pa[0]; const @type@ ai = pa[1]; @@ -2924,7 +2924,7 @@ C@TYPE@_compare(@type@ *pa, @type@ *pb) */ static int -@TYPE@_compare(@type@ *pa, @type@ *pb) +@TYPE@_compare(@type@ *pa, @type@ *pb, PyArrayObject *NPY_UNUSED(ap)) { const @type@ a = *pa; const @type@ b = *pb; diff --git a/numpy/core/src/npysort/heapsort.c.src b/numpy/core/src/npysort/heapsort.c.src deleted file mode 100644 index 4bfea1388..000000000 --- a/numpy/core/src/npysort/heapsort.c.src +++ /dev/null @@ -1,402 +0,0 @@ -/* -*- c -*- */ - -/* - * The purpose of this module is to add faster sort functions - * that are type-specific. This is done by altering the - * function table for the builtin descriptors. - * - * These sorting functions are copied almost directly from numarray - * with a few modifications (complex comparisons compare the imaginary - * part if the real parts are equal, for example), and the names - * are changed. - * - * The original sorting code is due to Charles R. Harris who wrote - * it for numarray. - */ - -/* - * Quick sort is usually the fastest, but the worst case scenario can - * be slower than the merge and heap sorts. The merge sort requires - * extra memory and so for large arrays may not be useful. - * - * The merge sort is *stable*, meaning that equal components - * are unmoved from their entry versions, so it can be used to - * implement lexigraphic sorting on multiple keys. - * - * The heap sort is included for completeness. - */ - -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "npy_sort.h" -#include "npysort_common.h" -#include <stdlib.h> - -#define NOT_USED NPY_UNUSED(unused) -#define PYA_QS_STACK 100 -#define SMALL_QUICKSORT 15 -#define SMALL_MERGESORT 20 -#define SMALL_STRING 16 - - -/* - ***************************************************************************** - ** NUMERIC SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, - * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, half, float, double, longdouble, - * cfloat, cdouble, clongdouble, datetime, timedelta# - * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, - * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, - * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# - */ - -NPY_NO_EXPORT int -heapsort_@suff@(void *start, npy_intp n, void *NOT_USED) -{ - @type@ tmp, *a; - npy_intp i,j,l; - - /* The array needs to be offset by one for heapsort indexing */ - a = (@type@ *)start - 1; - - for (l = n>>1; l > 0; --l) { - tmp = a[l]; - for (i = l, j = l<<1; j <= n;) { - if (j < n && @TYPE@_LT(a[j], a[j+1])) { - j += 1; - } - if (@TYPE@_LT(tmp, a[j])) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - for (; n > 1;) { - tmp = a[n]; - a[n] = a[1]; - n -= 1; - for (i = 1, j = 2; j <= n;) { - if (j < n && @TYPE@_LT(a[j], a[j+1])) { - j++; - } - if (@TYPE@_LT(tmp, a[j])) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - return 0; -} - - -NPY_NO_EXPORT int -aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *NOT_USED) -{ - @type@ *v = vv; - npy_intp *a, i,j,l, tmp; - /* The arrays need to be offset by one for heapsort indexing */ - a = tosort - 1; - - for (l = n>>1; l > 0; --l) { - tmp = a[l]; - for (i = l, j = l<<1; j <= n;) { - if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) { - j += 1; - } - if (@TYPE@_LT(v[tmp], v[a[j]])) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - for (; n > 1;) { - tmp = a[n]; - a[n] = a[1]; - n -= 1; - for (i = 1, j = 2; j <= n;) { - if (j < n && @TYPE@_LT(v[a[j]], v[a[j+1]])) { - j++; - } - if (@TYPE@_LT(v[tmp], v[a[j]])) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - return 0; -} - -/**end repeat**/ - - -/* - ***************************************************************************** - ** STRING SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #TYPE = STRING, UNICODE# - * #suff = string, unicode# - * #type = npy_char, npy_ucs4# - */ - -NPY_NO_EXPORT int -heapsort_@suff@(void *start, npy_intp n, void *varr) -{ - PyArrayObject *arr = varr; - size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); - @type@ *tmp = malloc(PyArray_ITEMSIZE(arr)); - @type@ *a = (@type@ *)start - len; - npy_intp i, j, l; - - if (tmp == NULL) { - return -NPY_ENOMEM; - } - - for (l = n>>1; l > 0; --l) { - @TYPE@_COPY(tmp, a + l*len, len); - for (i = l, j = l<<1; j <= n;) { - if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len)) - j += 1; - if (@TYPE@_LT(tmp, a + j*len, len)) { - @TYPE@_COPY(a + i*len, a + j*len, len); - i = j; - j += j; - } - else { - break; - } - } - @TYPE@_COPY(a + i*len, tmp, len); - } - - for (; n > 1;) { - @TYPE@_COPY(tmp, a + n*len, len); - @TYPE@_COPY(a + n*len, a + len, len); - n -= 1; - for (i = 1, j = 2; j <= n;) { - if (j < n && @TYPE@_LT(a + j*len, a + (j+1)*len, len)) - j++; - if (@TYPE@_LT(tmp, a + j*len, len)) { - @TYPE@_COPY(a + i*len, a + j*len, len); - i = j; - j += j; - } - else { - break; - } - } - @TYPE@_COPY(a + i*len, tmp, len); - } - - free(tmp); - return 0; -} - - -NPY_NO_EXPORT int -aheapsort_@suff@(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - @type@ *v = vv; - PyArrayObject *arr = varr; - size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); - npy_intp *a, i,j,l, tmp; - - /* The array needs to be offset by one for heapsort indexing */ - a = tosort - 1; - - for (l = n>>1; l > 0; --l) { - tmp = a[l]; - for (i = l, j = l<<1; j <= n;) { - if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len)) - j += 1; - if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - for (; n > 1;) { - tmp = a[n]; - a[n] = a[1]; - n -= 1; - for (i = 1, j = 2; j <= n;) { - if (j < n && @TYPE@_LT(v + a[j]*len, v + a[j+1]*len, len)) - j++; - if (@TYPE@_LT(v + tmp*len, v + a[j]*len, len)) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - return 0; -} - -/**end repeat**/ - - -/* - ***************************************************************************** - ** GENERIC SORT ** - ***************************************************************************** - */ - - -NPY_NO_EXPORT int -npy_heapsort(void *start, npy_intp num, void *varr) -{ - PyArrayObject *arr = varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; - char *tmp = malloc(elsize); - char *a = (char *)start - elsize; - npy_intp i, j, l; - - if (tmp == NULL) { - return -NPY_ENOMEM; - } - - for (l = num >> 1; l > 0; --l) { - GENERIC_COPY(tmp, a + l*elsize, elsize); - for (i = l, j = l << 1; j <= num;) { - if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) { - ++j; - } - if (cmp(tmp, a + j*elsize, arr) < 0) { - GENERIC_COPY(a + i*elsize, a + j*elsize, elsize); - i = j; - j += j; - } - else { - break; - } - } - GENERIC_COPY(a + i*elsize, tmp, elsize); - } - - for (; num > 1;) { - GENERIC_COPY(tmp, a + num*elsize, elsize); - GENERIC_COPY(a + num*elsize, a + elsize, elsize); - num -= 1; - for (i = 1, j = 2; j <= num;) { - if (j < num && cmp(a + j*elsize, a + (j+1)*elsize, arr) < 0) { - ++j; - } - if (cmp(tmp, a + j*elsize, arr) < 0) { - GENERIC_COPY(a + i*elsize, a + j*elsize, elsize); - i = j; - j += j; - } - else { - break; - } - } - GENERIC_COPY(a + i*elsize, tmp, elsize); - } - - free(tmp); - return 0; -} - - -NPY_NO_EXPORT int -npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) -{ - char *v = vv; - PyArrayObject *arr = varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; - npy_intp *a, i, j, l, tmp; - - /* The array needs to be offset by one for heapsort indexing */ - a = tosort - 1; - - for (l = n >> 1; l > 0; --l) { - tmp = a[l]; - for (i = l, j = l<<1; j <= n;) { - if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) { - ++j; - } - if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - for (; n > 1;) { - tmp = a[n]; - a[n] = a[1]; - n -= 1; - for (i = 1, j = 2; j <= n;) { - if (j < n && cmp(v + a[j]*elsize, v + a[j+1]*elsize, arr) < 0) { - ++j; - } - if (cmp(v + tmp*elsize, v + a[j]*elsize, arr) < 0) { - a[i] = a[j]; - i = j; - j += j; - } - else { - break; - } - } - a[i] = tmp; - } - - return 0; -} diff --git a/numpy/core/src/npysort/heapsort.cpp b/numpy/core/src/npysort/heapsort.cpp new file mode 100644 index 000000000..3893bf5c2 --- /dev/null +++ b/numpy/core/src/npysort/heapsort.cpp @@ -0,0 +1,619 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario can + * be slower than the merge and heap sorts. The merge sort requires + * extra memory and so for large arrays may not be useful. + * + * The merge sort is *stable*, meaning that equal components + * are unmoved from their entry versions, so it can be used to + * implement lexigraphic sorting on multiple keys. + * + * The heap sort is included for completeness. + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy_tag.h" + +#include <cstdlib> + +#define NOT_USED NPY_UNUSED(unused) +#define PYA_QS_STACK 100 +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + +template <typename Tag, typename type> +static int +heapsort_(type *start, npy_intp n) +{ + type tmp, *a; + npy_intp i, j, l; + + /* The array needs to be offset by one for heapsort indexing */ + a = start - 1; + + for (l = n >> 1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l << 1; j <= n;) { + if (j < n && Tag::less(a[j], a[j + 1])) { + j += 1; + } + if (Tag::less(tmp, a[j])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && Tag::less(a[j], a[j + 1])) { + j++; + } + if (Tag::less(tmp, a[j])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +template <typename Tag, typename type> +static int +aheapsort_(type *vv, npy_intp *tosort, npy_intp n) +{ + type *v = vv; + npy_intp *a, i, j, l, tmp; + /* The arrays need to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n >> 1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l << 1; j <= n;) { + if (j < n && Tag::less(v[a[j]], v[a[j + 1]])) { + j += 1; + } + if (Tag::less(v[tmp], v[a[j]])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && Tag::less(v[a[j]], v[a[j + 1]])) { + j++; + } + if (Tag::less(v[tmp], v[a[j]])) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + +template <typename Tag, typename type> +NPY_NO_EXPORT int +string_heapsort_(type *start, npy_intp n, void *varr) +{ + PyArrayObject *arr = (PyArrayObject *)varr; + size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + type *tmp = (type *)malloc(PyArray_ITEMSIZE(arr)); + type *a = (type *)start - len; + npy_intp i, j, l; + + if (tmp == NULL) { + return -NPY_ENOMEM; + } + + for (l = n >> 1; l > 0; --l) { + Tag::copy(tmp, a + l * len, len); + for (i = l, j = l << 1; j <= n;) { + if (j < n && Tag::less(a + j * len, a + (j + 1) * len, len)) + j += 1; + if (Tag::less(tmp, a + j * len, len)) { + Tag::copy(a + i * len, a + j * len, len); + i = j; + j += j; + } + else { + break; + } + } + Tag::copy(a + i * len, tmp, len); + } + + for (; n > 1;) { + Tag::copy(tmp, a + n * len, len); + Tag::copy(a + n * len, a + len, len); + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && Tag::less(a + j * len, a + (j + 1) * len, len)) + j++; + if (Tag::less(tmp, a + j * len, len)) { + Tag::copy(a + i * len, a + j * len, len); + i = j; + j += j; + } + else { + break; + } + } + Tag::copy(a + i * len, tmp, len); + } + + free(tmp); + return 0; +} + +template <typename Tag, typename type> +NPY_NO_EXPORT int +string_aheapsort_(type *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + type *v = vv; + PyArrayObject *arr = (PyArrayObject *)varr; + size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + npy_intp *a, i, j, l, tmp; + + /* The array needs to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n >> 1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l << 1; j <= n;) { + if (j < n && Tag::less(v + a[j] * len, v + a[j + 1] * len, len)) + j += 1; + if (Tag::less(v + tmp * len, v + a[j] * len, len)) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && Tag::less(v + a[j] * len, v + a[j + 1] * len, len)) + j++; + if (Tag::less(v + tmp * len, v + a[j] * len, len)) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +/**end repeat**/ + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + +NPY_NO_EXPORT int +npy_heapsort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = (PyArrayObject *)varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *tmp = (char *)malloc(elsize); + char *a = (char *)start - elsize; + npy_intp i, j, l; + + if (tmp == NULL) { + return -NPY_ENOMEM; + } + + for (l = num >> 1; l > 0; --l) { + GENERIC_COPY(tmp, a + l * elsize, elsize); + for (i = l, j = l << 1; j <= num;) { + if (j < num && + cmp(a + j * elsize, a + (j + 1) * elsize, arr) < 0) { + ++j; + } + if (cmp(tmp, a + j * elsize, arr) < 0) { + GENERIC_COPY(a + i * elsize, a + j * elsize, elsize); + i = j; + j += j; + } + else { + break; + } + } + GENERIC_COPY(a + i * elsize, tmp, elsize); + } + + for (; num > 1;) { + GENERIC_COPY(tmp, a + num * elsize, elsize); + GENERIC_COPY(a + num * elsize, a + elsize, elsize); + num -= 1; + for (i = 1, j = 2; j <= num;) { + if (j < num && + cmp(a + j * elsize, a + (j + 1) * elsize, arr) < 0) { + ++j; + } + if (cmp(tmp, a + j * elsize, arr) < 0) { + GENERIC_COPY(a + i * elsize, a + j * elsize, elsize); + i = j; + j += j; + } + else { + break; + } + } + GENERIC_COPY(a + i * elsize, tmp, elsize); + } + + free(tmp); + return 0; +} + +NPY_NO_EXPORT int +npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + char *v = (char *)vv; + PyArrayObject *arr = (PyArrayObject *)varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + npy_intp *a, i, j, l, tmp; + + /* The array needs to be offset by one for heapsort indexing */ + a = tosort - 1; + + for (l = n >> 1; l > 0; --l) { + tmp = a[l]; + for (i = l, j = l << 1; j <= n;) { + if (j < n && + cmp(v + a[j] * elsize, v + a[j + 1] * elsize, arr) < 0) { + ++j; + } + if (cmp(v + tmp * elsize, v + a[j] * elsize, arr) < 0) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + for (; n > 1;) { + tmp = a[n]; + a[n] = a[1]; + n -= 1; + for (i = 1, j = 2; j <= n;) { + if (j < n && + cmp(v + a[j] * elsize, v + a[j + 1] * elsize, arr) < 0) { + ++j; + } + if (cmp(v + tmp * elsize, v + a[j] * elsize, arr) < 0) { + a[i] = a[j]; + i = j; + j += j; + } + else { + break; + } + } + a[i] = tmp; + } + + return 0; +} + +/*************************************** + * C > C++ dispatch + ***************************************/ + +NPY_NO_EXPORT int +heapsort_bool(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::bool_tag>((npy_bool *)start, n); +} +NPY_NO_EXPORT int +heapsort_byte(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::byte_tag>((npy_byte *)start, n); +} +NPY_NO_EXPORT int +heapsort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::ubyte_tag>((npy_ubyte *)start, n); +} +NPY_NO_EXPORT int +heapsort_short(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::short_tag>((npy_short *)start, n); +} +NPY_NO_EXPORT int +heapsort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::ushort_tag>((npy_ushort *)start, n); +} +NPY_NO_EXPORT int +heapsort_int(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::int_tag>((npy_int *)start, n); +} +NPY_NO_EXPORT int +heapsort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::uint_tag>((npy_uint *)start, n); +} +NPY_NO_EXPORT int +heapsort_long(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::long_tag>((npy_long *)start, n); +} +NPY_NO_EXPORT int +heapsort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::ulong_tag>((npy_ulong *)start, n); +} +NPY_NO_EXPORT int +heapsort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::longlong_tag>((npy_longlong *)start, n); +} +NPY_NO_EXPORT int +heapsort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::ulonglong_tag>((npy_ulonglong *)start, n); +} +NPY_NO_EXPORT int +heapsort_half(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::half_tag>((npy_half *)start, n); +} +NPY_NO_EXPORT int +heapsort_float(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::float_tag>((npy_float *)start, n); +} +NPY_NO_EXPORT int +heapsort_double(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::double_tag>((npy_double *)start, n); +} +NPY_NO_EXPORT int +heapsort_longdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::longdouble_tag>((npy_longdouble *)start, n); +} +NPY_NO_EXPORT int +heapsort_cfloat(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::cfloat_tag>((npy_cfloat *)start, n); +} +NPY_NO_EXPORT int +heapsort_cdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::cdouble_tag>((npy_cdouble *)start, n); +} +NPY_NO_EXPORT int +heapsort_clongdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::clongdouble_tag>((npy_clongdouble *)start, n); +} +NPY_NO_EXPORT int +heapsort_datetime(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::datetime_tag>((npy_datetime *)start, n); +} +NPY_NO_EXPORT int +heapsort_timedelta(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return heapsort_<npy::timedelta_tag>((npy_timedelta *)start, n); +} + +NPY_NO_EXPORT int +aheapsort_bool(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::bool_tag>((npy_bool *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_byte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::byte_tag>((npy_byte *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_ubyte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::ubyte_tag>((npy_ubyte *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_short(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::short_tag>((npy_short *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_ushort(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::ushort_tag>((npy_ushort *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_int(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::int_tag>((npy_int *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_uint(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::uint_tag>((npy_uint *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_long(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::long_tag>((npy_long *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_ulong(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::ulong_tag>((npy_ulong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_longlong(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::longlong_tag>((npy_longlong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_ulonglong(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::ulonglong_tag>((npy_ulonglong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_half(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::half_tag>((npy_half *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_float(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::float_tag>((npy_float *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_double(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::double_tag>((npy_double *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_longdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::longdouble_tag>((npy_longdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_cfloat(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::cfloat_tag>((npy_cfloat *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_cdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::cdouble_tag>((npy_cdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_clongdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::clongdouble_tag>((npy_clongdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_datetime(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::datetime_tag>((npy_datetime *)vv, tosort, n); +} +NPY_NO_EXPORT int +aheapsort_timedelta(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aheapsort_<npy::timedelta_tag>((npy_timedelta *)vv, tosort, n); +} + +NPY_NO_EXPORT int +heapsort_string(void *start, npy_intp n, void *varr) +{ + return string_heapsort_<npy::string_tag>((npy_char *)start, n, varr); +} +NPY_NO_EXPORT int +heapsort_unicode(void *start, npy_intp n, void *varr) +{ + return string_heapsort_<npy::unicode_tag>((npy_ucs4 *)start, n, varr); +} + +NPY_NO_EXPORT int +aheapsort_string(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + return string_aheapsort_<npy::string_tag>((npy_char *)vv, tosort, n, varr); +} +NPY_NO_EXPORT int +aheapsort_unicode(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + return string_aheapsort_<npy::unicode_tag>((npy_ucs4 *)vv, tosort, n, + varr); +} diff --git a/numpy/core/src/npysort/npysort_common.h b/numpy/core/src/npysort/npysort_common.h index a537fcd08..ab9f456b6 100644 --- a/numpy/core/src/npysort/npysort_common.h +++ b/numpy/core/src/npysort/npysort_common.h @@ -259,7 +259,7 @@ CLONGDOUBLE_LT(npy_clongdouble a, npy_clongdouble b) NPY_INLINE static void -STRING_COPY(char *s1, char *s2, size_t len) +STRING_COPY(char *s1, char const*s2, size_t len) { memcpy(s1, s2, len); } @@ -295,7 +295,7 @@ STRING_LT(const char *s1, const char *s2, size_t len) NPY_INLINE static void -UNICODE_COPY(npy_ucs4 *s1, npy_ucs4 *s2, size_t len) +UNICODE_COPY(npy_ucs4 *s1, npy_ucs4 const *s2, size_t len) { while(len--) { *s1++ = *s2++; diff --git a/numpy/core/src/npysort/timsort.c.src b/numpy/core/src/npysort/timsort.cpp index 5298f5a1d..8bcd90061 100644 --- a/numpy/core/src/npysort/timsort.c.src +++ b/numpy/core/src/npysort/timsort.cpp @@ -26,7 +26,6 @@ * The heap sort is included for completeness. */ - /* For details of Timsort, refer to * https://github.com/python/cpython/blob/3.7/Objects/listsort.txt */ @@ -35,14 +34,16 @@ #include "npy_sort.h" #include "npysort_common.h" -#include <stdlib.h> +#include "numpy_tag.h" + +#include <cstdlib> +#include <utility> /* enough for 32 * 1.618 ** 128 elements */ #define TIMSORT_STACK_SIZE 128 - - -static npy_intp compute_min_run(npy_intp num) +static npy_intp +compute_min_run(npy_intp num) { npy_intp r = 0; @@ -59,14 +60,12 @@ typedef struct { npy_intp l; /* length */ } run; - /* buffer for argsort. Declared here to avoid multiple declarations. */ typedef struct { npy_intp *pw; npy_intp size; } buffer_intp; - /* buffer method */ static NPY_INLINE int resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) @@ -76,16 +75,19 @@ resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) } if (NPY_UNLIKELY(buffer->pw == NULL)) { - buffer->pw = malloc(new_size * sizeof(npy_intp)); - } else { - buffer->pw = realloc(buffer->pw, new_size * sizeof(npy_intp)); + buffer->pw = (npy_intp *)malloc(new_size * sizeof(npy_intp)); + } + else { + buffer->pw = + (npy_intp *)realloc(buffer->pw, new_size * sizeof(npy_intp)); } buffer->size = new_size; if (NPY_UNLIKELY(buffer->pw == NULL)) { return -NPY_ENOMEM; - } else { + } + else { return 0; } } @@ -96,56 +98,44 @@ resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) ***************************************************************************** */ - -/**begin repeat - * - * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, - * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, half, float, double, longdouble, - * cfloat, cdouble, clongdouble, datetime, timedelta# - * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, - * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, - * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# - */ - - -typedef struct { - @type@ * pw; +template <typename Tag> +struct buffer_ { + typename Tag::type *pw; npy_intp size; -} buffer_@suff@; - +}; +template <typename Tag> static NPY_INLINE int -resize_buffer_@suff@(buffer_@suff@ *buffer, npy_intp new_size) +resize_buffer_(buffer_<Tag> *buffer, npy_intp new_size) { + using type = typename Tag::type; if (new_size <= buffer->size) { return 0; } if (NPY_UNLIKELY(buffer->pw == NULL)) { - buffer->pw = malloc(new_size * sizeof(@type@)); - } else { - buffer->pw = realloc(buffer->pw, new_size * sizeof(@type@)); + buffer->pw = (type *)malloc(new_size * sizeof(type)); + } + else { + buffer->pw = (type *)realloc(buffer->pw, new_size * sizeof(type)); } buffer->size = new_size; if (NPY_UNLIKELY(buffer->pw == NULL)) { return -NPY_ENOMEM; - } else { + } + else { return 0; } } - +template <typename Tag, typename type> static npy_intp -count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun) +count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun) { npy_intp sz; - @type@ vc, *pl, *pi, *pj, *pr; + type vc, *pl, *pi, *pj, *pr; if (NPY_UNLIKELY(num - l == 1)) { return 1; @@ -154,15 +144,18 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun) pl = arr + l; /* (not strictly) ascending sequence */ - if (!@TYPE@_LT(*(pl + 1), *pl)) { - for (pi = pl + 1; pi < arr + num - 1 && !@TYPE@_LT(*(pi + 1), *pi); ++pi) { + if (!Tag::less(*(pl + 1), *pl)) { + for (pi = pl + 1; pi < arr + num - 1 && !Tag::less(*(pi + 1), *pi); + ++pi) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + 1; pi < arr + num - 1 && @TYPE@_LT(*(pi + 1), *pi); ++pi) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; pi < arr + num - 1 && Tag::less(*(pi + 1), *pi); + ++pi) { } for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - @TYPE@_SWAP(*pj, *pr); + std::swap(*pj, *pr); } } @@ -172,7 +165,8 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun) if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -183,7 +177,7 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun) vc = *pi; pj = pi; - while (pl < pj && @TYPE@_LT(vc, *(pj - 1))) { + while (pl < pj && Tag::less(vc, *(pj - 1))) { *pj = *(pj - 1); --pj; } @@ -195,43 +189,42 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun) return sz; } - /* when the left part of the array (p1) is smaller, copy p1 to buffer * and merge from left to right */ +template <typename Tag, typename type> static void -merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, - @type@ *p3) +merge_left_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) { - @type@ *end = p2 + l2; - memcpy(p3, p1, sizeof(@type@) * l1); + type *end = p2 + l2; + memcpy(p3, p1, sizeof(type) * l1); /* first element must be in p2 otherwise skipped in the caller */ *p1++ = *p2++; while (p1 < p2 && p2 < end) { - if (@TYPE@_LT(*p2, *p3)) { + if (Tag::less(*p2, *p3)) { *p1++ = *p2++; - } else { + } + else { *p1++ = *p3++; } } if (p1 != p2) { - memcpy(p1, p3, sizeof(@type@) * (p2 - p1)); + memcpy(p1, p3, sizeof(type) * (p2 - p1)); } } - /* when the right part of the array (p2) is smaller, copy p2 to buffer * and merge from right to left */ +template <typename Tag, typename type> static void -merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, - @type@ *p3) +merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3) { npy_intp ofs; - @type@ *start = p1 - 1; - memcpy(p3, p2, sizeof(@type@) * l2); + type *start = p1 - 1; + memcpy(p3, p2, sizeof(type) * l2); p1 += l1 - 1; p2 += l2 - 1; p3 += l2 - 1; @@ -239,31 +232,32 @@ merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, *p2-- = *p1--; while (p1 < p2 && start < p1) { - if (@TYPE@_LT(*p3, *p1)) { + if (Tag::less(*p3, *p1)) { *p2-- = *p1--; - } else { + } + else { *p2-- = *p3--; } } if (p1 != p2) { ofs = p2 - start; - memcpy(start + 1, p3 - ofs + 1, sizeof(@type@) * ofs); + memcpy(start + 1, p3 - ofs + 1, sizeof(type) * ofs); } } - /* Note: the naming convention of gallop functions are different from that of * CPython. For example, here gallop_right means gallop from left toward right, * whereas in CPython gallop_right means gallop * and find the right most element among equal elements */ +template <typename Tag, typename type> static npy_intp -gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) +gallop_right_(const type *arr, const npy_intp size, const type key) { npy_intp last_ofs, ofs, m; - if (@TYPE@_LT(key, arr[0])) { + if (Tag::less(key, arr[0])) { return 0; } @@ -276,9 +270,10 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) break; } - if (@TYPE@_LT(key, arr[ofs])) { + if (Tag::less(key, arr[ofs])) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -289,9 +284,10 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) while (last_ofs + 1 < ofs) { m = last_ofs + ((ofs - last_ofs) >> 1); - if (@TYPE@_LT(key, arr[m])) { + if (Tag::less(key, arr[m])) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -300,13 +296,13 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) return ofs; } - +template <typename Tag, typename type> static npy_intp -gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) +gallop_left_(const type *arr, const npy_intp size, const type key) { npy_intp last_ofs, ofs, l, m, r; - if (@TYPE@_LT(arr[size - 1], key)) { + if (Tag::less(arr[size - 1], key)) { return size; } @@ -319,9 +315,10 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) break; } - if (@TYPE@_LT(arr[size - ofs - 1], key)) { + if (Tag::less(arr[size - ofs - 1], key)) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } @@ -334,9 +331,10 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) while (l + 1 < r) { m = l + ((r - l) >> 1); - if (@TYPE@_LT(arr[m], key)) { + if (Tag::less(arr[m], key)) { l = m; - } else { + } + else { r = m; } } @@ -345,14 +343,13 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ key) return r; } - +template <typename Tag, typename type> static int -merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at, - buffer_@suff@ *buffer) +merge_at_(type *arr, const run *stack, const npy_intp at, buffer_<Tag> *buffer) { int ret; npy_intp s1, l1, s2, l2, k; - @type@ *p1, *p2; + type *p1, *p2; s1 = stack[at].s; l1 = stack[at].l; s2 = stack[at + 1].s; @@ -361,7 +358,7 @@ merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at, * if try to comment this out for debugging purpose, remember * in the merging process the first element is skipped */ - k = gallop_right_@suff@(arr + s1, l1, arr[s2]); + k = gallop_right_<Tag>(arr + s1, l1, arr[s2]); if (l1 == k) { /* already sorted */ @@ -372,29 +369,33 @@ merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at, l1 -= k; p2 = arr + s2; /* arr[s2-1] belongs to arr[s2+l2] */ - l2 = gallop_left_@suff@(arr + s2, l2, arr[s2 - 1]); + l2 = gallop_left_<Tag>(arr + s2, l2, arr[s2 - 1]); if (l2 < l1) { - ret = resize_buffer_@suff@(buffer, l2); + ret = resize_buffer_<Tag>(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - merge_right_@suff@(p1, l1, p2, l2, buffer->pw); - } else { - ret = resize_buffer_@suff@(buffer, l1); + merge_right_<Tag>(p1, l1, p2, l2, buffer->pw); + } + else { + ret = resize_buffer_<Tag>(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - merge_left_@suff@(p1, l1, p2, l2, buffer->pw); + merge_left_<Tag>(p1, l1, p2, l2, buffer->pw); } return 0; } - +template <typename Tag, typename type> static int -try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, - buffer_@suff@ *buffer) +try_collapse_(type *arr, run *stack, npy_intp *stack_ptr, buffer_<Tag> *buffer) { int ret; npy_intp A, B, C, top; @@ -405,33 +406,42 @@ try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = merge_at_@suff@(arr, stack, top - 3, buffer); + ret = merge_at_<Tag>(arr, stack, top - 3, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = merge_at_@suff@(arr, stack, top - 2, buffer); + } + else { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { - ret = merge_at_@suff@(arr, stack, top - 2, buffer); + } + else if (1 < top && B <= C) { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -440,26 +450,32 @@ try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, return 0; } +template <typename Tag, typename type> static int -force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, - buffer_@suff@ *buffer) +force_collapse_(type *arr, run *stack, npy_intp *stack_ptr, + buffer_<Tag> *buffer) { int ret; npy_intp top = *stack_ptr; while (2 < top) { if (stack[top - 3].l <= stack[top - 1].l) { - ret = merge_at_@suff@(arr, stack, top - 3, buffer); + ret = merge_at_<Tag>(arr, stack, top - 3, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = merge_at_@suff@(arr, stack, top - 2, buffer); + } + else { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -467,21 +483,24 @@ force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, } if (1 < top) { - ret = merge_at_@suff@(arr, stack, top - 2, buffer); + ret = merge_at_<Tag>(arr, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - -NPY_NO_EXPORT int -timsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr)) +template <typename Tag> +static int +timsort_(void *start, npy_intp num) { + using type = typename Tag::type; int ret; npy_intp l, n, stack_ptr, minrun; - buffer_@suff@ buffer; + buffer_<Tag> buffer; run stack[TIMSORT_STACK_SIZE]; buffer.pw = NULL; buffer.size = 0; @@ -489,20 +508,24 @@ timsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr)) minrun = compute_min_run(num); for (l = 0; l < num;) { - n = count_run_@suff@(start, l, num, minrun); + n = count_run_<Tag>((type *)start, l, num, minrun); stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer); + ret = try_collapse_<Tag>((type *)start, stack, &stack_ptr, &buffer); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer); + ret = force_collapse_<Tag>((type *)start, stack, &stack_ptr, &buffer); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; cleanup: @@ -512,16 +535,15 @@ cleanup: return ret; } - /* argsort */ - +template <typename Tag, typename type> static npy_intp -acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun) +acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, + npy_intp minrun) { npy_intp sz; - @type@ vc; + type vc; npy_intp vi; npy_intp *pl, *pi, *pj, *pr; @@ -532,17 +554,20 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, pl = tosort + l; /* (not strictly) ascending sequence */ - if (!@TYPE@_LT(arr[*(pl + 1)], arr[*pl])) { - for (pi = pl + 1; pi < tosort + num - 1 - && !@TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) { + if (!Tag::less(arr[*(pl + 1)], arr[*pl])) { + for (pi = pl + 1; + pi < tosort + num - 1 && !Tag::less(arr[*(pi + 1)], arr[*pi]); + ++pi) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + 1; pi < tosort + num - 1 - && @TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && Tag::less(arr[*(pi + 1)], arr[*pi]); + ++pi) { } for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - INTP_SWAP(*pj, *pr); + std::swap(*pj, *pr); } } @@ -552,7 +577,8 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -564,7 +590,7 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, vc = arr[*pi]; pj = pi; - while (pl < pj && @TYPE@_LT(vc, arr[*(pj - 1)])) { + while (pl < pj && Tag::less(vc, arr[*(pj - 1)])) { *pj = *(pj - 1); --pj; } @@ -576,14 +602,14 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, return sz; } - +template <typename Tag, typename type> static npy_intp -agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, - const npy_intp size, const @type@ key) +agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type key) { npy_intp last_ofs, ofs, m; - if (@TYPE@_LT(key, arr[tosort[0]])) { + if (Tag::less(key, arr[tosort[0]])) { return 0; } @@ -596,9 +622,10 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, break; } - if (@TYPE@_LT(key, arr[tosort[ofs]])) { + if (Tag::less(key, arr[tosort[ofs]])) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -609,9 +636,10 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, while (last_ofs + 1 < ofs) { m = last_ofs + ((ofs - last_ofs) >> 1); - if (@TYPE@_LT(key, arr[tosort[m]])) { + if (Tag::less(key, arr[tosort[m]])) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -620,15 +648,14 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, return ofs; } - - +template <typename Tag, typename type> static npy_intp -agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, - const npy_intp size, const @type@ key) +agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type key) { npy_intp last_ofs, ofs, l, m, r; - if (@TYPE@_LT(arr[tosort[size - 1]], key)) { + if (Tag::less(arr[tosort[size - 1]], key)) { return size; } @@ -641,24 +668,27 @@ agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, break; } - if (@TYPE@_LT(arr[tosort[size - ofs - 1]], key)) { + if (Tag::less(arr[tosort[size - ofs - 1]], key)) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } } - /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] */ + /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] + */ l = size - ofs - 1; r = size - last_ofs - 1; while (l + 1 < r) { m = l + ((r - l) >> 1); - if (@TYPE@_LT(arr[tosort[m]], key)) { + if (Tag::less(arr[tosort[m]], key)) { l = m; - } else { + } + else { r = m; } } @@ -667,11 +697,10 @@ agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, return r; } - +template <typename Tag, typename type> static void -amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, - npy_intp l2, - npy_intp *p3) +amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3) { npy_intp *end = p2 + l2; memcpy(p3, p1, sizeof(npy_intp) * l1); @@ -679,9 +708,10 @@ amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, *p1++ = *p2++; while (p1 < p2 && p2 < end) { - if (@TYPE@_LT(arr[*p2], arr[*p3])) { + if (Tag::less(arr[*p2], arr[*p3])) { *p1++ = *p2++; - } else { + } + else { *p1++ = *p3++; } } @@ -691,11 +721,10 @@ amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, } } - +template <typename Tag, typename type> static void -amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, - npy_intp l2, - npy_intp *p3) +amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3) { npy_intp ofs; npy_intp *start = p1 - 1; @@ -707,9 +736,10 @@ amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, *p2-- = *p1--; while (p1 < p2 && start < p1) { - if (@TYPE@_LT(arr[*p3], arr[*p1])) { + if (Tag::less(arr[*p3], arr[*p1])) { *p2-- = *p1--; - } else { + } + else { *p2-- = *p3--; } } @@ -720,11 +750,10 @@ amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, } } - +template <typename Tag, typename type> static int -amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, - const npy_intp at, - buffer_intp *buffer) +amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer) { int ret; npy_intp s1, l1, s2, l2, k; @@ -734,7 +763,7 @@ amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, s2 = stack[at + 1].s; l2 = stack[at + 1].l; /* tosort[s2] belongs to tosort[s1+k] */ - k = agallop_right_@suff@(arr, tosort + s1, l1, arr[tosort[s2]]); + k = agallop_right_<Tag>(arr, tosort + s1, l1, arr[tosort[s2]]); if (l1 == k) { /* already sorted */ @@ -745,30 +774,34 @@ amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, l1 -= k; p2 = tosort + s2; /* tosort[s2-1] belongs to tosort[s2+l2] */ - l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr[tosort[s2 - 1]]); + l2 = agallop_left_<Tag>(arr, tosort + s2, l2, arr[tosort[s2 - 1]]); if (l2 < l1) { ret = resize_buffer_intp(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw); - } else { + amerge_right_<Tag>(arr, p1, l1, p2, l2, buffer->pw); + } + else { ret = resize_buffer_intp(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw); + amerge_left_<Tag>(arr, p1, l1, p2, l2, buffer->pw); } return 0; } - +template <typename Tag, typename type> static int -atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, - buffer_intp *buffer) +atry_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer) { int ret; npy_intp A, B, C, top; @@ -779,33 +812,42 @@ atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 3, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer); + } + else { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer); + } + else if (1 < top && B <= C) { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -814,28 +856,32 @@ atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, return 0; } - +template <typename Tag, typename type> static int -aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, - buffer_intp *buffer) +aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer) { int ret; npy_intp top = *stack_ptr; while (2 < top) { if (stack[top - 3].l <= stack[top - 1].l) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 3, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer); + } + else { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -843,19 +889,21 @@ aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, } if (1 < top) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - -NPY_NO_EXPORT int -atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, - void *NPY_UNUSED(varr)) +template <typename Tag> +static int +atimsort_(void *v, npy_intp *tosort, npy_intp num) { + using type = typename Tag::type; int ret; npy_intp l, n, stack_ptr, minrun; buffer_intp buffer; @@ -866,20 +914,25 @@ atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num, minrun = compute_min_run(num); for (l = 0; l < num;) { - n = acount_run_@suff@(v, tosort, l, num, minrun); + n = acount_run_<Tag>((type *)v, tosort, l, num, minrun); stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = atry_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer); + ret = atry_collapse_<Tag>((type *)v, tosort, stack, &stack_ptr, + &buffer); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = aforce_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer); + ret = aforce_collapse_<Tag>((type *)v, tosort, stack, &stack_ptr, &buffer); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; cleanup: @@ -891,18 +944,15 @@ cleanup: return ret; } -/**end repeat**/ - - - /* For string sorts and generic sort, element comparisons are very expensive, - * and the time cost of insertion sort (involves N**2 comparison) clearly hurts. - * Implementing binary insertion sort and probably gallop mode during merging process - * can hopefully boost the performance. Here as a temporary workaround we use shorter - * run length to reduce the cost of insertion sort. + * and the time cost of insertion sort (involves N**2 comparison) clearly + * hurts. Implementing binary insertion sort and probably gallop mode during + * merging process can hopefully boost the performance. Here as a temporary + * workaround we use shorter run length to reduce the cost of insertion sort. */ -static npy_intp compute_min_run_short(npy_intp num) +static npy_intp +compute_min_run_short(npy_intp num) { npy_intp r = 0; @@ -920,51 +970,47 @@ static npy_intp compute_min_run_short(npy_intp num) ***************************************************************************** */ - -/**begin repeat - * - * #TYPE = STRING, UNICODE# - * #suff = string, unicode# - * #type = npy_char, npy_ucs4# - */ - - -typedef struct { - @type@ * pw; +template <typename Tag> +struct string_buffer_ { + typename Tag::type *pw; npy_intp size; size_t len; -} buffer_@suff@; - +}; +template <typename Tag> static NPY_INLINE int -resize_buffer_@suff@(buffer_@suff@ *buffer, npy_intp new_size) +resize_buffer_(string_buffer_<Tag> *buffer, npy_intp new_size) { + using type = typename Tag::type; if (new_size <= buffer->size) { return 0; } if (NPY_UNLIKELY(buffer->pw == NULL)) { - buffer->pw = malloc(sizeof(@type@) * new_size * buffer->len); - } else { - buffer->pw = realloc(buffer->pw, sizeof(@type@) * new_size * buffer->len); + buffer->pw = (type *)malloc(sizeof(type) * new_size * buffer->len); + } + else { + buffer->pw = (type *)realloc(buffer->pw, + sizeof(type) * new_size * buffer->len); } buffer->size = new_size; if (NPY_UNLIKELY(buffer->pw == NULL)) { return -NPY_ENOMEM; - } else { + } + else { return 0; } } - +template <typename Tag, typename type> static npy_intp -count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun, - @type@ *vp, size_t len) +count_run_(type *arr, npy_intp l, npy_intp num, npy_intp minrun, type *vp, + size_t len) { npy_intp sz; - @type@ *pl, *pi, *pj, *pr; + type *pl, *pi, *pj, *pr; if (NPY_UNLIKELY(num - l == 1)) { return 1; @@ -973,17 +1019,20 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun, pl = arr + l * len; /* (not strictly) ascending sequence */ - if (!@TYPE@_LT(pl + len, pl, len)) { - for (pi = pl + len; pi < arr + (num - 1) * len - && !@TYPE@_LT(pi + len, pi, len); pi += len) { + if (!Tag::less(pl + len, pl, len)) { + for (pi = pl + len; + pi < arr + (num - 1) * len && !Tag::less(pi + len, pi, len); + pi += len) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + len; pi < arr + (num - 1) * len - && @TYPE@_LT(pi + len, pi, len); pi += len) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + len; + pi < arr + (num - 1) * len && Tag::less(pi + len, pi, len); + pi += len) { } for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { - @TYPE@_SWAP(pj, pr, len); + Tag::swap(pj, pr, len); } } @@ -993,7 +1042,8 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun, if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -1001,29 +1051,29 @@ count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun, /* insertion sort */ for (; pi < pr; pi += len) { - @TYPE@_COPY(vp, pi, len); + Tag::copy(vp, pi, len); pj = pi; - while (pl < pj && @TYPE@_LT(vp, pj - len, len)) { - @TYPE@_COPY(pj, pj - len, len); + while (pl < pj && Tag::less(vp, pj - len, len)) { + Tag::copy(pj, pj - len, len); pj -= len; } - @TYPE@_COPY(pj, vp, len); + Tag::copy(pj, vp, len); } } return sz; } - +template <typename Tag> static npy_intp -gallop_right_@suff@(const @type@ *arr, const npy_intp size, - const @type@ *key, size_t len) +gallop_right_(const typename Tag::type *arr, const npy_intp size, + const typename Tag::type *key, size_t len) { npy_intp last_ofs, ofs, m; - if (@TYPE@_LT(key, arr, len)) { + if (Tag::less(key, arr, len)) { return 0; } @@ -1036,9 +1086,10 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, break; } - if (@TYPE@_LT(key, arr + ofs * len, len)) { + if (Tag::less(key, arr + ofs * len, len)) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -1049,9 +1100,10 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, while (last_ofs + 1 < ofs) { m = last_ofs + ((ofs - last_ofs) >> 1); - if (@TYPE@_LT(key, arr + m * len, len)) { + if (Tag::less(key, arr + m * len, len)) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -1060,15 +1112,14 @@ gallop_right_@suff@(const @type@ *arr, const npy_intp size, return ofs; } - - +template <typename Tag> static npy_intp -gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key, - size_t len) +gallop_left_(const typename Tag::type *arr, const npy_intp size, + const typename Tag::type *key, size_t len) { npy_intp last_ofs, ofs, l, m, r; - if (@TYPE@_LT(arr + (size - 1) * len, key, len)) { + if (Tag::less(arr + (size - 1) * len, key, len)) { return size; } @@ -1081,9 +1132,10 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key, break; } - if (@TYPE@_LT(arr + (size - ofs - 1) * len, key, len)) { + if (Tag::less(arr + (size - ofs - 1) * len, key, len)) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } @@ -1096,9 +1148,10 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key, while (l + 1 < r) { m = l + ((r - l) >> 1); - if (@TYPE@_LT(arr + m * len, key, len)) { + if (Tag::less(arr + m * len, key, len)) { l = m; - } else { + } + else { r = m; } } @@ -1107,58 +1160,61 @@ gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key, return r; } - +template <typename Tag> static void -merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, - @type@ *p3, size_t len) +merge_left_(typename Tag::type *p1, npy_intp l1, typename Tag::type *p2, + npy_intp l2, typename Tag::type *p3, size_t len) { - @type@ *end = p2 + l2 * len; - memcpy(p3, p1, sizeof(@type@) * l1 * len); + using type = typename Tag::type; + type *end = p2 + l2 * len; + memcpy(p3, p1, sizeof(type) * l1 * len); /* first element must be in p2 otherwise skipped in the caller */ - @TYPE@_COPY(p1, p2, len); + Tag::copy(p1, p2, len); p1 += len; p2 += len; while (p1 < p2 && p2 < end) { - if (@TYPE@_LT(p2, p3, len)) { - @TYPE@_COPY(p1, p2, len); + if (Tag::less(p2, p3, len)) { + Tag::copy(p1, p2, len); p1 += len; p2 += len; - } else { - @TYPE@_COPY(p1, p3, len); + } + else { + Tag::copy(p1, p3, len); p1 += len; p3 += len; } } if (p1 != p2) { - memcpy(p1, p3, sizeof(@type@) * (p2 - p1)); + memcpy(p1, p3, sizeof(type) * (p2 - p1)); } } - +template <typename Tag, typename type> static void -merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, - @type@ *p3, size_t len) +merge_right_(type *p1, npy_intp l1, type *p2, npy_intp l2, type *p3, + size_t len) { npy_intp ofs; - @type@ *start = p1 - len; - memcpy(p3, p2, sizeof(@type@) * l2 * len); + type *start = p1 - len; + memcpy(p3, p2, sizeof(type) * l2 * len); p1 += (l1 - 1) * len; p2 += (l2 - 1) * len; p3 += (l2 - 1) * len; /* first element must be in p1 otherwise skipped in the caller */ - @TYPE@_COPY(p2, p1, len); + Tag::copy(p2, p1, len); p2 -= len; p1 -= len; while (p1 < p2 && start < p1) { - if (@TYPE@_LT(p3, p1, len)) { - @TYPE@_COPY(p2, p1, len); + if (Tag::less(p3, p1, len)) { + Tag::copy(p2, p1, len); p2 -= len; p1 -= len; - } else { - @TYPE@_COPY(p2, p3, len); + } + else { + Tag::copy(p2, p3, len); p2 -= len; p3 -= len; } @@ -1166,25 +1222,25 @@ merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2, if (p1 != p2) { ofs = p2 - start; - memcpy(start + len, p3 - ofs + len, sizeof(@type@) * ofs); + memcpy(start + len, p3 - ofs + len, sizeof(type) * ofs); } } - +template <typename Tag, typename type> static int -merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at, - buffer_@suff@ *buffer, size_t len) +merge_at_(type *arr, const run *stack, const npy_intp at, + string_buffer_<Tag> *buffer, size_t len) { int ret; npy_intp s1, l1, s2, l2, k; - @type@ *p1, *p2; + type *p1, *p2; s1 = stack[at].s; l1 = stack[at].l; s2 = stack[at + 1].s; l2 = stack[at + 1].l; /* arr[s2] belongs to arr[s1+k] */ - @TYPE@_COPY(buffer->pw, arr + s2 * len, len); - k = gallop_right_@suff@(arr + s1 * len, l1, buffer->pw, len); + Tag::copy(buffer->pw, arr + s2 * len, len); + k = gallop_right_<Tag>(arr + s1 * len, l1, buffer->pw, len); if (l1 == k) { /* already sorted */ @@ -1195,30 +1251,35 @@ merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at, l1 -= k; p2 = arr + s2 * len; /* arr[s2-1] belongs to arr[s2+l2] */ - @TYPE@_COPY(buffer->pw, arr + (s2 - 1) * len, len); - l2 = gallop_left_@suff@(arr + s2 * len, l2, buffer->pw, len); + Tag::copy(buffer->pw, arr + (s2 - 1) * len, len); + l2 = gallop_left_<Tag>(arr + s2 * len, l2, buffer->pw, len); if (l2 < l1) { - ret = resize_buffer_@suff@(buffer, l2); + ret = resize_buffer_<Tag>(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - merge_right_@suff@(p1, l1, p2, l2, buffer->pw, len); - } else { - ret = resize_buffer_@suff@(buffer, l1); + merge_right_<Tag>(p1, l1, p2, l2, buffer->pw, len); + } + else { + ret = resize_buffer_<Tag>(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - merge_left_@suff@(p1, l1, p2, l2, buffer->pw, len); + merge_left_<Tag>(p1, l1, p2, l2, buffer->pw, len); } return 0; } - +template <typename Tag, typename type> static int -try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, - buffer_@suff@ *buffer, size_t len) +try_collapse_(type *arr, run *stack, npy_intp *stack_ptr, + string_buffer_<Tag> *buffer, size_t len) { int ret; npy_intp A, B, C, top; @@ -1229,33 +1290,42 @@ try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = merge_at_@suff@(arr, stack, top - 3, buffer, len); + ret = merge_at_<Tag>(arr, stack, top - 3, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = merge_at_@suff@(arr, stack, top - 2, buffer, len); + } + else { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { - ret = merge_at_@suff@(arr, stack, top - 2, buffer, len); + } + else if (1 < top && B <= C) { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -1264,27 +1334,32 @@ try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, return 0; } - +template <typename Tag, typename type> static int -force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, - buffer_@suff@ *buffer, size_t len) +force_collapse_(type *arr, run *stack, npy_intp *stack_ptr, + string_buffer_<Tag> *buffer, size_t len) { int ret; npy_intp top = *stack_ptr; while (2 < top) { if (stack[top - 3].l <= stack[top - 1].l) { - ret = merge_at_@suff@(arr, stack, top - 3, buffer, len); + ret = merge_at_<Tag>(arr, stack, top - 3, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = merge_at_@suff@(arr, stack, top - 2, buffer, len); + } + else { + ret = merge_at_<Tag>(arr, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -1292,25 +1367,28 @@ force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr, } if (1 < top) { - ret = merge_at_@suff@(arr, stack, top - 2, buffer, len); + ret = merge_at_<Tag>(arr, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - +template <typename Tag> NPY_NO_EXPORT int -timsort_@suff@(void *start, npy_intp num, void *varr) +string_timsort_(void *start, npy_intp num, void *varr) { - PyArrayObject *arr = varr; + using type = typename Tag::type; + PyArrayObject *arr = reinterpret_cast<PyArrayObject *>(varr); size_t elsize = PyArray_ITEMSIZE(arr); - size_t len = elsize / sizeof(@type@); + size_t len = elsize / sizeof(type); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; - buffer_@suff@ buffer; + string_buffer_<Tag> buffer; /* Items that have zero size don't make sense to sort */ if (len == 0) { @@ -1323,26 +1401,33 @@ timsort_@suff@(void *start, npy_intp num, void *varr) stack_ptr = 0; minrun = compute_min_run_short(num); /* used for insertion sort and gallop key */ - ret = resize_buffer_@suff@(&buffer, 1); + ret = resize_buffer_<Tag>(&buffer, 1); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } for (l = 0; l < num;) { - n = count_run_@suff@(start, l, num, minrun, buffer.pw, len); + n = count_run_<Tag>((type *)start, l, num, minrun, buffer.pw, len); /* both s and l are scaled by len */ stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer, len); + ret = try_collapse_<Tag>((type *)start, stack, &stack_ptr, &buffer, + len); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer, len); + ret = force_collapse_<Tag>((type *)start, stack, &stack_ptr, &buffer, len); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; @@ -1353,13 +1438,12 @@ cleanup: return ret; } - /* argsort */ - +template <typename Tag, typename type> static npy_intp -acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun, size_t len) +acount_run_(type *arr, npy_intp *tosort, npy_intp l, npy_intp num, + npy_intp minrun, size_t len) { npy_intp sz; npy_intp vi; @@ -1372,17 +1456,22 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, pl = tosort + l; /* (not strictly) ascending sequence */ - if (!@TYPE@_LT(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) { - for (pi = pl + 1; pi < tosort + num - 1 - && !@TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) { + if (!Tag::less(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) { + for (pi = pl + 1; + pi < tosort + num - 1 && + !Tag::less(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); + ++pi) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + 1; pi < tosort + num - 1 - && @TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && + Tag::less(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); + ++pi) { } for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - INTP_SWAP(*pj, *pr); + std::swap(*pj, *pr); } } @@ -1392,7 +1481,8 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -1403,7 +1493,8 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, vi = *pi; pj = pi; - while (pl < pj && @TYPE@_LT(arr + vi * len, arr + (*(pj - 1)) * len, len)) { + while (pl < pj && + Tag::less(arr + vi * len, arr + (*(pj - 1)) * len, len)) { *pj = *(pj - 1); --pj; } @@ -1415,14 +1506,14 @@ acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num, return sz; } - +template <typename Tag, typename type> static npy_intp -agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, - const npy_intp size, const @type@ *key, size_t len) +agallop_left_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type *key, size_t len) { npy_intp last_ofs, ofs, l, m, r; - if (@TYPE@_LT(arr + tosort[size - 1] * len, key, len)) { + if (Tag::less(arr + tosort[size - 1] * len, key, len)) { return size; } @@ -1435,24 +1526,27 @@ agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, break; } - if (@TYPE@_LT(arr + tosort[size - ofs - 1] * len, key, len)) { + if (Tag::less(arr + tosort[size - ofs - 1] * len, key, len)) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } } - /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */ + /* now that arr[tosort[size-ofs-1]*len] < key <= + * arr[tosort[size-last_ofs-1]*len] */ l = size - ofs - 1; r = size - last_ofs - 1; while (l + 1 < r) { m = l + ((r - l) >> 1); - if (@TYPE@_LT(arr + tosort[m] * len, key, len)) { + if (Tag::less(arr + tosort[m] * len, key, len)) { l = m; - } else { + } + else { r = m; } } @@ -1461,14 +1555,14 @@ agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort, return r; } - +template <typename Tag, typename type> static npy_intp -agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, - const npy_intp size, const @type@ *key, size_t len) +agallop_right_(const type *arr, const npy_intp *tosort, const npy_intp size, + const type *key, size_t len) { npy_intp last_ofs, ofs, m; - if (@TYPE@_LT(key, arr + tosort[0] * len, len)) { + if (Tag::less(key, arr + tosort[0] * len, len)) { return 0; } @@ -1481,9 +1575,10 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, break; } - if (@TYPE@_LT(key, arr + tosort[ofs] * len, len)) { + if (Tag::less(key, arr + tosort[ofs] * len, len)) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -1494,9 +1589,10 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, while (last_ofs + 1 < ofs) { m = last_ofs + ((ofs - last_ofs) >> 1); - if (@TYPE@_LT(key, arr + tosort[m] * len, len)) { + if (Tag::less(key, arr + tosort[m] * len, len)) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -1505,11 +1601,10 @@ agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort, return ofs; } - - +template <typename Tag, typename type> static void -amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, - npy_intp l2, npy_intp *p3, size_t len) +amerge_left_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3, size_t len) { npy_intp *end = p2 + l2; memcpy(p3, p1, sizeof(npy_intp) * l1); @@ -1517,9 +1612,10 @@ amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, *p1++ = *p2++; while (p1 < p2 && p2 < end) { - if (@TYPE@_LT(arr + (*p2) * len, arr + (*p3) * len, len)) { + if (Tag::less(arr + (*p2) * len, arr + (*p3) * len, len)) { *p1++ = *p2++; - } else { + } + else { *p1++ = *p3++; } } @@ -1529,10 +1625,10 @@ amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, } } - +template <typename Tag, typename type> static void -amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, - npy_intp l2, npy_intp *p3, size_t len) +amerge_right_(type *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, + npy_intp *p3, size_t len) { npy_intp ofs; npy_intp *start = p1 - 1; @@ -1544,9 +1640,10 @@ amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, *p2-- = *p1--; while (p1 < p2 && start < p1) { - if (@TYPE@_LT(arr + (*p3) * len, arr + (*p1) * len, len)) { + if (Tag::less(arr + (*p3) * len, arr + (*p1) * len, len)) { *p2-- = *p1--; - } else { + } + else { *p2-- = *p3--; } } @@ -1557,11 +1654,10 @@ amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, } } - - +template <typename Tag, typename type> static int -amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, - const npy_intp at, buffer_intp *buffer, size_t len) +amerge_at_(type *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer, size_t len) { int ret; npy_intp s1, l1, s2, l2, k; @@ -1571,7 +1667,7 @@ amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, s2 = stack[at + 1].s; l2 = stack[at + 1].l; /* tosort[s2] belongs to tosort[s1+k] */ - k = agallop_right_@suff@(arr, tosort + s1, l1, arr + tosort[s2] * len, len); + k = agallop_right_<Tag>(arr, tosort + s1, l1, arr + tosort[s2] * len, len); if (l1 == k) { /* already sorted */ @@ -1582,30 +1678,35 @@ amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack, l1 -= k; p2 = tosort + s2; /* tosort[s2-1] belongs to tosort[s2+l2] */ - l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, - len); + l2 = agallop_left_<Tag>(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len, + len); if (l2 < l1) { ret = resize_buffer_intp(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw, len); - } else { + amerge_right_<Tag>(arr, p1, l1, p2, l2, buffer->pw, len); + } + else { ret = resize_buffer_intp(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } - amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw, len); + amerge_left_<Tag>(arr, p1, l1, p2, l2, buffer->pw, len); } return 0; } - +template <typename Tag, typename type> static int -atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, buffer_intp *buffer, size_t len) +atry_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len) { int ret; npy_intp A, B, C, top; @@ -1616,33 +1717,44 @@ atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 3, buffer, + len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len); + } + else { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer, + len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len); + } + else if (1 < top && B <= C) { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -1651,28 +1763,32 @@ atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, return 0; } - - +template <typename Tag, typename type> static int -aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, buffer_intp *buffer, size_t len) +aforce_collapse_(type *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len) { int ret; npy_intp top = *stack_ptr; while (2 < top) { if (stack[top - 3].l <= stack[top - 1].l) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 3, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len); + } + else { + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -1680,21 +1796,24 @@ aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack, } if (1 < top) { - ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len); + ret = amerge_at_<Tag>(arr, tosort, stack, top - 2, buffer, len); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - +template <typename Tag> NPY_NO_EXPORT int -atimsort_@suff@(void *start, npy_intp *tosort, npy_intp num, void *varr) +string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) { - PyArrayObject *arr = varr; + using type = typename Tag::type; + PyArrayObject *arr = reinterpret_cast<PyArrayObject *>(varr); size_t elsize = PyArray_ITEMSIZE(arr); - size_t len = elsize / sizeof(@type@); + size_t len = elsize / sizeof(type); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; @@ -1711,21 +1830,27 @@ atimsort_@suff@(void *start, npy_intp *tosort, npy_intp num, void *varr) minrun = compute_min_run_short(num); for (l = 0; l < num;) { - n = acount_run_@suff@(start, tosort, l, num, minrun, len); + n = acount_run_<Tag>((type *)start, tosort, l, num, minrun, len); /* both s and l are scaled by len */ stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = atry_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len); + ret = atry_collapse_<Tag>((type *)start, tosort, stack, &stack_ptr, + &buffer, len); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = aforce_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len); + ret = aforce_collapse_<Tag>((type *)start, tosort, stack, &stack_ptr, + &buffer, len); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; @@ -1736,25 +1861,18 @@ cleanup: return ret; } - -/**end repeat**/ - - - /* ***************************************************************************** ** GENERIC SORT ** ***************************************************************************** */ - typedef struct { char *pw; npy_intp size; size_t len; } buffer_char; - static NPY_INLINE int resize_buffer_char(buffer_char *buffer, npy_intp new_size) { @@ -1763,24 +1881,26 @@ resize_buffer_char(buffer_char *buffer, npy_intp new_size) } if (NPY_UNLIKELY(buffer->pw == NULL)) { - buffer->pw = malloc(sizeof(char) * new_size * buffer->len); - } else { - buffer->pw = realloc(buffer->pw, sizeof(char) * new_size * buffer->len); + buffer->pw = (char *)malloc(sizeof(char) * new_size * buffer->len); + } + else { + buffer->pw = (char *)realloc(buffer->pw, + sizeof(char) * new_size * buffer->len); } buffer->size = new_size; if (NPY_UNLIKELY(buffer->pw == NULL)) { return -NPY_ENOMEM; - } else { + } + else { return 0; } } - static npy_intp -npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, - char *vp, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, char *vp, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) { npy_intp sz; char *pl, *pi, *pj, *pr; @@ -1793,12 +1913,15 @@ npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, /* (not strictly) ascending sequence */ if (cmp(pl, pl + len, py_arr) <= 0) { - for (pi = pl + len; pi < arr + (num - 1) * len - && cmp(pi, pi + len, py_arr) <= 0; pi += len) { + for (pi = pl + len; + pi < arr + (num - 1) * len && cmp(pi, pi + len, py_arr) <= 0; + pi += len) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + len; pi < arr + (num - 1) * len - && cmp(pi + len, pi, py_arr) < 0; pi += len) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + len; + pi < arr + (num - 1) * len && cmp(pi + len, pi, py_arr) < 0; + pi += len) { } for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) { @@ -1812,7 +1935,8 @@ npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -1835,7 +1959,6 @@ npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, return sz; } - static npy_intp npy_gallop_right(const char *arr, const npy_intp size, const char *key, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) @@ -1857,7 +1980,8 @@ npy_gallop_right(const char *arr, const npy_intp size, const char *key, if (cmp(key, arr + ofs * len, py_arr) < 0) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -1870,7 +1994,8 @@ npy_gallop_right(const char *arr, const npy_intp size, const char *key, if (cmp(key, arr + m * len, py_arr) < 0) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -1879,8 +2004,6 @@ npy_gallop_right(const char *arr, const npy_intp size, const char *key, return ofs; } - - static npy_intp npy_gallop_left(const char *arr, const npy_intp size, const char *key, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) @@ -1902,7 +2025,8 @@ npy_gallop_left(const char *arr, const npy_intp size, const char *key, if (cmp(arr + (size - ofs - 1) * len, key, py_arr) < 0) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } @@ -1917,7 +2041,8 @@ npy_gallop_left(const char *arr, const npy_intp size, const char *key, if (cmp(arr + m * len, key, py_arr) < 0) { l = m; - } else { + } + else { r = m; } } @@ -1926,11 +2051,9 @@ npy_gallop_left(const char *arr, const npy_intp size, const char *key, return r; } - static void -npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, - char *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) { char *end = p2 + l2 * len; memcpy(p3, p1, sizeof(char) * l1 * len); @@ -1944,7 +2067,8 @@ npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, GENERIC_COPY(p1, p2, len); p1 += len; p2 += len; - } else { + } + else { GENERIC_COPY(p1, p3, len); p1 += len; p3 += len; @@ -1956,11 +2080,9 @@ npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, } } - static void -npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, - char *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, + size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) { npy_intp ofs; char *start = p1 - len; @@ -1978,7 +2100,8 @@ npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, GENERIC_COPY(p2, p1, len); p2 -= len; p1 -= len; - } else { + } + else { GENERIC_COPY(p2, p3, len); p2 -= len; p3 -= len; @@ -1991,12 +2114,10 @@ npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, } } - - static int npy_merge_at(char *arr, const run *stack, const npy_intp at, - buffer_char *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { int ret; npy_intp s1, l1, s2, l2, k; @@ -2024,13 +2145,18 @@ npy_merge_at(char *arr, const run *stack, const npy_intp at, if (l2 < l1) { ret = resize_buffer_char(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } npy_merge_right(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } else { + } + else { ret = resize_buffer_char(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } npy_merge_left(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); } @@ -2038,11 +2164,10 @@ npy_merge_at(char *arr, const run *stack, const npy_intp at, return 0; } - static int npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, - buffer_char *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { int ret; npy_intp A, B, C, top; @@ -2053,33 +2178,44 @@ npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr); + ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); + } + else { + ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { + } + else if (1 < top && B <= C) { ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -2088,11 +2224,10 @@ npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, return 0; } - static int npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, - buffer_char *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { int ret; npy_intp top = *stack_ptr; @@ -2101,15 +2236,20 @@ npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, if (stack[top - 3].l <= stack[top - 1].l) { ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { + } + else { ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -2119,17 +2259,18 @@ npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, if (1 < top) { ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - NPY_NO_EXPORT int npy_timsort(void *start, npy_intp num, void *varr) { - PyArrayObject *arr = varr; + PyArrayObject *arr = reinterpret_cast<PyArrayObject *>(varr); size_t len = PyArray_ITEMSIZE(arr); PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; int ret; @@ -2151,25 +2292,34 @@ npy_timsort(void *start, npy_intp num, void *varr) /* used for insertion sort and gallop key */ ret = resize_buffer_char(&buffer, len); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } for (l = 0; l < num;) { - n = npy_count_run(start, l, num, minrun, buffer.pw, len, cmp, arr); + n = npy_count_run((char *)start, l, num, minrun, buffer.pw, len, cmp, + arr); /* both s and l are scaled by len */ stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = npy_try_collapse(start, stack, &stack_ptr, &buffer, len, cmp, arr); + ret = npy_try_collapse((char *)start, stack, &stack_ptr, &buffer, len, + cmp, arr); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = npy_force_collapse(start, stack, &stack_ptr, &buffer, len, cmp, arr); + ret = npy_force_collapse((char *)start, stack, &stack_ptr, &buffer, len, + cmp, arr); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; @@ -2180,13 +2330,12 @@ cleanup: return ret; } - /* argsort */ static npy_intp npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, - npy_intp minrun, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + npy_intp minrun, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { npy_intp sz; npy_intp vi; @@ -2200,16 +2349,21 @@ npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, /* (not strictly) ascending sequence */ if (cmp(arr + (*pl) * len, arr + (*(pl + 1)) * len, py_arr) <= 0) { - for (pi = pl + 1; pi < tosort + num - 1 - && cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; ++pi) { + for (pi = pl + 1; + pi < tosort + num - 1 && + cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; + ++pi) { } - } else { /* (strictly) descending sequence */ - for (pi = pl + 1; pi < tosort + num - 1 - && cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; ++pi) { + } + else { /* (strictly) descending sequence */ + for (pi = pl + 1; + pi < tosort + num - 1 && + cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; + ++pi) { } for (pj = pl, pr = pi; pj < pr; ++pj, --pr) { - INTP_SWAP(*pj, *pr); + std::swap(*pj, *pr); } } @@ -2219,7 +2373,8 @@ npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, if (sz < minrun) { if (l + minrun < num) { sz = minrun; - } else { + } + else { sz = num - l; } @@ -2230,7 +2385,8 @@ npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, vi = *pi; pj = pi; - while (pl < pj && cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) { + while (pl < pj && + cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) { *pj = *(pj - 1); --pj; } @@ -2242,11 +2398,10 @@ npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, return sz; } - static npy_intp -npy_agallop_left(const char *arr, const npy_intp *tosort, - const npy_intp size, const char *key, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_agallop_left(const char *arr, const npy_intp *tosort, const npy_intp size, + const char *key, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { npy_intp last_ofs, ofs, l, m, r; @@ -2265,13 +2420,15 @@ npy_agallop_left(const char *arr, const npy_intp *tosort, if (cmp(arr + tosort[size - ofs - 1] * len, key, py_arr) < 0) { break; - } else { + } + else { last_ofs = ofs; ofs = (ofs << 1) + 1; } } - /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */ + /* now that arr[tosort[size-ofs-1]*len] < key <= + * arr[tosort[size-last_ofs-1]*len] */ l = size - ofs - 1; r = size - last_ofs - 1; @@ -2280,7 +2437,8 @@ npy_agallop_left(const char *arr, const npy_intp *tosort, if (cmp(arr + tosort[m] * len, key, py_arr) < 0) { l = m; - } else { + } + else { r = m; } } @@ -2289,11 +2447,10 @@ npy_agallop_left(const char *arr, const npy_intp *tosort, return r; } - static npy_intp -npy_agallop_right(const char *arr, const npy_intp *tosort, - const npy_intp size, const char *key, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_agallop_right(const char *arr, const npy_intp *tosort, const npy_intp size, + const char *key, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { npy_intp last_ofs, ofs, m; @@ -2312,7 +2469,8 @@ npy_agallop_right(const char *arr, const npy_intp *tosort, if (cmp(key, arr + tosort[ofs] * len, py_arr) < 0) { break; - } else { + } + else { last_ofs = ofs; /* ofs = 1, 3, 7, 15... */ ofs = (ofs << 1) + 1; @@ -2325,7 +2483,8 @@ npy_agallop_right(const char *arr, const npy_intp *tosort, if (cmp(key, arr + tosort[m] * len, py_arr) < 0) { ofs = m; - } else { + } + else { last_ofs = m; } } @@ -2334,7 +2493,6 @@ npy_agallop_right(const char *arr, const npy_intp *tosort, return ofs; } - static void npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, npy_intp *p3, size_t len, @@ -2348,7 +2506,8 @@ npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, while (p1 < p2 && p2 < end) { if (cmp(arr + (*p2) * len, arr + (*p3) * len, py_arr) < 0) { *p1++ = *p2++; - } else { + } + else { *p1++ = *p3++; } } @@ -2358,9 +2517,8 @@ npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, } } - static void -npy_amerge_right(char *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, +npy_amerge_right(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, npy_intp *p3, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) { @@ -2376,7 +2534,8 @@ npy_amerge_right(char *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, while (p1 < p2 && start < p1) { if (cmp(arr + (*p3) * len, arr + (*p1) * len, py_arr) < 0) { *p2-- = *p1--; - } else { + } + else { *p2-- = *p3--; } } @@ -2387,12 +2546,10 @@ npy_amerge_right(char *arr, npy_intp* p1, npy_intp l1, npy_intp *p2, } } - - static int -npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, - const npy_intp at, buffer_intp *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, const npy_intp at, + buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { int ret; npy_intp s1, l1, s2, l2, k; @@ -2402,8 +2559,8 @@ npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, s2 = stack[at + 1].s; l2 = stack[at + 1].l; /* tosort[s2] belongs to tosort[s1+k] */ - k = npy_agallop_right(arr, tosort + s1, l1, arr + tosort[s2] * len, len, cmp, - py_arr); + k = npy_agallop_right(arr, tosort + s1, l1, arr + tosort[s2] * len, len, + cmp, py_arr); if (l1 == k) { /* already sorted */ @@ -2420,13 +2577,18 @@ npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, if (l2 < l1) { ret = resize_buffer_intp(buffer, l2); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } npy_amerge_right(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); - } else { + } + else { ret = resize_buffer_intp(buffer, l1); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } npy_amerge_left(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr); } @@ -2434,11 +2596,10 @@ npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, return 0; } - static int -npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, - npy_intp *stack_ptr, buffer_intp *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) +npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, + buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, + PyArrayObject *py_arr) { int ret; npy_intp A, B, C, top; @@ -2449,33 +2610,45 @@ npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, C = stack[top - 1].l; if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { + (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { A = stack[top - 3].l; if (A <= C) { - ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr); + ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, + cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += B; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr); + } + else { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, + cmp, py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; } - } else if (1 < top && B <= C) { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr); + } + else if (1 < top && B <= C) { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += C; --top; - } else { + } + else { break; } } @@ -2484,7 +2657,6 @@ npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, return 0; } - static int npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, buffer_intp *buffer, size_t len, @@ -2495,17 +2667,24 @@ npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, while (2 < top) { if (stack[top - 3].l <= stack[top - 1].l) { - ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr); + ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 3].l += stack[top - 2].l; stack[top - 2] = stack[top - 1]; --top; - } else { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr); + } + else { + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } stack[top - 2].l += stack[top - 1].l; --top; @@ -2513,19 +2692,21 @@ npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, } if (1 < top) { - ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr); + ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, + py_arr); - if (NPY_UNLIKELY(ret < 0)) { return ret; } + if (NPY_UNLIKELY(ret < 0)) { + return ret; + } } return 0; } - NPY_NO_EXPORT int npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { - PyArrayObject *arr = varr; + PyArrayObject *arr = reinterpret_cast<PyArrayObject *>(varr); size_t len = PyArray_ITEMSIZE(arr); PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; int ret; @@ -2544,23 +2725,28 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) minrun = compute_min_run_short(num); for (l = 0; l < num;) { - n = npy_acount_run(start, tosort, l, num, minrun, len, cmp, arr); + n = npy_acount_run((char *)start, tosort, l, num, minrun, len, cmp, + arr); /* both s and l are scaled by len */ stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = npy_atry_collapse(start, tosort, stack, &stack_ptr, &buffer, len, cmp, - arr); + ret = npy_atry_collapse((char *)start, tosort, stack, &stack_ptr, + &buffer, len, cmp, arr); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } l += n; } - ret = npy_aforce_collapse(start, tosort, stack, &stack_ptr, &buffer, len, - cmp, arr); + ret = npy_aforce_collapse((char *)start, tosort, stack, &stack_ptr, + &buffer, len, cmp, arr); - if (NPY_UNLIKELY(ret < 0)) { goto cleanup; } + if (NPY_UNLIKELY(ret < 0)) { + goto cleanup; + } ret = 0; @@ -2570,3 +2756,239 @@ cleanup: } return ret; } + +/*************************************** + * C > C++ dispatch + ***************************************/ + +NPY_NO_EXPORT int +timsort_bool(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::bool_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_byte(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::byte_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_ubyte(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::ubyte_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_short(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::short_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_ushort(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::ushort_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_int(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::int_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_uint(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::uint_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_long(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::long_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_ulong(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::ulong_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_longlong(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::longlong_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_ulonglong(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::ulonglong_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_half(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::half_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_float(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::float_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_double(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::double_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_longdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::longdouble_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_cfloat(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::cfloat_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_cdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::cdouble_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_clongdouble(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::clongdouble_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_datetime(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::datetime_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_timedelta(void *start, npy_intp num, void *NPY_UNUSED(varr)) +{ + return timsort_<npy::timedelta_tag>(start, num); +} +NPY_NO_EXPORT int +timsort_string(void *start, npy_intp num, void *varr) +{ + return string_timsort_<npy::string_tag>(start, num, varr); +} +NPY_NO_EXPORT int +timsort_unicode(void *start, npy_intp num, void *varr) +{ + return string_timsort_<npy::unicode_tag>(start, num, varr); +} + +NPY_NO_EXPORT int +atimsort_bool(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::bool_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_byte(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::byte_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_ubyte(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::ubyte_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_short(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::short_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_ushort(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::ushort_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_int(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::int_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_uint(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::uint_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_long(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::long_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_ulong(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::ulong_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_longlong(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::longlong_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_ulonglong(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::ulonglong_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_half(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::half_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_float(void *v, npy_intp *tosort, npy_intp num, void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::float_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_double(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::double_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_longdouble(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::longdouble_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_cfloat(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::cfloat_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_cdouble(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::cdouble_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_clongdouble(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::clongdouble_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_datetime(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::datetime_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_timedelta(void *v, npy_intp *tosort, npy_intp num, + void *NPY_UNUSED(varr)) +{ + return atimsort_<npy::timedelta_tag>(v, tosort, num); +} +NPY_NO_EXPORT int +atimsort_string(void *v, npy_intp *tosort, npy_intp num, void *varr) +{ + return string_atimsort_<npy::string_tag>(v, tosort, num, varr); +} +NPY_NO_EXPORT int +atimsort_unicode(void *v, npy_intp *tosort, npy_intp num, void *varr) +{ + return string_atimsort_<npy::unicode_tag>(v, tosort, num, varr); +} diff --git a/numpy/core/tests/examples/checks.pyx b/numpy/core/tests/examples/cython/checks.pyx index 151979db7..151979db7 100644 --- a/numpy/core/tests/examples/checks.pyx +++ b/numpy/core/tests/examples/cython/checks.pyx diff --git a/numpy/core/tests/examples/setup.py b/numpy/core/tests/examples/cython/setup.py index 6e34aa778..6e34aa778 100644 --- a/numpy/core/tests/examples/setup.py +++ b/numpy/core/tests/examples/cython/setup.py diff --git a/numpy/core/tests/examples/limited_api/limited_api.c b/numpy/core/tests/examples/limited_api/limited_api.c new file mode 100644 index 000000000..698c54c57 --- /dev/null +++ b/numpy/core/tests/examples/limited_api/limited_api.c @@ -0,0 +1,17 @@ +#define Py_LIMITED_API 0x03060000 + +#include <Python.h> +#include <numpy/arrayobject.h> +#include <numpy/ufuncobject.h> + +static PyModuleDef moduledef = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "limited_api" +}; + +PyMODINIT_FUNC PyInit_limited_api(void) +{ + import_array(); + import_umath(); + return PyModule_Create(&moduledef); +} diff --git a/numpy/core/tests/examples/limited_api/setup.py b/numpy/core/tests/examples/limited_api/setup.py new file mode 100644 index 000000000..18747dc80 --- /dev/null +++ b/numpy/core/tests/examples/limited_api/setup.py @@ -0,0 +1,22 @@ +""" +Build an example package using the limited Python C API. +""" + +import numpy as np +from setuptools import setup, Extension +import os + +macros = [("NPY_NO_DEPRECATED_API", 0), ("Py_LIMITED_API", "0x03060000")] + +limited_api = Extension( + "limited_api", + sources=[os.path.join('.', "limited_api.c")], + include_dirs=[np.get_include()], + define_macros=macros, +) + +extensions = [limited_api] + +setup( + ext_modules=extensions +) diff --git a/numpy/core/tests/test_cython.py b/numpy/core/tests/test_cython.py index a1f09d0fe..9896de0ec 100644 --- a/numpy/core/tests/test_cython.py +++ b/numpy/core/tests/test_cython.py @@ -32,7 +32,7 @@ def install_temp(request, tmp_path): # Based in part on test_cython from random.tests.test_extending here = os.path.dirname(__file__) - ext_dir = os.path.join(here, "examples") + ext_dir = os.path.join(here, "examples", "cython") cytest = str(tmp_path / "cytest") diff --git a/numpy/core/tests/test_limited_api.py b/numpy/core/tests/test_limited_api.py new file mode 100644 index 000000000..0bb543d59 --- /dev/null +++ b/numpy/core/tests/test_limited_api.py @@ -0,0 +1,41 @@ +import os +import shutil +import subprocess +import sys +import sysconfig +import pytest + + +@pytest.mark.xfail( + sysconfig.get_config_var("Py_DEBUG"), + reason=( + "Py_LIMITED_API is incompatible with Py_DEBUG, Py_TRACE_REFS, " + "and Py_REF_DEBUG" + ), +) +def test_limited_api(tmp_path): + """Test building a third-party C extension with the limited API.""" + # Based in part on test_cython from random.tests.test_extending + + here = os.path.dirname(__file__) + ext_dir = os.path.join(here, "examples", "limited_api") + + cytest = str(tmp_path / "limited_api") + + shutil.copytree(ext_dir, cytest) + # build the examples and "install" them into a temporary directory + + install_log = str(tmp_path / "tmp_install_log.txt") + subprocess.check_call( + [ + sys.executable, + "setup.py", + "build", + "install", + "--prefix", str(tmp_path / "installdir"), + "--single-version-externally-managed", + "--record", + install_log, + ], + cwd=cytest, + ) diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index 12a67c44d..605baefe6 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -330,16 +330,18 @@ class _SIMD_FP(_Test_Utility): square = self.square(vdata) assert square == data_square - @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil), - ("self.trunc", math.trunc)]) + @pytest.mark.parametrize("intrin, func", [("ceil", math.ceil), + ("trunc", math.trunc), ("floor", math.floor), ("rint", round)]) def test_rounding(self, intrin, func): """ Test intrinsics: + npyv_rint_##SFX npyv_ceil_##SFX npyv_trunc_##SFX + npyv_floor##SFX """ intrin_name = intrin - intrin = eval(intrin) + intrin = getattr(self, intrin) pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() # special cases round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf)) @@ -347,20 +349,25 @@ class _SIMD_FP(_Test_Utility): data_round = [desired]*self.nlanes _round = intrin(self.setall(case)) assert _round == pytest.approx(data_round, nan_ok=True) + for x in range(0, 2**20, 256**2): for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15): - data = [x*w+a for a in range(self.nlanes)] - vdata = self.load(data) + data = self.load([(x+a)*w for a in range(self.nlanes)]) data_round = [func(x) for x in data] - _round = intrin(vdata) + _round = intrin(data) assert _round == data_round + # signed zero - if "ceil" in intrin_name or "trunc" in intrin_name: - for w in (-0.25, -0.30, -0.45): - _round = self._to_unsigned(intrin(self.setall(w))) - data_round = self._to_unsigned(self.setall(-0.0)) - assert _round == data_round - + if intrin_name == "floor": + data_szero = (-0.0,) + else: + data_szero = (-0.0, -0.25, -0.30, -0.45, -0.5) + + for w in data_szero: + _round = self._to_unsigned(intrin(self.setall(w))) + data_round = self._to_unsigned(self.setall(-0.0)) + assert _round == data_round + def test_max(self): """ Test intrinsics: diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index aacd2c676..0374ae8d7 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -892,6 +892,9 @@ def appenddecl(decl, decl2, force=1): selectpattern = re.compile( r'\s*(?P<this>(@\(@.*?@\)@|\*[\d*]+|\*\s*@\(@.*?@\)@|))(?P<after>.*)\Z', re.I) +typedefpattern = re.compile( + r'(?:,(?P<attributes>[\w(),]+))?(::)?(?P<name>\b[a-z$_][\w$]*\b)' + r'(?:\((?P<params>[\w,]*)\))?\Z', re.I) nameargspattern = re.compile( r'\s*(?P<name>\b[\w$]+\b)\s*(@\(@\s*(?P<args>[\w\s,]*)\s*@\)@|)\s*((result(\s*@\(@\s*(?P<result>\b[\w$]+\b)\s*@\)@|))|(bind\s*@\(@\s*(?P<bind>.*)\s*@\)@))*\s*\Z', re.I) operatorpattern = re.compile( @@ -914,6 +917,17 @@ def _is_intent_callback(vdecl): return 0 +def _resolvetypedefpattern(line): + line = ''.join(line.split()) # removes whitespace + m1 = typedefpattern.match(line) + print(line, m1) + if m1: + attrs = m1.group('attributes') + attrs = [a.lower() for a in attrs.split(',')] if attrs else [] + return m1.group('name'), attrs, m1.group('params') + return None, [], None + + def _resolvenameargspattern(line): line = markouterparen(line) m1 = nameargspattern.match(line) @@ -962,7 +976,13 @@ def analyzeline(m, case, line): block = 'python module' elif re.match(r'abstract\s*interface', block, re.I): block = 'abstract interface' - name, args, result, bind = _resolvenameargspattern(m.group('after')) + if block == 'type': + name, attrs, _ = _resolvetypedefpattern(m.group('after')) + groupcache[groupcounter]['vars'][name] = dict(attrspec = attrs) + args = [] + result = None + else: + name, args, result, _ = _resolvenameargspattern(m.group('after')) if name is None: if block == 'block data': name = '_BLOCK_DATA_' diff --git a/numpy/f2py/tests/src/crackfortran/accesstype.f90 b/numpy/f2py/tests/src/crackfortran/accesstype.f90 new file mode 100644 index 000000000..e2cbd445d --- /dev/null +++ b/numpy/f2py/tests/src/crackfortran/accesstype.f90 @@ -0,0 +1,13 @@ +module foo + public + type, private, bind(c) :: a + integer :: i + end type a + type, bind(c) :: b_ + integer :: j + end type b_ + public :: b_ + type :: c + integer :: k + end type c +end module foo diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index e33e12d62..ea618bf33 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -44,6 +44,15 @@ class TestPublicPrivate: assert "private" not in mod["vars"]["seta"]["attrspec"] assert "public" in mod["vars"]["seta"]["attrspec"] + def test_access_type(self, tmp_path): + fpath = util.getpath("tests", "src", "crackfortran", "accesstype.f90") + mod = crackfortran.crackfortran([str(fpath)]) + assert len(mod) == 1 + tt = mod[0]['vars'] + assert set(tt['a']['attrspec']) == {'private', 'bind(c)'} + assert set(tt['b_']['attrspec']) == {'public', 'bind(c)'} + assert set(tt['c']['attrspec']) == {'public'} + class TestModuleProcedure(): def test_moduleOperators(self, tmp_path): |