diff options
| -rw-r--r-- | numpy/core/setup.py | 2 | ||||
| -rw-r--r-- | numpy/core/src/npysort/heapsort.cpp | 143 | ||||
| -rw-r--r-- | numpy/core/src/npysort/quicksort.c.src | 651 | ||||
| -rw-r--r-- | numpy/core/src/npysort/quicksort.cpp | 944 | ||||
| -rw-r--r-- | numpy/core/src/npysort/x86-qsort.h | 20 |
5 files changed, 1100 insertions, 660 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index d002377c6..c5465196f 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -949,7 +949,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'vdot.c'), join('src', 'common', 'npy_sort.h.src'), join('src', 'npysort', 'x86-qsort.dispatch.c.src'), - join('src', 'npysort', 'quicksort.c.src'), + join('src', 'npysort', 'quicksort.cpp'), join('src', 'npysort', 'mergesort.cpp'), join('src', 'npysort', 'timsort.cpp'), join('src', 'npysort', 'heapsort.cpp'), diff --git a/numpy/core/src/npysort/heapsort.cpp b/numpy/core/src/npysort/heapsort.cpp index 3893bf5c2..1f31ed20a 100644 --- a/numpy/core/src/npysort/heapsort.cpp +++ b/numpy/core/src/npysort/heapsort.cpp @@ -47,7 +47,7 @@ */ template <typename Tag, typename type> -static int +NPY_NO_EXPORT int heapsort_(type *start, npy_intp n) { type tmp, *a; @@ -98,7 +98,7 @@ heapsort_(type *start, npy_intp n) } template <typename Tag, typename type> -static int +NPY_NO_EXPORT int aheapsort_(type *vv, npy_intp *tosort, npy_intp n) { type *v = vv; @@ -382,212 +382,349 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) /*************************************** * C > C++ dispatch ***************************************/ - +template NPY_NO_EXPORT int +heapsort_<npy::bool_tag, npy_bool>(npy_bool *, npy_intp); NPY_NO_EXPORT int heapsort_bool(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::bool_tag>((npy_bool *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::byte_tag, npy_byte>(npy_byte *, npy_intp); NPY_NO_EXPORT int heapsort_byte(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::byte_tag>((npy_byte *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::ubyte_tag, npy_ubyte>(npy_ubyte *, npy_intp); NPY_NO_EXPORT int heapsort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::ubyte_tag>((npy_ubyte *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::short_tag, npy_short>(npy_short *, npy_intp); NPY_NO_EXPORT int heapsort_short(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::short_tag>((npy_short *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::ushort_tag, npy_ushort>(npy_ushort *, npy_intp); NPY_NO_EXPORT int heapsort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::ushort_tag>((npy_ushort *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::int_tag, npy_int>(npy_int *, npy_intp); NPY_NO_EXPORT int heapsort_int(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::int_tag>((npy_int *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::uint_tag, npy_uint>(npy_uint *, npy_intp); NPY_NO_EXPORT int heapsort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::uint_tag>((npy_uint *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::long_tag, npy_long>(npy_long *, npy_intp); NPY_NO_EXPORT int heapsort_long(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::long_tag>((npy_long *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::ulong_tag, npy_ulong>(npy_ulong *, npy_intp); NPY_NO_EXPORT int heapsort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::ulong_tag>((npy_ulong *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::longlong_tag, npy_longlong>(npy_longlong *, npy_intp); NPY_NO_EXPORT int heapsort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::longlong_tag>((npy_longlong *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::ulonglong_tag, npy_ulonglong>(npy_ulonglong *, npy_intp); NPY_NO_EXPORT int heapsort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::ulonglong_tag>((npy_ulonglong *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::half_tag, npy_half>(npy_half *, npy_intp); NPY_NO_EXPORT int heapsort_half(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::half_tag>((npy_half *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::float_tag, npy_float>(npy_float *, npy_intp); NPY_NO_EXPORT int heapsort_float(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::float_tag>((npy_float *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::double_tag, npy_double>(npy_double *, npy_intp); NPY_NO_EXPORT int heapsort_double(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::double_tag>((npy_double *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::longdouble_tag, npy_longdouble>(npy_longdouble *, npy_intp); NPY_NO_EXPORT int heapsort_longdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::longdouble_tag>((npy_longdouble *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::cfloat_tag, npy_cfloat>(npy_cfloat *, npy_intp); NPY_NO_EXPORT int heapsort_cfloat(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::cfloat_tag>((npy_cfloat *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::cdouble_tag, npy_cdouble>(npy_cdouble *, npy_intp); NPY_NO_EXPORT int heapsort_cdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::cdouble_tag>((npy_cdouble *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::clongdouble_tag, npy_clongdouble>(npy_clongdouble *, npy_intp); NPY_NO_EXPORT int heapsort_clongdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::clongdouble_tag>((npy_clongdouble *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::datetime_tag, npy_datetime>(npy_datetime *, npy_intp); NPY_NO_EXPORT int heapsort_datetime(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::datetime_tag>((npy_datetime *)start, n); } + +template NPY_NO_EXPORT int +heapsort_<npy::timedelta_tag, npy_timedelta>(npy_timedelta *, npy_intp); NPY_NO_EXPORT int heapsort_timedelta(void *start, npy_intp n, void *NPY_UNUSED(varr)) { return heapsort_<npy::timedelta_tag>((npy_timedelta *)start, n); } +template NPY_NO_EXPORT int +aheapsort_<npy::bool_tag, npy_bool>(npy_bool *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_bool(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::bool_tag>((npy_bool *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::byte_tag, npy_byte>(npy_byte *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_byte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::byte_tag>((npy_byte *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::ubyte_tag, npy_ubyte>(npy_ubyte *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_ubyte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::ubyte_tag>((npy_ubyte *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::short_tag, npy_short>(npy_short *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_short(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::short_tag>((npy_short *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::ushort_tag, npy_ushort>(npy_ushort *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_ushort(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::ushort_tag>((npy_ushort *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::int_tag, npy_int>(npy_int *vv, npy_intp *tosort, npy_intp n); NPY_NO_EXPORT int aheapsort_int(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::int_tag>((npy_int *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::uint_tag, npy_uint>(npy_uint *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_uint(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::uint_tag>((npy_uint *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::long_tag, npy_long>(npy_long *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_long(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::long_tag>((npy_long *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::ulong_tag, npy_ulong>(npy_ulong *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_ulong(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::ulong_tag>((npy_ulong *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::longlong_tag, npy_longlong>(npy_longlong *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_longlong(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::longlong_tag>((npy_longlong *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::ulonglong_tag, npy_ulonglong>(npy_ulonglong *vv, + npy_intp *tosort, npy_intp n); NPY_NO_EXPORT int aheapsort_ulonglong(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::ulonglong_tag>((npy_ulonglong *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::half_tag, npy_half>(npy_half *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_half(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::half_tag>((npy_half *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::float_tag, npy_float>(npy_float *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_float(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::float_tag>((npy_float *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::double_tag, npy_double>(npy_double *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_double(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::double_tag>((npy_double *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::longdouble_tag, npy_longdouble>(npy_longdouble *vv, + npy_intp *tosort, npy_intp n); NPY_NO_EXPORT int aheapsort_longdouble(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::longdouble_tag>((npy_longdouble *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::cfloat_tag, npy_cfloat>(npy_cfloat *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_cfloat(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::cfloat_tag>((npy_cfloat *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::cdouble_tag, npy_cdouble>(npy_cdouble *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_cdouble(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::cdouble_tag>((npy_cdouble *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::clongdouble_tag, npy_clongdouble>(npy_clongdouble *vv, + npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_clongdouble(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::clongdouble_tag>((npy_clongdouble *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::datetime_tag, npy_datetime>(npy_datetime *vv, npy_intp *tosort, + npy_intp n); NPY_NO_EXPORT int aheapsort_datetime(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) { return aheapsort_<npy::datetime_tag>((npy_datetime *)vv, tosort, n); } + +template NPY_NO_EXPORT int +aheapsort_<npy::timedelta_tag, npy_timedelta>(npy_timedelta *vv, + npy_intp *tosort, npy_intp n); NPY_NO_EXPORT int aheapsort_timedelta(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src deleted file mode 100644 index b4b060720..000000000 --- a/numpy/core/src/npysort/quicksort.c.src +++ /dev/null @@ -1,651 +0,0 @@ -/* -*- c -*- */ - -/* - * The purpose of this module is to add faster sort functions - * that are type-specific. This is done by altering the - * function table for the builtin descriptors. - * - * These sorting functions are copied almost directly from numarray - * with a few modifications (complex comparisons compare the imaginary - * part if the real parts are equal, for example), and the names - * are changed. - * - * The original sorting code is due to Charles R. Harris who wrote - * it for numarray. - */ - -/* - * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so - * the code switches to the O(NlogN) worst case heapsort if not enough progress - * is made on the large side of the two quicksort partitions. This improves the - * worst case while still retaining the speed of quicksort for the common case. - * This is variant known as introsort. - * - * - * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): - * # sort remainder with heapsort if we are not making enough progress - * # we arbitrarily choose 2 * log(n) as the cutoff point - * if recursion_limit < 0: - * heapsort(lower, higher) - * return - * - * if lower < higher: - * pivot_pos = partition(lower, higher) - * # recurse into smaller first and leave larger on stack - * # this limits the required stack space - * if (pivot_pos - lower > higher - pivot_pos): - * quicksort(pivot_pos + 1, higher, recursion_limit - 1) - * quicksort(lower, pivot_pos, recursion_limit - 1) - * else: - * quicksort(lower, pivot_pos, recursion_limit - 1) - * quicksort(pivot_pos + 1, higher, recursion_limit - 1) - * - * - * the below code implements this converted to an iteration and as an - * additional minor optimization skips the recursion depth checking on the - * smaller partition as it is always less than half of the remaining data and - * will thus terminate fast enough - */ - -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "npy_sort.h" -#include "npysort_common.h" -#include "npy_cpu_features.h" -#include "x86-qsort.h" -#include <stdlib.h> - -#ifndef NPY_DISABLE_OPTIMIZATION - #include "x86-qsort.dispatch.h" -#endif - -#define NOT_USED NPY_UNUSED(unused) -/* - * pushing largest partition has upper bound of log2(n) space - * we store two pointers each time - */ -#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) -#define SMALL_QUICKSORT 15 -#define SMALL_MERGESORT 20 -#define SMALL_STRING 16 - - -/* - ***************************************************************************** - ** NUMERIC SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, - * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, half, float, double, longdouble, - * cfloat, cdouble, clongdouble, datetime, timedelta# - * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, - * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, - * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# - * #AVX512 = 0*5, 1, 1, 0*5, 1, 0*7# - */ - -NPY_NO_EXPORT int -quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) -{ - -#if @AVX512@ - void (*dispfunc)(void*, npy_intp) = NULL; - NPY_CPU_DISPATCH_CALL_XB(dispfunc = &x86_quicksort_@suff@); - if (dispfunc) { - (*dispfunc)(start, num); - return 0; - } -#endif - - @type@ vp; - @type@ *pl = start; - @type@ *pr = pl + num - 1; - @type@ *stack[PYA_QS_STACK]; - @type@ **sptr = stack; - @type@ *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - heapsort_@suff@(pl, pr - pl + 1, NULL); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl); - if (@TYPE@_LT(*pr, *pm)) @TYPE@_SWAP(*pr, *pm); - if (@TYPE@_LT(*pm, *pl)) @TYPE@_SWAP(*pm, *pl); - vp = *pm; - pi = pl; - pj = pr - 1; - @TYPE@_SWAP(*pm, *pj); - for (;;) { - do ++pi; while (@TYPE@_LT(*pi, vp)); - do --pj; while (@TYPE@_LT(vp, *pj)); - if (pi >= pj) { - break; - } - @TYPE@_SWAP(*pi,*pj); - } - pk = pr - 1; - @TYPE@_SWAP(*pi, *pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vp = *pi; - pj = pi; - pk = pi - 1; - while (pj > pl && @TYPE@_LT(vp, *pk)) { - *pj-- = *pk--; - } - *pj = vp; - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - - -NPY_NO_EXPORT int -aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) -{ - @type@ *v = vv; - @type@ vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr = stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); - if (@TYPE@_LT(v[*pr],v[*pm])) INTP_SWAP(*pr, *pm); - if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); - vp = v[*pm]; - pi = pl; - pj = pr - 1; - INTP_SWAP(*pm, *pj); - for (;;) { - do ++pi; while (@TYPE@_LT(v[*pi], vp)); - do --pj; while (@TYPE@_LT(vp, v[*pj])); - if (pi >= pj) { - break; - } - INTP_SWAP(*pi, *pj); - } - pk = pr - 1; - INTP_SWAP(*pi,*pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v[vi]; - pj = pi; - pk = pi - 1; - while (pj > pl && @TYPE@_LT(vp, v[*pk])) { - *pj-- = *pk--; - } - *pj = vi; - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -/**end repeat**/ - - -/* - ***************************************************************************** - ** STRING SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #TYPE = STRING, UNICODE# - * #suff = string, unicode# - * #type = npy_char, npy_ucs4# - */ - -NPY_NO_EXPORT int -quicksort_@suff@(void *start, npy_intp num, void *varr) -{ - PyArrayObject *arr = varr; - const size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); - @type@ *vp; - @type@ *pl = start; - @type@ *pr = pl + (num - 1)*len; - @type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - vp = malloc(PyArray_ITEMSIZE(arr)); - if (vp == NULL) { - return -NPY_ENOMEM; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - heapsort_@suff@(pl, (pr - pl) / len + 1, varr); - goto stack_pop; - } - while ((size_t)(pr - pl) > SMALL_QUICKSORT*len) { - /* quicksort partition */ - pm = pl + (((pr - pl)/len) >> 1)*len; - if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len); - if (@TYPE@_LT(pr, pm, len)) @TYPE@_SWAP(pr, pm, len); - if (@TYPE@_LT(pm, pl, len)) @TYPE@_SWAP(pm, pl, len); - @TYPE@_COPY(vp, pm, len); - pi = pl; - pj = pr - len; - @TYPE@_SWAP(pm, pj, len); - for (;;) { - do pi += len; while (@TYPE@_LT(pi, vp, len)); - do pj -= len; while (@TYPE@_LT(vp, pj, len)); - if (pi >= pj) { - break; - } - @TYPE@_SWAP(pi, pj, len); - } - pk = pr - len; - @TYPE@_SWAP(pi, pk, len); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + len; - *sptr++ = pr; - pr = pi - len; - } - else { - *sptr++ = pl; - *sptr++ = pi - len; - pl = pi + len; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + len; pi <= pr; pi += len) { - @TYPE@_COPY(vp, pi, len); - pj = pi; - pk = pi - len; - while (pj > pl && @TYPE@_LT(vp, pk, len)) { - @TYPE@_COPY(pj, pk, len); - pj -= len; - pk -= len; - } - @TYPE@_COPY(pj, vp, len); - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - free(vp); - return 0; -} - - -NPY_NO_EXPORT int -aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) -{ - @type@ *v = vv; - PyArrayObject *arr = varr; - size_t len = PyArray_ITEMSIZE(arr)/sizeof(@type@); - @type@ *vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr=stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (len == 0) { - return 0; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - aheapsort_@suff@(vv, pl, pr - pl + 1, varr); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl); - if (@TYPE@_LT(v + (*pr)*len, v + (*pm)*len, len)) INTP_SWAP(*pr, *pm); - if (@TYPE@_LT(v + (*pm)*len, v + (*pl)*len, len)) INTP_SWAP(*pm, *pl); - vp = v + (*pm)*len; - pi = pl; - pj = pr - 1; - INTP_SWAP(*pm,*pj); - for (;;) { - do ++pi; while (@TYPE@_LT(v + (*pi)*len, vp, len)); - do --pj; while (@TYPE@_LT(vp, v + (*pj)*len, len)); - if (pi >= pj) { - break; - } - INTP_SWAP(*pi,*pj); - } - pk = pr - 1; - INTP_SWAP(*pi,*pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v + vi*len; - pj = pi; - pk = pi - 1; - while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) { - *pj-- = *pk--; - } - *pj = vi; - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} - -/**end repeat**/ - - -/* - ***************************************************************************** - ** GENERIC SORT ** - ***************************************************************************** - */ - - -NPY_NO_EXPORT int -npy_quicksort(void *start, npy_intp num, void *varr) -{ - PyArrayObject *arr = varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; - char *vp; - char *pl = start; - char *pr = pl + (num - 1)*elsize; - char *stack[PYA_QS_STACK]; - char **sptr = stack; - char *pm, *pi, *pj, *pk; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (elsize == 0) { - return 0; - } - - vp = malloc(elsize); - if (vp == NULL) { - return -NPY_ENOMEM; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - npy_heapsort(pl, (pr - pl) / elsize + 1, varr); - goto stack_pop; - } - while(pr - pl > SMALL_QUICKSORT*elsize) { - /* quicksort partition */ - pm = pl + (((pr - pl) / elsize) >> 1) * elsize; - if (cmp(pm, pl, arr) < 0) { - GENERIC_SWAP(pm, pl, elsize); - } - if (cmp(pr, pm, arr) < 0) { - GENERIC_SWAP(pr, pm, elsize); - } - if (cmp(pm, pl, arr) < 0) { - GENERIC_SWAP(pm, pl, elsize); - } - GENERIC_COPY(vp, pm, elsize); - pi = pl; - pj = pr - elsize; - GENERIC_SWAP(pm, pj, elsize); - /* - * Generic comparisons may be buggy, so don't rely on the sentinels - * to keep the pointers from going out of bounds. - */ - for (;;) { - do { - pi += elsize; - } while (cmp(pi, vp, arr) < 0 && pi < pj); - do { - pj -= elsize; - } while (cmp(vp, pj, arr) < 0 && pi < pj); - if (pi >= pj) { - break; - } - GENERIC_SWAP(pi, pj, elsize); - } - pk = pr - elsize; - GENERIC_SWAP(pi, pk, elsize); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + elsize; - *sptr++ = pr; - pr = pi - elsize; - } - else { - *sptr++ = pl; - *sptr++ = pi - elsize; - pl = pi + elsize; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + elsize; pi <= pr; pi += elsize) { - GENERIC_COPY(vp, pi, elsize); - pj = pi; - pk = pi - elsize; - while (pj > pl && cmp(vp, pk, arr) < 0) { - GENERIC_COPY(pj, pk, elsize); - pj -= elsize; - pk -= elsize; - } - GENERIC_COPY(pj, vp, elsize); - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - free(vp); - return 0; -} - - -NPY_NO_EXPORT int -npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) -{ - char *v = vv; - PyArrayObject *arr = varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; - char *vp; - npy_intp *pl = tosort; - npy_intp *pr = tosort + num - 1; - npy_intp *stack[PYA_QS_STACK]; - npy_intp **sptr = stack; - npy_intp *pm, *pi, *pj, *pk, vi; - int depth[PYA_QS_STACK]; - int * psdepth = depth; - int cdepth = npy_get_msb(num) * 2; - - /* Items that have zero size don't make sense to sort */ - if (elsize == 0) { - return 0; - } - - for (;;) { - if (NPY_UNLIKELY(cdepth < 0)) { - npy_aheapsort(vv, pl, pr - pl + 1, varr); - goto stack_pop; - } - while ((pr - pl) > SMALL_QUICKSORT) { - /* quicksort partition */ - pm = pl + ((pr - pl) >> 1); - if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) { - INTP_SWAP(*pm, *pl); - } - if (cmp(v + (*pr)*elsize, v + (*pm)*elsize, arr) < 0) { - INTP_SWAP(*pr, *pm); - } - if (cmp(v + (*pm)*elsize, v + (*pl)*elsize, arr) < 0) { - INTP_SWAP(*pm, *pl); - } - vp = v + (*pm)*elsize; - pi = pl; - pj = pr - 1; - INTP_SWAP(*pm,*pj); - for (;;) { - do { - ++pi; - } while (cmp(v + (*pi)*elsize, vp, arr) < 0 && pi < pj); - do { - --pj; - } while (cmp(vp, v + (*pj)*elsize, arr) < 0 && pi < pj); - if (pi >= pj) { - break; - } - INTP_SWAP(*pi,*pj); - } - pk = pr - 1; - INTP_SWAP(*pi,*pk); - /* push largest partition on stack */ - if (pi - pl < pr - pi) { - *sptr++ = pi + 1; - *sptr++ = pr; - pr = pi - 1; - } - else { - *sptr++ = pl; - *sptr++ = pi - 1; - pl = pi + 1; - } - *psdepth++ = --cdepth; - } - - /* insertion sort */ - for (pi = pl + 1; pi <= pr; ++pi) { - vi = *pi; - vp = v + vi*elsize; - pj = pi; - pk = pi - 1; - while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) { - *pj-- = *pk--; - } - *pj = vi; - } -stack_pop: - if (sptr == stack) { - break; - } - pr = *(--sptr); - pl = *(--sptr); - cdepth = *(--psdepth); - } - - return 0; -} diff --git a/numpy/core/src/npysort/quicksort.cpp b/numpy/core/src/npysort/quicksort.cpp new file mode 100644 index 000000000..c6b87524d --- /dev/null +++ b/numpy/core/src/npysort/quicksort.cpp @@ -0,0 +1,944 @@ +/* -*- c -*- */ + +/* + * The purpose of this module is to add faster sort functions + * that are type-specific. This is done by altering the + * function table for the builtin descriptors. + * + * These sorting functions are copied almost directly from numarray + * with a few modifications (complex comparisons compare the imaginary + * part if the real parts are equal, for example), and the names + * are changed. + * + * The original sorting code is due to Charles R. Harris who wrote + * it for numarray. + */ + +/* + * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so + * the code switches to the O(NlogN) worst case heapsort if not enough progress + * is made on the large side of the two quicksort partitions. This improves the + * worst case while still retaining the speed of quicksort for the common case. + * This is variant known as introsort. + * + * + * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): + * # sort remainder with heapsort if we are not making enough progress + * # we arbitrarily choose 2 * log(n) as the cutoff point + * if recursion_limit < 0: + * heapsort(lower, higher) + * return + * + * if lower < higher: + * pivot_pos = partition(lower, higher) + * # recurse into smaller first and leave larger on stack + * # this limits the required stack space + * if (pivot_pos - lower > higher - pivot_pos): + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * quicksort(lower, pivot_pos, recursion_limit - 1) + * else: + * quicksort(lower, pivot_pos, recursion_limit - 1) + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * + * + * the below code implements this converted to an iteration and as an + * additional minor optimization skips the recursion depth checking on the + * smaller partition as it is always less than half of the remaining data and + * will thus terminate fast enough + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_cpu_features.h" +#include "npy_sort.h" +#include "npysort_common.h" +#include "numpy_tag.h" + +#include "x86-qsort.h" +#include <cstdlib> +#include <utility> + +#ifndef NPY_DISABLE_OPTIMIZATION +#include "x86-qsort.dispatch.h" +#endif + +#define NOT_USED NPY_UNUSED(unused) +/* + * pushing largest partition has upper bound of log2(n) space + * we store two pointers each time + */ +#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) +#define SMALL_QUICKSORT 15 +#define SMALL_MERGESORT 20 +#define SMALL_STRING 16 + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + +template <typename Tag, typename type> +NPY_NO_EXPORT int +heapsort_(type *start, npy_intp n); +template <typename Tag, typename type> +NPY_NO_EXPORT int +aheapsort_(type *vv, npy_intp *tosort, npy_intp n); +template <typename Tag, typename type> +NPY_NO_EXPORT int +string_heapsort_(type *start, npy_intp n, void *varr); +template <typename Tag, typename type> +NPY_NO_EXPORT int +string_aheapsort_(type *vv, npy_intp *tosort, npy_intp n, void *varr); + +namespace { + +template <typename Tag> +struct x86_dispatch { + static bool quicksort(typename Tag::type *, npy_intp) { return false; } +}; + +template <> +struct x86_dispatch<npy::int_tag> { + static bool quicksort(npy_int *start, npy_intp num) + { + void (*dispfunc)(void *, npy_intp) = nullptr; + NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_int); + if (dispfunc) { + (*dispfunc)(start, num); + return true; + } + return false; + } +}; + +template <> +struct x86_dispatch<npy::uint_tag> { + static bool quicksort(npy_uint *start, npy_intp num) + { + void (*dispfunc)(void *, npy_intp) = nullptr; + NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_uint); + if (dispfunc) { + (*dispfunc)(start, num); + return true; + } + return false; + } +}; + +template <> +struct x86_dispatch<npy::float_tag> { + static bool quicksort(npy_float *start, npy_intp num) + { + void (*dispfunc)(void *, npy_intp) = nullptr; + NPY_CPU_DISPATCH_CALL_XB(dispfunc = x86_quicksort_float); + if (dispfunc) { + (*dispfunc)(start, num); + return true; + } + return false; + } +}; + +} // namespace + +template <typename Tag, typename type> +static int +quicksort_(type *start, npy_intp num) +{ + if (x86_dispatch<Tag>::quicksort(start, num)) + return 0; + + type vp; + type *pl = start; + type *pr = pl + num - 1; + type *stack[PYA_QS_STACK]; + type **sptr = stack; + type *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + heapsort_<Tag>(pl, pr - pl + 1); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (Tag::less(*pm, *pl)) + std::swap(*pm, *pl); + if (Tag::less(*pr, *pm)) + std::swap(*pr, *pm); + if (Tag::less(*pm, *pl)) + std::swap(*pm, *pl); + vp = *pm; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do ++pi; + while (Tag::less(*pi, vp)); + do --pj; + while (Tag::less(vp, *pj)); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vp = *pi; + pj = pi; + pk = pi - 1; + while (pj > pl && Tag::less(vp, *pk)) { + *pj-- = *pk--; + } + *pj = vp; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +template <typename Tag, typename type> +static int +aquicksort_(type *vv, npy_intp *tosort, npy_intp num) +{ + type *v = vv; + type vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + aheapsort_<Tag>(vv, pl, pr - pl + 1); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (Tag::less(v[*pm], v[*pl])) + std::swap(*pm, *pl); + if (Tag::less(v[*pr], v[*pm])) + std::swap(*pr, *pm); + if (Tag::less(v[*pm], v[*pl])) + std::swap(*pm, *pl); + vp = v[*pm]; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do ++pi; + while (Tag::less(v[*pi], vp)); + do --pj; + while (Tag::less(vp, v[*pj])); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v[vi]; + pj = pi; + pk = pi - 1; + while (pj > pl && Tag::less(vp, v[*pk])) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + +template <typename Tag, typename type> +static int +string_quicksort_(type *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = (PyArrayObject *)varr; + const size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + type *vp; + type *pl = start; + type *pr = pl + (num - 1) * len; + type *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + vp = (type *)malloc(PyArray_ITEMSIZE(arr)); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + string_heapsort_<Tag>(pl, (pr - pl) / len + 1, varr); + goto stack_pop; + } + while ((size_t)(pr - pl) > SMALL_QUICKSORT * len) { + /* quicksort partition */ + pm = pl + (((pr - pl) / len) >> 1) * len; + if (Tag::less(pm, pl, len)) + Tag::swap(pm, pl, len); + if (Tag::less(pr, pm, len)) + Tag::swap(pr, pm, len); + if (Tag::less(pm, pl, len)) + Tag::swap(pm, pl, len); + Tag::copy(vp, pm, len); + pi = pl; + pj = pr - len; + Tag::swap(pm, pj, len); + for (;;) { + do pi += len; + while (Tag::less(pi, vp, len)); + do pj -= len; + while (Tag::less(vp, pj, len)); + if (pi >= pj) { + break; + } + Tag::swap(pi, pj, len); + } + pk = pr - len; + Tag::swap(pi, pk, len); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + len; + *sptr++ = pr; + pr = pi - len; + } + else { + *sptr++ = pl; + *sptr++ = pi - len; + pl = pi + len; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + len; pi <= pr; pi += len) { + Tag::copy(vp, pi, len); + pj = pi; + pk = pi - len; + while (pj > pl && Tag::less(vp, pk, len)) { + Tag::copy(pj, pk, len); + pj -= len; + pk -= len; + } + Tag::copy(pj, vp, len); + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + +template <typename Tag, typename type> +static int +string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) +{ + type *v = vv; + PyArrayObject *arr = (PyArrayObject *)varr; + size_t len = PyArray_ITEMSIZE(arr) / sizeof(type); + type *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (len == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + string_aheapsort_<Tag>(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (Tag::less(v + (*pm) * len, v + (*pl) * len, len)) + std::swap(*pm, *pl); + if (Tag::less(v + (*pr) * len, v + (*pm) * len, len)) + std::swap(*pr, *pm); + if (Tag::less(v + (*pm) * len, v + (*pl) * len, len)) + std::swap(*pm, *pl); + vp = v + (*pm) * len; + pi = pl; + pj = pr - 1; + std::swap(*pm, *pj); + for (;;) { + do ++pi; + while (Tag::less(v + (*pi) * len, vp, len)); + do --pj; + while (Tag::less(vp, v + (*pj) * len, len)); + if (pi >= pj) { + break; + } + std::swap(*pi, *pj); + } + pk = pr - 1; + std::swap(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v + vi * len; + pj = pi; + pk = pi - 1; + while (pj > pl && Tag::less(vp, v + (*pk) * len, len)) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + +NPY_NO_EXPORT int +npy_quicksort(void *start, npy_intp num, void *varr) +{ + PyArrayObject *arr = (PyArrayObject *)varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *vp; + char *pl = (char *)start; + char *pr = pl + (num - 1) * elsize; + char *stack[PYA_QS_STACK]; + char **sptr = stack; + char *pm, *pi, *pj, *pk; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + vp = (char *)malloc(elsize); + if (vp == NULL) { + return -NPY_ENOMEM; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_heapsort(pl, (pr - pl) / elsize + 1, varr); + goto stack_pop; + } + while (pr - pl > SMALL_QUICKSORT * elsize) { + /* quicksort partition */ + pm = pl + (((pr - pl) / elsize) >> 1) * elsize; + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + if (cmp(pr, pm, arr) < 0) { + GENERIC_SWAP(pr, pm, elsize); + } + if (cmp(pm, pl, arr) < 0) { + GENERIC_SWAP(pm, pl, elsize); + } + GENERIC_COPY(vp, pm, elsize); + pi = pl; + pj = pr - elsize; + GENERIC_SWAP(pm, pj, elsize); + /* + * Generic comparisons may be buggy, so don't rely on the sentinels + * to keep the pointers from going out of bounds. + */ + for (;;) { + do { + pi += elsize; + } while (cmp(pi, vp, arr) < 0 && pi < pj); + do { + pj -= elsize; + } while (cmp(vp, pj, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + GENERIC_SWAP(pi, pj, elsize); + } + pk = pr - elsize; + GENERIC_SWAP(pi, pk, elsize); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + elsize; + *sptr++ = pr; + pr = pi - elsize; + } + else { + *sptr++ = pl; + *sptr++ = pi - elsize; + pl = pi + elsize; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + elsize; pi <= pr; pi += elsize) { + GENERIC_COPY(vp, pi, elsize); + pj = pi; + pk = pi - elsize; + while (pj > pl && cmp(vp, pk, arr) < 0) { + GENERIC_COPY(pj, pk, elsize); + pj -= elsize; + pk -= elsize; + } + GENERIC_COPY(pj, vp, elsize); + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + free(vp); + return 0; +} + +NPY_NO_EXPORT int +npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) +{ + char *v = (char *)vv; + PyArrayObject *arr = (PyArrayObject *)varr; + npy_intp elsize = PyArray_ITEMSIZE(arr); + PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare; + char *vp; + npy_intp *pl = tosort; + npy_intp *pr = tosort + num - 1; + npy_intp *stack[PYA_QS_STACK]; + npy_intp **sptr = stack; + npy_intp *pm, *pi, *pj, *pk, vi; + int depth[PYA_QS_STACK]; + int *psdepth = depth; + int cdepth = npy_get_msb(num) * 2; + + /* Items that have zero size don't make sense to sort */ + if (elsize == 0) { + return 0; + } + + for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + npy_aheapsort(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } + while ((pr - pl) > SMALL_QUICKSORT) { + /* quicksort partition */ + pm = pl + ((pr - pl) >> 1); + if (cmp(v + (*pm) * elsize, v + (*pl) * elsize, arr) < 0) { + INTP_SWAP(*pm, *pl); + } + if (cmp(v + (*pr) * elsize, v + (*pm) * elsize, arr) < 0) { + INTP_SWAP(*pr, *pm); + } + if (cmp(v + (*pm) * elsize, v + (*pl) * elsize, arr) < 0) { + INTP_SWAP(*pm, *pl); + } + vp = v + (*pm) * elsize; + pi = pl; + pj = pr - 1; + INTP_SWAP(*pm, *pj); + for (;;) { + do { + ++pi; + } while (cmp(v + (*pi) * elsize, vp, arr) < 0 && pi < pj); + do { + --pj; + } while (cmp(vp, v + (*pj) * elsize, arr) < 0 && pi < pj); + if (pi >= pj) { + break; + } + INTP_SWAP(*pi, *pj); + } + pk = pr - 1; + INTP_SWAP(*pi, *pk); + /* push largest partition on stack */ + if (pi - pl < pr - pi) { + *sptr++ = pi + 1; + *sptr++ = pr; + pr = pi - 1; + } + else { + *sptr++ = pl; + *sptr++ = pi - 1; + pl = pi + 1; + } + *psdepth++ = --cdepth; + } + + /* insertion sort */ + for (pi = pl + 1; pi <= pr; ++pi) { + vi = *pi; + vp = v + vi * elsize; + pj = pi; + pk = pi - 1; + while (pj > pl && cmp(vp, v + (*pk) * elsize, arr) < 0) { + *pj-- = *pk--; + } + *pj = vi; + } + stack_pop: + if (sptr == stack) { + break; + } + pr = *(--sptr); + pl = *(--sptr); + cdepth = *(--psdepth); + } + + return 0; +} + +/*************************************** + * C > C++ dispatch + ***************************************/ + +NPY_NO_EXPORT int +quicksort_bool(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::bool_tag>((npy_bool *)start, n); +} +NPY_NO_EXPORT int +quicksort_byte(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::byte_tag>((npy_byte *)start, n); +} +NPY_NO_EXPORT int +quicksort_ubyte(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::ubyte_tag>((npy_ubyte *)start, n); +} +NPY_NO_EXPORT int +quicksort_short(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::short_tag>((npy_short *)start, n); +} +NPY_NO_EXPORT int +quicksort_ushort(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::ushort_tag>((npy_ushort *)start, n); +} +NPY_NO_EXPORT int +quicksort_int(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::int_tag>((npy_int *)start, n); +} +NPY_NO_EXPORT int +quicksort_uint(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::uint_tag>((npy_uint *)start, n); +} +NPY_NO_EXPORT int +quicksort_long(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::long_tag>((npy_long *)start, n); +} +NPY_NO_EXPORT int +quicksort_ulong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::ulong_tag>((npy_ulong *)start, n); +} +NPY_NO_EXPORT int +quicksort_longlong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::longlong_tag>((npy_longlong *)start, n); +} +NPY_NO_EXPORT int +quicksort_ulonglong(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::ulonglong_tag>((npy_ulonglong *)start, n); +} +NPY_NO_EXPORT int +quicksort_half(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::half_tag>((npy_half *)start, n); +} +NPY_NO_EXPORT int +quicksort_float(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::float_tag>((npy_float *)start, n); +} +NPY_NO_EXPORT int +quicksort_double(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::double_tag>((npy_double *)start, n); +} +NPY_NO_EXPORT int +quicksort_longdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::longdouble_tag>((npy_longdouble *)start, n); +} +NPY_NO_EXPORT int +quicksort_cfloat(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::cfloat_tag>((npy_cfloat *)start, n); +} +NPY_NO_EXPORT int +quicksort_cdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::cdouble_tag>((npy_cdouble *)start, n); +} +NPY_NO_EXPORT int +quicksort_clongdouble(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::clongdouble_tag>((npy_clongdouble *)start, n); +} +NPY_NO_EXPORT int +quicksort_datetime(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::datetime_tag>((npy_datetime *)start, n); +} +NPY_NO_EXPORT int +quicksort_timedelta(void *start, npy_intp n, void *NPY_UNUSED(varr)) +{ + return quicksort_<npy::timedelta_tag>((npy_timedelta *)start, n); +} + +NPY_NO_EXPORT int +aquicksort_bool(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::bool_tag>((npy_bool *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_byte(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::byte_tag>((npy_byte *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_ubyte(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::ubyte_tag>((npy_ubyte *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_short(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::short_tag>((npy_short *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_ushort(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::ushort_tag>((npy_ushort *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_int(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::int_tag>((npy_int *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_uint(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::uint_tag>((npy_uint *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_long(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::long_tag>((npy_long *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_ulong(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::ulong_tag>((npy_ulong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_longlong(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::longlong_tag>((npy_longlong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_ulonglong(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::ulonglong_tag>((npy_ulonglong *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_half(void *vv, npy_intp *tosort, npy_intp n, void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::half_tag>((npy_half *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_float(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::float_tag>((npy_float *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_double(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::double_tag>((npy_double *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_longdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::longdouble_tag>((npy_longdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_cfloat(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::cfloat_tag>((npy_cfloat *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_cdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::cdouble_tag>((npy_cdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_clongdouble(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::clongdouble_tag>((npy_clongdouble *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_datetime(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::datetime_tag>((npy_datetime *)vv, tosort, n); +} +NPY_NO_EXPORT int +aquicksort_timedelta(void *vv, npy_intp *tosort, npy_intp n, + void *NPY_UNUSED(varr)) +{ + return aquicksort_<npy::timedelta_tag>((npy_timedelta *)vv, tosort, n); +} + +NPY_NO_EXPORT int +quicksort_string(void *start, npy_intp n, void *varr) +{ + return string_quicksort_<npy::string_tag>((npy_char *)start, n, varr); +} +NPY_NO_EXPORT int +quicksort_unicode(void *start, npy_intp n, void *varr) +{ + return string_quicksort_<npy::unicode_tag>((npy_ucs4 *)start, n, varr); +} + +NPY_NO_EXPORT int +aquicksort_string(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + return string_aquicksort_<npy::string_tag>((npy_char *)vv, tosort, n, + varr); +} +NPY_NO_EXPORT int +aquicksort_unicode(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + return string_aquicksort_<npy::unicode_tag>((npy_ucs4 *)vv, tosort, n, + varr); +} diff --git a/numpy/core/src/npysort/x86-qsort.h b/numpy/core/src/npysort/x86-qsort.h index 8cb8e3654..6340e2bc7 100644 --- a/numpy/core/src/npysort/x86-qsort.h +++ b/numpy/core/src/npysort/x86-qsort.h @@ -1,18 +1,28 @@ #include "numpy/npy_common.h" + #include "npy_cpu_dispatch.h" #ifndef NPY_NO_EXPORT - #define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN +#define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN #endif #ifndef NPY_DISABLE_OPTIMIZATION - #include "x86-qsort.dispatch.h" +#include "x86-qsort.dispatch.h" +#endif + +#ifdef __cplusplus +extern "C" { #endif + NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_int, - (void *start, npy_intp num)) + (void *start, npy_intp num)) NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_uint, - (void *start, npy_intp num)) + (void *start, npy_intp num)) NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_float, - (void *start, npy_intp num)) + (void *start, npy_intp num)) + +#ifdef __cplusplus +} +#endif |
