diff options
-rw-r--r-- | numpy/core/src/npysort/quicksort.c.src | 95 |
1 files changed, 72 insertions, 23 deletions
diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src index 60c905a4e..0ea9d21c7 100644 --- a/numpy/core/src/npysort/quicksort.c.src +++ b/numpy/core/src/npysort/quicksort.c.src @@ -15,15 +15,36 @@ */ /* - * Quick sort is usually the fastest, but the worst case scenario can - * be slower than the merge and heap sorts. The merge sort requires - * extra memory and so for large arrays may not be useful. + * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so + * the code switches to the O(NlogN) worst case heapsort if not enough progress + * is made on the large side of the two quicksort partitions. This improves the + * worst case while still retaining the speed of quicksort for the common case. + * This is variant known as introsort. * - * The merge sort is *stable*, meaning that equal components - * are unmoved from their entry versions, so it can be used to - * implement lexigraphic sorting on multiple keys. * - * The heap sort is included for completeness. + * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): + * # sort remainder with heapsort if we are not making enough progress + * # we arbitrarily choose 2 * log(n) as the cutoff point + * if recursion_limit < 0: + * heapsort(lower, higher) + * return + * + * if lower < higher: + * pivot_pos = partition(lower, higher) + * # recurse into smaller first and leave larger on stack + * # this limits the required stack space + * if (pivot_pos - lower > higher - pivot_pos): + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * quicksort(lower, pivot_pos, recursion_limit - 1) + * else: + * quicksort(lower, pivot_pos, recursion_limit - 1) + * quicksort(pivot_pos + 1, higher, recursion_limit - 1) + * + * + * the below code implements this converted to an iteration and as an + * additional minor optimization skips the recursion depth checking on the + * smaller partition as it is always less than half of the remaining data and + * will thus terminate fast enough */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION @@ -33,7 +54,11 @@ #include <stdlib.h> #define NOT_USED NPY_UNUSED(unused) -#define PYA_QS_STACK 100 +/* + * pushing largest partition has upper bound of log2(n) space + * we store two pointers each time + */ +#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) #define SMALL_QUICKSORT 15 #define SMALL_MERGESORT 20 #define SMALL_STRING 16 @@ -69,10 +94,12 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) @type@ *stack[PYA_QS_STACK]; @type@ **sptr = stack; @type@ *pm, *pi, *pj, *pk; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; for (;;) { - if (depth_limit-- < 0) { + if (NPY_UNLIKELY(cdepth < 0)) { heapsort_@suff@(pl, pr - pl + 1, NULL); goto stack_pop; } @@ -107,6 +134,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) *sptr++ = pi - 1; pl = pi + 1; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -125,6 +153,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } return 0; @@ -141,14 +170,16 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr = stack; npy_intp *pm, *pi, *pj, *pk, vi; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; for (;;) { + if (NPY_UNLIKELY(cdepth < 0)) { + aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); + goto stack_pop; + } while ((pr - pl) > SMALL_QUICKSORT) { - if (depth_limit-- < 0) { - aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); - goto stack_pop; - } /* quicksort partition */ pm = pl + ((pr - pl) >> 1); if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); @@ -179,6 +210,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) *sptr++ = pi - 1; pl = pi + 1; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -198,6 +230,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } return 0; @@ -229,14 +262,16 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) @type@ *pl = start; @type@ *pr = pl + (num - 1)*len; @type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; if (vp == NULL) { return -NPY_ENOMEM; } for (;;) { - if (depth_limit-- < 0) { + if (NPY_UNLIKELY(cdepth < 0)) { heapsort_@suff@(pl, (pr - pl) / len + 1, varr); goto stack_pop; } @@ -271,6 +306,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) *sptr++ = pi - len; pl = pi + len; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -291,6 +327,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } free(vp); @@ -310,10 +347,12 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr=stack; npy_intp *pm, *pi, *pj, *pk, vi; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; for (;;) { - if (depth_limit-- < 0) { + if (NPY_UNLIKELY(cdepth < 0)) { aheapsort_@suff@(vv, pl, pr - pl + 1, varr); goto stack_pop; } @@ -348,6 +387,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) *sptr++ = pi - 1; pl = pi + 1; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -367,6 +407,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } return 0; @@ -394,14 +435,16 @@ npy_quicksort(void *start, npy_intp num, void *varr) char *stack[PYA_QS_STACK]; char **sptr = stack; char *pm, *pi, *pj, *pk; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; if (vp == NULL) { return -NPY_ENOMEM; } for (;;) { - if (depth_limit-- < 0) { + if (NPY_UNLIKELY(cdepth < 0)) { npy_heapsort(pl, (pr - pl) / elsize + 1, varr); goto stack_pop; } @@ -446,6 +489,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) *sptr++ = pi - elsize; pl = pi + elsize; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -466,6 +510,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } free(vp); @@ -486,10 +531,12 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr = stack; npy_intp *pm, *pi, *pj, *pk, vi; - npy_intp depth_limit = npy_get_msb(num) * 2; + int depth[PYA_QS_STACK]; + int * psdepth = depth; + int cdepth = npy_get_msb(num) * 2; for (;;) { - if (depth_limit-- < 0) { + if (NPY_UNLIKELY(cdepth < 0)) { npy_aheapsort(vv, pl, pr - pl + 1, varr); goto stack_pop; } @@ -534,6 +581,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) *sptr++ = pi - 1; pl = pi + 1; } + *psdepth++ = --cdepth; } /* insertion sort */ @@ -553,6 +601,7 @@ stack_pop: } pr = *(--sptr); pl = *(--sptr); + cdepth = *(--psdepth); } return 0; |