summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/npysort/quicksort.c.src95
1 files changed, 72 insertions, 23 deletions
diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src
index 60c905a4e..0ea9d21c7 100644
--- a/numpy/core/src/npysort/quicksort.c.src
+++ b/numpy/core/src/npysort/quicksort.c.src
@@ -15,15 +15,36 @@
*/
/*
- * Quick sort is usually the fastest, but the worst case scenario can
- * be slower than the merge and heap sorts. The merge sort requires
- * extra memory and so for large arrays may not be useful.
+ * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
+ * the code switches to the O(NlogN) worst case heapsort if not enough progress
+ * is made on the large side of the two quicksort partitions. This improves the
+ * worst case while still retaining the speed of quicksort for the common case.
+ * This is variant known as introsort.
*
- * The merge sort is *stable*, meaning that equal components
- * are unmoved from their entry versions, so it can be used to
- * implement lexigraphic sorting on multiple keys.
*
- * The heap sort is included for completeness.
+ * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
+ * # sort remainder with heapsort if we are not making enough progress
+ * # we arbitrarily choose 2 * log(n) as the cutoff point
+ * if recursion_limit < 0:
+ * heapsort(lower, higher)
+ * return
+ *
+ * if lower < higher:
+ * pivot_pos = partition(lower, higher)
+ * # recurse into smaller first and leave larger on stack
+ * # this limits the required stack space
+ * if (pivot_pos - lower > higher - pivot_pos):
+ * quicksort(pivot_pos + 1, higher, recursion_limit - 1)
+ * quicksort(lower, pivot_pos, recursion_limit - 1)
+ * else:
+ * quicksort(lower, pivot_pos, recursion_limit - 1)
+ * quicksort(pivot_pos + 1, higher, recursion_limit - 1)
+ *
+ *
+ * the below code implements this converted to an iteration and as an
+ * additional minor optimization skips the recursion depth checking on the
+ * smaller partition as it is always less than half of the remaining data and
+ * will thus terminate fast enough
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
@@ -33,7 +54,11 @@
#include <stdlib.h>
#define NOT_USED NPY_UNUSED(unused)
-#define PYA_QS_STACK 100
+/*
+ * pushing largest partition has upper bound of log2(n) space
+ * we store two pointers each time
+ */
+#define PYA_QS_STACK (NPY_BITSOF_INTP * 2)
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20
#define SMALL_STRING 16
@@ -69,10 +94,12 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
@type@ *stack[PYA_QS_STACK];
@type@ **sptr = stack;
@type@ *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, pr - pl + 1, NULL);
goto stack_pop;
}
@@ -107,6 +134,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -125,6 +153,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -141,14 +170,16 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
+ aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
+ goto stack_pop;
+ }
while ((pr - pl) > SMALL_QUICKSORT) {
- if (depth_limit-- < 0) {
- aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
- goto stack_pop;
- }
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl);
@@ -179,6 +210,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -198,6 +230,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -229,14 +262,16 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
@type@ *pl = start;
@type@ *pr = pl + (num - 1)*len;
@type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, (pr - pl) / len + 1, varr);
goto stack_pop;
}
@@ -271,6 +306,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
*sptr++ = pi - len;
pl = pi + len;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -291,6 +327,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
free(vp);
@@ -310,10 +347,12 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr=stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
aheapsort_@suff@(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
@@ -348,6 +387,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -367,6 +407,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -394,14 +435,16 @@ npy_quicksort(void *start, npy_intp num, void *varr)
char *stack[PYA_QS_STACK];
char **sptr = stack;
char *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
npy_heapsort(pl, (pr - pl) / elsize + 1, varr);
goto stack_pop;
}
@@ -446,6 +489,7 @@ npy_quicksort(void *start, npy_intp num, void *varr)
*sptr++ = pi - elsize;
pl = pi + elsize;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -466,6 +510,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
free(vp);
@@ -486,10 +531,12 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
npy_aheapsort(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
@@ -534,6 +581,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -553,6 +601,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;