summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2016-07-26 20:59:13 +0200
committerJulian Taylor <jtaylor.debian@googlemail.com>2016-07-29 21:41:18 +0200
commit07201420338f87d0a19ec6b5d756f40350e1f21b (patch)
tree0beb4a35b3a19c6b2c38781aab73eb0d89ba5f86
parent4fb9a228c5b3a26eaa67e79a12e5a4d75c01e952 (diff)
downloadnumpy-07201420338f87d0a19ec6b5d756f40350e1f21b.tar.gz
BUG: handle introsort depth limit properly
the larger partition needs its own depth limit else it grows faster than logarithmic. Also increase the stack size to hold up to 64 instead of 50 pointer pairs else it might overflow on arrays larger than than 2^50 elements.
-rw-r--r--numpy/core/src/npysort/quicksort.c.src95
1 files changed, 72 insertions, 23 deletions
diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src
index 60c905a4e..0ea9d21c7 100644
--- a/numpy/core/src/npysort/quicksort.c.src
+++ b/numpy/core/src/npysort/quicksort.c.src
@@ -15,15 +15,36 @@
*/
/*
- * Quick sort is usually the fastest, but the worst case scenario can
- * be slower than the merge and heap sorts. The merge sort requires
- * extra memory and so for large arrays may not be useful.
+ * Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
+ * the code switches to the O(NlogN) worst case heapsort if not enough progress
+ * is made on the large side of the two quicksort partitions. This improves the
+ * worst case while still retaining the speed of quicksort for the common case.
+ * This is variant known as introsort.
*
- * The merge sort is *stable*, meaning that equal components
- * are unmoved from their entry versions, so it can be used to
- * implement lexigraphic sorting on multiple keys.
*
- * The heap sort is included for completeness.
+ * def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
+ * # sort remainder with heapsort if we are not making enough progress
+ * # we arbitrarily choose 2 * log(n) as the cutoff point
+ * if recursion_limit < 0:
+ * heapsort(lower, higher)
+ * return
+ *
+ * if lower < higher:
+ * pivot_pos = partition(lower, higher)
+ * # recurse into smaller first and leave larger on stack
+ * # this limits the required stack space
+ * if (pivot_pos - lower > higher - pivot_pos):
+ * quicksort(pivot_pos + 1, higher, recursion_limit - 1)
+ * quicksort(lower, pivot_pos, recursion_limit - 1)
+ * else:
+ * quicksort(lower, pivot_pos, recursion_limit - 1)
+ * quicksort(pivot_pos + 1, higher, recursion_limit - 1)
+ *
+ *
+ * the below code implements this converted to an iteration and as an
+ * additional minor optimization skips the recursion depth checking on the
+ * smaller partition as it is always less than half of the remaining data and
+ * will thus terminate fast enough
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
@@ -33,7 +54,11 @@
#include <stdlib.h>
#define NOT_USED NPY_UNUSED(unused)
-#define PYA_QS_STACK 100
+/*
+ * pushing largest partition has upper bound of log2(n) space
+ * we store two pointers each time
+ */
+#define PYA_QS_STACK (NPY_BITSOF_INTP * 2)
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20
#define SMALL_STRING 16
@@ -69,10 +94,12 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
@type@ *stack[PYA_QS_STACK];
@type@ **sptr = stack;
@type@ *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, pr - pl + 1, NULL);
goto stack_pop;
}
@@ -107,6 +134,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -125,6 +153,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -141,14 +170,16 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
+ aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
+ goto stack_pop;
+ }
while ((pr - pl) > SMALL_QUICKSORT) {
- if (depth_limit-- < 0) {
- aheapsort_@suff@(vv, pl, pr - pl + 1, NULL);
- goto stack_pop;
- }
/* quicksort partition */
pm = pl + ((pr - pl) >> 1);
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl);
@@ -179,6 +210,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -198,6 +230,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -229,14 +262,16 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
@type@ *pl = start;
@type@ *pr = pl + (num - 1)*len;
@type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
heapsort_@suff@(pl, (pr - pl) / len + 1, varr);
goto stack_pop;
}
@@ -271,6 +306,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr)
*sptr++ = pi - len;
pl = pi + len;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -291,6 +327,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
free(vp);
@@ -310,10 +347,12 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr=stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
aheapsort_@suff@(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
@@ -348,6 +387,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -367,6 +407,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;
@@ -394,14 +435,16 @@ npy_quicksort(void *start, npy_intp num, void *varr)
char *stack[PYA_QS_STACK];
char **sptr = stack;
char *pm, *pi, *pj, *pk;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
if (vp == NULL) {
return -NPY_ENOMEM;
}
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
npy_heapsort(pl, (pr - pl) / elsize + 1, varr);
goto stack_pop;
}
@@ -446,6 +489,7 @@ npy_quicksort(void *start, npy_intp num, void *varr)
*sptr++ = pi - elsize;
pl = pi + elsize;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -466,6 +510,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
free(vp);
@@ -486,10 +531,12 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
npy_intp *stack[PYA_QS_STACK];
npy_intp **sptr = stack;
npy_intp *pm, *pi, *pj, *pk, vi;
- npy_intp depth_limit = npy_get_msb(num) * 2;
+ int depth[PYA_QS_STACK];
+ int * psdepth = depth;
+ int cdepth = npy_get_msb(num) * 2;
for (;;) {
- if (depth_limit-- < 0) {
+ if (NPY_UNLIKELY(cdepth < 0)) {
npy_aheapsort(vv, pl, pr - pl + 1, varr);
goto stack_pop;
}
@@ -534,6 +581,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr)
*sptr++ = pi - 1;
pl = pi + 1;
}
+ *psdepth++ = --cdepth;
}
/* insertion sort */
@@ -553,6 +601,7 @@ stack_pop:
}
pr = *(--sptr);
pl = *(--sptr);
+ cdepth = *(--psdepth);
}
return 0;