summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2022-01-23 17:38:32 -0700
committerGitHub <noreply@github.com>2022-01-23 17:38:32 -0700
commit23eda2a52b5c7797ee249fc52c71e9b6eec2d06a (patch)
tree891edbf3a8889423db66cea8607d2b609bb29a77
parent3cb68682894d4d9179476b6ed7954762079131f0 (diff)
parent584ddc93886eb5b199fe5c70d3fa9c88b1162eef (diff)
downloadnumpy-23eda2a52b5c7797ee249fc52c71e9b6eec2d06a.tar.gz
Merge pull request #20840 from serge-sans-paille/feature/road-to-cxx-selection
MAINT: Translate selection.c.src to C++ using templates.
-rw-r--r--numpy/core/setup.py2
-rw-r--r--numpy/core/src/common/npy_partition.h.src8
-rw-r--r--numpy/core/src/npysort/selection.c.src419
-rw-r--r--numpy/core/src/npysort/selection.cpp645
4 files changed, 654 insertions, 420 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 3bf6f6250..c97747606 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -952,7 +952,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'npysort', 'heapsort.cpp'),
join('src', 'npysort', 'radixsort.cpp'),
join('src', 'common', 'npy_partition.h.src'),
- join('src', 'npysort', 'selection.c.src'),
+ join('src', 'npysort', 'selection.cpp'),
join('src', 'common', 'npy_binsearch.h'),
join('src', 'npysort', 'binsearch.cpp'),
]
diff --git a/numpy/core/src/common/npy_partition.h.src b/numpy/core/src/common/npy_partition.h.src
index 72c2095f1..5ba652b41 100644
--- a/numpy/core/src/common/npy_partition.h.src
+++ b/numpy/core/src/common/npy_partition.h.src
@@ -28,6 +28,10 @@
#define NPY_MAX_PIVOT_STACK 50
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
@@ -56,6 +60,10 @@ NPY_NO_EXPORT int aintroselect_@suff@(@type@ *v, npy_intp* tosort, npy_intp num,
/**end repeat**/
+#ifdef __cplusplus
+}
+#endif
+
typedef struct {
int typenum;
PyArray_PartitionFunc * part[NPY_NSELECTS];
diff --git a/numpy/core/src/npysort/selection.c.src b/numpy/core/src/npysort/selection.c.src
deleted file mode 100644
index 0e285b320..000000000
--- a/numpy/core/src/npysort/selection.c.src
+++ /dev/null
@@ -1,419 +0,0 @@
-/* -*- c -*- */
-
-/*
- *
- * The code is loosely based on the quickselect from
- * Nicolas Devillard - 1998 public domain
- * http://ndevilla.free.fr/median/median/
- *
- * Quick select with median of 3 pivot is usually the fastest,
- * but the worst case scenario can be quadratic complexity,
- * e.g. np.roll(np.arange(x), x / 2)
- * To avoid this if it recurses too much it falls back to the
- * worst case linear median of median of group 5 pivot strategy.
- */
-
-
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "npy_sort.h"
-#include "npysort_common.h"
-#include "numpy/npy_math.h"
-#include "npy_partition.h"
-#include <stdlib.h>
-
-#define NOT_USED NPY_UNUSED(unused)
-
-
-/*
- *****************************************************************************
- ** NUMERIC SORTS **
- *****************************************************************************
- */
-
-
-static NPY_INLINE void store_pivot(npy_intp pivot, npy_intp kth,
- npy_intp * pivots, npy_intp * npiv)
-{
- if (pivots == NULL) {
- return;
- }
-
- /*
- * If pivot is the requested kth store it, overwriting other pivots if
- * required. This must be done so iterative partition can work without
- * manually shifting lower data offset by kth each time
- */
- if (pivot == kth && *npiv == NPY_MAX_PIVOT_STACK) {
- pivots[*npiv - 1] = pivot;
- }
- /*
- * we only need pivots larger than current kth, larger pivots are not
- * useful as partitions on smaller kth would reorder the stored pivots
- */
- else if (pivot >= kth && *npiv < NPY_MAX_PIVOT_STACK) {
- pivots[*npiv] = pivot;
- (*npiv) += 1;
- }
-}
-
-/**begin repeat
- *
- * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
- * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
- * CFLOAT, CDOUBLE, CLONGDOUBLE#
- * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
- * longlong, ulonglong, half, float, double, longdouble,
- * cfloat, cdouble, clongdouble#
- * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
- * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
- * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
- * npy_cdouble, npy_clongdouble#
- * #inexact = 0*11, 1*7#
- */
-
-static npy_intp
-amedian_of_median5_@suff@(@type@ *v, npy_intp* tosort, const npy_intp num,
- npy_intp * pivots,
- npy_intp * npiv);
-
-static npy_intp
-median_of_median5_@suff@(@type@ *v, const npy_intp num,
- npy_intp * pivots,
- npy_intp * npiv);
-
-/**begin repeat1
- * #name = , a#
- * #idx = , tosort#
- * #arg = 0, 1#
- */
-#if @arg@
-/* helper macros to avoid duplication of direct/indirect selection */
-#define IDX(x) tosort[x]
-#define SORTEE(x) tosort[x]
-#define SWAP INTP_SWAP
-#define MEDIAN3_SWAP(v, tosort, low, mid, high) \
- amedian3_swap_@suff@(v, tosort, low, mid, high)
-#define MEDIAN5(v, tosort, subleft) \
- amedian5_@suff@(v, tosort + subleft)
-#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \
- aunguarded_partition_@suff@(v, tosort, pivot, ll, hh)
-#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \
- aintroselect_@suff@(v, tosort, nmed, nmed / 2, pivots, npiv, NULL)
-#define DUMBSELECT(v, tosort, left, num, kth) \
- adumb_select_@suff@(v, tosort + left, num, kth)
-#else
-#define IDX(x) (x)
-#define SORTEE(x) v[x]
-#define SWAP @TYPE@_SWAP
-#define MEDIAN3_SWAP(v, tosort, low, mid, high) \
- median3_swap_@suff@(v, low, mid, high)
-#define MEDIAN5(v, tosort, subleft) \
- median5_@suff@(v + subleft)
-#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \
- unguarded_partition_@suff@(v, pivot, ll, hh)
-#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \
- introselect_@suff@(v, nmed, nmed / 2, pivots, npiv, NULL)
-#define DUMBSELECT(v, tosort, left, num, kth) \
- dumb_select_@suff@(v + left, num, kth)
-#endif
-
-
-/*
- * median of 3 pivot strategy
- * gets min and median and moves median to low and min to low + 1
- * for efficient partitioning, see unguarded_partition
- */
-static NPY_INLINE void
-@name@median3_swap_@suff@(@type@ * v,
-#if @arg@
- npy_intp * tosort,
-#endif
- npy_intp low, npy_intp mid, npy_intp high)
-{
- if (@TYPE@_LT(v[IDX(high)], v[IDX(mid)]))
- SWAP(SORTEE(high), SORTEE(mid));
- if (@TYPE@_LT(v[IDX(high)], v[IDX(low)]))
- SWAP(SORTEE(high), SORTEE(low));
- /* move pivot to low */
- if (@TYPE@_LT(v[IDX(low)], v[IDX(mid)]))
- SWAP(SORTEE(low), SORTEE(mid));
- /* move 3-lowest element to low + 1 */
- SWAP(SORTEE(mid), SORTEE(low + 1));
-}
-
-
-/* select index of median of five elements */
-static npy_intp @name@median5_@suff@(
-#if @arg@
- const @type@ * v, npy_intp * tosort
-#else
- @type@ * v
-#endif
- )
-{
- /* could be optimized as we only need the index (no swaps) */
- if (@TYPE@_LT(v[IDX(1)], v[IDX(0)])) {
- SWAP(SORTEE(1), SORTEE(0));
- }
- if (@TYPE@_LT(v[IDX(4)], v[IDX(3)])) {
- SWAP(SORTEE(4), SORTEE(3));
- }
- if (@TYPE@_LT(v[IDX(3)], v[IDX(0)])) {
- SWAP(SORTEE(3), SORTEE(0));
- }
- if (@TYPE@_LT(v[IDX(4)], v[IDX(1)])) {
- SWAP(SORTEE(4), SORTEE(1));
- }
- if (@TYPE@_LT(v[IDX(2)], v[IDX(1)])) {
- SWAP(SORTEE(2), SORTEE(1));
- }
- if (@TYPE@_LT(v[IDX(3)], v[IDX(2)])) {
- if (@TYPE@_LT(v[IDX(3)], v[IDX(1)])) {
- return 1;
- }
- else {
- return 3;
- }
- }
- else {
- /* v[1] and v[2] swapped into order above */
- return 2;
- }
-}
-
-
-/*
- * partition and return the index were the pivot belongs
- * the data must have following property to avoid bound checks:
- * ll ... hh
- * lower-than-pivot [x x x x] larger-than-pivot
- */
-static NPY_INLINE void
-@name@unguarded_partition_@suff@(@type@ * v,
-#if @arg@
- npy_intp * tosort,
-#endif
- const @type@ pivot,
- npy_intp * ll, npy_intp * hh)
-{
- for (;;) {
- do (*ll)++; while (@TYPE@_LT(v[IDX(*ll)], pivot));
- do (*hh)--; while (@TYPE@_LT(pivot, v[IDX(*hh)]));
-
- if (*hh < *ll)
- break;
-
- SWAP(SORTEE(*ll), SORTEE(*hh));
- }
-}
-
-
-/*
- * select median of median of blocks of 5
- * if used as partition pivot it splits the range into at least 30%/70%
- * allowing linear time worstcase quickselect
- */
-static npy_intp
-@name@median_of_median5_@suff@(@type@ *v,
-#if @arg@
- npy_intp* tosort,
-#endif
- const npy_intp num,
- npy_intp * pivots,
- npy_intp * npiv)
-{
- npy_intp i, subleft;
- npy_intp right = num - 1;
- npy_intp nmed = (right + 1) / 5;
- for (i = 0, subleft = 0; i < nmed; i++, subleft += 5) {
- npy_intp m = MEDIAN5(v, tosort, subleft);
- SWAP(SORTEE(subleft + m), SORTEE(i));
- }
-
- if (nmed > 2)
- INTROSELECT(v, tosort, nmed, nmed / 2, pivots, npiv);
- return nmed / 2;
-}
-
-
-/*
- * N^2 selection, fast only for very small kth
- * useful for close multiple partitions
- * (e.g. even element median, interpolating percentile)
- */
-static int
-@name@dumb_select_@suff@(@type@ *v,
-#if @arg@
- npy_intp * tosort,
-#endif
- npy_intp num, npy_intp kth)
-{
- npy_intp i;
- for (i = 0; i <= kth; i++) {
- npy_intp minidx = i;
- @type@ minval = v[IDX(i)];
- npy_intp k;
- for (k = i + 1; k < num; k++) {
- if (@TYPE@_LT(v[IDX(k)], minval)) {
- minidx = k;
- minval = v[IDX(k)];
- }
- }
- SWAP(SORTEE(i), SORTEE(minidx));
- }
-
- return 0;
-}
-
-
-/*
- * iterative median of 3 quickselect with cutoff to median-of-medians-of5
- * receives stack of already computed pivots in v to minimize the
- * partition size were kth is searched in
- *
- * area that needs partitioning in [...]
- * kth 0: [8 7 6 5 4 3 2 1 0] -> med3 partitions elements [4, 2, 0]
- * 0 1 2 3 4 8 7 5 6 -> pop requested kth -> stack [4, 2]
- * kth 3: 0 1 2 [3] 4 8 7 5 6 -> stack [4]
- * kth 5: 0 1 2 3 4 [8 7 5 6] -> stack [6]
- * kth 8: 0 1 2 3 4 5 6 [8 7] -> stack []
- *
- */
-NPY_NO_EXPORT int
-@name@introselect_@suff@(@type@ *v,
-#if @arg@
- npy_intp* tosort,
-#endif
- npy_intp num, npy_intp kth,
- npy_intp * pivots,
- npy_intp * npiv,
- void *NOT_USED)
-{
- npy_intp low = 0;
- npy_intp high = num - 1;
- int depth_limit;
-
- if (npiv == NULL)
- pivots = NULL;
-
- while (pivots != NULL && *npiv > 0) {
- if (pivots[*npiv - 1] > kth) {
- /* pivot larger than kth set it as upper bound */
- high = pivots[*npiv - 1] - 1;
- break;
- }
- else if (pivots[*npiv - 1] == kth) {
- /* kth was already found in a previous iteration -> done */
- return 0;
- }
-
- low = pivots[*npiv - 1] + 1;
-
- /* pop from stack */
- *npiv -= 1;
- }
-
- /*
- * use a faster O(n*kth) algorithm for very small kth
- * e.g. for interpolating percentile
- */
- if (kth - low < 3) {
- DUMBSELECT(v, tosort, low, high - low + 1, kth - low);
- store_pivot(kth, kth, pivots, npiv);
- return 0;
- }
- // Parenthesis around @inexact@ tells clang dead code as intentional
- else if ((@inexact@) && kth == num - 1) {
- /* useful to check if NaN present via partition(d, (x, -1)) */
- npy_intp k;
- npy_intp maxidx = low;
- @type@ maxval = v[IDX(low)];
- for (k = low + 1; k < num; k++) {
- if (!@TYPE@_LT(v[IDX(k)], maxval)) {
- maxidx = k;
- maxval = v[IDX(k)];
- }
- }
- SWAP(SORTEE(kth), SORTEE(maxidx));
- return 0;
- }
-
- depth_limit = npy_get_msb(num) * 2;
-
- /* guarantee three elements */
- for (;low + 1 < high;) {
- npy_intp ll = low + 1;
- npy_intp hh = high;
-
- /*
- * if we aren't making sufficient progress with median of 3
- * fall back to median-of-median5 pivot for linear worst case
- * med3 for small sizes is required to do unguarded partition
- */
- if (depth_limit > 0 || hh - ll < 5) {
- const npy_intp mid = low + (high - low) / 2;
- /* median of 3 pivot strategy,
- * swapping for efficient partition */
- MEDIAN3_SWAP(v, tosort, low, mid, high);
- }
- else {
- npy_intp mid;
- /* FIXME: always use pivots to optimize this iterative partition */
-#if @arg@
- mid = ll + amedian_of_median5_@suff@(v, tosort + ll, hh - ll, NULL, NULL);
-#else
- mid = ll + median_of_median5_@suff@(v + ll, hh - ll, NULL, NULL);
-#endif
- SWAP(SORTEE(mid), SORTEE(low));
- /* adapt for the larger partition than med3 pivot */
- ll--;
- hh++;
- }
-
- depth_limit--;
-
- /*
- * find place to put pivot (in low):
- * previous swapping removes need for bound checks
- * pivot 3-lowest [x x x] 3-highest
- */
- UNGUARDED_PARTITION(v, tosort, v[IDX(low)], &ll, &hh);
-
- /* move pivot into position */
- SWAP(SORTEE(low), SORTEE(hh));
-
- /* kth pivot stored later */
- if (hh != kth) {
- store_pivot(hh, kth, pivots, npiv);
- }
-
- if (hh >= kth)
- high = hh - 1;
- if (hh <= kth)
- low = ll;
- }
-
- /* two elements */
- if (high == low + 1) {
- if (@TYPE@_LT(v[IDX(high)], v[IDX(low)])) {
- SWAP(SORTEE(high), SORTEE(low))
- }
- }
- store_pivot(kth, kth, pivots, npiv);
-
- return 0;
-}
-
-
-#undef IDX
-#undef SWAP
-#undef SORTEE
-#undef MEDIAN3_SWAP
-#undef MEDIAN5
-#undef UNGUARDED_PARTITION
-#undef INTROSELECT
-#undef DUMBSELECT
-/**end repeat1**/
-
-/**end repeat**/
diff --git a/numpy/core/src/npysort/selection.cpp b/numpy/core/src/npysort/selection.cpp
new file mode 100644
index 000000000..8eb4a57aa
--- /dev/null
+++ b/numpy/core/src/npysort/selection.cpp
@@ -0,0 +1,645 @@
+/* -*- c -*- */
+
+/*
+ *
+ * The code is loosely based on the quickselect from
+ * Nicolas Devillard - 1998 public domain
+ * http://ndevilla.free.fr/median/median/
+ *
+ * Quick select with median of 3 pivot is usually the fastest,
+ * but the worst case scenario can be quadratic complexity,
+ * e.g. np.roll(np.arange(x), x / 2)
+ * To avoid this if it recurses too much it falls back to the
+ * worst case linear median of median of group 5 pivot strategy.
+ */
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include "numpy/npy_math.h"
+
+#include "npy_partition.h"
+#include "npy_sort.h"
+#include "npysort_common.h"
+#include "numpy_tag.h"
+
+#include <cstdlib>
+#include <utility>
+
+#define NOT_USED NPY_UNUSED(unused)
+
+template <typename Tag, bool arg, typename type>
+NPY_NO_EXPORT int
+introselect_(type *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv);
+
+/*
+ *****************************************************************************
+ ** NUMERIC SORTS **
+ *****************************************************************************
+ */
+
+static NPY_INLINE void
+store_pivot(npy_intp pivot, npy_intp kth, npy_intp *pivots, npy_intp *npiv)
+{
+ if (pivots == NULL) {
+ return;
+ }
+
+ /*
+ * If pivot is the requested kth store it, overwriting other pivots if
+ * required. This must be done so iterative partition can work without
+ * manually shifting lower data offset by kth each time
+ */
+ if (pivot == kth && *npiv == NPY_MAX_PIVOT_STACK) {
+ pivots[*npiv - 1] = pivot;
+ }
+ /*
+ * we only need pivots larger than current kth, larger pivots are not
+ * useful as partitions on smaller kth would reorder the stored pivots
+ */
+ else if (pivot >= kth && *npiv < NPY_MAX_PIVOT_STACK) {
+ pivots[*npiv] = pivot;
+ (*npiv) += 1;
+ }
+}
+
+template <typename type, bool arg>
+struct Sortee {
+ type *v;
+ Sortee(type *v, npy_intp *) : v(v) {}
+ type &operator()(npy_intp i) const { return v[i]; }
+};
+
+template <bool arg>
+struct Idx {
+ Idx(npy_intp *) {}
+ npy_intp operator()(npy_intp i) const { return i; }
+};
+
+template <typename type>
+struct Sortee<type, true> {
+ npy_intp *tosort;
+ Sortee(type *, npy_intp *tosort) : tosort(tosort) {}
+ npy_intp &operator()(npy_intp i) const { return tosort[i]; }
+};
+
+template <>
+struct Idx<true> {
+ npy_intp *tosort;
+ Idx(npy_intp *tosort) : tosort(tosort) {}
+ npy_intp operator()(npy_intp i) const { return tosort[i]; }
+};
+
+template <class T>
+static constexpr bool
+inexact()
+{
+ return !std::is_integral<T>::value;
+}
+
+/*
+ * median of 3 pivot strategy
+ * gets min and median and moves median to low and min to low + 1
+ * for efficient partitioning, see unguarded_partition
+ */
+template <typename Tag, bool arg, typename type>
+static NPY_INLINE void
+median3_swap_(type *v, npy_intp *tosort, npy_intp low, npy_intp mid,
+ npy_intp high)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ if (Tag::less(v[idx(high)], v[idx(mid)]))
+ std::swap(sortee(high), sortee(mid));
+ if (Tag::less(v[idx(high)], v[idx(low)]))
+ std::swap(sortee(high), sortee(low));
+ /* move pivot to low */
+ if (Tag::less(v[idx(low)], v[idx(mid)]))
+ std::swap(sortee(low), sortee(mid));
+ /* move 3-lowest element to low + 1 */
+ std::swap(sortee(mid), sortee(low + 1));
+}
+
+/* select index of median of five elements */
+template <typename Tag, bool arg, typename type>
+static npy_intp
+median5_(type *v, npy_intp *tosort)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ /* could be optimized as we only need the index (no swaps) */
+ if (Tag::less(v[idx(1)], v[idx(0)])) {
+ std::swap(sortee(1), sortee(0));
+ }
+ if (Tag::less(v[idx(4)], v[idx(3)])) {
+ std::swap(sortee(4), sortee(3));
+ }
+ if (Tag::less(v[idx(3)], v[idx(0)])) {
+ std::swap(sortee(3), sortee(0));
+ }
+ if (Tag::less(v[idx(4)], v[idx(1)])) {
+ std::swap(sortee(4), sortee(1));
+ }
+ if (Tag::less(v[idx(2)], v[idx(1)])) {
+ std::swap(sortee(2), sortee(1));
+ }
+ if (Tag::less(v[idx(3)], v[idx(2)])) {
+ if (Tag::less(v[idx(3)], v[idx(1)])) {
+ return 1;
+ }
+ else {
+ return 3;
+ }
+ }
+ else {
+ /* v[1] and v[2] swapped into order above */
+ return 2;
+ }
+}
+
+/*
+ * partition and return the index were the pivot belongs
+ * the data must have following property to avoid bound checks:
+ * ll ... hh
+ * lower-than-pivot [x x x x] larger-than-pivot
+ */
+template <typename Tag, bool arg, typename type>
+static NPY_INLINE void
+unguarded_partition_(type *v, npy_intp *tosort, const type pivot, npy_intp *ll,
+ npy_intp *hh)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ for (;;) {
+ do (*ll)++;
+ while (Tag::less(v[idx(*ll)], pivot));
+ do (*hh)--;
+ while (Tag::less(pivot, v[idx(*hh)]));
+
+ if (*hh < *ll)
+ break;
+
+ std::swap(sortee(*ll), sortee(*hh));
+ }
+}
+
+/*
+ * select median of median of blocks of 5
+ * if used as partition pivot it splits the range into at least 30%/70%
+ * allowing linear time worstcase quickselect
+ */
+template <typename Tag, bool arg, typename type>
+static npy_intp
+median_of_median5_(type *v, npy_intp *tosort, const npy_intp num,
+ npy_intp *pivots, npy_intp *npiv)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ npy_intp i, subleft;
+ npy_intp right = num - 1;
+ npy_intp nmed = (right + 1) / 5;
+ for (i = 0, subleft = 0; i < nmed; i++, subleft += 5) {
+ npy_intp m = median5_<Tag, arg>(v + (arg ? 0 : subleft),
+ tosort + (arg ? subleft : 0));
+ std::swap(sortee(subleft + m), sortee(i));
+ }
+
+ if (nmed > 2)
+ introselect_<Tag, arg>(v, tosort, nmed, nmed / 2, pivots, npiv);
+ return nmed / 2;
+}
+
+/*
+ * N^2 selection, fast only for very small kth
+ * useful for close multiple partitions
+ * (e.g. even element median, interpolating percentile)
+ */
+template <typename Tag, bool arg, typename type>
+static int
+dumb_select_(type *v, npy_intp *tosort, npy_intp num, npy_intp kth)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ npy_intp i;
+ for (i = 0; i <= kth; i++) {
+ npy_intp minidx = i;
+ type minval = v[idx(i)];
+ npy_intp k;
+ for (k = i + 1; k < num; k++) {
+ if (Tag::less(v[idx(k)], minval)) {
+ minidx = k;
+ minval = v[idx(k)];
+ }
+ }
+ std::swap(sortee(i), sortee(minidx));
+ }
+
+ return 0;
+}
+
+/*
+ * iterative median of 3 quickselect with cutoff to median-of-medians-of5
+ * receives stack of already computed pivots in v to minimize the
+ * partition size were kth is searched in
+ *
+ * area that needs partitioning in [...]
+ * kth 0: [8 7 6 5 4 3 2 1 0] -> med3 partitions elements [4, 2, 0]
+ * 0 1 2 3 4 8 7 5 6 -> pop requested kth -> stack [4, 2]
+ * kth 3: 0 1 2 [3] 4 8 7 5 6 -> stack [4]
+ * kth 5: 0 1 2 3 4 [8 7 5 6] -> stack [6]
+ * kth 8: 0 1 2 3 4 5 6 [8 7] -> stack []
+ *
+ */
+template <typename Tag, bool arg, typename type>
+NPY_NO_EXPORT int
+introselect_(type *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv)
+{
+ Idx<arg> idx(tosort);
+ Sortee<type, arg> sortee(v, tosort);
+
+ npy_intp low = 0;
+ npy_intp high = num - 1;
+ int depth_limit;
+
+ if (npiv == NULL)
+ pivots = NULL;
+
+ while (pivots != NULL && *npiv > 0) {
+ if (pivots[*npiv - 1] > kth) {
+ /* pivot larger than kth set it as upper bound */
+ high = pivots[*npiv - 1] - 1;
+ break;
+ }
+ else if (pivots[*npiv - 1] == kth) {
+ /* kth was already found in a previous iteration -> done */
+ return 0;
+ }
+
+ low = pivots[*npiv - 1] + 1;
+
+ /* pop from stack */
+ *npiv -= 1;
+ }
+
+ /*
+ * use a faster O(n*kth) algorithm for very small kth
+ * e.g. for interpolating percentile
+ */
+ if (kth - low < 3) {
+ dumb_select_<Tag, arg>(v + (arg ? 0 : low), tosort + (arg ? low : 0),
+ high - low + 1, kth - low);
+ store_pivot(kth, kth, pivots, npiv);
+ return 0;
+ }
+
+ else if (inexact<type>() && kth == num - 1) {
+ /* useful to check if NaN present via partition(d, (x, -1)) */
+ npy_intp k;
+ npy_intp maxidx = low;
+ type maxval = v[idx(low)];
+ for (k = low + 1; k < num; k++) {
+ if (!Tag::less(v[idx(k)], maxval)) {
+ maxidx = k;
+ maxval = v[idx(k)];
+ }
+ }
+ std::swap(sortee(kth), sortee(maxidx));
+ return 0;
+ }
+
+ depth_limit = npy_get_msb(num) * 2;
+
+ /* guarantee three elements */
+ for (; low + 1 < high;) {
+ npy_intp ll = low + 1;
+ npy_intp hh = high;
+
+ /*
+ * if we aren't making sufficient progress with median of 3
+ * fall back to median-of-median5 pivot for linear worst case
+ * med3 for small sizes is required to do unguarded partition
+ */
+ if (depth_limit > 0 || hh - ll < 5) {
+ const npy_intp mid = low + (high - low) / 2;
+ /* median of 3 pivot strategy,
+ * swapping for efficient partition */
+ median3_swap_<Tag, arg>(v, tosort, low, mid, high);
+ }
+ else {
+ npy_intp mid;
+ /* FIXME: always use pivots to optimize this iterative partition */
+ mid = ll + median_of_median5_<Tag, arg>(v + (arg ? 0 : ll),
+ tosort + (arg ? ll : 0),
+ hh - ll, NULL, NULL);
+ std::swap(sortee(mid), sortee(low));
+ /* adapt for the larger partition than med3 pivot */
+ ll--;
+ hh++;
+ }
+
+ depth_limit--;
+
+ /*
+ * find place to put pivot (in low):
+ * previous swapping removes need for bound checks
+ * pivot 3-lowest [x x x] 3-highest
+ */
+ unguarded_partition_<Tag, arg>(v, tosort, v[idx(low)], &ll, &hh);
+
+ /* move pivot into position */
+ std::swap(sortee(low), sortee(hh));
+
+ /* kth pivot stored later */
+ if (hh != kth) {
+ store_pivot(hh, kth, pivots, npiv);
+ }
+
+ if (hh >= kth)
+ high = hh - 1;
+ if (hh <= kth)
+ low = ll;
+ }
+
+ /* two elements */
+ if (high == low + 1) {
+ if (Tag::less(v[idx(high)], v[idx(low)])) {
+ std::swap(sortee(high), sortee(low));
+ }
+ }
+ store_pivot(kth, kth, pivots, npiv);
+
+ return 0;
+}
+
+/***************************************
+ * C > C++ dispatch
+ ***************************************/
+
+extern "C" {
+NPY_NO_EXPORT int
+introselect_bool(npy_bool *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::bool_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_byte(npy_byte *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::byte_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_ubyte(npy_ubyte *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ubyte_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_short(npy_short *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::short_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_ushort(npy_ushort *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ushort_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_int(npy_int *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::int_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_uint(npy_uint *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::uint_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_long(npy_long *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::long_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_ulong(npy_ulong *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ulong_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_longlong(npy_longlong *v, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::longlong_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_ulonglong(npy_ulonglong *v, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ulonglong_tag, false>(v, nullptr, num, kth,
+ pivots, npiv);
+}
+NPY_NO_EXPORT int
+introselect_half(npy_half *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::half_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_float(npy_float *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::float_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_double(npy_double *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::double_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_longdouble(npy_longdouble *v, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::longdouble_tag, false>(v, nullptr, num, kth,
+ pivots, npiv);
+}
+NPY_NO_EXPORT int
+introselect_cfloat(npy_cfloat *v, npy_intp num, npy_intp kth, npy_intp *pivots,
+ npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::cfloat_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_cdouble(npy_cdouble *v, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::cdouble_tag, false>(v, nullptr, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+introselect_clongdouble(npy_clongdouble *v, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::clongdouble_tag, false>(v, nullptr, num, kth,
+ pivots, npiv);
+}
+
+NPY_NO_EXPORT int
+aintroselect_bool(npy_bool *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::bool_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_byte(npy_byte *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::byte_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_ubyte(npy_ubyte *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ubyte_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_short(npy_short *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::short_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_ushort(npy_ushort *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::ushort_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_int(npy_int *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::int_tag, true>(v, tosort, num, kth, pivots, npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_uint(npy_uint *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::uint_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_long(npy_long *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::long_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_ulong(npy_ulong *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::ulong_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_longlong(npy_longlong *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::longlong_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_ulonglong(npy_ulonglong *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::ulonglong_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_half(npy_half *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::half_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_float(npy_float *v, npy_intp *tosort, npy_intp num, npy_intp kth,
+ npy_intp *pivots, npy_intp *npiv, void *NOT_USED)
+{
+ return introselect_<npy::float_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_double(npy_double *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::double_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_longdouble(npy_longdouble *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::longdouble_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_cfloat(npy_cfloat *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::cfloat_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_cdouble(npy_cdouble *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::cdouble_tag, true>(v, tosort, num, kth, pivots,
+ npiv);
+}
+NPY_NO_EXPORT int
+aintroselect_clongdouble(npy_clongdouble *v, npy_intp *tosort, npy_intp num,
+ npy_intp kth, npy_intp *pivots, npy_intp *npiv,
+ void *NOT_USED)
+{
+ return introselect_<npy::clongdouble_tag, true>(v, tosort, num, kth,
+ pivots, npiv);
+}
+}