Diffstat (limited to 'numpy/core')
-rw-r--r--  numpy/core/setup.py                                         10
-rw-r--r--  numpy/core/src/common/simd/avx512/arithmetic.h              72
-rw-r--r--  numpy/core/src/multiarray/conversion_utils.c                11
-rw-r--r--  numpy/core/src/multiarray/conversion_utils.h                 3
-rw-r--r--  numpy/core/src/multiarray/multiarraymodule.c                 3
-rw-r--r--  numpy/core/src/multiarray/textreading/conversions.c        395
-rw-r--r--  numpy/core/src/multiarray/textreading/conversions.h         57
-rw-r--r--  numpy/core/src/multiarray/textreading/field_types.c        201
-rw-r--r--  numpy/core/src/multiarray/textreading/field_types.h         67
-rw-r--r--  numpy/core/src/multiarray/textreading/growth.c              47
-rw-r--r--  numpy/core/src/multiarray/textreading/growth.h               7
-rw-r--r--  numpy/core/src/multiarray/textreading/parser_config.h       61
-rw-r--r--  numpy/core/src/multiarray/textreading/readtext.c           312
-rw-r--r--  numpy/core/src/multiarray/textreading/readtext.h             7
-rw-r--r--  numpy/core/src/multiarray/textreading/rows.c               481
-rw-r--r--  numpy/core/src/multiarray/textreading/rows.h                22
-rw-r--r--  numpy/core/src/multiarray/textreading/str_to_int.c          67
-rw-r--r--  numpy/core/src/multiarray/textreading/str_to_int.h         174
-rw-r--r--  numpy/core/src/multiarray/textreading/stream.h              41
-rw-r--r--  numpy/core/src/multiarray/textreading/stream_pyobject.c    239
-rw-r--r--  numpy/core/src/multiarray/textreading/stream_pyobject.h     16
-rw-r--r--  numpy/core/src/multiarray/textreading/tokenize.c.src       457
-rw-r--r--  numpy/core/src/multiarray/textreading/tokenize.h            78
-rw-r--r--  numpy/core/src/npysort/quicksort.c.src                      17
-rw-r--r--  numpy/core/src/npysort/x86-qsort.dispatch.c.src            587
-rw-r--r--  numpy/core/src/npysort/x86-qsort.h                          18
-rw-r--r--  numpy/core/tests/test_multiarray.py                         49
27 files changed, 3499 insertions, 0 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index a13480907..9704cff0a 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -869,6 +869,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'typeinfo.h'),
join('src', 'multiarray', 'usertypes.h'),
join('src', 'multiarray', 'vdot.h'),
+ join('src', 'multiarray', 'textreading', 'readtext.h'),
join('include', 'numpy', 'arrayobject.h'),
join('include', 'numpy', '_neighborhood_iterator_imp.h'),
join('include', 'numpy', 'npy_endian.h'),
@@ -947,6 +948,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'usertypes.c'),
join('src', 'multiarray', 'vdot.c'),
join('src', 'common', 'npy_sort.h.src'),
+ join('src', 'npysort', 'x86-qsort.dispatch.c.src'),
join('src', 'npysort', 'quicksort.c.src'),
join('src', 'npysort', 'mergesort.cpp'),
join('src', 'npysort', 'timsort.cpp'),
@@ -956,6 +958,14 @@ def configuration(parent_package='',top_path=None):
join('src', 'npysort', 'selection.cpp'),
join('src', 'common', 'npy_binsearch.h'),
join('src', 'npysort', 'binsearch.cpp'),
+ join('src', 'multiarray', 'textreading', 'conversions.c'),
+ join('src', 'multiarray', 'textreading', 'field_types.c'),
+ join('src', 'multiarray', 'textreading', 'growth.c'),
+ join('src', 'multiarray', 'textreading', 'readtext.c'),
+ join('src', 'multiarray', 'textreading', 'rows.c'),
+ join('src', 'multiarray', 'textreading', 'stream_pyobject.c'),
+ join('src', 'multiarray', 'textreading', 'str_to_int.c'),
+ join('src', 'multiarray', 'textreading', 'tokenize.c.src'),
]
#######################################################################
diff --git a/numpy/core/src/common/simd/avx512/arithmetic.h b/numpy/core/src/common/simd/avx512/arithmetic.h
index f8632e701..93e9d9d45 100644
--- a/numpy/core/src/common/simd/avx512/arithmetic.h
+++ b/numpy/core/src/common/simd/avx512/arithmetic.h
@@ -371,7 +371,79 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
#define npyv_sum_u64 _mm512_reduce_add_epi64
#define npyv_sum_f32 _mm512_reduce_add_ps
#define npyv_sum_f64 _mm512_reduce_add_pd
+ #define npyv_reducemin_u32 _mm512_reduce_min_epu32
+ #define npyv_reducemin_s32 _mm512_reduce_min_epi32
+ #define npyv_reducemin_f32 _mm512_reduce_min_ps
+ #define npyv_reducemax_u32 _mm512_reduce_max_epu32
+ #define npyv_reducemax_s32 _mm512_reduce_max_epi32
+ #define npyv_reducemax_f32 _mm512_reduce_max_ps
#else
+ NPY_FINLINE npy_uint32 npyv_reducemax_u32(npyv_u32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
+ npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
+ npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_u32 a4 = _mm512_max_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
+ }
+
+ NPY_FINLINE npy_int32 npyv_reducemax_s32(npyv_s32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
+ npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
+ npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
+ }
+
+ NPY_FINLINE npy_float npyv_reducemax_f32(npyv_f32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a));
+ npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
+ npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
+ }
+
+ NPY_FINLINE npy_uint32 npyv_reducemin_u32(npyv_u32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a));
+ npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
+ npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
+ }
+
+ NPY_FINLINE npy_int32 npyv_reducemin_s32(npyv_s32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a));
+ npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1));
+ npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00));
+ }
+
+ NPY_FINLINE npy_float npyv_reducemin_f32(npyv_f32 a)
+ {
+ const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
+ const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+ npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a));
+ npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1));
+ npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2)));
+ npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1)));
+ return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00));
+ }
+
NPY_FINLINE npy_uint32 npyv_sum_u32(npyv_u32 a)
{
__m256i half = _mm256_add_epi32(npyv512_lower_si256(a), npyv512_higher_si256(a));
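For illustration, a minimal scalar model of the lane-halving reduction implemented by the fallback paths above (not part of the patch; `reducemax_u32_model` is a hypothetical name): each permute/shuffle step folds the upper half of the active lanes onto the lower half, so four max steps reduce 16 lanes to one.

    /* Standalone sketch of the 16-lane max reduction pattern. */
    #include <stdio.h>
    #include <stdint.h>

    static uint32_t
    reducemax_u32_model(const uint32_t in[16])
    {
        uint32_t lanes[16];
        for (int i = 0; i < 16; i++) {
            lanes[i] = in[i];
        }
        /* fold 16 -> 8 -> 4 -> 2 -> 1, mirroring idx1, idx2 and the
         * two in-lane shuffles above */
        for (int width = 8; width >= 1; width /= 2) {
            for (int i = 0; i < width; i++) {
                if (lanes[i + width] > lanes[i]) {
                    lanes[i] = lanes[i + width];
                }
            }
        }
        return lanes[0];
    }

    int main(void)
    {
        uint32_t v[16] = {3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3};
        printf("%u\n", reducemax_u32_model(v));  /* prints 9 */
        return 0;
    }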
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index a1de580d9..e4eb4f49e 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -993,6 +993,17 @@ PyArray_PyIntAsIntp(PyObject *o)
}
+NPY_NO_EXPORT int
+PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val)
+{
+ *val = PyArray_PyIntAsIntp(o);
+ if (error_converting(*val)) {
+ return NPY_FAIL;
+ }
+ return NPY_SUCCEED;
+}
+
+
/*
* PyArray_IntpFromIndexSequence
* Returns the number of dimensions or -1 if an error occurred.
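The new converter follows CPython's `O&` protocol (nonzero on success, zero on failure), so it can be plugged into argument parsing directly; a hypothetical sketch (`example_method` is not part of the patch):

    #include <Python.h>
    #include "numpy/ndarraytypes.h"
    #include "conversion_utils.h"

    static PyObject *
    example_method(PyObject *self, PyObject *args)
    {
        npy_intp n = 0;
        /* converter stores into `n` and returns NPY_FAIL (0) on error */
        if (!PyArg_ParseTuple(args, "O&", PyArray_IntpFromPyIntConverter, &n)) {
            return NULL;
        }
        return PyLong_FromSsize_t((Py_ssize_t)n);
    }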
diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h
index 4072841ee..4d0fbb894 100644
--- a/numpy/core/src/multiarray/conversion_utils.h
+++ b/numpy/core/src/multiarray/conversion_utils.h
@@ -7,6 +7,9 @@ NPY_NO_EXPORT int
PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq);
NPY_NO_EXPORT int
+PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val);
+
+NPY_NO_EXPORT int
PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq);
typedef enum {
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 789446d0c..a7b6898e1 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -69,6 +69,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "get_attr_string.h"
#include "experimental_public_dtype_api.h" /* _get_experimental_dtype_api */
+#include "textreading/readtext.h" /* _readtext_from_file_object */
#include "npy_dlpack.h"
@@ -4456,6 +4457,8 @@ static struct PyMethodDef array_module_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_get_experimental_dtype_api", (PyCFunction)_get_experimental_dtype_api,
METH_O, NULL},
+ {"_load_from_filelike", (PyCFunction)_load_from_filelike,
+ METH_FASTCALL | METH_KEYWORDS, NULL},
/* from umath */
{"frompyfunc",
(PyCFunction) ufunc_frompyfunc,
diff --git a/numpy/core/src/multiarray/textreading/conversions.c b/numpy/core/src/multiarray/textreading/conversions.c
new file mode 100644
index 000000000..11f4210f7
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/conversions.c
@@ -0,0 +1,395 @@
+
+#include <Python.h>
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "lowlevel_strided_loops.h"
+
+#include "conversions.h"
+#include "str_to_int.h"
+
+#include "array_coercion.h"
+
+
+/*
+ * Coercion to boolean is done via integer right now.
+ */
+NPY_NO_EXPORT int
+to_bool(PyArray_Descr *NPY_UNUSED(descr),
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *NPY_UNUSED(pconfig))
+{
+ int64_t res;
+ if (str_to_int64(str, end, INT64_MIN, INT64_MAX, &res) < 0) {
+ return -1;
+ }
+ *dataptr = (char)(res != 0);
+ return 0;
+}
+
+
+/*
+ * In order to not pack a whole copy of a floating point parser, we copy the
+ * result into ascii and call the Python one. Float parsing isn't super quick
+ * so this is not terrible, but avoiding it would speed up things.
+ *
+ * Also note that parsing the first float of a complex will copy the whole
+ * string to ascii rather than just the first part.
+ * TODO: A tweak of the break might be a simple mitigation there.
+ *
+ * @param str The UCS4 string to parse
+ * @param end Pointer to the end of the string
+ * @param strip_whitespace If true, skip leading and trailing whitespace
+ * (the complex parser passes false).
+ * @param result Output stored as double value.
+ * @param p_end Set to the first character that was not parsed.
+ */
+static NPY_INLINE int
+double_from_ucs4(
+ const Py_UCS4 *str, const Py_UCS4 *end,
+ bool strip_whitespace, double *result, const Py_UCS4 **p_end)
+{
+ /* skip leading whitespace */
+ if (strip_whitespace) {
+ while (Py_UNICODE_ISSPACE(*str)) {
+ str++;
+ }
+ }
+ if (str == end) {
+ return -1; /* empty or only whitespace: not a floating point number */
+ }
+
+ /* We convert to ASCII for the Python parser, use stack if small: */
+ char stack_buf[128];
+ char *heap_buf = NULL;
+ char *ascii = stack_buf;
+
+ size_t str_len = end - str + 1;
+ if (str_len > 128) {
+ heap_buf = PyMem_MALLOC(str_len);
+ if (heap_buf == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ ascii = heap_buf;
+ }
+ char *c = ascii;
+ for (; str < end; str++, c++) {
+ if (NPY_UNLIKELY(*str >= 128)) {
+ /* Character cannot be used, ignore for end calculation and stop */
+ end = str;
+ break;
+ }
+ *c = (char)(*str);
+ }
+ *c = '\0';
+
+ char *end_parsed;
+ *result = PyOS_string_to_double(ascii, &end_parsed, NULL);
+ /* Rewind `end` to the first UCS4 character not parsed: */
+ end = end - (c - end_parsed);
+
+ PyMem_FREE(heap_buf);
+
+ if (*result == -1. && PyErr_Occurred()) {
+ return -1;
+ }
+
+ if (strip_whitespace) {
+ /* and then skip any remaining whitespace: */
+ while (Py_UNICODE_ISSPACE(*end)) {
+ end++;
+ }
+ }
+ *p_end = end;
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+to_float(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *NPY_UNUSED(pconfig))
+{
+ double double_val;
+ const Py_UCS4 *p_end;
+ if (double_from_ucs4(str, end, true, &double_val, &p_end) < 0) {
+ return -1;
+ }
+ if (p_end != end) {
+ return -1;
+ }
+
+ float val = (float)double_val;
+ memcpy(dataptr, &val, sizeof(float));
+ if (!PyArray_ISNBO(descr->byteorder)) {
+ npy_bswap4_unaligned(dataptr);
+ }
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+to_double(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *NPY_UNUSED(pconfig))
+{
+ double val;
+ const Py_UCS4 *p_end;
+ if (double_from_ucs4(str, end, true, &val, &p_end) < 0) {
+ return -1;
+ }
+ if (p_end != end) {
+ return -1;
+ }
+
+ memcpy(dataptr, &val, sizeof(double));
+ if (!PyArray_ISNBO(descr->byteorder)) {
+ npy_bswap8_unaligned(dataptr);
+ }
+ return 0;
+}
+
+
+static bool
+to_complex_int(
+ const Py_UCS4 *item, const Py_UCS4 *token_end,
+ double *p_real, double *p_imag,
+ Py_UCS4 imaginary_unit, bool allow_parens)
+{
+ const Py_UCS4 *p_end;
+ bool unmatched_opening_paren = false;
+
+ /* Remove whitespace before the possibly leading '(' */
+ while (Py_UNICODE_ISSPACE(*item)) {
+ ++item;
+ }
+ if (allow_parens && (*item == '(')) {
+ unmatched_opening_paren = true;
+ ++item;
+ /* Allow whitespace within the parentheses: "( 1j)" */
+ while (Py_UNICODE_ISSPACE(*item)) {
+ ++item;
+ }
+ }
+ if (double_from_ucs4(item, token_end, false, p_real, &p_end) < 0) {
+ return false;
+ }
+ if (p_end == token_end) {
+ // No imaginary part in the string (e.g. "3.5")
+ *p_imag = 0.0;
+ return !unmatched_opening_paren;
+ }
+ if (*p_end == imaginary_unit) {
+ /* Only an imaginary part (e.g "1.5j") */
+ *p_imag = *p_real;
+ *p_real = 0.0;
+ ++p_end;
+ }
+ else if (*p_end == '+' || *p_end == '-') {
+ /* Imaginary part still to parse */
+ if (*p_end == '+') {
+ ++p_end; /* Advance to support +- (and ++) */
+ }
+ if (double_from_ucs4(p_end, token_end, false, p_imag, &p_end) < 0) {
+ return false;
+ }
+ if (*p_end != imaginary_unit) {
+ return false;
+ }
+ ++p_end;
+ }
+ else {
+ *p_imag = 0;
+ }
+
+ if (unmatched_opening_paren) {
+ /* Allow whitespace inside brackets as in "(1+2j )" or "( 1j )" */
+ while (Py_UNICODE_ISSPACE(*p_end)) {
+ ++p_end;
+ }
+ if (*p_end == ')') {
+ ++p_end;
+ }
+ else {
+ /* parentheses was not closed */
+ return false;
+ }
+ }
+
+ while (Py_UNICODE_ISSPACE(*p_end)) {
+ ++p_end;
+ }
+ return p_end == token_end;
+}
+
+
+NPY_NO_EXPORT int
+to_cfloat(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig)
+{
+ double real;
+ double imag;
+
+ bool success = to_complex_int(
+ str, end, &real, &imag,
+ pconfig->imaginary_unit, true);
+
+ if (!success) {
+ return -1;
+ }
+ npy_complex64 val = {(float)real, (float)imag};
+ memcpy(dataptr, &val, sizeof(npy_complex64));
+ if (!PyArray_ISNBO(descr->byteorder)) {
+ npy_bswap4_unaligned(dataptr);
+ npy_bswap4_unaligned(dataptr + 4);
+ }
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+to_cdouble(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig)
+{
+ double real;
+ double imag;
+
+ bool success = to_complex_int(
+ str, end, &real, &imag, pconfig->imaginary_unit, true);
+
+ if (!success) {
+ return -1;
+ }
+ npy_complex128 val = {real, imag};
+ memcpy(dataptr, &val, sizeof(npy_complex128));
+ if (!PyArray_ISNBO(descr->byteorder)) {
+ npy_bswap8_unaligned(dataptr);
+ npy_bswap8_unaligned(dataptr + 8);
+ }
+ return 0;
+}
+
+
+/*
+ * String and unicode conversion functions.
+ */
+NPY_NO_EXPORT int
+to_string(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *NPY_UNUSED(unused))
+{
+ const Py_UCS4* c = str;
+ size_t length = descr->elsize;
+
+ for (size_t i = 0; i < length; i++) {
+ if (c < end) {
+ /*
+ * loadtxt assumed latin1, which is compatible with UCS1 (first
+ * 256 unicode characters).
+ */
+ if (NPY_UNLIKELY(*c > 255)) {
+ /* TODO: Was UnicodeDecodeError, is unspecific error good? */
+ return -1;
+ }
+ dataptr[i] = (Py_UCS1)(*c);
+ c++;
+ }
+ else {
+ dataptr[i] = '\0';
+ }
+ }
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+to_unicode(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *NPY_UNUSED(unused))
+{
+ int length = descr->elsize / 4;
+
+ if (length <= end - str) {
+ memcpy(dataptr, str, length * 4);
+ }
+ else {
+ size_t given_len = end - str;
+ memcpy(dataptr, str, given_len * 4);
+ memset(dataptr + given_len * 4, '\0', (length - given_len) * 4);
+ }
+
+ if (!PyArray_ISNBO(descr->byteorder)) {
+ for (int i = 0; i < length; i++) {
+ npy_bswap4_unaligned(dataptr);
+ dataptr += 4;
+ }
+ }
+ return 0;
+}
+
+
+
+/*
+ * Convert functions helper for the generic converter.
+ */
+static PyObject *
+call_converter_function(
+ PyObject *func, const Py_UCS4 *str, size_t length, bool byte_converters)
+{
+ PyObject *s = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, str, length);
+ if (s == NULL) {
+ return s;
+ }
+ if (byte_converters) {
+ Py_SETREF(s, PyUnicode_AsEncodedString(s, "latin1", NULL));
+ if (s == NULL) {
+ return NULL;
+ }
+ }
+ if (func == NULL) {
+ return s;
+ }
+ PyObject *result = PyObject_CallFunctionObjArgs(func, s, NULL);
+ Py_DECREF(s);
+ return result;
+}
+
+
+NPY_NO_EXPORT int
+to_generic_with_converter(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *config, PyObject *func)
+{
+ bool use_byte_converter;
+ if (func == NULL) {
+ use_byte_converter = config->c_byte_converters;
+ }
+ else {
+ use_byte_converter = config->python_byte_converters;
+ }
+ /* Converts to unicode and calls custom converter (if set) */
+ PyObject *converted = call_converter_function(
+ func, str, (size_t)(end - str), use_byte_converter);
+ if (converted == NULL) {
+ return -1;
+ }
+
+ int res = PyArray_Pack(descr, dataptr, converted);
+ Py_DECREF(converted);
+ return res;
+}
+
+
+NPY_NO_EXPORT int
+to_generic(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *config)
+{
+ return to_generic_with_converter(descr, str, end, dataptr, config, NULL);
+}
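`double_from_ucs4` relies on `PyOS_string_to_double` reporting, via its `endptr` argument, how much of the buffer was consumed; that partial-parse behaviour is what lets `to_complex_int` resume after the real part. A standalone illustration (not part of the patch):

    #include <Python.h>
    #include <stdio.h>

    int main(void)
    {
        Py_Initialize();
        char *end;
        /* parsing stops at '+': d == 1.5 and end points at "+2j" */
        double d = PyOS_string_to_double("1.5+2j", &end, NULL);
        printf("%g, rest=%s\n", d, end);
        Py_Finalize();
        return 0;
    }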
diff --git a/numpy/core/src/multiarray/textreading/conversions.h b/numpy/core/src/multiarray/textreading/conversions.h
new file mode 100644
index 000000000..222eea4e7
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/conversions.h
@@ -0,0 +1,57 @@
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_
+
+#include <stdbool.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+
+#include "textreading/parser_config.h"
+
+NPY_NO_EXPORT int
+to_bool(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+NPY_NO_EXPORT int
+to_float(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+NPY_NO_EXPORT int
+to_double(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+NPY_NO_EXPORT int
+to_cfloat(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+NPY_NO_EXPORT int
+to_cdouble(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+NPY_NO_EXPORT int
+to_string(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *unused);
+
+NPY_NO_EXPORT int
+to_unicode(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *unused);
+
+NPY_NO_EXPORT int
+to_generic_with_converter(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *unused, PyObject *func);
+
+NPY_NO_EXPORT int
+to_generic(PyArray_Descr *descr,
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
+ parser_config *pconfig);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_ */
diff --git a/numpy/core/src/multiarray/textreading/field_types.c b/numpy/core/src/multiarray/textreading/field_types.c
new file mode 100644
index 000000000..0722efd57
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/field_types.c
@@ -0,0 +1,201 @@
+#include "field_types.h"
+#include "conversions.h"
+#include "str_to_int.h"
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+#include "alloc.h"
+
+#include "textreading/growth.h"
+
+
+NPY_NO_EXPORT void
+field_types_xclear(int num_field_types, field_type *ft) {
+ assert(num_field_types >= 0);
+ if (ft == NULL) {
+ return;
+ }
+ for (int i = 0; i < num_field_types; i++) {
+ Py_XDECREF(ft[i].descr);
+ ft[i].descr = NULL;
+ }
+ PyMem_Free(ft);
+}
+
+
+/*
+ * Fetch custom converters for the builtin NumPy DTypes (or the generic one).
+ * Structured DTypes get unpacked and `object` uses the generic method.
+ *
+ * TODO: This should probably be moved on the DType object in some form,
+ * to allow user DTypes to define their own converters.
+ */
+static set_from_ucs4_function *
+get_from_ucs4_function(PyArray_Descr *descr)
+{
+ if (descr->type_num == NPY_BOOL) {
+ return &to_bool;
+ }
+ else if (PyDataType_ISSIGNED(descr)) {
+ switch (descr->elsize) {
+ case 1:
+ return &to_int8;
+ case 2:
+ return &to_int16;
+ case 4:
+ return &to_int32;
+ case 8:
+ return &to_int64;
+ default:
+ assert(0);
+ }
+ }
+ else if (PyDataType_ISUNSIGNED(descr)) {
+ switch (descr->elsize) {
+ case 1:
+ return &to_uint8;
+ case 2:
+ return &to_uint16;
+ case 4:
+ return &to_uint32;
+ case 8:
+ return &to_uint64;
+ default:
+ assert(0);
+ }
+ }
+ else if (descr->type_num == NPY_FLOAT) {
+ return &to_float;
+ }
+ else if (descr->type_num == NPY_DOUBLE) {
+ return &to_double;
+ }
+ else if (descr->type_num == NPY_CFLOAT) {
+ return &to_cfloat;
+ }
+ else if (descr->type_num == NPY_CDOUBLE) {
+ return &to_cdouble;
+ }
+ else if (descr->type_num == NPY_STRING) {
+ return &to_string;
+ }
+ else if (descr->type_num == NPY_UNICODE) {
+ return &to_unicode;
+ }
+ return &to_generic;
+}
+
+
+/*
+ * Note that the function cleans up `ft` on error. If `num_field_types < 0`
+ * cleanup has already happened in the internal call.
+ */
+static npy_intp
+field_type_grow_recursive(PyArray_Descr *descr,
+ npy_intp num_field_types, field_type **ft, npy_intp *ft_size,
+ npy_intp field_offset)
+{
+ if (PyDataType_HASSUBARRAY(descr)) {
+ PyArray_Dims shape = {NULL, -1};
+
+ if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) {
+ PyErr_SetString(PyExc_ValueError, "invalid subarray shape");
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ npy_intp size = PyArray_MultiplyList(shape.ptr, shape.len);
+ npy_free_cache_dim_obj(shape);
+ for (npy_intp i = 0; i < size; i++) {
+ num_field_types = field_type_grow_recursive(descr->subarray->base,
+ num_field_types, ft, ft_size, field_offset);
+ field_offset += descr->subarray->base->elsize;
+ if (num_field_types < 0) {
+ return -1;
+ }
+ }
+ return num_field_types;
+ }
+ else if (PyDataType_HASFIELDS(descr)) {
+ npy_intp num_descr_fields = PyTuple_Size(descr->names);
+ if (num_descr_fields < 0) {
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ for (npy_intp i = 0; i < num_descr_fields; i++) {
+ PyObject *key = PyTuple_GET_ITEM(descr->names, i);
+ PyObject *tup = PyObject_GetItem(descr->fields, key);
+ if (tup == NULL) {
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ PyArray_Descr *field_descr;
+ PyObject *title;
+ int offset;
+ if (!PyArg_ParseTuple(tup, "Oi|O", &field_descr, &offset, &title)) {
+ Py_DECREF(tup);
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ Py_DECREF(tup);
+ num_field_types = field_type_grow_recursive(
+ field_descr, num_field_types, ft, ft_size,
+ field_offset + offset);
+ if (num_field_types < 0) {
+ return -1;
+ }
+ }
+ return num_field_types;
+ }
+
+ if (*ft_size <= num_field_types) {
+ npy_intp alloc_size = grow_size_and_multiply(
+ ft_size, 4, sizeof(field_type));
+ if (alloc_size < 0) {
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ field_type *new_ft = PyMem_Realloc(*ft, alloc_size);
+ if (new_ft == NULL) {
+ field_types_xclear(num_field_types, *ft);
+ return -1;
+ }
+ *ft = new_ft;
+ }
+
+ Py_INCREF(descr);
+ (*ft)[num_field_types].descr = descr;
+ (*ft)[num_field_types].set_from_ucs4 = get_from_ucs4_function(descr);
+ (*ft)[num_field_types].structured_offset = field_offset;
+
+ return num_field_types + 1;
+}
+
+
+/*
+ * Prepare the "field_types" for the given dtypes/descriptors. Currently,
+ * we copy the itemsize, but the main thing is that we check for custom
+ * converters.
+ */
+NPY_NO_EXPORT npy_intp
+field_types_create(PyArray_Descr *descr, field_type **ft)
+{
+ if (descr->subarray != NULL) {
+ /*
+ * This could probably be allowed, but NumPy absorbs the dimensions
+ * so it is an awkward corner case that probably never really worked.
+ */
+ PyErr_SetString(PyExc_TypeError,
+ "file reader does not support subarray dtypes. You can"
+ "put the dtype into a structured one using "
+ "`np.dtype(('name', dtype))` to avoid this limitation.");
+ return -1;
+ }
+
+ npy_intp ft_size = 4;
+ *ft = PyMem_Malloc(ft_size * sizeof(field_type));
+ if (*ft == NULL) {
+ return -1;
+ }
+ return field_type_grow_recursive(descr, 0, ft, &ft_size, 0);
+}
diff --git a/numpy/core/src/multiarray/textreading/field_types.h b/numpy/core/src/multiarray/textreading/field_types.h
new file mode 100644
index 000000000..f26e00a5e
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/field_types.h
@@ -0,0 +1,67 @@
+
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+
+#include "textreading/parser_config.h"
+
+/**
+ * Function defining the conversion for each value.
+ *
+ * This function must support unaligned memory access. As of now, there is
+ * no special error handling (in whatever form): We assume that it is always
+ * reasonable to raise a `ValueError` noting the string that failed to be
+ * converted.
+ *
+ * NOTE: An earlier version of the code had unused default values (pandas
+ * does this) when columns are missing. We could define this either
+ * by passing `NULL` in, or by adding a default explicitly somewhere.
+ * (I think users should probably have to define the default, at which
+ * point it doesn't matter here.)
+ *
+ * NOTE: We are currently passing the parser config, this could be made public
+ * or could be set up to be dtype specific/private. Always passing
+ * pconfig fully seems easier right now even if it may change.
+ * (A future use-case may for example be user-specified strings that are
+ * considered boolean True or False).
+ *
+ * TODO: Aside from nailing down the above notes, it may be nice to expose
+ * these function publically. This could allow user DTypes to provide
+ * a converter or custom converters written in C rather than Python.
+ *
+ * @param descr The NumPy descriptor of the field (may be byte-swapped, etc.)
+ * @param str Pointer to the beginning of the UCS4 string to be parsed.
+ * @param end Pointer to the end of the UCS4 string. This value is currently
+ * guaranteed to be `\0`, ensuring that parsers can rely on
+ * nul-termination.
+ * @param dataptr The pointer where to store the parsed value
+ * @param pconfig Additional configuration for the parser.
+ * @returns 0 on success and -1 on failure. If the return value is -1 an
+ * error may or may not be set. If an error is set, it is chained
+ * behind the generic ValueError.
+ */
+typedef int (set_from_ucs4_function)(
+ PyArray_Descr *descr, const Py_UCS4 *str, const Py_UCS4 *end,
+ char *dataptr, parser_config *pconfig);
+
+typedef struct _field_type {
+ set_from_ucs4_function *set_from_ucs4;
+ /* The original NumPy descriptor */
+ PyArray_Descr *descr;
+ /* Offset to this entry within row. */
+ npy_intp structured_offset;
+} field_type;
+
+
+NPY_NO_EXPORT void
+field_types_xclear(int num_field_types, field_type *ft);
+
+NPY_NO_EXPORT npy_intp
+field_types_create(PyArray_Descr *descr, field_type **ft);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_ */
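The typedef above fixes the contract a converter must implement. As a hypothetical example (not part of the patch; `to_yesno` is an invented name), a converter parsing "Y"/"N" fields into a one-byte boolean could look like:

    /* Hypothetical converter matching set_from_ucs4_function. */
    static int
    to_yesno(PyArray_Descr *NPY_UNUSED(descr),
             const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
             parser_config *NPY_UNUSED(pconfig))
    {
        if (end - str == 1 && (*str == 'Y' || *str == 'N')) {
            *dataptr = (*str == 'Y');
            return 0;   /* success */
        }
        return -1;      /* read_rows chains a ValueError naming the token */
    }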
diff --git a/numpy/core/src/multiarray/textreading/growth.c b/numpy/core/src/multiarray/textreading/growth.c
new file mode 100644
index 000000000..49a09d572
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/growth.c
@@ -0,0 +1,47 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+
+#include "templ_common.h"
+
+/*
+ * Helper function taking the size input and growing it (based on min_grow).
+ * The current scheme is a minimum growth and a general growth by 25%
+ * overallocation. This is then capped at 2**20 elements, as that puts us
+ * in the range of large page sizes (so it is presumably more than enough).
+ *
+ * It further multiplies it with `itemsize` and ensures that all results fit
+ * into an `npy_intp`.
+ * Returns -1 if any overflow occurred or the result would not fit.
+ * The user has to ensure the input is ssize_t but not negative.
+ */
+NPY_NO_EXPORT npy_intp
+grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize) {
+ /* min_grow must be a power of two: */
+ assert((min_grow & (min_grow - 1)) == 0);
+ npy_uintp new_size = (npy_uintp)*size;
+ npy_intp growth = *size >> 2;
+ if (growth <= min_grow) {
+ /* can never lead to overflow if we are using min_growth */
+ new_size += min_grow;
+ }
+ else {
+ if (growth > 1 << 20) {
+ /* limit growth to order of MiB (even hugepages are not larger) */
+ growth = 1 << 20;
+ }
+ new_size += growth + min_grow - 1;
+ new_size &= ~min_grow;
+
+ if (new_size > NPY_MAX_INTP) {
+ return -1;
+ }
+ }
+ *size = (npy_intp)new_size;
+ npy_intp alloc_size;
+ if (npy_mul_with_overflow_intp(&alloc_size, (npy_intp)new_size, itemsize)) {
+ return -1;
+ }
+ return alloc_size;
+}
+
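A simplified model of the policy (not part of the patch; assumes no overflow): the step is max(size/4, min_grow), capped at 2**20 and rounded up to a multiple of `min_grow`.

    #include <stdio.h>
    #include <stdint.h>

    static int64_t
    grow_model(int64_t size, int64_t min_grow)
    {
        int64_t growth = size >> 2;               /* 25% */
        if (growth <= min_grow) {
            return size + min_grow;
        }
        if (growth > (int64_t)1 << 20) {
            growth = (int64_t)1 << 20;            /* cap the step size */
        }
        /* round up to a multiple of min_grow (a power of two) */
        return (size + growth + min_grow - 1) & ~(min_grow - 1);
    }

    int main(void)
    {
        int64_t size = 0;
        for (int i = 0; i < 8; i++) {
            size = grow_model(size, 4);
            printf("%lld\n", (long long)size);    /* 4 8 12 16 20 28 36 48 */
        }
        return 0;
    }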
diff --git a/numpy/core/src/multiarray/textreading/growth.h b/numpy/core/src/multiarray/textreading/growth.h
new file mode 100644
index 000000000..237b77ad3
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/growth.h
@@ -0,0 +1,7 @@
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_
+
+NPY_NO_EXPORT npy_intp
+grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_ */
diff --git a/numpy/core/src/multiarray/textreading/parser_config.h b/numpy/core/src/multiarray/textreading/parser_config.h
new file mode 100644
index 000000000..00e911667
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/parser_config.h
@@ -0,0 +1,61 @@
+
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_
+
+#include <stdbool.h>
+
+typedef struct {
+ /*
+ * Field delimiter character.
+ * Typically ',', ' ', '\t', ignored if `delimiter_is_whitespace` is true.
+ */
+ Py_UCS4 delimiter;
+
+ /*
+ * Character used to quote fields.
+ * Typically '"' or "'". To disable quoting we set this to UINT_MAX
+ * (which is not a valid unicode character and thus cannot occur in the
+ * file; the same is used for all other characters if necessary).
+ */
+ Py_UCS4 quote;
+
+ /*
+ * Character(s) that indicates the start of a comment.
+ * Typically '#', '%' or ';'.
+ * When encountered in a line and not inside quotes, all characters
+ * from the comment character(s) to the end of the line are ignored.
+ */
+ Py_UCS4 comment;
+
+ /*
+ * Ignore whitespace at the beginning of a field (outside/before quotes).
+ * Is (and must be) set if `delimiter_is_whitespace`.
+ */
+ bool ignore_leading_whitespace;
+
+ /*
+ * If true, the delimiter is ignored and any unicode whitespace is used
+ * for splitting (same as `string.split()` in Python). In that case
+ * `ignore_leading_whitespace` should also be set.
+ */
+ bool delimiter_is_whitespace;
+
+ /*
+ * The imaginary unit character. Default is `j`.
+ */
+ Py_UCS4 imaginary_unit;
+
+ /*
+ * Data should be encoded as `latin1` when using python converter
+ * (implementing `loadtxt` default Python 2 compatibility mode).
+ * The c byte converter is used when the user requested `dtype="S"`.
+ * In this case we go via `dtype=object`, however, loadtxt allows latin1
+ * while normal object to string casts only accept ASCII, so it ensures
+ * that the object array already contains bytes and not strings.
+ */
+ bool python_byte_converters;
+ bool c_byte_converters;
+} parser_config;
+
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_ */
diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
new file mode 100644
index 000000000..7af5ee891
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -0,0 +1,312 @@
+#include <stdio.h>
+#include <stdbool.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+#include "npy_argparse.h"
+#include "common.h"
+#include "conversion_utils.h"
+
+#include "textreading/parser_config.h"
+#include "textreading/stream_pyobject.h"
+#include "textreading/field_types.h"
+#include "textreading/rows.h"
+#include "textreading/str_to_int.h"
+
+
+//
+// `usecols` must point to a Python object that is Py_None or a 1-d contiguous
+// numpy array with data type int32.
+//
+// `dtype` must point to a Python object that is Py_None or a numpy dtype
+// instance. If the latter, code and sizes must be arrays of length
+// num_dtype_fields, holding the flattened data field type codes and byte
+// sizes. (num_dtype_fields, codes, and sizes can be inferred from dtype,
+// but we do that in Python code.)
+//
+// If both `usecols` and `dtype` are not None, and the data type is compound,
+// then len(usecols) must equal num_dtype_fields.
+//
+// If `dtype` is given and it is compound, and `usecols` is None, then the
+// number of columns in the file must match the number of fields in `dtype`.
+//
+static PyObject *
+_readtext_from_stream(stream *s,
+ parser_config *pc, Py_ssize_t num_usecols, Py_ssize_t usecols[],
+ Py_ssize_t skiplines, Py_ssize_t max_rows,
+ PyObject *converters, PyObject *dtype)
+{
+ PyArrayObject *arr = NULL;
+ PyArray_Descr *out_dtype = NULL;
+ field_type *ft = NULL;
+
+ /*
+ * If the dtype is not structured, the result is considered
+ * "homogeneous" and we have to discover the number of
+ * columns.
+ */
+ out_dtype = (PyArray_Descr *)dtype;
+ Py_INCREF(out_dtype);
+
+ Py_ssize_t num_fields = field_types_create(out_dtype, &ft);
+ if (num_fields < 0) {
+ goto finish;
+ }
+ bool homogeneous = num_fields == 1 && ft[0].descr == out_dtype;
+
+ if (!homogeneous && usecols != NULL && num_usecols != num_fields) {
+ PyErr_Format(PyExc_TypeError,
+ "If a structured dtype is used, the number of columns in "
+ "`usecols` must match the effective number of fields. "
+ "But %zd usecols were given and the number of fields is %zd.",
+ num_usecols, num_fields);
+ goto finish;
+ }
+
+ arr = read_rows(
+ s, max_rows, num_fields, ft, pc,
+ num_usecols, usecols, skiplines, converters,
+ NULL, out_dtype, homogeneous);
+ if (arr == NULL) {
+ goto finish;
+ }
+
+ finish:
+ Py_XDECREF(out_dtype);
+ field_types_xclear(num_fields, ft);
+ return (PyObject *)arr;
+}
+
+
+static int
+parse_control_character(PyObject *obj, Py_UCS4 *character)
+{
+ if (obj == Py_None) {
+ *character = (Py_UCS4)-1; /* character beyond unicode range */
+ return 1;
+ }
+ if (!PyUnicode_Check(obj) || PyUnicode_GetLength(obj) != 1) {
+ PyErr_Format(PyExc_TypeError,
+ "Text reading control character must be a single unicode "
+ "character or None; but got: %.100R", obj);
+ return 0;
+ }
+ *character = PyUnicode_READ_CHAR(obj, 0);
+ return 1;
+}
+
+
+/*
+ * A (somewhat verbose) check that none of the control characters match or are
+ * newline. Most of these combinations are completely fine, just weird or
+ * surprising.
+ * (I.e. there is an implicit priority for control characters, so if a comment
+ * matches a delimiter, it would just be a comment.)
+ * In theory some `delimiter=None` paths could have a "meaning", but let us
+ * assume that users are better off setting one of the control chars to `None`
+ * for clarity.
+ *
+ * This also checks that the control characters cannot be newlines.
+ */
+static int
+error_if_matching_control_characters(
+ Py_UCS4 delimiter, Py_UCS4 quote, Py_UCS4 comment)
+{
+ char *control_char1;
+ char *control_char2 = NULL;
+ if (comment != (Py_UCS4)-1) {
+ control_char1 = "comment";
+ if (comment == '\r' || comment == '\n') {
+ goto error;
+ }
+ else if (comment == quote) {
+ control_char2 = "quotechar";
+ goto error;
+ }
+ else if (comment == delimiter) {
+ control_char2 = "delimiter";
+ goto error;
+ }
+ }
+ if (quote != (Py_UCS4)-1) {
+ control_char1 = "quotechar";
+ if (quote == '\r' || quote == '\n') {
+ goto error;
+ }
+ else if (quote == delimiter) {
+ control_char2 = "delimiter";
+ goto error;
+ }
+ }
+ if (delimiter != (Py_UCS4)-1) {
+ control_char1 = "delimiter";
+ if (delimiter == '\r' || delimiter == '\n') {
+ goto error;
+ }
+ }
+ /* The above doesn't work with delimiter=None, which means "whitespace" */
+ if (delimiter == (Py_UCS4)-1) {
+ control_char1 = "delimiter";
+ if (Py_UNICODE_ISSPACE(comment)) {
+ control_char2 = "comment";
+ goto error;
+ }
+ else if (Py_UNICODE_ISSPACE(quote)) {
+ control_char2 = "quotechar";
+ goto error;
+ }
+ }
+ return 0;
+
+ error:
+ if (control_char2 != NULL) {
+ PyErr_Format(PyExc_TypeError,
+ "The values for control characters '%s' and '%s' are "
+ "incompatible",
+ control_char1, control_char2);
+ }
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "control character '%s' cannot be a newline (`\\r` or `\\n`).",
+ control_char1);
+ }
+ return -1;
+}
+
+
+NPY_NO_EXPORT PyObject *
+_load_from_filelike(PyObject *NPY_UNUSED(mod),
+ PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
+{
+ PyObject *file;
+ Py_ssize_t skiplines = 0;
+ Py_ssize_t max_rows = -1;
+ PyObject *usecols_obj = Py_None;
+ PyObject *converters = Py_None;
+
+ PyObject *dtype = Py_None;
+ PyObject *encoding_obj = Py_None;
+ const char *encoding = NULL;
+
+ parser_config pc = {
+ .delimiter = ',',
+ .comment = '#',
+ .quote = '"',
+ .imaginary_unit = 'j',
+ .delimiter_is_whitespace = false,
+ .ignore_leading_whitespace = false,
+ .python_byte_converters = false,
+ .c_byte_converters = false,
+ };
+ bool filelike = true;
+
+ PyObject *arr = NULL;
+
+ NPY_PREPARE_ARGPARSER;
+ if (npy_parse_arguments("_load_from_filelike", args, len_args, kwnames,
+ "file", NULL, &file,
+ "|delimiter", &parse_control_character, &pc.delimiter,
+ "|comment", &parse_control_character, &pc.comment,
+ "|quote", &parse_control_character, &pc.quote,
+ "|imaginary_unit", &parse_control_character, &pc.imaginary_unit,
+ "|usecols", NULL, &usecols_obj,
+ "|skiplines", &PyArray_IntpFromPyIntConverter, &skiplines,
+ "|max_rows", &PyArray_IntpFromPyIntConverter, &max_rows,
+ "|converters", NULL, &converters,
+ "|dtype", NULL, &dtype,
+ "|encoding", NULL, &encoding_obj,
+ "|filelike", &PyArray_BoolConverter, &filelike,
+ "|byte_converters", &PyArray_BoolConverter, &pc.python_byte_converters,
+ "|c_byte_converters", PyArray_BoolConverter, &pc.c_byte_converters,
+ NULL, NULL, NULL) < 0) {
+ return NULL;
+ }
+
+ /* Reject matching control characters, they just rarely make sense anyway */
+ if (error_if_matching_control_characters(
+ pc.delimiter, pc.quote, pc.comment) < 0) {
+ return NULL;
+ }
+
+ if (pc.delimiter == (Py_UCS4)-1) {
+ pc.delimiter_is_whitespace = true;
+ /* Ignore leading whitespace to match `string.split(None)` */
+ pc.ignore_leading_whitespace = true;
+ }
+
+ if (!PyArray_DescrCheck(dtype)) {
+ PyErr_SetString(PyExc_TypeError,
+ "internal error: dtype must be provided and be a NumPy dtype");
+ return NULL;
+ }
+
+ if (encoding_obj != Py_None) {
+ if (!PyUnicode_Check(encoding_obj)) {
+ PyErr_SetString(PyExc_TypeError,
+ "encoding must be a unicode string.");
+ return NULL;
+ }
+ encoding = PyUnicode_AsUTF8(encoding_obj);
+ if (encoding == NULL) {
+ return NULL;
+ }
+ }
+
+ /*
+ * Parse usecols, the rest of NumPy has no clear helper for this, so do
+ * it here manually.
+ */
+ Py_ssize_t num_usecols = -1;
+ Py_ssize_t *usecols = NULL;
+ if (usecols_obj != Py_None) {
+ num_usecols = PySequence_Length(usecols_obj);
+ if (num_usecols < 0) {
+ return NULL;
+ }
+ /* Calloc just to not worry about overflow */
+ usecols = PyMem_Calloc(num_usecols, sizeof(Py_ssize_t));
+ if (usecols == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ for (Py_ssize_t i = 0; i < num_usecols; i++) {
+ PyObject *tmp = PySequence_GetItem(usecols_obj, i);
+ if (tmp == NULL) {
+ PyMem_FREE(usecols);
+ return NULL;
+ }
+ usecols[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError);
+ if (error_converting(usecols[i])) {
+ if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+ PyErr_Format(PyExc_TypeError,
+ "usecols must be an int or a sequence of ints but "
+ "it contains at least one element of type '%s'",
+ Py_TYPE(tmp)->tp_name);
+ }
+ Py_DECREF(tmp);
+ PyMem_FREE(usecols);
+ return NULL;
+ }
+ Py_DECREF(tmp);
+ }
+ }
+
+ stream *s;
+ if (filelike) {
+ s = stream_python_file(file, encoding);
+ }
+ else {
+ s = stream_python_iterable(file, encoding);
+ }
+ if (s == NULL) {
+ PyMem_FREE(usecols);
+ return NULL;
+ }
+
+ arr = _readtext_from_stream(
+ s, &pc, num_usecols, usecols, skiplines, max_rows, converters, dtype);
+ stream_close(s);
+ PyMem_FREE(usecols);
+ return arr;
+}
+
diff --git a/numpy/core/src/multiarray/textreading/readtext.h b/numpy/core/src/multiarray/textreading/readtext.h
new file mode 100644
index 000000000..5cf48c555
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/readtext.h
@@ -0,0 +1,7 @@
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_
+
+NPY_NO_EXPORT PyObject *
+_load_from_filelike(PyObject *mod,
+ PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_ */
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
new file mode 100644
index 000000000..e30ff835e
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -0,0 +1,481 @@
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+#include "numpy/npy_3kcompat.h"
+#include "alloc.h"
+
+#include <string.h>
+#include <stdbool.h>
+
+#include "textreading/stream.h"
+#include "textreading/tokenize.h"
+#include "textreading/conversions.h"
+#include "textreading/field_types.h"
+#include "textreading/rows.h"
+#include "textreading/growth.h"
+
+/*
+ * Minimum size to grow the allocation by (or 25%). The 8KiB means the actual
+ * growth is within `8 KiB <= size < 16 KiB` (depending on the row size).
+ */
+#define MIN_BLOCK_SIZE (1 << 13)
+
+
+
+/*
+ * Create the array of converter functions from the Python converters.
+ */
+static PyObject **
+create_conv_funcs(
+ PyObject *converters, Py_ssize_t num_fields, const Py_ssize_t *usecols)
+{
+ assert(converters != Py_None);
+
+ PyObject **conv_funcs = PyMem_Calloc(num_fields, sizeof(PyObject *));
+ if (conv_funcs == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ if (PyCallable_Check(converters)) {
+ /* a single converter used for all columns individually */
+ for (Py_ssize_t i = 0; i < num_fields; i++) {
+ Py_INCREF(converters);
+ conv_funcs[i] = converters;
+ }
+ return conv_funcs;
+ }
+ else if (!PyDict_Check(converters)) {
+ PyErr_SetString(PyExc_TypeError,
+ "converters must be a dictionary mapping columns to converter "
+ "functions or a single callable.");
+ goto error;
+ }
+
+ PyObject *key, *value;
+ Py_ssize_t pos = 0;
+ while (PyDict_Next(converters, &pos, &key, &value)) {
+ Py_ssize_t column = PyNumber_AsSsize_t(key, PyExc_IndexError);
+ if (column == -1 && PyErr_Occurred()) {
+ PyErr_Format(PyExc_TypeError,
+ "keys of the converters dictionary must be integers; "
+ "got %.100R", key);
+ goto error;
+ }
+ if (usecols != NULL) {
+ /*
+ * This code searches for the corresponding usecol. It is
+ * identical to the legacy usecols code, which has two weaknesses:
+ * 1. It fails for duplicated usecols only setting converter for
+ * the first one.
+ * 2. It fails e.g. if usecols uses negative indexing and
+ * converters does not. (This is a feature, since it allows
+ * us to correctly normalize converters to result column here.)
+ */
+ Py_ssize_t i = 0;
+ for (; i < num_fields; i++) {
+ if (column == usecols[i]) {
+ column = i;
+ break;
+ }
+ }
+ if (i == num_fields) {
+ continue; /* ignore unused converter */
+ }
+ }
+ else {
+ if (column < -num_fields || column >= num_fields) {
+ PyErr_Format(PyExc_ValueError,
+ "converter specified for column %zd, which is invalid "
+ "for the number of fields %d.", column, num_fields);
+ goto error;
+ }
+ if (column < 0) {
+ column += num_fields;
+ }
+ }
+ if (!PyCallable_Check(value)) {
+ PyErr_Format(PyExc_TypeError,
+ "values of the converters dictionary must be callable, "
+ "but the value associated with key %R is not", key);
+ goto error;
+ }
+ Py_INCREF(value);
+ conv_funcs[column] = value;
+ }
+ return conv_funcs;
+
+ error:
+ for (Py_ssize_t i = 0; i < num_fields; i++) {
+ Py_XDECREF(conv_funcs[i]);
+ }
+ PyMem_FREE(conv_funcs);
+ return NULL;
+}
+
+/**
+ * Read a file into the provided array, or create (and possibly grow) an
+ * array to read into.
+ *
+ * @param s The stream object/struct providing reading capabilities used by
+ * the tokenizer.
+ * @param max_rows The number of rows to read, or -1. If negative
+ * all rows are read.
+ * @param num_field_types The number of field types stored in `field_types`.
+ * @param field_types Information about the dtype for each column (or one if
+ * `homogeneous`).
+ * @param pconfig Pointer to the parser config object used by both the
+ * tokenizer and the conversion functions.
+ * @param num_usecols The number of columns in `usecols`.
+ * @param usecols An array of length `num_usecols` or NULL. If given indicates
+ * which column is read for each individual row (negative columns are
+ * accepted).
+ * @param skiplines The number of lines to skip, these lines are ignored.
+ * @param converters Python dictionary of converters. Finalizing converters
+ * is difficult without information about the number of columns.
+ * @param data_array An array to be filled or NULL. In either case a new
+ * reference is returned (the reference to `data_array` is not stolen).
+ * @param out_descr The dtype used for allocating a new array. This is not
+ * used if `data_array` is provided. Note that the actual dtype of the
+ * returned array can differ for strings.
+ * @param homogeneous Whether the datatype of the array is homogeneous,
+ * i.e. not structured. In this case the number of columns has to be
+ * discovered and the returned array will be 2-dimensional rather than
+ * 1-dimensional.
+ *
+ * @returns Returns the result as an array object or NULL on error. The result
+ * is always a new reference (even when `data_array` was passed in).
+ */
+NPY_NO_EXPORT PyArrayObject *
+read_rows(stream *s,
+ npy_intp max_rows, Py_ssize_t num_field_types, field_type *field_types,
+ parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols,
+ Py_ssize_t skiplines, PyObject *converters,
+ PyArrayObject *data_array, PyArray_Descr *out_descr,
+ bool homogeneous)
+{
+ char *data_ptr = NULL;
+ Py_ssize_t current_num_fields;
+ npy_intp row_size = out_descr->elsize;
+ PyObject **conv_funcs = NULL;
+
+ bool needs_init = PyDataType_FLAGCHK(out_descr, NPY_NEEDS_INIT);
+
+ int ndim = homogeneous ? 2 : 1;
+ npy_intp result_shape[2] = {0, 1};
+
+ bool data_array_allocated = data_array == NULL;
+ /* Make sure we own `data_array` for the purpose of error handling */
+ Py_XINCREF(data_array);
+ size_t rows_per_block = 1; /* will be increased depending on row size */
+ npy_intp data_allocated_rows = 0;
+
+ /* We give a warning if max_rows is used and an empty line is encountered */
+ bool give_empty_row_warning = max_rows >= 0;
+
+ int ts_result = 0;
+ tokenizer_state ts;
+ if (tokenizer_init(&ts, pconfig) < 0) {
+ goto error;
+ }
+
+ /* Set the actual number of fields if it is already known, otherwise -1 */
+ Py_ssize_t actual_num_fields = -1;
+ if (usecols != NULL) {
+ assert(homogeneous || num_field_types == num_usecols);
+ actual_num_fields = num_usecols;
+ }
+ else if (!homogeneous) {
+ assert(usecols == NULL || num_field_types == num_usecols);
+ actual_num_fields = num_field_types;
+ }
+
+ for (Py_ssize_t i = 0; i < skiplines; i++) {
+ ts.state = TOKENIZE_GOTO_LINE_END;
+ ts_result = tokenize(s, &ts, pconfig);
+ if (ts_result < 0) {
+ goto error;
+ }
+ else if (ts_result != 0) {
+ /* Fewer lines than skiplines is acceptable */
+ break;
+ }
+ }
+
+ Py_ssize_t row_count = 0; /* number of rows actually processed */
+ while ((max_rows < 0 || row_count < max_rows) && ts_result == 0) {
+ ts_result = tokenize(s, &ts, pconfig);
+ if (ts_result < 0) {
+ goto error;
+ }
+ current_num_fields = ts.num_fields;
+ field_info *fields = ts.fields;
+ if (NPY_UNLIKELY(ts.num_fields == 0)) {
+ /*
+ * Deprecated NumPy 1.23, 2021-01-13 (not really a deprecation,
+ * but similar policy should apply to removing the warning again)
+ */
+ /* Tokenizer may give a final "empty line" even if there is none */
+ if (give_empty_row_warning && ts_result == 0) {
+ give_empty_row_warning = false;
+ if (PyErr_WarnFormat(PyExc_UserWarning, 3,
+ "Input line %zd contained no data and will not be "
+ "counted towards `max_rows=%zd`. This differs from "
+ "the behaviour in NumPy <=1.22 which counted lines "
+ "rather than rows. If desired, the previous behaviour "
+ "can be achieved by using `itertools.islice`.\n"
+ "Please see the 1.23 release notes for an example on "
+ "how to do this. If you wish to ignore this warning, "
+ "use `warnings.filterwarnings`. This warning is "
+ "expected to be removed in the future and is given "
+ "only once per `loadtxt` call.",
+ row_count + skiplines + 1, max_rows) < 0) {
+ goto error;
+ }
+ }
+ continue; /* Ignore empty line */
+ }
+
+ if (NPY_UNLIKELY(data_ptr == NULL)) {
+ // We've deferred some of the initialization tasks to here,
+ // because we've now read the first line, and we definitively
+ // know how many fields (i.e. columns) we will be processing.
+ if (actual_num_fields == -1) {
+ actual_num_fields = current_num_fields;
+ }
+
+ if (converters != Py_None) {
+ conv_funcs = create_conv_funcs(
+ converters, actual_num_fields, usecols);
+ if (conv_funcs == NULL) {
+ goto error;
+ }
+ }
+
+ /* Note that result_shape[1] is only used if homogeneous is true */
+ result_shape[1] = actual_num_fields;
+ if (homogeneous) {
+ row_size *= actual_num_fields;
+ }
+
+ if (data_array == NULL) {
+ if (max_rows < 0) {
+ /*
+ * Negative max_rows denotes to read the whole file, we
+ * approach this by allocating ever larger blocks.
+ * Adds a number of rows based on `MIN_BLOCK_SIZE`.
+ * Note: later code grows assuming this is a power of two.
+ */
+ if (row_size == 0) {
+ /* actual rows_per_block should not matter here */
+ rows_per_block = 512;
+ }
+ else {
+ /* safe on overflow since min_rows will be 0 or 1 */
+ size_t min_rows = (
+ (MIN_BLOCK_SIZE + row_size - 1) / row_size);
+ while (rows_per_block < min_rows) {
+ rows_per_block *= 2;
+ }
+ }
+ data_allocated_rows = rows_per_block;
+ }
+ else {
+ data_allocated_rows = max_rows;
+ }
+ result_shape[0] = data_allocated_rows;
+ Py_INCREF(out_descr);
+ /*
+ * We do not use Empty, as it would fill with None
+ * and requiring decref'ing if we shrink again.
+ */
+ data_array = (PyArrayObject *)PyArray_SimpleNewFromDescr(
+ ndim, result_shape, out_descr);
+#ifdef NPY_RELAXED_STRIDES_DEBUG
+ /* Incompatible with NPY_RELAXED_STRIDES_DEBUG due to growing */
+ if (result_shape[0] == 1) {
+ PyArray_STRIDES(data_array)[0] = row_size;
+ }
+#endif /* NPY_RELAXED_STRIDES_DEBUG */
+ if (data_array == NULL) {
+ goto error;
+ }
+ if (needs_init) {
+ memset(PyArray_BYTES(data_array), 0, PyArray_NBYTES(data_array));
+ }
+ }
+ else {
+ assert(max_rows >=0);
+ data_allocated_rows = max_rows;
+ }
+ data_ptr = PyArray_BYTES(data_array);
+ }
+
+ if (!usecols && (actual_num_fields != current_num_fields)) {
+ PyErr_Format(PyExc_ValueError,
+ "the number of columns changed from %d to %d at row %zu; "
+ "use `usecols` to select a subset and avoid this error",
+ actual_num_fields, current_num_fields, row_count+1);
+ goto error;
+ }
+
+ if (NPY_UNLIKELY(data_allocated_rows == row_count)) {
+ /*
+ * Grow by ~25% and rounded up to the next rows_per_block
+ * NOTE: This is based on very crude timings and could be refined!
+ */
+ npy_intp new_rows = data_allocated_rows;
+ npy_intp alloc_size = grow_size_and_multiply(
+ &new_rows, rows_per_block, row_size);
+ if (alloc_size < 0) {
+ /* should normally error much earlier, but make sure */
+ PyErr_SetString(PyExc_ValueError,
+ "array is too big. Cannot read file as a single array; "
+ "providing a maximum number of rows to read may help.");
+ goto error;
+ }
+
+ char *new_data = PyDataMem_UserRENEW(
+ PyArray_BYTES(data_array), alloc_size ? alloc_size : 1,
+ PyArray_HANDLER(data_array));
+ if (new_data == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ /* Replace the arrays data since it may have changed */
+ ((PyArrayObject_fields *)data_array)->data = new_data;
+ ((PyArrayObject_fields *)data_array)->dimensions[0] = new_rows;
+ data_ptr = new_data + row_count * row_size;
+ data_allocated_rows = new_rows;
+ if (needs_init) {
+ memset(data_ptr, '\0', (new_rows - row_count) * row_size);
+ }
+ }
+
+ for (Py_ssize_t i = 0; i < actual_num_fields; ++i) {
+ Py_ssize_t f; /* The field, either 0 (if homogeneous) or i. */
+ Py_ssize_t col; /* The column as read, remapped by usecols */
+ char *item_ptr;
+ if (homogeneous) {
+ f = 0;
+ item_ptr = data_ptr + i * field_types[0].descr->elsize;
+ }
+ else {
+ f = i;
+ item_ptr = data_ptr + field_types[f].structured_offset;
+ }
+
+ if (usecols == NULL) {
+ col = i;
+ }
+ else {
+ col = usecols[i];
+ if (col < 0) {
+ // Python-like column indexing: k = -1 means the last column.
+ col += current_num_fields;
+ }
+ if (NPY_UNLIKELY((col < 0) || (col >= current_num_fields))) {
+ PyErr_Format(PyExc_ValueError,
+ "invalid column index %d at row %zu with %d "
+ "columns",
+ usecols[i], current_num_fields, row_count+1);
+ goto error;
+ }
+ }
+
+ /*
+ * The following function calls represent the main "conversion"
+ * step, i.e. parsing the unicode string for each field and storing
+ * the result in the array.
+ */
+ int parser_res;
+ Py_UCS4 *str = ts.field_buffer + fields[col].offset;
+ Py_UCS4 *end = ts.field_buffer + fields[col + 1].offset - 1;
+ if (conv_funcs == NULL || conv_funcs[i] == NULL) {
+ parser_res = field_types[f].set_from_ucs4(field_types[f].descr,
+ str, end, item_ptr, pconfig);
+ }
+ else {
+ parser_res = to_generic_with_converter(field_types[f].descr,
+ str, end, item_ptr, pconfig, conv_funcs[i]);
+ }
+
+ if (NPY_UNLIKELY(parser_res < 0)) {
+ PyObject *exc, *val, *tb;
+ PyErr_Fetch(&exc, &val, &tb);
+
+ size_t length = end - str;
+ PyObject *string = PyUnicode_FromKindAndData(
+ PyUnicode_4BYTE_KIND, str, length);
+ if (string == NULL) {
+ npy_PyErr_ChainExceptions(exc, val, tb);
+ goto error;
+ }
+ PyErr_Format(PyExc_ValueError,
+ "could not convert string %.100R to %S at "
+ "row %zd, column %zd.",
+ string, field_types[f].descr, row_count, col+1);
+ Py_DECREF(string);
+ npy_PyErr_ChainExceptionsCause(exc, val, tb);
+ goto error;
+ }
+ }
+
+ ++row_count;
+ data_ptr += row_size;
+ }
+
+ tokenizer_clear(&ts);
+ PyMem_FREE(conv_funcs);
+
+ if (data_array == NULL) {
+ assert(row_count == 0 && result_shape[0] == 0);
+ if (actual_num_fields == -1) {
+ /*
+ * We found no rows and still have to discover the number of
+ * elements; we have no choice but to guess 1.
+ * NOTE: It may make sense to move this out of here to refine
+ * the behaviour where necessary.
+ */
+ result_shape[1] = 1;
+ }
+ else {
+ result_shape[1] = actual_num_fields;
+ }
+ Py_INCREF(out_descr);
+ data_array = (PyArrayObject *)PyArray_Empty(
+ ndim, result_shape, out_descr, 0);
+ }
+
+ /*
+ * Note that if there is no data, `data_array` may still be NULL and
+ * row_count is 0. In that case, always realloc just in case.
+ */
+ if (data_array_allocated && data_allocated_rows != row_count) {
+ size_t size = row_count * row_size;
+ char *new_data = PyDataMem_UserRENEW(
+ PyArray_BYTES(data_array), size ? size : 1,
+ PyArray_HANDLER(data_array));
+ if (new_data == NULL) {
+ Py_DECREF(data_array);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ ((PyArrayObject_fields *)data_array)->data = new_data;
+ ((PyArrayObject_fields *)data_array)->dimensions[0] = row_count;
+ }
+
+ return data_array;
+
+ error:
+ PyMem_FREE(conv_funcs);
+ tokenizer_clear(&ts);
+ Py_XDECREF(data_array);
+ return NULL;
+}
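
The reallocation strategy in the row-reading loop above grows the buffer by
roughly 25% and rounds up to a multiple of `rows_per_block`. As a minimal
sketch of that rule (the real `grow_size_and_multiply()` in
`textreading/growth.c` also multiplies by the item size and guards against
overflow; the helper name below is hypothetical):

    static npy_intp
    sketch_grow_rows(npy_intp current_rows, npy_intp rows_per_block)
    {
        npy_intp growth = current_rows / 4;       /* ~25% */
        if (growth < rows_per_block) {
            growth = rows_per_block;              /* grow by at least one block */
        }
        npy_intp new_rows = current_rows + growth;
        /* round up to the next multiple of rows_per_block */
        new_rows += rows_per_block - 1;
        new_rows -= new_rows % rows_per_block;
        return new_rows;                          /* caller multiplies by row_size */
    }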
diff --git a/numpy/core/src/multiarray/textreading/rows.h b/numpy/core/src/multiarray/textreading/rows.h
new file mode 100644
index 000000000..20eb9e186
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/rows.h
@@ -0,0 +1,22 @@
+
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdio.h>
+
+#include "textreading/stream.h"
+#include "textreading/field_types.h"
+#include "textreading/parser_config.h"
+
+
+NPY_NO_EXPORT PyArrayObject *
+read_rows(stream *s,
+ npy_intp nrows, Py_ssize_t num_field_types, field_type *field_types,
+ parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols,
+ Py_ssize_t skiplines, PyObject *converters,
+ PyArrayObject *data_array, PyArray_Descr *out_descr,
+ bool homogeneous);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_ */
diff --git a/numpy/core/src/multiarray/textreading/str_to_int.c b/numpy/core/src/multiarray/textreading/str_to_int.c
new file mode 100644
index 000000000..11b03e31c
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/str_to_int.c
@@ -0,0 +1,67 @@
+
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "lowlevel_strided_loops.h"
+
+#include <string.h>
+#include "textreading/str_to_int.h"
+#include "textreading/parser_config.h"
+
+
+#define DECLARE_TO_INT(intw, INT_MIN, INT_MAX, byteswap_unaligned) \
+ NPY_NO_EXPORT int \
+ to_##intw(PyArray_Descr *descr, \
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \
+ parser_config *pconfig) \
+ { \
+ int64_t parsed; \
+ intw##_t x; \
+ \
+ if (str_to_int64(str, end, INT_MIN, INT_MAX, &parsed) < 0) { \
+ return -1; \
+ } \
+ else { \
+ x = (intw##_t)parsed; \
+ } \
+ memcpy(dataptr, &x, sizeof(x)); \
+ if (!PyArray_ISNBO(descr->byteorder)) { \
+ byteswap_unaligned(dataptr); \
+ } \
+ return 0; \
+ }
+
+#define DECLARE_TO_UINT(uintw, UINT_MAX, byteswap_unaligned) \
+ NPY_NO_EXPORT int \
+ to_##uintw(PyArray_Descr *descr, \
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \
+ parser_config *pconfig) \
+ { \
+ uint64_t parsed; \
+ uintw##_t x; \
+ \
+ if (str_to_uint64(str, end, UINT_MAX, &parsed) < 0) { \
+ return -1; \
+ } \
+ else { \
+ x = (uintw##_t)parsed; \
+ } \
+ memcpy(dataptr, &x, sizeof(x)); \
+ if (!PyArray_ISNBO(descr->byteorder)) { \
+ byteswap_unaligned(dataptr); \
+ } \
+ return 0; \
+ }
+
+#define byteswap_nothing(ptr)
+
+DECLARE_TO_INT(int8, INT8_MIN, INT8_MAX, byteswap_nothing)
+DECLARE_TO_INT(int16, INT16_MIN, INT16_MAX, npy_bswap2_unaligned)
+DECLARE_TO_INT(int32, INT32_MIN, INT32_MAX, npy_bswap4_unaligned)
+DECLARE_TO_INT(int64, INT64_MIN, INT64_MAX, npy_bswap8_unaligned)
+
+DECLARE_TO_UINT(uint8, UINT8_MAX, byteswap_nothing)
+DECLARE_TO_UINT(uint16, UINT16_MAX, npy_bswap2_unaligned)
+DECLARE_TO_UINT(uint32, UINT32_MAX, npy_bswap4_unaligned)
+DECLARE_TO_UINT(uint64, UINT64_MAX, npy_bswap8_unaligned)
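
For reference, the mechanical expansion of `DECLARE_TO_INT(int16, INT16_MIN,
INT16_MAX, npy_bswap2_unaligned)` above is (up to whitespace) the following
function:

    NPY_NO_EXPORT int
    to_int16(PyArray_Descr *descr,
             const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr,
             parser_config *pconfig)
    {
        int64_t parsed;
        int16_t x;

        if (str_to_int64(str, end, INT16_MIN, INT16_MAX, &parsed) < 0) {
            return -1;
        }
        else {
            x = (int16_t)parsed;
        }
        memcpy(dataptr, &x, sizeof(x));
        /* byte-swap in place if the descriptor is not native byte order */
        if (!PyArray_ISNBO(descr->byteorder)) {
            npy_bswap2_unaligned(dataptr);
        }
        return 0;
    }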
diff --git a/numpy/core/src/multiarray/textreading/str_to_int.h b/numpy/core/src/multiarray/textreading/str_to_int.h
new file mode 100644
index 000000000..a0a89a0ef
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/str_to_int.h
@@ -0,0 +1,174 @@
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+
+#include "textreading/parser_config.h"
+
+
+/*
+ * The following two string-to-integer conversion functions are largely
+ * equivalent to the ones in Pandas. They live in this header so that they
+ * can easily be inlined into the functions below.
+ * Unlike pandas, they take an end pointer (they do not rely on \0) and
+ * return 0 on success or -1 on failure.
+ *
+ * The actual conversion functions are defined using macro templating below.
+ */
+NPY_FINLINE int
+str_to_int64(
+ const Py_UCS4 *p_item, const Py_UCS4 *p_end,
+ int64_t int_min, int64_t int_max, int64_t *result)
+{
+ const Py_UCS4 *p = (const Py_UCS4 *)p_item;
+ bool isneg = false;
+ int64_t number = 0;
+
+ // Skip leading spaces.
+ while (Py_UNICODE_ISSPACE(*p)) {
+ ++p;
+ }
+
+ // Handle sign.
+ if (*p == '-') {
+ isneg = true;
+ ++p;
+ }
+ else if (*p == '+') {
+ p++;
+ }
+
+ // Check that there is a first digit.
+ if (!isdigit(*p)) {
+ return -1;
+ }
+
+ if (isneg) {
+ // If number is greater than pre_min, at least one more digit
+ // can be processed without overflowing.
+ int dig_pre_min = -(int_min % 10);
+ int64_t pre_min = int_min / 10;
+
+ // Process the digits.
+ int d = *p;
+ while (isdigit(d)) {
+ if ((number > pre_min) || ((number == pre_min) && (d - '0' <= dig_pre_min))) {
+ number = number * 10 - (d - '0');
+ d = *++p;
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+ else {
+ // If number is less than pre_max, at least one more digit
+ // can be processed without overflowing.
+ int64_t pre_max = int_max / 10;
+ int dig_pre_max = int_max % 10;
+
+ // Process the digits.
+ int d = *p;
+ while (isdigit(d)) {
+ if ((number < pre_max) || ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+ number = number * 10 + (d - '0');
+ d = *++p;
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+
+ // Skip trailing spaces.
+ while (Py_UNICODE_ISSPACE(*p)) {
+ ++p;
+ }
+
+ // Did we use up all the characters?
+ if (p != p_end) {
+ return -1;
+ }
+
+ *result = number;
+ return 0;
+}
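+
+/*
+ * Worked example of the overflow guard above (illustrative):
+ * parsing "-128" with int_min == INT8_MIN == -128 gives
+ * pre_min == -12 and dig_pre_min == 8, so the digits proceed as
+ * '1' -> number == -1, '2' -> number == -12,
+ * '8' -> number == pre_min and 8 <= dig_pre_min -> number == -128;
+ * any further digit fails both tests and the function returns -1.
+ */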
+
+
+NPY_FINLINE int
+str_to_uint64(
+ const Py_UCS4 *p_item, const Py_UCS4 *p_end,
+ uint64_t uint_max, uint64_t *result)
+{
+ const Py_UCS4 *p = (const Py_UCS4 *)p_item;
+ uint64_t number = 0;
+ int d;
+
+ // Skip leading spaces.
+ while (Py_UNICODE_ISSPACE(*p)) {
+ ++p;
+ }
+
+ // Handle sign.
+ if (*p == '-') {
+ return -1;
+ }
+ if (*p == '+') {
+ p++;
+ }
+
+ // Check that there is a first digit.
+ if (!isdigit(*p)) {
+ return -1;
+ }
+
+ // If number is less than pre_max, at least one more digit
+ // can be processed without overflowing.
+ uint64_t pre_max = uint_max / 10;
+ int dig_pre_max = uint_max % 10;
+
+ // Process the digits.
+ d = *p;
+ while (isdigit(d)) {
+ if ((number < pre_max) || ((number == pre_max) && (d - '0' <= dig_pre_max))) {
+ number = number * 10 + (d - '0');
+ d = *++p;
+ }
+ else {
+ return -1;
+ }
+ }
+
+ // Skip trailing spaces.
+ while (Py_UNICODE_ISSPACE(*p)) {
+ ++p;
+ }
+
+ // Did we use up all the characters?
+ if (p != p_end) {
+ return -1;
+ }
+
+ *result = number;
+ return 0;
+}
+
+
+#define DECLARE_TO_INT_PROTOTYPE(intw) \
+ NPY_NO_EXPORT int \
+ to_##intw(PyArray_Descr *descr, \
+ const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \
+ parser_config *pconfig);
+
+DECLARE_TO_INT_PROTOTYPE(int8)
+DECLARE_TO_INT_PROTOTYPE(int16)
+DECLARE_TO_INT_PROTOTYPE(int32)
+DECLARE_TO_INT_PROTOTYPE(int64)
+
+DECLARE_TO_INT_PROTOTYPE(uint8)
+DECLARE_TO_INT_PROTOTYPE(uint16)
+DECLARE_TO_INT_PROTOTYPE(uint32)
+DECLARE_TO_INT_PROTOTYPE(uint64)
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_ */
diff --git a/numpy/core/src/multiarray/textreading/stream.h b/numpy/core/src/multiarray/textreading/stream.h
new file mode 100644
index 000000000..59bd14074
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/stream.h
@@ -0,0 +1,41 @@
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_
+
+#include <stdint.h>
+
+/*
+ * When getting the next line, we hope that the buffer provider can already
+ * give some information about the newlines, because for Python iterables
+ * we definitely expect to get line-by-line buffers.
+ *
+ * BUFFER_IS_FILEEND must be returned when the end of the file is reached and
+ * must NOT be returned together with a valid (non-empty) buffer.
+ */
+#define BUFFER_MAY_CONTAIN_NEWLINE 0
+#define BUFFER_IS_LINEND 1
+#define BUFFER_IS_FILEEND 2
+
+/*
+ * Base struct for streams. We currently have two implementations: a chunked
+ * reader for file-likes and a line-by-line reader for any iterable.
+ * As of this writing, the chunked reader is only used for file-likes that
+ * were not already opened, so that the amount read is preserved exactly in
+ * case of an error. If we drop this, we could read in chunks more often
+ * (but not when `max_rows` is used).
+ *
+ * The "streams" can extend this struct to store their own data, so it is
+ * a very lightweight "object".
+ */
+typedef struct _stream {
+ int (*stream_nextbuf)(void *sdata, char **start, char **end, int *kind);
+ // Note that the first argument to stream_close is the stream pointer
+ // itself, not the stream_data pointer.
+ int (*stream_close)(struct _stream *strm);
+} stream;
+
+
+#define stream_nextbuf(s, start, end, kind) \
+ ((s)->stream_nextbuf((s), start, end, kind))
+#define stream_close(s) ((s)->stream_close((s)))
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_ */
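
To make the contract concrete, a minimal custom stream over a single
in-memory UCS1 buffer could look like the hypothetical sketch below (not
part of the patch; includes omitted). The extension trick is that the
`stream` member comes first, so the pointer can be cast both ways:

    typedef struct {
        stream stream;      /* must be first so (stream *) casts work */
        char *buf;
        size_t len;
        int done;
    } one_buffer_stream;

    static int
    one_buffer_nextbuf(void *sdata, char **start, char **end, int *kind)
    {
        one_buffer_stream *obs = (one_buffer_stream *)sdata;
        if (obs->done) {
            *start = *end = NULL;   /* never paired with a non-empty buffer */
            return BUFFER_IS_FILEEND;
        }
        obs->done = 1;
        *start = obs->buf;
        *end = obs->buf + obs->len;
        *kind = 1;                  /* PyUnicode_1BYTE_KIND */
        return BUFFER_MAY_CONTAIN_NEWLINE;
    }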
diff --git a/numpy/core/src/multiarray/textreading/stream_pyobject.c b/numpy/core/src/multiarray/textreading/stream_pyobject.c
new file mode 100644
index 000000000..6f84ff01d
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/stream_pyobject.c
@@ -0,0 +1,239 @@
+/*
+ * C side structures to provide capabilities to read Python file like objects
+ * in chunks, or iterate through iterables with each result representing a
+ * single line of a file.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+
+#include "textreading/stream.h"
+
+#define READ_CHUNKSIZE (1 << 14)
+
+
+typedef struct {
+ stream stream;
+ /* The Python file object being read. */
+ PyObject *file;
+
+ /* The `read` attribute of the file object. */
+ PyObject *read;
+ /* Amount to read each time we call `obj.read()` */
+ PyObject *chunksize;
+
+ /* Python str object holding the chunk most recently read from the file. */
+ PyObject *chunk;
+
+ /* Encoding compatible with Python's `PyUnicode_Encode` (may be NULL) */
+ const char *encoding;
+} python_chunks_from_file;
+
+
+/*
+ * Helper function to support byte objects as well as unicode strings.
+ *
+ * NOTE: Steals a reference to `str` (although usually returns it unmodified).
+ */
+static NPY_INLINE PyObject *
+process_stringlike(PyObject *str, const char *encoding)
+{
+ if (PyBytes_Check(str)) {
+ PyObject *ustr;
+ ustr = PyUnicode_FromEncodedObject(str, encoding, NULL);
+ if (ustr == NULL) {
+ return NULL;
+ }
+ Py_DECREF(str);
+ return ustr;
+ }
+ else if (!PyUnicode_Check(str)) {
+ PyErr_SetString(PyExc_TypeError,
+ "non-string returned while reading data");
+ Py_DECREF(str);
+ return NULL;
+ }
+ return str;
+}
+
+
+static NPY_INLINE void
+buffer_info_from_unicode(PyObject *str, char **start, char **end, int *kind)
+{
+ Py_ssize_t length = PyUnicode_GET_LENGTH(str);
+ *kind = PyUnicode_KIND(str);
+
+ if (*kind == PyUnicode_1BYTE_KIND) {
+ *start = (char *)PyUnicode_1BYTE_DATA(str);
+ }
+ else if (*kind == PyUnicode_2BYTE_KIND) {
+ *start = (char *)PyUnicode_2BYTE_DATA(str);
+ length *= sizeof(Py_UCS2);
+ }
+ else if (*kind == PyUnicode_4BYTE_KIND) {
+ *start = (char *)PyUnicode_4BYTE_DATA(str);
+ length *= sizeof(Py_UCS4);
+ }
+ *end = *start + length;
+}
+
+
+static int
+fb_nextbuf(python_chunks_from_file *fb, char **start, char **end, int *kind)
+{
+ Py_XDECREF(fb->chunk);
+ fb->chunk = NULL;
+
+ PyObject *chunk = PyObject_CallFunctionObjArgs(fb->read, fb->chunksize, NULL);
+ if (chunk == NULL) {
+ return -1;
+ }
+ fb->chunk = process_stringlike(chunk, fb->encoding);
+ if (fb->chunk == NULL) {
+ return -1;
+ }
+ buffer_info_from_unicode(fb->chunk, start, end, kind);
+ if (*start == *end) {
+ return BUFFER_IS_FILEEND;
+ }
+ return BUFFER_MAY_CONTAIN_NEWLINE;
+}
+
+
+static int
+fb_del(stream *strm)
+{
+ python_chunks_from_file *fb = (python_chunks_from_file *)strm;
+
+ Py_XDECREF(fb->file);
+ Py_XDECREF(fb->read);
+ Py_XDECREF(fb->chunksize);
+ Py_XDECREF(fb->chunk);
+
+ PyMem_FREE(strm);
+
+ return 0;
+}
+
+
+NPY_NO_EXPORT stream *
+stream_python_file(PyObject *obj, const char *encoding)
+{
+ python_chunks_from_file *fb;
+
+ fb = (python_chunks_from_file *)PyMem_Calloc(1, sizeof(python_chunks_from_file));
+ if (fb == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ fb->stream.stream_nextbuf = (void *)&fb_nextbuf;
+ fb->stream.stream_close = &fb_del;
+
+ fb->encoding = encoding;
+ Py_INCREF(obj);
+ fb->file = obj;
+
+ fb->read = PyObject_GetAttrString(obj, "read");
+ if (fb->read == NULL) {
+ goto fail;
+ }
+ fb->chunksize = PyLong_FromLong(READ_CHUNKSIZE);
+ if (fb->chunksize == NULL) {
+ goto fail;
+ }
+
+ return (stream *)fb;
+
+fail:
+ fb_del((stream *)fb);
+ return NULL;
+}
+
+
+/*
+ * Stream from a Python iterable by interpreting each item as a line in a file
+ */
+typedef struct {
+ stream stream;
+ /* The Python iterator being consumed. */
+ PyObject *iterator;
+
+ /* Python str object holding the line most recently fetched */
+ PyObject *line;
+
+ /* Encoding compatible with Python's `PyUnicode_Encode` (may be NULL) */
+ const char *encoding;
+} python_lines_from_iterator;
+
+
+static int
+it_del(stream *strm)
+{
+ python_lines_from_iterator *it = (python_lines_from_iterator *)strm;
+
+ Py_XDECREF(it->iterator);
+ Py_XDECREF(it->line);
+
+ PyMem_FREE(strm);
+ return 0;
+}
+
+
+static int
+it_nextbuf(python_lines_from_iterator *it, char **start, char **end, int *kind)
+{
+ Py_XDECREF(it->line);
+ it->line = NULL;
+
+ PyObject *line = PyIter_Next(it->iterator);
+ if (line == NULL) {
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ *start = NULL;
+ *end = NULL;
+ return BUFFER_IS_FILEEND;
+ }
+ it->line = process_stringlike(line, it->encoding);
+ if (it->line == NULL) {
+ return -1;
+ }
+
+ buffer_info_from_unicode(it->line, start, end, kind);
+ return BUFFER_IS_LINEND;
+}
+
+
+NPY_NO_EXPORT stream *
+stream_python_iterable(PyObject *obj, const char *encoding)
+{
+ python_lines_from_iterator *it;
+
+ if (!PyIter_Check(obj)) {
+ PyErr_SetString(PyExc_TypeError,
+ "error reading from object, expected an iterable.");
+ return NULL;
+ }
+
+ it = (python_lines_from_iterator *)PyMem_Calloc(1, sizeof(*it));
+ if (it == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ it->stream.stream_nextbuf = (void *)&it_nextbuf;
+ it->stream.stream_close = &it_del;
+
+ it->encoding = encoding;
+ Py_INCREF(obj);
+ it->iterator = obj;
+
+ return (stream *)it;
+}
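
A sketch of how a caller drives these streams (assuming `fileobj` is an open
Python file object; error handling abbreviated):

    stream *s = stream_python_file(fileobj, NULL);
    if (s != NULL) {
        char *start, *end;
        int kind;
        int status = stream_nextbuf(s, &start, &end, &kind);
        if (status >= 0 && status != BUFFER_IS_FILEEND) {
            /* tokenize the unicode data in [start, end),
               interpreted according to `kind` */
        }
        stream_close(s);
    }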
diff --git a/numpy/core/src/multiarray/textreading/stream_pyobject.h b/numpy/core/src/multiarray/textreading/stream_pyobject.h
new file mode 100644
index 000000000..45c11dd95
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/stream_pyobject.h
@@ -0,0 +1,16 @@
+
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include "textreading/stream.h"
+
+NPY_NO_EXPORT stream *
+stream_python_file(PyObject *obj, const char *encoding);
+
+NPY_NO_EXPORT stream *
+stream_python_iterable(PyObject *obj, const char *encoding);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_ */
diff --git a/numpy/core/src/multiarray/textreading/tokenize.c.src b/numpy/core/src/multiarray/textreading/tokenize.c.src
new file mode 100644
index 000000000..6ddba3345
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/tokenize.c.src
@@ -0,0 +1,457 @@
+
+#include <Python.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+
+#include "textreading/stream.h"
+#include "textreading/tokenize.h"
+#include "textreading/parser_config.h"
+#include "textreading/growth.h"
+
+
+/*
+ How parsing quoted fields works:
+
+ For quoting to be activated, the first character of the field
+ must be the quote character (after taking into account
+ ignore_leading_spaces). While quoting is active, delimiters
+ are treated as regular characters, not delimiters. Quoting is
+ deactivated by the second occurrence of the quote character. An
+ exception is the occurrence of two consecutive quote characters,
+ which is treated as a literal occurrence of a single quote character.
+ E.g. (with delimiter=',' and quote='"'):
+ 12.3,"New York, NY","3'2"""
+ The second and third fields are `New York, NY` and `3'2"`.
+
+ If a non-delimiter occurs after the closing quote, the quote is
+ ignored and parsing continues with quoting deactivated. Quotes
+ that occur while quoting is not activated are not handled specially;
+ they become part of the data.
+ E.g:
+ 12.3,"ABC"DEF,XY"Z
+ The second and third fields are `ABCDEF` and `XY"Z`.
+
+ Note that the second field of
+ 12.3,"ABC" ,4.5
+ is `ABC `. Currently there is no option to ignore whitespace
+ at the end of a field.
+*/
+
+
+/**begin repeat
+ * #type = Py_UCS1, Py_UCS2, Py_UCS4#
+ */
+static NPY_INLINE int
+copy_to_field_buffer_@type@(tokenizer_state *ts,
+ const @type@ *chunk_start, const @type@ *chunk_end)
+{
+ npy_intp chunk_length = chunk_end - chunk_start;
+ npy_intp size = chunk_length + ts->field_buffer_pos + 2;
+
+ if (NPY_UNLIKELY(ts->field_buffer_length < size)) {
+ npy_intp alloc_size = grow_size_and_multiply(&size, 32, sizeof(Py_UCS4));
+ if (alloc_size < 0) {
+ PyErr_Format(PyExc_ValueError,
+ "line too long to handle while reading file.");
+ return -1;
+ }
+ Py_UCS4 *grown = PyMem_Realloc(ts->field_buffer, alloc_size);
+ if (grown == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ ts->field_buffer_length = size;
+ ts->field_buffer = grown;
+ }
+
+ Py_UCS4 *write_pos = ts->field_buffer + ts->field_buffer_pos;
+ for (; chunk_start < chunk_end; chunk_start++, write_pos++) {
+ *write_pos = (Py_UCS4)*chunk_start;
+ }
+ *write_pos = '\0'; /* always ensure we end with NUL */
+ ts->field_buffer_pos += chunk_length;
+ return 0;
+}
+/**end repeat**/
+
+
+static NPY_INLINE int
+add_field(tokenizer_state *ts)
+{
+ /* The previous field is done, advance to keep a NUL byte at the end */
+ ts->field_buffer_pos += 1;
+
+ if (NPY_UNLIKELY(ts->num_fields + 1 > ts->fields_size)) {
+ npy_intp size = ts->num_fields;
+
+ npy_intp alloc_size = grow_size_and_multiply(
+ &size, 4, sizeof(field_info));
+ if (alloc_size < 0) {
+ /* Check for a size overflow; this path should be almost impossible. */
+ PyErr_Format(PyExc_ValueError,
+ "too many columns found; cannot read file.");
+ return -1;
+ }
+ field_info *fields = PyMem_Realloc(ts->fields, alloc_size);
+ if (fields == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ ts->fields = fields;
+ ts->fields_size = size;
+ }
+
+ ts->fields[ts->num_fields].offset = ts->field_buffer_pos;
+ ts->fields[ts->num_fields].quoted = false;
+ ts->num_fields += 1;
+ /* Ensure this (currently empty) word is NUL terminated. */
+ ts->field_buffer[ts->field_buffer_pos] = '\0';
+ return 0;
+}
+
+
+/**begin repeat
+ * #kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND, PyUnicode_4BYTE_KIND#
+ * #type = Py_UCS1, Py_UCS2, Py_UCS4#
+ */
+static NPY_INLINE int
+tokenizer_core_@type@(tokenizer_state *ts, parser_config *const config)
+{
+ @type@ *pos = (@type@ *)ts->pos;
+ @type@ *stop = (@type@ *)ts->end;
+ @type@ *chunk_start;
+
+ if (ts->state == TOKENIZE_CHECK_QUOTED) {
+ /* before we can check for quotes, strip leading whitespace */
+ if (config->ignore_leading_whitespace) {
+ while (pos < stop && Py_UNICODE_ISSPACE(*pos) &&
+ *pos != '\r' && *pos != '\n') {
+ pos++;
+ }
+ if (pos == stop) {
+ ts->pos = (char *)pos;
+ return 0;
+ }
+ }
+
+ /* Setting chunk effectively starts the field */
+ if (*pos == config->quote) {
+ ts->fields[ts->num_fields - 1].quoted = true;
+ ts->state = TOKENIZE_QUOTED;
+ pos++; /* TOKENIZE_QUOTED is OK with pos == stop */
+ }
+ else {
+ /* Set to TOKENIZE_UNQUOTED or TOKENIZE_UNQUOTED_WHITESPACE */
+ ts->state = ts->unquoted_state;
+ }
+ }
+
+ switch (ts->state) {
+ case TOKENIZE_UNQUOTED:
+ chunk_start = pos;
+ for (; pos < stop; pos++) {
+ if (*pos == '\r') {
+ ts->state = TOKENIZE_EAT_CRLF;
+ break;
+ }
+ else if (*pos == '\n') {
+ ts->state = TOKENIZE_LINE_END;
+ break;
+ }
+ else if (*pos == config->delimiter) {
+ ts->state = TOKENIZE_INIT;
+ break;
+ }
+ else if (*pos == config->comment) {
+ ts->state = TOKENIZE_GOTO_LINE_END;
+ break;
+ }
+ }
+ if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) {
+ return -1;
+ }
+ pos++;
+ break;
+
+ case TOKENIZE_UNQUOTED_WHITESPACE:
+ /* Note, this branch is largely identical to `TOKENIZE_UNQUOTED` */
+ chunk_start = pos;
+ for (; pos < stop; pos++) {
+ if (*pos == '\r') {
+ ts->state = TOKENIZE_EAT_CRLF;
+ break;
+ }
+ else if (*pos == '\n') {
+ ts->state = TOKENIZE_LINE_END;
+ break;
+ }
+ else if (Py_UNICODE_ISSPACE(*pos)) {
+ ts->state = TOKENIZE_INIT;
+ break;
+ }
+ else if (*pos == config->comment) {
+ ts->state = TOKENIZE_GOTO_LINE_END;
+ break;
+ }
+ }
+ if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) {
+ return -1;
+ }
+ pos++;
+ break;
+
+ case TOKENIZE_QUOTED:
+ chunk_start = pos;
+ for (; pos < stop; pos++) {
+ if (*pos == config->quote) {
+ ts->state = TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE;
+ break;
+ }
+ }
+ if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) {
+ return -1;
+ }
+ pos++;
+ break;
+
+ case TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE:
+ if (*pos == config->quote) {
+ /* Copy the quote character directly from the config: */
+ if (copy_to_field_buffer_Py_UCS4(ts,
+ &config->quote, &config->quote+1) < 0) {
+ return -1;
+ }
+ ts->state = TOKENIZE_QUOTED;
+ pos++;
+ }
+ else {
+ /* continue parsing as if unquoted */
+ ts->state = TOKENIZE_UNQUOTED;
+ }
+ break;
+
+ case TOKENIZE_GOTO_LINE_END:
+ if (ts->buf_state != BUFFER_MAY_CONTAIN_NEWLINE) {
+ pos = stop; /* advance to next buffer */
+ ts->state = TOKENIZE_LINE_END;
+ break;
+ }
+ for (; pos < stop; pos++) {
+ if (*pos == '\r') {
+ ts->state = TOKENIZE_EAT_CRLF;
+ break;
+ }
+ else if (*pos == '\n') {
+ ts->state = TOKENIZE_LINE_END;
+ break;
+ }
+ }
+ pos++;
+ break;
+
+ case TOKENIZE_EAT_CRLF:
+ /* "Universal newline" support: remove \n in \r\n. */
+ if (*pos == '\n') {
+ pos++;
+ }
+ ts->state = TOKENIZE_LINE_END;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ ts->pos = (char *)pos;
+ return 0;
+}
+/**end repeat**/
+
+
+/*
+ * This tokenizer always copies the full "row" (all tokens). This makes
+ * two things easier:
+ * 1. It means that every word is guaranteed to be followed by a NUL character
+ * (although it can include one as well).
+ * 2. If usecols are used we can sniff the first row more easily by parsing
+ * it fully. Further, usecols can be negative so we may not know which
+ * columns we need up-front.
+ *
+ * The tokenizer could grow the ability to skip fields and check the
+ * maximum number of fields when known, but it is unclear whether this is worthwhile.
+ *
+ * Unlike some tokenizers, this one tries to work in chunks and copies
+ * data in chunks as well. The hope is that this makes multiple light-weight
+ * loops rather than a single heavy one, to allow e.g. quickly scanning for the
+ * end of a field. Copying chunks also means we usually only check once per
+ * field whether the buffer is large enough.
+ * Different choices are possible, this one seems to work well, though.
+ *
+ * The core (main part) of the tokenizer is specialized for the three Python
+ * unicode flavors UCS1, UCS2, and UCS4 as a worthwhile optimization.
+ */
+NPY_NO_EXPORT int
+tokenize(stream *s, tokenizer_state *ts, parser_config *const config)
+{
+ assert(ts->fields_size >= 2);
+ assert(ts->field_buffer_length >= 2*sizeof(Py_UCS4));
+
+ int finished_reading_file = 0;
+
+ /* Reset to start of buffer */
+ ts->field_buffer_pos = 0;
+ ts->num_fields = 0;
+
+ while (1) {
+ /*
+ * This loop adds new fields to the result (to make up a full row)
+ * until the row ends (typically a line end or the file end)
+ */
+ if (ts->state == TOKENIZE_INIT) {
+ /* Start a new field */
+ if (add_field(ts) < 0) {
+ return -1;
+ }
+ ts->state = TOKENIZE_CHECK_QUOTED;
+ }
+
+ if (NPY_UNLIKELY(ts->pos >= ts->end)) {
+ if (ts->buf_state == BUFFER_IS_LINEND &&
+ ts->state != TOKENIZE_QUOTED) {
+ /*
+ * Finished line, do not read anymore (also do not eat \n).
+ * If we are in a quoted field and the "line" does not end with
+ * a newline, the quoted field will not have it either.
+ * I.e. `np.loadtxt(['"a', 'b"'], dtype="S2", quotechar='"')`
+ * reads "ab". This matches `next(csv.reader(['"a', 'b"']))`.
+ */
+ break;
+ }
+ /* fetch new data */
+ ts->buf_state = stream_nextbuf(s,
+ &ts->pos, &ts->end, &ts->unicode_kind);
+ if (ts->buf_state < 0) {
+ return -1;
+ }
+ if (ts->buf_state == BUFFER_IS_FILEEND) {
+ finished_reading_file = 1;
+ ts->pos = ts->end; /* stream should ensure this. */
+ break;
+ }
+ else if (ts->pos == ts->end) {
+ /* This must be an empty line (and it must be indicated!). */
+ assert(ts->buf_state == BUFFER_IS_LINEND);
+ break;
+ }
+ }
+ int status;
+ if (ts->unicode_kind == PyUnicode_1BYTE_KIND) {
+ status = tokenizer_core_Py_UCS1(ts, config);
+ }
+ else if (ts->unicode_kind == PyUnicode_2BYTE_KIND) {
+ status = tokenizer_core_Py_UCS2(ts, config);
+ }
+ else {
+ assert(ts->unicode_kind == PyUnicode_4BYTE_KIND);
+ status = tokenizer_core_Py_UCS4(ts, config);
+ }
+ if (status < 0) {
+ return -1;
+ }
+
+ if (ts->state == TOKENIZE_LINE_END) {
+ break;
+ }
+ }
+
+ /*
+ * We have finished tokenizing a full row into fields, finalize result
+ */
+ if (ts->buf_state == BUFFER_IS_LINEND) {
+ /* This line is "finished", make sure we don't touch it again: */
+ ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE;
+ if (NPY_UNLIKELY(ts->pos < ts->end)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Found an unquoted embedded newline within a single line of "
+ "input. This is currently not supported.");
+ return -1;
+ }
+ }
+
+ /* Finish the last field (we "append" one to store the last ones length) */
+ if (add_field(ts) < 0) {
+ return -1;
+ }
+ ts->num_fields -= 1;
+
+ /*
+ * If we have exactly one field and that field is completely empty,
+ * this is an empty line, and we just ignore it.
+ */
+ if (ts->num_fields == 1
+ && ts->fields[1].offset - ts->fields[0].offset == 1
+ && !ts->fields->quoted) {
+ ts->num_fields--;
+ }
+ ts->state = TOKENIZE_INIT;
+ return finished_reading_file;
+}
+
+
+NPY_NO_EXPORT void
+tokenizer_clear(tokenizer_state *ts)
+{
+ PyMem_FREE(ts->field_buffer);
+ ts->field_buffer = NULL;
+ ts->field_buffer_length = 0;
+
+ PyMem_FREE(ts->fields);
+ ts->fields = NULL;
+ ts->fields_size = 0;
+}
+
+
+/*
+ * Initialize the tokenizer. We may want to copy all important config
+ * variables into the tokenizer. This would improve the cache locality during
+ * tokenizing.
+ */
+NPY_NO_EXPORT int
+tokenizer_init(tokenizer_state *ts, parser_config *config)
+{
+ /* State and buf_state could be moved into tokenize if we go by row */
+ ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE;
+ ts->state = TOKENIZE_INIT;
+ if (config->delimiter_is_whitespace) {
+ ts->unquoted_state = TOKENIZE_UNQUOTED_WHITESPACE;
+ }
+ else {
+ ts->unquoted_state = TOKENIZE_UNQUOTED;
+ }
+ ts->num_fields = 0;
+
+ ts->buf_state = 0;
+ ts->pos = NULL;
+ ts->end = NULL;
+
+ ts->field_buffer = PyMem_Malloc(32 * sizeof(Py_UCS4));
+ if (ts->field_buffer == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ ts->field_buffer_length = 32;
+
+ ts->fields = PyMem_Malloc(4 * sizeof(*ts->fields));
+ if (ts->fields == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ ts->fields_size = 4;
+ return 0;
+}
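
As a worked example of the buffer layout produced by `tokenize()`: for the
single input line `ab,"c,d"` with delimiter ',' and quote '"', the call
returns with `num_fields == 2` and

    field_buffer: [_, 'a', 'b', NUL, 'c', ',', 'd', NUL, NUL]
    fields[0] = {offset = 1, quoted = false}
    fields[1] = {offset = 4, quoted = true}
    fields[2] = {offset = 8, ...}        /* the appended sentinel field */

so the length of field i is `fields[i+1].offset - fields[i].offset - 1`
(index 0 of the buffer is skipped because add_field() advances the write
position before recording each offset).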
diff --git a/numpy/core/src/multiarray/textreading/tokenize.h b/numpy/core/src/multiarray/textreading/tokenize.h
new file mode 100644
index 000000000..fa10bb9b0
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/tokenize.h
@@ -0,0 +1,78 @@
+
+#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_
+#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_
+
+#include <Python.h>
+#include "numpy/ndarraytypes.h"
+
+#include "textreading/stream.h"
+#include "textreading/parser_config.h"
+
+
+typedef enum {
+ /* Initialization of fields */
+ TOKENIZE_INIT,
+ TOKENIZE_CHECK_QUOTED,
+ /* Main field parsing states */
+ TOKENIZE_UNQUOTED,
+ TOKENIZE_UNQUOTED_WHITESPACE,
+ TOKENIZE_QUOTED,
+ /* Handling of two character control sequences (except "\r\n") */
+ TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE,
+ /* Line end handling */
+ TOKENIZE_LINE_END,
+ TOKENIZE_EAT_CRLF, /* "\r\n" support (carriage return, line feed) */
+ TOKENIZE_GOTO_LINE_END,
+} tokenizer_parsing_state;
+
+
+typedef struct {
+ size_t offset;
+ bool quoted;
+} field_info;
+
+
+typedef struct {
+ tokenizer_parsing_state state;
+ /* Either TOKENIZE_UNQUOTED or TOKENIZE_UNQUOTED_WHITESPACE: */
+ tokenizer_parsing_state unquoted_state;
+ int unicode_kind;
+ int buf_state;
+ /* the buffer we are currently working on */
+ char *pos;
+ char *end;
+ /*
+ * Space to copy words into. The buffer must always be at least two NUL
+ * entries longer (8 bytes) than the actual word (including initially).
+ * The first byte beyond the current word is always NUL'ed on write, the
+ * second byte is there to allow easy appending of an additional empty
+ * word at the end (this word is also NUL terminated).
+ */
+ npy_intp field_buffer_length;
+ npy_intp field_buffer_pos;
+ Py_UCS4 *field_buffer;
+
+ /*
+ * Fields, including information about the field being quoted. This
+ * always includes one "additional" empty field. The length of a field
+ * is equal to `fields[i+1].offset - fields[i].offset - 1`.
+ *
+ * The tokenizer assumes at least one field is allocated.
+ */
+ npy_intp num_fields;
+ npy_intp fields_size;
+ field_info *fields;
+} tokenizer_state;
+
+
+NPY_NO_EXPORT void
+tokenizer_clear(tokenizer_state *ts);
+
+
+NPY_NO_EXPORT int
+tokenizer_init(tokenizer_state *ts, parser_config *config);
+
+NPY_NO_EXPORT int
+tokenize(stream *s, tokenizer_state *ts, parser_config *const config);
+
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_ */
diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src
index 933f75808..b4b060720 100644
--- a/numpy/core/src/npysort/quicksort.c.src
+++ b/numpy/core/src/npysort/quicksort.c.src
@@ -51,8 +51,14 @@
#include "npy_sort.h"
#include "npysort_common.h"
+#include "npy_cpu_features.h"
+#include "x86-qsort.h"
#include <stdlib.h>
+#ifndef NPY_DISABLE_OPTIMIZATION
+ #include "x86-qsort.dispatch.h"
+#endif
+
#define NOT_USED NPY_UNUSED(unused)
/*
* pushing largest partition has upper bound of log2(n) space
@@ -83,11 +89,22 @@
* npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
* npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
+ * #AVX512 = 0*5, 1, 1, 0*5, 1, 0*7#
*/
NPY_NO_EXPORT int
quicksort_@suff@(void *start, npy_intp num, void *NOT_USED)
{
+
+#if @AVX512@
+ void (*dispfunc)(void*, npy_intp) = NULL;
+ NPY_CPU_DISPATCH_CALL_XB(dispfunc = &x86_quicksort_@suff@);
+ if (dispfunc) {
+ (*dispfunc)(start, num);
+ return 0;
+ }
+#endif
+
@type@ vp;
@type@ *pl = start;
@type@ *pr = pl + num - 1;
diff --git a/numpy/core/src/npysort/x86-qsort.dispatch.c.src b/numpy/core/src/npysort/x86-qsort.dispatch.c.src
new file mode 100644
index 000000000..b93c737cb
--- /dev/null
+++ b/numpy/core/src/npysort/x86-qsort.dispatch.c.src
@@ -0,0 +1,587 @@
+/*@targets
+ * $maxopt $keep_baseline avx512_skx
+ */
+// The policy $keep_baseline is used to avoid skipping the build of
+// avx512_skx when it is part of the baseline features (--cpu-baseline),
+// since the 'baseline' option isn't specified within the targets.
+
+#include "x86-qsort.h"
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#ifdef NPY_HAVE_AVX512_SKX
+#include <immintrin.h>
+#include "numpy/npy_math.h"
+#include "npy_sort.h"
+#include "simd/simd.h"
+
+
+/*
+ * Quicksort using AVX-512 for int, uint32 and float. The ideas and code are
+ * based on these two research papers:
+ * (1) Fast and Robust Vectorized In-Place Sorting of Primitive Types
+ * https://drops.dagstuhl.de/opus/volltexte/2021/13775/
+ * (2) A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel Skylake
+ * https://arxiv.org/pdf/1704.08579.pdf
+ *
+ * High level idea: Vectorize the quicksort partitioning using AVX-512
+ * compressstore instructions. The algorithm to pick the pivot is to use median of
+ * 72 elements picked at random. If the array size is < 128, then use
+ * Bitonic sorting network. Good resource for bitonic sorting network:
+ * http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030
+ *
+ * Refer to https://github.com/numpy/numpy/pull/20133#issuecomment-958110340 for
+ * potential problems when converting this code to universal intrinsics framework.
+ */
+
+/*
+ * Constants used in sorting 16 elements in a ZMM registers. Based on Bitonic
+ * sorting network (see
+ * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg)
+ */
+#define NETWORK1 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
+#define NETWORK2 12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3
+#define NETWORK3 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7
+#define NETWORK4 13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2
+#define NETWORK5 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+#define NETWORK6 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4
+#define NETWORK7 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8
+#define ZMM_MAX_FLOAT _mm512_set1_ps(NPY_INFINITYF)
+#define ZMM_MAX_UINT _mm512_set1_epi32(NPY_MAX_UINT32)
+#define ZMM_MAX_INT _mm512_set1_epi32(NPY_MAX_INT32)
+#define SHUFFLE_MASK(a,b,c,d) (((a) << 6) | ((b) << 4) | ((c) << 2) | (d))
+#define SHUFFLE_ps(ZMM, MASK) _mm512_shuffle_ps((ZMM), (ZMM), MASK)
+#define SHUFFLE_epi32(ZMM, MASK) _mm512_shuffle_epi32((ZMM), MASK)
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/*
+ * Vectorized random number generator xoroshiro128+. Broken into 2 parts:
+ * (1) vnext generates 2 64-bit random integers
+ * (2) rnd_epu32 converts this to 4 32-bit random integers and bounds it to
+ * the length of the array
+ */
+#define VROTL(x, k) /* rotate each uint64_t value in vector */ \
+ _mm256_or_si256(_mm256_slli_epi64((x),(k)),_mm256_srli_epi64((x),64-(k)))
+
+static NPY_INLINE
+__m256i vnext(__m256i* s0, __m256i* s1) {
+ *s1 = _mm256_xor_si256(*s0, *s1); /* modify vectors s1 and s0 */
+ *s0 = _mm256_xor_si256(_mm256_xor_si256(VROTL(*s0, 24), *s1),
+ _mm256_slli_epi64(*s1, 16));
+ *s1 = VROTL(*s1, 37);
+ return _mm256_add_epi64(*s0, *s1); /* return random vector */
+}
+
+/* transform random numbers to the range between 0 and bound - 1 */
+static NPY_INLINE
+__m256i rnd_epu32(__m256i rnd_vec, __m256i bound) {
+ __m256i even = _mm256_srli_epi64(_mm256_mul_epu32(rnd_vec, bound), 32);
+ __m256i odd = _mm256_mul_epu32(_mm256_srli_epi64(rnd_vec, 32), bound);
+ return _mm256_blend_epi32(odd, even, 0b01010101);
+}
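+
+/*
+ * For reference, the per-lane scalar equivalent of vnext() is one step of
+ * xoroshiro128+ (illustrative sketch, mirroring the vector code above):
+ *
+ *     static inline uint64_t rotl64(uint64_t x, int k) {
+ *         return (x << k) | (x >> (64 - k));
+ *     }
+ *     static inline uint64_t next(uint64_t *s0, uint64_t *s1) {
+ *         *s1 ^= *s0;
+ *         *s0 = rotl64(*s0, 24) ^ *s1 ^ (*s1 << 16);
+ *         *s1 = rotl64(*s1, 37);
+ *         return *s0 + *s1;   // like vnext: sum of the updated state
+ *     }
+ */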
+
+/**begin repeat
+ *
+ * #TYPE = INT, UINT, FLOAT#
+ * #type = int, uint, float#
+ * #type_t = npy_int, npy_uint, npy_float#
+ * #zmm_t = __m512i, __m512i, __m512#
+ * #ymm_t = __m256i, __m256i, __m256#
+ * #vsuf1 = epi32, epu32, ps#
+ * #vsuf2 = epi32, epi32, ps#
+ * #vsuf3 = si512, si512, ps#
+ * #vsuf4 = s32, u32, f32#
+ * #CMP_GE_OP = _MM_CMPINT_NLT, _MM_CMPINT_NLT, _CMP_GE_OQ#
+ * #TYPE_MAX_VAL = NPY_MAX_INT32, NPY_MAX_UINT32, NPY_INFINITYF#
+ * #TYPE_MIN_VAL = NPY_MIN_INT32, 0, -NPY_INFINITYF#
+ */
+
+/*
+ * COEX == Compare and Exchange two registers by swapping min and max values
+ */
+#define COEX_ZMM_@vsuf1@(a, b) { \
+ @zmm_t@ temp = a; \
+ a = _mm512_min_@vsuf1@(a,b); \
+ b = _mm512_max_@vsuf1@(temp, b);} \
+
+#define COEX_YMM_@vsuf1@(a, b){ \
+ @ymm_t@ temp = a; \
+ a = _mm256_min_@vsuf1@(a, b); \
+ b = _mm256_max_@vsuf1@(temp, b);} \
+
+static NPY_INLINE
+@zmm_t@ cmp_merge_@vsuf1@(@zmm_t@ in1, @zmm_t@ in2, __mmask16 mask)
+{
+ @zmm_t@ min = _mm512_min_@vsuf1@(in2, in1);
+ @zmm_t@ max = _mm512_max_@vsuf1@(in2, in1);
+ return _mm512_mask_mov_@vsuf2@(min, mask, max); // 0 -> min, 1 -> max
+}
+
+/*
+ * Assumes zmm is random and performs a full sorting network defined in
+ * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
+ */
+static NPY_INLINE
+@zmm_t@ sort_zmm_@vsuf1@(@zmm_t@ zmm)
+{
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(0,1,2,3)), 0xCCCC);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA);
+ zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK3),zmm), 0xF0F0);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA);
+ zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5),zmm), 0xFF00);
+ zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK6),zmm), 0xF0F0);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC);
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA);
+ return zmm;
+}
+
+// Assumes zmm is bitonic and performs a recursive half cleaner
+static NPY_INLINE
+@zmm_t@ bitonic_merge_zmm_@vsuf1@(@zmm_t@ zmm)
+{
+ // 1) half_cleaner[16]: compare 1-9, 2-10, 3-11 etc ..
+ zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK7),zmm), 0xFF00);
+ // 2) half_cleaner[8]: compare 1-5, 2-6, 3-7 etc ..
+ zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK6),zmm), 0xF0F0);
+ // 3) half_cleaner[4]
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC);
+ // 4) half_cleaner[1]
+ zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA);
+ return zmm;
+}
+
+// Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
+static NPY_INLINE
+void bitonic_merge_two_zmm_@vsuf1@(@zmm_t@* zmm1, @zmm_t@* zmm2)
+{
+ // 1) First step of a merging network: coex of zmm1 and zmm2 reversed
+ *zmm2 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), *zmm2);
+ @zmm_t@ zmm3 = _mm512_min_@vsuf1@(*zmm1, *zmm2);
+ @zmm_t@ zmm4 = _mm512_max_@vsuf1@(*zmm1, *zmm2);
+ // 2) Recursive half cleaner for each
+ *zmm1 = bitonic_merge_zmm_@vsuf1@(zmm3);
+ *zmm2 = bitonic_merge_zmm_@vsuf1@(zmm4);
+}
+
+// Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive half cleaner
+static NPY_INLINE
+void bitonic_merge_four_zmm_@vsuf1@(@zmm_t@* zmm)
+{
+ @zmm_t@ zmm2r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[2]);
+ @zmm_t@ zmm3r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[3]);
+ @zmm_t@ zmm_t1 = _mm512_min_@vsuf1@(zmm[0], zmm3r);
+ @zmm_t@ zmm_t2 = _mm512_min_@vsuf1@(zmm[1], zmm2r);
+ @zmm_t@ zmm_t3 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[1], zmm2r));
+ @zmm_t@ zmm_t4 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[0], zmm3r));
+ @zmm_t@ zmm0 = _mm512_min_@vsuf1@(zmm_t1, zmm_t2);
+ @zmm_t@ zmm1 = _mm512_max_@vsuf1@(zmm_t1, zmm_t2);
+ @zmm_t@ zmm2 = _mm512_min_@vsuf1@(zmm_t3, zmm_t4);
+ @zmm_t@ zmm3 = _mm512_max_@vsuf1@(zmm_t3, zmm_t4);
+ zmm[0] = bitonic_merge_zmm_@vsuf1@(zmm0);
+ zmm[1] = bitonic_merge_zmm_@vsuf1@(zmm1);
+ zmm[2] = bitonic_merge_zmm_@vsuf1@(zmm2);
+ zmm[3] = bitonic_merge_zmm_@vsuf1@(zmm3);
+}
+
+static NPY_INLINE
+void bitonic_merge_eight_zmm_@vsuf1@(@zmm_t@* zmm)
+{
+ @zmm_t@ zmm4r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[4]);
+ @zmm_t@ zmm5r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[5]);
+ @zmm_t@ zmm6r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[6]);
+ @zmm_t@ zmm7r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[7]);
+ @zmm_t@ zmm_t1 = _mm512_min_@vsuf1@(zmm[0], zmm7r);
+ @zmm_t@ zmm_t2 = _mm512_min_@vsuf1@(zmm[1], zmm6r);
+ @zmm_t@ zmm_t3 = _mm512_min_@vsuf1@(zmm[2], zmm5r);
+ @zmm_t@ zmm_t4 = _mm512_min_@vsuf1@(zmm[3], zmm4r);
+ @zmm_t@ zmm_t5 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[3], zmm4r));
+ @zmm_t@ zmm_t6 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[2], zmm5r));
+ @zmm_t@ zmm_t7 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[1], zmm6r));
+ @zmm_t@ zmm_t8 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[0], zmm7r));
+ COEX_ZMM_@vsuf1@(zmm_t1, zmm_t3);
+ COEX_ZMM_@vsuf1@(zmm_t2, zmm_t4);
+ COEX_ZMM_@vsuf1@(zmm_t5, zmm_t7);
+ COEX_ZMM_@vsuf1@(zmm_t6, zmm_t8);
+ COEX_ZMM_@vsuf1@(zmm_t1, zmm_t2);
+ COEX_ZMM_@vsuf1@(zmm_t3, zmm_t4);
+ COEX_ZMM_@vsuf1@(zmm_t5, zmm_t6);
+ COEX_ZMM_@vsuf1@(zmm_t7, zmm_t8);
+ zmm[0] = bitonic_merge_zmm_@vsuf1@(zmm_t1);
+ zmm[1] = bitonic_merge_zmm_@vsuf1@(zmm_t2);
+ zmm[2] = bitonic_merge_zmm_@vsuf1@(zmm_t3);
+ zmm[3] = bitonic_merge_zmm_@vsuf1@(zmm_t4);
+ zmm[4] = bitonic_merge_zmm_@vsuf1@(zmm_t5);
+ zmm[5] = bitonic_merge_zmm_@vsuf1@(zmm_t6);
+ zmm[6] = bitonic_merge_zmm_@vsuf1@(zmm_t7);
+ zmm[7] = bitonic_merge_zmm_@vsuf1@(zmm_t8);
+}
+
+static NPY_INLINE
+void sort_16_@vsuf1@(@type_t@* arr, npy_int N)
+{
+ __mmask16 load_mask = (0x0001 << N) - 0x0001;
+ @zmm_t@ zmm = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask, arr);
+ _mm512_mask_storeu_@vsuf2@(arr, load_mask, sort_zmm_@vsuf1@(zmm));
+}
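+
+/*
+ * Example: N == 5 gives load_mask == 0x001F, so lanes 0-4 are loaded from
+ * arr while lanes 5-15 are filled with ZMM_MAX_@TYPE@; the padding sorts
+ * to the top and the masked store writes back only the five real elements.
+ */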
+
+static NPY_INLINE
+void sort_32_@vsuf1@(@type_t@* arr, npy_int N)
+{
+ if (N <= 16) {
+ sort_16_@vsuf1@(arr, N);
+ return;
+ }
+ @zmm_t@ zmm1 = _mm512_loadu_@vsuf3@(arr);
+ __mmask16 load_mask = (0x0001 << (N-16)) - 0x0001;
+ @zmm_t@ zmm2 = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask, arr + 16);
+ zmm1 = sort_zmm_@vsuf1@(zmm1);
+ zmm2 = sort_zmm_@vsuf1@(zmm2);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm1, &zmm2);
+ _mm512_storeu_@vsuf3@(arr, zmm1);
+ _mm512_mask_storeu_@vsuf2@(arr + 16, load_mask, zmm2);
+}
+
+static NPY_INLINE
+void sort_64_@vsuf1@(@type_t@* arr, npy_int N)
+{
+ if (N <= 32) {
+ sort_32_@vsuf1@(arr, N);
+ return;
+ }
+ @zmm_t@ zmm[4];
+ zmm[0] = _mm512_loadu_@vsuf3@(arr);
+ zmm[1] = _mm512_loadu_@vsuf3@(arr + 16);
+ __mmask16 load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
+ if (N < 48) {
+ load_mask1 = (0x0001 << (N-32)) - 0x0001;
+ load_mask2 = 0x0000;
+ }
+ else if (N < 64) {
+ load_mask2 = (0x0001 << (N-48)) - 0x0001;
+ }
+ zmm[2] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask1, arr + 32);
+ zmm[3] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask2, arr + 48);
+ zmm[0] = sort_zmm_@vsuf1@(zmm[0]);
+ zmm[1] = sort_zmm_@vsuf1@(zmm[1]);
+ zmm[2] = sort_zmm_@vsuf1@(zmm[2]);
+ zmm[3] = sort_zmm_@vsuf1@(zmm[3]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[0], &zmm[1]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[2], &zmm[3]);
+ bitonic_merge_four_zmm_@vsuf1@(zmm);
+ _mm512_storeu_@vsuf3@(arr, zmm[0]);
+ _mm512_storeu_@vsuf3@(arr + 16, zmm[1]);
+ _mm512_mask_storeu_@vsuf2@(arr + 32, load_mask1, zmm[2]);
+ _mm512_mask_storeu_@vsuf2@(arr + 48, load_mask2, zmm[3]);
+}
+
+static NPY_INLINE
+void sort_128_@vsuf1@(@type_t@* arr, npy_int N)
+{
+ if (N <= 64) {
+ sort_64_@vsuf1@(arr, N);
+ return;
+ }
+ @zmm_t@ zmm[8];
+ zmm[0] = _mm512_loadu_@vsuf3@(arr);
+ zmm[1] = _mm512_loadu_@vsuf3@(arr + 16);
+ zmm[2] = _mm512_loadu_@vsuf3@(arr + 32);
+ zmm[3] = _mm512_loadu_@vsuf3@(arr + 48);
+ zmm[0] = sort_zmm_@vsuf1@(zmm[0]);
+ zmm[1] = sort_zmm_@vsuf1@(zmm[1]);
+ zmm[2] = sort_zmm_@vsuf1@(zmm[2]);
+ zmm[3] = sort_zmm_@vsuf1@(zmm[3]);
+ __mmask16 load_mask1 = 0xFFFF, load_mask2 = 0xFFFF;
+ __mmask16 load_mask3 = 0xFFFF, load_mask4 = 0xFFFF;
+ if (N < 80) {
+ load_mask1 = (0x0001 << (N-64)) - 0x0001;
+ load_mask2 = 0x0000;
+ load_mask3 = 0x0000;
+ load_mask4 = 0x0000;
+ }
+ else if (N < 96) {
+ load_mask2 = (0x0001 << (N-80)) - 0x0001;
+ load_mask3 = 0x0000;
+ load_mask4 = 0x0000;
+ }
+ else if (N < 112) {
+ load_mask3 = (0x0001 << (N-96)) - 0x0001;
+ load_mask4 = 0x0000;
+ }
+ else {
+ load_mask4 = (0x0001 << (N-112)) - 0x0001;
+ }
+ zmm[4] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask1, arr + 64);
+ zmm[5] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask2, arr + 80);
+ zmm[6] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask3, arr + 96);
+ zmm[7] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask4, arr + 112);
+ zmm[4] = sort_zmm_@vsuf1@(zmm[4]);
+ zmm[5] = sort_zmm_@vsuf1@(zmm[5]);
+ zmm[6] = sort_zmm_@vsuf1@(zmm[6]);
+ zmm[7] = sort_zmm_@vsuf1@(zmm[7]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[0], &zmm[1]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[2], &zmm[3]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[4], &zmm[5]);
+ bitonic_merge_two_zmm_@vsuf1@(&zmm[6], &zmm[7]);
+ bitonic_merge_four_zmm_@vsuf1@(zmm);
+ bitonic_merge_four_zmm_@vsuf1@(zmm + 4);
+ bitonic_merge_eight_zmm_@vsuf1@(zmm);
+ _mm512_storeu_@vsuf3@(arr, zmm[0]);
+ _mm512_storeu_@vsuf3@(arr + 16, zmm[1]);
+ _mm512_storeu_@vsuf3@(arr + 32, zmm[2]);
+ _mm512_storeu_@vsuf3@(arr + 48, zmm[3]);
+ _mm512_mask_storeu_@vsuf2@(arr + 64, load_mask1, zmm[4]);
+ _mm512_mask_storeu_@vsuf2@(arr + 80, load_mask2, zmm[5]);
+ _mm512_mask_storeu_@vsuf2@(arr + 96, load_mask3, zmm[6]);
+ _mm512_mask_storeu_@vsuf2@(arr + 112, load_mask4, zmm[7]);
+}
+
+
+static NPY_INLINE
+void swap_@TYPE@(@type_t@ *arr, npy_intp ii, npy_intp jj) {
+ @type_t@ temp = arr[ii];
+ arr[ii] = arr[jj];
+ arr[jj] = temp;
+}
+
+// Median-of-3 strategy
+//static NPY_INLINE
+//npy_intp get_pivot_index(@type_t@ *arr, const npy_intp left, const npy_intp right) {
+// return (rand() % (right + 1 - left)) + left;
+// //npy_intp middle = ((right-left)/2) + left;
+// //@type_t@ a = arr[left], b = arr[middle], c = arr[right];
+// //if ((b >= a && b <= c) || (b <= a && b >= c))
+// // return middle;
+// //if ((a >= b && a <= c) || (a <= b && a >= c))
+// // return left;
+// //else
+// // return right;
+//}
+
+/*
+ * Picking the pivot: Median of 72 array elements chosen at random.
+ */
+
+static NPY_INLINE
+@type_t@ get_pivot_@vsuf1@(@type_t@ *arr, const npy_intp left, const npy_intp right) {
+ /* seeds for vectorized random number generator */
+ __m256i s0 = _mm256_setr_epi64x(8265987198341093849, 3762817312854612374,
+ 1324281658759788278, 6214952190349879213);
+ __m256i s1 = _mm256_setr_epi64x(2874178529384792648, 1257248936691237653,
+ 7874578921548791257, 1998265912745817298);
+ s0 = _mm256_add_epi64(s0, _mm256_set1_epi64x(left));
+ s1 = _mm256_sub_epi64(s1, _mm256_set1_epi64x(right));
+
+ npy_intp arrsize = right - left + 1;
+ __m256i bound = _mm256_set1_epi32(arrsize > INT32_MAX ? INT32_MAX : arrsize);
+ __m512i left_vec = _mm512_set1_epi64(left);
+ __m512i right_vec = _mm512_set1_epi64(right);
+ @ymm_t@ v[9];
+ /* fill 9 vectors with random numbers */
+ for (npy_int i = 0; i < 9; ++i) {
+ __m256i rand_64 = vnext(&s0, &s1); /* vector with 4 random uint64_t */
+ __m512i rand_32 = _mm512_cvtepi32_epi64(rnd_epu32(rand_64, bound)); /* random numbers between 0 and bound - 1 */
+ __m512i indices;
+ if (i < 5)
+ indices = _mm512_add_epi64(left_vec, rand_32); /* indices for arr */
+ else
+ indices = _mm512_sub_epi64(right_vec, rand_32); /* indices for arr */
+
+ v[i] = _mm512_i64gather_@vsuf2@(indices, arr, sizeof(@type_t@));
+ }
+
+ /* median network for 9 elements */
+ COEX_YMM_@vsuf1@(v[0], v[1]); COEX_YMM_@vsuf1@(v[2], v[3]);
+ COEX_YMM_@vsuf1@(v[4], v[5]); COEX_YMM_@vsuf1@(v[6], v[7]);
+ COEX_YMM_@vsuf1@(v[0], v[2]); COEX_YMM_@vsuf1@(v[1], v[3]);
+ COEX_YMM_@vsuf1@(v[4], v[6]); COEX_YMM_@vsuf1@(v[5], v[7]);
+ COEX_YMM_@vsuf1@(v[0], v[4]); COEX_YMM_@vsuf1@(v[1], v[2]);
+ COEX_YMM_@vsuf1@(v[5], v[6]); COEX_YMM_@vsuf1@(v[3], v[7]);
+ COEX_YMM_@vsuf1@(v[1], v[5]); COEX_YMM_@vsuf1@(v[2], v[6]);
+ COEX_YMM_@vsuf1@(v[3], v[5]); COEX_YMM_@vsuf1@(v[2], v[4]);
+ COEX_YMM_@vsuf1@(v[3], v[4]);
+ COEX_YMM_@vsuf1@(v[3], v[8]);
+ COEX_YMM_@vsuf1@(v[4], v[8]);
+
+ // technically v[4] needs to be fully sorted before picking the exact
+ // median, but picking its 4th element works just as well for performance
+ @type_t@* temp = (@type_t@*) &v[4];
+
+ return temp[4];
+}
+
+/*
+ * Partition one ZMM register based on the pivot and return the number of
+ * lanes greater than or equal to the pivot (those lanes are stored at the
+ * right end of the range).
+ */
+static NPY_INLINE
+npy_int partition_vec_@vsuf1@(@type_t@* arr, npy_intp left, npy_intp right,
+ const @zmm_t@ curr_vec, const @zmm_t@ pivot_vec,
+ @zmm_t@* smallest_vec, @zmm_t@* biggest_vec)
+{
+ /* which elements are larger than the pivot */
+ __mmask16 gt_mask = _mm512_cmp_@vsuf1@_mask(curr_vec, pivot_vec, @CMP_GE_OP@);
+ npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask);
+ _mm512_mask_compressstoreu_@vsuf2@(arr + left, _knot_mask16(gt_mask), curr_vec);
+ _mm512_mask_compressstoreu_@vsuf2@(arr + right - amount_gt_pivot, gt_mask, curr_vec);
+ *smallest_vec = _mm512_min_@vsuf1@(curr_vec, *smallest_vec);
+ *biggest_vec = _mm512_max_@vsuf1@(curr_vec, *biggest_vec);
+ return amount_gt_pivot;
+}
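+
+/*
+ * A scalar sketch of what the compressstore pair above does for one
+ * 16-lane vector (illustrative only; the comparison is >=, so elements
+ * equal to the pivot land in the right partition):
+ *
+ *     npy_int n_ge = 0;
+ *     for (int i = 0; i < 16; i++)
+ *         n_ge += (vec[i] >= pivot);
+ *     npy_intp lo = left, hi = right - n_ge;
+ *     for (int i = 0; i < 16; i++) {
+ *         if (vec[i] >= pivot)
+ *             arr[hi++] = vec[i];     // compressed store of the >= lanes
+ *         else
+ *             arr[lo++] = vec[i];     // compressed store of the < lanes
+ *     }
+ *     return n_ge;                    // == amount_gt_pivot
+ */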
+
+/*
+ * Partition an array based on the pivot and return the first index of the
+ * right partition; all elements before it are less than or equal to the
+ * pivot.
+ */
+static NPY_INLINE
+npy_intp partition_avx512_@vsuf1@(@type_t@* arr, npy_intp left, npy_intp right,
+ @type_t@ pivot, @type_t@* smallest, @type_t@* biggest)
+{
+ /* make the remaining length divisible by 16 by partitioning a few elements with scalar swaps */
+ for (npy_int i = (right - left) % 16; i > 0; --i) {
+ *smallest = MIN(*smallest, arr[left]);
+ *biggest = MAX(*biggest, arr[left]);
+ if (arr[left] > pivot) {
+ swap_@TYPE@(arr, left, --right);
+ }
+ else {
+ ++left;
+ }
+ }
+
+ if (left == right) {
+ return left; /* fewer than 16 elements in the array */
+ }
+
+ @zmm_t@ pivot_vec = _mm512_set1_@vsuf2@(pivot);
+ @zmm_t@ min_vec = _mm512_set1_@vsuf2@(*smallest);
+ @zmm_t@ max_vec = _mm512_set1_@vsuf2@(*biggest);
+
+ if (right - left == 16) {
+ @zmm_t@ vec = _mm512_loadu_@vsuf3@(arr + left);
+ npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, left, left + 16, vec, pivot_vec, &min_vec, &max_vec);
+ *smallest = npyv_reducemin_@vsuf4@(min_vec);
+ *biggest = npyv_reducemax_@vsuf4@(max_vec);
+ return left + (16 - amount_gt_pivot);
+ }
+
+ // first and last 16 values are partitioned at the end
+ @zmm_t@ vec_left = _mm512_loadu_@vsuf3@(arr + left);
+ @zmm_t@ vec_right = _mm512_loadu_@vsuf3@(arr + (right - 16));
+ // store points of the vectors
+ npy_intp r_store = right - 16;
+ npy_intp l_store = left;
+ // indices for loading the elements
+ left += 16;
+ right -= 16;
+ while (right - left != 0) {
+ @zmm_t@ curr_vec;
+ /*
+ * if fewer elements are stored on the right side of the array,
+ * then next elements are loaded from the right side,
+ * otherwise from the left side
+ */
+ if ((r_store + 16) - right < left - l_store) {
+ right -= 16;
+ curr_vec = _mm512_loadu_@vsuf3@(arr + right);
+ }
+ else {
+ curr_vec = _mm512_loadu_@vsuf3@(arr + left);
+ left += 16;
+ }
+ // partition the current vector and save it on both sides of the array
+ npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, r_store + 16, curr_vec, pivot_vec, &min_vec, &max_vec);
+ r_store -= amount_gt_pivot; l_store += (16 - amount_gt_pivot);
+ }
+
+ /* partition and save vec_left and vec_right */
+ npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, r_store + 16, vec_left, pivot_vec, &min_vec, &max_vec);
+ l_store += (16 - amount_gt_pivot);
+ amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, l_store + 16, vec_right, pivot_vec, &min_vec, &max_vec);
+ l_store += (16 - amount_gt_pivot);
+ *smallest = npyv_reducemin_@vsuf4@(min_vec);
+ *biggest = npyv_reducemax_@vsuf4@(max_vec);
+ return l_store;
+}
+
+static NPY_INLINE
+void qsort_@type@(@type_t@* arr, npy_intp left, npy_intp right, npy_int max_iters)
+{
+ /*
+ * Resort to heapsort if quicksort isn't making any progress
+ */
+ if (max_iters <= 0) {
+ heapsort_@type@((void*)(arr + left), right + 1 - left, NULL);
+ return;
+ }
+ /*
+ * Base case: use bitonic networks to sort arrays of <= 128 elements
+ */
+ if (right + 1 - left <= 128) {
+ sort_128_@vsuf1@(arr + left, right + 1 - left);
+ return;
+ }
+
+ @type_t@ pivot = get_pivot_@vsuf1@(arr, left, right);
+ @type_t@ smallest = @TYPE_MAX_VAL@;
+ @type_t@ biggest = @TYPE_MIN_VAL@;
+ npy_intp pivot_index = partition_avx512_@vsuf1@(arr, left, right+1, pivot, &smallest, &biggest);
+ if (pivot != smallest)
+ qsort_@type@(arr, left, pivot_index - 1, max_iters - 1);
+ if (pivot != biggest)
+ qsort_@type@(arr, pivot_index, right, max_iters - 1);
+}
+/**end repeat**/
+
+static NPY_INLINE
+npy_intp replace_nan_with_inf(npy_float* arr, npy_intp arrsize)
+{
+ npy_intp nan_count = 0;
+ __mmask16 loadmask = 0xFFFF;
+ while (arrsize > 0) {
+ if (arrsize < 16) {
+ loadmask = (0x0001 << arrsize) - 0x0001;
+ }
+ __m512 in_zmm = _mm512_maskz_loadu_ps(loadmask, arr);
+ __mmask16 nanmask = _mm512_cmp_ps_mask(in_zmm, in_zmm, _CMP_NEQ_UQ);
+ nan_count += _mm_popcnt_u32((npy_int) nanmask);
+ _mm512_mask_storeu_ps(arr, nanmask, ZMM_MAX_FLOAT);
+ arr += 16;
+ arrsize -= 16;
+ }
+ return nan_count;
+}
+
+static NPY_INLINE
+void replace_inf_with_nan(npy_float* arr, npy_intp arrsize, npy_intp nan_count)
+{
+ for (npy_intp ii = arrsize-1; nan_count > 0; --ii) {
+ arr[ii] = NPY_NANF;
+ nan_count -= 1;
+ }
+}
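+
+/*
+ * Together these two helpers implement the float NaN handling: NaN is the
+ * only value that compares unequal to itself, so replace_nan_with_inf()
+ * detects NaNs with `x != x`, substitutes +inf so the vectorized sort
+ * moves them to the end, and replace_inf_with_nan() rewrites the trailing
+ * nan_count slots as NaN after sorting.
+ */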
+
+/**begin repeat
+ *
+ * #type = int, uint, float#
+ * #type_t = npy_int, npy_uint, npy_float#
+ * #FIXNAN = 0, 0, 1#
+ */
+
+NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(x86_quicksort_@type@)
+(void* arr, npy_intp arrsize)
+{
+ if (arrsize > 1) {
+#if @FIXNAN@
+ npy_intp nan_count = replace_nan_with_inf((@type_t@*) arr, arrsize);
+#endif
+ qsort_@type@((@type_t@*) arr, 0, arrsize-1, 2*log2(arrsize));
+#if @FIXNAN@
+ replace_inf_with_nan((@type_t@*) arr, arrsize, nan_count);
+#endif
+ }
+}
+/**end repeat**/
+
+#endif // NPY_HAVE_AVX512_SKX
diff --git a/numpy/core/src/npysort/x86-qsort.h b/numpy/core/src/npysort/x86-qsort.h
new file mode 100644
index 000000000..8cb8e3654
--- /dev/null
+++ b/numpy/core/src/npysort/x86-qsort.h
@@ -0,0 +1,18 @@
+#include "numpy/npy_common.h"
+#include "npy_cpu_dispatch.h"
+
+#ifndef NPY_NO_EXPORT
+ #define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN
+#endif
+
+#ifndef NPY_DISABLE_OPTIMIZATION
+ #include "x86-qsort.dispatch.h"
+#endif
+NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_int,
+ (void *start, npy_intp num))
+
+NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_uint,
+ (void *start, npy_intp num))
+
+NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_float,
+ (void *start, npy_intp num))
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 708e82910..73bb5e2d8 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -9278,3 +9278,52 @@ class TestViewDtype:
[[1284, 1798], [4368, 4882]],
[[2312, 2826], [5396, 5910]]]
assert_array_equal(x.view('<i2'), expected)
+
+
+# Test various array sizes that hit different code paths in quicksort-avx512
+@pytest.mark.parametrize("N", [8, 16, 24, 32, 48, 64, 96, 128, 151, 191,
+ 256, 383, 512, 1023, 2047])
+def test_sort_float(N):
+ # (1) Regular data with nan sprinkled
+ np.random.seed(42)
+ arr = -0.5 + np.random.sample(N).astype('f')
+ arr[np.random.choice(arr.shape[0], 3)] = np.nan
+ assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap'))
+
+ # (2) with +INF
+ infarr = np.inf*np.ones(N, dtype='f')
+ infarr[np.random.choice(infarr.shape[0], 5)] = -1.0
+ assert_equal(np.sort(infarr, kind='quick'), np.sort(infarr, kind='heap'))
+
+ # (3) with -INF
+ neginfarr = -np.inf*np.ones(N, dtype='f')
+ neginfarr[np.random.choice(neginfarr.shape[0], 5)] = 1.0
+ assert_equal(np.sort(neginfarr, kind='quick'),
+ np.sort(neginfarr, kind='heap'))
+
+ # (4) with +/-INF
+ infarr = np.inf*np.ones(N, dtype='f')
+ infarr[np.random.choice(infarr.shape[0], N // 2)] = -np.inf
+ assert_equal(np.sort(infarr, kind='quick'), np.sort(infarr, kind='heap'))
+
+
+def test_sort_int():
+ # Random data with NPY_MAX_INT32 and NPY_MIN_INT32 sprinkled
+ rng = np.random.default_rng(42)
+ N = 2047
+ minv = np.iinfo(np.int32).min
+ maxv = np.iinfo(np.int32).max
+ arr = rng.integers(low=minv, high=maxv, size=N).astype('int32')
+ arr[np.random.choice(arr.shape[0], 10)] = minv
+ arr[np.random.choice(arr.shape[0], 10)] = maxv
+ assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap'))
+
+
+def test_sort_uint():
+ # Random data with NPY_MAX_UINT32 sprinkled
+ rng = np.random.default_rng(42)
+ N = 2047
+ maxv = np.iinfo(np.uint32).max
+ arr = rng.integers(low=0, high=maxv, size=N).astype('uint32')
+ arr[np.random.choice(arr.shape[0], 10)] = maxv
+ assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap'))