Diffstat (limited to 'numpy')
34 files changed, 4999 insertions, 347 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py index abe53fe9a..46d80fb76 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -52,8 +52,6 @@ of numpy are available under the ``doc`` sub-module:: Available subpackages --------------------- -doc - Topical documentation on broadcasting, indexing, etc. lib Basic functions used by several sub-packages. random @@ -66,8 +64,6 @@ polynomial Polynomial tools testing NumPy testing tools -f2py - Fortran to Python Interface Generator. distutils Enhancements to distutils with support for Fortran compilers support and more. diff --git a/numpy/core/setup.py b/numpy/core/setup.py index a13480907..9704cff0a 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -869,6 +869,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'typeinfo.h'), join('src', 'multiarray', 'usertypes.h'), join('src', 'multiarray', 'vdot.h'), + join('src', 'multiarray', 'textreading', 'readtext.h'), join('include', 'numpy', 'arrayobject.h'), join('include', 'numpy', '_neighborhood_iterator_imp.h'), join('include', 'numpy', 'npy_endian.h'), @@ -947,6 +948,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'usertypes.c'), join('src', 'multiarray', 'vdot.c'), join('src', 'common', 'npy_sort.h.src'), + join('src', 'npysort', 'x86-qsort.dispatch.c.src'), join('src', 'npysort', 'quicksort.c.src'), join('src', 'npysort', 'mergesort.cpp'), join('src', 'npysort', 'timsort.cpp'), @@ -956,6 +958,14 @@ def configuration(parent_package='',top_path=None): join('src', 'npysort', 'selection.cpp'), join('src', 'common', 'npy_binsearch.h'), join('src', 'npysort', 'binsearch.cpp'), + join('src', 'multiarray', 'textreading', 'conversions.c'), + join('src', 'multiarray', 'textreading', 'field_types.c'), + join('src', 'multiarray', 'textreading', 'growth.c'), + join('src', 'multiarray', 'textreading', 'readtext.c'), + join('src', 'multiarray', 'textreading', 'rows.c'), + join('src', 'multiarray', 'textreading', 'stream_pyobject.c'), + join('src', 'multiarray', 'textreading', 'str_to_int.c'), + join('src', 'multiarray', 'textreading', 'tokenize.c.src'), ] ####################################################################### diff --git a/numpy/core/src/common/simd/avx512/arithmetic.h b/numpy/core/src/common/simd/avx512/arithmetic.h index f8632e701..93e9d9d45 100644 --- a/numpy/core/src/common/simd/avx512/arithmetic.h +++ b/numpy/core/src/common/simd/avx512/arithmetic.h @@ -371,7 +371,79 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor) #define npyv_sum_u64 _mm512_reduce_add_epi64 #define npyv_sum_f32 _mm512_reduce_add_ps #define npyv_sum_f64 _mm512_reduce_add_pd + #define npyv_reducemin_u32 _mm512_reduce_min_epu32 + #define npyv_reducemin_s32 _mm512_reduce_min_epi32 + #define npyv_reducemin_f32 _mm512_reduce_min_ps + #define npyv_reducemax_u32 _mm512_reduce_max_epu32 + #define npyv_reducemax_s32 _mm512_reduce_max_epi32 + #define npyv_reducemax_f32 _mm512_reduce_max_ps #else + NPY_FINLINE npy_uint32 npyv_reducemax_u32(npyv_u32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_u32 a1 = _mm512_max_epu32(a, _mm512_permutex2var_epi32(a, idx1, a)); + npyv_u32 a2 = _mm512_max_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1)); + npyv_u32 a3 = _mm512_max_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_u32 a4 = _mm512_max_epu32(a3, 
_mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00)); + } + + NPY_FINLINE npy_int32 npyv_reducemax_s32(npyv_s32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_s32 a1 = _mm512_max_epi32(a, _mm512_permutex2var_epi32(a, idx1, a)); + npyv_s32 a2 = _mm512_max_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1)); + npyv_s32 a3 = _mm512_max_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_s32 a4 = _mm512_max_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00)); + } + + NPY_FINLINE npy_float npyv_reducemax_f32(npyv_f32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_f32 a1 = _mm512_max_ps(a, _mm512_permutex2var_ps(a, idx1, a)); + npyv_f32 a2 = _mm512_max_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1)); + npyv_f32 a3 = _mm512_max_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_f32 a4 = _mm512_max_ps(a3, _mm512_shuffle_ps(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00)); + } + + NPY_FINLINE npy_uint32 npyv_reducemin_u32(npyv_u32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_u32 a1 = _mm512_min_epu32(a, _mm512_permutex2var_epi32(a, idx1, a)); + npyv_u32 a2 = _mm512_min_epu32(a1, _mm512_permutex2var_epi32(a1, idx2, a1)); + npyv_u32 a3 = _mm512_min_epu32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_u32 a4 = _mm512_min_epu32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00)); + } + + NPY_FINLINE npy_int32 npyv_reducemin_s32(npyv_s32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_s32 a1 = _mm512_min_epi32(a, _mm512_permutex2var_epi32(a, idx1, a)); + npyv_s32 a2 = _mm512_min_epi32(a1, _mm512_permutex2var_epi32(a1, idx2, a1)); + npyv_s32 a3 = _mm512_min_epi32(a2, _mm512_shuffle_epi32(a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_s32 a4 = _mm512_min_epi32(a3, _mm512_shuffle_epi32(a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtsi128_si32(_mm512_extracti32x4_epi32(a4, 0x00)); + } + + NPY_FINLINE npy_float npyv_reducemin_f32(npyv_f32 a) + { + const npyv_u32 idx1 = _mm512_set_epi32(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + const npyv_u32 idx2 = _mm512_set_epi32(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + npyv_f32 a1 = _mm512_min_ps(a, _mm512_permutex2var_ps(a, idx1, a)); + npyv_f32 a2 = _mm512_min_ps(a1, _mm512_permutex2var_ps(a1, idx2, a1)); + npyv_f32 a3 = _mm512_min_ps(a2, _mm512_shuffle_ps(a2, a2, (1<<6 | 0<<4 | 3<<2 | 2))); + npyv_f32 a4 = _mm512_min_ps(a3, _mm512_shuffle_ps(a3, a3, (2<<6 | 3<<4 | 0<<2 | 1))); + return _mm_cvtss_f32(_mm512_extractf32x4_ps(a4, 0x00)); + } + NPY_FINLINE npy_uint32 npyv_sum_u32(npyv_u32 a) + { + __m256i half = _mm256_add_epi32(npyv512_lower_si256(a), npyv512_higher_si256(a)); diff --git a/numpy/core/src/multiarray/conversion_utils.c
b/numpy/core/src/multiarray/conversion_utils.c index a1de580d9..e4eb4f49e 100644 --- a/numpy/core/src/multiarray/conversion_utils.c +++ b/numpy/core/src/multiarray/conversion_utils.c @@ -993,6 +993,17 @@ PyArray_PyIntAsIntp(PyObject *o) } +NPY_NO_EXPORT int +PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val) +{ + *val = PyArray_PyIntAsIntp(o); + if (error_converting(*val)) { + return NPY_FAIL; + } + return NPY_SUCCEED; +} + + /* * PyArray_IntpFromIndexSequence * Returns the number of dimensions or -1 if an error occurred. diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h index 4072841ee..4d0fbb894 100644 --- a/numpy/core/src/multiarray/conversion_utils.h +++ b/numpy/core/src/multiarray/conversion_utils.h @@ -7,6 +7,9 @@ NPY_NO_EXPORT int PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq); NPY_NO_EXPORT int +PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val); + +NPY_NO_EXPORT int PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq); typedef enum { diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 789446d0c..a7b6898e1 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -69,6 +69,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "get_attr_string.h" #include "experimental_public_dtype_api.h" /* _get_experimental_dtype_api */ +#include "textreading/readtext.h" /* _readtext_from_file_object */ #include "npy_dlpack.h" @@ -4456,6 +4457,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_get_experimental_dtype_api", (PyCFunction)_get_experimental_dtype_api, METH_O, NULL}, + {"_load_from_filelike", (PyCFunction)_load_from_filelike, + METH_FASTCALL | METH_KEYWORDS, NULL}, /* from umath */ {"frompyfunc", (PyCFunction) ufunc_frompyfunc, diff --git a/numpy/core/src/multiarray/textreading/conversions.c b/numpy/core/src/multiarray/textreading/conversions.c new file mode 100644 index 000000000..11f4210f7 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/conversions.c @@ -0,0 +1,395 @@ + +#include <Python.h> + +#include <string.h> +#include <stdlib.h> +#include <stdbool.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "lowlevel_strided_loops.h" + +#include "conversions.h" +#include "str_to_int.h" + +#include "array_coercion.h" + + +/* + * Coercion to boolean is done via integer right now. + */ +NPY_NO_EXPORT int +to_bool(PyArray_Descr *NPY_UNUSED(descr), + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *NPY_UNUSED(pconfig)) +{ + int64_t res; + if (str_to_int64(str, end, INT64_MIN, INT64_MAX, &res) < 0) { + return -1; + } + *dataptr = (char)(res != 0); + return 0; +} + + +/* + * In order to not pack a whole copy of a floating point parser, we copy the + * result into ascii and call the Python one. Float parsing isn't super quick + * so this is not terrible, but avoiding it would speed up things. + * + * Also note that parsing the first float of a complex will copy the whole + * string to ascii rather than just the first part. + * TODO: A tweak of the break might be a simple mitigation there. + * + * @param str The UCS4 string to parse + * @param end Pointer to the end of the string + * @param skip_trailing_whitespace If false does not skip trailing whitespace + * (used by the complex parser). + * @param result Output stored as double value. 
+ */ +static NPY_INLINE int +double_from_ucs4( + const Py_UCS4 *str, const Py_UCS4 *end, + bool strip_whitespace, double *result, const Py_UCS4 **p_end) +{ + /* skip leading whitespace */ + if (strip_whitespace) { + while (Py_UNICODE_ISSPACE(*str)) { + str++; + } + } + if (str == end) { + return -1; /* empty or only whitespace: not a floating point number */ + } + + /* We convert to ASCII for the Python parser, use stack if small: */ + char stack_buf[128]; + char *heap_buf = NULL; + char *ascii = stack_buf; + + size_t str_len = end - str + 1; + if (str_len > 128) { + heap_buf = PyMem_MALLOC(str_len); + if (heap_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + ascii = heap_buf; + } + char *c = ascii; + for (; str < end; str++, c++) { + if (NPY_UNLIKELY(*str >= 128)) { + /* Character cannot be used, ignore for end calculation and stop */ + end = str; + break; + } + *c = (char)(*str); + } + *c = '\0'; + + char *end_parsed; + *result = PyOS_string_to_double(ascii, &end_parsed, NULL); + /* Rewind `end` to the first UCS4 character not parsed: */ + end = end - (c - end_parsed); + + PyMem_FREE(heap_buf); + + if (*result == -1. && PyErr_Occurred()) { + return -1; + } + + if (strip_whitespace) { + /* and then skip any remainig whitespace: */ + while (Py_UNICODE_ISSPACE(*end)) { + end++; + } + } + *p_end = end; + return 0; +} + + +NPY_NO_EXPORT int +to_float(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *NPY_UNUSED(pconfig)) +{ + double double_val; + const Py_UCS4 *p_end; + if (double_from_ucs4(str, end, true, &double_val, &p_end) < 0) { + return -1; + } + if (p_end != end) { + return -1; + } + + float val = (float)double_val; + memcpy(dataptr, &val, sizeof(float)); + if (!PyArray_ISNBO(descr->byteorder)) { + npy_bswap4_unaligned(dataptr); + } + return 0; +} + + +NPY_NO_EXPORT int +to_double(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *NPY_UNUSED(pconfig)) +{ + double val; + const Py_UCS4 *p_end; + if (double_from_ucs4(str, end, true, &val, &p_end) < 0) { + return -1; + } + if (p_end != end) { + return -1; + } + + memcpy(dataptr, &val, sizeof(double)); + if (!PyArray_ISNBO(descr->byteorder)) { + npy_bswap8_unaligned(dataptr); + } + return 0; +} + + +static bool +to_complex_int( + const Py_UCS4 *item, const Py_UCS4 *token_end, + double *p_real, double *p_imag, + Py_UCS4 imaginary_unit, bool allow_parens) +{ + const Py_UCS4 *p_end; + bool unmatched_opening_paren = false; + + /* Remove whitespace before the possibly leading '(' */ + while (Py_UNICODE_ISSPACE(*item)) { + ++item; + } + if (allow_parens && (*item == '(')) { + unmatched_opening_paren = true; + ++item; + /* Allow whitespace within the parentheses: "( 1j)" */ + while (Py_UNICODE_ISSPACE(*item)) { + ++item; + } + } + if (double_from_ucs4(item, token_end, false, p_real, &p_end) < 0) { + return false; + } + if (p_end == token_end) { + // No imaginary part in the string (e.g. 
"3.5") + *p_imag = 0.0; + return !unmatched_opening_paren; + } + if (*p_end == imaginary_unit) { + /* Only an imaginary part (e.g "1.5j") */ + *p_imag = *p_real; + *p_real = 0.0; + ++p_end; + } + else if (*p_end == '+' || *p_end == '-') { + /* Imaginary part still to parse */ + if (*p_end == '+') { + ++p_end; /* Advance to support +- (and ++) */ + } + if (double_from_ucs4(p_end, token_end, false, p_imag, &p_end) < 0) { + return false; + } + if (*p_end != imaginary_unit) { + return false; + } + ++p_end; + } + else { + *p_imag = 0; + } + + if (unmatched_opening_paren) { + /* Allow whitespace inside brackets as in "(1+2j )" or "( 1j )" */ + while (Py_UNICODE_ISSPACE(*p_end)) { + ++p_end; + } + if (*p_end == ')') { + ++p_end; + } + else { + /* parentheses was not closed */ + return false; + } + } + + while (Py_UNICODE_ISSPACE(*p_end)) { + ++p_end; + } + return p_end == token_end; +} + + +NPY_NO_EXPORT int +to_cfloat(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig) +{ + double real; + double imag; + + bool success = to_complex_int( + str, end, &real, &imag, + pconfig->imaginary_unit, true); + + if (!success) { + return -1; + } + npy_complex64 val = {(float)real, (float)imag}; + memcpy(dataptr, &val, sizeof(npy_complex64)); + if (!PyArray_ISNBO(descr->byteorder)) { + npy_bswap4_unaligned(dataptr); + npy_bswap4_unaligned(dataptr + 4); + } + return 0; +} + + +NPY_NO_EXPORT int +to_cdouble(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig) +{ + double real; + double imag; + + bool success = to_complex_int( + str, end, &real, &imag, pconfig->imaginary_unit, true); + + if (!success) { + return -1; + } + npy_complex128 val = {real, imag}; + memcpy(dataptr, &val, sizeof(npy_complex128)); + if (!PyArray_ISNBO(descr->byteorder)) { + npy_bswap8_unaligned(dataptr); + npy_bswap8_unaligned(dataptr + 8); + } + return 0; +} + + +/* + * String and unicode conversion functions. + */ +NPY_NO_EXPORT int +to_string(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *NPY_UNUSED(unused)) +{ + const Py_UCS4* c = str; + size_t length = descr->elsize; + + for (size_t i = 0; i < length; i++) { + if (c < end) { + /* + * loadtxt assumed latin1, which is compatible with UCS1 (first + * 256 unicode characters). + */ + if (NPY_UNLIKELY(*c > 255)) { + /* TODO: Was UnicodeDecodeError, is unspecific error good? */ + return -1; + } + dataptr[i] = (Py_UCS1)(*c); + c++; + } + else { + dataptr[i] = '\0'; + } + } + return 0; +} + + +NPY_NO_EXPORT int +to_unicode(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *NPY_UNUSED(unused)) +{ + int length = descr->elsize / 4; + + if (length <= end - str) { + memcpy(dataptr, str, length * 4); + } + else { + size_t given_len = end - str; + memcpy(dataptr, str, given_len * 4); + memset(dataptr + given_len * 4, '\0', (length - given_len) * 4); + } + + if (!PyArray_ISNBO(descr->byteorder)) { + for (int i = 0; i < length; i++) { + npy_bswap4_unaligned(dataptr); + dataptr += 4; + } + } + return 0; +} + + + +/* + * Convert functions helper for the generic converter. 
+ */ +static PyObject * +call_converter_function( + PyObject *func, const Py_UCS4 *str, size_t length, bool byte_converters) +{ + PyObject *s = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, str, length); + if (s == NULL) { + return s; + } + if (byte_converters) { + Py_SETREF(s, PyUnicode_AsEncodedString(s, "latin1", NULL)); + if (s == NULL) { + return NULL; + } + } + if (func == NULL) { + return s; + } + PyObject *result = PyObject_CallFunctionObjArgs(func, s, NULL); + Py_DECREF(s); + return result; +} + + +NPY_NO_EXPORT int +to_generic_with_converter(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *config, PyObject *func) +{ + bool use_byte_converter; + if (func == NULL) { + use_byte_converter = config->c_byte_converters; + } + else { + use_byte_converter = config->python_byte_converters; + } + /* Converts to unicode and calls custom converter (if set) */ + PyObject *converted = call_converter_function( + func, str, (size_t)(end - str), use_byte_converter); + if (converted == NULL) { + return -1; + } + + int res = PyArray_Pack(descr, dataptr, converted); + Py_DECREF(converted); + return res; +} + + +NPY_NO_EXPORT int +to_generic(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *config) +{ + return to_generic_with_converter(descr, str, end, dataptr, config, NULL); +} diff --git a/numpy/core/src/multiarray/textreading/conversions.h b/numpy/core/src/multiarray/textreading/conversions.h new file mode 100644 index 000000000..222eea4e7 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/conversions.h @@ -0,0 +1,57 @@ +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_ + +#include <stdbool.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" + +#include "textreading/parser_config.h" + +NPY_NO_EXPORT int +to_bool(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +NPY_NO_EXPORT int +to_float(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +NPY_NO_EXPORT int +to_double(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +NPY_NO_EXPORT int +to_cfloat(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +NPY_NO_EXPORT int +to_cdouble(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +NPY_NO_EXPORT int +to_string(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *unused); + +NPY_NO_EXPORT int +to_unicode(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *unused); + +NPY_NO_EXPORT int +to_generic_with_converter(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *unused, PyObject *func); + +NPY_NO_EXPORT int +to_generic(PyArray_Descr *descr, + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, + parser_config *pconfig); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_CONVERSIONS_H_ */ diff --git a/numpy/core/src/multiarray/textreading/field_types.c b/numpy/core/src/multiarray/textreading/field_types.c new file mode 100644 index 000000000..0722efd57 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/field_types.c @@ -0,0 +1,201 @@ +#include 
"field_types.h" +#include "conversions.h" +#include "str_to_int.h" + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" +#include "alloc.h" + +#include "textreading/growth.h" + + +NPY_NO_EXPORT void +field_types_xclear(int num_field_types, field_type *ft) { + assert(num_field_types >= 0); + if (ft == NULL) { + return; + } + for (int i = 0; i < num_field_types; i++) { + Py_XDECREF(ft[i].descr); + ft[i].descr = NULL; + } + PyMem_Free(ft); +} + + +/* + * Fetch custom converters for the builtin NumPy DTypes (or the generic one). + * Structured DTypes get unpacked and `object` uses the generic method. + * + * TODO: This should probably be moved on the DType object in some form, + * to allow user DTypes to define their own converters. + */ +static set_from_ucs4_function * +get_from_ucs4_function(PyArray_Descr *descr) +{ + if (descr->type_num == NPY_BOOL) { + return &to_bool; + } + else if (PyDataType_ISSIGNED(descr)) { + switch (descr->elsize) { + case 1: + return &to_int8; + case 2: + return &to_int16; + case 4: + return &to_int32; + case 8: + return &to_int64; + default: + assert(0); + } + } + else if (PyDataType_ISUNSIGNED(descr)) { + switch (descr->elsize) { + case 1: + return &to_uint8; + case 2: + return &to_uint16; + case 4: + return &to_uint32; + case 8: + return &to_uint64; + default: + assert(0); + } + } + else if (descr->type_num == NPY_FLOAT) { + return &to_float; + } + else if (descr->type_num == NPY_DOUBLE) { + return &to_double; + } + else if (descr->type_num == NPY_CFLOAT) { + return &to_cfloat; + } + else if (descr->type_num == NPY_CDOUBLE) { + return &to_cdouble; + } + else if (descr->type_num == NPY_STRING) { + return &to_string; + } + else if (descr->type_num == NPY_UNICODE) { + return &to_unicode; + } + return &to_generic; +} + + +/* + * Note that the function cleans up `ft` on error. If `num_field_types < 0` + * cleanup has already happened in the internal call. 
+ */ +static npy_intp +field_type_grow_recursive(PyArray_Descr *descr, + npy_intp num_field_types, field_type **ft, npy_intp *ft_size, + npy_intp field_offset) +{ + if (PyDataType_HASSUBARRAY(descr)) { + PyArray_Dims shape = {NULL, -1}; + + if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) { + PyErr_SetString(PyExc_ValueError, "invalid subarray shape"); + field_types_xclear(num_field_types, *ft); + return -1; + } + npy_intp size = PyArray_MultiplyList(shape.ptr, shape.len); + npy_free_cache_dim_obj(shape); + for (npy_intp i = 0; i < size; i++) { + num_field_types = field_type_grow_recursive(descr->subarray->base, + num_field_types, ft, ft_size, field_offset); + field_offset += descr->subarray->base->elsize; + if (num_field_types < 0) { + return -1; + } + } + return num_field_types; + } + else if (PyDataType_HASFIELDS(descr)) { + npy_int num_descr_fields = PyTuple_Size(descr->names); + if (num_descr_fields < 0) { + field_types_xclear(num_field_types, *ft); + return -1; + } + for (npy_intp i = 0; i < num_descr_fields; i++) { + PyObject *key = PyTuple_GET_ITEM(descr->names, i); + PyObject *tup = PyObject_GetItem(descr->fields, key); + if (tup == NULL) { + field_types_xclear(num_field_types, *ft); + return -1; + } + PyArray_Descr *field_descr; + PyObject *title; + int offset; + if (!PyArg_ParseTuple(tup, "Oi|O", &field_descr, &offset, &title)) { + Py_DECREF(tup); + field_types_xclear(num_field_types, *ft); + return -1; + } + Py_DECREF(tup); + num_field_types = field_type_grow_recursive( + field_descr, num_field_types, ft, ft_size, + field_offset + offset); + if (num_field_types < 0) { + return -1; + } + } + return num_field_types; + } + + if (*ft_size <= num_field_types) { + npy_intp alloc_size = grow_size_and_multiply( + ft_size, 4, sizeof(field_type)); + if (alloc_size < 0) { + field_types_xclear(num_field_types, *ft); + return -1; + } + field_type *new_ft = PyMem_Realloc(*ft, alloc_size); + if (new_ft == NULL) { + field_types_xclear(num_field_types, *ft); + return -1; + } + *ft = new_ft; + } + + Py_INCREF(descr); + (*ft)[num_field_types].descr = descr; + (*ft)[num_field_types].set_from_ucs4 = get_from_ucs4_function(descr); + (*ft)[num_field_types].structured_offset = field_offset; + + return num_field_types + 1; +} + + +/* + * Prepare the "field_types" for the given dtypes/descriptors. Currently, + * we copy the itemsize, but the main thing is that we check for custom + * converters. + */ +NPY_NO_EXPORT npy_intp +field_types_create(PyArray_Descr *descr, field_type **ft) +{ + if (descr->subarray != NULL) { + /* + * This could probably be allowed, but NumPy absorbs the dimensions + * so it is an awkward corner case that probably never really worked. + */ + PyErr_SetString(PyExc_TypeError, + "file reader does not support subarray dtypes. 
You can" + "put the dtype into a structured one using " + "`np.dtype(('name', dtype))` to avoid this limitation."); + return -1; + } + + npy_intp ft_size = 4; + *ft = PyMem_Malloc(ft_size * sizeof(field_type)); + if (*ft == NULL) { + return -1; + } + return field_type_grow_recursive(descr, 0, ft, &ft_size, 0); +} diff --git a/numpy/core/src/multiarray/textreading/field_types.h b/numpy/core/src/multiarray/textreading/field_types.h new file mode 100644 index 000000000..f26e00a5e --- /dev/null +++ b/numpy/core/src/multiarray/textreading/field_types.h @@ -0,0 +1,67 @@ + +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_ + +#include <stdint.h> +#include <stdbool.h> +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" + +#include "textreading/parser_config.h" + +/** + * Function defining the conversion for each value. + * + * This function must support unaligned memory access. As of now, there is + * no special error handling (in whatever form): We assume that it is always + * reasonable to raise a `ValueError` noting the string that failed to be + * converted. + * + * NOTE: An earlier version of the code had unused default values (pandas + * does this) when columns are missing. We could define this either + * by passing `NULL` in, or by adding a default explicitly somewhere. + * (I think users should probably have to define the default, at which + * point it doesn't matter here.) + * + * NOTE: We are currently passing the parser config, this could be made public + * or could be set up to be dtype specific/private. Always passing + * pconfig fully seems easier right now even if it may change. + * (A future use-case may for example be user-specified strings that are + * considered boolean True or False). + * + * TODO: Aside from nailing down the above notes, it may be nice to expose + * these function publically. This could allow user DTypes to provide + * a converter or custom converters written in C rather than Python. + * + * @param descr The NumPy descriptor of the field (may be byte-swapped, etc.) + * @param str Pointer to the beginning of the UCS4 string to be parsed. + * @param end Pointer to the end of the UCS4 string. This value is currently + * guaranteed to be `\0`, ensuring that parsers can rely on + * nul-termination. + * @param dataptr The pointer where to store the parsed value + * @param pconfig Additional configuration for the parser. + * @returns 0 on success and -1 on failure. If the return value is -1 an + * error may or may not be set. If an error is set, it is chained + * behind the generic ValueError. + */ +typedef int (set_from_ucs4_function)( + PyArray_Descr *descr, const Py_UCS4 *str, const Py_UCS4 *end, + char *dataptr, parser_config *pconfig); + +typedef struct _field_type { + set_from_ucs4_function *set_from_ucs4; + /* The original NumPy descriptor */ + PyArray_Descr *descr; + /* Offset to this entry within row. 
*/ + npy_intp structured_offset; +} field_type; + + +NPY_NO_EXPORT void +field_types_xclear(int num_field_types, field_type *ft); + +NPY_NO_EXPORT npy_intp +field_types_create(PyArray_Descr *descr, field_type **ft); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_FIELD_TYPES_H_ */ diff --git a/numpy/core/src/multiarray/textreading/growth.c b/numpy/core/src/multiarray/textreading/growth.c new file mode 100644 index 000000000..49a09d572 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/growth.c @@ -0,0 +1,47 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" + +#include "templ_common.h" + +/* + * Helper function taking the size input and growing it (based on min_grow). + * The current scheme is a minimum growth and a general growth by 25% + * overallocation. This is then capped at 2**20 elements, as that propels us + * in the range of large page sizes (so it is presumably more than enough). + * + * It further multiplies it with `itemsize` and ensures that all results fit + * into an `npy_intp`. + * Returns -1 if any overflow occurred or the result would not fit. + * The user has to ensure the input is ssize_t but not negative. + */ +NPY_NO_EXPORT npy_intp +grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize) { + /* min_grow must be a power of two: */ + assert((min_grow & (min_grow - 1)) == 0); + npy_uintp new_size = (npy_uintp)*size; + npy_intp growth = *size >> 2; + if (growth <= min_grow) { + /* can never lead to overflow if we are using min_growth */ + new_size += min_grow; + } + else { + if (growth > 1 << 20) { + /* limit growth to order of MiB (even hugepages are not larger) */ + growth = 1 << 20; + } + new_size += growth + min_grow - 1; + new_size &= ~min_grow; + + if (new_size > NPY_MAX_INTP) { + return -1; + } + } + *size = (npy_intp)new_size; + npy_intp alloc_size; + if (npy_mul_with_overflow_intp(&alloc_size, (npy_intp)new_size, itemsize)) { + return -1; + } + return alloc_size; +} + diff --git a/numpy/core/src/multiarray/textreading/growth.h b/numpy/core/src/multiarray/textreading/growth.h new file mode 100644 index 000000000..237b77ad3 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/growth.h @@ -0,0 +1,7 @@ +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_ + +NPY_NO_EXPORT npy_intp +grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_GROWTH_H_ */ diff --git a/numpy/core/src/multiarray/textreading/parser_config.h b/numpy/core/src/multiarray/textreading/parser_config.h new file mode 100644 index 000000000..00e911667 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/parser_config.h @@ -0,0 +1,61 @@ + +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_ + +#include <stdbool.h> + +typedef struct { + /* + * Field delimiter character. + * Typically ',', ' ', '\t', ignored if `delimiter_is_whitespace` is true. + */ + Py_UCS4 delimiter; + + /* + * Character used to quote fields. + * Typically '"' or "'". To disable quoting we set this to UINT_MAX + * (which is not a valid unicode character and thus cannot occur in the + * file; the same is used for all other characters if necessary). + */ + Py_UCS4 quote; + + /* + * Character(s) that indicates the start of a comment. + * Typically '#', '%' or ';'. 
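The growth policy implemented by grow_size_and_multiply() above (a fixed minimum step for small buffers, roughly 25% over-allocation otherwise, and never more than 2**20 extra elements per step) can be tried out with a small standalone program. The sketch below uses made-up names and is not part of the patch; it also leaves out the rounding of the new size and the size*itemsize overflow check that the real function performs.

#include <stdio.h>
#include <stddef.h>

/* Simplified version of the growth step described above. */
static size_t next_capacity(size_t size, size_t min_grow)
{
    size_t growth = size / 4;                /* ~25% over-allocation */
    if (growth <= min_grow) {
        return size + min_grow;              /* small buffers: fixed minimum step */
    }
    if (growth > (size_t)1 << 20) {
        growth = (size_t)1 << 20;            /* cap a single step at 2**20 elements */
    }
    return size + growth;
}

int main(void)
{
    size_t rows = 0;
    for (int step = 0; step < 10; step++) {
        rows = next_capacity(rows, 512);
        printf("step %d: room for %zu rows\n", step, rows);
    }
    return 0;
}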
+ * When encountered in a line and not inside quotes, all character + * from the comment character(s) to the end of the line are ignored. + */ + Py_UCS4 comment; + + /* + * Ignore whitespace at the beginning of a field (outside/before quotes). + * Is (and must be) set if `delimiter_is_whitespace`. + */ + bool ignore_leading_whitespace; + + /* + * If true, the delimiter is ignored and any unicode whitespace is used + * for splitting (same as `string.split()` in Python). In that case + * `ignore_leading_whitespace` should also be set. + */ + bool delimiter_is_whitespace; + + /* + * The imaginary unit character. Default is `j`. + */ + Py_UCS4 imaginary_unit; + + /* + * Data should be encoded as `latin1` when using python converter + * (implementing `loadtxt` default Python 2 compatibility mode). + * The c byte converter is used when the user requested `dtype="S"`. + * In this case we go via `dtype=object`, however, loadtxt allows latin1 + * while normal object to string casts only accept ASCII, so it ensures + * that that the object array already contains bytes and not strings. + */ + bool python_byte_converters; + bool c_byte_converters; +} parser_config; + + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_PARSER_CONFIG_H_ */ diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c new file mode 100644 index 000000000..7af5ee891 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/readtext.c @@ -0,0 +1,312 @@ +#include <stdio.h> +#include <stdbool.h> + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "npy_argparse.h" +#include "common.h" +#include "conversion_utils.h" + +#include "textreading/parser_config.h" +#include "textreading/stream_pyobject.h" +#include "textreading/field_types.h" +#include "textreading/rows.h" +#include "textreading/str_to_int.h" + + +// +// `usecols` must point to a Python object that is Py_None or a 1-d contiguous +// numpy array with data type int32. +// +// `dtype` must point to a Python object that is Py_None or a numpy dtype +// instance. If the latter, code and sizes must be arrays of length +// num_dtype_fields, holding the flattened data field type codes and byte +// sizes. (num_dtype_fields, codes, and sizes can be inferred from dtype, +// but we do that in Python code.) +// +// If both `usecols` and `dtype` are not None, and the data type is compound, +// then len(usecols) must equal num_dtype_fields. +// +// If `dtype` is given and it is compound, and `usecols` is None, then the +// number of columns in the file must match the number of fields in `dtype`. 
+// +static PyObject * +_readtext_from_stream(stream *s, + parser_config *pc, Py_ssize_t num_usecols, Py_ssize_t usecols[], + Py_ssize_t skiplines, Py_ssize_t max_rows, + PyObject *converters, PyObject *dtype) +{ + PyArrayObject *arr = NULL; + PyArray_Descr *out_dtype = NULL; + field_type *ft = NULL; + + /* + * If dtypes[0] is dtype the input was not structured and the result + * is considered "homogeneous" and we have to discover the number of + * columns/ + */ + out_dtype = (PyArray_Descr *)dtype; + Py_INCREF(out_dtype); + + Py_ssize_t num_fields = field_types_create(out_dtype, &ft); + if (num_fields < 0) { + goto finish; + } + bool homogeneous = num_fields == 1 && ft[0].descr == out_dtype; + + if (!homogeneous && usecols != NULL && num_usecols != num_fields) { + PyErr_Format(PyExc_TypeError, + "If a structured dtype is used, the number of columns in " + "`usecols` must match the effective number of fields. " + "But %zd usecols were given and the number of fields is %zd.", + num_usecols, num_fields); + goto finish; + } + + arr = read_rows( + s, max_rows, num_fields, ft, pc, + num_usecols, usecols, skiplines, converters, + NULL, out_dtype, homogeneous); + if (arr == NULL) { + goto finish; + } + + finish: + Py_XDECREF(out_dtype); + field_types_xclear(num_fields, ft); + return (PyObject *)arr; +} + + +static int +parse_control_character(PyObject *obj, Py_UCS4 *character) +{ + if (obj == Py_None) { + *character = (Py_UCS4)-1; /* character beyond unicode range */ + return 1; + } + if (!PyUnicode_Check(obj) || PyUnicode_GetLength(obj) != 1) { + PyErr_Format(PyExc_TypeError, + "Text reading control character must be a single unicode " + "character or None; but got: %.100R", obj); + return 0; + } + *character = PyUnicode_READ_CHAR(obj, 0); + return 1; +} + + +/* + * A (somewhat verbose) check that none of the control characters match or are + * newline. Most of these combinations are completely fine, just weird or + * surprising. + * (I.e. there is an implicit priority for control characters, so if a comment + * matches a delimiter, it would just be a comment.) + * In theory some `delimiter=None` paths could have a "meaning", but let us + * assume that users are better of setting one of the control chars to `None` + * for clarity. + * + * This also checks that the control characters cannot be newlines. 
+ */ +static int +error_if_matching_control_characters( + Py_UCS4 delimiter, Py_UCS4 quote, Py_UCS4 comment) +{ + char *control_char1; + char *control_char2 = NULL; + if (comment != (Py_UCS4)-1) { + control_char1 = "comment"; + if (comment == '\r' || comment == '\n') { + goto error; + } + else if (comment == quote) { + control_char2 = "quotechar"; + goto error; + } + else if (comment == delimiter) { + control_char2 = "delimiter"; + goto error; + } + } + if (quote != (Py_UCS4)-1) { + control_char1 = "quotechar"; + if (quote == '\r' || quote == '\n') { + goto error; + } + else if (quote == delimiter) { + control_char2 = "delimiter"; + goto error; + } + } + if (delimiter != (Py_UCS4)-1) { + control_char1 = "delimiter"; + if (delimiter == '\r' || delimiter == '\n') { + goto error; + } + } + /* The above doesn't work with delimiter=None, which means "whitespace" */ + if (delimiter == (Py_UCS4)-1) { + control_char1 = "delimiter"; + if (Py_UNICODE_ISSPACE(comment)) { + control_char2 = "comment"; + goto error; + } + else if (Py_UNICODE_ISSPACE(quote)) { + control_char2 = "quotechar"; + goto error; + } + } + return 0; + + error: + if (control_char2 != NULL) { + PyErr_Format(PyExc_TypeError, + "The values for control characters '%s' and '%s' are " + "incompatible", + control_char1, control_char2); + } + else { + PyErr_Format(PyExc_TypeError, + "control character '%s' cannot be a newline (`\\r` or `\\n`).", + control_char1, control_char2); + } + return -1; +} + + +NPY_NO_EXPORT PyObject * +_load_from_filelike(PyObject *NPY_UNUSED(mod), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) +{ + PyObject *file; + Py_ssize_t skiplines = 0; + Py_ssize_t max_rows = -1; + PyObject *usecols_obj = Py_None; + PyObject *converters = Py_None; + + PyObject *dtype = Py_None; + PyObject *encoding_obj = Py_None; + const char *encoding = NULL; + + parser_config pc = { + .delimiter = ',', + .comment = '#', + .quote = '"', + .imaginary_unit = 'j', + .delimiter_is_whitespace = false, + .ignore_leading_whitespace = false, + .python_byte_converters = false, + .c_byte_converters = false, + }; + bool filelike = true; + + PyObject *arr = NULL; + + NPY_PREPARE_ARGPARSER; + if (npy_parse_arguments("_load_from_filelike", args, len_args, kwnames, + "file", NULL, &file, + "|delimiter", &parse_control_character, &pc.delimiter, + "|comment", &parse_control_character, &pc.comment, + "|quote", &parse_control_character, &pc.quote, + "|imaginary_unit", &parse_control_character, &pc.imaginary_unit, + "|usecols", NULL, &usecols_obj, + "|skiplines", &PyArray_IntpFromPyIntConverter, &skiplines, + "|max_rows", &PyArray_IntpFromPyIntConverter, &max_rows, + "|converters", NULL, &converters, + "|dtype", NULL, &dtype, + "|encoding", NULL, &encoding_obj, + "|filelike", &PyArray_BoolConverter, &filelike, + "|byte_converters", &PyArray_BoolConverter, &pc.python_byte_converters, + "|c_byte_converters", PyArray_BoolConverter, &pc.c_byte_converters, + NULL, NULL, NULL) < 0) { + return NULL; + } + + /* Reject matching control characters, they just rarely make sense anyway */ + if (error_if_matching_control_characters( + pc.delimiter, pc.quote, pc.comment) < 0) { + return NULL; + } + + if (pc.delimiter == (Py_UCS4)-1) { + pc.delimiter_is_whitespace = true; + /* Ignore leading whitespace to match `string.split(None)` */ + pc.ignore_leading_whitespace = true; + } + + if (!PyArray_DescrCheck(dtype) ) { + PyErr_SetString(PyExc_TypeError, + "internal error: dtype must be provided and be a NumPy dtype"); + return NULL; + } + + if (encoding_obj != 
Py_None) { + if (!PyUnicode_Check(encoding_obj)) { + PyErr_SetString(PyExc_TypeError, + "encoding must be a unicode string."); + return NULL; + } + encoding = PyUnicode_AsUTF8(encoding_obj); + if (encoding == NULL) { + return NULL; + } + } + + /* + * Parse usecols, the rest of NumPy has no clear helper for this, so do + * it here manually. + */ + Py_ssize_t num_usecols = -1; + Py_ssize_t *usecols = NULL; + if (usecols_obj != Py_None) { + num_usecols = PySequence_Length(usecols_obj); + if (num_usecols < 0) { + return NULL; + } + /* Calloc just to not worry about overflow */ + usecols = PyMem_Calloc(num_usecols, sizeof(Py_ssize_t)); + for (Py_ssize_t i = 0; i < num_usecols; i++) { + PyObject *tmp = PySequence_GetItem(usecols_obj, i); + if (tmp == NULL) { + PyMem_FREE(usecols); + return NULL; + } + usecols[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError); + if (error_converting(usecols[i])) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Format(PyExc_TypeError, + "usecols must be an int or a sequence of ints but " + "it contains at least one element of type '%s'", + Py_TYPE(tmp)->tp_name); + } + Py_DECREF(tmp); + PyMem_FREE(usecols); + return NULL; + } + Py_DECREF(tmp); + } + } + + stream *s; + if (filelike) { + s = stream_python_file(file, encoding); + } + else { + s = stream_python_iterable(file, encoding); + } + if (s == NULL) { + PyMem_FREE(usecols); + return NULL; + } + + arr = _readtext_from_stream( + s, &pc, num_usecols, usecols, skiplines, max_rows, converters, dtype); + stream_close(s); + PyMem_FREE(usecols); + return arr; +} + diff --git a/numpy/core/src/multiarray/textreading/readtext.h b/numpy/core/src/multiarray/textreading/readtext.h new file mode 100644 index 000000000..5cf48c555 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/readtext.h @@ -0,0 +1,7 @@ +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_ + +NPY_NO_EXPORT PyObject * +_load_from_filelike(PyObject *self, PyObject *args, PyObject *kwargs); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_READTEXT_H_ */ diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c new file mode 100644 index 000000000..e30ff835e --- /dev/null +++ b/numpy/core/src/multiarray/textreading/rows.c @@ -0,0 +1,481 @@ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "numpy/npy_3kcompat.h" +#include "alloc.h" + +#include <string.h> +#include <stdbool.h> + +#include "textreading/stream.h" +#include "textreading/tokenize.h" +#include "textreading/conversions.h" +#include "textreading/field_types.h" +#include "textreading/rows.h" +#include "textreading/growth.h" + +/* + * Minimum size to grow the allcoation by (or 25%). The 8KiB means the actual + * growths is within `8 KiB <= size < 16 KiB` (depending on the row size). + */ +#define MIN_BLOCK_SIZE (1 << 13) + + + +/* + * Create the array of converter functions from the Python converters. 
+ */ +static PyObject ** +create_conv_funcs( + PyObject *converters, Py_ssize_t num_fields, const Py_ssize_t *usecols) +{ + assert(converters != Py_None); + + PyObject **conv_funcs = PyMem_Calloc(num_fields, sizeof(PyObject *)); + if (conv_funcs == NULL) { + PyErr_NoMemory(); + return NULL; + } + + if (PyCallable_Check(converters)) { + /* a single converter used for all columns individually */ + for (Py_ssize_t i = 0; i < num_fields; i++) { + Py_INCREF(converters); + conv_funcs[i] = converters; + } + return conv_funcs; + } + else if (!PyDict_Check(converters)) { + PyErr_SetString(PyExc_TypeError, + "converters must be a dictionary mapping columns to converter " + "functions or a single callable."); + goto error; + } + + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(converters, &pos, &key, &value)) { + Py_ssize_t column = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (column == -1 && PyErr_Occurred()) { + PyErr_Format(PyExc_TypeError, + "keys of the converters dictionary must be integers; " + "got %.100R", key); + goto error; + } + if (usecols != NULL) { + /* + * This code searches for the corresponding usecol. It is + * identical to the legacy usecols code, which has two weaknesses: + * 1. It fails for duplicated usecols only setting converter for + * the first one. + * 2. It fails e.g. if usecols uses negative indexing and + * converters does not. (This is a feature, since it allows + * us to correctly normalize converters to result column here.) + */ + Py_ssize_t i = 0; + for (; i < num_fields; i++) { + if (column == usecols[i]) { + column = i; + break; + } + } + if (i == num_fields) { + continue; /* ignore unused converter */ + } + } + else { + if (column < -num_fields || column >= num_fields) { + PyErr_Format(PyExc_ValueError, + "converter specified for column %zd, which is invalid " + "for the number of fields %d.", column, num_fields); + goto error; + } + if (column < 0) { + column += num_fields; + } + } + if (!PyCallable_Check(value)) { + PyErr_Format(PyExc_TypeError, + "values of the converters dictionary must be callable, " + "but the value associated with key %R is not", key); + goto error; + } + Py_INCREF(value); + conv_funcs[column] = value; + } + return conv_funcs; + + error: + for (Py_ssize_t i = 0; i < num_fields; i++) { + Py_XDECREF(conv_funcs[i]); + } + PyMem_FREE(conv_funcs); + return NULL; +} + +/** + * Read a file into the provided array, or create (and possibly grow) an + * array to read into. + * + * @param s The stream object/struct providing reading capabilities used by + * the tokenizer. + * @param max_rows The number of rows to read, or -1. If negative + * all rows are read. + * @param num_field_types The number of field types stored in `field_types`. + * @param field_types Information about the dtype for each column (or one if + * `homogeneous`). + * @param pconfig Pointer to the parser config object used by both the + * tokenizer and the conversion functions. + * @param num_usecols The number of columns in `usecols`. + * @param usecols An array of length `num_usecols` or NULL. If given indicates + * which column is read for each individual row (negative columns are + * accepted). + * @param skiplines The number of lines to skip, these lines are ignored. + * @param converters Python dictionary of converters. Finalizing converters + * is difficult without information about the number of columns. + * @param data_array An array to be filled or NULL. In either case a new + * reference is returned (the reference to `data_array` is not stolen). 
+ * @param out_descr The dtype used for allocating a new array. This is not + * used if `data_array` is provided. Note that the actual dtype of the + * returned array can differ for strings. + * @param num_cols Pointer in which the actual (discovered) number of columns + * is returned. This is only relevant if `homogeneous` is true. + * @param homogeneous Whether the datatype of the array is not homogeneous, + * i.e. not structured. In this case the number of columns has to be + * discovered an the returned array will be 2-dimensional rather than + * 1-dimensional. + * + * @returns Returns the result as an array object or NULL on error. The result + * is always a new reference (even when `data_array` was passed in). + */ +NPY_NO_EXPORT PyArrayObject * +read_rows(stream *s, + npy_intp max_rows, Py_ssize_t num_field_types, field_type *field_types, + parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols, + Py_ssize_t skiplines, PyObject *converters, + PyArrayObject *data_array, PyArray_Descr *out_descr, + bool homogeneous) +{ + char *data_ptr = NULL; + Py_ssize_t current_num_fields; + npy_intp row_size = out_descr->elsize; + PyObject **conv_funcs = NULL; + + bool needs_init = PyDataType_FLAGCHK(out_descr, NPY_NEEDS_INIT); + + int ndim = homogeneous ? 2 : 1; + npy_intp result_shape[2] = {0, 1}; + + bool data_array_allocated = data_array == NULL; + /* Make sure we own `data_array` for the purpose of error handling */ + Py_XINCREF(data_array); + size_t rows_per_block = 1; /* will be increased depending on row size */ + npy_intp data_allocated_rows = 0; + + /* We give a warning if max_rows is used and an empty line is encountered */ + bool give_empty_row_warning = max_rows >= 0; + + int ts_result = 0; + tokenizer_state ts; + if (tokenizer_init(&ts, pconfig) < 0) { + goto error; + } + + /* Set the actual number of fields if it is already known, otherwise -1 */ + Py_ssize_t actual_num_fields = -1; + if (usecols != NULL) { + assert(homogeneous || num_field_types == num_usecols); + actual_num_fields = num_usecols; + } + else if (!homogeneous) { + assert(usecols == NULL || num_field_types == num_usecols); + actual_num_fields = num_field_types; + } + + for (Py_ssize_t i = 0; i < skiplines; i++) { + ts.state = TOKENIZE_GOTO_LINE_END; + ts_result = tokenize(s, &ts, pconfig); + if (ts_result < 0) { + goto error; + } + else if (ts_result != 0) { + /* Fewer lines than skiplines is acceptable */ + break; + } + } + + Py_ssize_t row_count = 0; /* number of rows actually processed */ + while ((max_rows < 0 || row_count < max_rows) && ts_result == 0) { + ts_result = tokenize(s, &ts, pconfig); + if (ts_result < 0) { + goto error; + } + current_num_fields = ts.num_fields; + field_info *fields = ts.fields; + if (NPY_UNLIKELY(ts.num_fields == 0)) { + /* + * Deprecated NumPy 1.23, 2021-01-13 (not really a deprecation, + * but similar policy should apply to removing the warning again) + */ + /* Tokenizer may give a final "empty line" even if there is none */ + if (give_empty_row_warning && ts_result == 0) { + give_empty_row_warning = false; + if (PyErr_WarnFormat(PyExc_UserWarning, 3, + "Input line %zd contained no data and will not be " + "counted towards `max_rows=%zd`. This differs from " + "the behaviour in NumPy <=1.22 which counted lines " + "rather than rows. If desired, the previous behaviour " + "can be achieved by using `itertools.islice`.\n" + "Please see the 1.23 release notes for an example on " + "how to do this. If you wish to ignore this warning, " + "use `warnings.filterwarnings`. 
This warning is " + "expected to be removed in the future and is given " + "only once per `loadtxt` call.", + row_count + skiplines + 1, max_rows) < 0) { + goto error; + } + } + continue; /* Ignore empty line */ + } + + if (NPY_UNLIKELY(data_ptr == NULL)) { + // We've deferred some of the initialization tasks to here, + // because we've now read the first line, and we definitively + // know how many fields (i.e. columns) we will be processing. + if (actual_num_fields == -1) { + actual_num_fields = current_num_fields; + } + + if (converters != Py_None) { + conv_funcs = create_conv_funcs( + converters, actual_num_fields, usecols); + if (conv_funcs == NULL) { + goto error; + } + } + + /* Note that result_shape[1] is only used if homogeneous is true */ + result_shape[1] = actual_num_fields; + if (homogeneous) { + row_size *= actual_num_fields; + } + + if (data_array == NULL) { + if (max_rows < 0) { + /* + * Negative max_rows denotes to read the whole file, we + * approach this by allocating ever larger blocks. + * Adds a number of rows based on `MIN_BLOCK_SIZE`. + * Note: later code grows assuming this is a power of two. + */ + if (row_size == 0) { + /* actual rows_per_block should not matter here */ + rows_per_block = 512; + } + else { + /* safe on overflow since min_rows will be 0 or 1 */ + size_t min_rows = ( + (MIN_BLOCK_SIZE + row_size - 1) / row_size); + while (rows_per_block < min_rows) { + rows_per_block *= 2; + } + } + data_allocated_rows = rows_per_block; + } + else { + data_allocated_rows = max_rows; + } + result_shape[0] = data_allocated_rows; + Py_INCREF(out_descr); + /* + * We do not use Empty, as it would fill with None + * and requiring decref'ing if we shrink again. + */ + data_array = (PyArrayObject *)PyArray_SimpleNewFromDescr( + ndim, result_shape, out_descr); +#ifdef NPY_RELAXED_STRIDES_DEBUG + /* Incompatible with NPY_RELAXED_STRIDES_DEBUG due to growing */ + if (result_shape[0] == 1) { + PyArray_STRIDES(data_array)[0] = row_size; + } +#endif /* NPY_RELAXED_STRIDES_DEBUG */ + if (data_array == NULL) { + goto error; + } + if (needs_init) { + memset(PyArray_BYTES(data_array), 0, PyArray_NBYTES(data_array)); + } + } + else { + assert(max_rows >=0); + data_allocated_rows = max_rows; + } + data_ptr = PyArray_BYTES(data_array); + } + + if (!usecols && (actual_num_fields != current_num_fields)) { + PyErr_Format(PyExc_ValueError, + "the number of columns changed from %d to %d at row %zu; " + "use `usecols` to select a subset and avoid this error", + actual_num_fields, current_num_fields, row_count+1); + goto error; + } + + if (NPY_UNLIKELY(data_allocated_rows == row_count)) { + /* + * Grow by ~25% and rounded up to the next rows_per_block + * NOTE: This is based on very crude timings and could be refined! + */ + npy_intp new_rows = data_allocated_rows; + npy_intp alloc_size = grow_size_and_multiply( + &new_rows, rows_per_block, row_size); + if (alloc_size < 0) { + /* should normally error much earlier, but make sure */ + PyErr_SetString(PyExc_ValueError, + "array is too big. Cannot read file as a single array; " + "providing a maximum number of rows to read may help."); + goto error; + } + + char *new_data = PyDataMem_UserRENEW( + PyArray_BYTES(data_array), alloc_size ? 
alloc_size : 1, + PyArray_HANDLER(data_array)); + if (new_data == NULL) { + PyErr_NoMemory(); + goto error; + } + /* Replace the arrays data since it may have changed */ + ((PyArrayObject_fields *)data_array)->data = new_data; + ((PyArrayObject_fields *)data_array)->dimensions[0] = new_rows; + data_ptr = new_data + row_count * row_size; + data_allocated_rows = new_rows; + if (needs_init) { + memset(data_ptr, '\0', (new_rows - row_count) * row_size); + } + } + + for (Py_ssize_t i = 0; i < actual_num_fields; ++i) { + Py_ssize_t f; /* The field, either 0 (if homogeneous) or i. */ + Py_ssize_t col; /* The column as read, remapped by usecols */ + char *item_ptr; + if (homogeneous) { + f = 0; + item_ptr = data_ptr + i * field_types[0].descr->elsize; + } + else { + f = i; + item_ptr = data_ptr + field_types[f].structured_offset; + } + + if (usecols == NULL) { + col = i; + } + else { + col = usecols[i]; + if (col < 0) { + // Python-like column indexing: k = -1 means the last column. + col += current_num_fields; + } + if (NPY_UNLIKELY((col < 0) || (col >= current_num_fields))) { + PyErr_Format(PyExc_ValueError, + "invalid column index %d at row %zu with %d " + "columns", + usecols[i], current_num_fields, row_count+1); + goto error; + } + } + + /* + * The following function calls represent the main "conversion" + * step, i.e. parsing the unicode string for each field and storing + * the result in the array. + */ + int parser_res; + Py_UCS4 *str = ts.field_buffer + fields[col].offset; + Py_UCS4 *end = ts.field_buffer + fields[col + 1].offset - 1; + if (conv_funcs == NULL || conv_funcs[i] == NULL) { + parser_res = field_types[f].set_from_ucs4(field_types[f].descr, + str, end, item_ptr, pconfig); + } + else { + parser_res = to_generic_with_converter(field_types[f].descr, + str, end, item_ptr, pconfig, conv_funcs[i]); + } + + if (NPY_UNLIKELY(parser_res < 0)) { + PyObject *exc, *val, *tb; + PyErr_Fetch(&exc, &val, &tb); + + size_t length = end - str; + PyObject *string = PyUnicode_FromKindAndData( + PyUnicode_4BYTE_KIND, str, length); + if (string == NULL) { + npy_PyErr_ChainExceptions(exc, val, tb); + goto error; + } + PyErr_Format(PyExc_ValueError, + "could not convert string %.100R to %S at " + "row %zu, column %d.", + string, field_types[f].descr, row_count, col+1); + Py_DECREF(string); + npy_PyErr_ChainExceptionsCause(exc, val, tb); + goto error; + } + } + + ++row_count; + data_ptr += row_size; + } + + tokenizer_clear(&ts); + PyMem_FREE(conv_funcs); + + if (data_array == NULL) { + assert(row_count == 0 && result_shape[0] == 0); + if (actual_num_fields == -1) { + /* + * We found no rows and have to discover the number of elements + * we have no choice but to guess 1. + * NOTE: It may make sense to move this outside of here to refine + * the behaviour where necessary. + */ + result_shape[1] = 1; + } + else { + result_shape[1] = actual_num_fields; + } + Py_INCREF(out_descr); + data_array = (PyArrayObject *)PyArray_Empty( + ndim, result_shape, out_descr, 0); + } + + /* + * Note that if there is no data, `data_array` may still be NULL and + * row_count is 0. In that case, always realloc just in case. + */ + if (data_array_allocated && data_allocated_rows != row_count) { + size_t size = row_count * row_size; + char *new_data = PyDataMem_UserRENEW( + PyArray_BYTES(data_array), size ? 
size : 1, + PyArray_HANDLER(data_array)); + if (new_data == NULL) { + Py_DECREF(data_array); + PyErr_NoMemory(); + return NULL; + } + ((PyArrayObject_fields *)data_array)->data = new_data; + ((PyArrayObject_fields *)data_array)->dimensions[0] = row_count; + } + + return data_array; + + error: + PyMem_FREE(conv_funcs); + tokenizer_clear(&ts); + Py_XDECREF(data_array); + return NULL; +} diff --git a/numpy/core/src/multiarray/textreading/rows.h b/numpy/core/src/multiarray/textreading/rows.h new file mode 100644 index 000000000..20eb9e186 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/rows.h @@ -0,0 +1,22 @@ + +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <stdio.h> + +#include "textreading/stream.h" +#include "textreading/field_types.h" +#include "textreading/parser_config.h" + + +NPY_NO_EXPORT PyArrayObject * +read_rows(stream *s, + npy_intp nrows, Py_ssize_t num_field_types, field_type *field_types, + parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols, + Py_ssize_t skiplines, PyObject *converters, + PyArrayObject *data_array, PyArray_Descr *out_descr, + bool homogeneous); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_ */ diff --git a/numpy/core/src/multiarray/textreading/str_to_int.c b/numpy/core/src/multiarray/textreading/str_to_int.c new file mode 100644 index 000000000..11b03e31c --- /dev/null +++ b/numpy/core/src/multiarray/textreading/str_to_int.c @@ -0,0 +1,67 @@ + +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "lowlevel_strided_loops.h" + +#include <string.h> +#include "textreading/str_to_int.h" +#include "textreading/parser_config.h" + + +#define DECLARE_TO_INT(intw, INT_MIN, INT_MAX, byteswap_unaligned) \ + NPY_NO_EXPORT int \ + to_##intw(PyArray_Descr *descr, \ + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \ + parser_config *pconfig) \ + { \ + int64_t parsed; \ + intw##_t x; \ + \ + if (str_to_int64(str, end, INT_MIN, INT_MAX, &parsed) < 0) { \ + return -1; \ + } \ + else { \ + x = (intw##_t)parsed; \ + } \ + memcpy(dataptr, &x, sizeof(x)); \ + if (!PyArray_ISNBO(descr->byteorder)) { \ + byteswap_unaligned(dataptr); \ + } \ + return 0; \ + } + +#define DECLARE_TO_UINT(uintw, UINT_MAX, byteswap_unaligned) \ + NPY_NO_EXPORT int \ + to_##uintw(PyArray_Descr *descr, \ + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \ + parser_config *pconfig) \ + { \ + uint64_t parsed; \ + uintw##_t x; \ + \ + if (str_to_uint64(str, end, UINT_MAX, &parsed) < 0) { \ + return -1; \ + } \ + else { \ + x = (uintw##_t)parsed; \ + } \ + memcpy(dataptr, &x, sizeof(x)); \ + if (!PyArray_ISNBO(descr->byteorder)) { \ + byteswap_unaligned(dataptr); \ + } \ + return 0; \ + } + +#define byteswap_nothing(ptr) + +DECLARE_TO_INT(int8, INT8_MIN, INT8_MAX, byteswap_nothing) +DECLARE_TO_INT(int16, INT16_MIN, INT16_MAX, npy_bswap2_unaligned) +DECLARE_TO_INT(int32, INT32_MIN, INT32_MAX, npy_bswap4_unaligned) +DECLARE_TO_INT(int64, INT64_MIN, INT64_MAX, npy_bswap8_unaligned) + +DECLARE_TO_UINT(uint8, UINT8_MAX, byteswap_nothing) +DECLARE_TO_UINT(uint16, UINT16_MAX, npy_bswap2_unaligned) +DECLARE_TO_UINT(uint32, UINT32_MAX, npy_bswap4_unaligned) +DECLARE_TO_UINT(uint64, UINT64_MAX, npy_bswap8_unaligned) diff --git a/numpy/core/src/multiarray/textreading/str_to_int.h b/numpy/core/src/multiarray/textreading/str_to_int.h new file mode 100644 index 000000000..a0a89a0ef --- /dev/null +++ 
b/numpy/core/src/multiarray/textreading/str_to_int.h @@ -0,0 +1,174 @@ +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" + +#include "textreading/parser_config.h" + + +/* + * The following two string conversion functions are largely equivalent + * in Pandas. They are in the header file here, to ensure they can be easily + * inline in the other function. + * Unlike pandas, pass in end-pointer (do not rely on \0) and return 0 or -1. + * + * The actual functions are defined using macro templating below. + */ +NPY_FINLINE int +str_to_int64( + const Py_UCS4 *p_item, const Py_UCS4 *p_end, + int64_t int_min, int64_t int_max, int64_t *result) +{ + const Py_UCS4 *p = (const Py_UCS4 *)p_item; + bool isneg = 0; + int64_t number = 0; + + // Skip leading spaces. + while (Py_UNICODE_ISSPACE(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + isneg = true; + ++p; + } + else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit(*p)) { + return -1; + } + + if (isneg) { + // If number is greater than pre_min, at least one more digit + // can be processed without overflowing. + int dig_pre_min = -(int_min % 10); + int64_t pre_min = int_min / 10; + + // Process the digits. + int d = *p; + while (isdigit(d)) { + if ((number > pre_min) || ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } + else { + return -1; + } + } + } + else { + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + int64_t pre_max = int_max / 10; + int dig_pre_max = int_max % 10; + + // Process the digits. + int d = *p; + while (isdigit(d)) { + if ((number < pre_max) || ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + } + else { + return -1; + } + } + } + + // Skip trailing spaces. + while (Py_UNICODE_ISSPACE(*p)) { + ++p; + } + + // Did we use up all the characters? + if (p != p_end) { + return -1; + } + + *result = number; + return 0; +} + + +NPY_FINLINE int +str_to_uint64( + const Py_UCS4 *p_item, const Py_UCS4 *p_end, + uint64_t uint_max, uint64_t *result) +{ + const Py_UCS4 *p = (const Py_UCS4 *)p_item; + uint64_t number = 0; + int d; + + // Skip leading spaces. + while (Py_UNICODE_ISSPACE(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + return -1; + } + if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit(*p)) { + return -1; + } + + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + uint64_t pre_max = uint_max / 10; + int dig_pre_max = uint_max % 10; + + // Process the digits. + d = *p; + while (isdigit(d)) { + if ((number < pre_max) || ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + } + else { + return -1; + } + } + + // Skip trailing spaces. + while (Py_UNICODE_ISSPACE(*p)) { + ++p; + } + + // Did we use up all the characters? 
+ if (p != p_end) { + return -1; + } + + *result = number; + return 0; +} + + +#define DECLARE_TO_INT_PROTOTYPE(intw) \ + NPY_NO_EXPORT int \ + to_##intw(PyArray_Descr *descr, \ + const Py_UCS4 *str, const Py_UCS4 *end, char *dataptr, \ + parser_config *pconfig); + +DECLARE_TO_INT_PROTOTYPE(int8) +DECLARE_TO_INT_PROTOTYPE(int16) +DECLARE_TO_INT_PROTOTYPE(int32) +DECLARE_TO_INT_PROTOTYPE(int64) + +DECLARE_TO_INT_PROTOTYPE(uint8) +DECLARE_TO_INT_PROTOTYPE(uint16) +DECLARE_TO_INT_PROTOTYPE(uint32) +DECLARE_TO_INT_PROTOTYPE(uint64) + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STR_TO_INT_H_ */ diff --git a/numpy/core/src/multiarray/textreading/stream.h b/numpy/core/src/multiarray/textreading/stream.h new file mode 100644 index 000000000..59bd14074 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/stream.h @@ -0,0 +1,41 @@ +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_ + +#include <stdint.h> + +/* + * When getting the next line, we hope that the buffer provider can already + * give some information about the newlines, because for Python iterables + * we definitely expect to get line-by-line buffers. + * + * BUFFER_IS_FILEEND must be returned when the end of the file is reached and + * must NOT be returned together with a valid (non-empty) buffer. + */ +#define BUFFER_MAY_CONTAIN_NEWLINE 0 +#define BUFFER_IS_LINEND 1 +#define BUFFER_IS_FILEEND 2 + +/* + * Base struct for streams. We currently have two, a chunked reader for + * filelikes and a line-by-line for any iterable. + * As of writing, the chunked reader was only used for filelikes not already + * opened. That is to preserve the amount read in case of an error exactly. + * If we drop this, we could read it more often (but not when `max_rows` is + * used). + * + * The "streams" can extend this struct to store their own data (so it is + * a very lightweight "object"). + */ +typedef struct _stream { + int (*stream_nextbuf)(void *sdata, char **start, char **end, int *kind); + // Note that the first argument to stream_close is the stream pointer + // itself, not the stream_data pointer. + int (*stream_close)(struct _stream *strm); +} stream; + + +#define stream_nextbuf(s, start, end, kind) \ + ((s)->stream_nextbuf((s), start, end, kind)) +#define stream_close(s) ((s)->stream_close((s))) + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_ */ diff --git a/numpy/core/src/multiarray/textreading/stream_pyobject.c b/numpy/core/src/multiarray/textreading/stream_pyobject.c new file mode 100644 index 000000000..6f84ff01d --- /dev/null +++ b/numpy/core/src/multiarray/textreading/stream_pyobject.c @@ -0,0 +1,239 @@ +/* + * C side structures to provide capabilities to read Python file like objects + * in chunks, or iterate through iterables with each result representing a + * single line of a file. + */ + +#include <stdio.h> +#include <stdlib.h> + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" + +#include "textreading/stream.h" + +#define READ_CHUNKSIZE 1 << 14 + + +typedef struct { + stream stream; + /* The Python file object being read. */ + PyObject *file; + + /* The `read` attribute of the file object. */ + PyObject *read; + /* Amount to read each time we call `obj.read()` */ + PyObject *chunksize; + + /* Python str object holding the line most recently read from the file. 
*/ + PyObject *chunk; + + /* Encoding compatible with Python's `PyUnicode_Encode` (may be NULL) */ + const char *encoding; +} python_chunks_from_file; + + +/* + * Helper function to support byte objects as well as unicode strings. + * + * NOTE: Steals a reference to `str` (although usually returns it unmodified). + */ +static NPY_INLINE PyObject * +process_stringlike(PyObject *str, const char *encoding) +{ + if (PyBytes_Check(str)) { + PyObject *ustr; + ustr = PyUnicode_FromEncodedObject(str, encoding, NULL); + if (ustr == NULL) { + return NULL; + } + Py_DECREF(str); + return ustr; + } + else if (!PyUnicode_Check(str)) { + PyErr_SetString(PyExc_TypeError, + "non-string returned while reading data"); + Py_DECREF(str); + return NULL; + } + return str; +} + + +static NPY_INLINE void +buffer_info_from_unicode(PyObject *str, char **start, char **end, int *kind) +{ + Py_ssize_t length = PyUnicode_GET_LENGTH(str); + *kind = PyUnicode_KIND(str); + + if (*kind == PyUnicode_1BYTE_KIND) { + *start = (char *)PyUnicode_1BYTE_DATA(str); + } + else if (*kind == PyUnicode_2BYTE_KIND) { + *start = (char *)PyUnicode_2BYTE_DATA(str); + length *= sizeof(Py_UCS2); + } + else if (*kind == PyUnicode_4BYTE_KIND) { + *start = (char *)PyUnicode_4BYTE_DATA(str); + length *= sizeof(Py_UCS4); + } + *end = *start + length; +} + + +static int +fb_nextbuf(python_chunks_from_file *fb, char **start, char **end, int *kind) +{ + Py_XDECREF(fb->chunk); + fb->chunk = NULL; + + PyObject *chunk = PyObject_CallFunctionObjArgs(fb->read, fb->chunksize, NULL); + if (chunk == NULL) { + return -1; + } + fb->chunk = process_stringlike(chunk, fb->encoding); + if (fb->chunk == NULL) { + return -1; + } + buffer_info_from_unicode(fb->chunk, start, end, kind); + if (*start == *end) { + return BUFFER_IS_FILEEND; + } + return BUFFER_MAY_CONTAIN_NEWLINE; +} + + +static int +fb_del(stream *strm) +{ + python_chunks_from_file *fb = (python_chunks_from_file *)strm; + + Py_XDECREF(fb->file); + Py_XDECREF(fb->read); + Py_XDECREF(fb->chunksize); + Py_XDECREF(fb->chunk); + + PyMem_FREE(strm); + + return 0; +} + + +NPY_NO_EXPORT stream * +stream_python_file(PyObject *obj, const char *encoding) +{ + python_chunks_from_file *fb; + + fb = (python_chunks_from_file *)PyMem_Calloc(1, sizeof(python_chunks_from_file)); + if (fb == NULL) { + PyErr_NoMemory(); + return NULL; + } + + fb->stream.stream_nextbuf = (void *)&fb_nextbuf; + fb->stream.stream_close = &fb_del; + + fb->encoding = encoding; + Py_INCREF(obj); + fb->file = obj; + + fb->read = PyObject_GetAttrString(obj, "read"); + if (fb->read == NULL) { + goto fail; + } + fb->chunksize = PyLong_FromLong(READ_CHUNKSIZE); + if (fb->chunksize == NULL) { + goto fail; + } + + return (stream *)fb; + +fail: + fb_del((stream *)fb); + return NULL; +} + + +/* + * Stream from a Python iterable by interpreting each item as a line in a file + */ +typedef struct { + stream stream; + /* The Python file object being read. 
*/ + PyObject *iterator; + + /* Python str object holding the line most recently fetched */ + PyObject *line; + + /* Encoding compatible with Python's `PyUnicode_Encode` (may be NULL) */ + const char *encoding; +} python_lines_from_iterator; + + +static int +it_del(stream *strm) +{ + python_lines_from_iterator *it = (python_lines_from_iterator *)strm; + + Py_XDECREF(it->iterator); + Py_XDECREF(it->line); + + PyMem_FREE(strm); + return 0; +} + + +static int +it_nextbuf(python_lines_from_iterator *it, char **start, char **end, int *kind) +{ + Py_XDECREF(it->line); + it->line = NULL; + + PyObject *line = PyIter_Next(it->iterator); + if (line == NULL) { + if (PyErr_Occurred()) { + return -1; + } + *start = NULL; + *end = NULL; + return BUFFER_IS_FILEEND; + } + it->line = process_stringlike(line, it->encoding); + if (it->line == NULL) { + return -1; + } + + buffer_info_from_unicode(it->line, start, end, kind); + return BUFFER_IS_LINEND; +} + + +NPY_NO_EXPORT stream * +stream_python_iterable(PyObject *obj, const char *encoding) +{ + python_lines_from_iterator *it; + + if (!PyIter_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "error reading from object, expected an iterable."); + return NULL; + } + + it = (python_lines_from_iterator *)PyMem_Calloc(1, sizeof(*it)); + if (it == NULL) { + PyErr_NoMemory(); + return NULL; + } + + it->stream.stream_nextbuf = (void *)&it_nextbuf; + it->stream.stream_close = &it_del; + + it->encoding = encoding; + Py_INCREF(obj); + it->iterator = obj; + + return (stream *)it; +} diff --git a/numpy/core/src/multiarray/textreading/stream_pyobject.h b/numpy/core/src/multiarray/textreading/stream_pyobject.h new file mode 100644 index 000000000..45c11dd95 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/stream_pyobject.h @@ -0,0 +1,16 @@ + +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include "textreading/stream.h" + +NPY_NO_EXPORT stream * +stream_python_file(PyObject *obj, const char *encoding); + +NPY_NO_EXPORT stream * +stream_python_iterable(PyObject *obj, const char *encoding); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_PYOBJECT_H_ */ diff --git a/numpy/core/src/multiarray/textreading/tokenize.c.src b/numpy/core/src/multiarray/textreading/tokenize.c.src new file mode 100644 index 000000000..6ddba3345 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/tokenize.c.src @@ -0,0 +1,457 @@ + +#include <Python.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" + +#include "textreading/stream.h" +#include "textreading/tokenize.h" +#include "textreading/parser_config.h" +#include "textreading/growth.h" + + +/* + How parsing quoted fields works: + + For quoting to be activated, the first character of the field + must be the quote character (after taking into account + ignore_leading_spaces). While quoting is active, delimiters + are treated as regular characters, not delimiters. Quoting is + deactivated by the second occurrence of the quote character. An + exception is the occurrence of two consecutive quote characters, + which is treated as a literal occurrence of a single quote character. + E.g. (with delimiter=',' and quote='"'): + 12.3,"New York, NY","3'2""" + The second and third fields are `New York, NY` and `3'2"`. 
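    At the Python level the same line parses roughly as follows (a hedged
    sketch rather than a test; it assumes the `quotechar` keyword that the
    Python-side `loadtxt` wrapper exposes):

        np.loadtxt(['12.3,"New York, NY","3\'2"""'],
                   delimiter=',', quotechar='"', dtype=str)
        # -> fields '12.3', 'New York, NY' and '3\'2"'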
+ + If a non-delimiter occurs after the closing quote, the quote is + ignored and parsing continues with quoting deactivated. Quotes + that occur while quoting is not activated are not handled specially; + they become part of the data. + E.g: + 12.3,"ABC"DEF,XY"Z + The second and third fields are `ABCDEF` and `XY"Z`. + + Note that the second field of + 12.3,"ABC" ,4.5 + is `ABC `. Currently there is no option to ignore whitespace + at the end of a field. +*/ + + +/**begin repeat + * #type = Py_UCS1, Py_UCS2, Py_UCS4# + */ +static NPY_INLINE int +copy_to_field_buffer_@type@(tokenizer_state *ts, + const @type@ *chunk_start, const @type@ *chunk_end) +{ + npy_intp chunk_length = chunk_end - chunk_start; + npy_intp size = chunk_length + ts->field_buffer_pos + 2; + + if (NPY_UNLIKELY(ts->field_buffer_length < size)) { + npy_intp alloc_size = grow_size_and_multiply(&size, 32, sizeof(Py_UCS4)); + if (alloc_size < 0) { + PyErr_Format(PyExc_ValueError, + "line too long to handle while reading file."); + return -1; + } + Py_UCS4 *grown = PyMem_Realloc(ts->field_buffer, alloc_size); + if (grown == NULL) { + PyErr_NoMemory(); + return -1; + } + ts->field_buffer_length = size; + ts->field_buffer = grown; + } + + Py_UCS4 *write_pos = ts->field_buffer + ts->field_buffer_pos; + for (; chunk_start < chunk_end; chunk_start++, write_pos++) { + *write_pos = (Py_UCS4)*chunk_start; + } + *write_pos = '\0'; /* always ensure we end with NUL */ + ts->field_buffer_pos += chunk_length; + return 0; +} +/**end repeat**/ + + +static NPY_INLINE int +add_field(tokenizer_state *ts) +{ + /* The previous field is done, advance to keep a NUL byte at the end */ + ts->field_buffer_pos += 1; + + if (NPY_UNLIKELY(ts->num_fields + 1 > ts->fields_size)) { + npy_intp size = ts->num_fields; + + npy_intp alloc_size = grow_size_and_multiply( + &size, 4, sizeof(field_info)); + if (alloc_size < 0) { + /* Check for a size overflow, path should be almost impossible. */ + PyErr_Format(PyExc_ValueError, + "too many columns found; cannot read file."); + return -1; + } + field_info *fields = PyMem_Realloc(ts->fields, alloc_size); + if (fields == NULL) { + PyErr_NoMemory(); + return -1; + } + ts->fields = fields; + ts->fields_size = size; + } + + ts->fields[ts->num_fields].offset = ts->field_buffer_pos; + ts->fields[ts->num_fields].quoted = false; + ts->num_fields += 1; + /* Ensure this (currently empty) word is NUL terminated. 
*/ + ts->field_buffer[ts->field_buffer_pos] = '\0'; + return 0; +} + + +/**begin repeat + * #kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND, PyUnicode_4BYTE_KIND# + * #type = Py_UCS1, Py_UCS2, Py_UCS4# + */ +static NPY_INLINE int +tokenizer_core_@type@(tokenizer_state *ts, parser_config *const config) +{ + @type@ *pos = (@type@ *)ts->pos; + @type@ *stop = (@type@ *)ts->end; + @type@ *chunk_start; + + if (ts->state == TOKENIZE_CHECK_QUOTED) { + /* before we can check for quotes, strip leading whitespace */ + if (config->ignore_leading_whitespace) { + while (pos < stop && Py_UNICODE_ISSPACE(*pos) && + *pos != '\r' && *pos != '\n') { + pos++; + } + if (pos == stop) { + ts->pos = (char *)pos; + return 0; + } + } + + /* Setting chunk effectively starts the field */ + if (*pos == config->quote) { + ts->fields[ts->num_fields - 1].quoted = true; + ts->state = TOKENIZE_QUOTED; + pos++; /* TOKENIZE_QUOTED is OK with pos == stop */ + } + else { + /* Set to TOKENIZE_QUOTED or TOKENIZE_QUOTED_WHITESPACE */ + ts->state = ts->unquoted_state; + } + } + + switch (ts->state) { + case TOKENIZE_UNQUOTED: + chunk_start = pos; + for (; pos < stop; pos++) { + if (*pos == '\r') { + ts->state = TOKENIZE_EAT_CRLF; + break; + } + else if (*pos == '\n') { + ts->state = TOKENIZE_LINE_END; + break; + } + else if (*pos == config->delimiter) { + ts->state = TOKENIZE_INIT; + break; + } + else if (*pos == config->comment) { + ts->state = TOKENIZE_GOTO_LINE_END; + break; + } + } + if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) { + return -1; + } + pos++; + break; + + case TOKENIZE_UNQUOTED_WHITESPACE: + /* Note, this branch is largely identical to `TOKENIZE_UNQUOTED` */ + chunk_start = pos; + for (; pos < stop; pos++) { + if (*pos == '\r') { + ts->state = TOKENIZE_EAT_CRLF; + break; + } + else if (*pos == '\n') { + ts->state = TOKENIZE_LINE_END; + break; + } + else if (Py_UNICODE_ISSPACE(*pos)) { + ts->state = TOKENIZE_INIT; + break; + } + else if (*pos == config->comment) { + ts->state = TOKENIZE_GOTO_LINE_END; + break; + } + } + if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) { + return -1; + } + pos++; + break; + + case TOKENIZE_QUOTED: + chunk_start = pos; + for (; pos < stop; pos++) { + if (*pos == config->quote) { + ts->state = TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE; + break; + } + } + if (copy_to_field_buffer_@type@(ts, chunk_start, pos) < 0) { + return -1; + } + pos++; + break; + + case TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE: + if (*pos == config->quote) { + /* Copy the quote character directly from the config: */ + if (copy_to_field_buffer_Py_UCS4(ts, + &config->quote, &config->quote+1) < 0) { + return -1; + } + ts->state = TOKENIZE_QUOTED; + pos++; + } + else { + /* continue parsing as if unquoted */ + ts->state = TOKENIZE_UNQUOTED; + } + break; + + case TOKENIZE_GOTO_LINE_END: + if (ts->buf_state != BUFFER_MAY_CONTAIN_NEWLINE) { + pos = stop; /* advance to next buffer */ + ts->state = TOKENIZE_LINE_END; + break; + } + for (; pos < stop; pos++) { + if (*pos == '\r') { + ts->state = TOKENIZE_EAT_CRLF; + break; + } + else if (*pos == '\n') { + ts->state = TOKENIZE_LINE_END; + break; + } + } + pos++; + break; + + case TOKENIZE_EAT_CRLF: + /* "Universal newline" support: remove \n in \r\n. */ + if (*pos == '\n') { + pos++; + } + ts->state = TOKENIZE_LINE_END; + break; + + default: + assert(0); + } + + ts->pos = (char *)pos; + return 0; +} +/**end repeat**/ + + +/* + * This tokenizer always copies the full "row" (all tokens). This makes + * two things easier: + * 1. 
It means that every word is guaranteed to be followed by a NUL character + * (although it can include one as well). + * 2. If usecols are used we can sniff the first row easier by parsing it + * fully. Further, usecols can be negative so we may not know which row we + * need up-front. + * + * The tokenizer could grow the ability to skip fields and check the + * maximum number of fields when known, it is unclear that this is worthwhile. + * + * Unlike some tokenizers, this one tries to work in chunks and copies + * data in chunks as well. The hope is that this makes multiple light-weight + * loops rather than a single heavy one, to allow e.g. quickly scanning for the + * end of a field. Copying chunks also means we usually only check once per + * field whether the buffer is large enough. + * Different choices are possible, this one seems to work well, though. + * + * The core (main part) of the tokenizer is specialized for the three Python + * unicode flavors UCS1, UCS2, and UCS4 as a worthwhile optimization. + */ +NPY_NO_EXPORT int +tokenize(stream *s, tokenizer_state *ts, parser_config *const config) +{ + assert(ts->fields_size >= 2); + assert(ts->field_buffer_length >= 2*sizeof(Py_UCS4)); + + int finished_reading_file = 0; + + /* Reset to start of buffer */ + ts->field_buffer_pos = 0; + ts->num_fields = 0; + + while (1) { + /* + * This loop adds new fields to the result (to make up a full row) + * until the row ends (typically a line end or the file end) + */ + if (ts->state == TOKENIZE_INIT) { + /* Start a new field */ + if (add_field(ts) < 0) { + return -1; + } + ts->state = TOKENIZE_CHECK_QUOTED; + } + + if (NPY_UNLIKELY(ts->pos >= ts->end)) { + if (ts->buf_state == BUFFER_IS_LINEND && + ts->state != TOKENIZE_QUOTED) { + /* + * Finished line, do not read anymore (also do not eat \n). + * If we are in a quoted field and the "line" does not end with + * a newline, the quoted field will not have it either. + * I.e. `np.loadtxt(['"a', 'b"'], dtype="S2", quotechar='"')` + * reads "ab". This matches `next(csv.reader(['"a', 'b"']))`. + */ + break; + } + /* fetch new data */ + ts->buf_state = stream_nextbuf(s, + &ts->pos, &ts->end, &ts->unicode_kind); + if (ts->buf_state < 0) { + return -1; + } + if (ts->buf_state == BUFFER_IS_FILEEND) { + finished_reading_file = 1; + ts->pos = ts->end; /* stream should ensure this. */ + break; + } + else if (ts->pos == ts->end) { + /* This must be an empty line (and it must be indicated!). */ + assert(ts->buf_state == BUFFER_IS_LINEND); + break; + } + } + int status; + if (ts->unicode_kind == PyUnicode_1BYTE_KIND) { + status = tokenizer_core_Py_UCS1(ts, config); + } + else if (ts->unicode_kind == PyUnicode_2BYTE_KIND) { + status = tokenizer_core_Py_UCS2(ts, config); + } + else { + assert(ts->unicode_kind == PyUnicode_4BYTE_KIND); + status = tokenizer_core_Py_UCS4(ts, config); + } + if (status < 0) { + return -1; + } + + if (ts->state == TOKENIZE_LINE_END) { + break; + } + } + + /* + * We have finished tokenizing a full row into fields, finalize result + */ + if (ts->buf_state == BUFFER_IS_LINEND) { + /* This line is "finished", make sure we don't touch it again: */ + ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE; + if (NPY_UNLIKELY(ts->pos < ts->end)) { + PyErr_SetString(PyExc_ValueError, + "Found an unquoted embedded newline within a single line of " + "input. 
This is currently not supported."); + return -1; + } + } + + /* Finish the last field (we "append" one to store the last ones length) */ + if (add_field(ts) < 0) { + return -1; + } + ts->num_fields -= 1; + + /* + * If have one field, but that field is completely empty, this is an + * empty line, and we just ignore it. + */ + if (ts->num_fields == 1 + && ts->fields[1].offset - ts->fields[0].offset == 1 + && !ts->fields->quoted) { + ts->num_fields--; + } + ts->state = TOKENIZE_INIT; + return finished_reading_file; +} + + +NPY_NO_EXPORT void +tokenizer_clear(tokenizer_state *ts) +{ + PyMem_FREE(ts->field_buffer); + ts->field_buffer = NULL; + ts->field_buffer_length = 0; + + PyMem_FREE(ts->fields); + ts->fields = NULL; + ts->fields_size = 0; +} + + +/* + * Initialize the tokenizer. We may want to copy all important config + * variables into the tokenizer. This would improve the cache locality during + * tokenizing. + */ +NPY_NO_EXPORT int +tokenizer_init(tokenizer_state *ts, parser_config *config) +{ + /* State and buf_state could be moved into tokenize if we go by row */ + ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE; + ts->state = TOKENIZE_INIT; + if (config->delimiter_is_whitespace) { + ts->unquoted_state = TOKENIZE_UNQUOTED_WHITESPACE; + } + else { + ts->unquoted_state = TOKENIZE_UNQUOTED; + } + ts->num_fields = 0; + + ts->buf_state = 0; + ts->pos = NULL; + ts->end = NULL; + + ts->field_buffer = PyMem_Malloc(32 * sizeof(Py_UCS4)); + if (ts->field_buffer == NULL) { + PyErr_NoMemory(); + return -1; + } + ts->field_buffer_length = 32; + + ts->fields = PyMem_Malloc(4 * sizeof(*ts->fields)); + if (ts->fields == NULL) { + PyErr_NoMemory(); + return -1; + } + ts->fields_size = 4; + return 0; +} diff --git a/numpy/core/src/multiarray/textreading/tokenize.h b/numpy/core/src/multiarray/textreading/tokenize.h new file mode 100644 index 000000000..fa10bb9b0 --- /dev/null +++ b/numpy/core/src/multiarray/textreading/tokenize.h @@ -0,0 +1,78 @@ + +#ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_ +#define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_ + +#include <Python.h> +#include "numpy/ndarraytypes.h" + +#include "textreading/stream.h" +#include "textreading/parser_config.h" + + +typedef enum { + /* Initialization of fields */ + TOKENIZE_INIT, + TOKENIZE_CHECK_QUOTED, + /* Main field parsing states */ + TOKENIZE_UNQUOTED, + TOKENIZE_UNQUOTED_WHITESPACE, + TOKENIZE_QUOTED, + /* Handling of two character control sequences (except "\r\n") */ + TOKENIZE_QUOTED_CHECK_DOUBLE_QUOTE, + /* Line end handling */ + TOKENIZE_LINE_END, + TOKENIZE_EAT_CRLF, /* "\r\n" support (carriage return, line feed) */ + TOKENIZE_GOTO_LINE_END, +} tokenizer_parsing_state; + + +typedef struct { + size_t offset; + bool quoted; +} field_info; + + +typedef struct { + tokenizer_parsing_state state; + /* Either TOKENIZE_UNQUOTED or TOKENIZE_UNQUOTED_WHITESPACE: */ + tokenizer_parsing_state unquoted_state; + int unicode_kind; + int buf_state; + /* the buffer we are currently working on */ + char *pos; + char *end; + /* + * Space to copy words into. The buffer must always be at least two NUL + * entries longer (8 bytes) than the actual word (including initially). + * The first byte beyond the current word is always NUL'ed on write, the + * second byte is there to allow easy appending of an additional empty + * word at the end (this word is also NUL terminated). + */ + npy_intp field_buffer_length; + npy_intp field_buffer_pos; + Py_UCS4 *field_buffer; + + /* + * Fields, including information about the field being quoted. 
This + * always includes one "additional" empty field. The length of a field + * is equal to `fields[i+1].offset - fields[i].offset - 1`. + * + * The tokenizer assumes at least one field is allocated. + */ + npy_intp num_fields; + npy_intp fields_size; + field_info *fields; +} tokenizer_state; + + +NPY_NO_EXPORT void +tokenizer_clear(tokenizer_state *ts); + + +NPY_NO_EXPORT int +tokenizer_init(tokenizer_state *ts, parser_config *config); + +NPY_NO_EXPORT int +tokenize(stream *s, tokenizer_state *ts, parser_config *const config); + +#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_ */ diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src index 933f75808..b4b060720 100644 --- a/numpy/core/src/npysort/quicksort.c.src +++ b/numpy/core/src/npysort/quicksort.c.src @@ -51,8 +51,14 @@ #include "npy_sort.h" #include "npysort_common.h" +#include "npy_cpu_features.h" +#include "x86-qsort.h" #include <stdlib.h> +#ifndef NPY_DISABLE_OPTIMIZATION + #include "x86-qsort.dispatch.h" +#endif + #define NOT_USED NPY_UNUSED(unused) /* * pushing largest partition has upper bound of log2(n) space @@ -83,11 +89,22 @@ * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, * npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta# + * #AVX512 = 0*5, 1, 1, 0*5, 1, 0*7# */ NPY_NO_EXPORT int quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) { + +#if @AVX512@ + void (*dispfunc)(void*, npy_intp) = NULL; + NPY_CPU_DISPATCH_CALL_XB(dispfunc = &x86_quicksort_@suff@); + if (dispfunc) { + (*dispfunc)(start, num); + return 0; + } +#endif + @type@ vp; @type@ *pl = start; @type@ *pr = pl + num - 1; diff --git a/numpy/core/src/npysort/x86-qsort.dispatch.c.src b/numpy/core/src/npysort/x86-qsort.dispatch.c.src new file mode 100644 index 000000000..b93c737cb --- /dev/null +++ b/numpy/core/src/npysort/x86-qsort.dispatch.c.src @@ -0,0 +1,587 @@ +/*@targets + * $maxopt $keep_baseline avx512_skx + */ +// policy $keep_baseline is used to avoid skip building avx512_skx +// when its part of baseline features (--cpu-baseline), since +// 'baseline' option isn't specified within targets. + +#include "x86-qsort.h" +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#ifdef NPY_HAVE_AVX512_SKX +#include <immintrin.h> +#include "numpy/npy_math.h" +#include "npy_sort.h" +#include "simd/simd.h" + + +/* + * Quicksort using AVX-512 for int, uint32 and float. The ideas and code are + * based on these two research papers: + * (1) Fast and Robust Vectorized In-Place Sorting of Primitive Types + * https://drops.dagstuhl.de/opus/volltexte/2021/13775/ + * (2) A Novel Hybrid Quicksort Algorithm Vectorized using AVX-512 on Intel Skylake + * https://arxiv.org/pdf/1704.08579.pdf + * + * High level idea: Vectorize the quicksort partitioning using AVX-512 + * compressstore instructions. The algorithm to pick the pivot is to use median of + * 72 elements picked at random. If the array size is < 128, then use + * Bitonic sorting network. Good resource for bitonic sorting network: + * http://mitp-content-server.mit.edu:18180/books/content/sectbyfn?collid=books_pres_0&fn=Chapter%2027.pdf&id=8030 + * + * Refer to https://github.com/numpy/numpy/pull/20133#issuecomment-958110340 for + * potential problems when converting this code to universal intrinsics framework. + */ + +/* + * Constants used in sorting 16 elements in a ZMM registers. 
Based on Bitonic + * sorting network (see + * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg) + */ +#define NETWORK1 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1 +#define NETWORK2 12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3 +#define NETWORK3 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7 +#define NETWORK4 13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2 +#define NETWORK5 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +#define NETWORK6 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4 +#define NETWORK7 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8 +#define ZMM_MAX_FLOAT _mm512_set1_ps(NPY_INFINITYF) +#define ZMM_MAX_UINT _mm512_set1_epi32(NPY_MAX_UINT32) +#define ZMM_MAX_INT _mm512_set1_epi32(NPY_MAX_INT32) +#define SHUFFLE_MASK(a,b,c,d) (a << 6) | (b << 4) | (c << 2) | d +#define SHUFFLE_ps(ZMM, MASK) _mm512_shuffle_ps(zmm, zmm, MASK) +#define SHUFFLE_epi32(ZMM, MASK) _mm512_shuffle_epi32(zmm, MASK) + +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +/* + * Vectorized random number generator xoroshiro128+. Broken into 2 parts: + * (1) vnext generates 2 64-bit random integers + * (2) rnd_epu32 converts this to 4 32-bit random integers and bounds it to + * the length of the array + */ +#define VROTL(x, k) /* rotate each uint64_t value in vector */ \ + _mm256_or_si256(_mm256_slli_epi64((x),(k)),_mm256_srli_epi64((x),64-(k))) + +static NPY_INLINE +__m256i vnext(__m256i* s0, __m256i* s1) { + *s1 = _mm256_xor_si256(*s0, *s1); /* modify vectors s1 and s0 */ + *s0 = _mm256_xor_si256(_mm256_xor_si256(VROTL(*s0, 24), *s1), + _mm256_slli_epi64(*s1, 16)); + *s1 = VROTL(*s1, 37); + return _mm256_add_epi64(*s0, *s1); /* return random vector */ +} + +/* transform random numbers to the range between 0 and bound - 1 */ +static NPY_INLINE +__m256i rnd_epu32(__m256i rnd_vec, __m256i bound) { + __m256i even = _mm256_srli_epi64(_mm256_mul_epu32(rnd_vec, bound), 32); + __m256i odd = _mm256_mul_epu32(_mm256_srli_epi64(rnd_vec, 32), bound); + return _mm256_blend_epi32(odd, even, 0b01010101); +} + +/**begin repeat + * + * #TYPE = INT, UINT, FLOAT# + * #type = int, uint, float# + * #type_t = npy_int, npy_uint, npy_float# + * #zmm_t = __m512i, __m512i, __m512# + * #ymm_t = __m256i, __m256i, __m256# + * #vsuf1 = epi32, epu32, ps# + * #vsuf2 = epi32, epi32, ps# + * #vsuf3 = si512, si512, ps# + * #vsuf4 = s32, u32, f32# + * #CMP_GE_OP = _MM_CMPINT_NLT, _MM_CMPINT_NLT, _CMP_GE_OQ# + * #TYPE_MAX_VAL = NPY_MAX_INT32, NPY_MAX_UINT32, NPY_INFINITYF# + * #TYPE_MIN_VAL = NPY_MIN_INT32, 0, -NPY_INFINITYF# + */ + +/* + * COEX == Compare and Exchange two registers by swapping min and max values + */ +#define COEX_ZMM_@vsuf1@(a, b) { \ + @zmm_t@ temp = a; \ + a = _mm512_min_@vsuf1@(a,b); \ + b = _mm512_max_@vsuf1@(temp, b);} \ + +#define COEX_YMM_@vsuf1@(a, b){ \ + @ymm_t@ temp = a; \ + a = _mm256_min_@vsuf1@(a, b); \ + b = _mm256_max_@vsuf1@(temp, b);} \ + +static NPY_INLINE +@zmm_t@ cmp_merge_@vsuf1@(@zmm_t@ in1, @zmm_t@ in2, __mmask16 mask) +{ + @zmm_t@ min = _mm512_min_@vsuf1@(in2, in1); + @zmm_t@ max = _mm512_max_@vsuf1@(in2, in1); + return _mm512_mask_mov_@vsuf2@(min, mask, max); // 0 -> min, 1 -> max +} + +/* + * Assumes zmm is random and performs a full sorting network defined in + * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg + */ +static NPY_INLINE +@zmm_t@ sort_zmm_@vsuf1@(@zmm_t@ zmm) +{ + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA); + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(0,1,2,3)), 0xCCCC); + zmm = 
cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA); + zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK3),zmm), 0xF0F0); + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC); + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA); + zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5),zmm), 0xFF00); + zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK6),zmm), 0xF0F0); + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC); + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA); + return zmm; +} + +// Assumes zmm is bitonic and performs a recursive half cleaner +static NPY_INLINE +@zmm_t@ bitonic_merge_zmm_@vsuf1@(@zmm_t@ zmm) +{ + // 1) half_cleaner[16]: compare 1-9, 2-10, 3-11 etc .. + zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK7),zmm), 0xFF00); + // 2) half_cleaner[8]: compare 1-5, 2-6, 3-7 etc .. + zmm = cmp_merge_@vsuf1@(zmm, _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK6),zmm), 0xF0F0); + // 3) half_cleaner[4] + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(1,0,3,2)), 0xCCCC); + // 3) half_cleaner[1] + zmm = cmp_merge_@vsuf1@(zmm, SHUFFLE_@vsuf2@(zmm, SHUFFLE_MASK(2,3,0,1)), 0xAAAA); + return zmm; +} + +// Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner +static NPY_INLINE +void bitonic_merge_two_zmm_@vsuf1@(@zmm_t@* zmm1, @zmm_t@* zmm2) +{ + // 1) First step of a merging network: coex of zmm1 and zmm2 reversed + *zmm2 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), *zmm2); + @zmm_t@ zmm3 = _mm512_min_@vsuf1@(*zmm1, *zmm2); + @zmm_t@ zmm4 = _mm512_max_@vsuf1@(*zmm1, *zmm2); + // 2) Recursive half cleaner for each + *zmm1 = bitonic_merge_zmm_@vsuf1@(zmm3); + *zmm2 = bitonic_merge_zmm_@vsuf1@(zmm4); +} + +// Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive half cleaner +static NPY_INLINE +void bitonic_merge_four_zmm_@vsuf1@(@zmm_t@* zmm) +{ + @zmm_t@ zmm2r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[2]); + @zmm_t@ zmm3r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[3]); + @zmm_t@ zmm_t1 = _mm512_min_@vsuf1@(zmm[0], zmm3r); + @zmm_t@ zmm_t2 = _mm512_min_@vsuf1@(zmm[1], zmm2r); + @zmm_t@ zmm_t3 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[1], zmm2r)); + @zmm_t@ zmm_t4 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[0], zmm3r)); + @zmm_t@ zmm0 = _mm512_min_@vsuf1@(zmm_t1, zmm_t2); + @zmm_t@ zmm1 = _mm512_max_@vsuf1@(zmm_t1, zmm_t2); + @zmm_t@ zmm2 = _mm512_min_@vsuf1@(zmm_t3, zmm_t4); + @zmm_t@ zmm3 = _mm512_max_@vsuf1@(zmm_t3, zmm_t4); + zmm[0] = bitonic_merge_zmm_@vsuf1@(zmm0); + zmm[1] = bitonic_merge_zmm_@vsuf1@(zmm1); + zmm[2] = bitonic_merge_zmm_@vsuf1@(zmm2); + zmm[3] = bitonic_merge_zmm_@vsuf1@(zmm3); +} + +static NPY_INLINE +void bitonic_merge_eight_zmm_@vsuf1@(@zmm_t@* zmm) +{ + @zmm_t@ zmm4r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[4]); + @zmm_t@ zmm5r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[5]); + @zmm_t@ zmm6r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[6]); + @zmm_t@ zmm7r = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), zmm[7]); + @zmm_t@ zmm_t1 = _mm512_min_@vsuf1@(zmm[0], zmm7r); + @zmm_t@ zmm_t2 = _mm512_min_@vsuf1@(zmm[1], zmm6r); + @zmm_t@ zmm_t3 = 
_mm512_min_@vsuf1@(zmm[2], zmm5r); + @zmm_t@ zmm_t4 = _mm512_min_@vsuf1@(zmm[3], zmm4r); + @zmm_t@ zmm_t5 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[3], zmm4r)); + @zmm_t@ zmm_t6 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[2], zmm5r)); + @zmm_t@ zmm_t7 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[1], zmm6r)); + @zmm_t@ zmm_t8 = _mm512_permutexvar_@vsuf2@(_mm512_set_epi32(NETWORK5), _mm512_max_@vsuf1@(zmm[0], zmm7r)); + COEX_ZMM_@vsuf1@(zmm_t1, zmm_t3); + COEX_ZMM_@vsuf1@(zmm_t2, zmm_t4); + COEX_ZMM_@vsuf1@(zmm_t5, zmm_t7); + COEX_ZMM_@vsuf1@(zmm_t6, zmm_t8); + COEX_ZMM_@vsuf1@(zmm_t1, zmm_t2); + COEX_ZMM_@vsuf1@(zmm_t3, zmm_t4); + COEX_ZMM_@vsuf1@(zmm_t5, zmm_t6); + COEX_ZMM_@vsuf1@(zmm_t7, zmm_t8); + zmm[0] = bitonic_merge_zmm_@vsuf1@(zmm_t1); + zmm[1] = bitonic_merge_zmm_@vsuf1@(zmm_t2); + zmm[2] = bitonic_merge_zmm_@vsuf1@(zmm_t3); + zmm[3] = bitonic_merge_zmm_@vsuf1@(zmm_t4); + zmm[4] = bitonic_merge_zmm_@vsuf1@(zmm_t5); + zmm[5] = bitonic_merge_zmm_@vsuf1@(zmm_t6); + zmm[6] = bitonic_merge_zmm_@vsuf1@(zmm_t7); + zmm[7] = bitonic_merge_zmm_@vsuf1@(zmm_t8); +} + +static NPY_INLINE +void sort_16_@vsuf1@(@type_t@* arr, npy_int N) +{ + __mmask16 load_mask = (0x0001 << N) - 0x0001; + @zmm_t@ zmm = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask, arr); + _mm512_mask_storeu_@vsuf2@(arr, load_mask, sort_zmm_@vsuf1@(zmm)); +} + +static NPY_INLINE +void sort_32_@vsuf1@(@type_t@* arr, npy_int N) +{ + if (N <= 16) { + sort_16_@vsuf1@(arr, N); + return; + } + @zmm_t@ zmm1 = _mm512_loadu_@vsuf3@(arr); + __mmask16 load_mask = (0x0001 << (N-16)) - 0x0001; + @zmm_t@ zmm2 = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask, arr + 16); + zmm1 = sort_zmm_@vsuf1@(zmm1); + zmm2 = sort_zmm_@vsuf1@(zmm2); + bitonic_merge_two_zmm_@vsuf1@(&zmm1, &zmm2); + _mm512_storeu_@vsuf3@(arr, zmm1); + _mm512_mask_storeu_@vsuf2@(arr + 16, load_mask, zmm2); +} + +static NPY_INLINE +void sort_64_@vsuf1@(@type_t@* arr, npy_int N) +{ + if (N <= 32) { + sort_32_@vsuf1@(arr, N); + return; + } + @zmm_t@ zmm[4]; + zmm[0] = _mm512_loadu_@vsuf3@(arr); + zmm[1] = _mm512_loadu_@vsuf3@(arr + 16); + __mmask16 load_mask1 = 0xFFFF, load_mask2 = 0xFFFF; + if (N < 48) { + load_mask1 = (0x0001 << (N-32)) - 0x0001; + load_mask2 = 0x0000; + } + else if (N < 64) { + load_mask2 = (0x0001 << (N-48)) - 0x0001; + } + zmm[2] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask1, arr + 32); + zmm[3] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask2, arr + 48); + zmm[0] = sort_zmm_@vsuf1@(zmm[0]); + zmm[1] = sort_zmm_@vsuf1@(zmm[1]); + zmm[2] = sort_zmm_@vsuf1@(zmm[2]); + zmm[3] = sort_zmm_@vsuf1@(zmm[3]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[0], &zmm[1]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[2], &zmm[3]); + bitonic_merge_four_zmm_@vsuf1@(zmm); + _mm512_storeu_@vsuf3@(arr, zmm[0]); + _mm512_storeu_@vsuf3@(arr + 16, zmm[1]); + _mm512_mask_storeu_@vsuf2@(arr + 32, load_mask1, zmm[2]); + _mm512_mask_storeu_@vsuf2@(arr + 48, load_mask2, zmm[3]); +} + +static NPY_INLINE +void sort_128_@vsuf1@(@type_t@* arr, npy_int N) +{ + if (N <= 64) { + sort_64_@vsuf1@(arr, N); + return; + } + @zmm_t@ zmm[8]; + zmm[0] = _mm512_loadu_@vsuf3@(arr); + zmm[1] = _mm512_loadu_@vsuf3@(arr + 16); + zmm[2] = _mm512_loadu_@vsuf3@(arr + 32); + zmm[3] = _mm512_loadu_@vsuf3@(arr + 48); + zmm[0] = sort_zmm_@vsuf1@(zmm[0]); + zmm[1] = sort_zmm_@vsuf1@(zmm[1]); + zmm[2] = sort_zmm_@vsuf1@(zmm[2]); + zmm[3] = sort_zmm_@vsuf1@(zmm[3]); + __mmask16 load_mask1 = 0xFFFF, 
load_mask2 = 0xFFFF; + __mmask16 load_mask3 = 0xFFFF, load_mask4 = 0xFFFF; + if (N < 80) { + load_mask1 = (0x0001 << (N-64)) - 0x0001; + load_mask2 = 0x0000; + load_mask3 = 0x0000; + load_mask4 = 0x0000; + } + else if (N < 96) { + load_mask2 = (0x0001 << (N-80)) - 0x0001; + load_mask3 = 0x0000; + load_mask4 = 0x0000; + } + else if (N < 112) { + load_mask3 = (0x0001 << (N-96)) - 0x0001; + load_mask4 = 0x0000; + } + else { + load_mask4 = (0x0001 << (N-112)) - 0x0001; + } + zmm[4] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask1, arr + 64); + zmm[5] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask2, arr + 80); + zmm[6] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask3, arr + 96); + zmm[7] = _mm512_mask_loadu_@vsuf2@(ZMM_MAX_@TYPE@, load_mask4, arr + 112); + zmm[4] = sort_zmm_@vsuf1@(zmm[4]); + zmm[5] = sort_zmm_@vsuf1@(zmm[5]); + zmm[6] = sort_zmm_@vsuf1@(zmm[6]); + zmm[7] = sort_zmm_@vsuf1@(zmm[7]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[0], &zmm[1]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[2], &zmm[3]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[4], &zmm[5]); + bitonic_merge_two_zmm_@vsuf1@(&zmm[6], &zmm[7]); + bitonic_merge_four_zmm_@vsuf1@(zmm); + bitonic_merge_four_zmm_@vsuf1@(zmm + 4); + bitonic_merge_eight_zmm_@vsuf1@(zmm); + _mm512_storeu_@vsuf3@(arr, zmm[0]); + _mm512_storeu_@vsuf3@(arr + 16, zmm[1]); + _mm512_storeu_@vsuf3@(arr + 32, zmm[2]); + _mm512_storeu_@vsuf3@(arr + 48, zmm[3]); + _mm512_mask_storeu_@vsuf2@(arr + 64, load_mask1, zmm[4]); + _mm512_mask_storeu_@vsuf2@(arr + 80, load_mask2, zmm[5]); + _mm512_mask_storeu_@vsuf2@(arr + 96, load_mask3, zmm[6]); + _mm512_mask_storeu_@vsuf2@(arr + 112, load_mask4, zmm[7]); +} + + +static NPY_INLINE +void swap_@TYPE@(@type_t@ *arr, npy_intp ii, npy_intp jj) { + @type_t@ temp = arr[ii]; + arr[ii] = arr[jj]; + arr[jj] = temp; +} + +// Median of 3 stratergy +//static NPY_INLINE +//npy_intp get_pivot_index(@type_t@ *arr, const npy_intp left, const npy_intp right) { +// return (rand() % (right + 1 - left)) + left; +// //npy_intp middle = ((right-left)/2) + left; +// //@type_t@ a = arr[left], b = arr[middle], c = arr[right]; +// //if ((b >= a && b <= c) || (b <= a && b >= c)) +// // return middle; +// //if ((a >= b && a <= c) || (a <= b && a >= c)) +// // return left; +// //else +// // return right; +//} + +/* + * Picking the pivot: Median of 72 array elements chosen at random. + */ + +static NPY_INLINE +@type_t@ get_pivot_@vsuf1@(@type_t@ *arr, const npy_intp left, const npy_intp right) { + /* seeds for vectorized random number generator */ + __m256i s0 = _mm256_setr_epi64x(8265987198341093849, 3762817312854612374, + 1324281658759788278, 6214952190349879213); + __m256i s1 = _mm256_setr_epi64x(2874178529384792648, 1257248936691237653, + 7874578921548791257, 1998265912745817298); + s0 = _mm256_add_epi64(s0, _mm256_set1_epi64x(left)); + s1 = _mm256_sub_epi64(s1, _mm256_set1_epi64x(right)); + + npy_intp arrsize = right - left + 1; + __m256i bound = _mm256_set1_epi32(arrsize > INT32_MAX ? 
INT32_MAX : arrsize); + __m512i left_vec = _mm512_set1_epi64(left); + __m512i right_vec = _mm512_set1_epi64(right); + @ymm_t@ v[9]; + /* fill 9 vectors with random numbers */ + for (npy_int i = 0; i < 9; ++i) { + __m256i rand_64 = vnext(&s0, &s1); /* vector with 4 random uint64_t */ + __m512i rand_32 = _mm512_cvtepi32_epi64(rnd_epu32(rand_64, bound)); /* random numbers between 0 and bound - 1 */ + __m512i indices; + if (i < 5) + indices = _mm512_add_epi64(left_vec, rand_32); /* indices for arr */ + else + indices = _mm512_sub_epi64(right_vec, rand_32); /* indices for arr */ + + v[i] = _mm512_i64gather_@vsuf2@(indices, arr, sizeof(@type_t@)); + } + + /* median network for 9 elements */ + COEX_YMM_@vsuf1@(v[0], v[1]); COEX_YMM_@vsuf1@(v[2], v[3]); + COEX_YMM_@vsuf1@(v[4], v[5]); COEX_YMM_@vsuf1@(v[6], v[7]); + COEX_YMM_@vsuf1@(v[0], v[2]); COEX_YMM_@vsuf1@(v[1], v[3]); + COEX_YMM_@vsuf1@(v[4], v[6]); COEX_YMM_@vsuf1@(v[5], v[7]); + COEX_YMM_@vsuf1@(v[0], v[4]); COEX_YMM_@vsuf1@(v[1], v[2]); + COEX_YMM_@vsuf1@(v[5], v[6]); COEX_YMM_@vsuf1@(v[3], v[7]); + COEX_YMM_@vsuf1@(v[1], v[5]); COEX_YMM_@vsuf1@(v[2], v[6]); + COEX_YMM_@vsuf1@(v[3], v[5]); COEX_YMM_@vsuf1@(v[2], v[4]); + COEX_YMM_@vsuf1@(v[3], v[4]); + COEX_YMM_@vsuf1@(v[3], v[8]); + COEX_YMM_@vsuf1@(v[4], v[8]); + + // technically v[4] needs to be sorted before we pick the correct median; + // picking the 4th element works just as well for performance + @type_t@* temp = (@type_t@*) &v[4]; + + return temp[4]; +} + +/* + * Partitions one ZMM register based on the pivot and returns the number of + * elements stored on the greater-than-pivot side. + */ +static NPY_INLINE +npy_int partition_vec_@vsuf1@(@type_t@* arr, npy_intp left, npy_intp right, + const @zmm_t@ curr_vec, const @zmm_t@ pivot_vec, + @zmm_t@* smallest_vec, @zmm_t@* biggest_vec) +{ + /* which elements are larger than the pivot */ + __mmask16 gt_mask = _mm512_cmp_@vsuf1@_mask(curr_vec, pivot_vec, @CMP_GE_OP@); + npy_int amount_gt_pivot = _mm_popcnt_u32((npy_int)gt_mask); + _mm512_mask_compressstoreu_@vsuf2@(arr + left, _knot_mask16(gt_mask), curr_vec); + _mm512_mask_compressstoreu_@vsuf2@(arr + right - amount_gt_pivot, gt_mask, curr_vec); + *smallest_vec = _mm512_min_@vsuf1@(curr_vec, *smallest_vec); + *biggest_vec = _mm512_max_@vsuf1@(curr_vec, *biggest_vec); + return amount_gt_pivot; +} + +/* + * Partitions an array based on the pivot and returns the start index of + * the upper (greater-than-pivot) partition.
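 *
 * A rough scalar sketch of what this partition step computes (an
 * illustration only, not the implementation; it ignores the
 * smallest/biggest bookkeeping and reuses the swap_@TYPE@ helper
 * defined above):
 *
 *     npy_intp store = left;
 *     for (npy_intp i = left; i < right; ++i) {
 *         if (arr[i] <= pivot) {
 *             swap_@TYPE@(arr, store, i);
 *             store++;
 *         }
 *     }
 *     return store;   // start index of the upper partition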
+ */ +static NPY_INLINE +npy_intp partition_avx512_@vsuf1@(@type_t@* arr, npy_intp left, npy_intp right, + @type_t@ pivot, @type_t@* smallest, @type_t@* biggest) +{ + /* make array length divisible by 16, shortening the array */ + for (npy_int i = (right - left) % 16; i > 0; --i) { + *smallest = MIN(*smallest, arr[left]); + *biggest = MAX(*biggest, arr[left]); + if (arr[left] > pivot) { + swap_@TYPE@(arr, left, --right); + } + else { + ++left; + } + } + + if (left == right) + return left; /* less than 16 elements in the array */ + + @zmm_t@ pivot_vec = _mm512_set1_@vsuf2@(pivot); + @zmm_t@ min_vec = _mm512_set1_@vsuf2@(*smallest); + @zmm_t@ max_vec = _mm512_set1_@vsuf2@(*biggest); + + if (right - left == 16) { + @zmm_t@ vec = _mm512_loadu_@vsuf3@(arr + left); + npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, left, left + 16, vec, pivot_vec, &min_vec, &max_vec); + *smallest = npyv_reducemin_@vsuf4@(min_vec); + *biggest = npyv_reducemax_@vsuf4@(max_vec); + return left + (16 - amount_gt_pivot); + } + + // first and last 16 values are partitioned at the end + @zmm_t@ vec_left = _mm512_loadu_@vsuf3@(arr + left); + @zmm_t@ vec_right = _mm512_loadu_@vsuf3@(arr + (right - 16)); + // store positions of the vectors + npy_intp r_store = right - 16; + npy_intp l_store = left; + // indices for loading the elements + left += 16; + right -= 16; + while (right - left != 0) { + @zmm_t@ curr_vec; + /* + * if fewer elements are stored on the right side of the array, + * then next elements are loaded from the right side, + * otherwise from the left side + */ + if ((r_store + 16) - right < left - l_store) { + right -= 16; + curr_vec = _mm512_loadu_@vsuf3@(arr + right); + } + else { + curr_vec = _mm512_loadu_@vsuf3@(arr + left); + left += 16; + } + // partition the current vector and save it on both sides of the array + npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, r_store + 16, curr_vec, pivot_vec, &min_vec, &max_vec); + r_store -= amount_gt_pivot; l_store += (16 - amount_gt_pivot); + } + + /* partition and save vec_left and vec_right */ + npy_int amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, r_store + 16, vec_left, pivot_vec, &min_vec, &max_vec); + l_store += (16 - amount_gt_pivot); + amount_gt_pivot = partition_vec_@vsuf1@(arr, l_store, l_store + 16, vec_right, pivot_vec, &min_vec, &max_vec); + l_store += (16 - amount_gt_pivot); + *smallest = npyv_reducemin_@vsuf4@(min_vec); + *biggest = npyv_reducemax_@vsuf4@(max_vec); + return l_store; +} + +static NPY_INLINE +void qsort_@type@(@type_t@* arr, npy_intp left, npy_intp right, npy_int max_iters) +{ + /* + * Resort to heapsort if quicksort isn't making any progress + */ + if (max_iters <= 0) { + heapsort_@type@((void*)(arr + left), right + 1 - left, NULL); + return; + } + /* + * Base case: use bitonic networks to sort arrays <= 128 + */ + if (right + 1 - left <= 128) { + sort_128_@vsuf1@(arr + left, right + 1 - left); + return; + } + + @type_t@ pivot = get_pivot_@vsuf1@(arr, left, right); + @type_t@ smallest = @TYPE_MAX_VAL@; + @type_t@ biggest = @TYPE_MIN_VAL@; + npy_intp pivot_index = partition_avx512_@vsuf1@(arr, left, right+1, pivot, &smallest, &biggest); + if (pivot != smallest) + qsort_@type@(arr, left, pivot_index - 1, max_iters - 1); + if (pivot != biggest) + qsort_@type@(arr, pivot_index, right, max_iters - 1); +} +/**end repeat**/ + +static NPY_INLINE +npy_intp replace_nan_with_inf(npy_float* arr, npy_intp arrsize) +{ + npy_intp nan_count = 0; + __mmask16 loadmask = 0xFFFF; + while (arrsize > 0) { + if (arrsize < 16) { + loadmask
= (0x0001 << arrsize) - 0x0001; + } + __m512 in_zmm = _mm512_maskz_loadu_ps(loadmask, arr); + __mmask16 nanmask = _mm512_cmp_ps_mask(in_zmm, in_zmm, _CMP_NEQ_UQ); + nan_count += _mm_popcnt_u32((npy_int) nanmask); + _mm512_mask_storeu_ps(arr, nanmask, ZMM_MAX_FLOAT); + arr += 16; + arrsize -= 16; + } + return nan_count; +} + +static NPY_INLINE +void replace_inf_with_nan(npy_float* arr, npy_intp arrsize, npy_intp nan_count) +{ + for (npy_intp ii = arrsize-1; nan_count > 0; --ii) { + arr[ii] = NPY_NANF; + nan_count -= 1; + } +} + +/**begin repeat + * + * #type = int, uint, float# + * #type_t = npy_int, npy_uint, npy_float# + * #FIXNAN = 0, 0, 1# + */ + +NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(x86_quicksort_@type@) +(void* arr, npy_intp arrsize) +{ + if (arrsize > 1) { +#if @FIXNAN@ + npy_intp nan_count = replace_nan_with_inf((@type_t@*) arr, arrsize); +#endif + qsort_@type@((@type_t@*) arr, 0, arrsize-1, 2*log2(arrsize)); +#if @FIXNAN@ + replace_inf_with_nan((@type_t@*) arr, arrsize, nan_count); +#endif + } +} +/**end repeat**/ + +#endif // NPY_HAVE_AVX512_SKX diff --git a/numpy/core/src/npysort/x86-qsort.h b/numpy/core/src/npysort/x86-qsort.h new file mode 100644 index 000000000..8cb8e3654 --- /dev/null +++ b/numpy/core/src/npysort/x86-qsort.h @@ -0,0 +1,18 @@ +#include "numpy/npy_common.h" +#include "npy_cpu_dispatch.h" + +#ifndef NPY_NO_EXPORT + #define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN +#endif + +#ifndef NPY_DISABLE_OPTIMIZATION + #include "x86-qsort.dispatch.h" +#endif +NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_int, + (void *start, npy_intp num)) + +NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_uint, + (void *start, npy_intp num)) + +NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void x86_quicksort_float, + (void *start, npy_intp num)) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 708e82910..73bb5e2d8 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -9278,3 +9278,52 @@ class TestViewDtype: [[1284, 1798], [4368, 4882]], [[2312, 2826], [5396, 5910]]] assert_array_equal(x.view('<i2'), expected) + + +# Test various array sizes that hit different code paths in quicksort-avx512 +@pytest.mark.parametrize("N", [8, 16, 24, 32, 48, 64, 96, 128, 151, 191, + 256, 383, 512, 1023, 2047]) +def test_sort_float(N): + # Regular data with nan sprinkled + np.random.seed(42) + arr = -0.5 + np.random.sample(N).astype('f') + arr[np.random.choice(arr.shape[0], 3)] = np.nan + assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap')) + + # (2) with +INF + infarr = np.inf*np.ones(N, dtype='f') + infarr[np.random.choice(infarr.shape[0], 5)] = -1.0 + assert_equal(np.sort(infarr, kind='quick'), np.sort(infarr, kind='heap')) + + # (3) with -INF + neginfarr = -np.inf*np.ones(N, dtype='f') + neginfarr[np.random.choice(neginfarr.shape[0], 5)] = 1.0 + assert_equal(np.sort(neginfarr, kind='quick'), + np.sort(neginfarr, kind='heap')) + + # (4) with +/-INF + infarr = np.inf*np.ones(N, dtype='f') + infarr[np.random.choice(infarr.shape[0], (int)(N/2))] = -np.inf + assert_equal(np.sort(infarr, kind='quick'), np.sort(infarr, kind='heap')) + + +def test_sort_int(): + # Random data with NPY_MAX_INT32 and NPY_MIN_INT32 sprinkled + rng = np.random.default_rng(42) + N = 2047 + minv = np.iinfo(np.int32).min + maxv = np.iinfo(np.int32).max + arr = rng.integers(low=minv, high=maxv, size=N).astype('int32') + arr[np.random.choice(arr.shape[0], 10)] = minv + arr[np.random.choice(arr.shape[0], 10)] = maxv + 
assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap')) + + +def test_sort_uint(): + # Random data with NPY_MAX_UINT32 sprinkled + rng = np.random.default_rng(42) + N = 2047 + maxv = np.iinfo(np.uint32).max + arr = rng.integers(low=0, high=maxv, size=N).astype('uint32') + arr[np.random.choice(arr.shape[0], 10)] = maxv + assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap')) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index d5b130b72..900538134 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -4411,7 +4411,7 @@ def _check_interpolation_as_method(method, interpolation, fname): f"the `interpolation=` argument to {fname} was renamed to " "`method=`, which has additional options.\n" "Users of the modes 'nearest', 'lower', 'higher', or " - "'midpoint' are encouraged to review the method they. " + "'midpoint' are encouraged to review the method they used. " "(Deprecated NumPy 1.22)", DeprecationWarning, stacklevel=4) if method != "linear": diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 6818ef81d..90424aab4 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -5,6 +5,7 @@ import itertools import warnings import weakref import contextlib +import operator from operator import itemgetter, index as opindex, methodcaller from collections.abc import Mapping @@ -13,6 +14,7 @@ from . import format from ._datasource import DataSource from numpy.core import overrides from numpy.core.multiarray import packbits, unpackbits +from numpy.core._multiarray_umath import _load_from_filelike from numpy.core.overrides import set_array_function_like_doc, set_module from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, @@ -721,101 +723,6 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): zipf.close() -def _floatconv(x): - try: - return float(x) # The fastest path. - except ValueError: - if '0x' in x: # Don't accidentally convert "a" ("0xa") to 10. - try: - return float.fromhex(x) - except ValueError: - pass - raise # Raise the original exception, which makes more sense. - - -_CONVERTERS = [ # These converters only ever get strs (not bytes) as input. - (np.bool_, lambda x: bool(int(x))), - (np.uint64, np.uint64), - (np.int64, np.int64), - (np.integer, lambda x: int(float(x))), - (np.longdouble, np.longdouble), - (np.floating, _floatconv), - (complex, lambda x: complex(x.replace('+-', '-'))), - (np.bytes_, methodcaller('encode', 'latin-1')), - (np.unicode_, str), -] - - -def _getconv(dtype): - """ - Find the correct dtype converter. Adapted from matplotlib. - - Even when a lambda is returned, it is defined at the toplevel, to allow - testing for equality and enabling optimization for single-type data. - """ - for base, conv in _CONVERTERS: - if issubclass(dtype.type, base): - return conv - return str - - -# _loadtxt_flatten_dtype_internal and _loadtxt_pack_items are loadtxt helpers -# lifted to the toplevel because recursive inner functions cause either -# GC-dependent reference loops (because they are closures over loadtxt's -# internal variables) or large overheads if using a manual trampoline to hide -# the recursive calls. - - -# not to be confused with the flatten_dtype we import... -def _loadtxt_flatten_dtype_internal(dt): - """Unpack a structured data-type, and produce a packer function.""" - if dt.names is None: - # If the dtype is flattened, return. - # If the dtype has a shape, the dtype occurs - # in the list more than once. 
- shape = dt.shape - if len(shape) == 0: - return ([dt.base], None) - else: - packing = [(shape[-1], list)] - if len(shape) > 1: - for dim in dt.shape[-2::-1]: - packing = [(dim*packing[0][0], packing*dim)] - return ([dt.base] * int(np.prod(dt.shape)), - functools.partial(_loadtxt_pack_items, packing)) - else: - types = [] - packing = [] - for field in dt.names: - tp, bytes = dt.fields[field] - flat_dt, flat_packer = _loadtxt_flatten_dtype_internal(tp) - types.extend(flat_dt) - flat_packing = flat_packer.args[0] if flat_packer else None - # Avoid extra nesting for subarrays - if tp.ndim > 0: - packing.extend(flat_packing) - else: - packing.append((len(flat_dt), flat_packing)) - return (types, functools.partial(_loadtxt_pack_items, packing)) - - -def _loadtxt_pack_items(packing, items): - """Pack items into nested lists based on re-packing info.""" - if packing is None: - return items[0] - elif packing is tuple: - return tuple(items) - elif packing is list: - return list(items) - else: - start = 0 - ret = [] - for length, subpacking in packing: - ret.append( - _loadtxt_pack_items(subpacking, items[start:start+length])) - start += length - return tuple(ret) - def _ensure_ndmin_ndarray_check_param(ndmin): """Just checks if the param ndmin is supported on _ensure_ndmin_ndarray. It is intended to be used as @@ -853,17 +760,330 @@ def _ensure_ndmin_ndarray(a, *, ndmin: int): _loadtxt_chunksize = 50000 -def _loadtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None, - converters=None, skiprows=None, usecols=None, unpack=None, - ndmin=None, encoding=None, max_rows=None, *, like=None): +def _loadtxt_dispatcher( + fname, dtype=None, comments=None, delimiter=None, + converters=None, skiprows=None, usecols=None, unpack=None, + ndmin=None, encoding=None, max_rows=None, *, like=None): return (like,) +def _check_nonneg_int(value, name="argument"): + try: + operator.index(value) + except TypeError: + raise TypeError(f"{name} must be an integer") from None + if value < 0: + raise ValueError(f"{name} must be nonnegative") + + +def _preprocess_comments(iterable, comments, encoding): + """ + Generator that consumes a line iterated iterable and strips out the + multiple (or multi-character) comments from lines. + This is a pre-processing step to achieve feature parity with loadtxt + (we assume that this feature is a nieche feature). + """ + for line in iterable: + if isinstance(line, bytes): + # Need to handle conversion here, or the splitting would fail + line = line.decode(encoding) + + for c in comments: + line = line.split(c, 1)[0] + + yield line + + +# The number of rows we read in one go if confronted with a parametric dtype +_loadtxt_chunksize = 50000 + + +def _read(fname, *, delimiter=',', comment='#', quote='"', + imaginary_unit='j', usecols=None, skiplines=0, + max_rows=None, converters=None, ndmin=None, unpack=False, + dtype=np.float64, encoding="bytes"): + r""" + Read a NumPy array from a text file. + + Parameters + ---------- + fname : str or file object + The filename or the file to be read. + delimiter : str, optional + Field delimiter of the fields in line of the file. + Default is a comma, ','. If None any sequence of whitespace is + considered a delimiter. + comment : str or sequence of str or None, optional + Character that begins a comment. All text from the comment + character to the end of the line is ignored. + Multiple comments or multiple-character comment strings are supported, + but may be slower and `quote` must be empty if used. + Use None to disable all use of comments. 
+ quote : str or None, optional + Character that is used to quote string fields. Default is '"' + (a double quote). Use None to disable quote support. + imaginary_unit : str, optional + Character that represent the imaginay unit `sqrt(-1)`. + Default is 'j'. + usecols : array_like, optional + A one-dimensional array of integer column numbers. These are the + columns from the file to be included in the array. If this value + is not given, all the columns are used. + skiplines : int, optional + Number of lines to skip before interpreting the data in the file. + max_rows : int, optional + Maximum number of rows of data to read. Default is to read the + entire file. + converters : dict or callable, optional + A function to parse all columns strings into the desired value, or + a dictionary mapping column number to a parser function. + E.g. if column 0 is a date string: ``converters = {0: datestr2num}``. + Converters can also be used to provide a default value for missing + data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will + convert empty fields to 0. + Default: None + ndmin : int, optional + Minimum dimension of the array returned. + Allowed values are 0, 1 or 2. Default is 0. + unpack : bool, optional + If True, the returned array is transposed, so that arguments may be + unpacked using ``x, y, z = read(...)``. When used with a structured + data-type, arrays are returned for each field. Default is False. + dtype : numpy data type + A NumPy dtype instance, can be a structured dtype to map to the + columns of the file. + encoding : str, optional + Encoding used to decode the inputfile. The special value 'bytes' + (the default) enables backwards-compatible behavior for `converters`, + ensuring that inputs to the converter functions are encoded + bytes objects. The special value 'bytes' has no additional effect if + ``converters=None``. If encoding is ``'bytes'`` or ``None``, the + default system encoding is used. + + Returns + ------- + ndarray + NumPy array. + + Examples + -------- + First we create a file for the example. + + >>> s1 = '1.0,2.0,3.0\n4.0,5.0,6.0\n' + >>> with open('example1.csv', 'w') as f: + ... f.write(s1) + >>> a1 = read_from_filename('example1.csv') + >>> a1 + array([[1., 2., 3.], + [4., 5., 6.]]) + + The second example has columns with different data types, so a + one-dimensional array with a structured data type is returned. + The tab character is used as the field delimiter. + + >>> s2 = '1.0\t10\talpha\n2.3\t25\tbeta\n4.5\t16\tgamma\n' + >>> with open('example2.tsv', 'w') as f: + ... f.write(s2) + >>> a2 = read_from_filename('example2.tsv', delimiter='\t') + >>> a2 + array([(1. , 10, b'alpha'), (2.3, 25, b'beta'), (4.5, 16, b'gamma')], + dtype=[('f0', '<f8'), ('f1', 'u1'), ('f2', 'S5')]) + """ + # Handle special 'bytes' keyword for encoding + byte_converters = False + if encoding == 'bytes': + encoding = None + byte_converters = True + + if dtype is None: + raise TypeError("a dtype must be provided.") + dtype = np.dtype(dtype) + + read_dtype_via_object_chunks = None + if dtype.kind in 'SUM' and ( + dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'): + # This is a legacy "flexible" dtype. We do not truly support + # parametric dtypes currently (no dtype discovery step in the core), + # but have to support these for backward compatibility. 
+ read_dtype_via_object_chunks = dtype + dtype = np.dtype(object) + + if usecols is not None: + # Allow usecols to be a single int or a sequence of ints, the C-code + # handles the rest + try: + usecols = list(usecols) + except TypeError: + usecols = [usecols] + + _ensure_ndmin_ndarray_check_param(ndmin) + + if comment is None: + comments = None + else: + # assume comments are a sequence of strings + if "" in comment: + raise ValueError( + "comments cannot be an empty string. Use comments=None to " + "disable comments." + ) + comments = tuple(comment) + comment = None + if len(comments) == 0: + comments = None # No comments at all + elif len(comments) == 1: + # If there is only one comment, and that comment has one character, + # the normal parsing can deal with it just fine. + if isinstance(comments[0], str) and len(comments[0]) == 1: + comment = comments[0] + comments = None + else: + # Input validation if there are multiple comment characters + if delimiter in comments: + raise TypeError( + f"Comment characters '{comments}' cannot include the " + f"delimiter '{delimiter}'" + ) + + # comment is now either a 1 or 0 character string or a tuple: + if comments is not None: + # Note: An earlier version support two character comments (and could + # have been extended to multiple characters, we assume this is + # rare enough to not optimize for. + if quote is not None: + raise ValueError( + "when multiple comments or a multi-character comment is " + "given, quotes are not supported. In this case quotechar " + "must be set to None.") + + if len(imaginary_unit) != 1: + raise ValueError('len(imaginary_unit) must be 1.') + + _check_nonneg_int(skiplines) + if max_rows is not None: + _check_nonneg_int(max_rows) + else: + # Passing -1 to the C code means "read the entire file". + max_rows = -1 + + fh_closing_ctx = contextlib.nullcontext() + filelike = False + try: + if isinstance(fname, os.PathLike): + fname = os.fspath(fname) + if isinstance(fname, str): + fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) + if encoding is None: + encoding = getattr(fh, 'encoding', 'latin1') + + fh_closing_ctx = contextlib.closing(fh) + data = fh + filelike = True + else: + if encoding is None: + encoding = getattr(fname, 'encoding', 'latin1') + data = iter(fname) + except TypeError as e: + raise ValueError( + f"fname must be a string, filehandle, list of strings,\n" + f"or generator. Got {type(fname)} instead.") from e + + with fh_closing_ctx: + if comments is not None: + if filelike: + data = iter(data) + filelike = False + data = _preprocess_comments(data, comments, encoding) + + if read_dtype_via_object_chunks is None: + arr = _load_from_filelike( + data, delimiter=delimiter, comment=comment, quote=quote, + imaginary_unit=imaginary_unit, + usecols=usecols, skiplines=skiplines, max_rows=max_rows, + converters=converters, dtype=dtype, + encoding=encoding, filelike=filelike, + byte_converters=byte_converters) + + else: + # This branch reads the file into chunks of object arrays and then + # casts them to the desired actual dtype. This ensures correct + # string-length and datetime-unit discovery (like `arr.astype()`). + # Due to chunking, certain error reports are less clear, currently. 
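[Editorial aside, not part of the patch] The comments in this hunk describe the fallback for "flexible" dtypes (a size-less ``S``/``U`` or unit-less ``M8``/``m8``): rows are first parsed into object arrays in fixed-size chunks, and each chunk is then cast so that string lengths and datetime units are discovered the way ``arr.astype()`` would discover them. A minimal pure-Python sketch of that idea follows; the helper name, the tiny chunk size, and the sample data are invented for illustration, and the real parsing in the patch is done by ``_load_from_filelike``, not shown here.

    import numpy as np

    def _cast_in_chunks(rows, final_dtype, chunk_size=2):
        # Parse everything as object first, then let astype() discover the
        # concrete string length (or datetime unit) one chunk at a time.
        chunks = [
            np.array(rows[i:i + chunk_size], dtype=object).astype(final_dtype)
            for i in range(0, len(rows), chunk_size)
        ]
        # concatenate promotes to the widest dtype discovered in any chunk.
        return np.concatenate(chunks)

    # The longest string determines the final itemsize, e.g. <U12 here;
    # the same idea applies to a unit-less np.dtype("M8") and date strings.
    print(_cast_in_chunks(["spam", "eggs-and-ham", "x"], np.dtype("U")).dtype)

[End of aside; the patch continues below.]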
+ if filelike: + data = iter(data) # cannot chunk when reading from file + + c_byte_converters = False + if read_dtype_via_object_chunks == "S": + c_byte_converters = True # Use latin1 rather than ascii + + chunks = [] + while max_rows != 0: + if max_rows < 0: + chunk_size = _loadtxt_chunksize + else: + chunk_size = min(_loadtxt_chunksize, max_rows) + + next_arr = _load_from_filelike( + data, delimiter=delimiter, comment=comment, quote=quote, + imaginary_unit=imaginary_unit, + usecols=usecols, skiplines=skiplines, max_rows=max_rows, + converters=converters, dtype=dtype, + encoding=encoding, filelike=filelike, + byte_converters=byte_converters, + c_byte_converters=c_byte_converters) + # Cast here already. We hope that this is better even for + # large files because the storage is more compact. It could + # be adapted (in principle the concatenate could cast). + chunks.append(next_arr.astype(read_dtype_via_object_chunks)) + + skiprows = 0 # Only have to skip for first chunk + if max_rows >= 0: + max_rows -= chunk_size + if len(next_arr) < chunk_size: + # There was less data than requested, so we are done. + break + + # Need at least one chunk, but if empty, the last one may have + # the wrong shape. + if len(chunks) > 1 and len(chunks[-1]) == 0: + del chunks[-1] + if len(chunks) == 1: + arr = chunks[0] + else: + arr = np.concatenate(chunks, axis=0) + + # NOTE: ndmin works as advertised for structured dtypes, but normally + # these would return a 1D result plus the structured dimension, + # so ndmin=2 adds a third dimension even when no squeezing occurs. + # A `squeeze=False` could be a better solution (pandas uses squeeze). + arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin) + + if arr.shape: + if arr.shape[0] == 0: + warnings.warn( + f'loadtxt: input contained no data: "{fname}"', + category=UserWarning, + stacklevel=3 + ) + + if unpack: + # Unpack structured dtypes if requested: + dt = arr.dtype + if dt.names is not None: + # For structured arrays, return an array for each field. + return [arr[field] for field in dt.names] + else: + return arr.T + else: + return arr + + @set_array_function_like_doc @set_module('numpy') def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, - ndmin=0, encoding='bytes', max_rows=None, *, like=None): + ndmin=0, encoding='bytes', max_rows=None, *, quotechar=None, + like=None): r""" Load data from a text file. @@ -882,19 +1102,20 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. - comments : str or sequence of str, optional + comments : str or sequence of str or None, optional The characters or list of characters used to indicate the start of a comment. None implies no comments. For backwards compatibility, byte strings will be decoded as 'latin1'. The default is '#'. delimiter : str, optional The string used to separate values. For backwards compatibility, byte strings will be decoded as 'latin1'. The default is whitespace. - converters : dict, optional - A dictionary mapping column number to a function that will parse the - column string into the desired value. E.g., if column 0 is a date - string: ``converters = {0: datestr2num}``. Converters can also be - used to provide a default value for missing data (but see also - `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. 
+ converters : dict or callable, optional + A function to parse all columns strings into the desired value, or + a dictionary mapping column number to a parser function. + E.g. if column 0 is a date string: ``converters = {0: datestr2num}``. + Converters can also be used to provide a default value for missing + data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will + convert empty fields to 0. Default: None. skiprows : int, optional Skip the first `skiprows` lines, including comments; default: 0. @@ -932,6 +1153,16 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, is to read all the lines. .. versionadded:: 1.16.0 + quotechar : unicode character or None, optional + The character used to denote the start and end of a quoted item. + Occurrences of the delimiter or comment characters are ignored within + a quoted item. The default value is ``quotechar=None``, which means + quoting support is disabled. + + If two consecutive instances of `quotechar` are found within a quoted + field, the first is treated as an escape character. See examples. + + .. versionadded:: 1.23.0 ${ARRAY_FUNCTION_LIKE} .. versionadded:: 1.20.0 @@ -979,6 +1210,29 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, >>> y array([2., 4.]) + The `converters` argument is used to specify functions to preprocess the + text prior to parsing. `converters` can be a dictionary that maps + preprocessing functions to each column: + + >>> s = StringIO("1.618, 2.296\n3.141, 4.669\n") + >>> conv = { + ... 0: lambda x: np.floor(float(x)), # conversion fn for column 0 + ... 1: lambda x: np.ceil(float(x)), # conversion fn for column 1 + ... } + >>> np.loadtxt(s, delimiter=",", converters=conv) + array([[1., 3.], + [3., 5.]]) + + `converters` can be a callable instead of a dictionary, in which case it + is applied to all columns: + + >>> s = StringIO("0xDE 0xAD\n0xC0 0xDE") + >>> import functools + >>> conv = functools.partial(int, base=16) + >>> np.loadtxt(s, converters=conv) + array([[222., 173.], + [192., 222.]]) + This example shows how `converters` can be used to convert a field with a trailing minus sign into a negative number. @@ -986,242 +1240,90 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, >>> def conv(fld): ... return -float(fld[:-1]) if fld.endswith(b'-') else float(fld) ... - >>> np.loadtxt(s, converters={0: conv, 1: conv}) + >>> np.loadtxt(s, converters=conv) array([[ 10.01, -31.25], [ 19.22, 64.31], [-17.57, 63.94]]) - """ - - if like is not None: - return _loadtxt_with_like( - fname, dtype=dtype, comments=comments, delimiter=delimiter, - converters=converters, skiprows=skiprows, usecols=usecols, - unpack=unpack, ndmin=ndmin, encoding=encoding, - max_rows=max_rows, like=like - ) - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Nested functions used by loadtxt. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Using a callable as the converter can be particularly useful for handling + values with different formatting, e.g. floats with underscores: - def split_line(line: str): - """Chop off comments, strip, and split at delimiter.""" - for comment in comments: # Much faster than using a single regex. 
- line = line.split(comment, 1)[0] - line = line.strip('\r\n') - return line.split(delimiter) if line else [] + >>> s = StringIO("1 2.7 100_000") + >>> np.loadtxt(s, converters=float) + array([1.e+00, 2.7e+00, 1.e+05]) - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Main body of loadtxt. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - _ensure_ndmin_ndarray_check_param(ndmin) - - # Type conversions for Py3 convenience - if comments is not None: - if isinstance(comments, (str, bytes)): - comments = [comments] - comments = [_decode_line(x) for x in comments] - else: - comments = [] - - if delimiter is not None: - delimiter = _decode_line(delimiter) - - user_converters = converters - - byte_converters = False - if encoding == 'bytes': - encoding = None - byte_converters = True - - if usecols is not None: - # Copy usecols, allowing it to be a single int or a sequence of ints. - try: - usecols = list(usecols) - except TypeError: - usecols = [usecols] - for i, col_idx in enumerate(usecols): - try: - usecols[i] = opindex(col_idx) # Cast to builtin int now. - except TypeError as e: - e.args = ( - "usecols must be an int or a sequence of ints but " - "it contains at least one element of type %s" % - type(col_idx), - ) - raise - if len(usecols) > 1: - usecols_getter = itemgetter(*usecols) - else: - # Get an iterable back, even if using a single column. - usecols_getter = lambda obj, c=usecols[0]: [obj[c]] - else: - usecols_getter = None + This idea can be extended to automatically handle values specified in + many different formats: - # Make sure we're dealing with a proper dtype - dtype = np.dtype(dtype) - defconv = _getconv(dtype) + >>> def conv(val): + ... try: + ... return float(val) + ... except ValueError: + ... return float.fromhex(val) + >>> s = StringIO("1, 2.5, 3_000, 0b4, 0x1.4000000000000p+2") + >>> np.loadtxt(s, delimiter=",", converters=conv, encoding=None) + array([1.0e+00, 2.5e+00, 3.0e+03, 1.8e+02, 5.0e+00]) - dtype_types, packer = _loadtxt_flatten_dtype_internal(dtype) + Note that with the default ``encoding="bytes"``, the inputs to the + converter function are latin-1 encoded byte strings. To deactivate the + implicit encoding prior to conversion, use ``encoding=None`` - fh_closing_ctx = contextlib.nullcontext() - try: - if isinstance(fname, os_PathLike): - fname = os_fspath(fname) - if _is_string_like(fname): - fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) - fencoding = getattr(fh, 'encoding', 'latin1') - line_iter = iter(fh) - fh_closing_ctx = contextlib.closing(fh) - else: - line_iter = iter(fname) - fencoding = getattr(fname, 'encoding', 'latin1') - try: - first_line = next(line_iter) - except StopIteration: - pass # Nothing matters if line_iter is empty. - else: - # Put first_line back. - line_iter = itertools.chain([first_line], line_iter) - if isinstance(first_line, bytes): - # Using latin1 matches _decode_line's behavior. - decoder = methodcaller( - "decode", - encoding if encoding is not None else "latin1") - line_iter = map(decoder, line_iter) - except TypeError as e: - raise ValueError( - f"fname must be a string, filehandle, list of strings,\n" - f"or generator. Got {type(fname)} instead." 
- ) from e + >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94') + >>> conv = lambda x: -float(x[:-1]) if x.endswith('-') else float(x) + >>> np.loadtxt(s, converters=conv, encoding=None) + array([[ 10.01, -31.25], + [ 19.22, 64.31], + [-17.57, 63.94]]) - with fh_closing_ctx: + Support for quoted fields is enabled with the `quotechar` parameter. + Comment and delimiter characters are ignored when they appear within a + quoted item delineated by `quotechar`: - # input may be a python2 io stream - if encoding is not None: - fencoding = encoding - # we must assume local encoding - # TODO emit portability warning? - elif fencoding is None: - import locale - fencoding = locale.getpreferredencoding() - - # Skip the first `skiprows` lines - for i in range(skiprows): - next(line_iter) - - # Read until we find a line with some values, and use it to determine - # the need for decoding and estimate the number of columns. - for first_line in line_iter: - ncols = len(usecols or split_line(first_line)) - if ncols: - # Put first_line back. - line_iter = itertools.chain([first_line], line_iter) - break - else: # End of lines reached - ncols = len(usecols or []) - warnings.warn('loadtxt: Empty input file: "%s"' % fname, - stacklevel=2) - - line_iter = itertools.islice(line_iter, max_rows) - lineno_words_iter = filter( - itemgetter(1), # item[1] is words; filter skips empty lines. - enumerate(map(split_line, line_iter), 1 + skiprows)) - - # Now that we know ncols, create the default converters list, and - # set packing, if necessary. - if len(dtype_types) > 1: - # We're dealing with a structured array, each field of - # the dtype matches a column - converters = [_getconv(dt) for dt in dtype_types] - else: - # All fields have the same dtype; use specialized packers which are - # much faster than those using _loadtxt_pack_items. - converters = [defconv for i in range(ncols)] - if ncols == 1: - packer = itemgetter(0) - else: - def packer(row): return row + >>> s = StringIO('"alpha, #42", 10.0\n"beta, #64", 2.0\n') + >>> dtype = np.dtype([("label", "U12"), ("value", float)]) + >>> np.loadtxt(s, dtype=dtype, delimiter=",", quotechar='"') + array([('alpha, #42', 10.), ('beta, #64', 2.)], + dtype=[('label', '<U12'), ('value', '<f8')]) - # By preference, use the converters specified by the user - for i, conv in (user_converters or {}).items(): - if usecols: - try: - i = usecols.index(i) - except ValueError: - # Unused converter specified - continue - if byte_converters: - # converters may use decode to workaround numpy's old - # behaviour, so encode the string again (converters are only - # called with strings) before passing to the user converter. - def tobytes_first(conv, x): - return conv(x.encode("latin1")) - converters[i] = functools.partial(tobytes_first, conv) - else: - converters[i] = conv - - fencode = methodcaller("encode", fencoding) - converters = [conv if conv is not bytes else fencode - for conv in converters] - if len(set(converters)) == 1: - # Optimize single-type data. Note that this is only reached if - # `_getconv` returns equal callables (i.e. not local lambdas) on - # equal dtypes. 
- def convert_row(vals, _conv=converters[0]): - return [*map(_conv, vals)] - else: - def convert_row(vals): - return [conv(val) for conv, val in zip(converters, vals)] - - # read data in chunks and fill it into an array via resize - # over-allocating and shrinking the array later may be faster but is - # probably not relevant compared to the cost of actually reading and - # converting the data - X = None - while True: - chunk = [] - for lineno, words in itertools.islice( - lineno_words_iter, _loadtxt_chunksize): - if usecols_getter is not None: - words = usecols_getter(words) - elif len(words) != ncols: - raise ValueError( - f"Wrong number of columns at line {lineno}") - # Convert each value according to its column, then pack it - # according to the dtype's nesting, and store it. - chunk.append(packer(convert_row(words))) - if not chunk: # The islice is empty, i.e. we're done. - break + Two consecutive quote characters within a quoted field are treated as a + single escaped character: - if X is None: - X = np.array(chunk, dtype) - else: - nshape = list(X.shape) - pos = nshape[0] - nshape[0] += len(chunk) - X.resize(nshape, refcheck=False) - X[pos:, ...] = chunk + >>> s = StringIO('"Hello, my name is ""Monty""!"') + >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"') + array('Hello, my name is "Monty"!', dtype='<U26') - if X is None: - X = np.array([], dtype) + """ - # Multicolumn data are returned with shape (1, N, M), i.e. - # (1, 1, M) for a single row - remove the singleton dimension there - if X.ndim == 3 and X.shape[:2] == (1, 1): - X.shape = (1, -1) + if like is not None: + return _loadtxt_with_like( + fname, dtype=dtype, comments=comments, delimiter=delimiter, + converters=converters, skiprows=skiprows, usecols=usecols, + unpack=unpack, ndmin=ndmin, encoding=encoding, + max_rows=max_rows, like=like + ) - X = _ensure_ndmin_ndarray(X, ndmin=ndmin) + if isinstance(delimiter, bytes): + delimiter.decode("latin1") - if unpack: - if len(dtype_types) > 1: - # For structured arrays, return an array for each field. 
- return [X[field] for field in dtype.names] - else: - return X.T - else: - return X + if dtype is None: + dtype = np.float64 + + comment = comments + # Control character type conversions for Py3 convenience + if comment is not None: + if isinstance(comment, (str, bytes)): + comment = [comment] + comment = [ + x.decode('latin1') if isinstance(x, bytes) else x for x in comment] + if isinstance(delimiter, bytes): + delimiter = delimiter.decode('latin1') + + arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter, + converters=converters, skiplines=skiprows, usecols=usecols, + unpack=unpack, ndmin=ndmin, encoding=encoding, + max_rows=max_rows, quote=quotechar) + + return arr _loadtxt_with_like = array_function_dispatch( diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index b9b10bc06..a2758123b 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -695,7 +695,7 @@ class TestLoadTxt(LoadTxtBase): assert_array_equal(x, a) d = TextIO() - d.write('M 64.0 75.0\nF 25.0 60.0') + d.write('M 64 75.0\nF 25 60.0') d.seek(0) mydescriptor = {'names': ('gender', 'age', 'weight'), 'formats': ('S1', 'i4', 'f4')} @@ -779,6 +779,8 @@ class TestLoadTxt(LoadTxtBase): a = np.array([[1, 2, 3], [4, 5, 6]], int) assert_array_equal(x, a) + @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") def test_comments_multi_chars(self): c = TextIO() c.write('/* comment\n1,2,3,5\n') @@ -871,16 +873,27 @@ class TestLoadTxt(LoadTxtBase): bogus_idx = 1.5 assert_raises_regex( TypeError, - '^usecols must be.*%s' % type(bogus_idx), + '^usecols must be.*%s' % type(bogus_idx).__name__, np.loadtxt, c, usecols=bogus_idx ) assert_raises_regex( TypeError, - '^usecols must be.*%s' % type(bogus_idx), + '^usecols must be.*%s' % type(bogus_idx).__name__, np.loadtxt, c, usecols=[0, bogus_idx, 0] ) + def test_bad_usecols(self): + with pytest.raises(OverflowError): + np.loadtxt(["1\n"], usecols=[2**64], delimiter=",") + with pytest.raises((ValueError, OverflowError)): + # Overflow error on 32bit platforms + np.loadtxt(["1\n"], usecols=[2**62], delimiter=",") + with pytest.raises(TypeError, + match="If a structured dtype .*. But 1 usecols were given and " + "the number of fields is 3."): + np.loadtxt(["1,1\n"], dtype="i,(2)i", usecols=[0], delimiter=",") + def test_fancy_dtype(self): c = TextIO() c.write('1,2,3.0\n4,5,6.0\n') @@ -919,8 +932,7 @@ class TestLoadTxt(LoadTxtBase): assert_array_equal(x, a) def test_empty_file(self): - with suppress_warnings() as sup: - sup.filter(message="loadtxt: Empty input file:") + with pytest.warns(UserWarning, match="input contained no data"): c = TextIO() x = np.loadtxt(c) assert_equal(x.shape, (0,)) @@ -981,29 +993,32 @@ class TestLoadTxt(LoadTxtBase): c.write(inp) for dt in [float, np.float32]: c.seek(0) - res = np.loadtxt(c, dtype=dt) + res = np.loadtxt( + c, dtype=dt, converters=float.fromhex, encoding="latin1") assert_equal(res, tgt, err_msg="%s" % dt) + @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") def test_default_float_converter_no_default_hex_conversion(self): """ Ensure that fromhex is only used for values with the correct prefix and is not called by default. Regression test related to gh-19598. 
""" c = TextIO("a b c") - with pytest.raises( - ValueError, match="could not convert string to float" - ): + with pytest.raises(ValueError, + match=".*convert string 'a' to float64 at row 0, column 1"): np.loadtxt(c) + @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") def test_default_float_converter_exception(self): """ Ensure that the exception message raised during failed floating point conversion is correct. Regression test related to gh-19598. """ c = TextIO("qrs tuv") # Invalid values for default float converter - with pytest.raises( - ValueError, match="could not convert string to float" - ): + with pytest.raises(ValueError, + match="could not convert string 'qrs' to float64"): np.loadtxt(c) def test_from_complex(self): @@ -1099,8 +1114,7 @@ class TestLoadTxt(LoadTxtBase): assert_(x.shape == (3,)) # Test ndmin kw with empty file. - with suppress_warnings() as sup: - sup.filter(message="loadtxt: Empty input file:") + with pytest.warns(UserWarning, match="input contained no data"): f = TextIO() assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,)) assert_(np.loadtxt(f, ndmin=1).shape == (0,)) @@ -1132,8 +1146,8 @@ class TestLoadTxt(LoadTxtBase): @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968', reason="Wrong preferred encoding") def test_binary_load(self): - butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\ - b"20,2,3,\xc3\x95scar\n\r" + butf8 = b"5,6,7,\xc3\x95scarscar\r\n15,2,3,hello\r\n"\ + b"20,2,3,\xc3\x95scar\r\n" sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines() with temppath() as path: with open(path, "wb") as f: @@ -1196,6 +1210,30 @@ class TestLoadTxt(LoadTxtBase): a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int) assert_array_equal(x, a) + @pytest.mark.parametrize(["skip", "data"], [ + (1, ["ignored\n", "1,2\n", "\n", "3,4\n"]), + # "Bad" lines that do not end in newlines: + (1, ["ignored", "1,2", "", "3,4"]), + (1, StringIO("ignored\n1,2\n\n3,4")), + # Same as above, but do not skip any lines: + (0, ["-1,0\n", "1,2\n", "\n", "3,4\n"]), + (0, ["-1,0", "1,2", "", "3,4"]), + (0, StringIO("-1,0\n1,2\n\n3,4"))]) + def test_max_rows_empty_lines(self, skip, data): + with pytest.warns(UserWarning, + match=f"Input line 3.*max_rows={3-skip}"): + res = np.loadtxt(data, dtype=int, skiprows=skip, delimiter=",", + max_rows=3-skip) + assert_array_equal(res, [[-1, 0], [1, 2], [3, 4]][skip:]) + + if isinstance(data, StringIO): + data.seek(0) + + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + with pytest.raises(UserWarning): + np.loadtxt(data, dtype=int, skiprows=skip, delimiter=",", + max_rows=3-skip) class Testfromregex: def test_record(self): @@ -2397,6 +2435,13 @@ M 33 21.99 assert_equal(test['f1'], 17179869184) assert_equal(test['f2'], 1024) + def test_unpack_float_data(self): + txt = TextIO("1,2,3\n4,5,6\n7,8,9\n0.0,1.0,2.0") + a, b, c = np.loadtxt(txt, delimiter=",", unpack=True) + assert_array_equal(a, np.array([1.0, 4.0, 7.0, 0.0])) + assert_array_equal(b, np.array([2.0, 5.0, 8.0, 1.0])) + assert_array_equal(c, np.array([3.0, 6.0, 9.0, 2.0])) + def test_unpack_structured(self): # Regression test for gh-4341 # Unpacking should work on structured arrays diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py new file mode 100644 index 000000000..cca328b16 --- /dev/null +++ b/numpy/lib/tests/test_loadtxt.py @@ -0,0 +1,1002 @@ +""" +Tests specific to `np.loadtxt` added during the move of loadtxt to be backed +by C code. 
+These tests complement those found in `test_io.py`. +""" + +import sys +import pytest +from tempfile import NamedTemporaryFile, mkstemp +from io import StringIO + +import numpy as np +from numpy.ma.testutils import assert_equal +from numpy.testing import assert_array_equal, HAS_REFCOUNT, IS_PYPY + + +def test_scientific_notation(): + """Test that both 'e' and 'E' are parsed correctly.""" + data = StringIO( + ( + "1.0e-1,2.0E1,3.0\n" + "4.0e-2,5.0E-1,6.0\n" + "7.0e-3,8.0E1,9.0\n" + "0.0e-4,1.0E-1,2.0" + ) + ) + expected = np.array( + [[0.1, 20., 3.0], [0.04, 0.5, 6], [0.007, 80., 9], [0, 0.1, 2]] + ) + assert_array_equal(np.loadtxt(data, delimiter=","), expected) + + +@pytest.mark.parametrize("comment", ["..", "//", "@-", "this is a comment:"]) +def test_comment_multiple_chars(comment): + content = "# IGNORE\n1.5, 2.5# ABC\n3.0,4.0# XXX\n5.5,6.0\n" + txt = StringIO(content.replace("#", comment)) + a = np.loadtxt(txt, delimiter=",", comments=comment) + assert_equal(a, [[1.5, 2.5], [3.0, 4.0], [5.5, 6.0]]) + + +@pytest.fixture +def mixed_types_structured(): + """ + Fixture providing hetergeneous input data with a structured dtype, along + with the associated structured array. + """ + data = StringIO( + ( + "1000;2.4;alpha;-34\n" + "2000;3.1;beta;29\n" + "3500;9.9;gamma;120\n" + "4090;8.1;delta;0\n" + "5001;4.4;epsilon;-99\n" + "6543;7.8;omega;-1\n" + ) + ) + dtype = np.dtype( + [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)] + ) + expected = np.array( + [ + (1000, 2.4, "alpha", -34), + (2000, 3.1, "beta", 29), + (3500, 9.9, "gamma", 120), + (4090, 8.1, "delta", 0), + (5001, 4.4, "epsilon", -99), + (6543, 7.8, "omega", -1) + ], + dtype=dtype + ) + return data, dtype, expected + + +@pytest.mark.parametrize('skiprows', [0, 1, 2, 3]) +def test_structured_dtype_and_skiprows_no_empty_lines( + skiprows, mixed_types_structured): + data, dtype, expected = mixed_types_structured + a = np.loadtxt(data, dtype=dtype, delimiter=";", skiprows=skiprows) + assert_array_equal(a, expected[skiprows:]) + + +def test_unpack_structured(mixed_types_structured): + data, dtype, expected = mixed_types_structured + + a, b, c, d = np.loadtxt(data, dtype=dtype, delimiter=";", unpack=True) + assert_array_equal(a, expected["f0"]) + assert_array_equal(b, expected["f1"]) + assert_array_equal(c, expected["f2"]) + assert_array_equal(d, expected["f3"]) + + +def test_structured_dtype_with_shape(): + dtype = np.dtype([("a", "u1", 2), ("b", "u1", 2)]) + data = StringIO("0,1,2,3\n6,7,8,9\n") + expected = np.array([((0, 1), (2, 3)), ((6, 7), (8, 9))], dtype=dtype) + assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dtype), expected) + + +def test_structured_dtype_with_multi_shape(): + dtype = np.dtype([("a", "u1", (2, 2))]) + data = StringIO("0 1 2 3\n") + expected = np.array([(((0, 1), (2, 3)),)], dtype=dtype) + assert_array_equal(np.loadtxt(data, dtype=dtype), expected) + + +def test_nested_structured_subarray(): + # Test from gh-16678 + point = np.dtype([('x', float), ('y', float)]) + dt = np.dtype([('code', int), ('points', point, (2,))]) + data = StringIO("100,1,2,3,4\n200,5,6,7,8\n") + expected = np.array( + [ + (100, [(1., 2.), (3., 4.)]), + (200, [(5., 6.), (7., 8.)]), + ], + dtype=dt + ) + assert_array_equal(np.loadtxt(data, dtype=dt, delimiter=","), expected) + + +def test_structured_dtype_offsets(): + # An aligned structured dtype will have additional padding + dt = np.dtype("i1, i4, i1, i4, i1, i4", align=True) + data = StringIO("1,2,3,4,5,6\n7,8,9,10,11,12\n") + expected = np.array([(1, 2, 
3, 4, 5, 6), (7, 8, 9, 10, 11, 12)], dtype=dt) + assert_array_equal(np.loadtxt(data, delimiter=",", dtype=dt), expected) + + +@pytest.mark.parametrize("param", ("skiprows", "max_rows")) +def test_exception_negative_row_limits(param): + """skiprows and max_rows should raise for negative parameters.""" + with pytest.raises(ValueError, match="argument must be nonnegative"): + np.loadtxt("foo.bar", **{param: -3}) + + +@pytest.mark.parametrize("param", ("skiprows", "max_rows")) +def test_exception_noninteger_row_limits(param): + with pytest.raises(TypeError, match="argument must be an integer"): + np.loadtxt("foo.bar", **{param: 1.0}) + + +@pytest.mark.parametrize( + "data, shape", + [ + ("1 2 3 4 5\n", (1, 5)), # Single row + ("1\n2\n3\n4\n5\n", (5, 1)), # Single column + ] +) +def test_ndmin_single_row_or_col(data, shape): + arr = np.array([1, 2, 3, 4, 5]) + arr2d = arr.reshape(shape) + + assert_array_equal(np.loadtxt(StringIO(data), dtype=int), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=0), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=1), arr) + assert_array_equal(np.loadtxt(StringIO(data), dtype=int, ndmin=2), arr2d) + + +@pytest.mark.parametrize("badval", [-1, 3, None, "plate of shrimp"]) +def test_bad_ndmin(badval): + with pytest.raises(ValueError, match="Illegal value of ndmin keyword"): + np.loadtxt("foo.bar", ndmin=badval) + + +@pytest.mark.parametrize( + "ws", + ( + "\t", # tab + "\u2003", # em + "\u00A0", # non-break + "\u3000", # ideographic space + ) +) +def test_blank_lines_spaces_delimit(ws): + txt = StringIO( + f"1 2{ws}30\n\n4 5 60\n {ws} \n7 8 {ws} 90\n # comment\n3 2 1" + ) + # NOTE: It is unclear that the ` # comment` should succeed. Except + # for delimiter=None, which should use any whitespace (and maybe + # should just be implemented closer to Python + expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) + assert_equal( + np.loadtxt(txt, dtype=int, delimiter=None, comments="#"), expected + ) + + +def test_blank_lines_normal_delimiter(): + txt = StringIO('1,2,30\n\n4,5,60\n\n7,8,90\n# comment\n3,2,1') + expected = np.array([[1, 2, 30], [4, 5, 60], [7, 8, 90], [3, 2, 1]]) + assert_equal( + np.loadtxt(txt, dtype=int, delimiter=',', comments="#"), expected + ) + + +@pytest.mark.parametrize("dtype", (float, object)) +def test_maxrows_no_blank_lines(dtype): + txt = StringIO("1.5,2.5\n3.0,4.0\n5.5,6.0") + res = np.loadtxt(txt, dtype=dtype, delimiter=",", max_rows=2) + assert_equal(res.dtype, dtype) + assert_equal(res, np.array([["1.5", "2.5"], ["3.0", "4.0"]], dtype=dtype)) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", (np.dtype("f8"), np.dtype("i2"))) +def test_exception_message_bad_values(dtype): + txt = StringIO("1,2\n3,XXX\n5,6") + msg = f"could not convert string 'XXX' to {dtype} at row 1, column 2" + with pytest.raises(ValueError, match=msg): + np.loadtxt(txt, dtype=dtype, delimiter=",") + + +def test_converters_negative_indices(): + txt = StringIO('1.5,2.5\n3.0,XXX\n5.5,6.0') + conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} + expected = np.array([[1.5, 2.5], [3.0, np.nan], [5.5, 6.0]]) + res = np.loadtxt( + txt, dtype=np.float64, delimiter=",", converters=conv, encoding=None + ) + assert_equal(res, expected) + + +def test_converters_negative_indices_with_usecols(): + txt = StringIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n') + conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)} + 
expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]]) + res = np.loadtxt( + txt, + dtype=np.float64, + delimiter=",", + converters=conv, + usecols=[0, -1], + encoding=None, + ) + assert_equal(res, expected) + + # Second test with variable number of rows: + res = np.loadtxt(StringIO('''0,1,2\n0,1,2,3,4'''), delimiter=",", + usecols=[0, -1], converters={-1: (lambda x: -1)}) + assert_array_equal(res, [[0, -1], [0, -1]]) + +def test_ragged_usecols(): + # usecols, and negative ones, work even with varying number of columns. + txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") + expected = np.array([[0, 0], [0, 0], [0, 0]]) + res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2]) + assert_equal(res, expected) + + txt = StringIO("0,0,XXX\n0\n0,XXX,XXX,0,XXX\n") + with pytest.raises(ValueError, + match="invalid column index -2 at row 1 with 2 columns"): + # There is no -2 column in the second row: + np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2]) + + +def test_empty_usecols(): + txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n") + res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=[]) + assert res.shape == (3,) + assert res.dtype == np.dtype([]) + + +@pytest.mark.parametrize("c1", ["a", "の", "🫕"]) +@pytest.mark.parametrize("c2", ["a", "の", "🫕"]) +def test_large_unicode_characters(c1, c2): + # c1 and c2 span ascii, 16bit and 32bit range. + txt = StringIO(f"a,{c1},c,1.0\ne,{c2},2.0,g") + res = np.loadtxt(txt, dtype=np.dtype('U12'), delimiter=",") + expected = np.array( + [f"a,{c1},c,1.0".split(","), f"e,{c2},2.0,g".split(",")], + dtype=np.dtype('U12') + ) + assert_equal(res, expected) + + +def test_unicode_with_converter(): + txt = StringIO("cat,dog\nαβγ,δεζ\nabc,def\n") + conv = {0: lambda s: s.upper()} + res = np.loadtxt( + txt, + dtype=np.dtype("U12"), + converters=conv, + delimiter=",", + encoding=None + ) + expected = np.array([['CAT', 'dog'], ['ΑΒΓ', 'δεζ'], ['ABC', 'def']]) + assert_equal(res, expected) + + +def test_converter_with_structured_dtype(): + txt = StringIO('1.5,2.5,Abc\n3.0,4.0,dEf\n5.5,6.0,ghI\n') + dt = np.dtype([('m', np.int32), ('r', np.float32), ('code', 'U8')]) + conv = {0: lambda s: int(10*float(s)), -1: lambda s: s.upper()} + res = np.loadtxt(txt, dtype=dt, delimiter=",", converters=conv) + expected = np.array( + [(15, 2.5, 'ABC'), (30, 4.0, 'DEF'), (55, 6.0, 'GHI')], dtype=dt + ) + assert_equal(res, expected) + + +def test_converter_with_unicode_dtype(): + """ + With the default 'bytes' encoding, tokens are encoded prior to being + passed to the converter. This means that the output of the converter may + be bytes instead of unicode as expected by `read_rows`. + + This test checks that outputs from the above scenario are properly decoded + prior to parsing by `read_rows`. 
+ """ + txt = StringIO('abc,def\nrst,xyz') + conv = bytes.upper + res = np.loadtxt( + txt, dtype=np.dtype("U3"), converters=conv, delimiter=",") + expected = np.array([['ABC', 'DEF'], ['RST', 'XYZ']]) + assert_equal(res, expected) + + +def test_read_huge_row(): + row = "1.5, 2.5," * 50000 + row = row[:-1] + "\n" + txt = StringIO(row * 2) + res = np.loadtxt(txt, delimiter=",", dtype=float) + assert_equal(res, np.tile([1.5, 2.5], (2, 50000))) + + +@pytest.mark.parametrize("dtype", "edfgFDG") +def test_huge_float(dtype): + # Covers a non-optimized path that is rarely taken: + field = "0" * 1000 + ".123456789" + dtype = np.dtype(dtype) + value = np.loadtxt([field], dtype=dtype)[()] + assert value == dtype.type("0.123456789") + + +@pytest.mark.parametrize( + ("given_dtype", "expected_dtype"), + [ + ("S", np.dtype("S5")), + ("U", np.dtype("U5")), + ], +) +def test_string_no_length_given(given_dtype, expected_dtype): + """ + The given dtype is just 'S' or 'U' with no length. In these cases, the + length of the resulting dtype is determined by the longest string found + in the file. + """ + txt = StringIO("AAA,5-1\nBBBBB,0-3\nC,4-9\n") + res = np.loadtxt(txt, dtype=given_dtype, delimiter=",") + expected = np.array( + [['AAA', '5-1'], ['BBBBB', '0-3'], ['C', '4-9']], dtype=expected_dtype + ) + assert_equal(res, expected) + assert_equal(res.dtype, expected_dtype) + + +def test_float_conversion(): + """ + Some tests that the conversion to float64 works as accurately as the + Python built-in `float` function. In a naive version of the float parser, + these strings resulted in values that were off by an ULP or two. + """ + strings = [ + '0.9999999999999999', + '9876543210.123456', + '5.43215432154321e+300', + '0.901', + '0.333', + ] + txt = StringIO('\n'.join(strings)) + res = np.loadtxt(txt) + expected = np.array([float(s) for s in strings]) + assert_equal(res, expected) + + +def test_bool(): + # Simple test for bool via integer + txt = StringIO("1, 0\n10, -1") + res = np.loadtxt(txt, dtype=bool, delimiter=",") + assert res.dtype == bool + assert_array_equal(res, [[True, False], [True, True]]) + # Make sure we use only 1 and 0 on the byte level: + assert_array_equal(res.view(np.uint8), [[1, 0], [1, 1]]) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +def test_integer_signs(dtype): + dtype = np.dtype(dtype) + assert np.loadtxt(["+2"], dtype=dtype) == 2 + if dtype.kind == "u": + with pytest.raises(ValueError): + np.loadtxt(["-1\n"], dtype=dtype) + else: + assert np.loadtxt(["-2\n"], dtype=dtype) == -2 + + for sign in ["++", "+-", "--", "-+"]: + with pytest.raises(ValueError): + np.loadtxt([f"{sign}2\n"], dtype=dtype) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) +def test_implicit_cast_float_to_int_fails(dtype): + txt = StringIO("1.0, 2.1, 3.7\n4, 5, 6") + with pytest.raises(ValueError): + np.loadtxt(txt, dtype=dtype, delimiter=",") + +@pytest.mark.parametrize("dtype", (np.complex64, np.complex128)) +@pytest.mark.parametrize("with_parens", (False, True)) +def test_complex_parsing(dtype, with_parens): + s = "(1.0-2.5j),3.75,(7+-5.0j)\n(4),(-19e2j),(0)" + if not with_parens: + s = s.replace("(", "").replace(")", "") + + res = np.loadtxt(StringIO(s), dtype=dtype, delimiter=",") + expected = np.array( + [[1.0-2.5j, 3.75, 7-5j], [4.0, -1900j, 
0]], dtype=dtype + ) + assert_equal(res, expected) + + +def test_read_from_generator(): + def gen(): + for i in range(4): + yield f"{i},{2*i},{i**2}" + + res = np.loadtxt(gen(), dtype=int, delimiter=",") + expected = np.array([[0, 0, 0], [1, 2, 1], [2, 4, 4], [3, 6, 9]]) + assert_equal(res, expected) + + +def test_read_from_generator_multitype(): + def gen(): + for i in range(3): + yield f"{i} {i / 4}" + + res = np.loadtxt(gen(), dtype="i, d", delimiter=" ") + expected = np.array([(0, 0.0), (1, 0.25), (2, 0.5)], dtype="i, d") + assert_equal(res, expected) + + +def test_read_from_bad_generator(): + def gen(): + for entry in ["1,2", b"3, 5", 12738]: + yield entry + + with pytest.raises( + TypeError, match=r"non-string returned while reading data"): + np.loadtxt(gen(), dtype="i, i", delimiter=",") + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +def test_object_cleanup_on_read_error(): + sentinel = object() + already_read = 0 + + def conv(x): + nonlocal already_read + if already_read > 4999: + raise ValueError("failed half-way through!") + already_read += 1 + return sentinel + + txt = StringIO("x\n" * 10000) + + with pytest.raises(ValueError, match="at row 5000, column 1"): + np.loadtxt(txt, dtype=object, converters={0: conv}) + + assert sys.getrefcount(sentinel) == 2 + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +def test_character_not_bytes_compatible(): + """Test exception when a character cannot be encoded as 'S'.""" + data = StringIO("–") # == \u2013 + with pytest.raises(ValueError): + np.loadtxt(data, dtype="S5") + + +@pytest.mark.parametrize("conv", (0, [float], "")) +def test_invalid_converter(conv): + msg = ( + "converters must be a dictionary mapping columns to converter " + "functions or a single callable." 
+ ) + with pytest.raises(TypeError, match=msg): + np.loadtxt(StringIO("1 2\n3 4"), converters=conv) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +def test_converters_dict_raises_non_integer_key(): + with pytest.raises(TypeError, match="keys of the converters dict"): + np.loadtxt(StringIO("1 2\n3 4"), converters={"a": int}) + with pytest.raises(TypeError, match="keys of the converters dict"): + np.loadtxt(StringIO("1 2\n3 4"), converters={"a": int}, usecols=0) + + +@pytest.mark.parametrize("bad_col_ind", (3, -3)) +def test_converters_dict_raises_non_col_key(bad_col_ind): + data = StringIO("1 2\n3 4") + with pytest.raises(ValueError, match="converter specified for column"): + np.loadtxt(data, converters={bad_col_ind: int}) + + +def test_converters_dict_raises_val_not_callable(): + with pytest.raises(TypeError, + match="values of the converters dictionary must be callable"): + np.loadtxt(StringIO("1 2\n3 4"), converters={0: 1}) + + +@pytest.mark.parametrize("q", ('"', "'", "`")) +def test_quoted_field(q): + txt = StringIO( + f"{q}alpha, x{q}, 2.5\n{q}beta, y{q}, 4.5\n{q}gamma, z{q}, 5.0\n" + ) + dtype = np.dtype([('f0', 'U8'), ('f1', np.float64)]) + expected = np.array( + [("alpha, x", 2.5), ("beta, y", 4.5), ("gamma, z", 5.0)], dtype=dtype + ) + + res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar=q) + assert_array_equal(res, expected) + + +def test_quote_support_default(): + """Support for quoted fields is disabled by default.""" + txt = StringIO('"lat,long", 45, 30\n') + dtype = np.dtype([('f0', 'U24'), ('f1', np.float64), ('f2', np.float64)]) + + with pytest.raises(ValueError, match="the number of columns changed"): + np.loadtxt(txt, dtype=dtype, delimiter=",") + + # Enable quoting support with non-None value for quotechar param + txt.seek(0) + expected = np.array([("lat,long", 45., 30.)], dtype=dtype) + + res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"') + assert_array_equal(res, expected) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +def test_quotechar_multichar_error(): + txt = StringIO("1,2\n3,4") + msg = r".*must be a single unicode character or None" + with pytest.raises(TypeError, match=msg): + np.loadtxt(txt, delimiter=",", quotechar="''") + + +def test_comment_multichar_error_with_quote(): + txt = StringIO("1,2\n3,4") + msg = ( + "when multiple comments or a multi-character comment is given, " + "quotes are not supported." 
+ ) + with pytest.raises(ValueError, match=msg): + np.loadtxt(txt, delimiter=",", comments="123", quotechar='"') + with pytest.raises(ValueError, match=msg): + np.loadtxt(txt, delimiter=",", comments=["#", "%"], quotechar='"') + + # A single character string in a tuple is unpacked though: + res = np.loadtxt(txt, delimiter=",", comments=("#",), quotechar="'") + assert_equal(res, [[1, 2], [3, 4]]) + + +def test_structured_dtype_with_quotes(): + data = StringIO( + ( + "1000;2.4;'alpha';-34\n" + "2000;3.1;'beta';29\n" + "3500;9.9;'gamma';120\n" + "4090;8.1;'delta';0\n" + "5001;4.4;'epsilon';-99\n" + "6543;7.8;'omega';-1\n" + ) + ) + dtype = np.dtype( + [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)] + ) + expected = np.array( + [ + (1000, 2.4, "alpha", -34), + (2000, 3.1, "beta", 29), + (3500, 9.9, "gamma", 120), + (4090, 8.1, "delta", 0), + (5001, 4.4, "epsilon", -99), + (6543, 7.8, "omega", -1) + ], + dtype=dtype + ) + res = np.loadtxt(data, dtype=dtype, delimiter=";", quotechar="'") + assert_array_equal(res, expected) + + +def test_quoted_field_is_not_empty(): + txt = StringIO('1\n\n"4"\n""') + expected = np.array(["1", "4", ""], dtype="U1") + res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"') + assert_equal(res, expected) + +def test_quoted_field_is_not_empty_nonstrict(): + # Same as test_quoted_field_is_not_empty but check that we are not strict + # about missing closing quote (this is the `csv.reader` default also) + txt = StringIO('1\n\n"4"\n"') + expected = np.array(["1", "4", ""], dtype="U1") + res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"') + assert_equal(res, expected) + +def test_consecutive_quotechar_escaped(): + txt = StringIO('"Hello, my name is ""Monty""!"') + expected = np.array('Hello, my name is "Monty"!', dtype="U40") + res = np.loadtxt(txt, dtype="U40", delimiter=",", quotechar='"') + assert_equal(res, expected) + + +@pytest.mark.parametrize("data", ("", "\n\n\n", "# 1 2 3\n# 4 5 6\n")) +@pytest.mark.parametrize("ndmin", (0, 1, 2)) +@pytest.mark.parametrize("usecols", [None, (1, 2, 3)]) +def test_warn_on_no_data(data, ndmin, usecols): + """Check that a UserWarning is emitted when no data is read from input.""" + if usecols is not None: + expected_shape = (0, 3) + elif ndmin == 2: + expected_shape = (0, 1) # guess a single column?! 
+ else: + expected_shape = (0,) + + txt = StringIO(data) + with pytest.warns(UserWarning, match="input contained no data"): + res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols) + assert res.shape == expected_shape + + with NamedTemporaryFile(mode="w") as fh: + fh.write(data) + fh.seek(0) + with pytest.warns(UserWarning, match="input contained no data"): + res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols) + assert res.shape == expected_shape + +@pytest.mark.parametrize("skiprows", (2, 3)) +def test_warn_on_skipped_data(skiprows): + data = "1 2 3\n4 5 6" + txt = StringIO(data) + with pytest.warns(UserWarning, match="input contained no data"): + np.loadtxt(txt, skiprows=skiprows) + + +@pytest.mark.parametrize(["dtype", "value"], [ + ("i2", 0x0001), ("u2", 0x0001), + ("i4", 0x00010203), ("u4", 0x00010203), + ("i8", 0x0001020304050607), ("u8", 0x0001020304050607), + # The following values are constructed to lead to unique bytes: + ("float16", 3.07e-05), + ("float32", 9.2557e-41), ("complex64", 9.2557e-41+2.8622554e-29j), + ("float64", -1.758571353180402e-24), + # Here and below, the repr side-steps a small loss of precision in + # complex `str` in PyPy (which is probably fine, as repr works): + ("complex128", repr(5.406409232372729e-29-1.758571353180402e-24j)), + # Use integer values that fit into double. Everything else leads to + # problems due to longdoubles going via double and decimal strings + # causing rounding errors. + ("longdouble", 0x01020304050607), + ("clongdouble", repr(0x01020304050607 + (0x00121314151617 * 1j))), + ("U2", "\U00010203\U000a0b0c")]) +@pytest.mark.parametrize("swap", [True, False]) +def test_byteswapping_and_unaligned(dtype, value, swap): + # Try to create "interesting" values within the valid unicode range: + dtype = np.dtype(dtype) + data = [f"x,{value}\n"] # repr as PyPy `str` truncates some + if swap: + dtype = dtype.newbyteorder() + full_dt = np.dtype([("a", "S1"), ("b", dtype)], align=False) + # The above ensures that the interesting "b" field is unaligned: + assert full_dt.fields["b"][1] == 1 + res = np.loadtxt(data, dtype=full_dt, delimiter=",", encoding=None, + max_rows=1) # max-rows prevents over-allocation + assert res["b"] == dtype.type(value) + + +@pytest.mark.parametrize("dtype", + np.typecodes["AllInteger"] + "efdFD" + "?") +def test_unicode_whitespace_stripping(dtype): + # Test that all numeric types (and bool) strip whitespace correctly + # \u202F is a narrow no-break space, `\n` is just a whitespace if quoted. 
+ # Currently, skip float128 as it did not always support this and has no + # "custom" parsing: + txt = StringIO(' 3 ,"\u202F2\n"') + res = np.loadtxt(txt, dtype=dtype, delimiter=",", quotechar='"') + assert_array_equal(res, np.array([3, 2]).astype(dtype)) + + +@pytest.mark.parametrize("dtype", "FD") +def test_unicode_whitespace_stripping_complex(dtype): + # Complex has a few extra cases since it has two components and + # parentheses + line = " 1 , 2+3j , ( 4+5j ), ( 6+-7j ) , 8j , ( 9j ) \n" + data = [line, line.replace(" ", "\u202F")] + res = np.loadtxt(data, dtype=dtype, delimiter=',') + assert_array_equal(res, np.array([[1, 2+3j, 4+5j, 6-7j, 8j, 9j]] * 2)) + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", "FD") +@pytest.mark.parametrize("field", + ["1 +2j", "1+ 2j", "1+2 j", "1+-+3", "(1j", "(1", "(1+2j", "1+2j)"]) +def test_bad_complex(dtype, field): + with pytest.raises(ValueError): + np.loadtxt([field + "\n"], dtype=dtype, delimiter=",") + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", + np.typecodes["AllInteger"] + "efgdFDG" + "?") +def test_nul_character_error(dtype): + # Test that a \0 character is correctly recognized as an error even if + # what comes before is valid (not everything gets parsed internally). + if dtype.lower() == "g": + pytest.xfail("longdouble/clongdouble assignment may misbehave.") + with pytest.raises(ValueError): + np.loadtxt(["1\000"], dtype=dtype, delimiter=",", quotechar='"') + + +@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), + reason="PyPy bug in error formatting") +@pytest.mark.parametrize("dtype", + np.typecodes["AllInteger"] + "efgdFDG" + "?") +def test_no_thousands_support(dtype): + # Mainly to document behaviour, Python supports thousands like 1_1. + # (e and G may end up using different conversion and support it, this is + # a bug but happens...) + if dtype == "e": + pytest.skip("half assignment currently uses Python float converter") + if dtype in "eG": + pytest.xfail("clongdouble assignment is buggy (uses `complex`?).") + + assert int("1_1") == float("1_1") == complex("1_1") == 11 + with pytest.raises(ValueError): + np.loadtxt(["1_1\n"], dtype=dtype) + + +@pytest.mark.parametrize("data", [ + ["1,2\n", "2\n,3\n"], + ["1,2\n", "2\r,3\n"]]) +def test_bad_newline_in_iterator(data): + # In NumPy <=1.22 this was accepted, because newlines were completely + # ignored when the input was an iterable. This could be changed, but right + # now, we raise an error. + msg = "Found an unquoted embedded newline within a single line" + with pytest.raises(ValueError, match=msg): + np.loadtxt(data, delimiter=",") + + +@pytest.mark.parametrize("data", [ + ["1,2\n", "2,3\r\n"], # a universal newline + ["1,2\n", "'2\n',3\n"], # a quoted newline + ["1,2\n", "'2\r',3\n"], + ["1,2\n", "'2\r\n',3\n"], +]) +def test_good_newline_in_iterator(data): + # The quoted newlines will be untransformed here, but are just whitespace. + res = np.loadtxt(data, delimiter=",", quotechar="'") + assert_array_equal(res, [[1., 2.], [2., 3.]]) + + +@pytest.mark.parametrize("newline", ["\n", "\r", "\r\n"]) +def test_universal_newlines_quoted(newline): + # Check that universal newline support within the tokenizer is not applied + # to quoted fields. 
+    # fields will not include a newline at all)
+    data = ['1,"2\n"\n', '3,"4\n', '1"\n']
+    data = [row.replace("\n", newline) for row in data]
+    res = np.loadtxt(data, dtype=object, delimiter=",", quotechar='"')
+    assert_array_equal(res, [['1', f'2{newline}'], ['3', f'4{newline}1']])
+
+
+def test_null_character():
+    # Basic tests to check that the NUL character is not special:
+    res = np.loadtxt(["1\0002\0003\n", "4\0005\0006"], delimiter="\000")
+    assert_array_equal(res, [[1, 2, 3], [4, 5, 6]])
+
+    # Also not as part of a field (avoid unicode/arrays as unicode strips \0)
+    res = np.loadtxt(["1\000,2\000,3\n", "4\000,5\000,6"],
+                     delimiter=",", dtype=object)
+    assert res.tolist() == [["1\000", "2\000", "3"], ["4\000", "5\000", "6"]]
+
+
+def test_iterator_fails_getting_next_line():
+    class BadSequence:
+        def __len__(self):
+            return 100
+
+        def __getitem__(self, item):
+            if item == 50:
+                raise RuntimeError("Bad things happened!")
+            return f"{item}, {item+1}"
+
+    with pytest.raises(RuntimeError, match="Bad things happened!"):
+        np.loadtxt(BadSequence(), dtype=int, delimiter=",")
+
+
+class TestCReaderUnitTests:
+    # These are internal tests for paths that should not be possible to hit
+    # unless things go very very wrong somewhere.
+    def test_not_an_filelike(self):
+        with pytest.raises(AttributeError, match=".*read"):
+            np.core._multiarray_umath._load_from_filelike(
+                object(), dtype=np.dtype("i"), filelike=True)
+
+    def test_filelike_read_fails(self):
+        # Can only be reached if loadtxt opens the file, so it is hard to do
+        # via the public interface (although maybe not impossible considering
+        # the current "DataClass" backing).
+        class BadFileLike:
+            counter = 0
+
+            def read(self, size):
+                self.counter += 1
+                if self.counter > 20:
+                    raise RuntimeError("Bad bad bad!")
+                return "1,2,3\n"
+
+        with pytest.raises(RuntimeError, match="Bad bad bad!"):
+            np.core._multiarray_umath._load_from_filelike(
+                BadFileLike(), dtype=np.dtype("i"), filelike=True)
+
+    def test_filelike_bad_read(self):
+        # Can only be reached if loadtxt opens the file, so it is hard to do
+        # via the public interface (although maybe not impossible considering
+        # the current "DataClass" backing).
+
+        class BadFileLike:
+            counter = 0
+
+            def read(self, size):
+                return 1234  # not a string!
+
+        with pytest.raises(TypeError,
+                    match="non-string returned while reading data"):
+            np.core._multiarray_umath._load_from_filelike(
+                BadFileLike(), dtype=np.dtype("i"), filelike=True)
+
+    def test_not_an_iter(self):
+        with pytest.raises(TypeError,
+                    match="error reading from object, expected an iterable"):
+            np.core._multiarray_umath._load_from_filelike(
+                object(), dtype=np.dtype("i"), filelike=False)
+
+    def test_bad_type(self):
+        with pytest.raises(TypeError, match="internal error: dtype must"):
+            np.core._multiarray_umath._load_from_filelike(
+                object(), dtype="i", filelike=False)
+
+    def test_bad_encoding(self):
+        with pytest.raises(TypeError, match="encoding must be a unicode"):
+            np.core._multiarray_umath._load_from_filelike(
+                object(), dtype=np.dtype("i"), filelike=False, encoding=123)
+
+    @pytest.mark.parametrize("newline", ["\r", "\n", "\r\n"])
+    def test_manual_universal_newlines(self, newline):
+        # This is currently not available to users, because we should always
+        # open files with universal newlines enabled `newline=None`.
+        # (And reading from an iterator uses slightly different code paths.)
+        # We have no real support for `newline="\r"` or `newline="\n"` as the
+        # user cannot specify those options.
+        data = StringIO('0\n1\n"2\n"\n3\n4 #\n'.replace("\n", newline),
+                        newline="")
+
+        res = np.core._multiarray_umath._load_from_filelike(
+            data, dtype=np.dtype("U10"), filelike=True,
+            quote='"', comment="#", skiplines=1)
+        assert_array_equal(res[:, 0], ["1", f"2{newline}", "3", "4 "])
+
+
+def test_delimiter_comment_collision_raises():
+    with pytest.raises(TypeError, match=".*control characters.*incompatible"):
+        np.loadtxt(StringIO("1, 2, 3"), delimiter=",", comments=",")
+
+
+def test_delimiter_quotechar_collision_raises():
+    with pytest.raises(TypeError, match=".*control characters.*incompatible"):
+        np.loadtxt(StringIO("1, 2, 3"), delimiter=",", quotechar=",")
+
+
+def test_comment_quotechar_collision_raises():
+    with pytest.raises(TypeError, match=".*control characters.*incompatible"):
+        np.loadtxt(StringIO("1 2 3"), comments="#", quotechar="#")
+
+
+def test_delimiter_and_multiple_comments_collision_raises():
+    with pytest.raises(
+        TypeError, match="Comment characters.*cannot include the delimiter"
+    ):
+        np.loadtxt(StringIO("1, 2, 3"), delimiter=",", comments=["#", ","])
+
+
+@pytest.mark.parametrize(
+    "ws",
+    (
+        " ",  # space
+        "\t",  # tab
+        "\u2003",  # em
+        "\u00A0",  # non-break
+        "\u3000",  # ideographic space
+    )
+)
+def test_collision_with_default_delimiter_raises(ws):
+    with pytest.raises(TypeError, match=".*control characters.*incompatible"):
+        np.loadtxt(StringIO(f"1{ws}2{ws}3\n4{ws}5{ws}6\n"), comments=ws)
+    with pytest.raises(TypeError, match=".*control characters.*incompatible"):
+        np.loadtxt(StringIO(f"1{ws}2{ws}3\n4{ws}5{ws}6\n"), quotechar=ws)
+
+
+@pytest.mark.parametrize("nl", ("\n", "\r"))
+def test_control_character_newline_raises(nl):
+    txt = StringIO(f"1{nl}2{nl}3{nl}{nl}4{nl}5{nl}6{nl}{nl}")
+    msg = "control character.*cannot be a newline"
+    with pytest.raises(TypeError, match=msg):
+        np.loadtxt(txt, delimiter=nl)
+    with pytest.raises(TypeError, match=msg):
+        np.loadtxt(txt, comments=nl)
+    with pytest.raises(TypeError, match=msg):
+        np.loadtxt(txt, quotechar=nl)
+
+
+@pytest.mark.parametrize(
+    ("generic_data", "long_datum", "unitless_dtype", "expected_dtype"),
+    [
+        ("2012-03", "2013-01-15", "M8", "M8[D]"),  # Datetimes
+        ("spam-a-lot", "tis_but_a_scratch", "U", "U17"),  # str
+    ],
+)
+@pytest.mark.parametrize("nrows", (10, 50000, 60000))  # lt, eq, gt chunksize
+def test_parametric_unit_discovery(
+    generic_data, long_datum, unitless_dtype, expected_dtype, nrows
+):
+    """Check that the correct unit (e.g. month, day, second) is discovered from
+    the data when a user specifies a unitless datetime."""
+    # Unit should be "D" (days) due to last entry
+    data = [generic_data] * nrows + [long_datum]
+    expected = np.array(data, dtype=expected_dtype)
+
+    # file-like path
+    txt = StringIO("\n".join(data))
+    a = np.loadtxt(txt, dtype=unitless_dtype)
+    assert a.dtype == expected.dtype
+    assert_equal(a, expected)
+
+    # file-obj path
+    fd, fname = mkstemp()
+    with open(fname, "w") as fh:
+        fh.write("\n".join(data))
+    a = np.loadtxt(fname, dtype=unitless_dtype)
+    assert a.dtype == expected.dtype
+    assert_equal(a, expected)
+
+
+def test_str_dtype_unit_discovery_with_converter():
+    data = ["spam-a-lot"] * 60000 + ["XXXtis_but_a_scratch"]
+    expected = np.array(
+        ["spam-a-lot"] * 60000 + ["tis_but_a_scratch"], dtype="U17"
+    )
+    conv = lambda s: s.strip("XXX")
+
+    # file-like path
+    txt = StringIO("\n".join(data))
+    a = np.loadtxt(txt, dtype="U", converters=conv, encoding=None)
+    assert a.dtype == expected.dtype
+    assert_equal(a, expected)
+
+    # file-obj path
+    fd, fname = mkstemp()
+    with open(fname, "w") as fh:
+        fh.write("\n".join(data))
+    a = np.loadtxt(fname, dtype="U", converters=conv, encoding=None)
+    assert a.dtype == expected.dtype
+    assert_equal(a, expected)
+
+
+@pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8),
+                    reason="PyPy bug in error formatting")
+def test_control_character_empty():
+    with pytest.raises(TypeError, match="Text reading control character must"):
+        np.loadtxt(StringIO("1 2 3"), delimiter="")
+    with pytest.raises(TypeError, match="Text reading control character must"):
+        np.loadtxt(StringIO("1 2 3"), quotechar="")
+    with pytest.raises(ValueError, match="comments cannot be an empty string"):
+        np.loadtxt(StringIO("1 2 3"), comments="")
+    with pytest.raises(ValueError, match="comments cannot be an empty string"):
+        np.loadtxt(StringIO("1 2 3"), comments=["#", ""])
+
+
+def test_control_characters_as_bytes():
+    """Byte control characters (comments, delimiter) are supported."""
+    a = np.loadtxt(StringIO("#header\n1,2,3"), comments=b"#", delimiter=b",")
+    assert_equal(a, [1, 2, 3])
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index 2c71e45bd..80a6fdd10 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -810,7 +810,7 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='',
                 'Mismatched elements: {} / {} ({:.3g}%)'.format(
                     n_mismatch, n_elements, percent_mismatch)]
 
-        with errstate(invalid='ignore', divide='ignore'):
+        with errstate(all='ignore'):
             # ignore errors for non-numeric types
             with contextlib.suppress(TypeError):
                 error = abs(x - y)
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index 31d2cdc76..919ca751f 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -207,6 +207,14 @@ class TestArrayEqual(_GenericTest):
         self._test_not_equal(a, b)
         self._test_not_equal(b, a)
 
+    def test_suppress_overflow_warnings(self):
+        # Based on issue #18992
+        with pytest.raises(AssertionError):
+            with np.errstate(all="raise"):
+                np.testing.assert_array_equal(
+                    np.array([1, 2, 3], np.float32),
+                    np.array([1, 1e-40, 3], np.float32))
+
 
 class TestBuildErrorMessage:
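
The new test_loadtxt cases above exercise the quoting and control-character validation of the rewritten np.loadtxt. As a minimal, standalone usage sketch (not part of the patch; it only assumes the behaviour the new tests assert on a NumPy build containing this C text reader), the interplay of delimiter, quotechar and comments looks roughly like this:

    import numpy as np
    from io import StringIO

    # A quoted field keeps its embedded newline and the quote characters are
    # stripped from the parsed value (cf. test_universal_newlines_quoted):
    res = np.loadtxt(['1,"2\n"\n', '3,"4\n', '1"\n'],
                     dtype=object, delimiter=",", quotechar='"')
    assert res.tolist() == [['1', '2\n'], ['3', '4\n1']]

    # Control characters must be distinct: reusing the delimiter as a comment
    # character is rejected (cf. test_delimiter_comment_collision_raises):
    try:
        np.loadtxt(StringIO("1, 2, 3"), delimiter=",", comments=",")
    except TypeError as exc:
        print(exc)  # message mentions that the control characters are incompatible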
