diff options
| author | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-13 14:26:27 -0600 |
|---|---|---|
| committer | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-14 20:07:07 -0600 |
| commit | c000c1e67477a6bfbc23326ed19af4177f5a80e9 (patch) | |
| tree | 769d716e0e912049a7905db2720a2fd73b276e2c /numpy/core/src/multiarray | |
| parent | e2d9f6b8f34b45657773b42f1c1334e075b443b3 (diff) | |
| download | numpy-c000c1e67477a6bfbc23326ed19af4177f5a80e9.tar.gz | |
BUG: Make sure num-fields is intp/ssize_t compatible
In theory (if homogeneous) we actually should support more than
2**31 columns. This should fix that.
Also cap the overallocation scheme, so that we don't waste quite so
much memory in these extreme cases.
Diffstat (limited to 'numpy/core/src/multiarray')
| -rw-r--r-- | numpy/core/src/multiarray/textreading/growth.c | 8 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/textreading/rows.c | 10 |
2 files changed, 13 insertions, 5 deletions
diff --git a/numpy/core/src/multiarray/textreading/growth.c b/numpy/core/src/multiarray/textreading/growth.c
index 2afd3f82c..49a09d572 100644
--- a/numpy/core/src/multiarray/textreading/growth.c
+++ b/numpy/core/src/multiarray/textreading/growth.c
@@ -6,6 +6,10 @@
 /*
  * Helper function taking the size input and growing it (based on min_grow).
+ * The current scheme is a minimum growth and a general growth by 25%
+ * overallocation. This is then capped at 2**20 elements, as that propels us
+ * in the range of large page sizes (so it is presumably more than enough).
+ *
  * It further multiplies it with `itemsize` and ensures that all results fit
  * into an `npy_intp`.
  * Returns -1 if any overflow occurred or the result would not fit.
@@ -22,6 +26,10 @@ grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize) {
         new_size += min_grow;
     }
     else {
+        if (growth > 1 << 20) {
+            /* limit growth to order of MiB (even hugepages are not larger) */
+            growth = 1 << 20;
+        }
         new_size += growth + min_grow - 1;
         new_size &= ~min_grow;
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 8c95ba537..37a1bd67f 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -31,7 +31,7 @@
  */
 static PyObject **
 create_conv_funcs(
-        PyObject *converters, int num_fields, Py_ssize_t *usecols)
+        PyObject *converters, Py_ssize_t num_fields, const Py_ssize_t *usecols)
 {
     PyObject **conv_funcs = PyMem_Calloc(num_fields, sizeof(PyObject *));
     if (conv_funcs == NULL) {
@@ -44,7 +44,7 @@ create_conv_funcs(
     }
     else if (PyCallable_Check(converters)) {
         /* a single converter used for all columns individually */
-        for (int i = 0; i < num_fields; i++) {
+        for (Py_ssize_t i = 0; i < num_fields; i++) {
             Py_INCREF(converters);
             conv_funcs[i] = converters;
         }
@@ -77,7 +77,7 @@ create_conv_funcs(
              * converters does not.  (This is a feature, since it allows
              * us to correctly normalize converters to result column here.)
              */
-            int i = 0;
+            Py_ssize_t i = 0;
             for (; i < num_fields; i++) {
                 if (column == usecols[i]) {
                     column = i;
@@ -111,7 +111,7 @@ create_conv_funcs(
     return conv_funcs;

   error:
-    for (int i = 0; i < num_fields; i++) {
+    for (Py_ssize_t i = 0; i < num_fields; i++) {
         Py_XDECREF(conv_funcs[i]);
     }
     PyMem_FREE(conv_funcs);
@@ -184,7 +184,7 @@ read_rows(stream *s,
     }

     /* Set the actual number of fields if it is already known, otherwise -1 */
-    int actual_num_fields = -1;
+    Py_ssize_t actual_num_fields = -1;
     if (usecols != NULL) {
         actual_num_fields = num_usecols;
         assert(num_field_types == num_usecols);
