diff options
| author | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-13 14:26:27 -0600 |
|---|---|---|
| committer | Sebastian Berg <sebastian@sipsolutions.net> | 2022-01-14 20:07:07 -0600 |
| commit | c000c1e67477a6bfbc23326ed19af4177f5a80e9 (patch) | |
| tree | 769d716e0e912049a7905db2720a2fd73b276e2c /numpy/core/src/multiarray | |
| parent | e2d9f6b8f34b45657773b42f1c1334e075b443b3 (diff) | |
| download | numpy-c000c1e67477a6bfbc23326ed19af4177f5a80e9.tar.gz | |
BUG: Make sure num-fields is intp/ssize_t compatible
In theory (if homogeneous) we actually should support more than
2**31 columns. This should fix that.
Also cap the overallocation scheme, so that we don't waste quite so
much memory in these extreme cases.
Diffstat (limited to 'numpy/core/src/multiarray')
| -rw-r--r-- | numpy/core/src/multiarray/textreading/growth.c | 8 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/textreading/rows.c | 10 |
2 files changed, 13 insertions, 5 deletions
diff --git a/numpy/core/src/multiarray/textreading/growth.c b/numpy/core/src/multiarray/textreading/growth.c
index 2afd3f82c..49a09d572 100644
--- a/numpy/core/src/multiarray/textreading/growth.c
+++ b/numpy/core/src/multiarray/textreading/growth.c
@@ -6,6 +6,10 @@
 /*
  * Helper function taking the size input and growing it (based on min_grow).
+ * The current scheme is a minimum growth and a general growth by 25%
+ * overallocation. This is then capped at 2**20 elements, as that propels us
+ * in the range of large page sizes (so it is presumably more than enough).
+ *
  * It further multiplies it with `itemsize` and ensures that all results fit
  * into an `npy_intp`.
  * Returns -1 if any overflow occurred or the result would not fit.
@@ -22,6 +26,10 @@ grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize) {
         new_size += min_grow;
     }
     else {
+        if (growth > 1 << 20) {
+            /* limit growth to order of MiB (even hugepages are not larger) */
+            growth = 1 << 20;
+        }
         new_size += growth + min_grow - 1;
         new_size &= ~min_grow;
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 8c95ba537..37a1bd67f 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -31,7 +31,7 @@
  */
 static PyObject **
 create_conv_funcs(
-        PyObject *converters, int num_fields, Py_ssize_t *usecols)
+        PyObject *converters, Py_ssize_t num_fields, const Py_ssize_t *usecols)
 {
     PyObject **conv_funcs = PyMem_Calloc(num_fields, sizeof(PyObject *));
     if (conv_funcs == NULL) {
@@ -44,7 +44,7 @@ create_conv_funcs(
     }
     else if (PyCallable_Check(converters)) {
         /* a single converter used for all columns individually */
-        for (int i = 0; i < num_fields; i++) {
+        for (Py_ssize_t i = 0; i < num_fields; i++) {
             Py_INCREF(converters);
             conv_funcs[i] = converters;
         }
@@ -77,7 +77,7 @@ create_conv_funcs(
              * converters does not.  (This is a feature, since it allows
              * us to correctly normalize converters to result column here.)
              */
-            int i = 0;
+            Py_ssize_t i = 0;
             for (; i < num_fields; i++) {
                 if (column == usecols[i]) {
                     column = i;
@@ -111,7 +111,7 @@ create_conv_funcs(
     return conv_funcs;

   error:
-    for (int i = 0; i < num_fields; i++) {
+    for (Py_ssize_t i = 0; i < num_fields; i++) {
         Py_XDECREF(conv_funcs[i]);
     }
     PyMem_FREE(conv_funcs);
@@ -184,7 +184,7 @@ read_rows(stream *s,
     }

     /* Set the actual number of fields if it is already known, otherwise -1 */
-    int actual_num_fields = -1;
+    Py_ssize_t actual_num_fields = -1;
     if (usecols != NULL) {
         actual_num_fields = num_usecols;
         assert(num_field_types == num_usecols);
