summaryrefslogtreecommitdiff
path: root/numpy/core/src/multiarray
diff options
context:
space:
mode:
authorSebastian Berg <sebastian@sipsolutions.net>2022-01-13 14:26:27 -0600
committerSebastian Berg <sebastian@sipsolutions.net>2022-01-14 20:07:07 -0600
commitc000c1e67477a6bfbc23326ed19af4177f5a80e9 (patch)
tree769d716e0e912049a7905db2720a2fd73b276e2c /numpy/core/src/multiarray
parente2d9f6b8f34b45657773b42f1c1334e075b443b3 (diff)
downloadnumpy-c000c1e67477a6bfbc23326ed19af4177f5a80e9.tar.gz
BUG: Make sure num-fields is intp/ssize_t compatible
In theory (if homogeneous) we actually should support more than 2**31 columns. This should fix that. Also cap the overallocation scheme, so that we don't waste quite so much memory in these extreme cases.
Diffstat (limited to 'numpy/core/src/multiarray')
-rw-r--r--numpy/core/src/multiarray/textreading/growth.c8
-rw-r--r--numpy/core/src/multiarray/textreading/rows.c10
2 files changed, 13 insertions, 5 deletions
diff --git a/numpy/core/src/multiarray/textreading/growth.c b/numpy/core/src/multiarray/textreading/growth.c
index 2afd3f82c..49a09d572 100644
--- a/numpy/core/src/multiarray/textreading/growth.c
+++ b/numpy/core/src/multiarray/textreading/growth.c
@@ -6,6 +6,10 @@
/*
* Helper function taking the size input and growing it (based on min_grow).
+ * The current scheme is a minimum growth and a general growth by 25%
+ * overallocation. This is then capped at 2**20 elements, as that propels us
+ * in the range of large page sizes (so it is presumably more than enough).
+ *
* It further multiplies it with `itemsize` and ensures that all results fit
* into an `npy_intp`.
* Returns -1 if any overflow occurred or the result would not fit.
@@ -22,6 +26,10 @@ grow_size_and_multiply(npy_intp *size, npy_intp min_grow, npy_intp itemsize) {
new_size += min_grow;
}
else {
+ if (growth > 1 << 20) {
+ /* limit growth to order of MiB (even hugepages are not larger) */
+ growth = 1 << 20;
+ }
new_size += growth + min_grow - 1;
new_size &= ~min_grow;
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 8c95ba537..37a1bd67f 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -31,7 +31,7 @@
*/
static PyObject **
create_conv_funcs(
- PyObject *converters, int num_fields, Py_ssize_t *usecols)
+ PyObject *converters, Py_ssize_t num_fields, const Py_ssize_t *usecols)
{
PyObject **conv_funcs = PyMem_Calloc(num_fields, sizeof(PyObject *));
if (conv_funcs == NULL) {
@@ -44,7 +44,7 @@ create_conv_funcs(
}
else if (PyCallable_Check(converters)) {
/* a single converter used for all columns individually */
- for (int i = 0; i < num_fields; i++) {
+ for (Py_ssize_t i = 0; i < num_fields; i++) {
Py_INCREF(converters);
conv_funcs[i] = converters;
}
@@ -77,7 +77,7 @@ create_conv_funcs(
* converters does not. (This is a feature, since it allows
* us to correctly normalize converters to result column here.)
*/
- int i = 0;
+ Py_ssize_t i = 0;
for (; i < num_fields; i++) {
if (column == usecols[i]) {
column = i;
@@ -111,7 +111,7 @@ create_conv_funcs(
return conv_funcs;
error:
- for (int i = 0; i < num_fields; i++) {
+ for (Py_ssize_t i = 0; i < num_fields; i++) {
Py_XDECREF(conv_funcs[i]);
}
PyMem_FREE(conv_funcs);
@@ -184,7 +184,7 @@ read_rows(stream *s,
}
/* Set the actual number of fields if it is already known, otherwise -1 */
- int actual_num_fields = -1;
+ Py_ssize_t actual_num_fields = -1;
if (usecols != NULL) {
actual_num_fields = num_usecols;
assert(num_field_types == num_usecols);