diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2022-04-21 10:38:30 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-21 10:38:30 -0600 |
commit | 79a5000a72f6e4432c0a76efa6cf8e3f12ecef9e (patch) | |
tree | 9ad4917e66cb67598519671c3d7b191078911ce8 | |
parent | 03d6e82420dadcf5885d5d50e41db7e17f464e16 (diff) | |
parent | 622ca4874f229c83b75746ca5ef7ef9e8a536ea2 (diff) | |
download | numpy-79a5000a72f6e4432c0a76efa6cf8e3f12ecef9e.tar.gz |
Merge pull request #21324 from seberg/frombuffer-fix
BUG: Make mmap handling safer in frombuffer
-rw-r--r-- | numpy/core/_add_newdocs.py | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 32 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 24 |
3 files changed, 54 insertions, 6 deletions
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 23e82af93..bea8432ed 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1557,6 +1557,10 @@ add_newdoc('numpy.core.multiarray', 'frombuffer', The data of the resulting array will not be byteswapped, but will be interpreted correctly. + This function creates a view into the original object. This should be safe + in general, but it may make sense to copy the result when the original + object is mutable or untrusted. + Examples -------- >>> s = b'hello world' diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 58ba0c2a1..0f1b0d99b 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -3683,28 +3683,44 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, return NULL; } + /* + * The array check is probably unnecessary. It preserves the base for + * arrays. This is the "old" buffer protocol, which had no release logic. + * (It was assumed that the result is always a view.) + * + * NOTE: We could also check if `bf_releasebuffer` is defined which should + * be the most precise and safe thing to do. But that should only be + * necessary if unexpected backcompat issues are found downstream. + */ + if (!PyArray_Check(buf)) { + buf = PyMemoryView_FromObject(buf); + if (buf == NULL) { + return NULL; + } + } + else { + Py_INCREF(buf); + } + if (PyObject_GetBuffer(buf, &view, PyBUF_WRITABLE|PyBUF_SIMPLE) < 0) { writeable = 0; PyErr_Clear(); if (PyObject_GetBuffer(buf, &view, PyBUF_SIMPLE) < 0) { + Py_DECREF(buf); Py_DECREF(type); return NULL; } } data = (char *)view.buf; ts = view.len; - /* - * In Python 3 both of the deprecated functions PyObject_AsWriteBuffer and - * PyObject_AsReadBuffer that this code replaces release the buffer. It is - * up to the object that supplies the buffer to guarantee that the buffer - * sticks around after the release. - */ + /* `buf` is an array or a memoryview; so we know `view` does not own data */ PyBuffer_Release(&view); if ((offset < 0) || (offset > ts)) { PyErr_Format(PyExc_ValueError, "offset must be non-negative and no greater than buffer "\ "length (%" NPY_INTP_FMT ")", (npy_intp)ts); + Py_DECREF(buf); Py_DECREF(type); return NULL; } @@ -3717,6 +3733,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, if (itemsize == 0) { PyErr_SetString(PyExc_ValueError, "cannot determine count if itemsize is 0"); + Py_DECREF(buf); Py_DECREF(type); return NULL; } @@ -3724,6 +3741,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, PyErr_SetString(PyExc_ValueError, "buffer size must be a multiple"\ " of element size"); + Py_DECREF(buf); Py_DECREF(type); return NULL; } @@ -3734,6 +3752,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, PyErr_SetString(PyExc_ValueError, "buffer is smaller than requested"\ " size"); + Py_DECREF(buf); Py_DECREF(type); return NULL; } @@ -3743,6 +3762,7 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, &PyArray_Type, type, 1, &n, NULL, data, NPY_ARRAY_DEFAULT, NULL, buf); + Py_DECREF(buf); if (ret == NULL) { return NULL; } diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 8c7f00cb4..d4cc3d753 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -18,6 +18,7 @@ from numpy.compat import pickle import pathlib import builtins from decimal import Decimal +import mmap import numpy as np import numpy.core._multiarray_tests as _multiarray_tests @@ -5475,9 +5476,32 @@ class TestFromBuffer: buf = x.tobytes() assert_array_equal(np.frombuffer(buf, dtype=dt), x.flat) + def test_array_base(self): + arr = np.arange(10) + new = np.frombuffer(arr) + # We currently special case arrays to ensure they are used as a base. + # This could probably be changed (removing the test). + assert new.base is arr + def test_empty(self): assert_array_equal(np.frombuffer(b''), np.array([])) + @pytest.mark.skipif(IS_PYPY, + reason="PyPy's memoryview currently does not track exports. See: " + "https://foss.heptapod.net/pypy/pypy/-/issues/3724") + def test_mmap_close(self): + # The old buffer protocol was not safe for some things that the new + # one is. But `frombuffer` always used the old one for a long time. + # Checks that it is safe with the new one (using memoryviews) + with tempfile.TemporaryFile(mode='wb') as tmp: + tmp.write(b"asdf") + tmp.flush() + mm = mmap.mmap(tmp.fileno(), 0) + arr = np.frombuffer(mm, dtype=np.uint8) + with pytest.raises(BufferError): + mm.close() # cannot close while array uses the buffer + del arr + mm.close() class TestFlat: def setup(self): |