diff options
-rw-r--r-- | doc/release/upcoming_changes/15769.improvement.rst | 15 | ||||
-rw-r--r-- | doc/source/reference/global_state.rst | 85 | ||||
-rw-r--r-- | doc/source/reference/index.rst | 1 | ||||
-rw-r--r-- | numpy/__init__.py | 21 | ||||
-rw-r--r-- | numpy/core/_add_newdocs.py | 8 | ||||
-rw-r--r-- | numpy/core/multiarray.py | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/alloc.c | 28 | ||||
-rw-r--r-- | numpy/core/src/multiarray/alloc.h | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 3 |
9 files changed, 164 insertions, 2 deletions
diff --git a/doc/release/upcoming_changes/15769.improvement.rst b/doc/release/upcoming_changes/15769.improvement.rst new file mode 100644 index 000000000..3f70058f6 --- /dev/null +++ b/doc/release/upcoming_changes/15769.improvement.rst @@ -0,0 +1,15 @@ +Ability to disable madvise hugepages +------------------------------------ + +On Linux NumPy has previously added support for madavise +hugepages which can improve performance for very large arrays. +Unfortunately, on older Kernel versions this led to peformance +regressions, thus by default the support has been disabled on +kernels before version 4.6. To override the default, you can +use the environment variable:: + + NUMPY_MADVISE_HUGEPAGE=0 + +or set it to 1 to force enabling support. Note that this only makes +a difference if the operating system is set up to use madvise +transparent hugepage. diff --git a/doc/source/reference/global_state.rst b/doc/source/reference/global_state.rst new file mode 100644 index 000000000..2a163390e --- /dev/null +++ b/doc/source/reference/global_state.rst @@ -0,0 +1,85 @@ +.. _global_state: + +************ +Global State +************ + +NumPy has a few import-time, compile-time, or runtime options +which change the global behaviour. +Most of these are related to performance or for debugging +purposes and will not be interesting to the vast majority +of users. + + +Performance-Related Options +=========================== + +Number of Threads used for Linear Algebra +----------------------------------------- + +NumPy itself is normally intentionally limited to a single thread +during function calls, however it does support multiple Python +threads running at the same time. +Note that for performant linear algebra NumPy uses a BLAS backend +such as OpenBLAS or MKL, which may use multiple threads that may +be controlled by environment variables such as ``OMP_NUM_THREADS`` +depending on what is used. +One way to control the number of threads is the package +`threadpoolctl <https://pypi.org/project/threadpoolctl/>`_ + + +Madvise Hugepage on Linux +------------------------- + +When working with very large arrays on modern Linux kernels, +you can experience a significant speedup when +`transparent hugepage <https://www.kernel.org/doc/html/latest/admin-guide/mm/transhuge.html>`_ +is used. +The current system policy for transparent hugepages can be seen by:: + + cat /sys/kernel/mm/transparent_hugepage/enabled + +When set to ``madvise`` NumPy will typically use hugepages for a performance +boost. This behaviour can be modified by setting the environment variable:: + + NUMPY_MADVISE_HUGEPAGE=0 + +or setting it to ``1`` to always enable it. When not set, the default +is to use madvise on Kernels 4.6 and newer. These kernels presumably +experience a large speedup with hugepage support. +This flag is checked at import time. + + +Interoperability-Related Options +================================ + +The array function protocol which allows array-like objects to +hook into the NumPy API is currently enabled by default. +This option exists since NumPy 1.16 and is enabled by default since +NumPy 1.17. It can be disabled using:: + + NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=0 + +See also :py:meth:`numpy.class.__array_function__` for more information. +This flag is checked at import time. + + +Debugging-Related Options +========================= + +Relaxed Strides Checking +------------------------ + +The *compile-time* environment variables:: + + NPY_RELAXED_STRIDES_DEBUG=0 + NPY_RELAXED_STRIDES_CHECKING=1 + +control how NumPy reports contiguity for arrays. +The default that it is enabled and the debug mode is disabled. +This setting should always be enabled. Setting the +debug option can be interesting for testing code written +in C which iterates through arrays that may or may not be +contiguous in memory. +Most users will have no reason to change these, for details +please see the `memory layout <memory-layout>`_ documentation. diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index 1ae205f91..2e1dcafa2 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -22,6 +22,7 @@ For learning how to use NumPy, see the :ref:`complete documentation <numpy_docs_ constants ufuncs routines + global_state distutils distutils_guide c-api/index diff --git a/numpy/__init__.py b/numpy/__init__.py index 2d3423c56..575e8ea3d 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -285,3 +285,24 @@ else: error_message)) raise RuntimeError(msg) del _mac_os_check + + # We usually use madvise hugepages support, but on some old kernels it + # is slow and thus better avoided. + # Specifically kernel version 4.6 had a bug fix which probably fixed this: + # https://github.com/torvalds/linux/commit/7cf91a98e607c2f935dbcc177d70011e95b8faff + import os + use_hugepage = os.environ.get("NUMPY_MADVISE_HUGEPAGE", None) + if sys.platform == "linux" and use_hugepage is None: + use_hugepage = 1 + kernel_version = os.uname().release.split(".")[:2] + kernel_version = tuple(int(v) for v in kernel_version) + if kernel_version < (4, 6): + use_hugepage = 0 + elif use_hugepage is None: + # This is not Linux, so it should not matter, just enable anyway + use_hugepage = 1 + else: + use_hugepage = int(use_hugepage) + + # Note that this will currently only make a difference on Linux + core.multiarray._set_madvise_hugepage(use_hugepage) diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 18ab10078..e54103634 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -4394,6 +4394,14 @@ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc', and then throwing away the ufunc. """) +add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage', + """ + _set_madvise_hugepage(enabled: bool) -> bool + + Set or unset use of ``madvise (2)`` MADV_HUGEPAGE support when + allocating the array data. Returns the previously set value. + See `global_state` for more information. + """) add_newdoc('numpy.core._multiarray_tests', 'format_float_OSprintf_g', """ diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index e207280f0..ec36f4f7e 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -17,7 +17,7 @@ from ._multiarray_umath import * # noqa: F403 # _get_ndarray_c_version is semi-public, on purpose not added to __all__ from ._multiarray_umath import ( _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string, - _ARRAY_API, _monotonicity, _get_ndarray_c_version + _ARRAY_API, _monotonicity, _get_ndarray_c_version, _set_madvise_hugepage, ) __all__ = [ diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c index c2b7e9ca7..795fc7315 100644 --- a/numpy/core/src/multiarray/alloc.c +++ b/numpy/core/src/multiarray/alloc.c @@ -47,6 +47,32 @@ typedef struct { static cache_bucket datacache[NBUCKETS]; static cache_bucket dimcache[NBUCKETS_DIM]; +static int _madvise_hugepage = 1; + + +/* + * This function enables or disables the use of `MADV_HUGEPAGE` on Linux + * by modifying the global static `_madvise_hugepage`. + * It returns the previous value of `_madvise_hugepage`. + * + * It is exposed to Python as `np.core.multiarray._set_madvise_hugepage`. + */ +NPY_NO_EXPORT PyObject * +_set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj) +{ + int was_enabled = _madvise_hugepage; + int enabled = PyObject_IsTrue(enabled_obj); + if (enabled < 0) { + return NULL; + } + _madvise_hugepage = enabled; + if (was_enabled) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + + /* as the cache is managed in global variables verify the GIL is held */ /* @@ -75,7 +101,7 @@ _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz, #endif #ifdef NPY_OS_LINUX /* allow kernel allocating huge pages for large arrays */ - if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u)))) { + if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u))) && _madvise_hugepage) { npy_uintp offset = 4096u - (npy_uintp)p % (4096u); npy_uintp length = nelem * esz - offset; /** diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h index 2b69efc35..15e31ebb5 100644 --- a/numpy/core/src/multiarray/alloc.h +++ b/numpy/core/src/multiarray/alloc.h @@ -6,6 +6,9 @@ #define NPY_TRACE_DOMAIN 389047 +NPY_NO_EXPORT PyObject * +_set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj); + NPY_NO_EXPORT void * npy_alloc_cache(npy_uintp sz); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 9e8022abd..4c316052d 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -34,6 +34,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; /* Internal APIs */ +#include "alloc.h" #include "arrayfunction_override.h" #include "arraytypes.h" #include "arrayobject.h" @@ -4161,6 +4162,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS, NULL}, {"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc, METH_VARARGS, NULL}, + {"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage, + METH_O, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; |