summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/upcoming_changes/15769.improvement.rst15
-rw-r--r--doc/source/reference/global_state.rst85
-rw-r--r--doc/source/reference/index.rst1
-rw-r--r--numpy/__init__.py21
-rw-r--r--numpy/core/_add_newdocs.py8
-rw-r--r--numpy/core/multiarray.py2
-rw-r--r--numpy/core/src/multiarray/alloc.c28
-rw-r--r--numpy/core/src/multiarray/alloc.h3
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c3
9 files changed, 164 insertions, 2 deletions
diff --git a/doc/release/upcoming_changes/15769.improvement.rst b/doc/release/upcoming_changes/15769.improvement.rst
new file mode 100644
index 000000000..3f70058f6
--- /dev/null
+++ b/doc/release/upcoming_changes/15769.improvement.rst
@@ -0,0 +1,15 @@
+Ability to disable madvise hugepages
+------------------------------------
+
+On Linux, NumPy has previously added support for madvise
+hugepages, which can improve performance for very large arrays.
+Unfortunately, on older kernel versions this led to performance
+regressions, thus by default the support has been disabled on
+kernels before version 4.6. To override the default, you can
+use the environment variable::
+
+ NUMPY_MADVISE_HUGEPAGE=0
+
+or set it to 1 to force enabling support. Note that this only makes
+a difference if the operating system is set up to use madvise
+transparent hugepage.
diff --git a/doc/source/reference/global_state.rst b/doc/source/reference/global_state.rst
new file mode 100644
index 000000000..2a163390e
--- /dev/null
+++ b/doc/source/reference/global_state.rst
@@ -0,0 +1,85 @@
+.. _global_state:
+
+************
+Global State
+************
+
+NumPy has a few import-time, compile-time, or runtime options
+which change the global behaviour.
+Most of these are related to performance or for debugging
+purposes and will not be interesting to the vast majority
+of users.
+
+
+Performance-Related Options
+===========================
+
+Number of Threads used for Linear Algebra
+-----------------------------------------
+
+NumPy itself is normally intentionally limited to a single thread
+during function calls, however it does support multiple Python
+threads running at the same time.
+Note that for performant linear algebra NumPy uses a BLAS backend
+such as OpenBLAS or MKL, which may use multiple threads that may
+be controlled by environment variables such as ``OMP_NUM_THREADS``
+depending on what is used.
+One way to control the number of threads is the package
+`threadpoolctl <https://pypi.org/project/threadpoolctl/>`_.
+
+
+Madvise Hugepage on Linux
+-------------------------
+
+When working with very large arrays on modern Linux kernels,
+you can experience a significant speedup when
+`transparent hugepage <https://www.kernel.org/doc/html/latest/admin-guide/mm/transhuge.html>`_
+is used.
+The current system policy for transparent hugepages can be seen by::
+
+ cat /sys/kernel/mm/transparent_hugepage/enabled
+
+When set to ``madvise`` NumPy will typically use hugepages for a performance
+boost. This behaviour can be modified by setting the environment variable::
+
+ NUMPY_MADVISE_HUGEPAGE=0
+
+or setting it to ``1`` to always enable it. When not set, the default
+is to use madvise on kernels 4.6 and newer. These kernels presumably
+experience a large speedup with hugepage support.
+This flag is checked at import time.
+
+
+Interoperability-Related Options
+================================
+
+The array function protocol, which allows array-like objects to
+hook into the NumPy API, is currently enabled by default.
+This option has existed since NumPy 1.16 and has been enabled by default
+since NumPy 1.17. It can be disabled using::
+
+ NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=0
+
+See also :py:meth:`numpy.class.__array_function__` for more information.
+This flag is checked at import time.
+
+
+Debugging-Related Options
+=========================
+
+Relaxed Strides Checking
+------------------------
+
+The *compile-time* environment variables::
+
+ NPY_RELAXED_STRIDES_DEBUG=0
+ NPY_RELAXED_STRIDES_CHECKING=1
+
+control how NumPy reports contiguity for arrays.
+The default is that relaxed strides checking is enabled and the debug mode is disabled.
+This setting should always be enabled. Setting the
+debug option can be interesting for testing code written
+in C which iterates through arrays that may or may not be
+contiguous in memory.
+Most users will have no reason to change these; for details,
+please see the :ref:`memory layout <memory-layout>` documentation.
diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst
index 1ae205f91..2e1dcafa2 100644
--- a/doc/source/reference/index.rst
+++ b/doc/source/reference/index.rst
@@ -22,6 +22,7 @@ For learning how to use NumPy, see the :ref:`complete documentation <numpy_docs_
constants
ufuncs
routines
+ global_state
distutils
distutils_guide
c-api/index
diff --git a/numpy/__init__.py b/numpy/__init__.py
index 2d3423c56..575e8ea3d 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -285,3 +285,24 @@ else:
error_message))
raise RuntimeError(msg)
del _mac_os_check
+
+ # We usually use madvise hugepages support, but on some old kernels it
+ # is slow and thus better avoided.
+ # Specifically kernel version 4.6 had a bug fix which probably fixed this:
+ # https://github.com/torvalds/linux/commit/7cf91a98e607c2f935dbcc177d70011e95b8faff
+ import os
+ use_hugepage = os.environ.get("NUMPY_MADVISE_HUGEPAGE", None)
+ if sys.platform == "linux" and use_hugepage is None:
+ use_hugepage = 1
+ kernel_version = os.uname().release.split(".")[:2]
+ kernel_version = tuple(int(v) for v in kernel_version)
+ if kernel_version < (4, 6):
+ use_hugepage = 0
+ elif use_hugepage is None:
+ # This is not Linux, so it should not matter, just enable anyway
+ use_hugepage = 1
+ else:
+ use_hugepage = int(use_hugepage)
+
+ # Note that this will currently only make a difference on Linux
+ core.multiarray._set_madvise_hugepage(use_hugepage)
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 18ab10078..e54103634 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -4394,6 +4394,14 @@ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc',
and then throwing away the ufunc.
""")
+add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage',
+ """
+ _set_madvise_hugepage(enabled: bool) -> bool
+
+ Set or unset use of ``madvise (2)`` MADV_HUGEPAGE support when
+ allocating the array data. Returns the previously set value.
+ See `global_state` for more information.
+ """)
add_newdoc('numpy.core._multiarray_tests', 'format_float_OSprintf_g',
"""
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index e207280f0..ec36f4f7e 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -17,7 +17,7 @@ from ._multiarray_umath import * # noqa: F403
# _get_ndarray_c_version is semi-public, on purpose not added to __all__
from ._multiarray_umath import (
_fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
- _ARRAY_API, _monotonicity, _get_ndarray_c_version
+ _ARRAY_API, _monotonicity, _get_ndarray_c_version, _set_madvise_hugepage,
)
__all__ = [
diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c
index c2b7e9ca7..795fc7315 100644
--- a/numpy/core/src/multiarray/alloc.c
+++ b/numpy/core/src/multiarray/alloc.c
@@ -47,6 +47,32 @@ typedef struct {
static cache_bucket datacache[NBUCKETS];
static cache_bucket dimcache[NBUCKETS_DIM];
+static int _madvise_hugepage = 1;
+
+
+/*
+ * Enables or disables the use of `MADV_HUGEPAGE` on Linux by storing the
+ * truth value of `enabled_obj` in the global static `_madvise_hugepage`.
+ * Returns the previous setting as a Python bool; returns NULL and leaves
+ * the setting unchanged when the truth value cannot be determined.
+ * It is exposed to Python as `np.core.multiarray._set_madvise_hugepage`.
+ */
+NPY_NO_EXPORT PyObject *
+_set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj)
+{
+ int was_enabled = _madvise_hugepage;
+ int enabled = PyObject_IsTrue(enabled_obj);
+ if (enabled < 0) {  /* truth test failed; bail out before touching the flag */
+ return NULL;
+ }
+ _madvise_hugepage = enabled;
+ if (was_enabled) {
+ Py_RETURN_TRUE;
+ }
+ Py_RETURN_FALSE;
+}
+
+
/* as the cache is managed in global variables verify the GIL is held */
/*
@@ -75,7 +101,7 @@ _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
#endif
#ifdef NPY_OS_LINUX
/* allow kernel allocating huge pages for large arrays */
- if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u)))) {
+ if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u))) && _madvise_hugepage) {
npy_uintp offset = 4096u - (npy_uintp)p % (4096u);
npy_uintp length = nelem * esz - offset;
/**
diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h
index 2b69efc35..15e31ebb5 100644
--- a/numpy/core/src/multiarray/alloc.h
+++ b/numpy/core/src/multiarray/alloc.h
@@ -6,6 +6,9 @@
#define NPY_TRACE_DOMAIN 389047
+NPY_NO_EXPORT PyObject *
+_set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj);
+
NPY_NO_EXPORT void *
npy_alloc_cache(npy_uintp sz);
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 9e8022abd..4c316052d 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -34,6 +34,7 @@
NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
/* Internal APIs */
+#include "alloc.h"
#include "arrayfunction_override.h"
#include "arraytypes.h"
#include "arrayobject.h"
@@ -4161,6 +4162,8 @@ static struct PyMethodDef array_module_methods[] = {
METH_VARARGS, NULL},
{"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc,
METH_VARARGS, NULL},
+ {"_set_madvise_hugepage", (PyCFunction)_set_madvise_hugepage,
+ METH_O, NULL},
{NULL, NULL, 0, NULL} /* sentinel */
};