summaryrefslogtreecommitdiff
path: root/numpy/core/setup_common.py
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2013-06-10 23:02:36 +0200
committerJulian Taylor <jtaylor.debian@googlemail.com>2013-06-13 18:52:31 +0200
commit75df68b5ca238eabc2de144dce08d481c59ffcaf (patch)
treec52fe52ada823c87ce5b264e5fecedecf56566a2 /numpy/core/setup_common.py
parent266a968d5d9b3cb5be59e30b697f4e9876c3a00c (diff)
downloadnumpy-75df68b5ca238eabc2de144dce08d481c59ffcaf.tar.gz
ENH: tell gcc to unroll strided copy loops
The strided copy loops profit a lot from unrolling, as the number of operations executed in each iteration is very small. GCC needs to be told explicitly to do unrolling, even at -O3. Unrolling is only profitable if the move can be done in a single instruction; otherwise the increased code size makes it slower. Thus the flag is only used for operations on element sizes less than or equal to the native pointer size. Tested to improve performance by 20-50% on Intel Core 2 Duo, Xeon 5xxx/7xxx and AMD Phenom X4.
Diffstat (limited to 'numpy/core/setup_common.py')
-rw-r--r--numpy/core/setup_common.py7
1 files changed, 7 insertions, 0 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 284acfe21..cb30c83c9 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -113,6 +113,13 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap64", '5u'),
]
+# gcc function attributes
+# (attribute as understood by gcc, function name),
+# function name will be converted to HAVE_<upper-case-name> preprocessor macro
+OPTIONAL_GCC_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
+ 'attribute_optimize_unroll_loops'),
+ ]
+
# Subset of OPTIONAL_STDFUNCS which may already have HAVE_* defined by Python.h
OPTIONAL_STDFUNCS_MAYBE = ["expm1", "log1p", "acosh", "atanh", "asinh", "hypot",
"copysign"]