diff options
author | Alex Willmer <alex@moreati.org.uk> | 2015-08-05 17:21:33 +0100 |
---|---|---|
committer | Alex Willmer <alex@moreati.org.uk> | 2015-08-05 17:21:33 +0100 |
commit | ff668ba0d7652c12b28a6b6f9dbb3b581a383833 (patch) | |
tree | 7a4033c5be2deefac3812c7a221d8c9a741454e4 /numpy/_build_utils/src | |
parent | f179ec92d8ddb0dc5f7445255022be5c4765a704 (diff) | |
download | numpy-ff668ba0d7652c12b28a6b6f9dbb3b581a383833.tar.gz |
BLD: Move numpy.build_utils -> numpy._build_utils, add to MANIFEST.in
This fixes the distutils built from an sdist (e.g. under tox),
without including _build_utils in binary distributions or the installed numpy.
Diffstat (limited to 'numpy/_build_utils/src')
-rw-r--r-- | numpy/_build_utils/src/apple_sgemv_fix.c | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/numpy/_build_utils/src/apple_sgemv_fix.c b/numpy/_build_utils/src/apple_sgemv_fix.c new file mode 100644 index 000000000..ffdfb81f7 --- /dev/null +++ b/numpy/_build_utils/src/apple_sgemv_fix.c @@ -0,0 +1,227 @@ +/* This is a collection of ugly hacks to circumvent a bug in + * Apple Accelerate framework's SGEMV subroutine. + * + * See: https://github.com/numpy/numpy/issues/4007 + * + * SGEMV in Accelerate framework will segfault on MacOS X version 10.9 + * (aka Mavericks) if arrays are not aligned to 32 byte boundaries + * and the CPU supports AVX instructions. This can produce segfaults + * in np.dot. + * + * This patch overshadows the symbols cblas_sgemv, sgemv_ and sgemv + * exported by Accelerate to produce the correct behavior. The MacOS X + * version and CPU specs are checked on module import. If Mavericks and + * AVX are detected the call to SGEMV is emulated with a call to SGEMM + * if the arrays are not 32 byte aligned. If the exported symbols cannot + * be overshadowed on module import, a fatal error is produced and the + * process aborts. All the fixes are in a self-contained C file + * and do not alter the multiarray C code. The patch is not applied + * unless NumPy is configured to link with Apple's Accelerate + * framework. + * + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "Python.h" +#include "numpy/arrayobject.h" + +#include <string.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <stdio.h> + +/* ----------------------------------------------------------------- */ +/* Original cblas_sgemv */ + +#define VECLIB_FILE "/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/vecLib" + +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +extern void cblas_xerbla(int info, const char *rout, const char *form, ...); + +typedef void cblas_sgemv_t(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, + const float beta, float *Y, const int incY); + +typedef void cblas_sgemm_t(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, + const int M, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, + const float beta, float *C, const int incC); + +typedef void fortran_sgemv_t( const char* trans, const int* m, const int* n, + const float* alpha, const float* A, const int* ldA, + const float* X, const int* incX, + const float* beta, float* Y, const int* incY ); + +static void *veclib = NULL; +static cblas_sgemv_t *accelerate_cblas_sgemv = NULL; +static cblas_sgemm_t *accelerate_cblas_sgemm = NULL; +static fortran_sgemv_t *accelerate_sgemv = NULL; +static int AVX_and_10_9 = 0; + +/* Dynamic check for AVX support + * __builtin_cpu_supports("avx") is available in gcc 4.8, + * but clang and icc do not currently support it. */ +#define cpu_supports_avx()\ +(system("sysctl -n machdep.cpu.features | grep -q AVX") == 0) + +/* Check if we are using MacOS X version 10.9 */ +#define using_mavericks()\ +(system("sw_vers -productVersion | grep -q 10\\.9\\.") == 0) + +__attribute__((destructor)) +static void unloadlib(void) +{ + if (veclib) dlclose(veclib); +} + +__attribute__((constructor)) +static void loadlib() +/* automatically executed on module import */ +{ + char errormsg[1024]; + int AVX, MAVERICKS; + memset((void*)errormsg, 0, sizeof(errormsg)); + /* check if the CPU supports AVX */ + AVX = cpu_supports_avx(); + /* check if the OS is MacOS X Mavericks */ + MAVERICKS = using_mavericks(); + /* we need the workaround when the CPU supports + * AVX and the OS version is Mavericks */ + AVX_and_10_9 = AVX && MAVERICKS; + /* load vecLib */ + veclib = dlopen(VECLIB_FILE, RTLD_LOCAL | RTLD_FIRST); + if (!veclib) { + veclib = NULL; + snprintf(errormsg, sizeof(errormsg), + "Failed to open vecLib from location '%s'.", VECLIB_FILE); + Py_FatalError(errormsg); /* calls abort() and dumps core */ + } + /* resolve Fortran SGEMV from Accelerate */ + accelerate_sgemv = (fortran_sgemv_t*) dlsym(veclib, "sgemv_"); + if (!accelerate_sgemv) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'sgemv_'."); + } + /* resolve cblas_sgemv from Accelerate */ + accelerate_cblas_sgemv = (cblas_sgemv_t*) dlsym(veclib, "cblas_sgemv"); + if (!accelerate_cblas_sgemv) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'cblas_sgemv'."); + } + /* resolve cblas_sgemm from Accelerate */ + accelerate_cblas_sgemm = (cblas_sgemm_t*) dlsym(veclib, "cblas_sgemm"); + if (!accelerate_cblas_sgemm) { + unloadlib(); + Py_FatalError("Failed to resolve symbol 'cblas_sgemm'."); + } +} + +/* ----------------------------------------------------------------- */ +/* Fortran SGEMV override */ + +void sgemv_( const char* trans, const int* m, const int* n, + const float* alpha, const float* A, const int* ldA, + const float* X, const int* incX, + const float* beta, float* Y, const int* incY ) +{ + /* It is safe to use the original SGEMV if we are not using AVX on Mavericks + * or the input arrays A, X and Y are all aligned on 32 byte boundaries. */ + #define BADARRAY(x) (((npy_intp)(void*)x) % 32) + const int use_sgemm = AVX_and_10_9 && (BADARRAY(A) || BADARRAY(X) || BADARRAY(Y)); + if (!use_sgemm) { + accelerate_sgemv(trans,m,n,alpha,A,ldA,X,incX,beta,Y,incY); + return; + } + + /* Arrays are misaligned, the CPU supports AVX, and we are running + * Mavericks. + * + * Emulation of SGEMV with SGEMM: + * + * SGEMV allows vectors to be strided. SGEMM requires all arrays to be + * contiguous along the leading dimension. To emulate striding in SGEMV + * with the leading dimension arguments in SGEMM we compute + * + * Y = alpha * op(A) @ X + beta * Y + * + * as + * + * Y.T = alpha * X.T @ op(A).T + beta * Y.T + * + * Because Fortran uses column major order and X.T and Y.T are row vectors, + * the leading dimensions of X.T and Y.T in SGEMM become equal to the + * strides of the the column vectors X and Y in SGEMV. */ + + switch (*trans) { + case 'T': + case 't': + case 'C': + case 'c': + accelerate_cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, + 1, *n, *m, *alpha, X, *incX, A, *ldA, *beta, Y, *incY ); + break; + case 'N': + case 'n': + accelerate_cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, + 1, *m, *n, *alpha, X, *incX, A, *ldA, *beta, Y, *incY ); + break; + default: + cblas_xerbla(1, "SGEMV", "Illegal transpose setting: %c\n", *trans); + } +} + +/* ----------------------------------------------------------------- */ +/* Override for an alias symbol for sgemv_ in Accelerate */ + +void sgemv (char *trans, + const int *m, const int *n, + const float *alpha, + const float *A, const int *lda, + const float *B, const int *incB, + const float *beta, + float *C, const int *incC) +{ + sgemv_(trans,m,n,alpha,A,lda,B,incB,beta,C,incC); +} + +/* ----------------------------------------------------------------- */ +/* cblas_sgemv override, based on Netlib CBLAS code */ + +void cblas_sgemv(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY) +{ + char TA; + if (order == CblasColMajor) + { + if (TransA == CblasNoTrans) TA = 'N'; + else if (TransA == CblasTrans) TA = 'T'; + else if (TransA == CblasConjTrans) TA = 'C'; + else + { + cblas_xerbla(2, "cblas_sgemv","Illegal TransA setting, %d\n", TransA); + } + sgemv_(&TA, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY); + } + else if (order == CblasRowMajor) + { + if (TransA == CblasNoTrans) TA = 'T'; + else if (TransA == CblasTrans) TA = 'N'; + else if (TransA == CblasConjTrans) TA = 'N'; + else + { + cblas_xerbla(2, "cblas_sgemv", "Illegal TransA setting, %d\n", TransA); + return; + } + sgemv_(&TA, &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY); + } + else + cblas_xerbla(1, "cblas_sgemv", "Illegal Order setting, %d\n", order); +} |