author    Sayed Adel <seiko@imavr.com>  2023-02-04 20:03:09 +0200
committer Sayed Adel <seiko@imavr.com>  2023-02-20 05:34:00 +0200
commit    a6c0a92cc56c221b415ff60638bec39b6bfafe52 (patch)
tree      83a2c8acae257f3ae0d568cba1bd79fd9dc84c29 /numpy/core/src/common
parent    80b8893dc50433cd999c5950142681c367f790b5 (diff)
MAINT, SIMD: fix C++ build when AVX2 intrinsics are in scope
Diffstat (limited to 'numpy/core/src/common')
-rw-r--r--  numpy/core/src/common/simd/avx2/memory.h | 16
1 file changed, 8 insertions(+), 8 deletions(-)
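The hunks below all make the same change: the memory-operand cast goes from (const void*)/(void*) to (const long long*)/(long long*). The AVX2 masked load/store and gather intrinsics declare their pointer parameters as long long const* / long long*, and C++, unlike C, does not implicitly convert void* to a typed pointer, so the old casts only compiled as C. A minimal stand-alone sketch of the working cast follows; it is not part of the patch, assumes GCC or Clang with -mavx2, and uses illustrative buffer names:

/* sketch.c / sketch.cpp: compiles as both C and C++ with -mavx2 */
#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    long long buf[4] = {1, 2, 3, 4};
    /* load only the two lowest 64-bit lanes; masked-off lanes read as zero */
    __m256i mask = _mm256_set_epi64x(0, 0, -1, -1);
    /* (const long long*) matches the intrinsic's declared parameter type;
       a (const void*) argument relies on an implicit conversion that exists
       in C but not in C++, so it fails to compile there */
    __m256i v = _mm256_maskload_epi64((const long long*)buf, mask);
    long long out[4];
    _mm256_storeu_si256((__m256i*)out, v);
    printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]); /* 1 2 0 0 */
    return 0;
}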
diff --git a/numpy/core/src/common/simd/avx2/memory.h b/numpy/core/src/common/simd/avx2/memory.h
index 64692b54c..81144a36b 100644
--- a/numpy/core/src/common/simd/avx2/memory.h
+++ b/numpy/core/src/common/simd/avx2/memory.h
@@ -215,7 +215,7 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- __m256i payload = _mm256_maskload_epi64((const void*)ptr, mask);
+ __m256i payload = _mm256_maskload_epi64((const long long*)ptr, mask);
return _mm256_blendv_epi8(vfill, payload, mask);
}
// fill zero to rest lanes
@@ -225,7 +225,7 @@ NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane)
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- return _mm256_maskload_epi64((const void*)ptr, mask);
+ return _mm256_maskload_epi64((const long long*)ptr, mask);
}
//// 64-bit nlane
@@ -240,7 +240,7 @@ NPY_FINLINE npyv_s32 npyv_load2_till_s32(const npy_int32 *ptr, npy_uintp nlane,
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- __m256i payload = _mm256_maskload_epi64((const void*)ptr, mask);
+ __m256i payload = _mm256_maskload_epi64((const long long*)ptr, mask);
return _mm256_blendv_epi8(vfill, payload, mask);
}
// fill zero to rest lanes
@@ -253,7 +253,7 @@ NPY_FINLINE npyv_u64 npyv_load2_tillz_s64(const npy_int64 *ptr, npy_uintp nlane)
assert(nlane > 0);
npy_int64 m = -((npy_int64)(nlane > 1));
__m256i mask = npyv_set_s64(-1, -1, m, m);
- return _mm256_maskload_epi64((const void*)ptr, mask);
+ return _mm256_maskload_epi64((const long long*)ptr, mask);
}
// fill zero to rest lanes
NPY_FINLINE npyv_u64 npyv_load2_till_s64(const npy_int64 *ptr, npy_uintp nlane,
@@ -262,7 +262,7 @@ NPY_FINLINE npyv_u64 npyv_load2_till_s64(const npy_int64 *ptr, npy_uintp nlane,
const __m256i vfill = npyv_set_s64(0, 0, fill_lo, fill_hi);
npy_int64 m = -((npy_int64)(nlane > 1));
__m256i mask = npyv_set_s64(-1, -1, m, m);
- __m256i payload = _mm256_maskload_epi64((const void*)ptr, mask);
+ __m256i payload = _mm256_maskload_epi64((const long long*)ptr, mask);
return _mm256_blendv_epi8(vfill, payload, mask);
}
/*********************************
@@ -295,7 +295,7 @@ npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 8);
+ return _mm256_mask_i64gather_epi64(vfill, (const long long*)ptr, idx, mask, 8);
}
// fill zero to rest lanes
NPY_FINLINE npyv_s64
@@ -315,7 +315,7 @@ NPY_FINLINE npyv_s64 npyv_loadn2_till_s32(const npy_int32 *ptr, npy_intp stride,
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 4);
+ return _mm256_mask_i64gather_epi64(vfill, (const long long*)ptr, idx, mask, 4);
}
// fill zero to rest lanes
NPY_FINLINE npyv_s32 npyv_loadn2_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane)
@@ -361,7 +361,7 @@ NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a
const __m256i steps = npyv_set_s64(0, 1, 2, 3);
__m256i vnlane = npyv_setall_s64(nlane > 8 ? 8 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
- _mm256_maskstore_epi64((void*)ptr, mask, a);
+ _mm256_maskstore_epi64((long long*)ptr, mask, a);
}
//// 64-bit nlane
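The later hunks apply the same fix to _mm256_mask_i64gather_epi64 and _mm256_maskstore_epi64, whose base-address parameters are likewise declared as long long const* and long long*. A similar stand-alone sketch for the masked gather (again outside the patch, assuming an AVX2-capable GCC or Clang with -mavx2; the table and index values are illustrative):

/* gather_sketch.c: masked 64-bit gather with the cast used in the patch */
#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    long long table[8] = {10, 11, 12, 13, 14, 15, 16, 17};
    __m256i idx  = _mm256_set_epi64x(6, 4, 2, 0);    /* element indices, lowest lane first */
    __m256i mask = _mm256_set_epi64x(0, -1, -1, -1); /* highest lane masked off, keeps src */
    __m256i src  = _mm256_setzero_si256();
    /* (const long long*) matches the declared base-address parameter;
       scale 8 = sizeof(long long) */
    __m256i v = _mm256_mask_i64gather_epi64(src, (const long long*)table, idx, mask, 8);
    long long out[4];
    _mm256_storeu_si256((__m256i*)out, v);
    printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]); /* 10 12 14 0 */
    return 0;
}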