llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang <details> <summary>Changes</summary> Change the pointer parameters of the *_stream_* series intrinsics to `void *` (`void const *` for the stream loads), and add tests that pass `void *` arguments. -- Full diff: https://github.com/llvm/llvm-project/pull/66310.diff 10 Files Affected: - (modified) clang/lib/Headers/avx2intrin.h (+1-1) - (modified) clang/lib/Headers/avxintrin.h (+3-3) - (modified) clang/lib/Headers/emmintrin.h (+4-4) - (modified) clang/lib/Headers/smmintrin.h (+1-1) - (modified) clang/lib/Headers/xmmintrin.h (+1-1) - (modified) clang/test/CodeGen/X86/avx-builtins.c (+18) - (modified) clang/test/CodeGen/X86/avx2-builtins.c (+6) - (modified) clang/test/CodeGen/X86/sse-builtins.c (+6) - (modified) clang/test/CodeGen/X86/sse2-builtins.c (+24) - (modified) clang/test/CodeGen/X86/sse41-builtins.c (+6) <pre> diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c45006193eddcc9..675a93bba1c8a4f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b) /// A pointer to the 32-byte aligned memory containing the vector to load. /// \returns A 256-bit integer vector loaded from memory. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_stream_load_si256(__m256i const *__V) +_mm256_stream_load_si256(void const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 94fac5e6c9da471..b796bb773ec11f0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) /// \param __b /// A 256-bit integer vector containing the values to be moved. 
static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_si256(__m256i *__a, __m256i __b) +_mm256_stream_si256(void *__a, __m256i __b) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x double] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_pd(double *__a, __m256d __b) +_mm256_stream_pd(void *__a, __m256d __b) { typedef __v4df __v4df_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b) /// \param __a /// A 256-bit vector of [8 x float] containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_stream_ps(float *__p, __m256 __a) +_mm256_stream_ps(void *__p, __m256 __a) { typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 064d974936598f8..eacb0182614304d 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, /// A pointer to the 128-bit aligned memory location used to store the value. /// \param __a /// A vector of [2 x double] containing the 64-bit values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, __m128d __a) { __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); } @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, /// A pointer to the 128-bit aligned memory location used to store the value. 
/// \param __a /// A 128-bit integer vector containing the values to be stored. -static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, __m128i __a) { __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); } @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, /// A 32-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si32(int *__p, int __a) { + _mm_stream_si32(void *__p, int __a) { __builtin_ia32_movnti(__p, __a); } @@ -4003,7 +4003,7 @@ static __inline__ void /// A 64-bit integer containing the value to be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) - _mm_stream_si64(long long *__p, long long __a) { + _mm_stream_si64(void *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); } #endif diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 16d8855a1c0b5d0..4e2eb46bb5421f2 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, /// \returns A 128-bit integer vector containing the data stored at the /// specified memory location. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_stream_load_si128(__m128i const *__V) { +_mm_stream_load_si128(void const *__V) { return (__m128i)__builtin_nontemporal_load((const __v2di *)__V); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 80aa2a817f6afc1..10b6907ace07cc4 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2140,7 +2140,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a) /// \param __a /// A 128-bit vector of [4 x float] containing the values to be moved. 
static __inline__ void __DEFAULT_FN_ATTRS -_mm_stream_ps(float *__p, __m128 __a) +_mm_stream_ps(void *__p, __m128 __a) { __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index b68d192051b9bf4..06d3c321dd89592 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1940,18 +1940,36 @@ void test_mm256_stream_pd(double* A, __m256d B) { _mm256_stream_pd(A, B); } +void test_mm256_stream_pd_void(void* A, __m256d B) { + // CHECK-LABEL: test_mm256_stream_pd_void + // CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_pd(A, B); +} + void test_mm256_stream_ps(float* A, __m256 B) { // CHECK-LABEL: test_mm256_stream_ps // CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal _mm256_stream_ps(A, B); } +void test_mm256_stream_ps_void(void* A, __m256 B) { + // CHECK-LABEL: test_mm256_stream_ps_void + // CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_ps(A, B); +} + void test_mm256_stream_si256(__m256i* A, __m256i B) { // CHECK-LABEL: test_mm256_stream_si256 // CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal _mm256_stream_si256(A, B); } +void test_mm256_stream_si256_void(void* A, __m256i B) { + // CHECK-LABEL: test_mm256_stream_si256_void + // CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal + _mm256_stream_si256(A, B); +} + __m256d test_mm256_sub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_sub_pd // CHECK: fsub <4 x double> diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 2750e1b227483ee..5b8c6ded7f216b7 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) { return _mm256_stream_load_si256(a); } +__m256i 
test_mm256_stream_load_si256_const(void const *a) { + // CHECK-LABEL: test_mm256_stream_load_si256_const + // CHECK: load <4 x i64>, ptr %{{.*}}, align 32, !nontemporal + return _mm256_stream_load_si256(a); +} + __m256i test_mm256_sub_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi8 // CHECK: sub <32 x i8> diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index da40380926d2c8a..9c64d420f7cdf10 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -720,6 +720,12 @@ void test_mm_stream_ps(float*A, __m128 B) { _mm_stream_ps(A, B); } +void test_mm_stream_ps_2(void*A, __m128 B) { + // CHECK-LABEL: test_mm_stream_ps_2 + // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_ps(A, B); +} + __m128 test_mm_sub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_sub_ps // CHECK: fsub <4 x float> diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 7c62a128c331fc5..7165d2791827cfc 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1488,18 +1488,36 @@ void test_mm_stream_pd(double *A, __m128d B) { _mm_stream_pd(A, B); } +void test_mm_stream_pd_void(void *A, __m128d B) { + // CHECK-LABEL: test_mm_stream_pd_void + // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_pd(A, B); +} + void test_mm_stream_si32(int *A, int B) { // CHECK-LABEL: test_mm_stream_si32 // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal _mm_stream_si32(A, B); } +void test_mm_stream_si32_void(void *A, int B) { + // CHECK-LABEL: test_mm_stream_si32_void + // CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_si32(A, B); +} + #ifdef __x86_64__ void test_mm_stream_si64(long long *A, long long B) { // X64-LABEL: test_mm_stream_si64 // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal _mm_stream_si64(A, B); } + 
+void test_mm_stream_si64_void(void *A, long long B) { + // X64-LABEL: test_mm_stream_si64_void + // X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal + _mm_stream_si64(A, B); +} #endif void test_mm_stream_si128(__m128i *A, __m128i B) { @@ -1508,6 +1526,12 @@ void test_mm_stream_si128(__m128i *A, __m128i B) { _mm_stream_si128(A, B); } +void test_mm_stream_si128_void(void *A, __m128i B) { + // CHECK-LABEL: test_mm_stream_si128_void + // CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal + _mm_stream_si128(A, B); +} + __m128i test_mm_sub_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi8 // CHECK: sub <16 x i8> diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index fe59cbcaf1938c6..ad486a6d9950af6 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) { return _mm_stream_load_si128(a); } +__m128i test_mm_stream_load_si128_void(void const *a) { + // CHECK-LABEL: test_mm_stream_load_si128_void + // CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal + return _mm_stream_load_si128(a); +} + int test_mm_test_all_ones(__m128i x) { // CHECK-LABEL: test_mm_test_all_ones // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) </pre> </details> https://github.com/llvm/llvm-project/pull/66310 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits