Author: Shreeyash Pandey
Date: 2025-09-25T19:17:20Z
New Revision: 93cac97d7d4e5259cb544caab54e9902d3d84dbe
URL: https://github.com/llvm/llvm-project/commit/93cac97d7d4e5259cb544caab54e9902d3d84dbe DIFF: https://github.com/llvm/llvm-project/commit/93cac97d7d4e5259cb544caab54e9902d3d84dbe.diff LOG: [Headers][X86] Allow AVX512fp16 initialization intrinsics to be used in constexpr (#159929) Fixes https://github.com/llvm/llvm-project/issues/156866 --------- Signed-off-by: Shreeyash Pandey <[email protected]> Co-authored-by: Simon Pilgrim <[email protected]> Added: Modified: clang/lib/Headers/avx512vlfp16intrin.h clang/test/CodeGen/X86/avx512vlfp16-builtins.c Removed: ################################################################################ diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 16a4ff3034244..c0bcc080dbe93 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -42,7 +42,8 @@ static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) { return __a[0]; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_set_sh(_Float16 __h) { return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; } @@ -57,23 +58,23 @@ _mm256_set1_ph(_Float16 __h) { __h, __h, __h, __h, __h, __h, __h, __h}; } -static __inline __m128h __DEFAULT_FN_ATTRS128 +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } -static __inline __m256h __DEFAULT_FN_ATTRS256 +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_set1_pch(_Float16 _Complex h) { return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h)); } -static __inline __m128h __DEFAULT_FN_ATTRS128 +static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_set1_pch(_Float16 _Complex h) { return 
(__m128h)_mm_set1_ps(__builtin_bit_cast(float, h)); } -static __inline __m256h __DEFAULT_FN_ATTRS256 +static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, @@ -83,13 +84,13 @@ _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, __h4, __h3, __h2, __h1}; } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4, _Float16 e5, _Float16 e6, _Float16 e7) { return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4, _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9, _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13, diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c index ce120b20a4cca..f1865aae4a9e2 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -37,6 +37,8 @@ __m128h test_mm_set_sh(_Float16 __h) { return _mm_set_sh(__h); } +TEST_CONSTEXPR(match_m128h(_mm_set_sh(2.0), 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); + __m128h test_mm_set1_ph(_Float16 h) { // CHECK-LABEL: test_mm_set1_ph // CHECK: insertelement <8 x half> {{.*}}, i32 0 @@ -84,6 +86,8 @@ __m128h test_mm_set1_pch(_Float16 _Complex h) { return _mm_set1_pch(h); } +TEST_CONSTEXPR(match_m128h(_mm_set1_pch(1.0), 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0)); + __m256h test_mm256_set1_pch(_Float16 _Complex h) { // CHECK-LABEL: test_mm256_set1_pch // CHECK: insertelement <8 x float> {{.*}}, i32 0 @@ -97,6 +101,8 @@ __m256h test_mm256_set1_pch(_Float16 _Complex h) 
{ return _mm256_set1_pch(h); } +TEST_CONSTEXPR(match_m256h(_mm256_set1_pch(1.0), 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0)); + __m128h test_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { // CHECK-LABEL: test_mm_set_ph @@ -110,6 +116,7 @@ __m128h test_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h // CHECK: insertelement <8 x half> {{.*}}, i32 7 return _mm_set_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8); } +TEST_CONSTEXPR(match_m128h(_mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0)); __m256h test_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, @@ -136,6 +143,8 @@ __m256h test_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h9, __h10, __h11, __h12, __h13, __h14, __h15, __h16); } +TEST_CONSTEXPR(match_m256h(_mm256_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0)); + __m128h test_mm_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { // CHECK-LABEL: test_mm_setr_ph @@ -150,6 +159,8 @@ __m128h test_mm_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __ return _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8); } +TEST_CONSTEXPR(match_m128h(_mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)); + __m256h test_mm256_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, @@ -175,6 +186,8 @@ __m256h test_mm256_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h9, __h10, __h11, 
__h12, __h13, __h14, __h15, __h16); } +TEST_CONSTEXPR(match_m256h(_mm256_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)); + __m256h test_mm256_add_ph(__m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_add_ph // CHECK: %{{.*}} = fadd <16 x half> %{{.*}}, %{{.*}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
