[clang] 3ee6170 - Allow vector zero padding intrinsics to be used in constexpr (#156441)

via cfe-commits Tue, 02 Sep 2025 06:52:10 -0700

Author: Ellis Kesterton
Date: 2025-09-02T13:33:10Z
New Revision: 3ee6170210796ae2837407cf46bf6cd02a6a3316


URL: 
https://github.com/llvm/llvm-project/commit/3ee6170210796ae2837407cf46bf6cd02a6a3316
DIFF: 
https://github.com/llvm/llvm-project/commit/3ee6170210796ae2837407cf46bf6cd02a6a3316.diff

LOG: Allow vector zero padding intrinsics to be used in constexpr (#156441)

Fix #156346 by marking intrinsics as constexpr. A test has been added
for each intrinsic.

The following instrinsics have been modified:
```
_mm256_zextpd128_pd256
_mm512_zextpd128_pd512
_mm512_zextpd256_pd512
_mm256_zextph128_ph256
_mm512_zextph128_ph512
_mm512_zextph256_ph512
_mm256_zextps128_ps256
_mm512_zextps128_ps512
_mm512_zextps256_ps512
_mm256_zextsi128_si256
_mm512_zextsi128_si512
_mm512_zextsi256_si512
```

Added: 
    

Modified: 
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512fp16intrin.h
    clang/lib/Headers/avxintrin.h
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/avx512f-builtins.c
    clang/test/CodeGen/X86/avx512fp16-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 1968b2b3d91d2..5222141d21606 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -536,9 +536,8 @@ _mm512_mask2int(__mmask16 __a)
 ///    A 128-bit vector of [2 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 
bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_zextpd128_pd512(__m128d __a)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextpd128_pd512(__m128d __a) {
   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 
2, 3, 2, 3, 2, 3);
 }
 
@@ -555,9 +554,8 @@ _mm512_zextpd128_pd512(__m128d __a)
 ///    A 256-bit vector of [4 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 
bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_zextpd256_pd512(__m256d __a)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextpd256_pd512(__m256d __a) {
   return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 
1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -573,9 +571,8 @@ _mm512_zextpd256_pd512(__m256d __a)
 ///    A 128-bit vector of [4 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 
bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_zextps128_ps512(__m128 __a)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextps128_ps512(__m128 __a) {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 
2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
 }
 
@@ -591,9 +588,8 @@ _mm512_zextps128_ps512(__m128 __a)
 ///    A 256-bit vector of [8 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 
bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_zextps256_ps512(__m256 __a)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextps256_ps512(__m256 __a) {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
@@ -609,9 +605,8 @@ _mm512_zextps256_ps512(__m256 __a)
 ///    A 128-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
 ///    the parameter. The upper 384 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_zextsi128_si512(__m128i __a)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextsi128_si512(__m128i __a) {
   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 
1, 2, 3, 2, 3, 2, 3);
 }
 
@@ -627,9 +622,8 @@ _mm512_zextsi128_si512(__m128i __a)
 ///    A 256-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
 ///    the parameter. The upper 256 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_zextsi256_si512(__m256i __a)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextsi256_si512(__m256i __a) {
   return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 
0, 1, 2, 3, 4, 5, 6, 7);
 }
 

diff  --git a/clang/lib/Headers/avx512fp16intrin.h 
b/clang/lib/Headers/avx512fp16intrin.h
index 6989b86a7b68c..25f65aee7ff11 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -242,7 +242,7 @@ _mm512_castph256_ph512(__m256h __a) {
 ///    A 128-bit vector of [8 x half].
 /// \returns A 512-bit floating-point vector of [16 x half]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_zextph128_ph256(__m128h __a) {
   return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
                                  5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -261,7 +261,7 @@ _mm256_zextph128_ph256(__m128h __a) {
 ///    A 128-bit vector of [8 x half].
 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_zextph128_ph512(__m128h __a) {
   return __builtin_shufflevector(
       __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
@@ -281,7 +281,7 @@ _mm512_zextph128_ph512(__m128h __a) {
 ///    A 256-bit vector of [16 x half].
 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_zextph256_ph512(__m256h __a) {
   return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
                                  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,

diff  --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 26096da949447..03c034f64cbea 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4583,9 +4583,8 @@ _mm256_castsi128_si256(__m128i __a)
 ///    A 128-bit vector of [2 x double].
 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 
bits
 ///    contain the value of the parameter. The upper 128 bits are set to zero.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_zextpd128_pd256(__m128d __a)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextpd128_pd256(__m128d __a) {
   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 
2, 3);
 }
 
@@ -4601,9 +4600,8 @@ _mm256_zextpd128_pd256(__m128d __a)
 ///    A 128-bit vector of [4 x float].
 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
 ///    contain the value of the parameter. The upper 128 bits are set to zero.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_zextps128_ps256(__m128 __a)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextps128_ps256(__m128 __a) {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 
2, 3, 4, 5, 6, 7);
 }
 
@@ -4619,9 +4617,8 @@ _mm256_zextps128_ps256(__m128 __a)
 ///    A 128-bit integer vector.
 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
 ///    the parameter. The upper 128 bits are set to zero.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_zextsi128_si256(__m128i __a)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextsi128_si256(__m128i __a) {
   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 
1, 2, 3);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx-builtins.c 
b/clang/test/CodeGen/X86/avx-builtins.c
index 4a048744faa61..622ac5d50aaf0 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -2151,6 +2151,7 @@ __m256d test_mm256_zextpd128_pd256(__m128d A) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_zextpd128_pd256(A);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_zextpd128_pd256((__m128d){-4.0, -5.0}), 
-4.0, -5.0, +0.0, +0.0));
 
 __m256 test_mm256_zextps128_ps256(__m128 A) {
   // CHECK-LABEL: test_mm256_zextps128_ps256
@@ -2158,6 +2159,7 @@ __m256 test_mm256_zextps128_ps256(__m128 A) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_zextps128_ps256(A);
 }
+TEST_CONSTEXPR(match_m256(_mm256_zextps128_ps256((__m128){1.0f, 2.0f, 3.0f, 
4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f));
 
 __m256i test_mm256_zextsi128_si256(__m128i A) {
   // CHECK-LABEL: test_mm256_zextsi128_si256
@@ -2165,3 +2167,4 @@ __m256i test_mm256_zextsi128_si256(__m128i A) {
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 
0, i32 1, i32 2, i32 3>
   return _mm256_zextsi128_si256(A);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_zextsi128_si256((__m128i){1, 2}), 1, 2, 0, 
0));

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index a60bb83113087..eb3b28390947f 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11038,6 +11038,7 @@ __m512d test_mm512_zextpd128_pd512(__m128d A) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   return _mm512_zextpd128_pd512(A);
 }
+TEST_CONSTEXPR(match_m512d(_mm512_zextpd128_pd512((__m128d){+99.0, -1.0}), 
+99.0, -1.0, +0.0, +0.0, 0.0, 0.0, 0.0, 0.0));
 
 __m512d test_mm512_zextpd256_pd512(__m256d A) {
   // CHECK-LABEL: test_mm512_zextpd256_pd512
@@ -11045,6 +11046,7 @@ __m512d test_mm512_zextpd256_pd512(__m256d A) {
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_zextpd256_pd512(A);
 }
+TEST_CONSTEXPR(match_m512d(_mm512_zextpd256_pd512((__m256d){1.0, 2.0, 3.0, 
4.0}), 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0));
 
 __m512 test_mm512_zextps128_ps512(__m128 A) {
   // CHECK-LABEL: test_mm512_zextps128_ps512
@@ -11052,6 +11054,7 @@ __m512 test_mm512_zextps128_ps512(__m128 A) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7>
   return _mm512_zextps128_ps512(A);
 }
+TEST_CONSTEXPR(match_m512(_mm512_zextps128_ps512((__m128){1.0f, 2.0f, 3.0f, 
4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 
0.0f, 0.0f, 0.0f, 0.0f));
 
 __m512 test_mm512_zextps256_ps512(__m256 A) {
   // CHECK-LABEL: test_mm512_zextps256_ps512
@@ -11059,6 +11062,7 @@ __m512 test_mm512_zextps256_ps512(__m256 A) {
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, 
i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_zextps256_ps512(A);
 }
+TEST_CONSTEXPR(match_m512(_mm512_zextps256_ps512((__m256){1.0f, 2.0f, 3.0f, 
4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
 
 __m512i test_mm512_zextsi128_si512(__m128i A) {
   // CHECK-LABEL: test_mm512_zextsi128_si512
@@ -11066,6 +11070,7 @@ __m512i test_mm512_zextsi128_si512(__m128i A) {
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   return _mm512_zextsi128_si512(A);
 }
+TEST_CONSTEXPR(match_m512i(_mm512_zextsi128_si512((__m128i){1, 2}), 1, 2, 0, 
0, 0, 0, 0, 0));
 
 __m512i test_mm512_zextsi256_si512(__m256i A) {
   // CHECK-LABEL: test_mm512_zextsi256_si512
@@ -11073,6 +11078,7 @@ __m512i test_mm512_zextsi256_si512(__m256i A) {
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_zextsi256_si512(A);
 }
+TEST_CONSTEXPR(match_m512i(_mm512_zextsi256_si512((__m256i){1, 2, 3, 4}), 1, 
2, 3, 4, 0, 0, 0, 0));
 
 __m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
   // CHECK-LABEL: test_mm512_i32logather_pd

diff  --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c 
b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index 1dbbbe22bfa33..37443d584614d 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -382,18 +382,21 @@ __m256h test_mm256_zextph128_ph256(__m128h __a) {
   // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> {{.*}}, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, 
i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_zextph128_ph256(__a);
 }
+TEST_CONSTEXPR(match_m256h(_mm256_zextph128_ph256((__m128h){1.0f16, 2.0f16, 
3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}), 1.0f16, 2.0f16, 3.0f16, 
4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 
0.0f16, 0.0f16, 0.0f16));
 
 __m512h test_mm512_zextph128_ph512(__m128h __a) {
   // CHECK-LABEL: test_mm512_zextph128_ph512
   // CHECK: shufflevector <8 x half> %{{.*}}, <8 x half> {{.*}}, <32 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, 
i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, 
i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, 
i32 15>
   return _mm512_zextph128_ph512(__a);
 }
+TEST_CONSTEXPR(match_m512h(_mm512_zextph128_ph512((__m128h){1.0f16, 2.0f16, 
3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}), 1.0f16, 2.0f16, 3.0f16, 
4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 
0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 
0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16));
 
 __m512h test_mm512_zextph256_ph512(__m256h __a) {
   // CHECK-LABEL: test_mm512_zextph256_ph512
   // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> {{.*}}, <32 x i32>
   return _mm512_zextph256_ph512(__a);
 }
+TEST_CONSTEXPR(match_m512h(_mm512_zextph256_ph512((__m256h){1.0f16, 2.0f16, 
3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 
12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 
5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 
14.0f16, 15.0f16, 16.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 
0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 
0.0f16));
 
 int test_mm_comi_round_sh(__m128h __A, __m128h __B) {
   // CHECK-LABEL: test_mm_comi_round_sh


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 3ee6170 - Allow vector zero padding intrinsics to be used in constexpr (#156441)

Reply via email to