[clang] [Clang][X86] Add constexpr support for permute4x64_pd and permute4x64_epi64 (PR #170442)

Simon Pilgrim via cfe-commits Wed, 03 Dec 2025 11:56:27 -0800

https://github.com/RKSimon updated 
https://github.com/llvm/llvm-project/pull/170442


>From 89e3954737c3648dacbee1ba27b03a57fc02f433 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 10:39:43 +0200
Subject: [PATCH 1/9] feat: Add constexpr support for permdi256 and permdf256

---
 clang/include/clang/Basic/BuiltinsX86.td |  5 ++++-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 10 ++++++++++
 clang/lib/AST/ExprConstant.cpp           | 13 +++++++++++++
 clang/lib/Headers/avx2intrin.h           | 12 ++++++++----
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 98cea35beb0ea..d07ded80c2b1b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -577,8 +577,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def psadbw256
       : X86Builtin<
             "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
-  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
+}
+
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 971fce541bb88..3ff5dc3eb5600 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4944,6 +4944,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return std::make_pair(0, static_cast<int>(LaneOffset + Index));
         });
 
+  case X86::BI__builtin_ia32_permdf256:
+  case X86::BI__builtin_ia32_permdi256:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
+          // permute4x64 operates on 4 64-bit elements
+          // For element i (0-3), extract bits [2*i+1:2*i] from Control
+          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
+          return std::make_pair(0, static_cast<int>(Index));
+        });
+
   case X86::BI__builtin_ia32_vpmultishiftqb128:
   case X86::BI__builtin_ia32_vpmultishiftqb256:
   case X86::BI__builtin_ia32_vpmultishiftqb512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5af4cb049ba9..13f27be6df58f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13122,6 +13122,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_permdf256:
+  case X86::BI__builtin_ia32_permdi256: {
+    APValue R;
+    if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
+          // permute4x64 operates on 4 64-bit elements
+          // For element i (0-3), extract bits [2*i+1:2*i] from Control
+          unsigned Index = (Control >> (2 * DstIdx)) & 0x3;
+          return std::make_pair(0, static_cast<int>(Index));
+        }))
+      return false;
+    return Success(R, E);
+  }
+
   case X86::BI__builtin_ia32_vpermilvarps:
   case X86::BI__builtin_ia32_vpermilvarps256:
   case X86::BI__builtin_ia32_vpermilvarps512: {
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index d3ceb2327ac62..dbc3dd01c3cc7 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3238,8 +3238,10 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-#define _mm256_permute4x64_pd(V, M) \
-  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permute4x64_pd(__m256d __V, const int __M) {
+  return (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(__V), (int)(__M));
+}
 
 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
 ///    the 256-bit vector of [8 x float] in \a __a as specified by indexes in
@@ -3295,8 +3297,10 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-#define _mm256_permute4x64_epi64(V, M) \
-  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permute4x64_epi64(__m256i __V, const int __M) {
+  return (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(__V), (int)(__M));
+}
 
 /// Sets each half of the 256-bit result either to zero or to one of the
 ///    four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2,

>From b949e990f95d60b8bc9ae8db559268a684c37f0b Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 10:48:49 +0200
Subject: [PATCH 2/9] feat: add tests

---
 clang/test/CodeGen/X86/avx2-builtins.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index d6facfea8962e..83e7f2a25cadb 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1111,12 +1111,34 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 
3, i32 0, i32 2, i32 0>
   return _mm256_permute4x64_epi64(a, 35);
 }
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x00), 10LL, 10LL, 10LL, 10LL));
+// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL));
+// Control value 0x12: [2,1,0,1] -> [C,B,A,B]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 20LL, 10LL, 20LL));
+// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL));
+// Test with negative values
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL));
 
 __m256d test_mm256_permute4x64_pd(__m256d a) {
   // CHECK-LABEL: test_mm256_permute4x64_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> 
<i32 1, i32 2, i32 1, i32 0>
   return _mm256_permute4x64_pd(a, 25);
 }
+// Control value 0x00: [0,0,0,0] -> broadcast element 0
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0));
+// Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0));
+// Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0));
+// Control value 0x12: [2,1,0,1] -> [C,B,A,B]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 2.0, 1.0, 2.0));
+// Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0));
 
 __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_permutevar8x32_epi32

>From 962ed403e39bb5e3d66812653c9b7453bce53023 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 10:53:34 +0200
Subject: [PATCH 3/9] chore: format files

---
 clang/include/clang/Basic/BuiltinsX86.td | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index d07ded80c2b1b..23eee6df926a1 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -580,12 +580,13 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
 }
 
-let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
-  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+let Features = "avx2",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def permdf256
+      : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
 }
 
-
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
   def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;

>From 4ec125ac773efc610424bce2b4cc6887d54f871f Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 11:04:31 +0200
Subject: [PATCH 4/9] refactor: Fix tests and revert changes in intrinsics
 header file

---
 clang/lib/Headers/avx2intrin.h         | 12 ++++--------
 clang/test/CodeGen/X86/avx2-builtins.c |  8 ++++----
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index dbc3dd01c3cc7..d3ceb2327ac62 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3238,10 +3238,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_permute4x64_pd(__m256d __V, const int __M) {
-  return (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(__V), (int)(__M));
-}
+#define _mm256_permute4x64_pd(V, M) \
+  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
 
 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
 ///    the 256-bit vector of [8 x float] in \a __a as specified by indexes in
@@ -3297,10 +3295,8 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_permute4x64_epi64(__m256i __V, const int __M) {
-  return (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(__V), (int)(__M));
-}
+#define _mm256_permute4x64_epi64(V, M) \
+  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
 
 /// Sets each half of the 256-bit result either to zero or to one of the
 ///    four possible 128-bit halves of the 256-bit vectors \a V1 and \a V2,
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 83e7f2a25cadb..1f7b2fe7e2d39 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1117,8 +1117,8 @@ TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL,
 TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL));
 // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
 TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL));
-// Control value 0x12: [2,1,0,1] -> [C,B,A,B]
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 20LL, 10LL, 20LL));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL));
 // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
 TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL));
 // Test with negative values
@@ -1135,8 +1135,8 @@ TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.
 TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0));
 // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
 TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0));
-// Control value 0x12: [2,1,0,1] -> [C,B,A,B]
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 2.0, 1.0, 2.0));
+// Control value 0x12: [2,0,1,0] -> [C,A,B,A]
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0));
 // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
 TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0));
 

>From 10510e276975a110ca05a6df4555e0aa8968fc21 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 11:04:49 +0200
Subject: [PATCH 5/9] chore: Fix formatting

---
 clang/lib/Headers/avx2intrin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index d3ceb2327ac62..4c73a4a59e326 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3238,7 +3238,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-#define _mm256_permute4x64_pd(V, M) \
+#define _mm256_permute4x64_pd(V, M)                                            
\
   ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
 
 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
@@ -3295,7 +3295,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-#define _mm256_permute4x64_epi64(V, M) \
+#define _mm256_permute4x64_epi64(V, M)                                         
\
   ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
 
 /// Sets each half of the 256-bit result either to zero or to one of the

>From 86b263b620acedbbbde20fc2e02b05105a64a271 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 11:14:12 +0200
Subject: [PATCH 6/9] chore: revert formatting changes

---
 clang/lib/Headers/avx2intrin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 4c73a4a59e326..d3ceb2327ac62 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -3238,7 +3238,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-#define _mm256_permute4x64_pd(V, M)                                            
\
+#define _mm256_permute4x64_pd(V, M) \
   ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
 
 /// Sets the result's 256-bit vector of [8 x float] to copies of elements of
@@ -3295,7 +3295,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
 ///    \a M[1:0] specifies the index in \a a for element 0 of the result,
 ///    \a M[3:2] specifies the index for element 1, and so forth.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-#define _mm256_permute4x64_epi64(V, M)                                         
\
+#define _mm256_permute4x64_epi64(V, M) \
   ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
 
 /// Sets each half of the 256-bit result either to zero or to one of the

>From 358fb5fabe7b5f0937b3a9d7ece80436662ddea6 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 13:35:14 +0200
Subject: [PATCH 7/9] refactor: move to avx2 existing container

---
 clang/include/clang/Basic/BuiltinsX86.td | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 23eee6df926a1..1dbbe9af98207 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -580,14 +580,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
 }
 
-let Features = "avx2",
-    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
-  def permdf256
-      : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
-  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
-}
-
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
   def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
   def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;

>From 7d93e7ff5de8badecdae149003c400e38c4349a5 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 13:37:18 +0200
Subject: [PATCH 8/9] chore: Update formatting

---
 clang/include/clang/Basic/BuiltinsX86.td | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 1dbbe9af98207..03c82acfda0a8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -581,8 +581,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
 }
 
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
-  def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
-  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
+  def permdf256
+      : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
+  def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
+                             "int>, _Constant int)">;
   def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
   def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;

>From b14e20f5b48d183aabde4ec8345e16ec4925c417 Mon Sep 17 00:00:00 2001
From: ahmed <[email protected]>
Date: Wed, 3 Dec 2025 21:40:40 +0200
Subject: [PATCH 9/9] refactor:  use brace initialization

---
 clang/test/CodeGen/X86/avx2-builtins.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 1f7b2fe7e2d39..c9474e94476fc 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1112,17 +1112,17 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) {
   return _mm256_permute4x64_epi64(a, 35);
 }
 // Control value 0x00: [0,0,0,0] -> broadcast element 0
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x00), 10LL, 10LL, 10LL, 10LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x00), 40LL, 40LL, 40LL, 40LL));
 // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x1B), 40LL, 30LL, 20LL, 10LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x1B), 10LL, 20LL, 30LL, 40LL));
 // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x39), 20LL, 30LL, 40LL, 10LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x39), 30LL, 20LL, 10LL, 40LL));
 // Control value 0x12: [2,0,1,0] -> [C,A,B,A]
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0x12), 30LL, 10LL, 20LL, 10LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0x12), 20LL, 40LL, 30LL, 40LL));
 // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(40LL, 30LL, 20LL, 10LL), 0xE4), 10LL, 20LL, 30LL, 40LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){40LL, 30LL, 20LL, 10LL}), 0xE4), 40LL, 30LL, 20LL, 10LL));
 // Test with negative values
-TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(_mm256_set_epi64x(-40LL, -30LL, -20LL, -10LL), 0x1B), -40LL, -30LL, -20LL, -10LL));
+TEST_CONSTEXPR(match_v4di(_mm256_permute4x64_epi64(((__m256i)(__v4di){-40LL, -30LL, -20LL, -10LL}), 0x1B), -10LL, -20LL, -30LL, -40LL));
 
 __m256d test_mm256_permute4x64_pd(__m256d a) {
   // CHECK-LABEL: test_mm256_permute4x64_pd
@@ -1130,15 +1130,15 @@ __m256d test_mm256_permute4x64_pd(__m256d a) {
   return _mm256_permute4x64_pd(a, 25);
 }
 // Control value 0x00: [0,0,0,0] -> broadcast element 0
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x00), 1.0, 1.0, 1.0, 1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x00), 4.0, 4.0, 4.0, 4.0));
 // Control value 0x1B: [0,1,2,3] -> reverse order [3,2,1,0] = [D,C,B,A]
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x1B), 4.0, 3.0, 2.0, 1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x1B), 1.0, 2.0, 3.0, 4.0));
 // Control value 0x39: [1,2,3,0] -> rotate left [B,C,D,A]
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x39), 2.0, 3.0, 4.0, 1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x39), 3.0, 2.0, 1.0, 4.0));
 // Control value 0x12: [2,0,1,0] -> [C,A,B,A]
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0x12), 3.0, 1.0, 2.0, 1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0x12), 2.0, 4.0, 3.0, 4.0));
 // Control value 0xE4: [3,2,1,0] -> identity [A,B,C,D]
-TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(_mm256_set_pd(4.0, 3.0, 2.0, 1.0), 0xE4), 1.0, 2.0, 3.0, 4.0));
+TEST_CONSTEXPR(match_m256d(_mm256_permute4x64_pd(((__m256d){4.0, 3.0, 2.0, 1.0}), 0xE4), 4.0, 3.0, 2.0, 1.0));
 
 __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_permutevar8x32_epi32

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][X86] Add constexpr support for permute4x64_pd and permute4x64_epi64 (PR #170442)

Reply via email to