https://github.com/stomfaig updated 
https://github.com/llvm/llvm-project/pull/168861

>From b0b258c0b1a613b43d43d25b1498df858fd7e01d Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 20:48:04 +0000
Subject: [PATCH 1/9] adding cases for vpermilvarpd and vpermilvarps

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 22 +++++++++++++++++
 clang/lib/AST/ExprConstant.cpp           | 30 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cee3c1b8cf8f3..ee0f9deaee46e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4563,6 +4563,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
           return std::make_pair(0, static_cast<int>(LaneBase + Sel));
         });
 
+  case X86::BI__builtin_ia32_vpermilvarpd:
+  case X86::BI__builtin_ia32_vpermilvarpd256:
+  case X86::BI__builtin_ia32_vpermilvarpd512:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned NumElemPerLane = 2;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned Offset = ShuffleMask & 0b1;
+          return std::make_pair(0, static_cast<int>(Lane + Offset));
+        });
+    
+  case X86::BI__builtin_ia32_vpermilvarps:
+  case X86::BI__builtin_ia32_vpermilvarps256:
+  case X86::BI__builtin_ia32_vpermilvarps512:
+    return interp__builtin_ia32_shuffle_generic(
+        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+          unsigned NumElemPerLane = 4;
+          unsigned Lane = DstIdx / NumElemPerLane;
+          unsigned Offset = ShuffleMask & 0b11;
+          return std::make_pair(0, static_cast<int>(Lane + Offset));
+        });
+
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
   case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b7da89ab3dcf2..c3c084d67ab66 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13015,6 +13015,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_vpermilvarpd:
+  case X86::BI__builtin_ia32_vpermilvarpd256:
+  case X86::BI__builtin_ia32_vpermilvarpd512:
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+            unsigned NumElemPerLane = 2;
+            unsigned Lane = DstIdx / NumElemPerLane;
+            unsigned Offset = Mask & 0b1;
+            return std::make_pair(0, static_cast<int>(Lane + Offset));
+          }))
+      return false;
+    return Success(R, E);
+    
+  case X86::BI__builtin_ia32_vpermilvarps:
+  case X86::BI__builtin_ia32_vpermilvarps256:
+  case X86::BI__builtin_ia32_vpermilvarps512:
+    APValue R;
+    if (!evalShuffleGeneric(
+            Info, E, R,
+            [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+            unsigned NumElemPerLane = 4;
+            unsigned Lane = DstIdx / NumElemPerLane;
+            unsigned Offset = Mask & 0b11;
+            return std::make_pair(0, static_cast<int>(Lane + Offset));
+        }))
+      return false;
+    return Success(R, E);
+
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

>From 18f7f8eb509a54d4ff437e3f44296c34a94fd9fa Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Fri, 14 Nov 2025 20:48:52 +0000
Subject: [PATCH 2/9] format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  2 +-
 clang/lib/AST/ExprConstant.cpp           | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ee0f9deaee46e..9972c0924826f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4573,7 +4573,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
           unsigned Offset = ShuffleMask & 0b1;
           return std::make_pair(0, static_cast<int>(Lane + Offset));
         });
-    
+
   case X86::BI__builtin_ia32_vpermilvarps:
   case X86::BI__builtin_ia32_vpermilvarps256:
   case X86::BI__builtin_ia32_vpermilvarps512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c3c084d67ab66..e6f0a5964894f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13022,14 +13022,14 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
     if (!evalShuffleGeneric(
             Info, E, R,
             [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
-            unsigned NumElemPerLane = 2;
-            unsigned Lane = DstIdx / NumElemPerLane;
-            unsigned Offset = Mask & 0b1;
-            return std::make_pair(0, static_cast<int>(Lane + Offset));
-          }))
+              unsigned NumElemPerLane = 2;
+              unsigned Lane = DstIdx / NumElemPerLane;
+              unsigned Offset = Mask & 0b1;
+              return std::make_pair(0, static_cast<int>(Lane + Offset));
+            }))
       return false;
     return Success(R, E);
-    
+
   case X86::BI__builtin_ia32_vpermilvarps:
   case X86::BI__builtin_ia32_vpermilvarps256:
   case X86::BI__builtin_ia32_vpermilvarps512:
@@ -13037,11 +13037,11 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
     if (!evalShuffleGeneric(
             Info, E, R,
             [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
-            unsigned NumElemPerLane = 4;
-            unsigned Lane = DstIdx / NumElemPerLane;
-            unsigned Offset = Mask & 0b11;
-            return std::make_pair(0, static_cast<int>(Lane + Offset));
-        }))
+              unsigned NumElemPerLane = 4;
+              unsigned Lane = DstIdx / NumElemPerLane;
+              unsigned Offset = Mask & 0b11;
+              return std::make_pair(0, static_cast<int>(Lane + Offset));
+            }))
       return false;
     return Success(R, E);
 

>From 6926f0bd637a625d458a386d7dde957fb6b878bc Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 09:50:40 +0000
Subject: [PATCH 3/9] correct logic

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  6 +++---
 clang/lib/AST/ExprConstant.cpp           | 14 ++++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9972c0924826f..ce15048d6d31a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4570,8 +4570,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
         S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
           unsigned NumElemPerLane = 2;
           unsigned Lane = DstIdx / NumElemPerLane;
-          unsigned Offset = ShuffleMask & 0b1;
-          return std::make_pair(0, static_cast<int>(Lane + Offset));
+          unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
+          return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + 
Offset));
         });
 
   case X86::BI__builtin_ia32_vpermilvarps:
@@ -4582,7 +4582,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
           unsigned NumElemPerLane = 4;
           unsigned Lane = DstIdx / NumElemPerLane;
           unsigned Offset = ShuffleMask & 0b11;
-          return std::make_pair(0, static_cast<int>(Lane + Offset));
+          return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + 
Offset));
         });
 
   case X86::BI__builtin_ia32_kandqi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e6f0a5964894f..e8b7f78848aeb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13017,22 +13017,23 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
 
   case X86::BI__builtin_ia32_vpermilvarpd:
   case X86::BI__builtin_ia32_vpermilvarpd256:
-  case X86::BI__builtin_ia32_vpermilvarpd512:
+  case X86::BI__builtin_ia32_vpermilvarpd512: {
     APValue R;
     if (!evalShuffleGeneric(
             Info, E, R,
             [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
               unsigned NumElemPerLane = 2;
               unsigned Lane = DstIdx / NumElemPerLane;
-              unsigned Offset = Mask & 0b1;
-              return std::make_pair(0, static_cast<int>(Lane + Offset));
+              unsigned Offset = Mask & 0b10 ? 1 : 0;
+              return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane 
+ Offset));
             }))
       return false;
     return Success(R, E);
+  }
 
   case X86::BI__builtin_ia32_vpermilvarps:
   case X86::BI__builtin_ia32_vpermilvarps256:
-  case X86::BI__builtin_ia32_vpermilvarps512:
+  case X86::BI__builtin_ia32_vpermilvarps512: {
     APValue R;
     if (!evalShuffleGeneric(
             Info, E, R,
@@ -13040,11 +13041,12 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
               unsigned NumElemPerLane = 4;
               unsigned Lane = DstIdx / NumElemPerLane;
               unsigned Offset = Mask & 0b11;
-              return std::make_pair(0, static_cast<int>(Lane + Offset));
+              return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane 
+ Offset));
             }))
       return false;
     return Success(R, E);
-
+  }
+  
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

>From 62653fe6ccf55b01bbf6b9ea9de80139e2fed451 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 09:52:01 +0000
Subject: [PATCH 4/9] make intrinsics constexpr

---
 clang/include/clang/Basic/BuiltinsX86.td |  8 +++++++-
 clang/lib/Headers/avx512fintrin.h        | 12 ++++++------
 clang/lib/Headers/avx512vlintrin.h       | 16 ++++++++--------
 clang/lib/Headers/avxintrin.h            |  8 ++++----
 4 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 69d18679fd6ec..9382a1168a294 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -493,11 +493,14 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
   def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant int)">;
 }
 
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, 
_Vector<2, long long int>)">;
   def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<4, int>)">;
   def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, 
_Vector<4, long long int>)">;
   def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, 
_Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
   def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
   def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, 
double>, _Constant char)">;
   def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, 
float>, _Constant char)">;
@@ -2369,6 +2372,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
 let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
   def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
   def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
 }
diff --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 79c37173ac838..59b58f7a0680a 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5897,13 +5897,13 @@ _mm_cvttss_u64 (__m128 __A)
                                        (__v16sf)_mm512_permute_ps((X), (C)), \
                                        (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutevar_pd(__m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5911,7 +5911,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, 
__m512d __A, __m512i __C)
                                          (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5919,13 +5919,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, 
__m512i __C)
                                          (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutevar_ps(__m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -5933,7 +5933,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, 
__m512 __A, __m512i __C)
                                         (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
diff --git a/clang/lib/Headers/avx512vlintrin.h 
b/clang/lib/Headers/avx512vlintrin.h
index 1e6e42df6b5fb..9697eacda2c7d 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5855,7 +5855,7 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
                                        (__v8sf)_mm256_permute_ps((X), (C)), \
                                        (__v8sf)_mm256_setzero_ps()))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5863,7 +5863,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d 
__A, __m128i __C)
                                             (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5871,7 +5871,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, 
__m128i __C)
                                             (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5879,7 +5879,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, 
__m256d __A, __m256i __C)
                                          (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5887,7 +5887,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, 
__m256i __C)
                                          (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5895,7 +5895,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 
__A, __m128i __C)
                                             (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5903,7 +5903,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i 
__C)
                                             (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5911,7 +5911,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, 
__m256 __A, __m256i __C)
                                           (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 4aef9245323fb..247530e2f56c0 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -789,7 +789,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR 
_mm256_hsub_ps(__m256 __a,
 ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
 ///         returned vector.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutevar_pd(__m128d __a, __m128i __c)
 {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
@@ -828,7 +828,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
 ///      1: Bits [255:192] of the source are copied to bits [255:192] of the
 ///    returned vector.
 /// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_permutevar_pd(__m256d __a, __m256i __c)
 {
   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
@@ -883,7 +883,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
 ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
 ///          returned vector.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutevar_ps(__m128 __a, __m128i __c)
 {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
@@ -974,7 +974,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
 ///      11: Bits [255:224] of the source are copied to bits [255:224] of the
 ///          returned vector.
 /// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_permutevar_ps(__m256 __a, __m256i __c)
 {
   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);

>From 4786836d15f01a536ad1dd9a7b898c0437d85376 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 09:52:17 +0000
Subject: [PATCH 5/9] add tests

---
 clang/test/CodeGen/X86/avx-builtins.c      | 28 +++++++++
 clang/test/CodeGen/X86/avx512f-builtins.c  | 48 +++++++++++++++
 clang/test/CodeGen/X86/avx512vl-builtins.c | 68 ++++++++++++++++++++++
 3 files changed, 144 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx-builtins.c 
b/clang/test/CodeGen/X86/avx-builtins.c
index 737febbc7fef6..ec58d646117aa 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1447,24 +1447,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
   // CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> 
%{{.*}}, <2 x i64> %{{.*}})
   return _mm_permutevar_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_permutevar_pd(
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 0.0
+));
 
 __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
   // CHECK-LABEL: test_mm256_permutevar_pd
   // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x 
double> %{{.*}}, <4 x i64> %{{.*}})
   return _mm256_permutevar_pd(A, B);
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_permutevar_pd(
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 0.0, 3.0, 2.0
+));
 
 __m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
   // CHECK-LABEL: test_mm_permutevar_ps
   // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> 
%{{.*}}, <4 x i32> %{{.*}})
   return _mm_permutevar_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_permutevar_ps(
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 2.0, 1.0, 0.0
+));
 
 __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
   // CHECK-LABEL: test_mm256_permutevar_ps
   // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> 
%{{.*}}, <8 x i32> %{{.*}})
   return _mm256_permutevar_ps(A, B);
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_permutevar_ps(
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01})
+  ),
+  3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0
+));
 
 __m256 test_mm256_rcp_ps(__m256 A) {
   // CHECK-LABEL: test_mm256_rcp_ps
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index 71e700af0069e..33047fd351039 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5488,6 +5488,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i 
__C) {
   // CHECK: @llvm.x86.avx512.vpermilvar.pd.512
   return _mm512_permutevar_pd(__A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_permutevar_pd(
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0
+));
 
 __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, 
__m512i __C) {
   // CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5495,6 +5502,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, 
__mmask8 __U, __m512d __A, __
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_mask_permutevar_pd(
+    ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+    (__mmask8)0b01010101,
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0
+));
 
 __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 
{
   // CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5502,12 +5518,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, 
__m512d __A, __m512i __C) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512d(
+  _mm512_maskz_permutevar_pd(
+    (__mmask8)0b01010101,
+    ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0
+));
 
 __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   // CHECK-LABEL: test_mm512_permutevar_ps
   // CHECK: @llvm.x86.avx512.vpermilvar.ps.512
   return _mm512_permutevar_ps(__A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_permutevar_ps(
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 
13.0, 12.0
+));
 
 __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, 
__m512i __C) {
   // CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5515,6 +5546,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, 
__mmask16 __U, __m512 __A, __m5
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_mask_permutevar_ps(
+    ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 
26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+    (__mmask16)0b0101010101010101,
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 
29.0, 13.0, 31.0
+));
 
 __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   // CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5522,6 +5562,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, 
__m512 __A, __m512i __C) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}
   return _mm512_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m512(
+  _mm512_maskz_permutevar_ps(
+    (__mmask16)0b0101010101010101,
+    ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
+    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 
13.0, 0.0
+));
 
 __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
   // CHECK-LABEL: test_mm512_permutex2var_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c 
b/clang/test/CodeGen/X86/avx512vl-builtins.c
index a7eee79c97539..be0d1bbd4fdbf 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8007,6 +8007,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 
__U, __m128d __A, __m12
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_mask_permutevar_pd(
+    ((__m128d){3.0, 4.0}),
+    (__mmask8)0b01,
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 4.0
+));
 
 __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_permutevar_pd
@@ -8014,6 +8023,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, 
__m128d __A, __m128i __C) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128d(
+  _mm_maskz_permutevar_pd(
+    (__mmask8)0b01,
+    ((__m128d){0.0, 1.0}),
+    ((__m128i){0b10, 0b00})
+  ),
+  1.0, 0.0
+));
 
 __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, 
__m256i __C) {
   // CHECK-LABEL: test_mm256_mask_permutevar_pd
@@ -8021,6 +8038,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, 
__mmask8 __U, __m256d __A, __
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_permutevar_pd(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_mask_permutevar_pd(
+    ((__m256d){4.0, 5.0, 6.0, 7.0}),
+    (__mmask8)0b0101,
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 5.0, 3.0, 7.0
+));
 
 __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 
{
   // CHECK-LABEL: test_mm256_maskz_permutevar_pd
@@ -8028,6 +8054,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, 
__m256d __A, __m256i __C) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_permutevar_pd(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256d(
+  _mm256_maskz_permutevar_pd(
+    (__mmask8)0b0101,
+    ((__m256d){0.0, 1.0, 2.0, 3.0}),
+    ((__m256i){0b10, 0b00, 0b10, 0b00})
+  ),
+  1.0, 0.0, 3.0, 0.0
+));
 
 __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, 
__m128i __C) {
   // CHECK-LABEL: test_mm_mask_permutevar_ps
@@ -8035,6 +8069,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 
__U, __m128 __A, __m128i
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_mask_permutevar_ps(
+    ((__m128){4.0, 5.0, 6.0, 7.0}),
+    (__mmask8)0b0101,
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 5.0, 1.0, 7.0
+));
 
 __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
   // CHECK-LABEL: test_mm_maskz_permutevar_ps
@@ -8042,6 +8085,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 
__A, __m128i __C) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m128(
+  _mm_maskz_permutevar_ps(
+    (__mmask8)0b0101,
+    ((__m128){0.0, 1.0, 2.0, 3.0}),
+    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 0.0, 1.0, 0.0
+));
 
 __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, 
__m256i __C) {
   // CHECK-LABEL: test_mm256_mask_permutevar_ps
@@ -8049,6 +8100,15 @@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, 
__mmask8 __U, __m256 __A, __m25
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_permutevar_ps(__W, __U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_mask_permutevar_ps(
+    ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+    (__mmask8)0b01010101,
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 9.0, 1.0, 11.0, 7.0, 13.0, 5.0, 15.0
+));
 
 __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
   // CHECK-LABEL: test_mm256_maskz_permutevar_ps
@@ -8056,6 +8116,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, 
__m256 __A, __m256i __C) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_permutevar_ps(__U, __A, __C); 
 }
+TEST_CONSTEXPR(match_m256(
+  _mm256_maskz_permutevar_ps(
+    (__mmask8)0b01010101,
+    ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+    ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+  ),
+  3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0
+));
 
 __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_test_epi32_mask

>From 00ae3e0687859c5982057b7f71622324d7473865 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 11:47:28 +0000
Subject: [PATCH 6/9] format

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp |  2 +-
 clang/lib/AST/ExprConstant.cpp           |  2 +-
 clang/lib/Headers/avx512fintrin.h        | 18 ++++++------------
 clang/lib/Headers/avx512vlintrin.h       | 24 ++++++++----------------
 clang/lib/Headers/avxintrin.h            | 12 ++++--------
 5 files changed, 20 insertions(+), 38 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8bccac746fb51..316595b81224c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4652,7 +4652,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
           unsigned Offset = ShuffleMask & 0b11;
           return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + 
Offset));
         });
-        
+
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilpd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0ee748075a6e0..36a37723d75be 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13058,7 +13058,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
       return false;
     return Success(R, E);
   }
-  
+
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilpd512: {
diff --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 3f5028f335155..e1de56069870b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5880,44 +5880,38 @@ _mm_cvttss_u64 (__m128 __A)
                                        (__v16sf)_mm512_setzero_ps()))
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_permutevar_pd(__m512d __A, __m512i __C)
-{
+_mm512_permutevar_pd(__m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
-{
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 
{
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, 
__C),
                                          (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
-{
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, 
__C),
                                          (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_permutevar_ps(__m512 __A, __m512i __C)
-{
+_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
-{
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, 
__C),
                                         (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
-{
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, 
__C),
                                         (__v16sf)_mm512_setzero_ps());
diff --git a/clang/lib/Headers/avx512vlintrin.h 
b/clang/lib/Headers/avx512vlintrin.h
index e7407bd1c722c..99c057030a4cc 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5848,64 +5848,56 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
                                        (__v8sf)_mm256_setzero_ps()))
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
-{
+_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, 
__C),
                                             (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
-{
+_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, 
__C),
                                             (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
-{
+_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 
{
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, 
__C),
                                          (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
-{
+_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, 
__C),
                                          (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
-{
+_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, 
__C),
                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
-{
+_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, 
__C),
                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
-{
+_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, 
__C),
                                           (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
-{
+_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, 
__C),
                                           (__v8sf)_mm256_setzero_ps());
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 1ea15b3e68811..44ef88db5cbce 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -788,8 +788,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR 
_mm256_hsub_ps(__m256 __a,
 ///         returned vector.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
 static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_permutevar_pd(__m128d __a, __m128i __c)
-{
+_mm_permutevar_pd(__m128d __a, __m128i __c) {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
 }
 
@@ -827,8 +826,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
 ///    returned vector.
 /// \returns A 256-bit vector of [4 x double] containing the copied values.
 static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_permutevar_pd(__m256d __a, __m256i __c)
-{
+_mm256_permutevar_pd(__m256d __a, __m256i __c) {
   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
 }
 
@@ -882,8 +880,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
 ///          returned vector.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
 static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_permutevar_ps(__m128 __a, __m128i __c)
-{
+_mm_permutevar_ps(__m128 __a, __m128i __c) {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
 }
 
@@ -973,8 +970,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
 ///          returned vector.
 /// \returns A 256-bit vector of [8 x float] containing the copied values.
 static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_permutevar_ps(__m256 __a, __m256i __c)
-{
+_mm256_permutevar_ps(__m256 __a, __m256i __c) {
   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
 }
 

>From 7459dc456239005e395abfe19e67bd870d09922a Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 11:53:47 +0000
Subject: [PATCH 7/9] format again

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++++--
 clang/lib/AST/ExprConstant.cpp           | 8 +++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 316595b81224c..511b8032b28e6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4639,7 +4639,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
           unsigned NumElemPerLane = 2;
           unsigned Lane = DstIdx / NumElemPerLane;
           unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
-          return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + 
Offset));
+          return std::make_pair(
+              0, static_cast<int>(Lane * NumElemPerLane + Offset));
         });
 
   case X86::BI__builtin_ia32_vpermilvarps:
@@ -4650,7 +4651,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const 
CallExpr *Call,
           unsigned NumElemPerLane = 4;
           unsigned Lane = DstIdx / NumElemPerLane;
           unsigned Offset = ShuffleMask & 0b11;
-          return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + 
Offset));
+          return std::make_pair(
+              0, static_cast<int>(Lane * NumElemPerLane + Offset));
         });
 
   case X86::BI__builtin_ia32_vpermilpd:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36a37723d75be..5c4e844ece04c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13053,7 +13053,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
               unsigned NumElemPerLane = 2;
               unsigned Lane = DstIdx / NumElemPerLane;
               unsigned Offset = Mask & 0b10 ? 1 : 0;
-              return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane 
+ Offset));
+              return std::make_pair(
+                  0, static_cast<int>(Lane * NumElemPerLane + Offset));
             }))
       return false;
     return Success(R, E);
@@ -13088,12 +13089,13 @@ bool VectorExprEvaluator::VisitCallExpr(const 
CallExpr *E) {
               unsigned NumElemPerLane = 4;
               unsigned Lane = DstIdx / NumElemPerLane;
               unsigned Offset = Mask & 0b11;
-              return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane 
+ Offset));
+              return std::make_pair(
+                  0, static_cast<int>(Lane * NumElemPerLane + Offset));
             }))
       return false;
     return Success(R, E);
   }
-  
+
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))

>From 33563f6d2a79e73dad003d13fb7a4d95a94cbb20 Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 16:44:21 +0000
Subject: [PATCH 8/9] resolving comments

---
 clang/include/clang/Basic/BuiltinsX86.td   | 10 ++++-----
 clang/test/CodeGen/X86/avx-builtins.c      | 10 ++++-----
 clang/test/CodeGen/X86/avx512f-builtins.c  | 24 +++++++++++-----------
 clang/test/CodeGen/X86/avx512vl-builtins.c | 20 +++++++++---------
 4 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index d842988d92812..fb2f2a21c3ef6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2378,12 +2378,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
   def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
   def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant 
int)">;
 }
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
-  def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
-  def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
-  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
-  def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
-}
 
 let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
   def rndscalesd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, 
double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int, 
_Constant int)">;
@@ -2476,6 +2470,10 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, _Constant int)">;
   def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, float>, _Constant int)">;
+  def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
+  def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
+  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
+  def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
 }
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
diff --git a/clang/test/CodeGen/X86/avx-builtins.c 
b/clang/test/CodeGen/X86/avx-builtins.c
index 0c0c8bb3326aa..00bcf9cc1da58 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1470,9 +1470,9 @@ __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
 TEST_CONSTEXPR(match_m256d(
   _mm256_permutevar_pd(
     ((__m256d){0.0, 1.0, 2.0, 3.0}),
-    ((__m256i){0b10, 0b00, 0b10, 0b00})
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
   ),
-  1.0, 0.0, 3.0, 2.0
+  1.0, 0.0, 2.0, 3.0
 ));
 
 __m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
@@ -1483,7 +1483,7 @@ __m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
 TEST_CONSTEXPR(match_m128(
   _mm_permutevar_ps(
     ((__m128){0.0, 1.0, 2.0, 3.0}),
-    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
   ),
   3.0, 2.0, 1.0, 0.0
 ));
@@ -1496,9 +1496,9 @@ __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
 TEST_CONSTEXPR(match_m256(
   _mm256_permutevar_ps(
     ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01})
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10})
   ),
-  3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0
+  3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0
 ));
 
 __m256 test_mm256_rcp_ps(__m256 A) {
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index 9273ca89516cb..8baf00b8ff1e7 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5591,9 +5591,9 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i 
__C) {
 TEST_CONSTEXPR(match_m512d(
   _mm512_permutevar_pd(
     ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
   ),
-  1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0
+  0.0, 0.0, 3.0, 2.0, 4.0, 5.0, 7.0, 7.0
 ));
 
 __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, 
__m512i __C) {
@@ -5607,9 +5607,9 @@ TEST_CONSTEXPR(match_m512d(
     ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
     (__mmask8)0b01010101,
     ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
   ),
-  1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0
+  0.0, 9.0, 3.0, 11.0, 4.0, 13.0, 7.0, 15.0
 ));
 
 __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 
{
@@ -5622,9 +5622,9 @@ TEST_CONSTEXPR(match_m512d(
   _mm512_maskz_permutevar_pd(
     (__mmask8)0b01010101,
     ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+    ((__m512i){0b00, 0b00, 0b10, 0b00, 0b00, 0b10, 0b10, 0b10})
   ),
-  1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0
+  0.0, 0.0, 3.0, 0.0, 4.0, 0.0, 7.0, 0.0
 ));
 
 __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
@@ -5635,9 +5635,9 @@ __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
 TEST_CONSTEXPR(match_m512(
   _mm512_permutevar_ps(
     ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
-    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
   ),
-  3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 
13.0, 12.0
+  3.0, 2.0, 1.0, 0.0, 5.0, 4.0, 7.0, 6.0, 10.0, 11.0, 8.0, 9.0, 12.0, 15.0, 
13.0, 14.0
 ));
 
 __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, 
__m512i __C) {
@@ -5651,9 +5651,9 @@ TEST_CONSTEXPR(match_m512(
     ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 
26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
     (__mmask16)0b0101010101010101,
     ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
-    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
   ),
-  3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 
29.0, 13.0, 31.0
+  3.0, 17.0, 1.0, 19.0, 5.0, 21.0, 7.0, 23.0, 10.0, 25.0, 8.0, 27.0, 12.0, 
29.0, 13.0, 31.0
 ));
 
 __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
@@ -5666,9 +5666,9 @@ TEST_CONSTEXPR(match_m512(
   _mm512_maskz_permutevar_ps(
     (__mmask16)0b0101010101010101,
     ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 
12.0, 13.0, 14.0, 15.0}),
-    ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 
0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+    ((__m512i)(__v16si){0b11, 0b10, 0b01, 0b00, 0b01, 0b00, 0b11, 0b10, 0b10, 
0b11, 0b00, 0b01, 0b00, 0b11, 0b01, 0b10})
   ),
-  3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 
13.0, 0.0
+  3.0, 0.0, 1.0, 0.0, 5.0, 0.0, 7.0, 0.0, 10.0, 0.0, 8.0, 0.0, 12.0, 0.0, 
13.0, 0.0
 ));
 
 __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c 
b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 8192a32cf5113..5efcaadb065be 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8167,9 +8167,9 @@ TEST_CONSTEXPR(match_m256d(
     ((__m256d){4.0, 5.0, 6.0, 7.0}),
     (__mmask8)0b0101,
     ((__m256d){0.0, 1.0, 2.0, 3.0}),
-    ((__m256i){0b10, 0b00, 0b10, 0b00})
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
   ),
-  1.0, 5.0, 3.0, 7.0
+  1.0, 5.0, 2.0, 7.0
 ));
 
 __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 
{
@@ -8182,9 +8182,9 @@ TEST_CONSTEXPR(match_m256d(
   _mm256_maskz_permutevar_pd(
     (__mmask8)0b0101,
     ((__m256d){0.0, 1.0, 2.0, 3.0}),
-    ((__m256i){0b10, 0b00, 0b10, 0b00})
+    ((__m256i){0b10, 0b00, 0b00, 0b10})
   ),
-  1.0, 0.0, 3.0, 0.0
+  1.0, 0.0, 2.0, 0.0
 ));
 
 __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, 
__m128i __C) {
@@ -8198,7 +8198,7 @@ TEST_CONSTEXPR(match_m128(
     ((__m128){4.0, 5.0, 6.0, 7.0}),
     (__mmask8)0b0101,
     ((__m128){0.0, 1.0, 2.0, 3.0}),
-    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
   ),
   3.0, 5.0, 1.0, 7.0
 ));
@@ -8213,7 +8213,7 @@ TEST_CONSTEXPR(match_m128(
   _mm_maskz_permutevar_ps(
     (__mmask8)0b0101,
     ((__m128){0.0, 1.0, 2.0, 3.0}),
-    ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+    ((__m128i)(__v4si){0b11, 0b10, 0b01, 0b00})
   ),
   3.0, 0.0, 1.0, 0.0
 ));
@@ -8229,9 +8229,9 @@ TEST_CONSTEXPR(match_m256(
     ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
     (__mmask8)0b01010101,
     ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
   ),
-  3.0, 9.0, 1.0, 11.0, 7.0, 13.0, 5.0, 15.0
+  3.0, 9.0, 1.0, 11.0, 4.0, 13.0, 5.0, 15.0
 ));
 
 __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
@@ -8244,9 +8244,9 @@ TEST_CONSTEXPR(match_m256(
   _mm256_maskz_permutevar_ps(
     (__mmask8)0b01010101,
     ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
-    ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+    ((__m256i)(__v8si){0b11, 0b10, 0b01, 0b00, 0b00, 0b11, 0b01, 0b10})
   ),
-  3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0
+  3.0, 0.0, 1.0, 0.0, 4.0, 0.0, 5.0, 0.0
 ));
 
 __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) {

>From 46909c1d815775fb4e1c0610934022afc7a57c7f Mon Sep 17 00:00:00 2001
From: stomfaig <[email protected]>
Date: Thu, 20 Nov 2025 16:45:48 +0000
Subject: [PATCH 9/9] format

---
 clang/include/clang/Basic/BuiltinsX86.td | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index fb2f2a21c3ef6..b07a7b0cb2793 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2470,10 +2470,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>
 let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, double>, _Constant int)">;
   def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, float>, _Constant int)">;
-  def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Constant int)">;
-  def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Constant int)">;
-  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, 
_Vector<8, long long int>)">;
-  def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, 
_Vector<16, int>)">;
+  def vpermilpd512
+      : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
+  def vpermilps512
+      : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
+  def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, "
+                                   "_Vector<8, long long int>)">;
+  def vpermilvarps512
+      : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
 }
 
 let Features = "avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to