https://github.com/adream307 updated 
https://github.com/llvm/llvm-project/pull/202257

>From 18757a4a761d087d8a2e99cab4624b2f13ddebee Mon Sep 17 00:00:00 2001
From: adream307 <[email protected]>
Date: Thu, 4 Jun 2026 11:29:09 +0800
Subject: [PATCH] [clang][X86] Add constexpr support for mpsadbw128/256
 intrinsics

Enable constexpr evaluation for `_mm_mpsadbw_epu8` and `_mm256_mpsadbw_epu8` 
(`__builtin_ia32_mpsadbw128`/`mpsadbw256`).

Fixes #157522.

Signed-off-by: adream307 <[email protected]>
---
 clang/include/clang/Basic/BuiltinsX86.td |  7 ++--
 clang/lib/AST/ByteCode/InterpBuiltin.cpp | 52 ++++++++++++++++++++++++
 clang/lib/AST/ExprConstant.cpp           | 40 ++++++++++++++++++
 clang/test/CodeGen/X86/avx2-builtins.c   | 17 ++++++++
 clang/test/CodeGen/X86/sse41-builtins.c  |  8 ++++
 5 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index c8c371625b568..3fedae5ac289a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -315,10 +315,11 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>]
   def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant 
int)">;
   def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, 
float>, _Constant char)">;
   def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, 
_Vector<2,double>, _Constant char)">;
-  def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, 
_Vector<16, char>, _Constant char)">;
 }
 
 let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+  def mpsadbw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, 
_Vector<16, char>, _Constant char)">;
+
   def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, 
_Vector<4, float>, _Constant char)">;
 
   def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, 
short>, _Constant int)">;
@@ -574,14 +575,14 @@ let Features = "avx", Attributes = [NoThrow, Const, 
Constexpr, RequiredVectorWid
 }
 
 let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] 
in {
-  def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
-
   def psadbw256
       : X86Builtin<
             "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
 }
 
 let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, 
_Vector<32, char>, _Constant char)">;
+
   def permdf256
       : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long "
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 501c7f76a0376..d3cca9f808365 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3007,6 +3007,54 @@ static bool interp__builtin_ia32_dbpsadbw(InterpState 
&S, CodePtr OpPC,
   return true;
 }
 
+// Constant-evaluates (V)MPSADBW: per 128-bit lane, eight 16-bit sums of
+// absolute differences over sliding 4-byte windows of Src1 vs. one Src2 block.
+static bool interp__builtin_ia32_mpsadbw(InterpState &S, CodePtr OpPC,
+                                         const CallExpr *Call) {
+  assert(Call->getNumArgs() == 3);
+  uint64_t Imm;
+  if (!popToUInt64(S, Call->getArg(2), Imm))
+    return false;
+  const Pointer &Src2 = S.Stk.pop<Pointer>();
+  const Pointer &Src1 = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  const auto *SrcVT = Call->getArg(0)->getType()->castAs<VectorType>();
+  PrimType SrcElemT = *S.getContext().classify(SrcVT->getElementType());
+  unsigned SourceLen = SrcVT->getNumElements(); // 16 or 32
+  QualType DestEltTy = Call->getType()->castAs<VectorType>()->getElementType();
+  PrimType DestElemT = *S.getContext().classify(DestEltTy);
+  // Signedness is a property of the element type, not of the vector type.
+  bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+
+  constexpr unsigned LaneSize = 16; // 128-bit lane = 16 bytes
+  unsigned NumLanes = SourceLen / LaneSize;
+  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+    unsigned Ctrl = (Imm >> (3 * Lane)) & 0x7; // 3 control bits per lane
+    unsigned AOff = ((Ctrl >> 2) & 1) * 4;     // Src1 window base: 0 or 4
+    unsigned BOff = (Ctrl & 3) * 4;            // Src2 block: 0, 4, 8 or 12
+    for (unsigned J = 0; J < 8; ++J) {
+      uint16_t Sad = 0; // at most 4 * 255, so 16 bits cannot overflow
+      for (unsigned K = 0; K < 4; ++K) {
+        uint8_t A = 0, B = 0; // zeroed so they are never read uninitialized
+        INT_TYPE_SWITCH_NO_BOOL(SrcElemT, {
+          A = static_cast<uint8_t>(
+              Src1.elem<T>(Lane * LaneSize + AOff + J + K));
+          B = static_cast<uint8_t>(Src2.elem<T>(Lane * LaneSize + BOff + K));
+        });
+        Sad += (A > B) ? (A - B) : (B - A);
+      }
+      INT_TYPE_SWITCH_NO_BOOL(DestElemT, {
+        Dst.elem<T>(Lane * 8 + J) =
+            static_cast<T>(APSInt(APInt(16, Sad), DestUnsigned));
+      });
+    }
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp_builtin_horizontal_int_binop(
     InterpState &S, CodePtr OpPC, const CallExpr *Call,
     llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
@@ -5299,6 +5347,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
   case clang::X86::BI__builtin_ia32_dbpsadbw512:
     return interp__builtin_ia32_dbpsadbw(S, OpPC, Call);
 
+  case clang::X86::BI__builtin_ia32_mpsadbw128:
+  case clang::X86::BI__builtin_ia32_mpsadbw256:
+    return interp__builtin_ia32_mpsadbw(S, OpPC, Call);
+
   case clang::X86::BI__builtin_ia32_pmulhuw128:
   case clang::X86::BI__builtin_ia32_pmulhuw256:
   case clang::X86::BI__builtin_ia32_pmulhuw512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 33df4cab06e7c..82891f9d7aa44 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12695,6 +12695,46 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr 
*E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case clang::X86::BI__builtin_ia32_mpsadbw128:
+  case clang::X86::BI__builtin_ia32_mpsadbw256: {
+    APValue SourceA, SourceB, SourceImm;
+    if (!EvaluateAsRValue(Info, E->getArg(0), SourceA) ||
+        !EvaluateAsRValue(Info, E->getArg(1), SourceB) ||
+        !EvaluateAsRValue(Info, E->getArg(2), SourceImm))
+      return false;
+    unsigned SourceLen = SourceA.getVectorLength(); // source element count
+    constexpr unsigned LaneSize = 16; // source elements consumed per lane
+    unsigned NumLanes = SourceLen / LaneSize;
+    unsigned Imm = SourceImm.getInt().getZExtValue(); // control bits, all lanes
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+    SmallVector<APValue, 16> ResultElements;
+    ResultElements.reserve(SourceLen / 2); // 8 results per 16 source elements
+
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+      unsigned Ctrl = (Imm >> (3 * Lane)) & 0x7; // 3 control bits per lane
+      unsigned AOff = ((Ctrl >> 2) & 1) * 4; // SourceA window base: 0 or 4
+      unsigned BOff = (Ctrl & 3) * 4; // SourceB block start: 0, 4, 8 or 12
+      for (unsigned J = 0; J < 8; ++J) { // one result per window position
+        uint16_t Sad = 0; // sum of 4 absolute byte differences
+        for (unsigned K = 0; K < 4; ++K) {
+          uint8_t A = static_cast<uint8_t>(
+              SourceA.getVectorElt(Lane * 16 + AOff + J + K)
+                  .getInt()
+                  .getZExtValue());
+          uint8_t B =
+              static_cast<uint8_t>(SourceB.getVectorElt(Lane * 16 + BOff + K)
+                                       .getInt()
+                                       .getZExtValue());
+          Sad += (A > B) ? (A - B) : (B - A);
+        }
+        ResultElements.push_back(APValue(APSInt(APInt(16, Sad), 
DestUnsigned)));
+      }
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
   case clang::X86::BI__builtin_ia32_pmulhuw128:
   case clang::X86::BI__builtin_ia32_pmulhuw256:
   case clang::X86::BI__builtin_ia32_pmulhuw512:
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c 
b/clang/test/CodeGen/X86/avx2-builtins.c
index de3d92ea1c6cc..b829d16c3566f 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1009,6 +1009,23 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
   // CHECK: call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %{{.*}}, <32 x 
i8> %{{.*}}, i8 3)
   return _mm256_mpsadbw_epu8(x, y, 3);
 }
+// imm[2:0] selects the lane-0 offsets and imm[5:3] the lane-1 offsets; each
+// group below sweeps one field (0..7) while the other stays 0.
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 0),    353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 1),    415,406,396,384,372,360,344,330,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 2),    517,508,498,486,474,462,446,432,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 3),    603,594,584,572,560,548,532,518,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 4),    310,298,282,268,250,232,218,202,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 5),    372,360,344,330,312,294,280,264,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 6),    474,462,446,432,414,396,382,366,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 7),    560,548,532,518,500,482,468,452,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 0<<3), 353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 1<<3), 353,344,334,322,310,298,282,268,522,508,490,474,456,432,410,390));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 2<<3), 353,344,334,322,310,298,282,268,632,618,600,584,566,542,520,500));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 3<<3), 353,344,334,322,310,298,282,268,706,692,674,658,640,616,594,574));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 4<<3), 353,344,334,322,310,298,282,268,376,352,330,310,292,280,268,244));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 5<<3), 353,344,334,322,310,298,282,268,456,432,410,390,372,360,348,324));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 6<<3), 353,344,334,322,310,298,282,268,566,542,520,500,482,470,458,434));
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
 
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 7<<3), 353,344,334,322,310,298,282,268,640,616,594,574,556,544,532,508));
 
 __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_mul_epi32
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c 
b/clang/test/CodeGen/X86/sse41-builtins.c
index 1be1aa71de737..dceb86b87ce3d 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -402,6 +402,14 @@ __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
   // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %{{.*}}, <16 x 
i8> %{{.*}}, i8 1)
   return _mm_mpsadbw_epu8(x, y, 1);
 }
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 0), 640,628,612,598,580,562,548,532));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 1), 720,708,692,678,660,642,628,612));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 2), 830,818,802,788,770,752,738,722));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 3), 904,892,876,862,844,826,812,796));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 4), 580,562,548,532,516,498,480,460));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 5), 660,642,628,612,596,578,560,540));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 6), 770,752,738,722,706,688,670,650));
+TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}),
 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
 7), 844,826,812,796,780,762,744,724));
 
 __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_mul_epi32

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to