https://github.com/ahmednoursphinx created https://github.com/llvm/llvm-project/pull/167512
Recent commits (7fe069121b57a, 53ddeb493529a) marked several x86 intrinsics as constexpr in headers without providing the necessary constant evaluation support in the compiler backend. This caused compilation failures when attempting to use these intrinsics in constant expressions. Resolves https://github.com/llvm/llvm-project/issues/166814 Thanks for @ykhatav for syncing and collaborating on a solution for this issue >From f26a73529db09f1bc6c423f930945de56656e6fd Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 11 Nov 2025 15:10:28 +0200 Subject: [PATCH 1/2] [X86] Add constexpr support for addsub and select intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 6 ++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 31 +++++++++++++++++++++++ clang/lib/Headers/pmmintrin.h | 2 +- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cd5f2c3012712..08a2be02d1929 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -93,7 +93,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } - let Features = "sse3" in { + let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,7 +121,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in { foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ef130c0a55df..3090d660dee3c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElts = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; + } case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: @@ -4433,6 +4463,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 972d9fe3b5e4f..58527eff88e6e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,6 +12889,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: @@ -13383,6 +13385,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 6b152bde29fc1..42bd343e326de 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -166,7 +166,7 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } >From 05ab6c9909822d411178d31c923a735a868c4271 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 11 Nov 2025 16:18:27 +0200 Subject: [PATCH 2/2] feat: Add tests --- clang/include/clang/Basic/BuiltinsX86.td | 2 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 -- clang/lib/AST/ExprConstant.cpp | 2 -- clang/lib/Headers/avx512fintrin.h | 8 ++++---- clang/lib/Headers/avxintrin.h | 4 ++-- clang/lib/Headers/pmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 2 ++ clang/test/CodeGen/X86/sse3-builtins.c | 2 ++ 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 08a2be02d1929..7d110fc71e15d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3090d660dee3c..8e73ec24902b2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4463,8 +4463,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58527eff88e6e..3ba7520adf195 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,8 +12889,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..39a5c2d4c218c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,23 +8383,23 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..33b8eaec1f99a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -147,7 +147,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); @@ -166,7 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 42bd343e326de..4b284c41181ca 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -60,7 +60,7 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..46bc28b85d8db 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0)); __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f)); __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_and_pd diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index a82dd4080670b..44389fbdc6f77 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0)); __m128 test_mm_addsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_addsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f)); __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hadd_pd _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
