https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/167512
>From f26a73529db09f1bc6c423f930945de56656e6fd Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 11 Nov 2025 15:10:28 +0200 Subject: [PATCH 1/9] [X86] Add constexpr support for addsub and select intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 6 ++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 31 +++++++++++++++++++++++ clang/lib/Headers/pmmintrin.h | 2 +- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cd5f2c3012712..08a2be02d1929 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -93,7 +93,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } - let Features = "sse3" in { + let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,7 +121,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in { foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ef130c0a55df..3090d660dee3c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, F.subtract(RHS, RM); return F; }); + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElts = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; + } case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: @@ -4433,6 +4463,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case 
X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 972d9fe3b5e4f..58527eff88e6e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,6 +12889,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: + case X86::BI__builtin_ia32_selectss_128: + case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: @@ -13383,6 +13385,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_addsubpd: + case clang::X86::BI__builtin_ia32_addsubps: + case clang::X86::BI__builtin_ia32_addsubpd256: + case clang::X86::BI__builtin_ia32_addsubps256: { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + unsigned NumElts = SourceLHS.getVectorLength(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(NumElts); + llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); + + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); + APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LHS.subtract(RHS, RM); + } else { + // Odd indices: add + LHS.add(RHS, RM); + } + ResultElements.push_back(APValue(LHS)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 6b152bde29fc1..42bd343e326de 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -166,7 +166,7 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } >From 05ab6c9909822d411178d31c923a735a868c4271 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 11 Nov 2025 16:18:27 +0200 Subject: [PATCH 2/9] feat: Add tests --- clang/include/clang/Basic/BuiltinsX86.td | 2 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 -- clang/lib/AST/ExprConstant.cpp | 2 -- clang/lib/Headers/avx512fintrin.h | 8 ++++---- clang/lib/Headers/avxintrin.h | 4 ++-- clang/lib/Headers/pmmintrin.h | 2 +- clang/test/CodeGen/X86/avx-builtins.c | 2 ++ clang/test/CodeGen/X86/sse3-builtins.c | 2 ++ 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 08a2be02d1929..7d110fc71e15d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -4124,7 +4124,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">; def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3090d660dee3c..8e73ec24902b2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4463,8 +4463,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case 
X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58527eff88e6e..3ba7520adf195 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12889,8 +12889,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: - case X86::BI__builtin_ia32_selectss_128: - case X86::BI__builtin_ia32_selectsd_128: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..39a5c2d4c218c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,23 +8383,23 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), 
__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..33b8eaec1f99a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -147,7 +147,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing the right source operand. /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); @@ -166,7 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the right source operand. /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 42bd343e326de..4b284c41181ca 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -60,7 +60,7 @@ _mm_lddqu_si128(__m128i_u const *__p) /// A 128-bit vector of [4 x float] containing the right source operand. /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..46bc28b85d8db 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0)); __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f)); __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_and_pd diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index a82dd4080670b..44389fbdc6f77 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_addsub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0)); __m128 test_mm_addsub_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_addsub_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> 
%{{.*}}, <4 x float> %{{.*}}) return _mm_addsub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f)); __m128d test_mm_hadd_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_hadd_pd >From beebb7d6db98b7295cdbeb03ad37415f3695f872 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Tue, 11 Nov 2025 16:37:08 +0200 Subject: [PATCH 3/9] chore: update formatting --- clang/include/clang/Basic/BuiltinsX86.td | 7 ++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Headers/avx512fintrin.h | 22 ++++++++++++++-------- clang/lib/Headers/avxintrin.h | 6 ++---- clang/lib/Headers/pmmintrin.h | 3 +-- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 7d110fc71e15d..f87644830c33b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; } - - let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + let Features = "sse3", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { foreach Op = ["addsub"] in { def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; @@ -121,7 +121,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], + Features = "avx" in { foreach Op = ["addsub", "max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, 
double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8e73ec24902b2..4bedb14c61fdb 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4292,7 +4292,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::RoundingMode RM = getRoundingMode(FPO); const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); unsigned NumElts = VT->getNumElements(); - + using T = PrimConv<PT_Float>::T; for (unsigned I = 0; I < NumElts; ++I) { APFloat LElem = LHS.elem<T>(I).getAPFloat(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3ba7520adf195..2489252268a49 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13397,7 +13397,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { SmallVector<APValue, 8> ResultElements; ResultElements.reserve(NumElts); llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); - + for (unsigned I = 0; I < NumElts; ++I) { APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 39a5c2d4c218c..ef6690b46ab44 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,24 +8383,30 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 
-_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, + __m128d __A, + __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 33b8eaec1f99a..3e1618ed192c8 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -148,8 +148,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, /// \returns A 256-bit vector of [4 x double] containing the alternating sums /// and differences between both operands. static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_addsub_pd(__m256d __a, __m256d __b) -{ +_mm256_addsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); } @@ -167,8 +166,7 @@ _mm256_addsub_pd(__m256d __a, __m256d __b) /// \returns A 256-bit vector of [8 x float] containing the alternating sums and /// differences between both operands. 
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_addsub_ps(__m256 __a, __m256 __b) -{ +_mm256_addsub_ps(__m256 __a, __m256 __b) { return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 4b284c41181ca..a9a65440363c3 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -61,8 +61,7 @@ _mm_lddqu_si128(__m128i_u const *__p) /// \returns A 128-bit vector of [4 x float] containing the alternating sums and /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_addsub_ps(__m128 __a, __m128 __b) -{ +_mm_addsub_ps(__m128 __a, __m128 __b) { return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); } >From 83a05549d73304cabfd775204ffcdd0f27def278 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 13:50:12 +0200 Subject: [PATCH 4/9] feat: use static func --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 58 +++++++++++++----------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 4bedb14c61fdb..915aa3b8c0a0b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2714,6 +2714,35 @@ static bool interp_builtin_horizontal_fp_binop( return true; } +static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + // Addsub: alternates between subtraction and addition + // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) + const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); + llvm::RoundingMode RM = getRoundingMode(FPO); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElts = VT->getNumElements(); + + using T = PrimConv<PT_Float>::T; + for (unsigned I = 0; I < NumElts; ++I) { + APFloat LElem = LHS.elem<T>(I).getAPFloat(); + APFloat RElem = RHS.elem<T>(I).getAPFloat(); + if (I % 2 == 0) { + // Even indices: subtract + LElem.subtract(RElem, RM); + } else { + // Odd indices: add + LElem.add(RElem, RM); + } + Dst.elem<T>(I) = static_cast<T>(LElem); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop_fp( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APFloat(const APFloat &, const APFloat &, @@ -4282,33 +4311,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_addsubpd: case clang::X86::BI__builtin_ia32_addsubps: case clang::X86::BI__builtin_ia32_addsubpd256: - case clang::X86::BI__builtin_ia32_addsubps256: { - // Addsub: alternates between subtraction and addition - // Result[i] = (i % 2 == 0) ? 
(a[i] - b[i]) : (a[i] + b[i]) - const Pointer &RHS = S.Stk.pop<Pointer>(); - const Pointer &LHS = S.Stk.pop<Pointer>(); - const Pointer &Dst = S.Stk.peek<Pointer>(); - FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); - llvm::RoundingMode RM = getRoundingMode(FPO); - const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); - unsigned NumElts = VT->getNumElements(); - - using T = PrimConv<PT_Float>::T; - for (unsigned I = 0; I < NumElts; ++I) { - APFloat LElem = LHS.elem<T>(I).getAPFloat(); - APFloat RElem = RHS.elem<T>(I).getAPFloat(); - if (I % 2 == 0) { - // Even indices: subtract - LElem.subtract(RElem, RM); - } else { - // Odd indices: add - LElem.add(RElem, RM); - } - Dst.elem<T>(I) = static_cast<T>(LElem); - } - Dst.initializeAllElements(); - return true; - } + case clang::X86::BI__builtin_ia32_addsubps256: + return interp__builtin_ia32_addsub(S, OpPC, Call); case clang::X86::BI__builtin_ia32_pmuldq128: case clang::X86::BI__builtin_ia32_pmuldq256: >From 8a025596b4f562a97b6a9949b87d60773088e030 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 13:53:22 +0200 Subject: [PATCH 5/9] refactor: change property name --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 915aa3b8c0a0b..3dc95278644e5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2724,10 +2724,10 @@ static bool interp__builtin_ia32_addsub(InterpState &S, CodePtr OpPC, FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); llvm::RoundingMode RM = getRoundingMode(FPO); const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); - unsigned NumElts = VT->getNumElements(); + unsigned NumElems = VT->getNumElements(); using T = PrimConv<PT_Float>::T; - for (unsigned I = 0; I < NumElts; ++I) { + for (unsigned I = 0; I != NumElems; 
++I) { APFloat LElem = LHS.elem<T>(I).getAPFloat(); APFloat RElem = RHS.elem<T>(I).getAPFloat(); if (I % 2 == 0) { >From 4ddc53323d5df7869c64f9ad5ed248526a31d344 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 13:59:16 +0200 Subject: [PATCH 6/9] feat: move addsub to its own block --- clang/include/clang/Basic/BuiltinsX86.td | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index f87644830c33b..07a0b47a1641d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -121,9 +121,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>], - Features = "avx" in { - foreach Op = ["addsub", "max", "min"] in { +let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { + foreach Op = ["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } @@ -572,6 +571,12 @@ let Features = "avx", def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } +let Features = "avx", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def addsubpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def addsubps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; +} + let Features = "avx", Attributes = [NoThrow] in { def vzeroall : X86Builtin<"void()">; def vzeroupper : X86Builtin<"void()">; >From dd637d341fb77c01710b14ee7332fc030bc66ef3 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 14:04:42 +0200 Subject: [PATCH 7/9] feat: revert ss/sd operations --- clang/lib/Headers/avx512fintrin.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 
deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index ef6690b46ab44..997e9608e112f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8383,30 +8383,24 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, - __mmask8 __U, - __m128 __A, - __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, - __m128 __A, - __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, - __mmask8 __U, - __m128d __A, - __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, - __m128d __A, - __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } >From cbe50dd97ab29beb48de6ea313688bc02e35b40f Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 14:05:22 +0200 Subject: [PATCH 8/9] feat: format files --- clang/include/clang/Basic/BuiltinsX86.td | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td 
index 07a0b47a1641d..6ab8dcf8993fe 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -121,7 +121,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } // AVX -let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { +let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], + Features = "avx" in { foreach Op = ["max", "min"] in { def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; @@ -573,8 +574,11 @@ let Features = "avx", let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def addsubpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; - def addsubps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; + def addsubpd256 + : X86Builtin< + "_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def addsubps256 + : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } let Features = "avx", Attributes = [NoThrow] in { >From 5a09a8f6e2dc5d1dacbe44f239f1caa3cea228a4 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Mon, 17 Nov 2025 16:00:31 +0200 Subject: [PATCH 9/9] refactor: provide better naming and enhance loop condition --- clang/lib/AST/ExprConstant.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 57c44c25d0bf8..ed1f1b7508ffc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13383,12 +13383,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) return false; - unsigned NumElts = SourceLHS.getVectorLength(); + unsigned NumElems = SourceLHS.getVectorLength(); 
SmallVector<APValue, 8> ResultElements; - ResultElements.reserve(NumElts); + ResultElements.reserve(NumElems); llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E); - for (unsigned I = 0; I < NumElts; ++I) { + for (unsigned I = 0; I != NumElems; ++I) { APFloat LHS = SourceLHS.getVectorElt(I).getFloat(); APFloat RHS = SourceRHS.getVectorElt(I).getFloat(); if (I % 2 == 0) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
