https://github.com/kimsh02 created https://github.com/llvm/llvm-project/pull/163685
Fix #155812 >From 5c58df9dce4491a1cf2f6bedd8a222a5a1746d09 Mon Sep 17 00:00:00 2001 From: kimsh02 <[email protected]> Date: Wed, 15 Oct 2025 19:45:55 -0700 Subject: [PATCH] Draft --- clang/include/clang/Basic/BuiltinsX86.td | 29 +++++++++----- clang/lib/AST/ExprConstant.cpp | 31 +++++++++++++++ clang/lib/Headers/avx2intrin.h | 21 +++++------ clang/lib/Headers/tmmintrin.h | 48 +++++++++++------------- clang/test/CodeGen/X86/mmx-builtins.c | 11 ++---- 5 files changed, 84 insertions(+), 56 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 279c0c7935e36..e8b4c8f66cfa0 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -124,13 +124,17 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "ssse3" in { - def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; - def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + def pmulhrsw128 + : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def psignb128 + : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; + def psignw128 + : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def psignd128 + : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; } @@ -609,10 +613,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; - def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def psadbw256 + : X86Builtin< + "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; @@ -682,7 +685,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - + + def psignb256 + : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; + def psignw256 + : X86Builtin< + "_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def psignd256 + : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a07eb2254e13f..f9b68718b51a2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12206,6 +12206,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_psignb128: + case X86::BI__builtin_ia32_psignb256: + case X86::BI__builtin_ia32_psignw128: + case X86::BI__builtin_ia32_psignw256: + case X86::BI__builtin_ia32_psignd128: + case X86::BI__builtin_ia32_psignd256: { + APValue ASource, BSource; + if (!EvaluateAsRValue(Info, E->getArg(0), ASource) || + !EvaluateAsRValue(Info, E->getArg(1), BSource)) + return false; + unsigned SourceLen = ASource.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>(); + QualType ElemT = VT->getElementType(); + unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemT); + QualType ResultElemT = E->getType()->castAs<VectorType>()->getElementType(); + bool ResultElemUnsigned = ResultElemT->isUnsignedIntegerOrEnumerationType(); + + SmallVector<APValue, 16> Result; + Result.reserve(SourceLen); + for (unsigned I = 0; I != SourceLen; ++I) { + APSInt &AElem = ASource.getVectorElt(I).getInt(); + APSInt &BElem = BSource.getVectorElt(I).getInt(); + APSInt ResultElem = + (BElem.isNegative() ? -AElem + : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned) + : AElem); + Result.emplace_back(ResultElem); + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index fa7f4c25061cf..015a932840189 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1978,10 +1978,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) { /// \param __b /// A 256-bit integer vector]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi8(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi8(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } /// Sets each element of the result to the corresponding element of the @@ -1999,10 +1998,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } /// Sets each element of the result to the corresponding element of the @@ -2020,10 +2018,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } /// Shifts each 128-bit half of the 256-bit integer vector \a a left by diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index ee96caa53f33d..6bb012e933980 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -642,10 +642,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) { /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } /// For each 16-bit integer in the first source operand, perform one of @@ -668,10 +667,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } /// For each 32-bit integer in the first source operand, perform one of @@ -694,10 +692,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control doublewords corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } /// For each 8-bit integer in the first source operand, perform one of @@ -720,11 +717,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a), + (__v16qi)__anyext128(__b))); } /// For each 16-bit integer in the first source operand, perform one of @@ -747,11 +743,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a), + (__v8hi)__anyext128(__b))); } /// For each 32-bit integer in the first source operand, perform one of @@ -774,11 +769,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b) /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a), - (__v4si)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a), + (__v4si)__anyext128(__b))); } #undef __anyext128 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index d9041d4afe5e2..a0a1f3514de95 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -7,14 +7,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx + #include <immintrin.h> @@ -597,11 +590,13 @@ __m64 test_mm_shuffle_pi16(__m64 a) { return _mm_shuffle_pi16(a, 3); } TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0)); + __m64 test_mm_sign_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi8 // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128( return _mm_sign_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0)); __m64 test_mm_sign_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi16 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
