https://github.com/stomfaig updated https://github.com/llvm/llvm-project/pull/168861
>From b0b258c0b1a613b43d43d25b1498df858fd7e01d Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Fri, 14 Nov 2025 20:48:04 +0000 Subject: [PATCH 1/7] adding cases for vpermilvarpd and vpermilvarps --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 22 +++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 30 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cee3c1b8cf8f3..ee0f9deaee46e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4563,6 +4563,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return std::make_pair(0, static_cast<int>(LaneBase + Sel)); }); + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b1; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + }); + + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = ShuffleMask & 0b11; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + }); + case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b7da89ab3dcf2..c3c084d67ab66 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13015,6 +13015,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_vpermilvarpd: + case X86::BI__builtin_ia32_vpermilvarpd256: + case X86::BI__builtin_ia32_vpermilvarpd512: + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b1; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + })) + return false; + return Success(R, E); + + case X86::BI__builtin_ia32_vpermilvarps: + case X86::BI__builtin_ia32_vpermilvarps256: + case X86::BI__builtin_ia32_vpermilvarps512: + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b11; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + })) + return false; + return Success(R, E); + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) >From 18f7f8eb509a54d4ff437e3f44296c34a94fd9fa Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Fri, 14 Nov 2025 20:48:52 +0000 Subject: [PATCH 2/7] format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ee0f9deaee46e..9972c0924826f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4573,7 +4573,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Offset = ShuffleMask & 0b1; return std::make_pair(0, static_cast<int>(Lane + Offset)); }); - + case X86::BI__builtin_ia32_vpermilvarps: case X86::BI__builtin_ia32_vpermilvarps256: case X86::BI__builtin_ia32_vpermilvarps512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c3c084d67ab66..e6f0a5964894f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13022,14 +13022,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { - unsigned NumElemPerLane = 2; - unsigned Lane = DstIdx / NumElemPerLane; - unsigned Offset = Mask & 0b1; - return std::make_pair(0, static_cast<int>(Lane + Offset)); - })) + unsigned NumElemPerLane = 2; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b1; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + })) return false; return Success(R, E); - + case X86::BI__builtin_ia32_vpermilvarps: case X86::BI__builtin_ia32_vpermilvarps256: case X86::BI__builtin_ia32_vpermilvarps512: @@ -13037,11 +13037,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { - unsigned NumElemPerLane = 4; - unsigned Lane = DstIdx / NumElemPerLane; - unsigned Offset = Mask & 0b11; - return std::make_pair(0, static_cast<int>(Lane + Offset)); - })) + unsigned NumElemPerLane = 4; + unsigned Lane = DstIdx / NumElemPerLane; + unsigned Offset = Mask & 0b11; + return std::make_pair(0, static_cast<int>(Lane + Offset)); + })) return false; return Success(R, E); >From 6926f0bd637a625d458a386d7dde957fb6b878bc Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Thu, 20 Nov 2025 09:50:40 +0000 Subject: [PATCH 3/7] correct logic --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++--- clang/lib/AST/ExprConstant.cpp | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 9972c0924826f..ce15048d6d31a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4570,8 +4570,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { unsigned NumElemPerLane = 2; unsigned Lane = DstIdx / NumElemPerLane; - unsigned Offset = ShuffleMask & 0b1; - return std::make_pair(0, static_cast<int>(Lane + Offset)); + unsigned Offset = ShuffleMask & 0b10 ? 1 : 0; + return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); }); case X86::BI__builtin_ia32_vpermilvarps: @@ -4582,7 +4582,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned NumElemPerLane = 4; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = ShuffleMask & 0b11; - return std::make_pair(0, static_cast<int>(Lane + Offset)); + return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); }); case X86::BI__builtin_ia32_kandqi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e6f0a5964894f..e8b7f78848aeb 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13017,22 +13017,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_vpermilvarpd: case X86::BI__builtin_ia32_vpermilvarpd256: - case X86::BI__builtin_ia32_vpermilvarpd512: + case X86::BI__builtin_ia32_vpermilvarpd512: { APValue R; if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> { unsigned NumElemPerLane = 2; unsigned Lane = DstIdx / NumElemPerLane; - unsigned Offset = Mask & 0b1; - return std::make_pair(0, static_cast<int>(Lane + Offset)); + unsigned Offset = Mask & 0b10 ? 1 : 0; + return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); })) return false; return Success(R, E); + } case X86::BI__builtin_ia32_vpermilvarps: case X86::BI__builtin_ia32_vpermilvarps256: - case X86::BI__builtin_ia32_vpermilvarps512: + case X86::BI__builtin_ia32_vpermilvarps512: { APValue R; if (!evalShuffleGeneric( Info, E, R, @@ -13040,11 +13041,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned NumElemPerLane = 4; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = Mask & 0b11; - return std::make_pair(0, static_cast<int>(Lane + Offset)); + return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); })) return false; return Success(R, E); - + } + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) >From 62653fe6ccf55b01bbf6b9ea9de80139e2fed451 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Thu, 20 Nov 2025 09:52:01 +0000 Subject: [PATCH 4/7] make intrinsics constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 8 +++++++- clang/lib/Headers/avx512fintrin.h | 12 ++++++------ clang/lib/Headers/avx512vlintrin.h | 16 ++++++++-------- clang/lib/Headers/avxintrin.h | 8 ++++---- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 69d18679fd6ec..9382a1168a294 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -493,11 +493,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">; def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; +} + +let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">; def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">; @@ -2369,6 +2372,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">; def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">; def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">; } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 79c37173ac838..59b58f7a0680a 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5897,13 +5897,13 @@ _mm_cvttss_u64 (__m128 __A) (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5911,7 +5911,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5919,13 +5919,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -5933,7 +5933,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 1e6e42df6b5fb..9697eacda2c7d 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5855,7 +5855,7 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps())) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5863,7 +5863,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5871,7 +5871,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5879,7 +5879,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5887,7 +5887,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5895,7 +5895,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5903,7 +5903,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5911,7 +5911,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 4aef9245323fb..247530e2f56c0 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -789,7 +789,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS128 +static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); @@ -828,7 +828,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c) /// 1: Bits [255:192] of the source are copied to bits [255:192] of the /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); @@ -883,7 +883,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline __m128 __DEFAULT_FN_ATTRS128 +static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); @@ -974,7 +974,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c) /// 11: Bits [255:224] of the source are copied to bits [255:224] of the /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); >From 4786836d15f01a536ad1dd9a7b898c0437d85376 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Thu, 20 Nov 2025 09:52:17 +0000 Subject: [PATCH 5/7] add tests --- clang/test/CodeGen/X86/avx-builtins.c | 28 +++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 48 +++++++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 68 ++++++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 737febbc7fef6..ec58d646117aa 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1447,24 +1447,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}}) return _mm_permutevar_pd(A, B); } +TEST_CONSTEXPR(match_m128d( + _mm_permutevar_pd( + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 0.0 +)); __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_permutevar_pd(A, B); } +TEST_CONSTEXPR(match_m256d( + _mm256_permutevar_pd( + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 0.0, 3.0, 2.0 +)); __m128 test_mm_permutevar_ps(__m128 A, __m128i B) { // CHECK-LABEL: test_mm_permutevar_ps // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}}) return _mm_permutevar_ps(A, B); } +TEST_CONSTEXPR(match_m128( + _mm_permutevar_ps( + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i){0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 2.0, 1.0, 0.0 +)); __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_permutevar_ps(A, B); } +TEST_CONSTEXPR(match_m256( + _mm256_permutevar_ps( + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01}) + ), + 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0 +)); __m256 test_mm256_rcp_ps(__m256 A) { // CHECK-LABEL: test_mm256_rcp_ps diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 71e700af0069e..33047fd351039 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5488,6 +5488,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 return _mm512_permutevar_pd(__A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_permutevar_pd( + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0 +)); __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_mask_permutevar_pd @@ -5495,6 +5502,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_mask_permutevar_pd( + ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + (__mmask8)0b01010101, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0 +)); __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_permutevar_pd @@ -5502,12 +5518,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m512d( + _mm512_maskz_permutevar_pd( + (__mmask8)0b01010101, + ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0 +)); __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_permutevar_ps // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 return _mm512_permutevar_ps(__A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_permutevar_ps( + ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 13.0, 12.0 +)); __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_mask_permutevar_ps @@ -5515,6 +5546,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_mask_permutevar_ps( + ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}), + (__mmask16)0b0101010101010101, + ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 29.0, 13.0, 31.0 +)); __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_permutevar_ps @@ -5522,6 +5562,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m512( + _mm512_maskz_permutevar_ps( + (__mmask16)0b0101010101010101, + ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 13.0, 0.0 +)); __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { // CHECK-LABEL: test_mm512_permutex2var_epi32 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index a7eee79c97539..be0d1bbd4fdbf 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8007,6 +8007,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12 // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m128d( + _mm_mask_permutevar_pd( + ((__m128d){3.0, 4.0}), + (__mmask8)0b01, + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 4.0 +)); __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { // CHECK-LABEL: test_mm_maskz_permutevar_pd @@ -8014,6 +8023,14 @@ __m128d test_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m128d( + _mm_maskz_permutevar_pd( + (__mmask8)0b01, + ((__m128d){0.0, 1.0}), + ((__m128i){0b10, 0b00}) + ), + 1.0, 0.0 +)); __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { // CHECK-LABEL: test_mm256_mask_permutevar_pd @@ -8021,6 +8038,15 @@ __m256d test_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permutevar_pd(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m256d( + _mm256_mask_permutevar_pd( + ((__m256d){4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b0101, + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 5.0, 3.0, 7.0 +)); __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_permutevar_pd @@ -8028,6 +8054,14 @@ __m256d test_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permutevar_pd(__U, __A, __C); } +TEST_CONSTEXPR(match_m256d( + _mm256_maskz_permutevar_pd( + (__mmask8)0b0101, + ((__m256d){0.0, 1.0, 2.0, 3.0}), + ((__m256i){0b10, 0b00, 0b10, 0b00}) + ), + 1.0, 0.0, 3.0, 0.0 +)); __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { // CHECK-LABEL: test_mm_mask_permutevar_ps @@ -8035,6 +8069,15 @@ __m128 test_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m128( + _mm_mask_permutevar_ps( + ((__m128){4.0, 5.0, 6.0, 7.0}), + (__mmask8)0b0101, + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i){0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 5.0, 1.0, 7.0 +)); __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { // CHECK-LABEL: test_mm_maskz_permutevar_ps @@ -8042,6 +8085,14 @@ __m128 test_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m128( + _mm_maskz_permutevar_ps( + (__mmask8)0b0101, + ((__m128){0.0, 1.0, 2.0, 3.0}), + ((__m128i){0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 0.0, 1.0, 0.0 +)); __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { // CHECK-LABEL: test_mm256_mask_permutevar_ps @@ -8049,6 +8100,15 @@ __m256 test_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m25 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permutevar_ps(__W, __U, __A, __C); } +TEST_CONSTEXPR(match_m256( + _mm256_mask_permutevar_ps( + ((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}), + (__mmask8)0b01010101, + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 9.0, 1.0, 11.0, 7.0, 13.0, 5.0, 15.0 +)); __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_permutevar_ps @@ -8056,6 +8116,14 @@ __m256 test_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permutevar_ps(__U, __A, __C); } +TEST_CONSTEXPR(match_m256( + _mm256_maskz_permutevar_ps( + (__mmask8)0b01010101, + ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), + ((__m256i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01}) + ), + 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0 +)); __mmask8 test_mm_test_epi32_mask(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_test_epi32_mask >From 00ae3e0687859c5982057b7f71622324d7473865 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Thu, 20 Nov 2025 11:47:28 +0000 Subject: [PATCH 6/7] format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Headers/avx512fintrin.h | 18 ++++++------------ clang/lib/Headers/avx512vlintrin.h | 24 ++++++++---------------- clang/lib/Headers/avxintrin.h | 12 ++++-------- 5 files changed, 20 insertions(+), 38 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8bccac746fb51..316595b81224c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4652,7 +4652,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned Offset = ShuffleMask & 0b11; return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); }); - + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilpd512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0ee748075a6e0..36a37723d75be 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13058,7 +13058,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } - + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilpd512: { diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 3f5028f335155..e1de56069870b 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5880,44 +5880,38 @@ _mm_cvttss_u64 (__m128 __A) (__v16sf)_mm512_setzero_ps())) static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_permutevar_pd(__m512d __A, __m512i __C) -{ +_mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) -{ +_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) -{ +_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_permutevar_pd(__A, __C), (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_permutevar_ps(__m512 __A, __m512i __C) -{ +_mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) -{ +_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) -{ +_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_permutevar_ps(__A, __C), (__v16sf)_mm512_setzero_ps()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index e7407bd1c722c..99c057030a4cc 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5848,64 +5848,56 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) (__v8sf)_mm256_setzero_ps())) static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) -{ +_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) -{ +_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) -{ +_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) -{ +_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) -{ +_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) -{ +_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) -{ +_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) -{ +_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 1ea15b3e68811..44ef88db5cbce 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -788,8 +788,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_permutevar_pd(__m128d __a, __m128i __c) -{ +_mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); } @@ -827,8 +826,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c) /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_permutevar_pd(__m256d __a, __m256i __c) -{ +_mm256_permutevar_pd(__m256d __a, __m256i __c) { return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); } @@ -882,8 +880,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c) /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_permutevar_ps(__m128 __a, __m128i __c) -{ +_mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); } @@ -973,8 +970,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c) /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_permutevar_ps(__m256 __a, __m256i __c) -{ +_mm256_permutevar_ps(__m256 __a, __m256i __c) { return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } >From 7459dc456239005e395abfe19e67bd870d09922a Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Thu, 20 Nov 2025 11:53:47 +0000 Subject: [PATCH 7/7] format again --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 ++++-- clang/lib/AST/ExprConstant.cpp | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 316595b81224c..511b8032b28e6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4639,7 +4639,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned NumElemPerLane = 2; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = ShuffleMask & 0b10 ? 1 : 0; - return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); }); case X86::BI__builtin_ia32_vpermilvarps: @@ -4650,7 +4651,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned NumElemPerLane = 4; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = ShuffleMask & 0b11; - return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); }); case X86::BI__builtin_ia32_vpermilpd: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 36a37723d75be..5c4e844ece04c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13053,7 +13053,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned NumElemPerLane = 2; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = Mask & 0b10 ? 1 : 0; - return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); })) return false; return Success(R, E); @@ -13088,12 +13089,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned NumElemPerLane = 4; unsigned Lane = DstIdx / NumElemPerLane; unsigned Offset = Mask & 0b11; - return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset)); + return std::make_pair( + 0, static_cast<int>(Lane * NumElemPerLane + Offset)); })) return false; return Success(R, E); } - + case X86::BI__builtin_ia32_phminposuw128: { APValue Source; if (!Evaluate(Source, Info, E->getArg(0))) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
