https://github.com/markbhasawut updated https://github.com/llvm/llvm-project/pull/157464
>From 441f060fee2f624e1940f04ade9272cb50b0b33e Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Mon, 8 Sep 2025 15:51:33 +0700 Subject: [PATCH 1/8] [Headers][X86] Enable constexpr handling for MMX/SSE/AVX/AVX512 avg intrinsics Updates the avg builtins to support constant expression handling. --- clang/include/clang/Basic/BuiltinsX86.td | 13 +++++++------ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b4ff550d27279..995708e8374fe 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -93,8 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse2" in { - def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; - def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; @@ -106,6 +104,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">; + def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">; def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">; } @@ -575,8 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def packuswb256 : 
X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; - def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; @@ -618,6 +616,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; + def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; + def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">; def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">; @@ -1307,8 +1308,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">; def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">; - def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; - def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } @@ 
-1350,6 +1349,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 } let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">; + def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ca930737474df..47a3dfeae24d6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11694,6 +11694,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_pavgb128: + case clang::X86::BI__builtin_ia32_pavgw128: + case clang::X86::BI__builtin_ia32_pavgb256: + case clang::X86::BI__builtin_ia32_pavgw256: + case clang::X86::BI__builtin_ia32_pavgb512: + case clang::X86::BI__builtin_ia32_pavgw512: + return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: >From ddc9acc9a08c71397b6c6928f7a7eccd4db5f11a Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Mon, 8 Sep 2025 09:51:38 +0000 Subject: [PATCH 2/8] [clang][bytecode] Handle ia32_pavg* builtins This patch handles the __builtin_ia32_pavg* builtins in the bytecode interpreter, inside InterpretBuiltin (clang/lib/AST/ByteCode/InterpBuiltin.cpp).
--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a0dcdace854b9..b712222b32992 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3299,6 +3299,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_pavgb128: + case clang::X86::BI__builtin_ia32_pavgw128: + case clang::X86::BI__builtin_ia32_pavgb256: + case clang::X86::BI__builtin_ia32_pavgw256: + case clang::X86::BI__builtin_ia32_pavgb512: + case clang::X86::BI__builtin_ia32_pavgw512: + return interp__builtin_elementwise_int_binop(S, OpPC, Call, + llvm::APIntOps::avgCeilU); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: >From 09b56548169a86cf234609bb2334c897d51e72ed Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Mon, 8 Sep 2025 21:25:53 +0700 Subject: [PATCH 3/8] Update MMX/SSE/AVX/AVX512 AVG intrinsics to be used in constexpr --- clang/lib/Headers/avx2intrin.h | 10 +++---- clang/lib/Headers/avx512bwintrin.h | 32 +++++++++------------- clang/lib/Headers/avx512vlbwintrin.h | 40 +++++++++++----------------- clang/lib/Headers/emmintrin.h | 8 +++--- clang/lib/Headers/xmmintrin.h | 10 +++---- 5 files changed, 40 insertions(+), 60 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 2cacdc3c4596c..ee2dcd70d6daa 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -496,9 +496,8 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); } @@ -522,9 +521,8 @@ _mm256_avg_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_avg_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_avg_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 31e0a2242240c..94d02d2557b05 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -690,47 +690,39 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_avg_epu8 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_avg_epu16 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_avg_epu16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi) _mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 846cda67bce3f..02d44527895fc 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -795,65 +795,57 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ 
__m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index a366e0df407a9..1a631db6705bd 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2247,8 +2247,8 @@ _mm_adds_epu16(__m128i __a, __m128i __b) { /// A 128-bit unsigned [16 x i8] vector. /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded /// averages of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu8(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); } @@ -2266,8 +2266,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, /// A 128-bit unsigned [8 x i16] vector. /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded /// averages of both parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_avg_epu16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 4b52904315451..b4d2a2386fd08 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2539,9 +2539,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) /// \param __b /// A 64-bit integer vector containing one of the source operands. 
/// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu8(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu8(__m64 __a, __m64 __b) { return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a), (__v16qi)__anyext128(__b))); } @@ -2559,9 +2558,8 @@ _mm_avg_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_avg_pu16(__m64 __a, __m64 __b) -{ +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_avg_pu16(__m64 __a, __m64 __b) { return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a), (__v8hi)__anyext128(__b))); } >From b4e63e12560dd549a1b983f0c2ac28f7c20e101e Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Tue, 9 Sep 2025 00:47:03 +0700 Subject: [PATCH 4/8] Convert builtin types to unsigned to reflect changes --- clang/lib/Headers/avx2intrin.h | 4 +-- clang/lib/Headers/avx512bwintrin.h | 24 ++++++++---------- clang/lib/Headers/avx512vlbwintrin.h | 38 +++++++++++++--------------- clang/lib/Headers/emmintrin.h | 4 +-- clang/lib/Headers/xmmintrin.h | 8 +++--- 5 files changed, 36 insertions(+), 42 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index ee2dcd70d6daa..fc12a9bf15e57 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -498,7 +498,7 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) /// \returns A 256-bit integer vector containing the result. 
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b); } /// Computes the averages of the corresponding unsigned 16-bit integers in @@ -523,7 +523,7 @@ _mm256_avg_epu8(__m256i __a, __m256i __b) { /// \returns A 256-bit vector of [16 x i16] containing the result. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_avg_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b); } /// Merges 8-bit integer values from either of the two 256-bit vectors diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 94d02d2557b05..a4b34835e8363 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -692,40 +692,38 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_avg_epu8(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)__W); + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qu)_mm512_avg_epu8(__A, __B), (__v64qu)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)_mm512_setzero_si512()); + (__v64qu)_mm512_avg_epu8(__A, __B), + 
(__v64qu)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_avg_epu16(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi)__W); + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B), (__v32hu)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, - (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi) _mm512_setzero_si512()); + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B), + (__v32hu)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 02d44527895fc..13fe0abc480e0 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -797,58 +797,54 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qi)_mm_avg_epu8(__A, __B), - (__v16qi)__W); + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qu)_mm_avg_epu8(__A, __B), (__v16qu)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qi)_mm_avg_epu8(__A, __B), - 
(__v16qi)_mm_setzero_si128()); + (__v16qu)_mm_avg_epu8(__A, __B), + (__v16qu)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, - (__v32qi)_mm256_avg_epu8(__A, __B), - (__v32qi)__W); + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qu)_mm256_avg_epu8(__A, __B), (__v32qu)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, - (__v32qi)_mm256_avg_epu8(__A, __B), - (__v32qi)_mm256_setzero_si256()); + (__v32qu)_mm256_avg_epu8(__A, __B), + (__v32qu)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_avg_epu16(__A, __B), - (__v8hi)__W); + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hu)_mm_avg_epu16(__A, __B), (__v8hu)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm_avg_epu16(__A, __B), - (__v8hi)_mm_setzero_si128()); + (__v8hu)_mm_avg_epu16(__A, __B), + (__v8hu)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)__W); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B), (__v16hu)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { - return 
(__m256i)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm256_avg_epu16(__A, __B), - (__v16hi)_mm256_setzero_si256()); + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B), + (__v16hu)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 1a631db6705bd..60d80195d60e7 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2249,7 +2249,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b) { /// averages of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); + return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b); } /// Computes the rounded averages of corresponding elements of two @@ -2268,7 +2268,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b) { /// averages of both parameters. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_avg_epu16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index b4d2a2386fd08..d45fd31dc4430 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2541,8 +2541,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) /// \returns A 64-bit integer vector containing the averages of both operands. 
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_avg_pu8(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pavgb128((__v16qu)__anyext128(__a), + (__v16qu)__anyext128(__b))); } /// Computes the rounded averages of the packed unsigned 16-bit integer @@ -2560,8 +2560,8 @@ _mm_avg_pu8(__m64 __a, __m64 __b) { /// \returns A 64-bit integer vector containing the averages of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_avg_pu16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pavgw128((__v8hu)__anyext128(__a), + (__v8hu)__anyext128(__b))); } /// Subtracts the corresponding 8-bit unsigned integer values of the two >From 6422433419ebd511920f3a0e91b96d20376fd172 Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Tue, 9 Sep 2025 00:58:41 +0700 Subject: [PATCH 5/8] Add typedef for __v16qu in xmmintrin.h --- clang/lib/Headers/xmmintrin.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d45fd31dc4430..eb941dfc419c0 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); typedef unsigned short __v8hu __attribute__((__vector_size__(16))); +typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* This header should only be included in a hosted environment as it depends on * a standard library to provide allocation routines. 
*/ >From eb312ad712feed0a56e645160d90adbe8b77663c Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Tue, 9 Sep 2025 15:31:36 +0700 Subject: [PATCH 6/8] Add constexpr tests for MMX/SSE/AVX/AVX512 AVG intrinsics --- clang/test/CodeGen/X86/avx2-builtins.c | 2 ++ clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 16 ++++++++++++++++ clang/test/CodeGen/X86/mmx-builtins.c | 2 ++ clang/test/CodeGen/X86/sse2-builtins.c | 2 ++ 5 files changed, 34 insertions(+) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 724a5f693f9fe..aeb1aee4ea946 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -128,12 +128,14 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) { // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_avg_epu8(a, b); } +TEST_CONSTEXPR(match_v32qu(_mm256_avg_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); __m256i test_mm256_avg_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu16 // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_avg_epu16(a, b); } +TEST_CONSTEXPR(match_v16hu(_mm256_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); // FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar) // functions to this IR. 
In the future we could delete the corresponding diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 1f67a9e4d2e53..74cd448093c16 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1060,35 +1060,47 @@ __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pavg.b.512 return _mm512_avg_epu8(__A,__B); } +TEST_CONSTEXPR(match_v64qu(_mm512_avg_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64)); + __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v64qu(_mm512_mask_avg_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu8 // CHECK: @llvm.x86.avx512.pavg.b.512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v64qu(_mm512_maskz_avg_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 return _mm512_avg_epu16(__A,__B); } +TEST_CONSTEXPR(match_v32hu(_mm512_avg_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); + __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32hu(_mm512_mask_avg_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu16 // CHECK: @llvm.x86.avx512.pavg.w.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v32hu(_mm512_maskz_avg_epu16(0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // 
CHECK-LABEL: test_mm512_max_epi8 // CHECK: [[RES:%.*]] = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %{{.*}}, <64 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index cadfd462a1e6d..f29dfdde6e7e2 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -1206,48 +1206,64 @@ __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16qu(_mm_mask_avg_epu8((__m128i)(__v16qu){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128u)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu8 // CHECK: @llvm.x86.sse2.pavg.b // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v16qu(_mm_maskz_avg_epu8(0x00FF, (__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128i)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_avg_epu8(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32qu(_mm256_mask_avg_epu8((__m256i)(__v32qu){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){0, 0, 0, 0, 64, 64, 64, 
64, 64, 64, 127, 127, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 192, 192, 192, 192, 192, 192, 255, 255, 255, 255}, (__m256i)(__v32qu){0, 127, 128, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 127, 128, 255}), 0, 63, 64, 127, 32, 64, 95, 96, 128, 191, 63, 95, 127, 191, 191, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu8 // CHECK: @llvm.x86.avx2.pavg.b // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_avg_epu8(__U,__A,__B); } +TEST_CONSTEXPR(match_v32qu(_mm256_maskz_avg_epu8(0x0000FFFF, (__m256i)(__v32qu){0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 127, 127, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 192, 192, 192, 192, 192, 192, 255, 255, 255, 255}, (__m256i)(__v32qu){0, 127, 128, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 127, 128, 255}), 0, 63, 64, 127, 32, 64, 95, 96, 128, 191, 63, 95, 127, 191, 191, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v8hu(_mm_mask_avg_epu16((__m128i)(__v8hu){0, 1, 2, 3, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 0, 0, 0, 0)); + __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu16 // CHECK: @llvm.x86.sse2.pavg.w // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_avg_epu16(__U,__A,__B); } 
+TEST_CONSTEXPR(match_v8hu(_mm_maskz_avg_epu16(0x0F, (__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 0, 0, 0, 0)); + __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_avg_epu16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v16hu(_mm256_maskz_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu16 // CHECK: @llvm.x86.avx2.pavg.w // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_avg_epu16(__U,__A,__B); } +TEST_CONSTEXPR(match_v16hu(_mm256_maskz_avg_epu16(0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); + __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi8 // CHECK: [[RES:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b19e82383cbfd..d61c0701adc3a 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -123,12 +123,14 @@ __m64 test_mm_avg_pu8(__m64 a, __m64 b) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b( return _mm_avg_pu8(a, b); } +TEST_CONSTEXPR(match_v8qu(_mm_avg_pu8((__m64)(__v8qu){0, 1, 2, 3, 180, 150, 120, 200}, 
(__m64)(__v8qu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 170, 90, 160, 105)); __m64 test_mm_avg_pu16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_avg_pu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w( return _mm_avg_pu16(a, b); } +TEST_CONSTEXPR(match_v4hu(_mm_avg_pu16((__m64)(__v4hu){0, 1, 2, 3}, (__m64)(__v4u){0, 1, 2, 3}), 0, 1, 2, 3)); __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_cmpeq_pi8 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index f5de5069c0046..18a3ba6267a1f 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -134,12 +134,14 @@ __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_avg_epu8(A, B); } +TEST_CONSTEXPR(match_v16qu(_mm_avg_epu8((__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128i)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 128, 128, 128, 128, 150, 100, 62, 25)); __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_avg_epu16(A, B); } +TEST_CONSTEXPR(match_v8hu(_mm_avg_epu16((__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 170, 90, 160, 105)); __m128i test_mm_bslli_si128(__m128i A) { // CHECK-LABEL: test_mm_bslli_si128 >From 825ab60e27a0714d7c64b2421f90be354953e743 Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Tue, 9 Sep 2025 15:59:30 +0700 Subject: [PATCH 7/8] Remove redundant typedef for __v16qu in emmintrin.h --- clang/lib/Headers/emmintrin.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 60d80195d60e7..e4fbe011239d6 100644 --- 
a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -27,7 +27,6 @@ typedef double __v2df __attribute__((__vector_size__(16))); /* Unsigned types */ typedef unsigned long long __v2du __attribute__((__vector_size__(16))); -typedef unsigned char __v16qu __attribute__((__vector_size__(16))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ >From 35b0d7ae6b9a2f256ff33507349e24ced14bd649 Mon Sep 17 00:00:00 2001 From: Bhasawut Singhaphan <bhasa...@gmail.com> Date: Tue, 9 Sep 2025 16:37:05 +0700 Subject: [PATCH 8/8] misc: Correct builtin types for mask/maskz variant and some tests fixes --- clang/lib/Headers/avx512bwintrin.h | 12 +++++----- clang/lib/Headers/avx512vlbwintrin.h | 24 ++++++++++---------- clang/test/CodeGen/X86/avx512bw-builtins.c | 8 +++---- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 16 ++++++------- clang/test/CodeGen/X86/mmx-builtins.c | 4 ++-- clang/test/CodeGen/X86/sse2-builtins.c | 4 ++-- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index a4b34835e8363..8882048cdd07b 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -698,14 +698,14 @@ _mm512_avg_epu8(__m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qu)_mm512_avg_epu8(__A, __B), (__v64qu)__W); + (__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, - (__v64qu)_mm512_avg_epu8(__A, __B), - (__v64qu)_mm512_setzero_si512()); + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)_mm512_setzero_si512()); } static __inline__ 
__m512i __DEFAULT_FN_ATTRS512_CONSTEXPR @@ -716,14 +716,14 @@ _mm512_avg_epu16(__m512i __A, __m512i __B) { static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512( - (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B), (__v32hu)__W); + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512( - (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B), - (__v32hu)_mm512_setzero_si512()); + (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 13fe0abc480e0..7067209f09cdf 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -798,53 +798,53 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qu)_mm_avg_epu8(__A, __B), (__v16qu)__W); + (__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, - (__v16qu)_mm_avg_epu8(__A, __B), - (__v16qu)_mm_setzero_si128()); + (__v16qi)_mm_avg_epu8(__A, __B), + (__v16qi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask32)__U, (__v32qu)_mm256_avg_epu8(__A, 
__B), (__v32qu)__W); + (__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, - (__v32qu)_mm256_avg_epu8(__A, __B), - (__v32qu)_mm256_setzero_si256()); + (__v32qi)_mm256_avg_epu8(__A, __B), + (__v32qi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128( - (__mmask8)__U, (__v8hu)_mm_avg_epu16(__A, __B), (__v8hu)__W); + (__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hu)_mm_avg_epu16(__A, __B), - (__v8hu)_mm_setzero_si128()); + (__v8hi)_mm_avg_epu16(__A, __B), + (__v8hi)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256( - (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B), (__v16hu)__W); + (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256( - (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B), - (__v16hu)_mm256_setzero_si256()); + (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), + (__v16hi)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 74cd448093c16..053302ae39dee 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ 
b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1068,7 +1068,7 @@ __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_avg_epu8(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v64qu(_mm512_mask_avg_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_mask_avg_epu8((__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu8 @@ -1076,7 +1076,7 @@ __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } -TEST_CONSTEXPR(match_v64qu(_mm512_maskz_avg_epu8((__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00000000FFFFFFFF, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_avg_epu8(0x00000000FFFFFFFF, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}, (__m512i)(__v64qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_avg_epu16 @@ -1091,7 +1091,7 @@ __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m51 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_avg_epu16(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v32hu(_mm512_mask_avg_epu16((__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_avg_epu16((__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_avg_epu16 @@ -1099,7 +1099,7 @@ __m512i 
test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } -TEST_CONSTEXPR(match_v32hu(_mm512_maskz_avg_epu16(0x0000FFFF, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_avg_epu16(0x0000FFFF, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m512i)(__v32hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_max_epi8 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index f29dfdde6e7e2..ffa6bc30adf4d 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -1206,7 +1206,7 @@ __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_avg_epu8(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v16qu(_mm_mask_avg_epu8((__m128i)(__v16qu){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128u)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 0, 0, 0, 0, 0, 0, 0, 0)); 
+TEST_CONSTEXPR(match_v16qi(_mm_mask_avg_epu8((__m128i)(__v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu8 @@ -1214,7 +1214,7 @@ __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_avg_epu8(__U,__A,__B); } -TEST_CONSTEXPR(match_v16qu(_mm_maskz_avg_epu8(0x00FF, (__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128i)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16qi(_mm_maskz_avg_epu8(0x00FF, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu8 @@ -1222,7 +1222,7 @@ __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256 // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_avg_epu8(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v32qu(_mm256_mask_avg_epu8((__m256i)(__v32qu){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 127, 127, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 192, 192, 192, 192, 192, 192, 255, 255, 255, 255}, (__m256i)(__v32qu){0, 127, 128, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 
0, 64, 127, 128, 192, 255, 0, 127, 128, 255}), 0, 63, 64, 127, 32, 64, 95, 96, 128, 191, 63, 95, 127, 191, 191, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_mask_avg_epu8((__m256i)(__v32qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu8 @@ -1230,7 +1230,7 @@ __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_avg_epu8(__U,__A,__B); } -TEST_CONSTEXPR(match_v32qu(_mm256_maskz_avg_epu8(0x0000FFFF, (__m256i)(__v32qu){0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 127, 127, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 192, 192, 192, 192, 192, 192, 255, 255, 255, 255}, (__m256i)(__v32qu){0, 127, 128, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 64, 127, 128, 192, 255, 0, 127, 128, 255}), 0, 63, 64, 127, 32, 64, 95, 96, 128, 191, 63, 95, 127, 191, 191, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_avg_epu8(0x0000FFFF, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_avg_epu16 @@ -1238,7 +1238,7 @@ __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_avg_epu16(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v8hu(_mm_mask_avg_epu16((__m128i)(__v8hu){0, 1, 2, 3, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_avg_epu16((__m128i)(__v8hi){0, 1, 2, 3, 0, 0, 0, 0}, 0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_avg_epu16 @@ -1246,7 +1246,7 @@ __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_avg_epu16(__U,__A,__B); } -TEST_CONSTEXPR(match_v8hu(_mm_maskz_avg_epu16(0x0F, (__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_avg_epu16(0x0F, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 0, 0, 0, 0)); __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_avg_epu16 @@ -1254,7 +1254,7 @@ __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m25 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_avg_epu16(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v16hu(_mm256_maskz_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_avg_epu16((__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0}, 0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_avg_epu16 @@ -1262,7 +1262,7 @@ __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_avg_epu16(__U,__A,__B); } -TEST_CONSTEXPR(match_v16hu(_mm256_maskz_avg_epu16(0x00FF, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_avg_epu16(0x00FF, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_max_epi8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index d61c0701adc3a..4bfb0bc831eff 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -123,14 +123,14 @@ __m64 test_mm_avg_pu8(__m64 a, __m64 b) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b( return _mm_avg_pu8(a, b); } -TEST_CONSTEXPR(match_v8qu(_mm_avg_pu8((__m64)(__v8qu){0, 1, 2, 3, 180, 150, 120, 200}, (__m64)(__v8qu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 170, 90, 160, 105)); 
+TEST_CONSTEXPR(match_v8qu(_mm_avg_pu8((__m64)(__v8qu){0, 1, 2, 3, 18, 15, 12, 20}, (__m64)(__v8qu){0, 1, 2, 3, 16, 3, 20, 10}), 0, 1, 2, 3, 17, 9, 16, 15)); __m64 test_mm_avg_pu16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_avg_pu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w( return _mm_avg_pu16(a, b); } -TEST_CONSTEXPR(match_v4hu(_mm_avg_pu16((__m64)(__v4hu){0, 1, 2, 3}, (__m64)(__v4u){0, 1, 2, 3}), 0, 1, 2, 3)); +TEST_CONSTEXPR(match_v4hu(_mm_avg_pu16((__m64)(__v4hu){0, 1, 2, 3}, (__m64)(__vh4u){0, 1, 2, 3}), 0, 1, 2, 3)); __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_cmpeq_pi8 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 18a3ba6267a1f..0ba32bb230cdd 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -134,14 +134,14 @@ __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_avg_epu8(A, B); } -TEST_CONSTEXPR(match_v16qu(_mm_avg_epu8((__m128i)(__v16qu){0, 1, 2, 3, 180, 150, 120, 200, 255, 254, 253, 252, 100, 50, 25, 0}, (__m128i)(__v16qu){0, 1, 2, 3, 160, 30, 200, 10, 0, 1, 2, 3, 200, 150, 100, 50}), 0, 1, 2, 3, 170, 90, 160, 105, 128, 128, 128, 128, 150, 100, 62, 25)); +TEST_CONSTEXPR(match_v16qu(_mm_avg_epu8((__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_avg_epu16(A, B); } -TEST_CONSTEXPR(match_v8hu(_mm_avg_epu16((__m128i)(__v8hu){0, 1, 2, 3, 180, 150, 120, 200}, (__m128i)(__v8hu){0, 1, 2, 3, 160, 30, 200, 10}), 0, 1, 2, 3, 170, 90, 160, 105)); +TEST_CONSTEXPR(match_v8hu(_mm_avg_epu16((__m128i)(__v8hu){1, 2, 3, 4, 
5, 6, 7, 8}, (__m128i)(__v8hu){1, 2, 3, 4, 5, 6, 7, 8}), 1, 2, 3, 4, 5, 6, 7, 8)); __m128i test_mm_bslli_si128(__m128i A) { // CHECK-LABEL: test_mm_bslli_si128 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits