https://github.com/hhy3 created https://github.com/llvm/llvm-project/pull/187656
#163732 >From efe264e92ac06abf360c2c0709d6a55a0e658af3 Mon Sep 17 00:00:00 2001 From: zh Wang <[email protected]> Date: Fri, 20 Mar 2026 15:47:02 +0800 Subject: [PATCH] [Clang][X86] Support constexpr for AVX512 compress intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 30 ++++----- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 51 +++++++++++++++ clang/lib/AST/ExprConstant.cpp | 40 ++++++++++++ clang/lib/Headers/avx512fintrin.h | 40 +++++------- clang/lib/Headers/avx512vbmi2intrin.h | 20 +++--- clang/lib/Headers/avx512vlintrin.h | 64 +++++++++---------- clang/lib/Headers/avx512vlvbmi2intrin.h | 40 +++++------- clang/test/CodeGen/X86/avx512f-builtins.c | 11 +++- clang/test/CodeGen/X86/avx512vbmi2-builtins.c | 5 ++ clang/test/CodeGen/X86/avx512vl-builtins.c | 22 ++++++- .../test/CodeGen/X86/avx512vlvbmi2-builtins.c | 10 +++ 11 files changed, 223 insertions(+), 110 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 09b4d1c9970fd..4cadd570cc6c4 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1365,51 +1365,51 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def subsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compressdf128_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compressdf256_mask : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, unsigned char)">; } -let 
Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compressdi128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compressdi256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, unsigned char)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compresshi128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, unsigned char)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compresshi256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, unsigned short)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compressqi128_mask : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, unsigned short)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compressqi256_mask : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, unsigned int)">; } -let Features = "avx512vl", Attributes = 
[NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compresssf128_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compresssf256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def compresssi128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def compresssi256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, unsigned char)">; } @@ -3210,17 +3210,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def psadbw512 : X86Builtin<"_Vector<8, long long int>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def compressdf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">; def compressdi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">; } -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def 
compresshi512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, unsigned int)">; def compressqi512_mask : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def compresssf512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, unsigned short)">; def compresssi512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, unsigned short)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index abc746af81306..5a9a8e7ae95b7 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3550,6 +3550,38 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_compress(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 3); + + APSInt Mask = popToAPSInt(S, Call->getArg(2)); + const Pointer &Passthru = S.Stk.pop<Pointer>(); + const Pointer &Source = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned NumElems = Source.getNumElems(); + PrimType ElemT = Source.getFieldDesc()->getPrimType(); + + unsigned J = 0; + for (unsigned I = 0; I != NumElems; ++I) { + if (Mask[I]) { + if (ElemT == PT_Float) + Dst.elem<Floating>(J) = Source.elem<Floating>(I); + else + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(J) = Source.elem<T>(I); }); + ++J; + } + } + for (unsigned I = J; I != NumElems; ++I) { + if (ElemT == PT_Float) + Dst.elem<Floating>(I) = Passthru.elem<Floating>(I); + else + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(I) = Passthru.elem<T>(I); }); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_cvt_vec2mask(InterpState 
&S, CodePtr OpPC, const CallExpr *Call, unsigned ID) { @@ -5112,6 +5144,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vpconflictdi_256: case X86::BI__builtin_ia32_vpconflictdi_512: return interp__builtin_ia32_vpconflict(S, OpPC, Call); + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + return interp__builtin_ia32_compress(S, OpPC, Call); case clang::X86::BI__builtin_ia32_blendpd: case clang::X86::BI__builtin_ia32_blendpd256: case clang::X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 429fef0a1afa8..b459d4043e13d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12870,6 +12870,46 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case 
X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: { + APValue Source, Passthru; + if (!EvaluateAsRValue(Info, E->getArg(0), Source) || + !EvaluateAsRValue(Info, E->getArg(1), Passthru)) + return false; + APSInt Mask; + if (!EvaluateInteger(E->getArg(2), Mask, Info)) + return false; + + unsigned NumElts = Source.getVectorLength(); + SmallVector<APValue, 64> ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned I = 0; I != NumElts; ++I) { + if (Mask[I]) + ResultElements.push_back(Source.getVectorElt(I)); + } + for (unsigned I = ResultElements.size(); I != NumElts; ++I) { + ResultElements.push_back(Passthru.getVectorElt(I)); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_vpconflictsi_128: case X86::BI__builtin_ia32_vpconflictsi_256: case X86::BI__builtin_ia32_vpconflictsi_512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 51f8369296e53..a5d2f1726cff4 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8096,68 +8096,60 @@ _mm512_stream_ps (void *__P, __m512 __A) __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR 
+_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_compress_pd(__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_compress_ps(__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ 
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index a24b6e592197c..380d1eeb5c38b 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -25,33 +25,29 @@ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, (__v32hi) __S, __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, (__v32hi) _mm512_setzero_si512(), __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, (__v64qi) __S, __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) -{ +static __inline__ __m512i 
__DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, (__v64qi) _mm512_setzero_si512(), __U); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index ea43046240cc0..03d132839d9a9 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -1530,120 +1530,120 @@ _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) { (__v4di) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) _mm_setzero_pd (), (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) _mm_setzero_si128 (), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { +static 
__inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) _mm256_setzero_si256 (), diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 
da295d2a12e62..7fa3a9ad31e82 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -32,33 +32,29 @@ #define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, (__v8hi) __S, __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, (__v8hi) _mm_setzero_si128(), __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, (__v16qi) __S, __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, (__v16qi) _mm_setzero_si128(), __U); @@ -142,33 +138,29 @@ _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, (__v16hi) __S, __U); } -static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, (__v16hi) _mm256_setzero_si256(), __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, (__v32qi) __S, __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, (__v32qi) _mm256_setzero_si256(), __U); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index e67e21daaa329..b457fd7c93b58 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9732,9 +9732,18 @@ __m512i test_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) __m512i test_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_compress_epi32 // CHECK: @llvm.x86.avx512.mask.compress - return _mm512_maskz_compress_epi32(__U, __A); + return _mm512_maskz_compress_epi32(__U, __A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_compress_pd((__m512d){99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0}, 0xB4, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2.0, 4.0, 5.0, 7.0, 99.0, 99.0, 99.0, 99.0)); +TEST_CONSTEXPR(match_m512d(_mm512_maskz_compress_pd(0xB4, (__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2.0, 4.0, 5.0, 7.0, 0.0, 0.0, 0.0, 0.0)); 
+TEST_CONSTEXPR(match_v8di(_mm512_mask_compress_epi64((__m512i)(__v8di){99, 99, 99, 99, 99, 99, 99, 99}, 0xB4, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 12, 14, 15, 17, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v8di(_mm512_maskz_compress_epi64(0xB4, (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}), 12, 14, 15, 17, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_m512(_mm512_mask_compress_ps((__m512){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xA635, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 0.0f, 2.0f, 4.0f, 5.0f, 9.0f, 10.0f, 13.0f, 15.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f)); +TEST_CONSTEXPR(match_m512(_mm512_maskz_compress_ps(0xA635, (__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}), 0.0f, 2.0f, 4.0f, 5.0f, 9.0f, 10.0f, 13.0f, 15.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_compress_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xA635, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 2, 4, 5, 9, 10, 13, 15, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_compress_epi32(0xA635, (__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 2, 4, 5, 9, 10, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0)); + __mmask8 test_mm_cmp_round_ss_mask(__m128 __X, __m128 __Y) { // CHECK-LABEL: test_mm_cmp_round_ss_mask // CHECK: @llvm.x86.avx512.mask.cmp diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index 560035598a6e4..22109489c1eba 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -30,6 +30,11 @@ __m512i test_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { return 
_mm512_maskz_compress_epi8(__U, __D); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_compress_epi16((__m512i)(__v32hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x80000007, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 31, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_compress_epi16(0x80000007, (__m512i)(__v32hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 2, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_mask_compress_epi8((__m512i)(__v64qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8000000000000003ULL, (__m512i)(__v64qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_compress_epi8(0x8000000000000003ULL, (__m512i)(__v64qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 1, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + void test_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) { // CHECK-LABEL: test_mm512_mask_compressstoreu_epi16 // CHECK: call void @llvm.masked.compressstore.v32i16(<32 x i16> %{{.*}}, ptr %{{.*}}, <32 x i1> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index cccd67927a286..c5abf69a72c51 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -3856,8 +3856,26 @@ __m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) { __m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_compress_epi32 // CHECK: @llvm.x86.avx512.mask.compress - return _mm256_maskz_compress_epi32(__U,__A); -} + return _mm256_maskz_compress_epi32(__U,__A); +} + +TEST_CONSTEXPR(match_m128d(_mm_mask_compress_pd((__m128d){99.0, 99.0}, 0x2, (__m128d){1.0, 2.0}), 2.0, 99.0)); +TEST_CONSTEXPR(match_m128d(_mm_maskz_compress_pd(0x2, (__m128d){1.0, 2.0}), 2.0, 0.0)); +TEST_CONSTEXPR(match_m256d(_mm256_mask_compress_pd((__m256d){99.0, 99.0, 99.0, 99.0}, 0xA, (__m256d){1.0, 2.0, 3.0, 4.0}), 2.0, 4.0, 99.0, 99.0)); +TEST_CONSTEXPR(match_m256d(_mm256_maskz_compress_pd(0xA, (__m256d){1.0, 2.0, 3.0, 4.0}), 2.0, 4.0, 0.0, 0.0)); +TEST_CONSTEXPR(match_v2di(_mm_mask_compress_epi64((__m128i)(__v2di){99, 99}, 0x1, (__m128i)(__v2di){10, 20}), 10, 99)); +TEST_CONSTEXPR(match_v2di(_mm_maskz_compress_epi64(0x1, (__m128i)(__v2di){10, 20}), 10, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_mask_compress_epi64((__m256i)(__v4di){99, 99, 99, 99}, 0x6, (__m256i)(__v4di){10, 20, 30, 40}), 20, 30, 99, 99)); +TEST_CONSTEXPR(match_v4di(_mm256_maskz_compress_epi64(0x6, (__m256i)(__v4di){10, 
20, 30, 40}), 20, 30, 0, 0)); +TEST_CONSTEXPR(match_m128(_mm_mask_compress_ps((__m128){99.0f, 99.0f, 99.0f, 99.0f}, 0x5, (__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 3.0f, 99.0f, 99.0f)); +TEST_CONSTEXPR(match_m128(_mm_maskz_compress_ps(0x5, (__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 3.0f, 0.0f, 0.0f)); +TEST_CONSTEXPR(match_m256(_mm256_mask_compress_ps((__m256){99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f}, 0xA5, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 0.0f, 2.0f, 5.0f, 7.0f, 99.0f, 99.0f, 99.0f, 99.0f)); +TEST_CONSTEXPR(match_m256(_mm256_maskz_compress_ps(0xA5, (__m256){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), 0.0f, 2.0f, 5.0f, 7.0f, 0.0f, 0.0f, 0.0f, 0.0f)); +TEST_CONSTEXPR(match_v4si(_mm_mask_compress_epi32((__m128i)(__v4si){99, 99, 99, 99}, 0x9, (__m128i)(__v4si){10, 20, 30, 40}), 10, 40, 99, 99)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_compress_epi32(0x9, (__m128i)(__v4si){10, 20, 30, 40}), 10, 40, 0, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_compress_epi32((__m256i)(__v8si){99, 99, 99, 99, 99, 99, 99, 99}, 0xA5, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}), 0, 2, 5, 7, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_compress_epi32(0xA5, (__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}), 0, 2, 5, 7, 0, 0, 0, 0)); + void test_mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_compressstoreu_pd // CHECK: @llvm.masked.compressstore.v2f64(<2 x double> %{{.*}}, ptr %{{.*}}, <2 x i1> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index e1e8578ea414a..fc120a4bc6b26 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -30,6 +30,11 @@ __m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return _mm_maskz_compress_epi8(__U, __D); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_compress_epi16((__m128i)(__v8hi){99, 99, 99, 99, 99, 99, 99, 99}, 0xA5, 
(__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}), 0, 2, 5, 7, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_compress_epi16(0xA5, (__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}), 0, 2, 5, 7, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v16qi(_mm_mask_compress_epi8((__m128i)(__v16qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8003, (__m128i)(__v16qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v16qi(_mm_maskz_compress_epi8(0x8003, (__m128i)(__v16qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + void test_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { // CHECK-LABEL: test_mm_mask_compressstoreu_epi16 // CHECK: call void @llvm.masked.compressstore.v8i16(<8 x i16> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}}) @@ -114,6 +119,11 @@ __m256i test_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return _mm256_maskz_compress_epi8(__U, __D); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_compress_epi16((__m256i)(__v16hi){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x8003, (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_compress_epi16(0x8003, (__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 0, 1, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_mask_compress_epi8((__m256i)(__v32qs){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x80000003, (__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); 
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_compress_epi8(0x80000003, (__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 1, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + void test_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { // CHECK-LABEL: test_mm256_mask_compressstoreu_epi16 // CHECK: call void @llvm.masked.compressstore.v16i16(<16 x i16> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}}) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
