Author: ctopper Date: Wed Jan 16 14:34:33 2019 New Revision: 351390 URL: http://llvm.org/viewvc/llvm-project?rev=351390&view=rev Log: [X86] Add versions of the avx512 gather intrinsics that take the mask as a vXi1 vector instead of a scalar
We need to custom handle these so we can turn the scalar mask into a vXi1 vector. Differential Revision: https://reviews.llvm.org/D56530 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=351390&r1=351389&r2=351390&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Jan 16 14:34:33 2019 @@ -10073,6 +10073,114 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + IID = Intrinsic::x86_avx512_mask_gather3div2_df; + break; + case X86::BI__builtin_ia32_gather3div2di: + IID = Intrinsic::x86_avx512_mask_gather3div2_di; + break; + case X86::BI__builtin_ia32_gather3div4df: + IID = Intrinsic::x86_avx512_mask_gather3div4_df; + break; + case X86::BI__builtin_ia32_gather3div4di: + IID = Intrinsic::x86_avx512_mask_gather3div4_di; + break; + case X86::BI__builtin_ia32_gather3div4sf: + IID = Intrinsic::x86_avx512_mask_gather3div4_sf; + break; + case X86::BI__builtin_ia32_gather3div4si: + IID = Intrinsic::x86_avx512_mask_gather3div4_si; + break; + case X86::BI__builtin_ia32_gather3div8sf: + IID = Intrinsic::x86_avx512_mask_gather3div8_sf; + break; + case X86::BI__builtin_ia32_gather3div8si: + IID = Intrinsic::x86_avx512_mask_gather3div8_si; + break; + case X86::BI__builtin_ia32_gather3siv2df: + IID = Intrinsic::x86_avx512_mask_gather3siv2_df; + break; + case X86::BI__builtin_ia32_gather3siv2di: + IID = Intrinsic::x86_avx512_mask_gather3siv2_di; + break; + case X86::BI__builtin_ia32_gather3siv4df: + IID = Intrinsic::x86_avx512_mask_gather3siv4_df; + break; + case X86::BI__builtin_ia32_gather3siv4di: + IID = Intrinsic::x86_avx512_mask_gather3siv4_di; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; + break; + case X86::BI__builtin_ia32_gather3siv4si: + IID = Intrinsic::x86_avx512_mask_gather3siv4_si; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; + break; + case X86::BI__builtin_ia32_gather3siv8si: + IID = Intrinsic::x86_avx512_mask_gather3siv8_si; + break; + case X86::BI__builtin_ia32_gathersiv8df: + IID = Intrinsic::x86_avx512_mask_gather_dpd_512; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_dps_512; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + IID = Intrinsic::x86_avx512_mask_gather_qpd_512; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_qps_512; + break; + case X86::BI__builtin_ia32_gathersiv8di: + IID = Intrinsic::x86_avx512_mask_gather_dpq_512; + break; + case X86::BI__builtin_ia32_gathersiv16si: + IID = Intrinsic::x86_avx512_mask_gather_dpi_512; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + IID = Intrinsic::x86_avx512_mask_gather_qpq_512; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + IID = Intrinsic::x86_avx512_mask_gather_qpi_512; + break; + } + + unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), + Ops[2]->getType()->getVectorNumElements()); + Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); + } + case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=351390&r1=351389&r2=351390&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Jan 16 14:34:33 2019 @@ -7002,97 +7002,97 @@ __m512 test_mm512_maskz_getexp_ps(__mmas __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i64gather_ps - // CHECK: @llvm.x86.avx512.gather.qps.512 + // CHECK: @llvm.x86.avx512.mask.gather.qps.512 return _mm512_i64gather_ps(__index, __addr, 2); } __m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i64gather_ps - // CHECK: @llvm.x86.avx512.gather.qps.512 + // CHECK: @llvm.x86.avx512.mask.gather.qps.512 return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i64gather_epi32 - // CHECK: @llvm.x86.avx512.gather.qpi.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 return _mm512_i64gather_epi32(__index, __addr, 2); } __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.gather.qpi.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i64gather_pd - // CHECK: @llvm.x86.avx512.gather.qpd.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 return _mm512_i64gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i64gather_pd - // CHECK: @llvm.x86.avx512.gather.qpd.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i64gather_epi64 - // CHECK: @llvm.x86.avx512.gather.qpq.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 return _mm512_i64gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.gather.qpq.512 + // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i32gather_ps - // CHECK: @llvm.x86.avx512.gather.dps.512 + // CHECK: @llvm.x86.avx512.mask.gather.dps.512 return _mm512_i32gather_ps(__index, __addr, 2); } __m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.gather.dps.512 + // CHECK: @llvm.x86.avx512.mask.gather.dps.512 return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i32gather_epi32 - // CHECK: @llvm.x86.avx512.gather.dpi.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 return _mm512_i32gather_epi32(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.gather.dpi.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i32gather_pd - // CHECK: @llvm.x86.avx512.gather.dpd.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_i32gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.gather.dpd.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_i32gather_epi64 - // CHECK: @llvm.x86.avx512.gather.dpq.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_i32gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm512_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.gather.dpq.512 + // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=351390&r1=351389&r2=351390&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Wed Jan 16 14:34:33 2019 @@ -9280,97 +9280,97 @@ __m256 test_mm256_maskz_getmant_ps(__mma __m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mmask_i64gather_pd - // CHECK: @llvm.x86.avx512.gather3div2.df + // CHECK: @llvm.x86.avx512.mask.gather3div2.df return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mmask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.gather3div2.di + // CHECK: @llvm.x86.avx512.mask.gather3div2.di return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mmask_i64gather_pd - // CHECK: @llvm.x86.avx512.gather3div4.df + // CHECK: @llvm.x86.avx512.mask.gather3div4.df return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mmask_i64gather_epi64 - // CHECK: @llvm.x86.avx512.gather3div4.di + // CHECK: @llvm.x86.avx512.mask.gather3div4.di return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mmask_i64gather_ps - // CHECK: @llvm.x86.avx512.gather3div4.sf + // CHECK: @llvm.x86.avx512.mask.gather3div4.sf return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mmask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.gather3div4.si + // CHECK: @llvm.x86.avx512.mask.gather3div4.si return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mmask_i64gather_ps - // CHECK: @llvm.x86.avx512.gather3div8.sf + // CHECK: @llvm.x86.avx512.mask.gather3div8.sf return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mmask_i64gather_epi32 - // CHECK: @llvm.x86.avx512.gather3div8.si + // CHECK: @llvm.x86.avx512.mask.gather3div8.si return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.gather3siv2.df + // CHECK: @llvm.x86.avx512.mask.gather3siv2.df return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.gather3siv2.di + // CHECK: @llvm.x86.avx512.mask.gather3siv2.di return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mask_i32gather_pd - // CHECK: @llvm.x86.avx512.gather3siv4.df + // CHECK: @llvm.x86.avx512.mask.gather3siv4.df return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mask_i32gather_epi64 - // CHECK: @llvm.x86.avx512.gather3siv4.di + // CHECK: @llvm.x86.avx512.mask.gather3siv4.di return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.gather3siv4.sf + // CHECK: @llvm.x86.avx512.mask.gather3siv4.sf return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); } __m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) { // CHECK-LABEL: @test_mm_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.gather3siv4.si + // CHECK: @llvm.x86.avx512.mask.gather3siv4.si return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mask_i32gather_ps - // CHECK: @llvm.x86.avx512.gather3siv8.sf + // CHECK: @llvm.x86.avx512.mask.gather3siv8.sf return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { // CHECK-LABEL: @test_mm256_mask_i32gather_epi32 - // CHECK: @llvm.x86.avx512.gather3siv8.si + // CHECK: @llvm.x86.avx512.mask.gather3siv8.si return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits