Author: ShihPo Hung Date: 2020-12-29T18:38:15-08:00 New Revision: 096b02ebbff72c403379b28a40f14a8c48e640f8
URL: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8 DIFF: https://github.com/llvm/llvm-project/commit/096b02ebbff72c403379b28a40f14a8c48e640f8.diff LOG: [RISCV] Add intrinsics for vcompress instruction This patch defines vcompress intrinsics and lowers them to V instructions. We worked with @rogfer01 from BSC to come up with this patch. Authored-by: Roger Ferrer Ibanez <rofir...@gmail.com> Co-Authored-by: ShihPo Hung <shihpo.h...@sifive.com> Differential revision: https://reviews.llvm.org/D93809 Added: llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll Modified: llvm/include/llvm/IR/IntrinsicsRISCV.td llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td Removed: ################################################################################ diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 8e3d6f2ed675..430687d796ba 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -189,6 +189,13 @@ let TargetPrefix = "riscv" in { LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first source vector (with mask). + // Input: (maskedoff, vector_in, mask, vl) + class RISCVUnaryAAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. 
// Input: (vector_in, vector_in, vl) class RISCVBinaryAAANoMask @@ -680,6 +687,8 @@ let TargetPrefix = "riscv" in { defm vrgather : RISCVBinaryAAX; + def "int_riscv_vcompress_mask" : RISCVUnaryAAMask; + defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; defm vasubu : RISCVSaturatingBinaryAAX; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index b0e3b455339f..6a89f34f9047 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -669,6 +669,27 @@ class VPseudoUnaryMOutMask: let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); } +// Mask can be V0~V31 +class VPseudoUnaryAnyMask<VReg RetClass, + VReg Op1Class> : + Pseudo<(outs RetClass:$rd), + (ins RetClass:$merge, + Op1Class:$rs2, + VR:$vm, GPR:$vl, ixlenimm:$sew), + []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let usesCustomInserter = 1; + let Constraints = "@earlyclobber $rd, $rd = $merge"; + let Uses = [VL, VTYPE]; + let VLIndex = 4; + let SEWIndex = 5; + let MergeOpIndex = 1; + let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); +} + class VPseudoBinaryNoMask<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, @@ -905,6 +926,13 @@ multiclass VPseudoUnaryV_M { } } +multiclass VPseudoUnaryV_V_AnyMask { + foreach m = MxList.m in { + let VLMul = m.value in + def _VM # "_" # m.MX # "_MASK" : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>; + } +} + multiclass VPseudoBinary<VReg RetClass, VReg Op1Class, DAGOperand Op2Class, @@ -1311,6 +1339,27 @@ class VPatMaskUnaryMask<string intrinsic_name, (mti.Mask VR:$rs2), (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>; +class VPatUnaryAnyMask<string intrinsic, + string inst, + string kind, + ValueType result_type, + ValueType op1_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg result_reg_class, + VReg op1_reg_class> : + Pat<(result_type 
(!cast<Intrinsic>(intrinsic#"_mask") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + (XLenVT GPR:$vl))), + (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK") + (result_type result_reg_class:$merge), + (op1_type op1_reg_class:$rs1), + (mask_type VR:$rs2), + (NoX0 GPR:$vl), sew)>; + class VPatBinaryNoMask<string intrinsic_name, string inst, string kind, @@ -1541,6 +1590,16 @@ multiclass VPatUnaryS_M<string intrinsic_name, } } +multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction, + list<VTypeInfo> vtilist> { + foreach vti = vtilist in { + def : VPatUnaryAnyMask<intrinsic, instruction, "VM", + vti.Vector, vti.Vector, vti.Mask, + vti.SEW, vti.LMul, vti.RegClass, + vti.RegClass>; + } +} + multiclass VPatUnaryM_M<string intrinsic, string inst> { @@ -2645,6 +2704,11 @@ let Predicates = [HasStdExtV, HasStdExtF] in { //===----------------------------------------------------------------------===// defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">; +//===----------------------------------------------------------------------===// +// 17.5. Vector Compress Instruction +//===----------------------------------------------------------------------===// +defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask; + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -3201,5 +3265,16 @@ let Predicates = [HasStdExtV, HasStdExtF] in { AllFloatVectors, uimm5>; } // Predicates = [HasStdExtV, HasStdExtF] +//===----------------------------------------------------------------------===// +// 17.5. 
Vector Compress Instruction +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtV] in { + defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>; +} // Predicates = [HasStdExtV] + +let Predicates = [HasStdExtV, HasStdExtF] in { + defm "" : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>; +} // Predicates = [HasStdExtV, HasStdExtF] + // Include the non-intrinsic ISel patterns include "RISCVInstrInfoVSDPatterns.td" diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll new file mode 100644 index 000000000000..b8d42eeb9e6c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll @@ -0,0 +1,650 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8( + <vscale x 1 x i8>, + <vscale x 1 x i8>, + <vscale x 1 x i1>, + i32); + +define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8( + <vscale x 1 x i8> %0, + <vscale x 1 x i8> %1, + <vscale x 1 x i1> %2, + i32 %3) + + ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8( + <vscale x 2 x i8>, + <vscale x 2 x i8>, + <vscale x 2 x i1>, + i32); + +define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8( + <vscale x 2 x i8> %0, + <vscale x 2 x i8> %1, + <vscale x 2 x i1> %2, + i32 %3) + + ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8( + <vscale x 4 x i8>, + <vscale x 4 x i8>, + <vscale x 4 x i1>, + i32); + +define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8( + <vscale x 4 x i8> %0, + <vscale x 4 x i8> %1, + <vscale x 4 x i1> %2, + i32 %3) + + ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8( + <vscale x 8 x i8>, + <vscale x 8 x i8>, + <vscale x 8 x i1>, + i32); + +define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8( + <vscale x 8 x i8> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i1> %2, + i32 %3) + + ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8( + <vscale x 16 x i8>, + <vscale x 16 x i8>, + <vscale x 16 x i1>, + i32); + +define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, 
<vscale x 16 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8( + <vscale x 16 x i8> %0, + <vscale x 16 x i8> %1, + <vscale x 16 x i1> %2, + i32 %3) + + ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8( + <vscale x 32 x i8>, + <vscale x 32 x i8>, + <vscale x 32 x i1>, + i32); + +define <vscale x 32 x i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8( + <vscale x 32 x i8> %0, + <vscale x 32 x i8> %1, + <vscale x 32 x i1> %2, + i32 %3) + + ret <vscale x 32 x i8> %a +} + +declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8( + <vscale x 64 x i8>, + <vscale x 64 x i8>, + <vscale x 64 x i1>, + i32); + +define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8( + <vscale x 64 x i8> %0, + <vscale x 64 x i8> %1, + <vscale x 64 x i1> %2, + i32 %3) + + ret <vscale x 64 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16( + <vscale x 1 x 
i16>, + <vscale x 1 x i16>, + <vscale x 1 x i1>, + i32); + +define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16( + <vscale x 1 x i16> %0, + <vscale x 1 x i16> %1, + <vscale x 1 x i1> %2, + i32 %3) + + ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16( + <vscale x 2 x i16>, + <vscale x 2 x i16>, + <vscale x 2 x i1>, + i32); + +define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16( + <vscale x 2 x i16> %0, + <vscale x 2 x i16> %1, + <vscale x 2 x i1> %2, + i32 %3) + + ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16( + <vscale x 4 x i16>, + <vscale x 4 x i16>, + <vscale x 4 x i1>, + i32); + +define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16( + <vscale x 4 x i16> %0, + <vscale x 4 x i16> %1, + <vscale x 4 x i1> %2, + i32 %3) + + ret <vscale x 
4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16( + <vscale x 8 x i16>, + <vscale x 8 x i16>, + <vscale x 8 x i1>, + i32); + +define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16( + <vscale x 8 x i16> %0, + <vscale x 8 x i16> %1, + <vscale x 8 x i1> %2, + i32 %3) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16( + <vscale x 16 x i16>, + <vscale x 16 x i16>, + <vscale x 16 x i1>, + i32); + +define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16( + <vscale x 16 x i16> %0, + <vscale x 16 x i16> %1, + <vscale x 16 x i1> %2, + i32 %3) + + ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16( + <vscale x 32 x i16>, + <vscale x 32 x i16>, + <vscale x 32 x i1>, + i32); + +define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; 
CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16( + <vscale x 32 x i16> %0, + <vscale x 32 x i16> %1, + <vscale x 32 x i1> %2, + i32 %3) + + ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32( + <vscale x 1 x i32>, + <vscale x 1 x i32>, + <vscale x 1 x i1>, + i32); + +define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32( + <vscale x 1 x i32> %0, + <vscale x 1 x i32> %1, + <vscale x 1 x i1> %2, + i32 %3) + + ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32( + <vscale x 2 x i32>, + <vscale x 2 x i32>, + <vscale x 2 x i1>, + i32); + +define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32( + <vscale x 2 x i32> %0, + <vscale x 2 x i32> %1, + <vscale x 2 x i1> %2, + i32 %3) + + ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32( + <vscale x 4 x i32>, + <vscale x 4 x i32>, + <vscale x 4 x i1>, + i32); + +define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32( + <vscale x 4 x i32> %0, + <vscale x 4 x i32> %1, + <vscale x 4 x i1> %2, + i32 %3) + + ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32( + <vscale x 8 x i32>, + <vscale x 8 x i32>, + <vscale x 8 x i1>, + i32); + +define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32( + <vscale x 8 x i32> %0, + <vscale x 8 x i32> %1, + <vscale x 8 x i1> %2, + i32 %3) + + ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32( + <vscale x 16 x i32>, + <vscale x 16 x i32>, + <vscale x 16 x i1>, + i32); + +define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32( + <vscale x 16 x i32> %0, + <vscale x 16 x i32> %1, + <vscale x 16 x i1> %2, + i32 %3) + + ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16( + <vscale x 1 x half>, + <vscale x 1 x half>, + <vscale x 1 x i1>, + i32); + +define <vscale x 1 x half> 
@intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16( + <vscale x 1 x half> %0, + <vscale x 1 x half> %1, + <vscale x 1 x i1> %2, + i32 %3) + + ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16( + <vscale x 2 x half>, + <vscale x 2 x half>, + <vscale x 2 x i1>, + i32); + +define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16( + <vscale x 2 x half> %0, + <vscale x 2 x half> %1, + <vscale x 2 x i1> %2, + i32 %3) + + ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16( + <vscale x 4 x half>, + <vscale x 4 x half>, + <vscale x 4 x i1>, + i32); + +define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16( + <vscale x 4 x half> %0, + <vscale x 4 x half> %1, + <vscale x 4 x i1> %2, + i32 %3) + + ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> 
@llvm.riscv.vcompress.mask.nxv8f16( + <vscale x 8 x half>, + <vscale x 8 x half>, + <vscale x 8 x i1>, + i32); + +define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16( + <vscale x 8 x half> %0, + <vscale x 8 x half> %1, + <vscale x 8 x i1> %2, + i32 %3) + + ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16( + <vscale x 16 x half>, + <vscale x 16 x half>, + <vscale x 16 x i1>, + i32); + +define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16( + <vscale x 16 x half> %0, + <vscale x 16 x half> %1, + <vscale x 16 x i1> %2, + i32 %3) + + ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16( + <vscale x 32 x half>, + <vscale x 32 x half>, + <vscale x 32 x i1>, + i32); + +define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 
0(ra) +entry: + %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16( + <vscale x 32 x half> %0, + <vscale x 32 x half> %1, + <vscale x 32 x i1> %2, + i32 %3) + + ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x i1>, + i32); + +define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + <vscale x 1 x i1> %2, + i32 %3) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x i1>, + i32); + +define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + <vscale x 2 x i1> %2, + i32 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x i1>, + i32); + +define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + <vscale x 4 x i1> %2, + i32 %3) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x i1>, + i32); + +define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + <vscale x 8 x i1> %2, + i32 %3) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x i1>, + i32); + +define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x i1> %2, + i32 %3) + + ret <vscale x 16 x float> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll new file mode 100644 index 000000000000..a2e9df6e3fa2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll @@ -0,0 +1,830 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8( + <vscale x 1 x i8>, + <vscale x 1 x i8>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x i8> @intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i8_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i8> @llvm.riscv.vcompress.mask.nxv1i8( + <vscale x 1 x i8> %0, + <vscale x 1 x i8> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x i8> %a +} + +declare <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8( + <vscale x 2 x i8>, + <vscale x 2 x i8>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x i8> @intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i8_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i8> @llvm.riscv.vcompress.mask.nxv2i8( + <vscale x 2 x i8> %0, + <vscale x 2 x i8> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x i8> %a +} + +declare <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8( + <vscale x 4 x i8>, + <vscale x 4 x i8>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x i8> @intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, 
<vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i8_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i8> @llvm.riscv.vcompress.mask.nxv4i8( + <vscale x 4 x i8> %0, + <vscale x 4 x i8> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x i8> %a +} + +declare <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8( + <vscale x 8 x i8>, + <vscale x 8 x i8>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x i8> @intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i8> @llvm.riscv.vcompress.mask.nxv8i8( + <vscale x 8 x i8> %0, + <vscale x 8 x i8> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x i8> %a +} + +declare <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8( + <vscale x 16 x i8>, + <vscale x 16 x i8>, + <vscale x 16 x i1>, + i64); + +define <vscale x 16 x i8> @intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i8> @llvm.riscv.vcompress.mask.nxv16i8( + <vscale x 16 x i8> %0, + <vscale x 16 x i8> %1, + <vscale x 16 x i1> %2, + i64 %3) + + ret <vscale x 16 x i8> %a +} + +declare <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8( + <vscale x 32 x i8>, + <vscale x 32 x i8>, + <vscale x 32 x i1>, + i64); + +define <vscale x 32 x 
i8> @intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 32 x i8> @llvm.riscv.vcompress.mask.nxv32i8( + <vscale x 32 x i8> %0, + <vscale x 32 x i8> %1, + <vscale x 32 x i1> %2, + i64 %3) + + ret <vscale x 32 x i8> %a +} + +declare <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8( + <vscale x 64 x i8>, + <vscale x 64 x i8>, + <vscale x 64 x i1>, + i64); + +define <vscale x 64 x i8> @intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv64i8_nxv64i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e8,m8,ta,mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e8,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 64 x i8> @llvm.riscv.vcompress.mask.nxv64i8( + <vscale x 64 x i8> %0, + <vscale x 64 x i8> %1, + <vscale x 64 x i1> %2, + i64 %3) + + ret <vscale x 64 x i8> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16( + <vscale x 1 x i16>, + <vscale x 1 x i16>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x i16> @intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i16_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vcompress.mask.nxv1i16( + <vscale x 1 x i16> %0, + <vscale x 1 x i16> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x 
i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16( + <vscale x 2 x i16>, + <vscale x 2 x i16>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x i16> @intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i16_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vcompress.mask.nxv2i16( + <vscale x 2 x i16> %0, + <vscale x 2 x i16> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16( + <vscale x 4 x i16>, + <vscale x 4 x i16>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x i16> @intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vcompress.mask.nxv4i16( + <vscale x 4 x i16> %0, + <vscale x 4 x i16> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16( + <vscale x 8 x i16>, + <vscale x 8 x i16>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x i16> @intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vcompress.mask.nxv8i16( + <vscale 
x 8 x i16> %0, + <vscale x 8 x i16> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16( + <vscale x 16 x i16>, + <vscale x 16 x i16>, + <vscale x 16 x i1>, + i64); + +define <vscale x 16 x i16> @intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vcompress.mask.nxv16i16( + <vscale x 16 x i16> %0, + <vscale x 16 x i16> %1, + <vscale x 16 x i1> %2, + i64 %3) + + ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16( + <vscale x 32 x i16>, + <vscale x 32 x i16>, + <vscale x 32 x i1>, + i64); + +define <vscale x 32 x i16> @intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv32i16_nxv32i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vcompress.mask.nxv32i16( + <vscale x 32 x i16> %0, + <vscale x 32 x i16> %1, + <vscale x 32 x i1> %2, + i64 %3) + + ret <vscale x 32 x i16> %a +} + +declare <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32( + <vscale x 1 x i32>, + <vscale x 1 x i32>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x i32> @intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i32_nxv1i32: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vcompress.mask.nxv1i32( + <vscale x 1 x i32> %0, + <vscale x 1 x i32> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x i32> %a +} + +declare <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32( + <vscale x 2 x i32>, + <vscale x 2 x i32>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x i32> @intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i32> @llvm.riscv.vcompress.mask.nxv2i32( + <vscale x 2 x i32> %0, + <vscale x 2 x i32> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x i32> %a +} + +declare <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32( + <vscale x 4 x i32>, + <vscale x 4 x i32>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x i32> @intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i32> @llvm.riscv.vcompress.mask.nxv4i32( + <vscale x 4 x i32> %0, + <vscale x 4 x i32> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x i32> %a +} + +declare <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32( + <vscale x 8 x i32>, + <vscale x 8 x i32>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x i32> @intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 
%3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i32> @llvm.riscv.vcompress.mask.nxv8i32( + <vscale x 8 x i32> %0, + <vscale x 8 x i32> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x i32> %a +} + +declare <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32( + <vscale x 16 x i32>, + <vscale x 16 x i32>, + <vscale x 16 x i1>, + i64); + +define <vscale x 16 x i32> @intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16i32_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x i32> @llvm.riscv.vcompress.mask.nxv16i32( + <vscale x 16 x i32> %0, + <vscale x 16 x i32> %1, + <vscale x 16 x i1> %2, + i64 %3) + + ret <vscale x 16 x i32> %a +} + +declare <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64( + <vscale x 1 x i64>, + <vscale x 1 x i64>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x i64> @intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1i64_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x i64> @llvm.riscv.vcompress.mask.nxv1i64( + <vscale x 1 x i64> %0, + <vscale x 1 x i64> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x i64> %a +} + +declare <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64( + <vscale x 2 x i64>, + 
<vscale x 2 x i64>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x i64> @intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2i64_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x i64> @llvm.riscv.vcompress.mask.nxv2i64( + <vscale x 2 x i64> %0, + <vscale x 2 x i64> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x i64> %a +} + +declare <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64( + <vscale x 4 x i64>, + <vscale x 4 x i64>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x i64> @intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4i64_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x i64> @llvm.riscv.vcompress.mask.nxv4i64( + <vscale x 4 x i64> %0, + <vscale x 4 x i64> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x i64> %a +} + +declare <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64( + <vscale x 8 x i64>, + <vscale x 8 x i64>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x i64> @intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8i64_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x i64> @llvm.riscv.vcompress.mask.nxv8i64( + <vscale x 8 x i64> %0, + <vscale 
x 8 x i64> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x i64> %a +} + +declare <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16( + <vscale x 1 x half>, + <vscale x 1 x half>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x half> @intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f16_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vcompress.mask.nxv1f16( + <vscale x 1 x half> %0, + <vscale x 1 x half> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x half> %a +} + +declare <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16( + <vscale x 2 x half>, + <vscale x 2 x half>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x half> @intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f16_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x half> @llvm.riscv.vcompress.mask.nxv2f16( + <vscale x 2 x half> %0, + <vscale x 2 x half> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x half> %a +} + +declare <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16( + <vscale x 4 x half>, + <vscale x 4 x half>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x half> @intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr 
zero, 0(ra) +entry: + %a = call <vscale x 4 x half> @llvm.riscv.vcompress.mask.nxv4f16( + <vscale x 4 x half> %0, + <vscale x 4 x half> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x half> %a +} + +declare <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16( + <vscale x 8 x half>, + <vscale x 8 x half>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x half> @intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f16_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x half> @llvm.riscv.vcompress.mask.nxv8f16( + <vscale x 8 x half> %0, + <vscale x 8 x half> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x half> %a +} + +declare <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16( + <vscale x 16 x half>, + <vscale x 16 x half>, + <vscale x 16 x i1>, + i64); + +define <vscale x 16 x half> @intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f16_nxv16f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x half> @llvm.riscv.vcompress.mask.nxv16f16( + <vscale x 16 x half> %0, + <vscale x 16 x half> %1, + <vscale x 16 x i1> %2, + i64 %3) + + ret <vscale x 16 x half> %a +} + +declare <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16( + <vscale x 32 x half>, + <vscale x 32 x half>, + <vscale x 32 x i1>, + i64); + +define <vscale x 32 x half> @intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vcompress_mask_vm_nxv32f16_nxv32f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 32 x half> @llvm.riscv.vcompress.mask.nxv32f16( + <vscale x 32 x half> %0, + <vscale x 32 x half> %1, + <vscale x 32 x i1> %2, + i64 %3) + + ret <vscale x 32 x half> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x float> @intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f32_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vcompress.mask.nxv1f32( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x float> @intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vcompress.mask.nxv2f32( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32( + <vscale x 4 x float>, 
+ <vscale x 4 x float>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x float> @intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f32_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vcompress.mask.nxv4f32( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x float> @intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f32_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vcompress.mask.nxv8f32( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vcompress.mask.nxv16f32( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x i1>, + i64); + +define <vscale x 16 x float> @intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv16f32_nxv16f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 16 x float> 
@llvm.riscv.vcompress.mask.nxv16f32( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x i1> %2, + i64 %3) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64( + <vscale x 1 x double>, + <vscale x 1 x double>, + <vscale x 1 x i1>, + i64); + +define <vscale x 1 x double> @intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu +; CHECK-NEXT: vcompress.vm v16, v17, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 1 x double> @llvm.riscv.vcompress.mask.nxv1f64( + <vscale x 1 x double> %0, + <vscale x 1 x double> %1, + <vscale x 1 x i1> %2, + i64 %3) + + ret <vscale x 1 x double> %a +} + +declare <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64( + <vscale x 2 x double>, + <vscale x 2 x double>, + <vscale x 2 x i1>, + i64); + +define <vscale x 2 x double> @intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv2f64_nxv2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu +; CHECK-NEXT: vcompress.vm v16, v18, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 2 x double> @llvm.riscv.vcompress.mask.nxv2f64( + <vscale x 2 x double> %0, + <vscale x 2 x double> %1, + <vscale x 2 x i1> %2, + i64 %3) + + ret <vscale x 2 x double> %a +} + +declare <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64( + <vscale x 4 x double>, + <vscale x 4 x double>, + <vscale x 4 x i1>, + i64); + +define <vscale x 4 x double> @intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv4f64_nxv4f64: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu +; CHECK-NEXT: vcompress.vm v16, v20, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 4 x double> @llvm.riscv.vcompress.mask.nxv4f64( + <vscale x 4 x double> %0, + <vscale x 4 x double> %1, + <vscale x 4 x i1> %2, + i64 %3) + + ret <vscale x 4 x double> %a +} + +declare <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64( + <vscale x 8 x double>, + <vscale x 8 x double>, + <vscale x 8 x i1>, + i64); + +define <vscale x 8 x double> @intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vcompress_mask_vm_nxv8f64_nxv8f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: jalr zero, 0(ra) +entry: + %a = call <vscale x 8 x double> @llvm.riscv.vcompress.mask.nxv8f64( + <vscale x 8 x double> %0, + <vscale x 8 x double> %1, + <vscale x 8 x i1> %2, + i64 %3) + + ret <vscale x 8 x double> %a +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits