llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

<details>
<summary>Changes</summary>

This patch introduces legalization and instruction patterns for vector sign and zero extension operations.

---

Patch is 155.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160810.diff

10 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArch.td (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+13)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+60)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+26)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll (+104-795)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll (+112-935)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll (+212-274)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll (+14-22)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll (+221-178)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll (+22-35)


``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 6497ff999f6fa..62e837aad10b7 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -59,6 +59,7 @@ def FeatureExtLSX
     : SubtargetFeature<"lsx", "HasExtLSX", "true",
                        "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
 def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">;
+def IsExtLSX : Predicate<"Subtarget->hasExtLSX() && !Subtarget->hasExtLASX()">;
 
 // Loongson Advanced SIMD eXtension (LASX)
 def FeatureExtLASX
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 801e557a22520..104b315d9bfcc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -385,6 +385,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
     }
+    for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16}) {
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+    }
   }
 
   // Set operations for 'LASX' feature.
@@ -446,6 +450,15 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                          VT, Expand);
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     }
+    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
+      setOperationAction(ISD::SIGN_EXTEND, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND, VT, Legal);
+    }
+    for (MVT VT :
+         {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
+    }
   }
 
   // Set DAG combine for LA32 and LA64.
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..b3389463b633c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2063,6 +2063,66 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
 
+// Sign extensions
+def : Pat<(v4i64 (sext v4i32:$vj)),
+          (v4i64 (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (sext v8i16:$vj)),
+          (v8i32 (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (sext v16i8:$vj)),
+          (v16i16 (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_W_H (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i64 (sext_invec v32i8:$xj)), (v4i64 (VEXT2XV_D_B v32i8:$xj))>;
+def : Pat<(v4i64 (sext_invec v16i16:$xj)), (v4i64 (VEXT2XV_D_H v16i16:$xj))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)),
+          (v8i16 (EXTRACT_SUBREG (VEXT2XV_H_B (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v8i32 (sext_invec v32i8:$xj)), (v8i32 (VEXT2XV_W_B v32i8:$xj))>;
+
+// Zero extensions
+def : Pat<(v4i64 (zext v4i32:$vj)),
+          (v4i64 (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)))>;
+def : Pat<(v8i32 (zext v8i16:$vj)),
+          (v8i32 (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)))>;
+def : Pat<(v16i16 (zext v16i8:$vj)),
+          (v16i16 (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)))>;
+
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)),
+          (v2i64 (EXTRACT_SUBREG (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)),
+          (v4i32 (EXTRACT_SUBREG (VEXT2XV_WU_HU (SUBREG_TO_REG (i64 0), v8i16:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v4i64 (zext_invec v32i8:$xj)), (v4i64 (VEXT2XV_DU_BU v32i8:$xj))>;
+def : Pat<(v4i64 (zext_invec v16i16:$xj)), (v4i64 (VEXT2XV_DU_HU v16i16:$xj))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)),
+          (v8i16 (EXTRACT_SUBREG (VEXT2XV_HU_BU (SUBREG_TO_REG (i64 0), v16i8:$vj, sub_128)),
+                                 sub_128))>;
+def : Pat<(v8i32 (zext_invec v32i8:$xj)), (v8i32 (VEXT2XV_WU_BU v32i8:$xj))>;
+
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index b0eb51a92c6c6..eb1fe93475f50 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2174,6 +2174,32 @@ def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>;
 
 } // Predicates = [HasExtLSX]
 
+let Predicates = [IsExtLSX] in {
+
+// Sign extensions
+def : Pat<(v2i64 (sext_invec v16i8:$vj)),
+          (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (sext_invec v8i16:$vj)),
+          (v2i64 (VSLLWIL_D_W (VSLLWIL_W_H v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (sext_invec v4i32:$vj)), (v2i64 (VSLLWIL_D_W v4i32:$vj, 0))>;
+def : Pat<(v4i32 (sext_invec v16i8:$vj)),
+          (v4i32 (VSLLWIL_W_H (VSLLWIL_H_B v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (sext_invec v8i16:$vj)), (v4i32 (VSLLWIL_W_H v8i16:$vj, 0))>;
+def : Pat<(v8i16 (sext_invec v16i8:$vj)), (v8i16 (VSLLWIL_H_B v16i8:$vj, 0))>;
+
+// Zero extensions
+def : Pat<(v2i64 (zext_invec v16i8:$vj)),
+          (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0), 0))>;
+def : Pat<(v2i64 (zext_invec v8i16:$vj)),
+          (v2i64 (VSLLWIL_DU_WU (VSLLWIL_WU_HU v8i16:$vj, 0), 0))>;
+def : Pat<(v2i64 (zext_invec v4i32:$vj)), (v2i64 (VSLLWIL_DU_WU v4i32:$vj, 0))>;
+def : Pat<(v4i32 (zext_invec v16i8:$vj)),
+          (v4i32 (VSLLWIL_WU_HU (VSLLWIL_HU_BU v16i8:$vj, 0), 0))>;
+def : Pat<(v4i32 (zext_invec v8i16:$vj)), (v4i32 (VSLLWIL_WU_HU v8i16:$vj, 0))>;
+def : Pat<(v8i16 (zext_invec v16i8:$vj)), (v8i16 (VSLLWIL_HU_BU v16i8:$vj, 0))>;
+
+} // Predicates = [IsExtLSX]
+
 /// Intrinsic pattern
 class deriveLSXIntrinsic<string Inst> {
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
index 953e6c45608c0..8884aacc16f51 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -7,11 +7,7 @@ define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.h $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 56
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -26,10 +22,7 @@ define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.d $vr0, $vr0, 48
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vext2xv.d.h $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -45,9 +38,8 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT: vslli.d $vr0, $vr0, 32
-; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.w $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -55,9 +47,7 @@ define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT: vslli.d $vr0, $vr0, 32
-; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vext2xv.d.w $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -72,10 +62,7 @@ define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 24
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vext2xv.w.b $xr0, $xr0
 ; CHECK-NEXT: vst $vr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -89,13 +76,8 @@ define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_4i8_to_4i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
-; CHECK-NEXT: xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.b $xr0, $xr0
 ; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
@@ -112,9 +94,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.w $vr0, $vr0, 16
-; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vext2xv.w.h $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -122,9 +102,7 @@ define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.w $vr0, $vr0, 16
-; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vext2xv.w.h $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -139,27 +117,17 @@ define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA32-NEXT: xvslli.d $xr0, $xr0, 48
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.d.h $xr0, $xr0
 ; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_sext_4i16_to_4i64:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.h $xr0, $xr0, $xr1
-; LA64-NEXT: xvslli.d $xr0, $xr0, 48
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.d.h $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -170,43 +138,12 @@ entry:
 }
 
 define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_4i32_to_4i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vextrins.w $vr1, $vr0, 2
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: vextrins.w $vr1, $vr0, 35
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vori.b $vr2, $vr0, 0
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; LA32-NEXT: vextrins.w $vr2, $vr0, 33
-; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA32-NEXT: srai.w $a0, $a0, 31
-; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA32-NEXT: xvst $xr2, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: load_sext_4i32_to_4i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
-; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
-; LA64-NEXT: xvst $xr2, $a1, 0
-; LA64-NEXT: ret
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
 entry:
   %A = load <4 x i32>, ptr %ptr
   %B = sext <4 x i32> %A to <4 x i64>
@@ -221,9 +158,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA32-NEXT: ld.w $a0, $a0, 4
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT: vslli.h $vr0, $vr0, 8
-; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vext2xv.h.b $xr0, $xr0
 ; LA32-NEXT: vst $vr0, $a1, 0
 ; LA32-NEXT: ret
 ;
@@ -231,9 +166,7 @@ define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT: vslli.h $vr0, $vr0, 8
-; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vext2xv.h.b $xr0, $xr0
 ; LA64-NEXT: vst $vr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -248,27 +181,17 @@ define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
-; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
-; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA32-NEXT: xvslli.w $xr0, $xr0, 24
-; LA32-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vext2xv.w.b $xr0, $xr0
 ; LA32-NEXT: xvst $xr0, $a1, 0
 ; LA32-NEXT: ret
 ;
 ; LA64-LABEL: load_sext_8i8_to_8i32:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI9_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.w $xr0, $xr0, 24
-; LA64-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vext2xv.w.b $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
 ; LA64-NEXT: ret
 entry:
@@ -282,21 +205,13 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA32-LABEL: load_sext_8i8_to_8i64:
 ; LA32: # %bb.0: # %entry
 ; LA32-NEXT: ld.w $a2, $a0, 0
-; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
 ; LA32-NEXT: ld.w $a0, $a0, 4
-; LA32-NEXT: xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vext2xv.d.b $xr1, $xr0
 ; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
-; LA32-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA32-NEXT: xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
 ; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
 ; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
-; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
-; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr2
-; LA32-NEXT: xvslli.d $xr0, $xr0, 56
-; LA32-NEXT: xvsrai.d $xr0, $xr0, 56
-; LA32-NEXT: xvshuf.b $xr1, $xr0, $xr1, $xr2
-; LA32-NEXT: xvslli.d $xr1, $xr1, 56
-; LA32-NEXT: xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT: vext2xv.d.b $xr0, $xr0
 ; LA32-NEXT: xvst $xr1, $a1, 0
 ; LA32-NEXT: xvst $xr0, $a1, 32
 ; LA32-NEXT: ret
@@ -304,20 +219,12 @@ define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
 ; LA64-LABEL: load_sext_8i8_to_8i64:
 ; LA64: # %bb.0: # %entry
 ; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
-; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
-; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; LA64-NEXT: vsrli.d $vr2, $vr1, 32
-; LA64-NEXT: xvpermi.d $xr2, $xr2, 68
-; LA64-NEXT: xvshuf.b $xr2, $xr0, $xr2, $xr0
-; LA64-NEXT: xvslli.d $xr2, $xr2, 56
-; LA64-NEXT: xvsrai.d $xr2, $xr2, 56
-; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
-; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
-; LA64-NEXT: xvslli.d $xr0, $xr0, 56
-; LA64-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vext2xv.d.b $xr1, $xr1
+; LA64-NEXT: vext2xv.d.b $xr0, $xr0
 ; LA64-NEXT: xvst $xr0, $a1, 0
-; LA64-NEXT: xvst $xr2, $a1, 32
+; LA64-NEXT: xvst $xr1, $a1, 32
 ; LA64-NEXT: ret
 entry:
   %A = load <8 x i8>, ptr %ptr
@@ -330,32 +237,8 @@ define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_sext_8i16_to_8i32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: ext.w.h $a0, $a0
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
 ; CHECK-NEXT: ret
 entry:
   %A = load <8 ...
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/160810
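
For readers skimming the truncated diff, here is the effect in a nutshell. A minimal IR sketch, distilled from the updated `vec-sext.ll` checks above (the `llc` invocation is an assumption based on the test directory's usual RUN lines, and the trailing `store` is reconstructed from the visible function signature and `xvst`): a whole-vector `sext` from `<4 x i32>` to `<4 x i64>` now selects a single `vext2xv.d.w` on LASX instead of the previous per-element extract/insert sequence.

```llvm
; Assumed invocation: llc --mtriple=loongarch64 --mattr=+lasx
define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
entry:
  %A = load <4 x i32>, ptr %ptr        ; 128-bit LSX vector
  %B = sext <4 x i32> %A to <4 x i64>  ; widens to a 256-bit LASX vector
  store <4 x i64> %B, ptr %dst
  ret void
}
; Expected codegen with this patch (per the CHECK lines above):
;   vld         $vr0, $a0, 0
;   vext2xv.d.w $xr0, $xr0
;   xvst        $xr0, $a1, 0
;   ret
```

On LSX-only subtargets (the new `IsExtLSX` predicate), the in-register extension patterns instead select `vsllwil` chains, e.g. `(v2i64 (sext_invec v16i8))` becomes `vsllwil.h.b` + `vsllwil.w.h` + `vsllwil.d.w`, each with an immediate shift of 0.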
