llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

<details>
<summary>Changes</summary>

---

Full diff: https://github.com/llvm/llvm-project/pull/170093.diff

2 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+20)
- (modified) llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll (+29-69)

``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ba9d0682b26dd..5a3b53437a750 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5164,6 +5164,26 @@ void LoongArchTargetLowering::ReplaceNodeResults(
       }
     }
 
+    // Only v4i64->v4i16/v4i8 and v8i32->v8i8 will reach the code below.
+    if (InBits == 256 && (InVT == MVT::v4i64 || InVT == MVT::v8i32)) {
+      if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) != 0)
+        return;
+
+      MVT DWidenVT = EltVT == MVT::i16 ? MVT::v16i16 : MVT::v32i8;
+      unsigned WidenNumElts = DWidenVT.getVectorNumElements();
+      int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
+      SmallVector<int, 32> TruncMask(WidenNumElts, -1);
+      for (unsigned I = 0; I < MinElts; ++I)
+        TruncMask[I] = Scale * I;
+
+      SDValue CastIn = DAG.getBitcast(DWidenVT, In);
+      SDValue Result =
+          DAG.getVectorShuffle(DWidenVT, DL, CastIn, CastIn, TruncMask);
+      Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, WidenVT, Result,
+                                    DAG.getVectorIdxConstant(0, DL)));
+      return;
+    }
+
     break;
   }
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
index 3802b9df6043d..b5950fd55606e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-trunc.ll
@@ -41,14 +41,10 @@ define void @trunc_v4i64_to_v4i16(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v4i64_to_v4i16:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 1
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 2
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
+; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
+; LA32-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA32-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 1
 ; LA32-NEXT:    st.w $a1, $a0, 4
 ; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 0
@@ -58,14 +54,10 @@ define void @trunc_v4i64_to_v4i16(ptr %res, ptr %a) nounwind {
 ; LA64-LABEL: trunc_v4i64_to_v4i16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 1
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 2
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; LA64-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
+; LA64-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
+; LA64-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA64-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; LA64-NEXT:    vstelm.d $vr1, $a0, 0, 0
 ; LA64-NEXT:    ret
 entry:
@@ -79,30 +71,22 @@ define void @trunc_v4i64_to_v4i8(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v4i64_to_v4i8:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 0
+; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI2_0)
+; LA32-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA32-NEXT:    xvshuf.b $xr0, $xr2, $xr0, $xr1
+; LA32-NEXT:    vpickve2gr.w $a1, $vr0, 0
 ; LA32-NEXT:    st.w $a1, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: trunc_v4i64_to_v4i8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 1
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 2
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 3
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA64-NEXT:    vstelm.w $vr1, $a0, 0, 0
+; LA64-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; LA64-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI2_0)
+; LA64-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA64-NEXT:    xvshuf.b $xr0, $xr2, $xr0, $xr1
+; LA64-NEXT:    vstelm.w $vr0, $a0, 0, 0
 ; LA64-NEXT:    ret
 entry:
   %v = load <4 x i64>, ptr %a
@@ -166,48 +150,24 @@ define void @trunc_v8i32_to_v8i8(ptr %res, ptr %a) nounwind {
 ; LA32-LABEL: trunc_v8i32_to_v8i8:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    xvld $xr0, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 1
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 3
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 5
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 5
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 7
-; LA32-NEXT:    vinsgr2vr.b $vr1, $a1, 7
-; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 1
+; LA32-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; LA32-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI4_0)
+; LA32-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA32-NEXT:    xvshuf.b $xr0, $xr2, $xr0, $xr1
+; LA32-NEXT:    vpickve2gr.w $a1, $vr0, 1
 ; LA32-NEXT:    st.w $a1, $a0, 4
-; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 0
+; LA32-NEXT:    vpickve2gr.w $a1, $vr0, 0
 ; LA32-NEXT:    st.w $a1, $a0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: trunc_v8i32_to_v8i8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    xvld $xr0, $a1, 0
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 0
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 1
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 1
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 2
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 3
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 3
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 4
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 5
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 5
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 6
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 7
-; LA64-NEXT:    vinsgr2vr.b $vr1, $a1, 7
-; LA64-NEXT:    vstelm.d $vr1, $a0, 0, 0
+; LA64-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; LA64-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI4_0)
+; LA64-NEXT:    xvpermi.d $xr2, $xr0, 78
+; LA64-NEXT:    xvshuf.b $xr0, $xr2, $xr0, $xr1
+; LA64-NEXT:    vstelm.d $vr0, $a0, 0, 0
 ; LA64-NEXT:    ret
 entry:
   %v = load <8 x i32>, ptr %a
``````````

</details>

https://github.com/llvm/llvm-project/pull/170093
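
A note on the approach for readers skimming the diff: the new `ReplaceNodeResults` path bitcasts the 256-bit source to a doubly-widened vector of the narrow element type (`v16i16` or `v32i8`), builds a shuffle mask that keeps only the lowest subelement of each wide lane, and extracts the low subvector of the result. Below is a minimal standalone sketch of the mask computation; it is plain C++ mirroring the patch's loop rather than the LLVM API, with the constants hardcoded for the v4i64 -> v4i16 case (assumed little-endian lane order, as the patch relies on):

```cpp
#include <cstdio>
#include <vector>

int main() {
  // v4i64 -> v4i16: view the 256-bit input as v16i16 and keep the low
  // i16 of each i64 lane, i.e. lanes 0, 4, 8 and 12.
  const unsigned InEltBits = 64, EltBits = 16;
  const unsigned MinElts = 4;                  // result element count
  const unsigned WidenNumElts = 256 / EltBits; // 16 lanes in v16i16
  const int Scale = InEltBits / EltBits;       // 4 subelements per i64

  std::vector<int> TruncMask(WidenNumElts, -1); // -1 == undef lane
  for (unsigned I = 0; I < MinElts; ++I)
    TruncMask[I] = Scale * I;

  for (int M : TruncMask)
    std::printf("%d ", M); // prints: 0 4 8 12 -1 -1 ... -1
  std::printf("\n");
  return 0;
}
```

As the updated checks show, the resulting shuffle lowers on LASX to a constant-pool control vector plus an `xvpermi.d`/`xvshuf.h` (or `xvshuf.b`) pair, replacing the long per-element `xvpickve2gr`/`vinsgr2vr` sequences.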
