https://github.com/dzhidzhoev updated https://github.com/llvm/llvm-project/pull/65263:
From a88ee53623a3ff4774bc7a5df13379e55a0f9945 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzh...@accesssoftek.com>
Date: Mon, 4 Sep 2023 12:35:48 +0200
Subject: [PATCH 1/2] [GlobalISel][AArch64] Combine unmerge(G_EXT v, undef) to unmerge(v).

Given <N x t> d1, unused = unmerge(G_EXT <2*N x t> v1, undef, N), it can
be expressed simply as unused, d1 = unmerge v1.

This helps to tackle regressions in arm64-vcvt_f.ll introduced in
https://reviews.llvm.org/D144670.
---
 llvm/lib/Target/AArch64/AArch64Combine.td     | 11 ++-
 .../GISel/AArch64PostLegalizerLowering.cpp    | 47 ++++++++++++
 .../AArch64/arm64-neon-add-pairwise.ll        |  6 +-
 llvm/test/CodeGen/AArch64/arm64-vabs.ll       | 76 +++++++------------
 llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll     | 17 ++---
 5 files changed, 93 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 6bcb014d22035e..699310d0627dba 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -206,6 +206,14 @@ def vector_sext_inreg_to_shift : GICombineRule<
   (apply [{ applyVectorSextInReg(*${d}, MRI, B, Observer); }])
 >;
 
+def unmerge_ext_to_unmerge_matchdata : GIDefMatchData<"Register">;
+def unmerge_ext_to_unmerge : GICombineRule<
+  (defs root:$d, unmerge_ext_to_unmerge_matchdata:$matchinfo),
+  (match (wip_match_opcode G_UNMERGE_VALUES):$d,
+         [{ return matchUnmergeExtToUnmerge(*${d}, MRI, ${matchinfo}); }]),
+  (apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -214,7 +222,8 @@ def AArch64PostLegalizerLowering
                        [shuffle_vector_lowering, vashr_vlshr_imm,
                         icmp_lowering, build_vector_lowering,
                         lower_vector_fcmp, form_truncstore,
-                        vector_sext_inreg_to_shift]> {
+                        vector_sext_inreg_to_shift,
+                        unmerge_ext_to_unmerge]> {
 }
 
 // Post-legalization combines which are primarily optimizations.
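Concretely, the rewrite looks like this in generic MIR (a sketch distilled
from the v4s32 case in the MIR test added by the second commit; register
names are illustrative):

    ; Before: only the low half of the G_EXT result is used, and the ext
    ; byte offset (8) equals the size of one unmerge destination.
    %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
    %c:_(s32) = G_CONSTANT i32 8
    %ext:_(<4 x s32>) = G_EXT %v1, %implicit, %c(s32)
    %d1:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext(<4 x s32>)

    ; After: the G_EXT is bypassed; %d1 now takes the high half of %v1.
    %unused:_(<2 x s32>), %d1:_(<2 x s32>) = G_UNMERGE_VALUES %v1(<4 x s32>)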
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 57117ea143d7e9..e9386d77b2559f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1066,6 +1066,53 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
   Helper.lower(MI, 0, /* Unused hint type */ LLT());
 }
 
+/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
+///   => unused, <N x t> = unmerge v
+bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              Register &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+  if (MI.getNumDefs() != 2)
+    return false;
+  if (!MRI.use_nodbg_empty(MI.getOperand(1).getReg()))
+    return false;
+
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  if (!DstTy.isVector())
+    return false;
+
+  MachineInstr *Ext = getDefIgnoringCopies(
+      MI.getOperand(MI.getNumExplicitDefs()).getReg(), MRI);
+  if (!Ext || Ext->getOpcode() != AArch64::G_EXT)
+    return false;
+
+  Register ExtSrc1 = Ext->getOperand(1).getReg();
+  Register ExtSrc2 = Ext->getOperand(2).getReg();
+  auto LowestVal =
+      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
+  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
+    return false;
+
+  MachineInstr *Undef = getDefIgnoringCopies(ExtSrc2, MRI);
+  if (!Undef)
+    return false;
+
+  MatchInfo = ExtSrc1;
+
+  return Undef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                              MachineIRBuilder &B,
+                              GISelChangeObserver &Observer, Register &SrcReg) {
+  Observer.changingInstr(MI);
+  // Swap dst registers.
+  Register Dst1 = MI.getOperand(0).getReg();
+  MI.getOperand(0).setReg(MI.getOperand(1).getReg());
+  MI.getOperand(1).setReg(Dst1);
+  MI.getOperand(2).setReg(SrcReg);
+  Observer.changedInstr(MI);
+}
+
 class AArch64PostLegalizerLoweringImpl : public Combiner {
 protected:
   // TODO: Make CombinerHelper methods const.
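A note on the LowestVal check in matchUnmergeExtToUnmerge: the G_EXT
immediate is a byte count, so the combine only fires when the ext amount
lands exactly on the boundary between the two unmerge halves, i.e. it
equals DstTy.getSizeInBytes(). A sketch with the concrete types used in
the MIR tests below (%v1, %undef, %bad are illustrative names; the
offset-9 rejection mirrors the skip_borders test):

    ; Matches: each <2 x s32> destination is 8 bytes and the ext amount is 8.
    %c8:_(s32) = G_CONSTANT i32 8
    %ext:_(<4 x s32>) = G_EXT %v1, %undef, %c8(s32)

    ; Rejected: an ext amount of 9 does not split %v1 at the half boundary.
    %c9:_(s32) = G_CONSTANT i32 9
    %bad:_(<4 x s32>) = G_EXT %v1, %undef, %c9(s32)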
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index aa048eea302c97..17fb312c63754d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -137,7 +137,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-GI-LABEL: addp_v4i32:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
 ; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
 ; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
 ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
@@ -164,7 +164,7 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-GI-LABEL: addp_v8i16:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
 ; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT: ret
   %1 = add <8 x i16> %a, %b
@@ -185,7 +185,7 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-GI-LABEL: addp_v16i8:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
 ; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT: ret
   %1 = add <16 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 18efdc86e16aa8..cff60bdf44ca48 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -71,9 +71,7 @@ define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: sabdl.8h v0, v0, v1
+; CHECK-GI-NEXT: sabdl2.8h v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -96,9 +94,7 @@ define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: sabdl.4s v0, v0, v1
+; CHECK-GI-NEXT: sabdl2.4s v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <8 x i16>, ptr %A
   %load2 = load <8 x i16>, ptr %B
@@ -121,9 +117,7 @@ define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
+; CHECK-GI-NEXT: sabdl2.2d v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <4 x i32>, ptr %A
   %load2 = load <4 x i32>, ptr %B
@@ -188,9 +182,7 @@ define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: uabdl.8h v0, v0, v1
+; CHECK-GI-NEXT: uabdl2.8h v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -214,9 +206,7 @@ define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: uabdl.4s v0, v0, v1
+; CHECK-GI-NEXT: uabdl2.4s v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <8 x i16>, ptr %A
   %load2 = load <8 x i16>, ptr %B
@@ -239,9 +229,7 @@ define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr q0, [x0]
 ; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
-; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
+; CHECK-GI-NEXT: uabdl2.2d v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %load1 = load <4 x i32>, ptr %A
   %load2 = load <4 x i32>, ptr %B
@@ -1132,12 +1120,10 @@ define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: sabal2_8h:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: sabal.8h v0, v2, v1
+; CHECK-GI-NEXT: sabal2.8h v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -1161,12 +1147,10 @@ define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: sabal2_4s:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: sabal.4s v0, v2, v1
+; CHECK-GI-NEXT: sabal2.4s v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <8 x i16>, ptr %A
   %load2 = load <8 x i16>, ptr %B
@@ -1190,12 +1174,10 @@ define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: sabal2_2d:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: sabal.2d v0, v2, v1
+; CHECK-GI-NEXT: sabal2.2d v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <4 x i32>, ptr %A
   %load2 = load <4 x i32>, ptr %B
@@ -1270,12 +1252,10 @@ define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: uabal2_8h:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: uabal.8h v0, v2, v1
+; CHECK-GI-NEXT: uabal2.8h v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <16 x i8>, ptr %A
   %load2 = load <16 x i8>, ptr %B
@@ -1299,12 +1279,10 @@ define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: uabal2_4s:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: uabal.4s v0, v2, v1
+; CHECK-GI-NEXT: uabal2.4s v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <8 x i16>, ptr %A
   %load2 = load <8 x i16>, ptr %B
@@ -1328,12 +1306,10 @@ define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
 ;
 ; CHECK-GI-LABEL: uabal2_2d:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
-; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
 ; CHECK-GI-NEXT: ldr q0, [x2]
-; CHECK-GI-NEXT: uabal.2d v0, v2, v1
+; CHECK-GI-NEXT: uabal2.2d v0, v1, v2
 ; CHECK-GI-NEXT: ret
   %load1 = load <4 x i32>, ptr %A
   %load2 = load <4 x i32>, ptr %B
@@ -1607,7 +1583,7 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
 ; CHECK-GI-LABEL: uabdl2_from_extract_dup:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: dup.2s v1, w0
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
+; CHECK-GI-NEXT: mov d0, v0[1]
 ; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
@@ -1642,7 +1618,7 @@ define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
 ; CHECK-GI-LABEL: sabdl2_from_extract_dup:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: dup.2s v1, w0
-; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
+; CHECK-GI-NEXT: mov d0, v0[1]
 ; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
 ; CHECK-GI-NEXT: ret
   %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index f40baef48e719b..e94aac3b59c69a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -31,8 +31,7 @@ define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ss
 ;
 ; GISEL-LABEL: test_vcvt_high_f64_f32:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
-; GISEL-NEXT: fcvtl v0.2d, v0.2s
+; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
 ; GISEL-NEXT: ret
   %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
@@ -80,8 +79,7 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
 ;
 ; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
-; GISEL-NEXT: fcvtl v0.2d, v0.2s
+; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
 ; GISEL-NEXT: ret
   %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %bc2 = bitcast <2 x i32> %ext to <2 x float>
@@ -97,7 +95,7 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
 ;
 ; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: mov d0, v0[1]
 ; GISEL-NEXT: fcvtl v0.2d, v0.2s
 ; GISEL-NEXT: ret
   %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -114,7 +112,7 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
 ;
 ; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: mov d0, v0[1]
 ; GISEL-NEXT: fcvtl v0.2d, v0.2s
 ; GISEL-NEXT: ret
   %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -147,7 +145,7 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
 ;
 ; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: mov d0, v0[1]
 ; GISEL-NEXT: fcvtl v0.4s, v0.4h
 ; GISEL-NEXT: ret
   %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -164,8 +162,7 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
 ;
 ; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
-; GISEL-NEXT: fcvtl v0.4s, v0.4h
+; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
 ; GISEL-NEXT: ret
   %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %bc2 = bitcast <4 x i16> %ext to <4 x half>
@@ -181,7 +178,7 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
 ;
 ; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
 ; GISEL: // %bb.0:
-; GISEL-NEXT: ext.16b v0, v0, v0, #8
+; GISEL-NEXT: mov d0, v0[1]
 ; GISEL-NEXT: fcvtl v0.4s, v0.4h
 ; GISEL-NEXT: ret
   %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

From b8e671f178ea5e8f30917bd289fcf1312668474d Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev <vdzhidzh...@accesssoftek.com>
Date: Tue, 5 Sep 2023 13:56:54 +0200
Subject: [PATCH 2/2] Refactored, added MIR test.

---
 .../GISel/AArch64PostLegalizerLowering.cpp    |  18 +-
 .../postlegalizer-lowering-unmerge-ext.mir    | 154 ++++++++++++++++++
 2 files changed, 162 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-unmerge-ext.mir

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index e9386d77b2559f..d2a99b65cc6aab 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1071,18 +1071,18 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
 bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                               Register &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
-  if (MI.getNumDefs() != 2)
+  auto &Unmerge = cast<GUnmerge>(MI);
+  if (Unmerge.getNumDefs() != 2)
     return false;
-  if (!MRI.use_nodbg_empty(MI.getOperand(1).getReg()))
+  if (!MRI.use_nodbg_empty(Unmerge.getOperand(1).getReg()))
     return false;
 
-  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT DstTy = MRI.getType(Unmerge.getOperand(0).getReg());
   if (!DstTy.isVector())
     return false;
 
-  MachineInstr *Ext = getDefIgnoringCopies(
-      MI.getOperand(MI.getNumExplicitDefs()).getReg(), MRI);
-  if (!Ext || Ext->getOpcode() != AArch64::G_EXT)
+  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
+  if (!Ext)
     return false;
 
   Register ExtSrc1 = Ext->getOperand(1).getReg();
@@ -1092,13 +1092,11 @@ bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
   if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
     return false;
 
-  MachineInstr *Undef = getDefIgnoringCopies(ExtSrc2, MRI);
-  if (!Undef)
+  if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
     return false;
 
   MatchInfo = ExtSrc1;
-
-  return Undef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+  return true;
 }
 
 void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-unmerge-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-unmerge-ext.mir
new file mode 100644
index 00000000000000..2452c3083cc86e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-unmerge-ext.mir
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: v4s32
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: v4s32
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: %unused:_(<2 x s32>), %unmerge:_(<2 x s32>) = G_UNMERGE_VALUES %v1(<4 x s32>)
+    ; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<4 x s32>) = COPY $q0
+    %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
+    %C:_(s32) = G_CONSTANT i32 8
+    %ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
+    %fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
+    $q0 = COPY %fpext
+    RET_ReallyLR implicit $q0
+...
+---
+name: v8s16
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: v8s16
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: %unused:_(<4 x s16>), %unmerge:_(<4 x s16>) = G_UNMERGE_VALUES %v1(<8 x s16>)
+    ; CHECK-NEXT: %fpext:_(<4 x s32>) = G_FPEXT %unmerge(<4 x s16>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<8 x s16>) = COPY $q0
+    %implicit:_(<8 x s16>) = G_IMPLICIT_DEF
+    %C:_(s32) = G_CONSTANT i32 8
+    %ext:_(<8 x s16>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<4 x s16>), %unused:_(<4 x s16>) = G_UNMERGE_VALUES %ext:_(<8 x s16>)
+    %fpext:_(<4 x s32>) = G_FPEXT %unmerge:_(<4 x s16>)
+    $q0 = COPY %fpext
+    RET_ReallyLR implicit $q0
+...
+---
+name: v16s8
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: v16s8
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: %unused:_(<8 x s8>), %unmerge:_(<8 x s8>) = G_UNMERGE_VALUES %v1(<16 x s8>)
+    ; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
+    %C:_(s32) = G_CONSTANT i32 8
+    %ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
+    %fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
+    $q0 = COPY %fpext
+    RET_ReallyLR implicit $q0
+...
+---
+name: skip_not_const
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0, $w0
+    ; CHECK-LABEL: name: skip_not_const
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %C:_(s32) = COPY $w0
+    ; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
+    ; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
+    ; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
+    %C:_(s32) = COPY $w0
+    %ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
+    %fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
+    $q0 = COPY %fpext
+    RET_ReallyLR implicit $q0
+...
+---
+name: skip_not_unused
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: skip_not_unused
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 8
+    ; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
+    ; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
+    ; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
+    ; CHECK-NEXT: %fpext2:_(<8 x s16>) = G_FPEXT %unused(<8 x s8>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
+    ; CHECK-NEXT: $q1 = COPY %fpext2(<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+    %v1:_(<16 x s8>) = COPY $q0
+    %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
+    %C:_(s32) = G_CONSTANT i32 8
+    %ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
+    %fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
+    %fpext2:_(<8 x s16>) = G_FPEXT %unused:_(<8 x s8>)
+    $q0 = COPY %fpext
+    $q1 = COPY %fpext2
+    RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: skip_borders
+legalized: true
+body: |
+  bb.0.entry:
+    liveins: $q0
+    ; CHECK-LABEL: name: skip_borders
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: %ext:_(<4 x s32>) = G_EXT %v1, %implicit, %C(s32)
+    ; CHECK-NEXT: %unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext(<4 x s32>)
+    ; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %v1:_(<4 x s32>) = COPY $q0
+    %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
+    %C:_(s32) = G_CONSTANT i32 9
+    %ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
+    %unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
+    %fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
+    $q0 = COPY %fpext
+    RET_ReallyLR implicit $q0
+...