https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127051
Previously this would give up on folding subregister copies through a reg_sequence if the input operand already had a subregister index. d246cc618adc52fdbd69d44a2a375c8af97b6106 stopped introducing these subregister uses, and this is the first step to lifting that restriction.

I was expecting to be able to implement this purely with compose / reverse compose, but I wasn't able to make that work, so this relies on testing the lane masks for whether the copy reads a subset of the input.

From a009eec4bc6159c26a909cbbb19debdc6206ffe7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Tue, 4 Feb 2025 17:05:35 +0700
Subject: [PATCH] PeepholeOpt: Handle subregister compose when looking through
 reg_sequence

Previously this would give up on folding subregister copies through a
reg_sequence if the input operand already had a subregister index.
d246cc618adc52fdbd69d44a2a375c8af97b6106 stopped introducing these
subregister uses, and this is the first step to lifting that
restriction.

I was expecting to be able to implement this purely with compose /
reverse compose, but I wasn't able to make that work, so this relies
on testing the lane masks for whether the copy reads a subset of the
input.
---
 llvm/lib/CodeGen/PeepholeOptimizer.cpp        | 33 +++++++++-
 .../AMDGPU/GlobalISel/extractelement.ll       | 10 ++--
 ...e92561-restore-undef-scc-verifier-error.ll | 60 ++++++++++---------
 .../peephole-opt-fold-reg-sequence-subreg.mir | 52 ++++++++--------
 4 files changed, 94 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 745c0d4b36a62..24bd9938bc45c 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1984,12 +1984,43 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
 
   // We are looking at:
   //   Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
-  // Check if one of the operand defines the subreg we are interested in.
+  //
+  // Check if one of the operands exactly defines the subreg we are interested
+  // in.
   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
     if (RegSeqInput.SubIdx == DefSubReg)
       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
   }
 
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
+  // If we did not find an exact match, see if we can do a composition to
+  // extract a sub-subregister.
+  for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
+    // We don't check if the resulting class supports the subregister index
+    // yet. This will occur before any rewrite when looking for an eligible
+    // source.
+
+    LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
+    LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
+
+    // Check that this extract reads a subset of this single reg_sequence input.
+    //
+    // FIXME: We should be able to filter this in terms of the indexes directly
+    // without checking the lanemasks.
+    if ((DefMask & ThisOpRegMask) != DefMask)
+      continue;
+
+    unsigned ReverseDefCompose =
+        TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
+    if (!ReverseDefCompose)
+      continue;
+
+    unsigned ComposedDefInSrcReg1 =
+        TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
+    return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
+  }
+
   // If the subreg we are tracking is super-defined by another subreg,
   // we could follow this value. However, this would require to compose
   // the subreg and we do not do that for now.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index f2a4332bcb8ba..c136028f2de43 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -2872,8 +2872,8 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
 ; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc
 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14
-; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc
-; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v1, vcc
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: dyn_extract_v7f64_v_v:
@@ -2898,8 +2898,8 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo
 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v1, vcc_lo
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: dyn_extract_v7f64_v_v:
@@ -2918,7 +2918,7 @@ define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) {
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14
 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
+; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v0 :: v_dual_cndmask_b32 v1, v1, v1
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
   %ext = extractelement <7 x double> %vec, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
index 3eb9d474ec030..f961e857f39e5 100644
--- a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll
@@ -79,9 +79,9 @@ define void @issue92561(ptr addrspace(1) %arg) {
 ; GISEL: ; %bb.0: ; %bb
 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT: s_clause 0x1
-; GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off
-; GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:16
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
+; GISEL-NEXT: v_mov_b32_e32 v8, 0
 ; GISEL-NEXT: s_mov_b32 s20, 0
 ; GISEL-NEXT: s_mov_b32 s3, exec_lo
 ; GISEL-NEXT: s_mov_b32 s21, s20
@@ -97,19 +97,19 @@ define void @issue92561(ptr addrspace(1) %arg) {
 ; GISEL-NEXT: s_mov_b32 s11, s20
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GISEL-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT: v_readfirstlane_b32 s12, v2
-; GISEL-NEXT: v_readfirstlane_b32 s13, v3
-; GISEL-NEXT: v_readfirstlane_b32 s14, v4
-; GISEL-NEXT: v_readfirstlane_b32 s15, v5
-; GISEL-NEXT: v_readfirstlane_b32 s16, v6
-; GISEL-NEXT: v_readfirstlane_b32 s17, v7
-; GISEL-NEXT: v_readfirstlane_b32 s18, v8
-; GISEL-NEXT: v_readfirstlane_b32 s19, v9
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[12:13], v[2:3]
-; GISEL-NEXT: v_cmp_eq_u64_e64 s0, s[14:15], v[4:5]
-; GISEL-NEXT: v_cmp_eq_u64_e64 s1, s[16:17], v[6:7]
+; GISEL-NEXT: v_readfirstlane_b32 s12, v4
+; GISEL-NEXT: v_readfirstlane_b32 s13, v5
+; GISEL-NEXT: v_readfirstlane_b32 s14, v6
+; GISEL-NEXT: v_readfirstlane_b32 s15, v7
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_readfirstlane_b32 s18, v2
+; GISEL-NEXT: v_readfirstlane_b32 s19, v3
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[12:13], v[4:5]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s0, s[14:15], v[6:7]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s1, s[16:17], v[0:1]
 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GISEL-NEXT: v_cmp_eq_u64_e64 s2, s[18:19], v[8:9]
+; GISEL-NEXT: v_cmp_eq_u64_e64 s2, s[18:19], v[2:3]
 ; GISEL-NEXT: s_and_b32 s0, vcc_lo, s0
 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
 ; GISEL-NEXT: s_and_b32 s0, s0, s1
@@ -117,29 +117,31 @@ define void @issue92561(ptr addrspace(1) %arg) {
 ; GISEL-NEXT: s_and_b32 s0, s0, s2
 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
 ; GISEL-NEXT: s_and_saveexec_b32 s0, s0
-; GISEL-NEXT: image_sample_c_lz v1, [v0, v0, v0, v0], s[12:19], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
-; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: image_sample_c_lz v9, [v8, v8, v8, v8], s[12:19], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+; GISEL-NEXT: ; implicit-def: $vgpr8
 ; GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
 ; GISEL-NEXT: s_cbranch_execnz .LBB0_1
 ; GISEL-NEXT: ; %bb.2:
 ; GISEL-NEXT: s_mov_b32 exec_lo, s3
-; GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0
-; GISEL-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7fc00000
+; GISEL-NEXT: v_mov_b32_e32 v2, 1.0
 ; GISEL-NEXT: s_clause 0x2
-; GISEL-NEXT: image_sample_c_lz v0, [v2, v2, v0, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: image_sample_c_lz v3, [v2, v3, v2, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
-; GISEL-NEXT: image_sample_c_lz v4, [v2, v2, v2, v2], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v0, [v1, v1, v0, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v2, [v1, v2, v1, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GISEL-NEXT: image_sample_c_lz v3, [v1, v1, v1, v1], s[4:11], s[20:23] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GISEL-NEXT: s_waitcnt vmcnt(2)
-; GISEL-NEXT: v_add_f32_e32 v0, v1, v0
+; GISEL-NEXT: v_add_f32_e32 v0, v9, v0
 ; GISEL-NEXT: s_waitcnt vmcnt(1)
-; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GISEL-NEXT: v_dual_add_f32 v0, v3, v0 :: v_dual_mov_b32 v3, v2
+; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GISEL-NEXT: v_add_f32_e32 v0, v2, v0
+; GISEL-NEXT: v_mov_b32_e32 v2, v1
 ; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: v_add_f32_e32 v0, v4, v0
+; GISEL-NEXT: v_add_f32_e32 v0, v3, v0
 ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GISEL-NEXT: v_mul_f32_e32 v1, 0x3e800000, v0
-; GISEL-NEXT: image_store v[1:3], [v2, v2], s[4:11] dim:SQ_RSRC_IMG_2D unorm
+; GISEL-NEXT: v_mul_f32_e32 v0, 0x3e800000, v0
+; GISEL-NEXT: image_store v[0:2], [v1, v1], s[4:11] dim:SQ_RSRC_IMG_2D unorm
 ; GISEL-NEXT: s_setpc_b64 s[30:31]
 bb:
   %descriptor = load <8 x i32>, ptr addrspace(1) %arg, align 32
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
index d4a20c1074a95..ec492dc02949c 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
@@ -162,7 +162,7 @@ body: |
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub2
+; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY4]]
 %0:vgpr_32 = COPY $vgpr0
 %1:vgpr_32 = COPY $vgpr1
@@ -189,7 +189,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub0, [[COPY1]], %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -212,7 +212,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub0, [[COPY1]], %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -285,7 +285,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[V_MOV_B32_e32_]], %subreg.sub2
-; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
 %0:vreg_64 = COPY $vgpr1_vgpr2
 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -311,8 +311,8 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2
-; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
@@ -340,7 +340,7 @@ body: |
 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2, [[V_MOV_B32_e32_1]], %subreg.sub3
-; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -367,8 +367,8 @@ body: |
 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2, [[V_MOV_B32_e32_1]], %subreg.sub3
-; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -420,7 +420,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -444,7 +444,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -468,7 +468,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -492,7 +492,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -516,7 +516,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY1]].sub0_sub1, %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -540,7 +540,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub0_sub1, %subreg.sub2_sub3, [[COPY]].sub2_sub3, %subreg.sub0_sub1
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -564,7 +564,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub0_sub1, %subreg.sub2_sub3, [[COPY]].sub2_sub3, %subreg.sub0_sub1
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_128 = COPY $vgpr5_vgpr6_vgpr7_vgpr8
@@ -588,7 +588,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub1, %subreg.sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_64 = COPY $vgpr5_vgpr6
@@ -615,9 +615,9 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub1, %subreg.sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
@@ -650,12 +650,12 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr5_vgpr6
 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_192 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub4_sub5, [[COPY]].sub1_sub2, %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
-; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub2
-; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub3
-; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub4
-; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub5
+; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub2
+; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub3
 ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]]
 %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
 %1:vreg_64 = COPY $vgpr5_vgpr6
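For reference, a sketch of the kind of rewrite the new compose step enables, mirroring one of the peephole-opt-fold-reg-sequence-subreg.mir cases above (the virtual register numbers here are illustrative, not taken from the patch):

  %2:vreg_128 = REG_SEQUENCE %0.sub2_sub3, %subreg.sub0_sub1, %1.sub0_sub1, %subreg.sub2_sub3
  %3:vgpr_32 = COPY %2.sub1

The copy reads sub1 of the reg_sequence result, and sub1's lane mask is a subset of the lanes defined by the %0.sub2_sub3 input placed at sub0_sub1, so the new subset check passes. reverseComposeSubRegIndices(sub0_sub1, sub1) yields sub1, and composing that with the input's own index, composeSubRegIndices(sub2_sub3, sub1), yields sub3, so the copy can be rewritten to read the reg_sequence input directly:

  %3:vgpr_32 = COPY %0.sub3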