https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141589
>From d906a978145aabae8b2d1a029477d5a08272ae8c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Tue, 27 May 2025 11:16:16 +0200
Subject: [PATCH 1/3] [AMDGPU] Move S_BFE lowering into RegBankCombiner

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |  14 +-
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |  51 +++++++
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  | 125 ++++++++----
 3 files changed, 119 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 9587fad1ecd63..94e1175b06b14 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -151,6 +151,17 @@ def zext_of_shift_amount_combines : GICombineGroup<[
   canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl
 ]>;
 
+// Early select of uniform BFX into S_BFE instructions.
+// These instructions encode the offset/width in a way that requires using
+// bitwise operations. Selecting these instructions early allow the combiner
+// to potentially fold these.
+class lower_uniform_bfx<Instruction bfx> : GICombineRule<
+  (defs root:$bfx),
+  (combine (bfx $dst, $src, $o, $w):$bfx, [{ return lowerUniformBFX(*${bfx}); }])>;
+
+def lower_uniform_sbfx : lower_uniform_bfx<G_SBFX>;
+def lower_uniform_ubfx : lower_uniform_bfx<G_UBFX>;
+
 let Predicates = [Has16BitInsts, NotHasMed3_16] in {
 // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
 // saves one instruction compared to the promotion.
@@ -198,5 +209,6 @@ def AMDGPURegBankCombiner : GICombiner<
   zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
   identity_combines, redundant_and, constant_fold_cast_op,
-  cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines]> {
+  cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
+  lower_uniform_sbfx, lower_uniform_ubfx]> {
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index ee324a5e93f0f..2100900bb8eb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -89,6 +89,8 @@ class AMDGPURegBankCombinerImpl : public Combiner {
   void applyCanonicalizeZextShiftAmt(MachineInstr &MI,
                                      MachineInstr &Ext) const;
 
+  bool lowerUniformBFX(MachineInstr &MI) const;
+
 private:
   SIModeRegisterDefaults getMode() const;
   bool getIEEE() const;
@@ -392,6 +394,55 @@ void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
   MI.eraseFromParent();
 }
 
+bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_UBFX ||
+         MI.getOpcode() == TargetOpcode::G_SBFX);
+  const bool Signed = (MI.getOpcode() == TargetOpcode::G_SBFX);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  const RegisterBank *RB = RBI.getRegBank(DstReg, MRI, TRI);
+  assert(RB && "No RB?");
+  if (RB->getID() != AMDGPU::SGPRRegBankID)
+    return false;
+
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register OffsetReg = MI.getOperand(2).getReg();
+  Register WidthReg = MI.getOperand(3).getReg();
+
+  const LLT S32 = LLT::scalar(32);
+  LLT Ty = MRI.getType(DstReg);
+
+  const unsigned Opc = (Ty == S32)
+                           ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
+                           : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+
+  // Ensure the high bits are clear to insert the offset.
+  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
+  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
+
+  // Zeros out the low bits, so don't bother clamping the input value.
+  auto ShiftAmt = B.buildConstant(S32, 16);
+  auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
+
+  // Transformation function, pack the offset and width of a BFE into
+  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+  // source, bits [5:0] contain the offset and bits [22:16] the width.
+  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
+
+  MRI.setRegBank(OffsetMask.getReg(0), *RB);
+  MRI.setRegBank(ClampOffset.getReg(0), *RB);
+  MRI.setRegBank(ShiftAmt.getReg(0), *RB);
+  MRI.setRegBank(ShiftWidth.getReg(0), *RB);
+  MRI.setRegBank(MergedInputs.getReg(0), *RB);
+
+  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
+  if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
+    llvm_unreachable("failed to constrain BFE");
+
+  MI.eraseFromParent();
+  return true;
+}
+
 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 6874657a4ffe7..140c2babb013f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1492,88 +1492,73 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
   Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
 
   const RegisterBank *DstBank =
-    OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
-  if (DstBank == &AMDGPU::VGPRRegBank) {
-    if (Ty == S32)
-      return true;
-
-    // There is no 64-bit vgpr bitfield extract instructions so the operation
-    // is expanded to a sequence of instructions that implement the operation.
-    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
+      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
 
-    const LLT S64 = LLT::scalar(64);
-    // Shift the source operand so that extracted bits start at bit 0.
-    auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
-                              : B.buildLShr(S64, SrcReg, OffsetReg);
-    auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);
-
-    // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
-    // if the width is a constant.
-    if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
-      // Use the 32-bit bitfield extract instruction if the width is a constant.
-      // Depending on the width size, use either the low or high 32-bits.
-      auto Zero = B.buildConstant(S32, 0);
-      auto WidthImm = ConstWidth->Value.getZExtValue();
-      if (WidthImm <= 32) {
-        // Use bitfield extract on the lower 32-bit source, and then sign-extend
-        // or clear the upper 32-bits.
-        auto Extract =
-            Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
-                   : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
-        auto Extend =
-            Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
-        B.buildMergeLikeInstr(DstReg, {Extract, Extend});
-      } else {
-        // Use bitfield extract on upper 32-bit source, and combine with lower
-        // 32-bit source.
-        auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
-        auto Extract =
-            Signed
-                ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
-                : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
-        B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
-      }
-      MI.eraseFromParent();
+  if (DstBank != &AMDGPU::VGPRRegBank) {
+    // SGPR: Canonicalize to a G_S/UBFX
+    if (!isa<GIntrinsic>(MI))
       return true;
-    }
 
-    // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
-    // operations.
-    auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
-    auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
+    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
     if (Signed)
-      B.buildAShr(S64, SignBit, ExtShift);
+      B.buildSbfx(DstReg, SrcReg, OffsetReg, WidthReg);
     else
-      B.buildLShr(S64, SignBit, ExtShift);
+      B.buildUbfx(DstReg, SrcReg, OffsetReg, WidthReg);
     MI.eraseFromParent();
     return true;
   }
 
-  // The scalar form packs the offset and width in a single operand.
-
-  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
-
-  // Ensure the high bits are clear to insert the offset.
-  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
-  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
-
-  // Zeros out the low bits, so don't bother clamping the input value.
-  auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));
-
-  // Transformation function, pack the offset and width of a BFE into
-  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
-  // source, bits [5:0] contain the offset and bits [22:16] the width.
-  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
+  // VGPR
+  if (Ty == S32)
+    return true;
 
-  // TODO: It might be worth using a pseudo here to avoid scc clobber and
-  // register class constraints.
-  unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
-                             (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+  // There is no 64-bit vgpr bitfield extract instructions so the operation
+  // is expanded to a sequence of instructions that implement the operation.
+  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
 
-  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
-  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
-    llvm_unreachable("failed to constrain BFE");
-
+  const LLT S64 = LLT::scalar(64);
+  // Shift the source operand so that extracted bits start at bit 0.
+  auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
+                            : B.buildLShr(S64, SrcReg, OffsetReg);
+  auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);
+
+  // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
+  // if the width is a constant.
+  if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
+    // Use the 32-bit bitfield extract instruction if the width is a constant.
+    // Depending on the width size, use either the low or high 32-bits.
+    auto Zero = B.buildConstant(S32, 0);
+    auto WidthImm = ConstWidth->Value.getZExtValue();
+    if (WidthImm <= 32) {
+      // Use bitfield extract on the lower 32-bit source, and then sign-extend
+      // or clear the upper 32-bits.
+      auto Extract =
+          Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
+                 : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
+      auto Extend =
+          Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
+      B.buildMergeLikeInstr(DstReg, {Extract, Extend});
+    } else {
+      // Use bitfield extract on upper 32-bit source, and combine with lower
+      // 32-bit source.
+      auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
+      auto Extract =
+          Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
+                 : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
+      B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
+    }
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
+  // operations.
+  auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
+  auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
+  if (Signed)
+    B.buildAShr(S64, SignBit, ExtShift);
+  else
+    B.buildLShr(S64, SignBit, ExtShift);
   MI.eraseFromParent();
   return true;
 }

>From 1fadb4e1cb00432ff55b3180bd37756419bd2ab2 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Wed, 28 May 2025 10:37:09 +0200
Subject: [PATCH 2/3] style change

---
 llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 2100900bb8eb2..a364887fa69d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -412,7 +412,7 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
   const LLT S32 = LLT::scalar(32);
   LLT Ty = MRI.getType(DstReg);
 
-  const unsigned Opc = (Ty == S32)
+  const unsigned Opc = Ty == S32
                            ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
                            : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
 

>From 9927a43fca7371b02cbdfb0d17b3a9498258b8ba Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Tue, 1 Jul 2025 11:46:19 +0200
Subject: [PATCH 3/3] comments

---
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |   7 +-
 .../GlobalISel/regbankcombiner-lower-bfx.mir  | 107 ++++++++++++++++++
 2 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index a364887fa69d5..8d0c1b673ee40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -416,6 +416,10 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
                            ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
                            : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
 
+  // Pack the offset and width of a BFE into
+  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+  // source, bits [5:0] contain the offset and bits [22:16] the width.
+
   // Ensure the high bits are clear to insert the offset.
   auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
   auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
@@ -424,9 +428,6 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
   // Zeros out the low bits, so don't bother clamping the input value.
   auto ShiftAmt = B.buildConstant(S32, 16);
   auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
 
-  // Transformation function, pack the offset and width of a BFE into
-  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
-  // source, bits [5:0] contain the offset and bits [22:16] the width.
   auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
 
   MRI.setRegBank(OffsetMask.getReg(0), *RB);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir
new file mode 100644
index 0000000000000..3b7cb4158a897
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_s_bfe_i32__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_s_bfe_i32__constants
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_I32 %reg(s32), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
+    %reg:sgpr(s32) = COPY $sgpr0
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s32) = G_SBFX %reg, %offset, %width
+    $sgpr0 = COPY %bfx
+...
+---
+name: test_s_bfe_u32__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_s_bfe_u32__constants
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_U32 %reg(s32), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
+    %reg:sgpr(s32) = COPY $sgpr0
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s32) = G_UBFX %reg, %offset, %width
+    $sgpr0 = COPY %bfx
+...
+---
+name: test_s_bfe_i64__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: test_s_bfe_i64__constants
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_I64 %reg(s64), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
+    %reg:sgpr(s64) = COPY $sgpr0_sgpr1
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s64) = G_SBFX %reg, %offset, %width
+    $sgpr0_sgpr1 = COPY %bfx
+...
+---
+name: test_s_bfe_u64__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: test_s_bfe_u64__constants
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_U64 %reg(s64), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
+    %reg:sgpr(s64) = COPY $sgpr0_sgpr1
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s64) = G_UBFX %reg, %offset, %width
+    $sgpr0_sgpr1 = COPY %bfx
+...

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits