https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141589
>From d906a978145aabae8b2d1a029477d5a08272ae8c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Tue, 27 May 2025 11:16:16 +0200
Subject: [PATCH 1/3] [AMDGPU] Move S_BFE lowering into RegBankCombiner

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |  14 +-
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |  51 +++++++
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  | 125 ++++++++----
 3 files changed, 119 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 9587fad1ecd63..94e1175b06b14 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -151,6 +151,17 @@ def zext_of_shift_amount_combines : GICombineGroup<[
   canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl
 ]>;
 
+// Early select of uniform BFX into S_BFE instructions.
+// These instructions encode the offset/width in a way that requires using
+// bitwise operations. Selecting these instructions early allow the combiner
+// to potentially fold these.
+class lower_uniform_bfx<Instruction bfx> : GICombineRule<
+  (defs root:$bfx),
+  (combine (bfx $dst, $src, $o, $w):$bfx, [{ return lowerUniformBFX(*${bfx}); }])>;
+
+def lower_uniform_sbfx : lower_uniform_bfx<G_SBFX>;
+def lower_uniform_ubfx : lower_uniform_bfx<G_UBFX>;
+
 let Predicates = [Has16BitInsts, NotHasMed3_16] in {
 // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
 // saves one instruction compared to the promotion.
@@ -198,5 +209,6 @@ def AMDGPURegBankCombiner : GICombiner<
   zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
   identity_combines, redundant_and, constant_fold_cast_op,
-  cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines]> {
+  cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
+  lower_uniform_sbfx, lower_uniform_ubfx]> {
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index ee324a5e93f0f..2100900bb8eb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -89,6 +89,8 @@ class AMDGPURegBankCombinerImpl : public Combiner {
   void applyCanonicalizeZextShiftAmt(MachineInstr &MI,
                                      MachineInstr &Ext) const;
 
+  bool lowerUniformBFX(MachineInstr &MI) const;
+
 private:
   SIModeRegisterDefaults getMode() const;
   bool getIEEE() const;
@@ -392,6 +394,55 @@ void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
   MI.eraseFromParent();
 }
 
+bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_UBFX ||
+         MI.getOpcode() == TargetOpcode::G_SBFX);
+  const bool Signed = (MI.getOpcode() == TargetOpcode::G_SBFX);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  const RegisterBank *RB = RBI.getRegBank(DstReg, MRI, TRI);
+  assert(RB && "No RB?");
+  if (RB->getID() != AMDGPU::SGPRRegBankID)
+    return false;
+
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register OffsetReg = MI.getOperand(2).getReg();
+  Register WidthReg = MI.getOperand(3).getReg();
+
+  const LLT S32 = LLT::scalar(32);
+  LLT Ty = MRI.getType(DstReg);
+
+  const unsigned Opc = (Ty == S32)
+                           ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
+                           : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+
+  // Ensure the high bits are clear to insert the offset.
+  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
+  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
+
+  // Zeros out the low bits, so don't bother clamping the input value.
+  auto ShiftAmt = B.buildConstant(S32, 16);
+  auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
+
+  // Transformation function, pack the offset and width of a BFE into
+  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+  // source, bits [5:0] contain the offset and bits [22:16] the width.
+  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
+
+  MRI.setRegBank(OffsetMask.getReg(0), *RB);
+  MRI.setRegBank(ClampOffset.getReg(0), *RB);
+  MRI.setRegBank(ShiftAmt.getReg(0), *RB);
+  MRI.setRegBank(ShiftWidth.getReg(0), *RB);
+  MRI.setRegBank(MergedInputs.getReg(0), *RB);
+
+  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
+  if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
+    llvm_unreachable("failed to constrain BFE");
+
+  MI.eraseFromParent();
+  return true;
+}
+
 SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
   return MF.getInfo<SIMachineFunctionInfo>()->getMode();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 6874657a4ffe7..140c2babb013f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1492,88 +1492,73 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
   Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
 
   const RegisterBank *DstBank =
-    OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
-  if (DstBank == &AMDGPU::VGPRRegBank) {
-    if (Ty == S32)
-      return true;
-
-    // There is no 64-bit vgpr bitfield extract instructions so the operation
-    // is expanded to a sequence of instructions that implement the operation.
-    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
+      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
 
-    const LLT S64 = LLT::scalar(64);
-    // Shift the source operand so that extracted bits start at bit 0.
-    auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
-                              : B.buildLShr(S64, SrcReg, OffsetReg);
-    auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);
-
-    // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
-    // if the width is a constant.
-    if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
-      // Use the 32-bit bitfield extract instruction if the width is a constant.
-      // Depending on the width size, use either the low or high 32-bits.
-      auto Zero = B.buildConstant(S32, 0);
-      auto WidthImm = ConstWidth->Value.getZExtValue();
-      if (WidthImm <= 32) {
-        // Use bitfield extract on the lower 32-bit source, and then sign-extend
-        // or clear the upper 32-bits.
-        auto Extract =
-            Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
-                   : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
-        auto Extend =
-            Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
-        B.buildMergeLikeInstr(DstReg, {Extract, Extend});
-      } else {
-        // Use bitfield extract on upper 32-bit source, and combine with lower
-        // 32-bit source.
-        auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
-        auto Extract =
-            Signed
-                ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
-                : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
-        B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
-      }
-      MI.eraseFromParent();
+  if (DstBank != &AMDGPU::VGPRRegBank) {
+    // SGPR: Canonicalize to a G_S/UBFX
+    if (!isa<GIntrinsic>(MI))
       return true;
-    }
 
-    // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
-    // operations.
-    auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
-    auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
+    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
     if (Signed)
-      B.buildAShr(S64, SignBit, ExtShift);
+      B.buildSbfx(DstReg, SrcReg, OffsetReg, WidthReg);
     else
-      B.buildLShr(S64, SignBit, ExtShift);
+      B.buildUbfx(DstReg, SrcReg, OffsetReg, WidthReg);
     MI.eraseFromParent();
     return true;
   }
 
-  // The scalar form packs the offset and width in a single operand.
-
-  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);
-
-  // Ensure the high bits are clear to insert the offset.
-  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
-  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
-
-  // Zeros out the low bits, so don't bother clamping the input value.
-  auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));
-
-  // Transformation function, pack the offset and width of a BFE into
-  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
-  // source, bits [5:0] contain the offset and bits [22:16] the width.
-  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
+  // VGPR
+  if (Ty == S32)
+    return true;
 
-  // TODO: It might be worth using a pseudo here to avoid scc clobber and
-  // register class constraints.
-  unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
-                             (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+  // There is no 64-bit vgpr bitfield extract instructions so the operation
+  // is expanded to a sequence of instructions that implement the operation.
+  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);
 
-  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
-  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
-    llvm_unreachable("failed to constrain BFE");
-
+  const LLT S64 = LLT::scalar(64);
+  // Shift the source operand so that extracted bits start at bit 0.
+  auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
+                            : B.buildLShr(S64, SrcReg, OffsetReg);
+  auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);
+
+  // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
+  // if the width is a constant.
+  if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
+    // Use the 32-bit bitfield extract instruction if the width is a constant.
+    // Depending on the width size, use either the low or high 32-bits.
+    auto Zero = B.buildConstant(S32, 0);
+    auto WidthImm = ConstWidth->Value.getZExtValue();
+    if (WidthImm <= 32) {
+      // Use bitfield extract on the lower 32-bit source, and then sign-extend
+      // or clear the upper 32-bits.
+      auto Extract =
+          Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
+                 : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
+      auto Extend =
+          Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
+      B.buildMergeLikeInstr(DstReg, {Extract, Extend});
+    } else {
+      // Use bitfield extract on upper 32-bit source, and combine with lower
+      // 32-bit source.
+      auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
+      auto Extract =
+          Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
+                 : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
+      B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
+    }
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
+  // operations.
+  auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
+  auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
+  if (Signed)
+    B.buildAShr(S64, SignBit, ExtShift);
+  else
+    B.buildLShr(S64, SignBit, ExtShift);
   MI.eraseFromParent();
   return true;
 }

>From 1fadb4e1cb00432ff55b3180bd37756419bd2ab2 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Wed, 28 May 2025 10:37:09 +0200
Subject: [PATCH 2/3] style change

---
 llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 2100900bb8eb2..a364887fa69d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -412,7 +412,7 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
   const LLT S32 = LLT::scalar(32);
   LLT Ty = MRI.getType(DstReg);
 
-  const unsigned Opc = (Ty == S32)
+  const unsigned Opc = Ty == S32
                            ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
                            : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
 

>From 9927a43fca7371b02cbdfb0d17b3a9498258b8ba Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Tue, 1 Jul 2025 11:46:19 +0200
Subject: [PATCH 3/3] comments

---
 .../Target/AMDGPU/AMDGPURegBankCombiner.cpp   |   7 +-
 .../GlobalISel/regbankcombiner-lower-bfx.mir  | 107 ++++++++++++++++++
 2 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index a364887fa69d5..8d0c1b673ee40 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -416,6 +416,10 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
                            ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
                            : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
 
+  // Pack the offset and width of a BFE into
+  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+  // source, bits [5:0] contain the offset and bits [22:16] the width.
+
   // Ensure the high bits are clear to insert the offset.
   auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
   auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
@@ -424,9 +428,6 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
   // Zeros out the low bits, so don't bother clamping the input value.
   auto ShiftAmt = B.buildConstant(S32, 16);
   auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
 
-  // Transformation function, pack the offset and width of a BFE into
-  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
-  // source, bits [5:0] contain the offset and bits [22:16] the width.
   auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
 
   MRI.setRegBank(OffsetMask.getReg(0), *RB);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir
new file mode 100644
index 0000000000000..3b7cb4158a897
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-lower-bfx.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_s_bfe_i32__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_s_bfe_i32__constants
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_I32 %reg(s32), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
+    %reg:sgpr(s32) = COPY $sgpr0
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s32) = G_SBFX %reg, %offset, %width
+    $sgpr0 = COPY %bfx
+...
+---
+name: test_s_bfe_u32__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_s_bfe_u32__constants
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_U32 %reg(s32), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
+    %reg:sgpr(s32) = COPY $sgpr0
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s32) = G_UBFX %reg, %offset, %width
+    $sgpr0 = COPY %bfx
+...
+---
+name: test_s_bfe_i64__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: test_s_bfe_i64__constants
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_I64 %reg(s64), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
+    %reg:sgpr(s64) = COPY $sgpr0_sgpr1
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s64) = G_SBFX %reg, %offset, %width
+    $sgpr0_sgpr1 = COPY %bfx
+...
+---
+name: test_s_bfe_u64__constants
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: test_s_bfe_u64__constants
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
+    ; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_U64 %reg(s64), [[OR]](s32), implicit-def $scc
+    ; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
+    %reg:sgpr(s64) = COPY $sgpr0_sgpr1
+    %width:sgpr(s32) = G_CONSTANT i32 5
+    %offset:sgpr(s32) = G_CONSTANT i32 7
+    %bfx:sgpr(s64) = G_UBFX %reg, %offset, %width
+    $sgpr0_sgpr1 = COPY %bfx
+...

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits