[llvm-branch-commits] [llvm] edaf6a0 - [AMDGPU][GISel] Combine G_INSERT_VECTOR_ELT to G_SHUFFLE_VECTOR

2022-10-19 Thread Pierre van Houtryve via llvm-branch-commits

Author: Pierre van Houtryve
Date: 2022-10-19T10:16:08Z
New Revision: edaf6a07a4aafd963ea958703890d03ab58ff2dd

URL: 
https://github.com/llvm/llvm-project/commit/edaf6a07a4aafd963ea958703890d03ab58ff2dd
DIFF: 
https://github.com/llvm/llvm-project/commit/edaf6a07a4aafd963ea958703890d03ab58ff2dd.diff

LOG: [AMDGPU][GISel] Combine G_INSERT_VECTOR_ELT to G_SHUFFLE_VECTOR

Depends on D134967

Differential Revision: https://reviews.llvm.org/D135145

Added: 

llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-insertvecelt-to-shufflevector.mir

Modified: 
llvm/lib/Target/AMDGPU/AMDGPUCombine.td
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td 
b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 2415fdfecaae2..8b2ff164d3365 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -45,6 +45,12 @@ def cvt_f32_ubyteN : GICombineRule<
  [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, 
${matchinfo}); }]),
   (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, 
${matchinfo}); }])>;
 
+def insert_vec_elt_to_shuffle : GICombineRule<
+  (defs root:$insertelt, unsigned_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_INSERT_VECTOR_ELT):$insertelt,
+  [{ return 
PreLegalizerHelper.matchInsertVectorEltToShuffle(*${insertelt}, ${matchinfo}); 
}]),
+  (apply [{ PreLegalizerHelper.applyInsertVectorEltToShuffle(*${insertelt}, 
${matchinfo}); }])>;
+
 def clamp_i64_to_i16_matchdata : 
GIDefMatchData<"AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo">;
 
 def clamp_i64_to_i16 : GICombineRule<
@@ -109,7 +115,7 @@ def gfx6gfx7_combines : 
GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPreLegalizerCombinerHelper",
-  [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+  [all_combines, clamp_i64_to_i16, foldable_fneg, insert_vec_elt_to_shuffle]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
   let AdditionalArguments = [];

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 6d6c69adaa658..08eefc6da4d31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -55,6 +55,9 @@ class AMDGPUPreLegalizerCombinerHelper {
 
   void applyClampI64ToI16(MachineInstr &MI,
   const ClampI64ToI16MatchInfo &MatchInfo);
+
+  bool matchInsertVectorEltToShuffle(MachineInstr &MI, unsigned &Idx);
+  void applyInsertVectorEltToShuffle(MachineInstr &MI, unsigned &Idx);
 };
 
 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
@@ -154,6 +157,73 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
   MI.eraseFromParent();
 }
 
+bool AMDGPUPreLegalizerCombinerHelper::matchInsertVectorEltToShuffle(
+MachineInstr &MI, unsigned &Idx) {
+  // Transforms a G_INSERT_VECTOR_ELT into an equivalent G_SHUFFLE_VECTOR if:
+  //- Scalar Pack insts are present (for <32 bits element types)
+  //- The vector has <= 4 elements.
+  // as this is a preferred canonical form of the operation.
+  //
+  // Note that both restrictions are arbitrary. Currently, it's mostly targeted
+  // towards 2x16 vectors. Restrictions could be relaxed or entirely removed in
+  // the future if codegen can handle it without causing regressions.
+
+  LLT VecTy = MRI.getType(MI.getOperand(0).getReg());
+  const unsigned EltSize = VecTy.getElementType().getSizeInBits();
+  if (EltSize < 32 &&
+  !MI.getMF()->getSubtarget<GCNSubtarget>().hasScalarPackInsts())
+return false;
+
+  if (VecTy.isScalable() || VecTy.getNumElements() > 4)
+return false;
+
+  Optional<ValueAndVReg> MaybeIdxVal =
+  getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+  if (!MaybeIdxVal)
+return false;
+
+  Idx = MaybeIdxVal->Value.getZExtValue();
+  return true;
+}
+
+void AMDGPUPreLegalizerCombinerHelper::applyInsertVectorEltToShuffle(
+MachineInstr &MI, unsigned &Idx) {
+  B.setInstrAndDebugLoc(MI);
+
+  Register Ins = MI.getOperand(2).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+
+  LLT VecTy = MRI.getType(Dst);
+  LLT EltTy = VecTy.getElementType();
+  const unsigned NumElts = VecTy.getNumElements();
+
+  const auto Undef = MRI.createGenericVirtualRegister(EltTy);
+  B.buildUndef(Undef);
+
+  const auto OtherVec = MRI.createGenericVirtualRegister(VecTy);
+
+  SmallVector<Register> Srcs;
+  Srcs.push_back(Ins);
+  for (unsigned K = 1; K < NumElts; ++K)
+Srcs.push_back(Undef);
+
+  B.buildBuildVector(OtherVec, Srcs);
+
+  // NumElts == Ins in OtherVec
+  // 0...(NumElts-1) = Original elements
+  SmallVector<int> ShuffleMask;
+  for (unsig

[llvm-branch-commits] [llvm] 007ef6f - [AMDGPU][GISel] Constrain selected operands in selectG_BUILD_VECTOR

2022-10-19 Thread Pierre van Houtryve via llvm-branch-commits

Author: Pierre van Houtryve
Date: 2022-10-19T10:16:08Z
New Revision: 007ef6fa4d89f7e60a82af8c7cc004a6204fd72b

URL: 
https://github.com/llvm/llvm-project/commit/007ef6fa4d89f7e60a82af8c7cc004a6204fd72b
DIFF: 
https://github.com/llvm/llvm-project/commit/007ef6fa4d89f7e60a82af8c7cc004a6204fd72b.diff

LOG: [AMDGPU][GISel] Constrain selected operands in selectG_BUILD_VECTOR

Small bugfix. Currently harmless but a case in D134354 triggers it.

Differential Revision: https://reviews.llvm.org/D136235

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 7f41e8593692..0a6896693510 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -686,13 +686,19 @@ bool 
AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
   // TODO: Can be improved?
   if (IsVector) {
 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
-.addImm(0xFFFF)
-.addReg(Src0);
-BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
-.addReg(Src1)
-.addImm(16)
-.addReg(TmpReg);
+auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
+   .addImm(0xFFFF)
+   .addReg(Src0);
+if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
+  return false;
+
+MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
+  .addReg(Src1)
+  .addImm(16)
+  .addReg(TmpReg);
+if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
+  return false;
+
 MI.eraseFromParent();
 return true;
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits