[llvm-branch-commits] [llvm] a6a72df - [AMDGPU][GlobalISel] Avoid selecting S_PACK with constants

2021-01-20 Thread Mirko Brkusanin via llvm-branch-commits

Author: Mirko Brkusanin
Date: 2021-01-20T11:54:53+01:00
New Revision: a6a72dfdf2e132d64ea73ddbbc0d3431b6483724

URL: 
https://github.com/llvm/llvm-project/commit/a6a72dfdf2e132d64ea73ddbbc0d3431b6483724
DIFF: 
https://github.com/llvm/llvm-project/commit/a6a72dfdf2e132d64ea73ddbbc0d3431b6483724.diff

LOG: [AMDGPU][GlobalISel] Avoid selecting S_PACK with constants

If constants are hidden behind G_ANYEXT, we can treat them the same way as
G_SEXT. For that purpose we extend getConstantVRegValWithLookThrough with an
option to handle G_ANYEXT the same way as G_SEXT.

Differential Revision: https://reviews.llvm.org/D92219

Added: 


Modified: 
llvm/include/llvm/CodeGen/GlobalISel/Utils.h
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h 
b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index ed75cde6f316..68553ab5b1a8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -146,10 +146,13 @@ struct ValueAndVReg {
 /// When \p LookThroughInstrs == false this function behaves like
 /// getConstantVRegVal.
 /// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
+/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as
+/// G_SEXT.
 Optional
 getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo 
&MRI,
   bool LookThroughInstrs = true,
-  bool HandleFConstants = true);
+  bool HandleFConstants = true,
+  bool LookThroughAnyExt = false);
 const ConstantFP* getConstantFPVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
 

diff  --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp 
b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 868385c2deff..cd2483224489 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -283,7 +283,7 @@ Optional llvm::getConstantVRegSExtVal(Register 
VReg,
 
 Optional llvm::getConstantVRegValWithLookThrough(
 Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
-bool HandleFConstant) {
+bool HandleFConstant, bool LookThroughAnyExt) {
   SmallVector, 4> SeenOpcodes;
   MachineInstr *MI;
   auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -310,6 +310,10 @@ Optional 
llvm::getConstantVRegValWithLookThrough(
   while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
  LookThroughInstrs) {
 switch (MI->getOpcode()) {
+case TargetOpcode::G_ANYEXT:
+  if (!LookThroughAnyExt)
+return None;
+  LLVM_FALLTHROUGH;
 case TargetOpcode::G_TRUNC:
 case TargetOpcode::G_SEXT:
 case TargetOpcode::G_ZEXT:
@@ -343,6 +347,7 @@ Optional 
llvm::getConstantVRegValWithLookThrough(
 case TargetOpcode::G_TRUNC:
   Val = Val.trunc(OpcodeAndSize.second);
   break;
+case TargetOpcode::G_ANYEXT:
 case TargetOpcode::G_SEXT:
   Val = Val.sext(OpcodeAndSize.second);
   break;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c6ae4a1a27eb..2fae3d69a700 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -597,9 +597,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock *BB = MI.getParent();
 
-  auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
+  auto ConstSrc1 =
+  getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
   if (ConstSrc1) {
-auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
+auto ConstSrc0 =
+getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
 if (ConstSrc0) {
   const int64_t K0 = ConstSrc0->Value.getSExtValue();
   const int64_t K1 = ConstSrc1->Value.getSExtValue();

diff  --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
index 056ea79a9898..ccd6fd71b3da 100644
--- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
+++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
@@ -579,10 +579,8 @@ body: |
   bb.0:
 
 ; GFX9-LABEL: name: 
test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
-; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
-; G

[llvm-branch-commits] [llvm] 0c7cce5 - [AMDGPU] Resolve issues when picking between ds_read/write and ds_read2/write2

2020-12-10 Thread Mirko Brkusanin via llvm-branch-commits

Author: Mirko Brkusanin
Date: 2020-12-10T12:40:49+01:00
New Revision: 0c7cce54eba3249489530040f41103dd8e0049f7

URL: 
https://github.com/llvm/llvm-project/commit/0c7cce54eba3249489530040f41103dd8e0049f7
DIFF: 
https://github.com/llvm/llvm-project/commit/0c7cce54eba3249489530040f41103dd8e0049f7.diff

LOG: [AMDGPU] Resolve issues when picking between ds_read/write and 
ds_read2/write2

Both ds_read_b128 and ds_read2_b64 are valid for 128-bit, 16-byte-aligned
loads, but the one that will be selected is determined either by the order in
tablegen or by the AddedComplexity attribute. Currently ds_read_b128 has
priority.

While ds_read2_b64 has lower alignment requirements, we cannot always
restrict ds_read_b128 to 16-byte alignment because of the
unaligned-access-mode option. This was causing ds_read_b128 to be selected
for 8-byte-aligned loads regardless of the chosen access mode.

To resolve this we use two patterns for selecting ds_read_b128. One
requires 16-byte alignment and the other requires the
unaligned-access-mode option.

The same goes for ds_write2_b64 and ds_write_b128.

Differential Revision: https://reviews.llvm.org/D92767

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
llvm/test/CodeGen/AMDGPU/load-local.128.ll
llvm/test/CodeGen/AMDGPU/store-local.128.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td 
b/llvm/lib/Target/AMDGPU/AMDGPU.td
index f27ee1975a7f..77063f370976 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1089,11 +1089,6 @@ def isGFX7GFX10 :
 "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
   AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;
 
-def isGFX7GFX8 :
-  Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
-"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS">,
-  AssemblerPredicate<(all_of FeatureSouthernIslands, FeatureCIInsts)>;
-
 def isGFX7GFX8GFX9 :
   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
 "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS 
||"
@@ -1299,6 +1294,9 @@ def EnableFlatScratch : 
Predicate<"Subtarget->enableFlatScratch()">;
 
 def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;
 
+def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">,
+  AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 2e38619e2333..328c81005df4 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -680,7 +680,7 @@ foreach vt = VReg_64.RegTypes in {
 defm : DSReadPat_mc ;
 }
 
-let SubtargetPredicate = isGFX7GFX8 in {
+let SubtargetPredicate = isGFX7Plus in {
 
 foreach vt = VReg_96.RegTypes in {
 defm : DSReadPat_mc ;
@@ -690,9 +690,7 @@ foreach vt = VReg_128.RegTypes in {
 defm : DSReadPat_mc ;
 }
 
-}
-
-let SubtargetPredicate = isGFX9Plus in {
+let SubtargetPredicate = HasUnalignedAccessMode in {
 
 foreach vt = VReg_96.RegTypes in {
 defm : DSReadPat_mc ;
@@ -702,7 +700,9 @@ foreach vt = VReg_128.RegTypes in {
 defm : DSReadPat_mc ;
 }
 
-}
+} // End SubtargetPredicate = HasUnalignedAccessMode
+
+} // End SubtargetPredicate = isGFX7Plus
 
 } // End AddedComplexity = 100
 
@@ -835,7 +835,7 @@ foreach vt = VReg_64.RegTypes in {
 defm : DSWritePat_mc ;
 }
 
-let SubtargetPredicate = isGFX7GFX8 in {
+let SubtargetPredicate = isGFX7Plus in {
 
 foreach vt = VReg_96.RegTypes in {
 defm : DSWritePat_mc ;
@@ -845,9 +845,7 @@ foreach vt = VReg_128.RegTypes in {
 defm : DSWritePat_mc ;
 }
 
-}
-
-let SubtargetPredicate = isGFX9Plus in {
+let SubtargetPredicate = HasUnalignedAccessMode in {
 
 foreach vt = VReg_96.RegTypes in {
 defm : DSWritePat_mc ;
@@ -857,9 +855,12 @@ foreach vt = VReg_128.RegTypes in {
 defm : DSWritePat_mc ;
 }
 
-}
+} // End SubtargetPredicate = HasUnalignedAccessMode
+
+} // End SubtargetPredicate = isGFX7Plus
 
 } // End AddedComplexity = 100
+
 class DSAtomicRetPat : 
GCNPat <
   (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
   (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds))

diff  --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
index e7c646ee73a7..71fc286dc75c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-loca