[llvm-branch-commits] [llvm] a6a72df - [AMDGPU][GlobalISel] Avoid selecting S_PACK with constants
Author: Mirko Brkusanin Date: 2021-01-20T11:54:53+01:00 New Revision: a6a72dfdf2e132d64ea73ddbbc0d3431b6483724 URL: https://github.com/llvm/llvm-project/commit/a6a72dfdf2e132d64ea73ddbbc0d3431b6483724 DIFF: https://github.com/llvm/llvm-project/commit/a6a72dfdf2e132d64ea73ddbbc0d3431b6483724.diff LOG: [AMDGPU][GlobalISel] Avoid selecting S_PACK with constants If constants are hidden behind G_ANYEXT we can treat them the same way as G_SEXT. For that purpose we extend getConstantVRegValWithLookThrough with an option to handle G_ANYEXT the same way as G_SEXT. Differential Revision: https://reviews.llvm.org/D92219 Added: Modified: llvm/include/llvm/CodeGen/GlobalISel/Utils.h llvm/lib/CodeGen/GlobalISel/Utils.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll Removed: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index ed75cde6f316..68553ab5b1a8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -146,10 +146,13 @@ struct ValueAndVReg { /// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. /// When \p HandleFConstants == false the function bails on G_FCONSTANTs. +/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as +/// G_SEXT. 
Optional getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs = true, - bool HandleFConstants = true); + bool HandleFConstants = true, + bool LookThroughAnyExt = false); const ConstantFP* getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 868385c2deff..cd2483224489 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -283,7 +283,7 @@ Optional llvm::getConstantVRegSExtVal(Register VReg, Optional llvm::getConstantVRegValWithLookThrough( Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, -bool HandleFConstant) { +bool HandleFConstant, bool LookThroughAnyExt) { SmallVector, 4> SeenOpcodes; MachineInstr *MI; auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { @@ -310,6 +310,10 @@ Optional llvm::getConstantVRegValWithLookThrough( while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && LookThroughInstrs) { switch (MI->getOpcode()) { +case TargetOpcode::G_ANYEXT: + if (!LookThroughAnyExt) +return None; + LLVM_FALLTHROUGH; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: @@ -343,6 +347,7 @@ Optional llvm::getConstantVRegValWithLookThrough( case TargetOpcode::G_TRUNC: Val = Val.trunc(OpcodeAndSize.second); break; +case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: Val = Val.sext(OpcodeAndSize.second); break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index c6ae4a1a27eb..2fae3d69a700 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -597,9 +597,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC( const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock *BB = MI.getParent(); - auto ConstSrc1 = 
getConstantVRegValWithLookThrough(Src1, *MRI, true, true); + auto ConstSrc1 = + getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true); if (ConstSrc1) { -auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true); +auto ConstSrc0 = +getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true); if (ConstSrc0) { const int64_t K0 = ConstSrc0->Value.getSExtValue(); const int64_t K1 = ConstSrc1->Value.getSExtValue(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir index 056ea79a9898..ccd6fd71b3da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -579,10 +579,8 @@ body: | bb.0: ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant -; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123 -; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456 -; G
[llvm-branch-commits] [llvm] 0c7cce5 - [AMDGPU] Resolve issues when picking between ds_read/write and ds_read2/write2
Author: Mirko Brkusanin Date: 2020-12-10T12:40:49+01:00 New Revision: 0c7cce54eba3249489530040f41103dd8e0049f7 URL: https://github.com/llvm/llvm-project/commit/0c7cce54eba3249489530040f41103dd8e0049f7 DIFF: https://github.com/llvm/llvm-project/commit/0c7cce54eba3249489530040f41103dd8e0049f7.diff LOG: [AMDGPU] Resolve issues when picking between ds_read/write and ds_read2/write2 Both ds_read_b128 and ds_read2_b64 are valid for 128-bit 16-byte aligned loads but the one that will be selected is determined either by the order in tablegen or by the AddedComplexity attribute. Currently ds_read_b128 has priority. While ds_read2_b64 has lower alignment requirements, we cannot always restrict ds_read_b128 to 16-byte alignment because of the unaligned-access-mode option. This was causing ds_read_b128 to be selected for 8-byte aligned loads regardless of the chosen access mode. To resolve this we use two patterns for selecting ds_read_b128. One requires 16-byte alignment and the other requires the unaligned-access-mode option. The same goes for ds_write2_b64 and ds_write_b128. 
Differential Revision: https://reviews.llvm.org/D92767 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/DSInstructions.td llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll llvm/test/CodeGen/AMDGPU/load-local.128.ll llvm/test/CodeGen/AMDGPU/store-local.128.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index f27ee1975a7f..77063f370976 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1089,11 +1089,6 @@ def isGFX7GFX10 : "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">, AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>; -def isGFX7GFX8 : - Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" -"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS">, - AssemblerPredicate<(all_of FeatureSouthernIslands, FeatureCIInsts)>; - def isGFX7GFX8GFX9 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -1299,6 +1294,9 @@ def EnableFlatScratch : Predicate<"Subtarget->enableFlatScratch()">; def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">; +def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">, + AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 2e38619e2333..328c81005df4 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -680,7 +680,7 @@ foreach vt = VReg_64.RegTypes in { defm : DSReadPat_mc ; } -let 
SubtargetPredicate = isGFX7GFX8 in { +let SubtargetPredicate = isGFX7Plus in { foreach vt = VReg_96.RegTypes in { defm : DSReadPat_mc ; @@ -690,9 +690,7 @@ foreach vt = VReg_128.RegTypes in { defm : DSReadPat_mc ; } -} - -let SubtargetPredicate = isGFX9Plus in { +let SubtargetPredicate = HasUnalignedAccessMode in { foreach vt = VReg_96.RegTypes in { defm : DSReadPat_mc ; @@ -702,7 +700,9 @@ foreach vt = VReg_128.RegTypes in { defm : DSReadPat_mc ; } -} +} // End SubtargetPredicate = HasUnalignedAccessMode + +} // End SubtargetPredicate = isGFX7Plus } // End AddedComplexity = 100 @@ -835,7 +835,7 @@ foreach vt = VReg_64.RegTypes in { defm : DSWritePat_mc ; } -let SubtargetPredicate = isGFX7GFX8 in { +let SubtargetPredicate = isGFX7Plus in { foreach vt = VReg_96.RegTypes in { defm : DSWritePat_mc ; @@ -845,9 +845,7 @@ foreach vt = VReg_128.RegTypes in { defm : DSWritePat_mc ; } -} - -let SubtargetPredicate = isGFX9Plus in { +let SubtargetPredicate = HasUnalignedAccessMode in { foreach vt = VReg_96.RegTypes in { defm : DSWritePat_mc ; @@ -857,9 +855,12 @@ foreach vt = VReg_128.RegTypes in { defm : DSWritePat_mc ; } -} +} // End SubtargetPredicate = HasUnalignedAccessMode + +} // End SubtargetPredicate = isGFX7Plus } // End AddedComplexity = 100 + class DSAtomicRetPat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir index e7c646ee73a7..71fc286dc75c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-loca