Author: Jay Foad Date: 2021-03-06T09:00:01Z New Revision: 99682bc039dfec3e30e6e2b97b4b663f412e0d71
URL: https://github.com/llvm/llvm-project/commit/99682bc039dfec3e30e6e2b97b4b663f412e0d71 DIFF: https://github.com/llvm/llvm-project/commit/99682bc039dfec3e30e6e2b97b4b663f412e0d71.diff LOG: Revert "Revert "[AMDGPU] Restore the s_memtime instruction in gfx1030"" This reverts commit e58d68fcd06ddc7743e0419c0b364df3d44121b6. This reinstates commit fc28f600e558c1344618bda149a068d6162b6f0b with a fix to initialize HasShaderCyclesRegister. See https://reviews.llvm.org/D97928. Added: Modified: clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/amdgpu-features.cl llvm/lib/Target/AMDGPU/AMDGPU.td llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp llvm/lib/Target/AMDGPU/GCNSubtarget.h llvm/lib/Target/AMDGPU/SMInstructions.td llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll llvm/test/MC/AMDGPU/gfx1030_err.s Removed: clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl ################################################################################ diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 0f1211d6c409..a84422e412ff 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -192,6 +192,7 @@ bool AMDGPUTargetInfo::initFeatureMap( Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; break; case GK_GFX1012: case GK_GFX1011: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index f387c93bd6e0..930c53705d84 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -58,9 +58,9 @@ // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" kernel void test() {} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl deleted file mode 100644 index 34149148a5f3..000000000000 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1030.cl +++ /dev/null @@ -1,7 +0,0 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1030 -verify -S -o - %s - -void test_gfx1030_s_memtime() -{ - __builtin_amdgcn_s_memtime(); // expected-error {{'__builtin_amdgcn_s_memtime' needs target feature s-memtime-inst}} -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 452d3bb6c9a9..e2e9c42dc985 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -563,6 +563,12 @@ def FeatureSMemTimeInst : SubtargetFeature<"s-memtime-inst", "Has s_memtime instruction" >; +def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register", + "HasShaderCyclesRegister", + "true", + "Has SHADER_CYCLES hardware register" +>; + def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts", "HasMadMacF32Insts", "true", @@ -777,7 +783,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, - FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16, + FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess ] >; @@ -988,7 +994,6 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1009,7 +1014,6 @@ def FeatureISAVersion10_1_1 : FeatureSet< FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1030,7 +1034,6 @@ def FeatureISAVersion10_1_2 : FeatureSet< FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureGetWaveIdInst, - FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, @@ -1047,7 +1050,8 @@ def FeatureISAVersion10_3_0 : FeatureSet< FeatureDot5Insts, FeatureDot6Insts, FeatureNSAEncoding, - FeatureWavefrontSize32]>; + FeatureWavefrontSize32, + FeatureShaderCyclesRegister]>; //===----------------------------------------------------------------------===// @@ -1377,7 +1381,8 @@ def HasSMemRealTime : Predicate<"Subtarget->hasSMemRealTime()">, def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, AssemblerPredicate<(all_of FeatureSMemTimeInst)>; -def HasNoSMemTimeInst : Predicate<"!Subtarget->hasSMemTimeInst()">; +def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, + AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index ec34237b201c..dfab7ccc6aae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -276,6 +276,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasVscnt(false), HasGetWaveIdInst(false), HasSMemTimeInst(false), + HasShaderCyclesRegister(false), HasRegisterBanking(false), HasVOP3Literal(false), HasNoDataDepHazard(false), diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index c2d3491c23f8..fc45f7ee11dc 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -163,6 +163,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasVscnt; bool HasGetWaveIdInst; bool HasSMemTimeInst; + bool HasShaderCyclesRegister; bool HasRegisterBanking; bool HasVOP3Literal; bool HasNoDataDepHazard; @@ -714,6 +715,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasSMemTimeInst; } + bool hasShaderCyclesRegister() const { + return HasShaderCyclesRegister; + } + bool hasRegisterBanking() const { return HasRegisterBanking; } diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 19afd72b3211..d2b5652a0db8 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -866,14 +866,16 @@ def : GCNPat < >; } // let OtherPredicates = [HasSMemTimeInst] -let OtherPredicates = [HasNoSMemTimeInst] in { +let OtherPredicates = [HasShaderCyclesRegister] in { def : GCNPat < (i64 (readcyclecounter)), (REG_SEQUENCE SReg_64, (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0, - (S_MOV_B32 (i32 0)), sub1) ->; -} // let OtherPredicates = [HasNoSMemTimeInst] + (S_MOV_B32 (i32 0)), sub1)> { + // Prefer this to s_memtime because it has lower and more predictable latency. + let AddedComplexity = 1; +} +} // let OtherPredicates = [HasShaderCyclesRegister] //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll index c9f0591a5dcc..cd2eada5b82e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=SIVI,GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX1030-ERR %s +; RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.s.memtime() #0 @@ -13,7 +13,6 @@ declare i64 @llvm.amdgcn.s.memtime() #0 ; SIVI-NOT: lgkmcnt ; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}} ; GCN: {{buffer|global}}_store_dwordx2 -; GFX1030-ERR: ERROR define amdgpu_kernel void @test_s_memtime(i64 addrspace(1)* %out) #0 { %cycle0 = call i64 @llvm.amdgcn.s.memtime() store volatile i64 %cycle0, i64 addrspace(1)* %out diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index dbee18bd2d91..756983ef49b5 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -21,9 +21,6 @@ v_dot8c_i32_i4 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 s_get_waveid_in_workgroup s0 // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU -s_memtime s[0:1] -// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: specified hardware register is not supported on this GPU _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits