https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/80102
From b64f7ba4afc6cbb3e5e34757e6979a0d5ee73e2b Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe <sameer.sahasrabud...@amd.com> Date: Tue, 30 Jan 2024 11:26:53 +0530 Subject: [PATCH] [AMDGPU] Every convergent operation needs post-isel processing --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 3 +++ llvm/lib/Target/AMDGPU/DSInstructions.td | 7 ++++--- llvm/lib/Target/AMDGPU/SIInstructions.td | 17 ++++++++++++++--- llvm/lib/Target/AMDGPU/SOPInstructions.td | 15 +++++++++++++++ llvm/lib/Target/AMDGPU/VOP1Instructions.td | 1 + llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 2 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 12 ++++++++++++ llvm/lib/Target/AMDGPU/VOPInstructions.td | 2 ++ 9 files changed, 53 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 360aafedc5224..3645381919217 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -46,6 +46,9 @@ class AMDGPUInst <dag outs, dag ins, string asm = "", let TSFlags{63} = isRegisterLoad; let TSFlags{62} = isRegisterStore; + + assert !if(!eq(isConvergent, 1), !eq(hasPostISelHook, 1), true), + !strconcat(NAME, ": Every convergent operation needs post-isel processing."); } class AMDGPUShaderInst <dag outs, dag ins, string asm = "", diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 0888fb84a22fa..2e4fcdb2945ce 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -432,6 +432,7 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag, let mayLoad = 0; let mayStore = 0; let isConvergent = 1; + let hasPostISelHook = 1; let has_data1 = 0; let has_gds = 0; @@ -582,7 +583,7 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>; defm DS_WRXCHG2_RTN_B64 : 
DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>; defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>; -let isConvergent = 1, usesCustomInserter = 1 in { +let isConvergent = 1, hasPostISelHook = 1, usesCustomInserter = 1 in { def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> { let mayLoad = 0; } @@ -627,7 +628,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">; def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; } // End SubtargetPredicate = HasDsSrc2Insts -let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { +let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1, hasPostISelHook = 1 in { def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>; } @@ -677,7 +678,7 @@ let SubtargetPredicate = isGFX7Plus in { defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>; defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>; -let isConvergent = 1, usesCustomInserter = 1 in { +let isConvergent = 1, hasPostISelHook = 1, usesCustomInserter = 1 in { def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 788e3162fb37e..66ce001a0d766 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -177,6 +177,7 @@ def WWM_COPY : SPseudoInstSI < let hasSideEffects = 0; let isAsCheapAsAMove = 1; let isConvergent = 1; + let hasPostISelHook = 1; } def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> { @@ -246,7 +247,7 @@ def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), // Invert the exec mask and overwrite the inactive lanes of dst with inactive, // restoring it after we're done. 
-let Defs = [SCC], isConvergent = 1 in { +let Defs = [SCC], isConvergent = 1, hasPostISelHook = 1 in { def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst), (ins VSrc_b32: $src, VSrc_b32:$inactive), [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> { @@ -369,6 +370,7 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins), let mayLoad = 0; let mayStore = 0; let isConvergent = 1; + let hasPostISelHook = 1; let FixedSize = 1; let Size = 0; let isMeta = 1; @@ -382,6 +384,7 @@ def SCHED_BARRIER : SPseudoInstSI<(outs), (ins i32imm:$mask), let mayLoad = 0; let mayStore = 0; let isConvergent = 1; + let hasPostISelHook = 1; let FixedSize = 1; let Size = 0; let isMeta = 1; @@ -397,6 +400,7 @@ def SCHED_GROUP_BARRIER : SPseudoInstSI< let mayLoad = 0; let mayStore = 0; let isConvergent = 1; + let hasPostISelHook = 1; let FixedSize = 1; let Size = 0; let isMeta = 1; @@ -410,6 +414,7 @@ def IGLP_OPT : SPseudoInstSI<(outs), (ins i32imm:$mask), let mayLoad = 0; let mayStore = 0; let isConvergent = 1; + let hasPostISelHook = 1; let FixedSize = 1; let Size = 0; let isMeta = 1; @@ -509,6 +514,7 @@ multiclass PseudoInstKill <dag ins> { let Defs = [EXEC,SCC] in def _PSEUDO : PseudoInstSI <(outs), ins> { let isConvergent = 1; + let hasPostISelHook = 1; let usesCustomInserter = 1; } @@ -626,6 +632,7 @@ def SI_CALL_ISEL : SPseudoInstSI < let usesCustomInserter = 1; // TODO: Should really base this on the call target let isConvergent = 1; + let hasPostISelHook = 1; } def : GCNPat< @@ -644,6 +651,7 @@ def SI_CALL : SPseudoInstSI < let SchedRW = [WriteBranch]; // TODO: Should really base this on the call target let isConvergent = 1; + let hasPostISelHook = 1; } class SI_TCRETURN_Pseudo<RegisterClass rc, SDNode sd> : SPseudoInstSI <(outs), @@ -659,6 +667,7 @@ class SI_TCRETURN_Pseudo<RegisterClass rc, SDNode sd> : SPseudoInstSI <(outs), let SchedRW = [WriteBranch]; // TODO: Should really base this on the call target let isConvergent = 1; + let hasPostISelHook = 1; } // 
Tail call handling pseudo @@ -693,6 +702,7 @@ class SI_CS_CHAIN_TC< let UseNamedOperandTable = 1; let SchedRW = [WriteBranch]; let isConvergent = 1; + let hasPostISelHook = 1; let WaveSizePredicate = wavesizepred; } @@ -931,7 +941,7 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>; defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>; defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>; -let SGPRSpill = 1, VALU = 1, isConvergent = 1 in { +let SGPRSpill = 1, VALU = 1, isConvergent = 1, hasPostISelHook = 1 in { def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst), (ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> { let Size = 4; @@ -1031,7 +1041,7 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>; defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>; defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>; -let isConvergent = 1 in { +let isConvergent = 1, hasPostISelHook = 1 in { defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>; defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>; } @@ -3974,6 +3984,7 @@ def G_SI_CALL : AMDGPUGenericInstruction { let SchedRW = [WriteBranch]; // TODO: Should really base this on the call target let isConvergent = 1; + let hasPostISelHook = 1; } def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction { diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index ae5ef0541929b..e2f5c3494d784 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -448,6 +448,7 @@ def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins), "", [(int_amdgcn_s_barrier_signal_var M0)]>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins), @@ -455,30 +456,35 @@ def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (o let Defs = [SCC]; let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def 
S_BARRIER_INIT_M0 : SOP1_Pseudo <"s_barrier_init m0", (outs), (ins), "", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_INIT_IMM : SOP1_Pseudo <"s_barrier_init", (outs), (ins SplitBarrier:$src0), "$src0", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins), "", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins), "", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } } // End Uses = [M0] @@ -486,6 +492,7 @@ def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs), (ins SplitBarrier:$src0), "$src0", [(int_amdgcn_s_barrier_signal timm:$src0)]>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs), @@ -493,18 +500,21 @@ def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (out let Defs = [SCC]; let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs), (ins SplitBarrier:$src0), "$src0", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs), (ins SplitBarrier:$src0), "$src0", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } } // End has_sdst = 0 @@ -512,6 +522,7 @@ def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32 (ins SplitBarrier:$src0), "$sdst, $src0", []>{ let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_GET_BARRIER_STATE_M0 : SOP1_Pseudo <"s_get_barrier_state $sdst, m0", (outs SSrc_b32:$sdst), @@ -519,6 +530,7 @@ def S_GET_BARRIER_STATE_M0 
: SOP1_Pseudo <"s_get_barrier_state $sdst, m0", (outs let Uses = [M0]; let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } } // End hasSideEffects = 1 @@ -1561,12 +1573,14 @@ def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "", let simm16 = 0; let fixed_imm = 1; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_wait timm:$simm16)]> { let SchedRW = [WriteBarrier]; let isConvergent = 1; + let hasPostISelHook = 1; } def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins), "", @@ -1575,6 +1589,7 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins), "", let simm16 = 0; let fixed_imm = 1; let isConvergent = 1; + let hasPostISelHook = 1; let Defs = [SCC]; } diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 920c220fb2c65..53d3ef4be6d05 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -256,6 +256,7 @@ def V_READFIRSTLANE_B32 : let VALU = 1; let Uses = [EXEC]; let isConvergent = 1; + let hasPostISelHook = 1; bits<8> vdst; bits<9> src0; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index eba9bf64884ec..d305e5f09e4ad 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -768,7 +768,7 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, } // End isCommutable = 1 // These are special and do not read the exec mask. 
-let isConvergent = 1, Uses = []<Register> in { +let isConvergent = 1, hasPostISelHook = 1, Uses = []<Register> in { def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index ef14a587c42e7..876f2fd39908f 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -677,7 +677,7 @@ class MAIInst<string OpName, VOPProfile P, SDPatternOperator node> multiclass MAIInst<string OpName, string P, SDPatternOperator node, bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> { - let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { + let isConvergent = 1, hasPostISelHook = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported. 
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in { def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 861c9f75e866d..8a6b6c79fdebb 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -285,6 +285,7 @@ multiclass VOPC_Pseudos <string opName, let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = P.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; let isCompare = 1; let isCommutable = 1; } @@ -304,6 +305,7 @@ multiclass VOPC_Pseudos <string opName, let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; let isCompare = 1; } @@ -313,6 +315,7 @@ multiclass VOPC_Pseudos <string opName, let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = P.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; let isCompare = 1; let VOPC = 1; let Constraints = ""; @@ -342,6 +345,7 @@ multiclass VOPCX_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let isCompare = 1; let isCommutable = 1; let SubtargetPredicate = HasNoSdstCMPX; @@ -365,6 +369,7 @@ multiclass VOPCX_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let isCompare = 1; let SubtargetPredicate = HasNoSdstCMPX; } @@ -375,6 +380,7 @@ multiclass VOPCX_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let isCompare = 1; let VOPC = 1; let Constraints = ""; @@ -846,6 +852,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec, !if(DefVcc, [VCC], [])); let SchedRW = p.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; } def _e64 : VOP3_Pseudo<opName, p, 
getVOPCClassPat64<p>.ret>, @@ -860,6 +867,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec, !if(DefVcc, [VCC], [])); let SchedRW = p.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; } let SubtargetPredicate = isGFX11Plus in { @@ -869,6 +877,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec, !if(DefVcc, [VCC], [])); let SchedRW = p.Schedule; let isConvergent = DefExec; + let hasPostISelHook = DefExec; let VOPC = 1; let Constraints = ""; } @@ -892,6 +901,7 @@ multiclass VOPCX_Class_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let SubtargetPredicate = HasNoSdstCMPX; } @@ -907,6 +917,7 @@ multiclass VOPCX_Class_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let SubtargetPredicate = HasNoSdstCMPX; } @@ -916,6 +927,7 @@ multiclass VOPCX_Class_Pseudos <string opName, let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; let isConvergent = 1; + let hasPostISelHook = 1; let VOPC = 1; let Constraints = ""; } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 20d7c88fb7e59..bfe2ed7892042 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -825,6 +825,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[], let mayRaiseFPException = ReadsModeReg; let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let isConvergent = 1; + let hasPostISelHook = 1; string Mnemonic = OpName; string AsmOperands = asmOps; @@ -870,6 +871,7 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> : // Copy relevant pseudo op flags let isConvergent = ps.isConvergent; + let hasPostISelHook = ps.hasPostISelHook; let SubtargetPredicate = ps.SubtargetPredicate; let AssemblerPredicate = ps.AssemblerPredicate; let OtherPredicates = 
ps.OtherPredicates; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits