kerbowa created this revision.
Herald added subscribers: hsmhsm, foad, hiraditya, t-tye, tpr, dstuttard,
yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
Herald added a project: All.
kerbowa requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, wdng.
Herald added projects: clang, LLVM.
Adds an intrinsic/builtin that can be used to fine tune scheduler behavior. If
there is a need to have highly optimized codegen and kernel developers have
knowledge of inter-wave runtime behavior which is unknown to the compiler this
builtin can be used to tune scheduling.
This intrinsic creates a barrier between scheduling regions. The immediate
parameter is a mask to determine the types of instructions that should be
prevented from crossing the sched_barrier. In this initial patch, there are only
two variations. A mask of 0 means that no instructions may be scheduled across
the sched_barrier. A mask of 1 means that non-memory, non-side-effect inducing
instructions may cross the sched_barrier.
Note that this intrinsic is only meant to work with the scheduling passes. Any
other transformations that may move code will not be impacted in the ways
described above.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D124700
Files:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave_barrier() #0 {
+; GCN-LABEL: test_wave_barrier:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: ; sched_barrier mask(0)
+; GCN-NEXT: ; sched_barrier mask(1)
+; GCN-NEXT: ; sched_barrier mask(4)
+; GCN-NEXT: ; sched_barrier mask(15)
+; GCN-NEXT: s_endpgm
+entry:
+ call void @llvm.amdgcn.sched.barrier(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i16 1) #1
+ call void @llvm.amdgcn.sched.barrier(i16 4) #1
+ call void @llvm.amdgcn.sched.barrier(i16 15) #1
+ ret void
+}
+
+declare void @llvm.amdgcn.sched.barrier(i16) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -148,6 +148,7 @@
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_s_barrier:
case Intrinsic::amdgcn_wave_barrier:
+ case Intrinsic::amdgcn_sched_barrier:
return false;
default:
break;
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -313,6 +313,18 @@
let Size = 0;
}
+def SCHED_BARRIER : SPseudoInstSI<(outs), (ins i16imm:$mask),
+ [(int_amdgcn_sched_barrier (i16 timm:$mask))]> {
+ let SchedRW = [];
+ let hasNoSchedulingInfo = 1;
+ let hasSideEffects = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let isConvergent = 1;
+ let FixedSize = 1;
+ let Size = 0;
+}
+
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1773,6 +1773,7 @@
// hazard, even if one exist, won't really be visible. Should we handle it?
case AMDGPU::SI_MASKED_UNREACHABLE:
case AMDGPU::WAVE_BARRIER:
+ case AMDGPU::SCHED_BARRIER:
return 0;
}
}
@@ -3490,6 +3491,9 @@
if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
return true;
+ if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
+ return true;
+
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
Index: llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -207,6 +207,14 @@
return;
}
+ if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
+ if (isVerbose()) {
+ OutStreamer->emitRawComment(" sched_barrier mask(" +
+ Twine(MI->getOperand(0).getImm()) + ")");
+ }
+ return;
+ }
+
if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
if (isVerbose())
OutStreamer->emitRawComment(" divergent unreachable");
Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -213,6 +213,15 @@
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;
+// The 1st parameter is a mask for the types of instructions that may be allowed
+// to cross the SCHED_BARRIER during scheduling.
+// MASK = 0: No instructions may be scheduled across SCHED_BARRIER.
+// MASK = 1: Non-memory, non-side-effect producing instructions may be
+// scheduled across SCHED_BARRIER, i.e. allow ALU instructions to pass.
+def int_amdgcn_sched_barrier : GCCBuiltin<"__builtin_amdgcn_sched_barrier">,
+ Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
+ IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;
+
def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -60,6 +60,12 @@
__builtin_amdgcn_s_setprio(65536); // expected-warning {{implicit conversion from 'int' to 'short' changes value from 65536 to 0}}
}
+void test_sched_barrier(int x)
+{
+ __builtin_amdgcn_sched_barrier(x); // expected-error {{argument to '__builtin_amdgcn_sched_barrier' must be a constant integer}}
+ __builtin_amdgcn_sched_barrier(65536); // expected-warning {{implicit conversion from 'int' to 'short' changes value from 65536 to 0}}
+}
+
void test_sicmp_i32(global ulong* out, int a, int b, uint c)
{
*out = __builtin_amdgcn_sicmp(a, b, c); // expected-error {{argument to '__builtin_amdgcn_sicmp' must be a constant integer}}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -396,6 +396,19 @@
__builtin_amdgcn_wave_barrier();
}
+// CHECK-LABEL: @test_sched_barrier
+// CHECK: call void @llvm.amdgcn.sched.barrier(i16 0)
+// CHECK: call void @llvm.amdgcn.sched.barrier(i16 1)
+// CHECK: call void @llvm.amdgcn.sched.barrier(i16 4)
+// CHECK: call void @llvm.amdgcn.sched.barrier(i16 15)
+void test_sched_barrier()
+{
+ __builtin_amdgcn_sched_barrier(0);
+ __builtin_amdgcn_sched_barrier(1);
+ __builtin_amdgcn_sched_barrier(4);
+ __builtin_amdgcn_sched_barrier(15);
+}
+
// CHECK-LABEL: @test_s_sleep
// CHECK: call void @llvm.amdgcn.s.sleep(i32 1)
// CHECK: call void @llvm.amdgcn.s.sleep(i32 15)
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -62,6 +62,7 @@
BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n")
BUILTIN(__builtin_amdgcn_s_barrier, "v", "n")
BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
+BUILTIN(__builtin_amdgcn_sched_barrier, "vIs", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits