https://github.com/durga4github updated https://github.com/llvm/llvm-project/pull/141278
>From ba3a94813adcb77262eb008eeee5911e68e558c8 Mon Sep 17 00:00:00 2001 From: Durgadoss R <durgado...@nvidia.com> Date: Fri, 23 May 2025 20:43:18 +0530 Subject: [PATCH] [NVPTX] Add pm_event intrinsics This patch adds pm_event.mask intrinsic and its clang-builtin. Signed-off-by: Durgadoss R <durgado...@nvidia.com> --- clang/include/clang/Basic/BuiltinsNVPTX.td | 1 + clang/test/CodeGen/builtins-nvptx.c | 7 +++++++ llvm/docs/NVPTXUsage.rst | 23 ++++++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 5 +++++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 10 ++++++++++ llvm/test/CodeGen/NVPTX/pm-event.ll | 15 ++++++++++++++ 6 files changed, 61 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/pm-event.ll diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index 2cea44e224674..6e531eff6dd1d 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -177,6 +177,7 @@ let Attributes = [NoReturn] in { } let Attributes = [NoThrow] in { def __nvvm_nanosleep : NVPTXBuiltinSMAndPTX<"void(unsigned int)", SM_70, PTX63>; + def __nvvm_pm_event_mask : NVPTXBuiltin<"void(_Constant unsigned short)">; } // Min Max diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index cef529163bb39..f994adb14e457 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -883,6 +883,13 @@ __device__ void nvvm_vote(int pred) { // CHECK: ret void } +// CHECK-LABEL: nvvm_pm_event_mask +__device__ void nvvm_pm_event_mask() { + // CHECK: call void @llvm.nvvm.pm.event.mask(i16 255) + __nvvm_pm_event_mask(255); + // CHECK: ret void +} + // CHECK-LABEL: nvvm_nanosleep __device__ void nvvm_nanosleep(int d) { #if __CUDA_ARCH__ >= 700 diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 8bb0f2ed17c32..d51686c0b830c 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -1868,6 +1868,29 @@ If the request failed, the behavior of these intrinsics is undefined. For more information, refer `PTX ISA <https://docs.nvidia.com/cuda/parallel-thread-execution/?a#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel>`__. +Perf Monitor Event Intrinsics +----------------------------- + +'``llvm.nvvm.pm.event.mask``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.pm.event.mask(i16 immarg %mask_val) + +Overview: +""""""""" + +The '``llvm.nvvm.pm.event.mask``' intrinsic triggers one or more +performance monitor events. Each bit in the 16-bit immediate operand +``%mask_val`` controls an event. + +For more information on the pmevent instructions, refer to the PTX ISA +`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent>`_. + Other Intrinsics ---------------- diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 91e7d188c8533..8c8e778b57061 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -768,6 +768,11 @@ let TargetPrefix = "nvvm" in { DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoMem, IntrHasSideEffects]>; + // Performance Monitor Events (pm events) intrinsics + def int_nvvm_pm_event_mask : NVVMBuiltin, + DefaultAttrsIntrinsic<[], [llvm_i16_ty], + [IntrConvergent, IntrNoMem, IntrHasSideEffects, + ImmArg<ArgIndex<0>>]>; // // Min Max // diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 8fb5884fa2a20..71da857841c95 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1052,6 +1052,16 @@ def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$ def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;", [(int_nvvm_nanosleep i32:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; + +let hasSideEffects = 1 in { +// Performance Monitor events +def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs), + (ins i16imm:$mask), + "pmevent.mask", + [(int_nvvm_pm_event_mask timm:$mask)]>, + Requires<[hasSM<20>, hasPTX<30>]>; +} // hasSideEffects + // // Min Max // diff --git a/llvm/test/CodeGen/NVPTX/pm-event.ll b/llvm/test/CodeGen/NVPTX/pm-event.ll new file mode 100644 index 0000000000000..871da6d414978 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/pm-event.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} + +declare void @llvm.nvvm.pm.event.mask(i16 %mask) + +; CHECK-LABEL: test_pm_event +define void @test_pm_event() { + ; CHECK: pmevent.mask 255; + call void @llvm.nvvm.pm.event.mask(i16 u0xff) + + ; CHECK: pmevent.mask 4096; + call void @llvm.nvvm.pm.event.mask(i16 u0x1000) + + ret void +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits