https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79765
>From 9a07e319274f4ec2f7b12a174b7664af118de4e9 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Mon, 29 Jan 2024 08:12:35 -0600 Subject: [PATCH] [NVPTX} Add builtin support for 'globaltimer' Summary: This patch adds support for `globaltimer` to match `clock` and `clock64`. See the PTX ISA reference fro details. This patch does not implement the `hi` or `lo` variants for brevity as they can be obtained from this with the cost of an additional register. https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer-globaltimer-lo-globaltimer-hi --- clang/include/clang/Basic/BuiltinsNVPTX.def | 1 + clang/test/CodeGen/builtins-nvptx.c | 4 +++- llvm/include/llvm/IR/IntrinsicsNVVM.td | 2 ++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 ++ llvm/test/CodeGen/NVPTX/intrinsics.ll | 12 ++++++++++++ 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 0f2e8260143be78..57a229ded49f886 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -146,6 +146,7 @@ BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") +BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n") BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 353f3ebb608c2b1..5aab6bee5b1cc1d 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -134,11 +134,13 @@ __device__ long long read_clocks() { // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock() // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64() +// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer() int a = __nvvm_read_ptx_sreg_clock(); long long b = __nvvm_read_ptx_sreg_clock64(); + long long c = __nvvm_read_ptx_sreg_globaltimer(); - return a + b; + return a + b + c; } __device__ int read_pms() { diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 5a5ba2592e1467e..8c9ed4a349ba998 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4506,6 +4506,8 @@ def int_nvvm_read_ptx_sreg_lanemask_gt : def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">; def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">; +def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">; + def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">; def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">; def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 33f1e4a43e072af..5c509b50411701a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6364,6 +6364,8 @@ def INT_PTX_SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; def INT_PTX_SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; +def INT_PTX_SREG_GLOBALTIMER : + PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>; def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll index c09c7a72fd10181..26900543d922ba3 100644 --- a/llvm/test/CodeGen/NVPTX/intrinsics.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll @@ -133,6 +133,17 @@ define i64 @test_clock64() { ret i64 %ret } +; CHECK-LABEL: test_globaltimer +define i64 @test_globaltimer() { +; CHECK: mov.u64 %r{{.*}}, %globaltimer; + %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer() +; CHECK: mov.u64 %r{{.*}}, %globaltimer; + %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer() + %ret = add i64 %a, %b +; CHECK: ret + ret i64 %ret +} + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare float @llvm.nvvm.sqrt.f(float) @@ -146,3 +157,4 @@ declare i64 @llvm.ctpop.i64(i64) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.nvvm.read.ptx.sreg.clock() declare i64 @llvm.nvvm.read.ptx.sreg.clock64() +declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer() _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits