https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/81277
>From 7b97388a5f251684cf4ae69c3b0cae0ff6fe1397 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 9 Feb 2024 10:50:20 -0600 Subject: [PATCH 1/2] [NVPTX] Add clang builtin for `__nvvm_reflect` intrinsic Summary: Some recent support made usage of `__nvvm_reflect` more consistent. We should expose it as a Clang builtin rather than forcing users to externally define the function. --- clang/include/clang/Basic/BuiltinsNVPTX.def | 1 + clang/test/CodeGen/builtins-nvptx.c | 8 ++++++++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 5 +++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 7819e71d7fe2aa..8d3c5e69d55cf4 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -159,6 +159,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") BUILTIN(__nvvm_prmt, "UiUiUiUi", "") BUILTIN(__nvvm_exit, "v", "r") +BUILTIN(__nvvm_reflect, "UicC*", "r") TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63)) // Min Max diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index ad7c27f2d60d26..4dba7670b5c43e 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -44,6 +44,14 @@ __device__ int read_tid() { } +__device__ bool reflect() { + +// CHECK: call i32 @llvm.nvvm.reflect(ptr {{.*}}) + + unsigned x = __nvvm_reflect("__CUDA_ARCH"); + return x >= 700; +} + __device__ int read_ntid() { // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index d825dc82156432..f7b0fe926959b1 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1624,8 +1624,9 @@ def int_nvvm_compiler_error : def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">; -def int_nvvm_reflect : - 
Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">; +def int_nvvm_reflect : + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">, + ClangBuiltin<"__nvvm_reflect">; // isspacep.{const, global, local, shared} def int_nvvm_isspacep_const >From d36b7484f67a617629d021da8eb4e80e625eb2a2 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 9 Feb 2024 13:26:21 -0600 Subject: [PATCH 2/2] OpenCL test and fix tests --- clang/test/CodeGenOpenCL/reflect.cl | 28 +++++++++++++++++++ .../test/CodeGen/NVPTX/nvvm-reflect-opaque.ll | 4 +-- llvm/test/CodeGen/NVPTX/nvvm-reflect.ll | 4 +-- 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/reflect.cl diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl new file mode 100644 index 00000000000000..9ae4a5f027d358 --- /dev/null +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s + +// CHECK-LABEL: define dso_local zeroext i1 @device_function( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.reflect(ptr addrspacecast (ptr addrspace(4) @.str to ptr)) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[TMP0]], 700 +// CHECK-NEXT: ret i1 [[CMP]] +// +bool device_function() { + return __nvvm_reflect("__CUDA_ARCH") >= 700; +} + +// CHECK-LABEL: define dso_local spir_kernel void @kernel_function( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 +// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 
+// CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @device_function() #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[I_ADDR]], align 4 +// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +__kernel void kernel_function(__global int *i) { + *i = device_function(); +} diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll index 1cb5c87fae826b..46ab79d9858cad 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll @@ -41,7 +41,7 @@ exit: ret float %ret } -declare i32 @llvm.nvvm.reflect.p0(ptr) +declare i32 @llvm.nvvm.reflect(ptr) ; CHECK-LABEL: define noundef i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ define i32 @intrinsic() { ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str) - %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr) ret i32 %reflect } diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll index 9b1939f372082f..2ed9f7c11bcf9b 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -41,7 +41,7 @@ exit: ret float %ret } -declare i32 @llvm.nvvm.reflect.p0(ptr) +declare i32 @llvm.nvvm.reflect(ptr) ; CHECK-LABEL: define noundef i32 @intrinsic define i32 @intrinsic() { @@ -49,7 +49,7 @@ define i32 @intrinsic() { ; USE_FTZ_0: ret i32 0 ; USE_FTZ_1: ret i32 1 %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str) - %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr) + %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr) ret i32 %reflect } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits