hdelan created this revision.
Herald added subscribers: mattd, gchakrabarti, asavonic, hiraditya.
Herald added a project: All.
hdelan requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jholewinski.
Herald added projects: clang, LLVM.

This patch adds __nvvm_reflect as a clang builtin for NVPTX backend. This means
that __nvvm_reflect can be used in source code in order to enable conditional 
compilation
based on compute capability and FTZ properties.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137154

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/test/CodeGenCUDA/nvvm-reflect.cu
  llvm/include/llvm/IR/IntrinsicsNVVM.td
  llvm/lib/Target/NVPTX/NVVMReflect.cpp
  llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
  llvm/test/CodeGen/NVPTX/nvvm-reflect.ll

Index: llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -41,7 +41,7 @@
   ret float %ret
 }
 
-declare i32 @llvm.nvvm.reflect.p0i8(i8*)
+declare i32 @llvm.nvvm.reflect(i8*)
 
 ; CHECK-LABEL: define i32 @intrinsic
 define i32 @intrinsic() {
@@ -49,7 +49,7 @@
 ; USE_FTZ_0: ret i32 0
 ; USE_FTZ_1: ret i32 1
   %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
-  %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
+  %reflect = tail call i32 @llvm.nvvm.reflect(i8* %ptr)
   ret i32 %reflect
 }
 
Index: llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
+++ llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
@@ -41,7 +41,7 @@
   ret float %ret
 }
 
-declare i32 @llvm.nvvm.reflect.p0i8(ptr)
+declare i32 @llvm.nvvm.reflect(ptr)
 
 ; CHECK-LABEL: define i32 @intrinsic
 define i32 @intrinsic() {
@@ -49,7 +49,7 @@
 ; USE_FTZ_0: ret i32 0
 ; USE_FTZ_1: ret i32 1
   %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(ptr addrspace(4) @str)
-  %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(ptr %ptr)
+  %reflect = tail call i32 @llvm.nvvm.reflect(ptr %ptr)
   ret i32 %reflect
 }
 
Index: llvm/lib/Target/NVPTX/NVVMReflect.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -40,6 +40,7 @@
 #include <sstream>
 #include <string>
 #define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+#define NVVM_REFLECT_LLVM_INTRINSIC_NAME "llvm.nvvm.reflect"
 
 using namespace llvm;
 
@@ -119,6 +120,7 @@
       continue;
     Function *Callee = Call->getCalledFunction();
     if (!Callee || (Callee->getName() != NVVM_REFLECT_FUNCTION &&
+                    Callee->getName() != NVVM_REFLECT_LLVM_INTRINSIC_NAME &&
                     Callee->getIntrinsicID() != Intrinsic::nvvm_reflect))
       continue;
 
Index: llvm/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1578,7 +1578,8 @@
     Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
 
 def int_nvvm_reflect :
-  Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty], [IntrNoMem], "llvm.nvvm.reflect">;
+  Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
+  ClangBuiltin<"__nvvm_reflect">;
 
 // isspacep.{const, global, local, shared}
 def int_nvvm_isspacep_const
Index: clang/test/CodeGenCUDA/nvvm-reflect.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/nvvm-reflect.cu
@@ -0,0 +1,81 @@
+// REQUIRES: nvptx-registered-target
+
+// Checking to see that __nvvm_reflect resolves to the correct llvm.nvvm.reflect
+// intrinsic
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -disable-llvm-passes -S -emit-llvm -x c++ %s -o - | FileCheck %s --check-prefix=NO_NVVM_REFLECT_PASS
+
+// Prepare bitcode file to link with
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
+// RUN:    -disable-llvm-passes -o %t.bc %s
+
+// Checking to see if the correct values are substituted for the nvvm_reflect
+// call when llvm passes are enabled.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_50 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_1
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_52 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_2
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_53 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_3
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_60 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_4
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_61 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_5
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_62 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_6
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_70 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_7
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_72 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_8
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_75 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_9
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_80 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_10
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda -target-cpu \
+// RUN:    sm_86 -S -o /dev/null %s -mllvm -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=ARCH_REFLECT_11
+
+// Check to see that nvvm_reflect("__CUDA_FTZ") returns 1 or 0 based on value
+// of -fdenormal-fp-math-f32 flag
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda \
+// RUN:    -fdenormal-fp-math-f32=preserve-sign -S -o /dev/null %s -mllvm \
+// RUN:    -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=FTZ_REFLECT
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx64-nvidia-cuda \
+// RUN:    -fdenormal-fp-math-f32=ieee -S -o /dev/null %s -mllvm \
+// RUN:    -print-after-all 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=NO_FTZ_REFLECT
+
+#include "Inputs/cuda.h"
+
+__device__ int foo_arch() {
+  // NO_NVVM_REFLECT_PASS: call i32 @llvm.nvvm.reflect
+  // ARCH_REFLECT_1: ret i32 500
+  // ARCH_REFLECT_2: ret i32 520
+  // ARCH_REFLECT_3: ret i32 530
+  // ARCH_REFLECT_4: ret i32 600
+  // ARCH_REFLECT_5: ret i32 610
+  // ARCH_REFLECT_6: ret i32 620
+  // ARCH_REFLECT_7: ret i32 700
+  // ARCH_REFLECT_8: ret i32 720
+  // ARCH_REFLECT_9: ret i32 750
+  // ARCH_REFLECT_10: ret i32 800
+  // ARCH_REFLECT_11: ret i32 860
+  return __nvvm_reflect("__CUDA_ARCH");
+}
+
+__device__ int foo_ftz() {
+  // FTZ_REFLECT: ret i32 1
+  // NO_FTZ_REFLECT: ret i32 0
+  return __nvvm_reflect("__CUDA_FTZ");
+}
+
Index: clang/include/clang/Basic/BuiltinsNVPTX.def
===================================================================
--- clang/include/clang/Basic/BuiltinsNVPTX.def
+++ clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -824,6 +824,8 @@
 BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
 BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
 
+BUILTIN(__nvvm_reflect, "icC*", "nc")
+
 // Builtins to support WMMA instructions on sm_70
 TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
 TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to