yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.
yaxunl requested review of this revision.

In device compilation, clang emit global variables in device or constant
address space then cast them to default address space. If global variables
are not used, there are invisible address space casts as LLVM constants
in the LLVM module. These casts cause spurious use of global variables
and prevent them from being eliminated by global DCE.

Such casts will disappear if the module is saved and reloaded, but stays
if the module is not saved and reloaded. This causes difference in generated ISA
depending on whether -save-temps is used.

The patch removes the invisible unused casts of global variables.


https://reviews.llvm.org/D98783

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/CodeGenCUDA/unused-global-var.cu

Index: clang/test/CodeGenCUDA/unused-global-var.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/unused-global-var.cu
@@ -0,0 +1,50 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// AMDGPU internalize unused global variables for whole-program compilation
+// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then
+// eliminated by global DCE. If there are invisible unused address space casts
+// for global variables, the internalization and elimination of unused global
+// variales will be hindered. This test makes sure no such address space
+// casts.
+
+// Check unused device/constant variables are eliminated.
+
+// CHECK-NOT: @v1
+__device__ int v1;
+
+// CHECK-NOT: @v2
+__constant__ int v2;
+
+// CHECK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK: @v4
+__managed__ int v4;
+
+// Check used device/constant variables are not eliminated.
+// CHECK: @u1
+__device__ int u1;
+
+// CHECK: @u2
+__constant__ int u2;
+
+// Check u3 is kept because its address is taken.
+// CHECK: @_ZL2u3
+constexpr int u3 = 2;
+
+// Check u4 is not kept because it is not ODR-use.
+// CHECK-NOT: @_ZL2u4
+constexpr int u4 = 3;
+
+__device__ int fun1(const int& x);
+
+__global__ void kern1(int *x) {
+  *x = u1 + u2 + fun1(u3) + u4;
+}
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -371,6 +371,14 @@
   llvm::SmallVector<std::pair<llvm::GlobalValue *, llvm::Constant *>, 8>
     GlobalValReplacements;
 
+  /// Potentially unused address space casts of global variables to be cleaned
+  /// up. In CUDA/HIP, global variables are emitted as global variables in
+  /// device or constant address space which are then casted to default address
+  /// space. If the global variables are not used, the address space casts
+  /// become invisible LLVM constants, causing spurious use of the global
+  /// variables which prevents them from being erased.
+  llvm::DenseSet<llvm::Constant *> GlobalVarCasts;
+
   /// Variables for which we've emitted globals containing their constant
   /// values along with the corresponding globals, for opportunistic reuse.
   llvm::DenseMap<const VarDecl*, llvm::GlobalVariable*> InitializerConstants;
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -768,6 +768,11 @@
   // that might affect the DLL storage class or the visibility, and
   // before anything that might act on these.
   setVisibilityFromDLLStorageClass(LangOpts, getModule());
+
+  // Remove unused address space casts of global variables.
+  for (auto *Cast : GlobalVarCasts)
+    if (Cast->use_empty())
+      Cast->destroyConstant();
 }
 
 void CodeGenModule::EmitOpenCLMetadata() {
@@ -3938,9 +3943,13 @@
         : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default);
   assert(getContext().getTargetAddressSpace(ExpectedAS) ==
          Ty->getPointerAddressSpace());
-  if (AddrSpace != ExpectedAS)
-    return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace,
-                                                       ExpectedAS, Ty);
+  if (AddrSpace != ExpectedAS) {
+    auto *Cast = getTargetCodeGenInfo().performAddrSpaceCast(
+        *this, GV, AddrSpace, ExpectedAS, Ty);
+    // Record address space casts of global variables for cleaning up if unused.
+    GlobalVarCasts.insert(Cast);
+    return Cast;
+  }
 
   return GV;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to