JonChesterfield created this revision.
JonChesterfield added reviewers: jdoerfert, ABataev, grokos, tianshilei1992, 
ye-luo.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
JonChesterfield requested review of this revision.
[libomptarget][nvptx] Undef, weak shared variables

Shared variables on nvptx, and LDS on amdgcn, are uninitialized at
the start of kernel execution. Therefore create the variables with
undef instead of zeros, motivated in part by the amdgcn back end
rejecting LDS+initializer.

Common is zero initialized, which seems incompatible with shared. Thus
change them to weak, following the direction of
https://reviews.llvm.org/rG7b3eabdcd215


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90248

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/test/OpenMP/nvptx_data_sharing.cpp
  clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/nvptx_teams_codegen.cpp
  clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Index: clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -17,14 +17,14 @@
 // CHECK-DAG: [[TEAMS_REDUCE_UNION_TY:%.+]] = type { [[TEAM1_REDUCE_TY]] }
 // SEQ-DAG: [[MAP_TY:%.+]] = type { [128 x i8] }
 
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
 // SEQ-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1
 // SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} {{16|8}}
 // SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} 16
 
 // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
-// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
+// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
 
 // Check that the execution mode of 2 target regions is set to Non-SPMD and the 3rd is in SPMD.
 // CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 1
Index: clang/test/OpenMP/nvptx_teams_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_teams_codegen.cpp
+++ clang/test/OpenMP/nvptx_teams_codegen.cpp
@@ -30,8 +30,8 @@
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
 // SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
@@ -119,8 +119,8 @@
 }
 
 // SEQ2: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ2-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ2-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ2-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ2-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} {{8|4}}
 // SEQ2-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1
Index: clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -76,8 +76,8 @@
 }
 
 // SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
Index: clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -93,8 +93,8 @@
 }
 
 // SEQ-DAG: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
Index: clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -9,7 +9,7 @@
 #define HEADER
 
 // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp.
-// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
+// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
 
 // Check that the execution mode of all 3 target regions is set to Spmd Mode.
 // CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 0
Index: clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
+++ clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
@@ -32,8 +32,8 @@
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
Index: clang/test/OpenMP/nvptx_parallel_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -76,8 +76,8 @@
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 4
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
Index: clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -35,8 +35,8 @@
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 // CHECK-DAG: @__omp_offloading_{{.*}}_main_[[LINE:l.+]]_exec_mode = weak constant i8 0
Index: clang/test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- clang/test/OpenMP/nvptx_data_sharing.cpp
+++ clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -28,8 +28,8 @@
   }
 }
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
-// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = common addrspace(3) global [[MEM_TY]] zeroinitializer
-// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null
+// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
+// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
 // SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i64 8
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1102,7 +1102,7 @@
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
         llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        llvm::UndefValue::get(CGM.VoidPtrTy),
         "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
         llvm::GlobalValue::NotThreadLocal,
         CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
@@ -1234,7 +1234,7 @@
     KernelStaticGlobalized = new llvm::GlobalVariable(
         CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
         llvm::GlobalValue::InternalLinkage,
-        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+        llvm::UndefValue::get(CGM.VoidPtrTy),
         "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
         llvm::GlobalValue::NotThreadLocal,
         CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
@@ -2855,8 +2855,8 @@
     auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
     unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
     TransferMedium = new llvm::GlobalVariable(
-        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
-        llvm::Constant::getNullValue(Ty), TransferMediumName,
+        M, Ty, /*isConstant=*/false, llvm::GlobalVariable::WeakAnyLinkage,
+        llvm::UndefValue::get(Ty), TransferMediumName,
         /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
         SharedAddressSpace);
     CGM.addCompilerUsedGlobal(TransferMedium);
@@ -4791,8 +4791,8 @@
       llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
       auto *GV = new llvm::GlobalVariable(
           CGM.getModule(), LLVMStaticTy,
-          /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
-          llvm::Constant::getNullValue(LLVMStaticTy),
+          /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+          llvm::UndefValue::get(LLVMStaticTy),
           "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
           llvm::GlobalValue::NotThreadLocal,
           C.getTargetAddressSpace(LangAS::cuda_shared));
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D90248: [libompt... Jon Chesterfield via Phabricator via cfe-commits

Reply via email to