JonChesterfield created this revision. JonChesterfield added reviewers: jdoerfert, ABataev, grokos, tianshilei1992, ye-luo. Herald added a project: clang. Herald added a subscriber: cfe-commits. JonChesterfield requested review of this revision.
[libomptarget][nvptx] Undef, internal shared variables Shared variables on nvptx, and LDS on amdgcn, are uninitialized at the start of kernel execution. Therefore create the variables with undef instead of zeros, motivated in part by the amdgcn back end rejecting LDS+initializer. Common is zero initialized, which seems incompatible with shared. Thus change them to internal, following the direction of https://reviews.llvm.org/rG7b3eabdcd215 WIP, other tests need to be updated if direction is good Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D89994 Files: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
Index: clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -17,14 +17,14 @@ // CHECK-DAG: [[TEAMS_REDUCE_UNION_TY:%.+]] = type { [[TEAM1_REDUCE_TY]] } // SEQ-DAG: [[MAP_TY:%.+]] = type { [128 x i8] } -// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* null +// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef // SEQ-DAG: [[KERNEL_SHARED1:@.+]] = internal unnamed_addr constant i16 1 // SEQ-DAG: [[KERNEL_SHARED2:@.+]] = internal unnamed_addr constant i16 1 // SEQ-DAG: [[KERNEL_SIZE1:@.+]] = internal unnamed_addr constant i{{64|32}} {{16|8}} // SEQ-DAG: [[KERNEL_SIZE2:@.+]] = internal unnamed_addr constant i{{64|32}} 16 // Check for the data transfer medium in shared memory to transfer the reduction list to the first warp. -// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = common addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32] +// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = internal addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32] // Check that the execution mode of 2 target regions is set to Non-SPMD and the 3rd is in SPMD. // CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 1 @@ -220,7 +220,7 @@ // CHECK: [[BASE_ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // CHECK: [[ELT:%.+]] = getelementptr i32, i32* [[BASE_ELT]], i32 [[CNT]] // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -238,7 +238,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT_BASE:%.+]] = bitcast i8* [[ELT_VOID]] to i32* @@ -531,7 +531,7 @@ // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // - // CHECK: [[MEDIUM_ELT64:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT64:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT64]] to i8 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align // CHECK: store volatile i8 [[ELT_VAL]], i8 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align @@ -550,7 +550,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i8 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], @@ -571,7 +571,7 @@ // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -589,7 +589,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* @@ -982,7 +982,7 @@ // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -1000,7 +1000,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* @@ -1021,7 +1021,7 @@ // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align // CHECK: store volatile i16 [[ELT_VAL]], i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align @@ -1040,7 +1040,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], @@ -1216,7 +1216,7 @@ // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* // - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align // CHECK: store volatile i32 [[ELT_VAL]], i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align // CHECK: br label {{%?}}[[COPY_CONT:.+]] @@ -1234,7 +1234,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 0 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32* @@ -1255,7 +1255,7 @@ // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16* // - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[WARPID]] + // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[WARPID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align // CHECK: store volatile i16 [[ELT_VAL]], i16 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT]], align @@ -1274,7 +1274,7 @@ // CHECK: br i1 [[IS_W0_ACTIVE_THREAD]], label {{%?}}[[DO_READ:.+]], label {{%?}}[[READ_ELSE:.+]] // // CHECK: [[DO_READ]] - // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE]], i64 0, i32 [[TID]] + // CHECK: [[MEDIUM_ELT32:%.+]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace([[SHARED_ADDRSPACE]])* [[TRANSFER_STORAGE:@.+]], i64 0, i32 [[TID]] // CHECK: [[MEDIUM_ELT:%.+]] = bitcast i32 addrspace([[SHARED_ADDRSPACE]])* [[MEDIUM_ELT32]] to i16 addrspace([[SHARED_ADDRSPACE]])* // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i{{32|64}} 0, i{{32|64}} 1 // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]], Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1102,7 +1102,7 @@ KernelStaticGlobalized = new llvm::GlobalVariable( CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + llvm::UndefValue::get(CGM.VoidPtrTy), "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); @@ -1234,7 +1234,7 @@ KernelStaticGlobalized = new llvm::GlobalVariable( CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + llvm::UndefValue::get(CGM.VoidPtrTy), "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); @@ -2855,8 +2855,8 @@ auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); TransferMedium = new llvm::GlobalVariable( - M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, - llvm::Constant::getNullValue(Ty), TransferMediumName, + M, Ty, /*isConstant=*/false, llvm::GlobalVariable::InternalLinkage, + llvm::UndefValue::get(Ty), TransferMediumName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, SharedAddressSpace); CGM.addCompilerUsedGlobal(TransferMedium); @@ -4791,8 +4791,8 @@ llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); auto *GV = new llvm::GlobalVariable( CGM.getModule(), LLVMStaticTy, - /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, - llvm::Constant::getNullValue(LLVMStaticTy), + /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, + llvm::UndefValue::get(LLVMStaticTy), "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, C.getTargetAddressSpace(LangAS::cuda_shared));
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits