tianshilei1992 created this revision.
tianshilei1992 added reviewers: jdoerfert, jhuber6.
Herald added subscribers: ormris, guansong, hiraditya, yaxunl.
tianshilei1992 requested review of this revision.
Herald added subscribers: llvm-commits, openmp-commits, cfe-commits, sstefan1.
Herald added projects: clang, OpenMP, LLVM.
The execution mode of a kernel is stored in a global variable, whose value
means:
- 0 - SPMD mode
- 1 - indicates generic mode
- 2 - SPMD mode execution with generic mode semantics
We are going to add support for SIMD execution mode. It will be come with
another
execution mode, such as SIMD-generic mode. As a result, this value-based
indicator
is not flexible.
This patch changes to bitset based solution to encode execution mode. Each
position is:
[0] - generic mode
[1] - SPMD mode
[2] - SIMD mode (will be added later)
In this way, `0x1` is generic mode, `0x2` is SPMD mode, and `0x3` is SPMD mode
execution with generic mode semantics. In the future after we add the support
for
SIMD mode, `0b1xx` will be in SIMD mode.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D110029
Files:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
clang/test/OpenMP/nvptx_target_simd_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
llvm/test/Transforms/OpenMP/spmdization.ll
llvm/test/Transforms/OpenMP/spmdization_assumes.ll
llvm/test/Transforms/OpenMP/spmdization_guarding.ll
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -71,12 +71,10 @@
std::vector<__tgt_offload_entry> Entries;
};
-enum ExecutionModeType {
- SPMD, // constructors, destructors,
- // combined constructs (`teams distribute parallel for [simd]`)
- GENERIC, // everything else
- SPMD_GENERIC, // Generic kernel with SPMD execution
- NONE
+enum ExecutionModeMaskType {
+ SPMD = 0x2, // constructors, destructors, combined constructs (`teams
+ // distribute parallel for [simd]`)
+ GENERIC = 0x1, // everything else
};
/// Use a single entity to encode a kernel and a set of flags.
@@ -84,9 +82,9 @@
CUfunction Func;
// execution mode of kernel
- // 0 - SPMD mode (without master warp)
- // 1 - Generic mode (with master warp)
- // 2 - SPMD mode execution with Generic mode semantics.
+ // ExecutionMode | 0x2 - SPMD mode (without master warp)
+ // ExecutionMode | 0x1 - Generic mode (with master warp)
+ // ExecutionMode | 0x3 - SPMD mode execution with Generic mode semantics.
int8_t ExecutionMode;
/// Maximal number of threads per block for this kernel.
@@ -866,7 +864,7 @@
DPxPTR(E - HostBegin), E->name, DPxPTR(Func));
// default value GENERIC (in case symbol is missing from cubin file)
- int8_t ExecModeVal = ExecutionModeType::GENERIC;
+ int8_t ExecModeVal = 1;
std::string ExecModeNameStr(E->name);
ExecModeNameStr += "_exec_mode";
const char *ExecModeName = ExecModeNameStr.c_str();
@@ -889,12 +887,6 @@
CUDA_ERR_STRING(Err);
return nullptr;
}
-
- if (ExecModeVal < 0 || ExecModeVal > 2) {
- DP("Error wrong exec_mode value specified in cubin file: %d\n",
- ExecModeVal);
- return nullptr;
- }
} else {
DP("Loading global exec_mode '%s' - symbol missing, using default "
"value GENERIC (1)\n",
@@ -1135,13 +1127,22 @@
unsigned int CudaBlocksPerGrid;
if (TeamNum <= 0) {
if (LoopTripCount > 0 && EnvNumTeams < 0) {
- if (KernelInfo->ExecutionMode == SPMD) {
+ if (KernelInfo->ExecutionMode & ExecutionModeMaskType::SPMD &&
+ KernelInfo->ExecutionMode & ExecutionModeMaskType::GENERIC) {
+ // If we reach this point, then we are executing a kernel that was
+ // transformed from Generic-mode to SPMD-mode. This kernel has
+ // SPMD-mode execution, but needs its blocks to be scheduled
+ // differently because the current loop trip count only applies to the
+ // `teams distribute` region and will create var too few blocks using
+ // the regular SPMD-mode method.
+ CudaBlocksPerGrid = LoopTripCount;
+ } else if (KernelInfo->ExecutionMode & ExecutionModeMaskType::SPMD) {
// We have a combined construct, i.e. `target teams distribute
// parallel for [simd]`. We launch so many teams so that each thread
// will execute one iteration of the loop. round up to the nearest
// integer
CudaBlocksPerGrid = ((LoopTripCount - 1) / CudaThreadsPerBlock) + 1;
- } else if (KernelInfo->ExecutionMode == GENERIC) {
+ } else if (KernelInfo->ExecutionMode & ExecutionModeMaskType::GENERIC) {
// If we reach this point, then we have a non-combined construct, i.e.
// `teams distribute` with a nested `parallel for` and each team is
// assigned one iteration of the `distribute` loop. E.g.:
@@ -1155,14 +1156,6 @@
// Threads within a team will execute the iterations of the `parallel`
// loop.
CudaBlocksPerGrid = LoopTripCount;
- } else if (KernelInfo->ExecutionMode == SPMD_GENERIC) {
- // If we reach this point, then we are executing a kernel that was
- // transformed from Generic-mode to SPMD-mode. This kernel has
- // SPMD-mode execution, but needs its blocks to be scheduled
- // differently because the current loop trip count only applies to the
- // `teams distribute` region and will create var too few blocks using
- // the regular SPMD-mode method.
- CudaBlocksPerGrid = LoopTripCount;
} else {
REPORT("Unknown execution mode: %d\n", KernelInfo->ExecutionMode);
return OFFLOAD_FAIL;
Index: llvm/test/Transforms/OpenMP/spmdization_guarding.ll
===================================================================
--- llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -45,7 +45,7 @@
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
;.
; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
Index: llvm/test/Transforms/OpenMP/spmdization_assumes.ll
===================================================================
--- llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -23,7 +23,7 @@
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
;.
define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
Index: llvm/test/Transforms/OpenMP/spmdization.ll
===================================================================
--- llvm/test/Transforms/OpenMP/spmdization.ll
+++ llvm/test/Transforms/OpenMP/spmdization.ll
@@ -91,10 +91,10 @@
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; AMDGPU: @[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata"
; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
@@ -102,10 +102,10 @@
;.
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
-; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; NVPTX: @[[__OMP_OFFLOADING_14_A34CA11_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [5 x i8*] [i8* @__omp_offloading_14_a34ca11_sequential_loop_l5_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65_exec_mode], section "llvm.metadata"
; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 32
Index: llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
===================================================================
--- llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
+++ llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
@@ -13,7 +13,7 @@
;.
; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
-; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
===================================================================
--- llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -11,7 +11,7 @@
; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
;.
Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3282,15 +3282,18 @@
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
- assert(ExecMode->getInitializer() &&
- ExecMode->getInitializer()->isOneValue() &&
- "Initially non-SPMD kernel has SPMD exec mode!");
+ assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
- constexpr int SPMDGeneric = 2;
- if (!ExecMode->getInitializer()->isZeroValue())
- ExecMode->setInitializer(
- ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
+ constexpr int SPMDGenericMask = 0x3;
+ assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
+ "ExecMode is not an integer!");
+ const int ExecModeVal =
+ cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
+ assert(ExecModeVal & 0x1 &&
+ "Initially non-SPMD kernel has SPMD exec mode!");
+ ExecMode->setInitializer(ConstantInt::get(
+ ExecMode->getInitializer()->getType(), ExecModeVal | SPMDGenericMask));
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
const int InitIsSPMDArgNo = 1;
Index: clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
@@ -16,10 +16,10 @@
#define HEADER
// Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l53}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l53}}_exec_mode = weak constant i8 2
#define N 1000
#define M 10
Index: clang/test/OpenMP/nvptx_target_simd_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_simd_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_simd_codegen.cpp
@@ -16,10 +16,10 @@
#define HEADER
// Check that the execution mode of all 2 target regions on the gpu is set to NonSPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 2
#define N 1000
Index: clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -12,9 +12,9 @@
// CHECK-DAG: [[TRANSFER_STORAGE:@.+]] = weak addrspace([[SHARED_ADDRSPACE:[0-9]+]]) global [32 x i32]
// Check that the execution mode of all 3 target regions is set to Spmd Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 2
template<typename tx>
tx ftemplate(int n) {
Index: clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -16,9 +16,9 @@
#define HEADER
// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 2
+// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 2
template<typename tx>
tx ftemplate(int n) {
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1112,11 +1112,11 @@
// warps participate in parallel work.
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
bool Mode) {
- auto *GVMode =
- new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::WeakAnyLinkage,
- llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1),
- Twine(Name, "_exec_mode"));
+ auto *GVMode = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0x2 : 0x1),
+ Twine(Name, "_exec_mode"));
CGM.addCompilerUsedGlobal(GVMode);
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits