Hahnfeld created this revision. Hahnfeld added reviewers: ABataev, gtbercea. Herald added subscribers: cfe-commits, guansong, jholewinski.
Worker threads fork off to the compiler-generated worker function directly after entering the kernel function. Hence, there is no need to check whether the current thread is the master thread if we are outside of a parallel region (i.e., the kernel is neither in SPMD mode nor at parallel_level > 0). Repository: rC Clang https://reviews.llvm.org/D52732 Files: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp test/OpenMP/nvptx_target_codegen.cpp
Index: test/OpenMP/nvptx_target_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_codegen.cpp +++ test/OpenMP/nvptx_target_codegen.cpp @@ -557,7 +557,6 @@ // CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]], // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* - // CHECK: [[GTID_ADDR:%.+]] = alloca i32, // CHECK: store i32 0, i32* [[ZERO_ADDR]] // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode() // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0 @@ -583,9 +582,6 @@ // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]]) // CHECK: br label - // CHECK: icmp eq i32 - // CHECK: br i1 - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2) // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]], @@ -597,10 +593,6 @@ // CHECK: call void @__kmpc_end_sharing_variables() // CHECK: br label - // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]], - // CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) - // CHECK: br label - // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]], // CHECK: store i32 [[RES]], i32* [[RET:%.+]], // CHECK: br i1 [[IS_SPMD]], label Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -2207,30 +2207,24 @@ Work.emplace_back(WFn); }; - auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { - RegionCodeGenTy RCG(CodeGen); + auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen]( + CodeGenFunction &CGF, PrePostActionTy &Action) { if (IsInParallelRegion) { SeqGen(CGF, Action); 
} else if (IsInTargetMasterThreadRegion) { L0ParallelGen(CGF, Action); - } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) { - RCG(CGF); } else { // Check for master and then parallelism: // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) { - // Serialized execution. - // } else if (master) { - // Worker call. + // Serialized execution. // } else { - // Outlined function call. + // Worker call. // } CGBuilderTy &Bld = CGF.Builder; llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); - llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); @@ -2243,29 +2237,17 @@ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); - Bld.CreateCondBr(Res, SeqBB, MasterCheckBB); + Bld.CreateCondBr(Res, SeqBB, MasterBB); CGF.EmitBlock(SeqBB); SeqGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(MasterCheckBB); - llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then"); - llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); - llvm::Value *IsMaster = - Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); - Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock); - CGF.EmitBlock(MasterThenBB); + CGF.EmitBlock(MasterBB); L0ParallelGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. 
(void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - // In the worker need to use the real thread id. - ThreadIDAddr = emitThreadIDAddress(CGF, Loc); - RCG(CGF); - // There is no need to emit line number for unconditional branch. - (void)ApplyDebugLocation::CreateEmpty(CGF); // Emit the continuation block for code after the if. CGF.EmitBlock(ExitBB, /*IsFinished=*/true); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits