Hahnfeld created this revision. Herald added a subscriber: jholewinski. In the future, the compiler will analyze whether the OpenMP runtime needs to be (fully) initialized and will avoid that overhead where possible. The runtime functions __kmpc_kernel_init and __kmpc_kernel_deinit already take an argument that transfers this information to the runtime, so for now pass in the default value 1. (This is needed for binary compatibility with libomptarget-nvptx, which is currently being upstreamed.)
https://reviews.llvm.org/D40354 Files: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp test/OpenMP/nvptx_parallel_codegen.cpp test/OpenMP/nvptx_target_codegen.cpp test/OpenMP/nvptx_target_teams_codegen.cpp test/OpenMP/nvptx_teams_reduction_codegen.cpp
Index: test/OpenMP/nvptx_teams_reduction_codegen.cpp =================================================================== --- test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -84,7 +84,7 @@ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function @@ -360,7 +360,7 @@ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function @@ -776,7 +776,7 @@ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function Index: test/OpenMP/nvptx_target_teams_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_teams_codegen.cpp +++ test/OpenMP/nvptx_target_teams_codegen.cpp @@ -125,7 +125,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -211,7 +211,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // Index: test/OpenMP/nvptx_target_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_codegen.cpp +++ test/OpenMP/nvptx_target_codegen.cpp @@ -91,7 +91,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -168,7 +168,7 @@ // CHECK: br label 
{{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -278,7 +278,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -441,7 +441,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -531,7 +531,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -616,7 +616,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // Index: test/OpenMP/nvptx_parallel_codegen.cpp =================================================================== --- test/OpenMP/nvptx_parallel_codegen.cpp +++ test/OpenMP/nvptx_parallel_codegen.cpp @@ -166,7 +166,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -303,7 +303,7 @@ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // Index: 
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -22,19 +22,21 @@ namespace { enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit); + /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit, + /// int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_kernel_init, - /// \brief Call to void __kmpc_kernel_deinit(); + /// \brief Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_deinit, /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// short RequiresOMPRuntime, short RequiresDataSharing); + /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); OMPRTL_NVPTX__kmpc_spmd_kernel_init, /// \brief Call to void __kmpc_spmd_kernel_deinit(); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); + /// *outlined_function, void ***args, kmp_int32 nArgs); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function); + /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void + /// ***args); OMPRTL_NVPTX__kmpc_kernel_parallel, /// \brief Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -355,7 +357,8 @@ CGF.EmitBlock(MasterBB); // First action in sequential region: // Initialize the state of the OpenMP runtime library on the GPU. - llvm::Value *Args[] = {getThreadLimit(CGF)}; + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {getThreadLimit(CGF), Bld.getInt16(/*RequiresOMPRuntime=*/1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); } @@ -370,8 +373,10 @@ CGF.EmitBlock(TerminateBB); // Signal termination condition. 
+ // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None); + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. @@ -597,23 +602,24 @@ llvm::Constant *RTLFn = nullptr; switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; + // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); break; } case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(); + // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; } case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // short RequiresOMPRuntime, short RequiresDataSharing); + // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits