================ @@ -5229,13 +5362,288 @@ static void emitTargetOutlinedFunction( OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true, OutlinedFn, OutlinedFnID); } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask( + Function *OutlinedFn, Value *OutlinedFnID, + EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, + Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, + SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies, + bool HasNoWait) { + + // When we arrive at this function, the target region itself has been + // outlined into the function OutlinedFn. + // So at ths point, for + // -------------------------------------------------- + // void user_code_that_offloads(...) { + // omp target depend(..) map(from:a) map(to:b, c) + // a = b + c + // } + // + // -------------------------------------------------- + // + // we have + // + // -------------------------------------------------- + // + // void user_code_that_offloads(...) { + // %.offload_baseptrs = alloca [3 x ptr], align 8 + // %.offload_ptrs = alloca [3 x ptr], align 8 + // %.offload_mappers = alloca [3 x ptr], align 8 + // ;; target region has been outlined and now we need to + // ;; offload to it via a target task. + // } + // void outlined_device_function(ptr a, ptr b, ptr c) { + // *a = *b + *c + // } + // + // We have to now do the following + // (i) Make an offloading call to outlined_device_function using the OpenMP + // RTL. See 'kernel_launch_function' in the pseudo code below. This is + // emitted by emitKernelLaunch + // (ii) Create a task entry point function that calls kernel_launch_function + // and is the entry point for the target task. See + // '@.omp_target_task_proxy_func in the pseudocode below. + // (iii) Create a task with the task entry point created in (ii) + // + // That is we create the following + // + // void user_code_that_offloads(...) 
{ + // %.offload_baseptrs = alloca [3 x ptr], align 8 + // %.offload_ptrs = alloca [3 x ptr], align 8 + // %.offload_mappers = alloca [3 x ptr], align 8 + // + // %structArg = alloca { ptr, ptr, ptr }, align 8 + // %strucArg[0] = %.offload_baseptrs + // %strucArg[1] = %.offload_ptrs + // %strucArg[2] = %.offload_mappers + // proxy_target_task = @__kmpc_omp_task_alloc(..., + // @.omp_target_task_proxy_func) + // memcpy(proxy_target_task->shareds, %structArg, sizeof(structArg)) + // dependencies_array = ... + // ;; if nowait not present + // call @__kmpc_omp_wait_deps(..., dependencies_array) + // call @__kmpc_omp_task_begin_if0(...) + // call @ @.omp_target_task_proxy_func(i32 thread_id, ptr + // %proxy_target_task) call @__kmpc_omp_task_complete_if0(...) + // } + // + // define internal void @.omp_target_task_proxy_func(i32 %thread.id, + // ptr %task) { + // %structArg = alloca {ptr, ptr, ptr} + // %shared_data = load (getelementptr %task, 0, 0) + // mempcy(%structArg, %shared_data, sizeof(structArg)) + // kernel_launch_function(%thread.id, %structArg) + // } + // + // We need the proxy function because the signature of the task entry point + // expected by kmpc_omp_task is always the same and will be different from + // that of the kernel_launch function. + // + // kernel_launch_function is generated by emitKernelLaunch and has the + // always_inline attribute. 
void kernel_launch_function(thread_id, + // structArg) + // alwaysinline { + // %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8 + // offload_baseptrs = load(getelementptr structArg, 0, 0) + // offload_ptrs = load(getelementptr structArg, 0, 1) + // offload_mappers = load(getelementptr structArg, 0, 2) + // ; setup kernel_args using offload_baseptrs, offload_ptrs and + // ; offload_mappers + // call i32 @__tgt_target_kernel(..., + // outlined_device_function, + // ptr %kernel_args) + // } + // void outlined_device_function(ptr a, ptr b, ptr c) { + // *a = *b + *c + // } + // + BasicBlock *TargetTaskBodyBB = + splitBB(Builder, /*CreateBranch=*/true, "target.task.body"); + BasicBlock *TargetTaskAllocaBB = + splitBB(Builder, /*CreateBranch=*/true, "target.task.alloca"); + + InsertPointTy TargetTaskAllocaIP = + InsertPointTy(TargetTaskAllocaBB, TargetTaskAllocaBB->begin()); + InsertPointTy TargetTaskBodyIP = + InsertPointTy(TargetTaskBodyBB, TargetTaskBodyBB->begin()); + + OutlineInfo OI; + OI.EntryBB = TargetTaskAllocaBB; + OI.OuterAllocaBB = AllocaIP.getBlock(); + + // Add the thread ID argument. + std::stack<Instruction *> ToBeDeleted; + OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal( + Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false)); + + Builder.restoreIP(TargetTaskBodyIP); + + // emitKernelLaunch makes the necessary runtime call to offload the kernel. + // We then outline all that code into a separate function + // ('kernel_launch_function' in the pseudo code above). 
This function is then + // called by the target task proxy function (see + // '@.omp_target_task_proxy_func' in the pseudo code above) + // "@.omp_target_task_proxy_func' is generated by emitProxyTaskFunction + Builder.restoreIP(emitKernelLaunch(Builder, OutlinedFn, OutlinedFnID, + EmitTargetCallFallbackCB, Args, DeviceID, + RTLoc, TargetTaskAllocaIP)); + + OI.ExitBB = Builder.saveIP().getBlock(); + OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, ---------------- ergawy wrote:
I think there are pieces in this lambda that can be outlined into shared utils and called from both here and `OpenMPIRBuilder::createTask`. For example, the logic to set up `TaskData` (but other parts as well). https://github.com/llvm/llvm-project/pull/93977 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits