================
@@ -4645,12 +4789,23 @@ initTargetDefaultAttrs(omp::TargetOp targetOp,
Operation *capturedOp,
(maxThreadsVal >= 0 && maxThreadsVal < combinedMaxThreadsVal))
combinedMaxThreadsVal = maxThreadsVal;
+ // Calculate reduction data size, limited to single reduction variable for
+ // now.
+ int32_t reductionDataSize = 0;
+ if (isGPU && capturedOp) {
+ if (auto teamsOp = castOrGetParentOfType<omp::TeamsOp>(capturedOp))
+ reductionDataSize = getReductionDataSize(teamsOp);
+ }
+
// Update kernel bounds structure for the `OpenMPIRBuilder` to use.
attrs.ExecFlags = targetOp.getKernelExecFlags(capturedOp);
attrs.MinTeams = minTeamsVal;
attrs.MaxTeams.front() = maxTeamsVal;
attrs.MinThreads = 1;
attrs.MaxThreads.front() = combinedMaxThreadsVal;
+ attrs.ReductionDataSize = reductionDataSize;
+ if (attrs.ReductionDataSize != 0)
+ attrs.ReductionBufferLength = 1024;
----------------
skatrak wrote:
Nit: Could you add a comment documenting the reasoning for this size, or add
a TODO to calculate it from the actual reductions?
https://github.com/llvm/llvm-project/pull/133310
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits