ABataev created this revision. ABataev added reviewers: jdoerfert, mikerice. Herald added subscribers: guansong, yaxunl. Herald added a project: All. ABataev requested review of this revision. Herald added a subscriber: sstefan1. Herald added a project: clang.
The final update of the inscan reduction variables needs to be emitted. For worksharing loops, the reduction values are stored in a temporary array, so the last element of that array must be copied to the original variable at the end of the construct. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D121156 Files: clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/parallel_for_scan_codegen.cpp
Index: clang/test/OpenMP/parallel_for_scan_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_for_scan_codegen.cpp +++ clang/test/OpenMP/parallel_for_scan_codegen.cpp @@ -27,6 +27,13 @@ // CHECK: [[B_BUF:%.+]] = alloca double, i64 10, // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( + // CHECK: [[LAST:%.+]] = mul nsw i64 9, % + // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]] + // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8* + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false) + // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9 + // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]], + // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b, // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]] Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3511,6 +3511,57 @@ } } +/// Copies final inscan reductions values to the original variables. 
+/// The code is the following: +/// \code +/// <orig_var> = buffer[num_iters-1]; +/// \endcode +static void emitScanBasedDirectiveFinals( + CodeGenFunction &CGF, const OMPLoopDirective &S, + llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { + llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( + NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> CopyOps; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + assert(C->getModifier() == OMPC_REDUCTION_inscan && + "Only inscan reductions are expected."); + Shareds.append(C->varlist_begin(), C->varlist_end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + Privates.append(C->privates().begin(), C->privates().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + // Create temp var and copy LHS value to this temp value. 
+ // LHS = TMP[LastIter]; + llvm::Value *OMPLast = CGF.Builder.CreateNSWSub( + OMPScanNumIterations, + llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false)); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(OMPLast)); + LValue DestLVal = CGF.EmitLValue(OrigExpr); + LValue SrcLVal = CGF.EmitLValue(CopyArrayElem); + CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF), + SrcLVal.getAddress(CGF), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } +} + /// Emits the code for the directive with inscan reductions. /// The code is the following: /// \code @@ -3705,6 +3756,8 @@ if (!isOpenMPParallelDirective(S.getDirectiveKind())) emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); + if (!isOpenMPParallelDirective(S.getDirectiveKind())) + emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); } else { CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), HasCancel); @@ -4289,23 +4342,25 @@ (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; { - if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), [](const OMPReductionClause *C) { return C->getModifier() == OMPC_REDUCTION_inscan; - })) { - const auto &&NumIteratorsGen 
= [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); - CGCapturedStmtInfo CGSI(CR_OpenMP); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); - OMPLoopScope LoopScope(CGF, S); - return CGF.EmitScalarExpr(S.getNumIterations()); - }; + }); + if (IsInscan) emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); - } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, emitEmptyBoundParameters); + if (IsInscan) + emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); } // Check for outer lastprivate conditional update. checkForLastprivateConditionalUpdate(*this, S); @@ -4320,23 +4375,25 @@ (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { - if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), [](const OMPReductionClause *C) { return C->getModifier() == OMPC_REDUCTION_inscan; - })) { - const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); - CGCapturedStmtInfo CGSI(CR_OpenMP); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); - OMPLoopScope LoopScope(CGF, S); - return CGF.EmitScalarExpr(S.getNumIterations()); - }; + }); + if (IsInscan) emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); - } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, emitEmptyBoundParameters); + if (IsInscan) + emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); } // Check for outer lastprivate conditional 
update. checkForLastprivateConditionalUpdate(*this, S);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits