https://github.com/amitamd7 updated https://github.com/llvm/llvm-project/pull/144635
>From 1383c0e58feff9aabbffab23dc705c497baa0f2d Mon Sep 17 00:00:00 2001 From: amtiwari <amtiw...@amd.com> Date: Mon, 16 Jun 2025 01:07:01 -0400 Subject: [PATCH] strided_update_offloading with lit-tests added --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 189 ++++++++++-------- .../test/offloading/strided_multiple_update.c | 61 ++++++ .../test/offloading/strided_partial_update.c | 63 ++++++ offload/test/offloading/strided_update.c | 54 +++++ 4 files changed, 282 insertions(+), 85 deletions(-) create mode 100644 offload/test/offloading/strided_multiple_update.c create mode 100644 offload/test/offloading/strided_partial_update.c create mode 100644 offload/test/offloading/strided_update.c diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8ccc37ef98a74..785eb5f6a869d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -490,11 +490,11 @@ enum OpenMPLocationFlags : unsigned { /// member */ /// kmp_int32 reserved_2; /**< not really used in Fortran any more; /// see above */ -///#if USE_ITT_BUILD +/// #if USE_ITT_BUILD /// /* but currently used for storing /// region-specific ITT */ /// /* contextual information. */ -///#endif /* USE_ITT_BUILD */ +/// #endif /* USE_ITT_BUILD */ /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for /// C++ */ /// char const *psource; /**< String describing the source location. @@ -714,16 +714,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, if (DRD) { // Shift the address forward by one element. - llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, - "omp.arraycpy.dest.element"); + llvm::Value *SrcElementNext = + CGF.Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI, + /*Idx0=*/1, "omp.arraycpy.dest.element"); SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. - llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, - "omp.arraycpy.dest.element"); + llvm::Value *DestElementNext = + CGF.Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI, + /*Idx0=*/1, "omp.arraycpy.dest.element"); // Check whether we've reached the end. llvm::Value *Done = CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); @@ -973,8 +973,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivateAddr.emitRawPointer(CGF), SharedAddr.getType()); - llvm::Value *Ptr = CGF.Builder.CreateGEP( - SharedAddr.getElementType(), PrivatePointer, Adjustment); + llvm::Value *Ptr = CGF.Builder.CreateGEP(SharedAddr.getElementType(), + PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue.getAddress(), Ptr); @@ -1599,12 +1599,11 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy), CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), getOrCreateThreadPrivateCache(VD)}; - return Address( - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), - Args), - CGF.Int8Ty, VDAddr.getAlignment()); + return Address(CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), + Args), + CGF.Int8Ty, VDAddr.getAlignment()); } void CGOpenMPRuntime::emitThreadPrivateVarInit( @@ -1629,8 +1628,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( } llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( - const VarDecl *VD, Address VDAddr, SourceLocation Loc, - bool PerformInit, CodeGenFunction *CGF) { + const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, + CodeGenFunction *CGF) { if (CGM.getLangOpts().OpenMPUseTLS && CGM.getContext().getTargetInfo().isTLSSupported()) return nullptr; @@ -1692,7 +1691,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, Loc, Loc); - // Create a scope with an artificial location for the body of this function. + // Create a scope with an artificial location for the body of this + // function. auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), @@ -1933,8 +1933,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, QualType Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); - CGF.EmitStoreOfScalar(ThreadID, - CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); + CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); return ThreadIDTemp; } @@ -2435,8 +2434,8 @@ bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, return Schedule == OMP_sch_static_chunked; } -bool CGOpenMPRuntime::isStaticChunked( - OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { +bool CGOpenMPRuntime::isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind, + bool Chunked) const { OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); return Schedule == OMP_dist_sch_static_chunked; } @@ -2600,10 +2599,10 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && "Expected loop-based or sections-based directive."); - llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, - isOpenMPLoopDirective(DKind) - ? OMP_IDENT_WORK_LOOP - : OMP_IDENT_WORK_SECTIONS); + llvm::Value *UpdatedLocation = emitUpdateLocation( + CGF, Loc, + isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned, @@ -2678,9 +2677,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, - bool IVSigned, Address IL, - Address LB, Address UB, - Address ST) { + bool IVSigned, Address IL, Address LB, + Address UB, Address ST) { // Call __kmpc_dispatch_next( // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, @@ -2858,8 +2856,8 @@ static bool isAllocatableDecl(const VarDecl *VD) { !AA->getAllocator()); } -static RecordDecl * -createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { +static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM, + ArrayRef<PrivateDataTy> Privates) { if (!Privates.empty()) { ASTContext &C = CGM.getContext(); // Build struct .kmp_privates_t. { @@ -3364,7 +3362,6 @@ static bool checkInitIsRequired(CodeGenFunction &CGF, return InitRequired; } - /// Emit task_dup function (for initialization of /// private/firstprivate/lastprivate vars and last_iter flag) /// \code @@ -3721,10 +3718,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); - SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), - getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, - SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TaskEntry, KmpRoutineEntryPtrTy)}; + SmallVector<llvm::Value *, 8> AllocArgs = { + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), + TaskFlags, + KmpTaskTWithPrivatesTySize, + SharedsSize, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry, + KmpRoutineEntryPtrTy)}; llvm::Value *NewTask; if (D.hasClausesOfKind<OMPNowaitClause>()) { // Check if we have any device clause associated with the directive. @@ -3915,13 +3916,13 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { - KmpTaskSharedsPtr = Address( - CGF.EmitLoadOfScalar( - CGF.EmitLValueForField( - TDBase, - *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), - Loc), - CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); + KmpTaskSharedsPtr = + Address(CGF.EmitLoadOfScalar( + CGF.EmitLValueForField( + TDBase, *std::next(KmpTaskTQTyRD->field_begin(), + KmpTaskTShareds)), + Loc), + CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); @@ -4508,7 +4509,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // list is not empty llvm::Value *ThreadID = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; + llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask}; llvm::Value *DepTaskArgs[7]; if (!Data.Dependences.empty()) { DepTaskArgs[0] = UpLoc; @@ -4753,12 +4754,12 @@ static void EmitOMPAggregateReduction( Scope.ForceCleanup(); // Shift the address forward by one element. - llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( - LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, - "omp.arraycpy.dest.element"); - llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( - RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, - "omp.arraycpy.src.element"); + llvm::Value *LHSElementNext = + CGF.Builder.CreateConstGEP1_32(LHSAddr.getElementType(), LHSElementPHI, + /*Idx0=*/1, "omp.arraycpy.dest.element"); + llvm::Value *RHSElementNext = + CGF.Builder.CreateConstGEP1_32(RHSAddr.getElementType(), RHSElementPHI, + /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. llvm::Value *Done = CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); @@ -5708,7 +5709,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); - const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); + const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *FlagsFD = addFieldToRecordDecl( @@ -6218,7 +6219,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( /// Checks if the expression is constant or does not have non-trivial function /// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { +static bool isTrivial(ASTContext &Ctx, const Expr *E) { // We can skip constant expressions. // We can skip expressions with trivial calls or simple expressions. return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || @@ -6413,10 +6414,11 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( const auto *CS = D.getInnermostCapturedStmt(); CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + /*isSigned=*/true); } case OMPD_target_teams: case OMPD_target_teams_distribute: @@ -6424,10 +6426,11 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + /*isSigned=*/true); } default: break; @@ -7378,7 +7381,31 @@ class MappableExprsHandler { // dimension. uint64_t DimSize = 1; - bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; + // Detects non-contiguous updates due to strided accesses. + // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set + // correctly when generating information to be passed to the runtime. The + // flag is set to true if any array section has a stride not equal to 1, or + // if the stride is not a constant expression (conservatively assumed + // non-contiguous). + bool IsNonContiguous = [&]() -> bool { + for (const auto &Component : Components) { + const auto *OASE = + dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression()); + if (OASE) { + const Expr *StrideExpr = OASE->getStride(); + if (StrideExpr) { + if (const auto Constant = + StrideExpr->getIntegerConstantExpr(CGF.getContext())) { + if (!Constant->isOne()) { + return true; + } + } + } + } + } + return false; + }(); + bool IsPrevMemberReference = false; bool IsPartialMapped = @@ -7550,8 +7577,8 @@ class MappableExprsHandler { LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), TypeSize.getQuantity() - 1); PartialStruct.HighestElem = { - std::numeric_limits<decltype( - PartialStruct.HighestElem.first)>::max(), + std::numeric_limits< + decltype(PartialStruct.HighestElem.first)>::max(), HB}; PartialStruct.Base = BP; PartialStruct.LB = LB; @@ -8966,7 +8993,8 @@ class MappableExprsHandler { generateInfoForComponentList( MapType, MapModifiers, {}, Components, CombinedInfo, StructBaseCombinedInfo, PartialStruct, IsFirstComponentList, - IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper, + IsImplicit, + /*GenerateAllInfoForClauses*/ false, Mapper, /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); IsFirstComponentList = false; } @@ -10133,7 +10161,7 @@ bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) return false; const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); - switch(A->getAllocatorType()) { + switch (A->getAllocatorType()) { case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: // Not supported, fallback to the default mem space. @@ -10237,7 +10265,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, CGF.CGM.Int32Ty, /* isSigned = */ true) : CGF.Builder.getInt32(0); - // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) + // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, + // thread_limit) llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, ThreadLimitVal}; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( @@ -10546,7 +10575,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( } namespace { - /// Kind of parameter in a function with 'declare simd' directive. +/// Kind of parameter in a function with 'declare simd' directive. enum ParamKindTy { Linear, LinearRef, @@ -10672,18 +10701,10 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, unsigned VecRegSize; }; ISADataTy ISAData[] = { - { - 'b', 128 - }, // SSE - { - 'c', 256 - }, // AVX - { - 'd', 256 - }, // AVX2 - { - 'e', 512 - }, // AVX512 + {'b', 128}, // SSE + {'c', 256}, // AVX + {'d', 256}, // AVX2 + {'e', 512}, // AVX512 }; llvm::SmallVector<char, 2> Masked; switch (State) { @@ -11675,7 +11696,8 @@ Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, FiredField = addFieldToRecordDecl(C, RD, C.CharTy); RD->completeDefinition(); NewType = C.getRecordType(RD); - Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); + Address Addr = + CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); } else { @@ -11684,8 +11706,7 @@ Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, FiredField = std::get<2>(VI->getSecond()); BaseLVal = std::get<3>(VI->getSecond()); } - LValue FiredLVal = - CGF.EmitLValueForField(BaseLVal, FiredField); + LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField); CGF.EmitStoreOfScalar( llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), FiredLVal); @@ -11872,7 +11893,7 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, assert(It != LastprivateConditionalToTypes[FoundFn].end() && "Lastprivate conditional is not found in outer region."); QualType StructTy = std::get<0>(It->getSecond()); - const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); + const FieldDecl *FiredDecl = std::get<2>(It->getSecond()); LValue PrivLVal = CGF.EmitLValue(FoundE); Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivLVal.getAddress(), @@ -12225,9 +12246,7 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { llvm_unreachable("Not supported in SIMD-only mode"); } -bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { - return false; -} +bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; } void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, diff --git a/offload/test/offloading/strided_multiple_update.c b/offload/test/offloading/strided_multiple_update.c new file mode 100644 index 0000000000000..b089746d56e84 --- /dev/null +++ b/offload/test/offloading/strided_multiple_update.c @@ -0,0 +1,61 @@ +// This test checks that #pragma omp target update from(data1[0:3:4], +// data2[0:2:5]) correctly updates disjoint strided sections of multiple arrays +// from the device to the host. + +// RUN: %libomptarget-compile-run-and-check-generic +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 12; + double data1[len], data2[len]; + +// Initial values +#pragma omp target map(tofrom : data1[0 : len], data2[0 : len]) + { + for (int i = 0; i < len; i++) { + data1[i] = i; + data2[i] = i * 10; + } + } + + printf("original host array values:\n"); + printf("data1: "); + for (int i = 0; i < len; i++) + printf("%.1f ", data1[i]); + printf("\ndata2: "); + for (int i = 0; i < len; i++) + printf("%.1f ", data2[i]); + printf("\n\n"); + +#pragma omp target data map(to : data1[0 : len], data2[0 : len]) + { +// Modify arrays on device +#pragma omp target + { + for (int i = 0; i < len; i++) + data1[i] += i; + for (int i = 0; i < len; i++) + data2[i] += 100; + } + +// data1[0:3:4] // indices 0,4,8 +// data2[0:2:5] // indices 0,5 +#pragma omp target update from(data1[0 : 3 : 4], data2[0 : 2 : 5]) + } + + printf("device array values after update from:\n"); + printf("data1: "); + for (int i = 0; i < len; i++) + printf("%.1f ", data1[i]); + printf("\ndata2: "); + for (int i = 0; i < len; i++) + printf("%.1f ", data2[i]); + printf("\n\n"); + + // CHECK: data1: 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 + // CHECK: data2: 0.0 10.0 20.0 30.0 40.0 50.0 60.0 70.0 80.0 90.0 100.0 110.0 + + // CHECK: data1: 0.0 1.0 2.0 3.0 8.0 5.0 6.0 7.0 16.0 9.0 10.0 11.0 + // CHECK: data2: 100.0 10.0 20.0 30.0 40.0 150.0 60.0 70.0 80.0 90.0 100.0 110.0 +} diff --git a/offload/test/offloading/strided_partial_update.c b/offload/test/offloading/strided_partial_update.c new file mode 100644 index 0000000000000..0a28caf4cb401 --- /dev/null +++ b/offload/test/offloading/strided_partial_update.c @@ -0,0 +1,63 @@ +// This test checks that #pragma omp target update from(data[0:4:3]) correctly updates +// every third element (stride 3) from the device to the host, partially across +// the array + +// RUN: %libomptarget-compile-run-and-check-generic +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 11; + double data[len]; + +#pragma omp target map(tofrom : data[0 : len]) + { + for (int i = 0; i < len; i++) + data[i] = i; + } + + // initial values + printf("original host array values:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + +#pragma omp target data map(to : data[0 : len]) + { +// Modify arrays on device +#pragma omp target + for (int i = 0; i < len; i++) + data[i] += i; + +#pragma omp target update from(data[0 : 4 : 3]) // indices 0,3,6,9 + } + + printf("device array values after update from:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + + // CHECK: 0.000000 + // CHECK: 1.000000 + // CHECK: 2.000000 + // CHECK: 3.000000 + // CHECK: 4.000000 + // CHECK: 5.000000 + // CHECK: 6.000000 + // CHECK: 7.000000 + // CHECK: 8.000000 + // CHECK: 9.000000 + // CHECK: 10.000000 + + // CHECK: 0.000000 + // CHECK: 1.000000 + // CHECK: 2.000000 + // CHECK: 6.000000 + // CHECK: 4.000000 + // CHECK: 5.000000 + // CHECK: 12.000000 + // CHECK: 7.000000 + // CHECK: 8.000000 + // CHECK: 18.000000 + // CHECK: 10.000000 +} diff --git a/offload/test/offloading/strided_update.c b/offload/test/offloading/strided_update.c new file mode 100644 index 0000000000000..6626a3286063d --- /dev/null +++ b/offload/test/offloading/strided_update.c @@ -0,0 +1,54 @@ +// This test checks that "update from" clause in OpenMP is supported when the +// elements are updated in a non-contiguous manner. This test checks that +// #pragma omp target update from(data[0:4:2]) correctly updates only every +// other element (stride 2) from the device to the host + +// RUN: %libomptarget-compile-run-and-check-generic +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 8; + double data[len]; +#pragma omp target map(tofrom : len, data[0 : len]) + { + for (int i = 0; i < len; i++) { + data[i] = i; + } + } + // Initial values + printf("original host array values:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + +#pragma omp target data map(to : len, data[0 : len]) + { +// Modify arrays on device +#pragma omp target + for (int i = 0; i < len; i++) { + data[i] += i; + } + +#pragma omp target update from(data[0 : 4 : 2]) + } + // CHECK: 0.000000 + // CHECK: 1.000000 + // CHECK: 4.000000 + // CHECK: 3.000000 + // CHECK: 8.000000 + // CHECK: 5.000000 + // CHECK: 12.000000 + // CHECK: 7.000000 + // CHECK-NOT: 2.000000 + // CHECK-NOT: 6.000000 + // CHECK-NOT: 10.000000 + // CHECK-NOT: 14.000000 + + printf("from target array results:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + + return 0; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits