https://github.com/amitamd7 updated 
https://github.com/llvm/llvm-project/pull/144635

>From 1383c0e58feff9aabbffab23dc705c497baa0f2d Mon Sep 17 00:00:00 2001
From: amtiwari <amtiw...@amd.com>
Date: Mon, 16 Jun 2025 01:07:01 -0400
Subject: [PATCH] strided_update_offloading with lit-tests added

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 189 ++++++++++--------
 .../test/offloading/strided_multiple_update.c |  61 ++++++
 .../test/offloading/strided_partial_update.c  |  63 ++++++
 offload/test/offloading/strided_update.c      |  54 +++++
 4 files changed, 282 insertions(+), 85 deletions(-)
 create mode 100644 offload/test/offloading/strided_multiple_update.c
 create mode 100644 offload/test/offloading/strided_partial_update.c
 create mode 100644 offload/test/offloading/strided_update.c

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8ccc37ef98a74..785eb5f6a869d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -490,11 +490,11 @@ enum OpenMPLocationFlags : unsigned {
 ///                                  member  */
 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
 ///                                  see above */
-///#if USE_ITT_BUILD
+/// #if USE_ITT_BUILD
 ///                            /*  but currently used for storing
 ///                                region-specific ITT */
 ///                            /*  contextual information. */
-///#endif /* USE_ITT_BUILD */
+/// #endif /* USE_ITT_BUILD */
 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
 ///                                 C++  */
 ///    char const *psource;    /**< String describing the source location.
@@ -714,16 +714,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, 
Address DestAddr,
 
   if (DRD) {
     // Shift the address forward by one element.
-    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
-        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
-        "omp.arraycpy.dest.element");
+    llvm::Value *SrcElementNext =
+        CGF.Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
+                                       /*Idx0=*/1, 
"omp.arraycpy.dest.element");
     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
   }
 
   // Shift the address forward by one element.
-  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
-      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
-      "omp.arraycpy.dest.element");
+  llvm::Value *DestElementNext =
+      CGF.Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
+                                     /*Idx0=*/1, "omp.arraycpy.dest.element");
   // Check whether we've reached the end.
   llvm::Value *Done =
       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
@@ -973,8 +973,8 @@ Address 
ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
     llvm::Value *PrivatePointer =
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
-    llvm::Value *Ptr = CGF.Builder.CreateGEP(
-        SharedAddr.getElementType(), PrivatePointer, Adjustment);
+    llvm::Value *Ptr = CGF.Builder.CreateGEP(SharedAddr.getElementType(),
+                                             PrivatePointer, Adjustment);
     return castToBase(CGF, OrigVD->getType(),
                       SharedAddresses[N].first.getType(),
                       OriginalBaseLValue.getAddress(), Ptr);
@@ -1599,12 +1599,11 @@ Address 
CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
       getOrCreateThreadPrivateCache(VD)};
-  return Address(
-      CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
-          Args),
-      CGF.Int8Ty, VDAddr.getAlignment());
+  return Address(CGF.EmitRuntimeCall(
+                     OMPBuilder.getOrCreateRuntimeFunction(
+                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
+                     Args),
+                 CGF.Int8Ty, VDAddr.getAlignment());
 }
 
 void CGOpenMPRuntime::emitThreadPrivateVarInit(
@@ -1629,8 +1628,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
 }
 
 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
-    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
-    bool PerformInit, CodeGenFunction *CGF) {
+    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
+    CodeGenFunction *CGF) {
   if (CGM.getLangOpts().OpenMPUseTLS &&
       CGM.getContext().getTargetInfo().isTLSSupported())
     return nullptr;
@@ -1692,7 +1691,8 @@ llvm::Function 
*CGOpenMPRuntime::emitThreadPrivateVarDefinition(
       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 
Args,
                             Loc, Loc);
-      // Create a scope with an artificial location for the body of this 
function.
+      // Create a scope with an artificial location for the body of this
+      // function.
       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
           DtorCGF.GetAddrOfLocalVar(&Dst),
@@ -1933,8 +1933,7 @@ Address 
CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
   QualType Int32Ty =
       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ 
true);
   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ 
".threadid_temp.");
-  CGF.EmitStoreOfScalar(ThreadID,
-                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
+  CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
 
   return ThreadIDTemp;
 }
@@ -2435,8 +2434,8 @@ bool 
CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
   return Schedule == OMP_sch_static_chunked;
 }
 
-bool CGOpenMPRuntime::isStaticChunked(
-    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+bool CGOpenMPRuntime::isStaticChunked(OpenMPDistScheduleClauseKind 
ScheduleKind,
+                                      bool Chunked) const {
   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
   return Schedule == OMP_dist_sch_static_chunked;
 }
@@ -2600,10 +2599,10 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction 
&CGF,
       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
          "Expected loop-based or sections-based directive.");
-  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
-                                             isOpenMPLoopDirective(DKind)
-                                                 ? OMP_IDENT_WORK_LOOP
-                                                 : OMP_IDENT_WORK_SECTIONS);
+  llvm::Value *UpdatedLocation = emitUpdateLocation(
+      CGF, Loc,
+      isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
+                                   : OMP_IDENT_WORK_SECTIONS);
   llvm::Value *ThreadId = getThreadID(CGF, Loc);
   llvm::FunctionCallee StaticInitFunction =
       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
@@ -2678,9 +2677,8 @@ void 
CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
 
 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                           SourceLocation Loc, unsigned IVSize,
-                                          bool IVSigned, Address IL,
-                                          Address LB, Address UB,
-                                          Address ST) {
+                                          bool IVSigned, Address IL, Address 
LB,
+                                          Address UB, Address ST) {
   // Call __kmpc_dispatch_next(
   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
@@ -2858,8 +2856,8 @@ static bool isAllocatableDecl(const VarDecl *VD) {
            !AA->getAllocator());
 }
 
-static RecordDecl *
-createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) 
{
+static RecordDecl *createPrivatesRecordDecl(CodeGenModule &CGM,
+                                            ArrayRef<PrivateDataTy> Privates) {
   if (!Privates.empty()) {
     ASTContext &C = CGM.getContext();
     // Build struct .kmp_privates_t. {
@@ -3364,7 +3362,6 @@ static bool checkInitIsRequired(CodeGenFunction &CGF,
   return InitRequired;
 }
 
-
 /// Emit task_dup function (for initialization of
 /// private/firstprivate/lastprivate vars and last_iter flag)
 /// \code
@@ -3721,10 +3718,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, 
SourceLocation Loc,
           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
-  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
-      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
-      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-          TaskEntry, KmpRoutineEntryPtrTy)};
+  SmallVector<llvm::Value *, 8> AllocArgs = {
+      emitUpdateLocation(CGF, Loc),
+      getThreadID(CGF, Loc),
+      TaskFlags,
+      KmpTaskTWithPrivatesTySize,
+      SharedsSize,
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
+                                                      KmpRoutineEntryPtrTy)};
   llvm::Value *NewTask;
   if (D.hasClausesOfKind<OMPNowaitClause>()) {
     // Check if we have any device clause associated with the directive.
@@ -3915,13 +3916,13 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, 
SourceLocation Loc,
   // Copy shareds if there are any.
   Address KmpTaskSharedsPtr = Address::invalid();
   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
-    KmpTaskSharedsPtr = Address(
-        CGF.EmitLoadOfScalar(
-            CGF.EmitLValueForField(
-                TDBase,
-                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
-            Loc),
-        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
+    KmpTaskSharedsPtr =
+        Address(CGF.EmitLoadOfScalar(
+                    CGF.EmitLValueForField(
+                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
+                                           KmpTaskTShareds)),
+                    Loc),
+                CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
@@ -4508,7 +4509,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, 
SourceLocation Loc,
   // list is not empty
   llvm::Value *ThreadID = getThreadID(CGF, Loc);
   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
-  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
+  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
   llvm::Value *DepTaskArgs[7];
   if (!Data.Dependences.empty()) {
     DepTaskArgs[0] = UpLoc;
@@ -4753,12 +4754,12 @@ static void EmitOMPAggregateReduction(
   Scope.ForceCleanup();
 
   // Shift the address forward by one element.
-  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
-      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
-      "omp.arraycpy.dest.element");
-  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
-      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
-      "omp.arraycpy.src.element");
+  llvm::Value *LHSElementNext =
+      CGF.Builder.CreateConstGEP1_32(LHSAddr.getElementType(), LHSElementPHI,
+                                     /*Idx0=*/1, "omp.arraycpy.dest.element");
+  llvm::Value *RHSElementNext =
+      CGF.Builder.CreateConstGEP1_32(RHSAddr.getElementType(), RHSElementPHI,
+                                     /*Idx0=*/1, "omp.arraycpy.src.element");
   // Check whether we've reached the end.
   llvm::Value *Done =
       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
@@ -5708,7 +5709,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
-  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *FlagsFD = addFieldToRecordDecl(
@@ -6218,7 +6219,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
 
 /// Checks if the expression is constant or does not have non-trivial function
 /// calls.
-static bool isTrivial(ASTContext &Ctx, const Expr * E) {
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
   // We can skip constant expressions.
   // We can skip expressions with trivial calls or simple expressions.
   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
@@ -6413,10 +6414,11 @@ llvm::Value 
*CGOpenMPRuntime::emitNumTeamsForTargetDirective(
       const auto *CS = D.getInnermostCapturedStmt();
       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
-                                                  /*IgnoreResultAssign*/ true);
+      llvm::Value *NumTeamsVal =
+          CGF.EmitScalarExpr(NumTeams,
+                             /*IgnoreResultAssign*/ true);
       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
-                             /*isSigned=*/true);
+                               /*isSigned=*/true);
     }
     case OMPD_target_teams:
     case OMPD_target_teams_distribute:
@@ -6424,10 +6426,11 @@ llvm::Value 
*CGOpenMPRuntime::emitNumTeamsForTargetDirective(
     case OMPD_target_teams_distribute_parallel_for:
     case OMPD_target_teams_distribute_parallel_for_simd: {
       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
-      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
-                                                  /*IgnoreResultAssign*/ true);
+      llvm::Value *NumTeamsVal =
+          CGF.EmitScalarExpr(NumTeams,
+                             /*IgnoreResultAssign*/ true);
       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
-                             /*isSigned=*/true);
+                               /*isSigned=*/true);
     }
     default:
       break;
@@ -7378,7 +7381,31 @@ class MappableExprsHandler {
     // dimension.
     uint64_t DimSize = 1;
 
-    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
+    // Detects non-contiguous updates due to strided accesses.
+    // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
+    // correctly when generating information to be passed to the runtime. The
+    // flag is set to true if any array section has a stride not equal to 1, or
+    // if the stride is not a constant expression (conservatively assumed
+    // non-contiguous).
+    bool IsNonContiguous = [&]() -> bool {
+      for (const auto &Component : Components) {
+        const auto *OASE =
+            dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
+        if (OASE) {
+          const Expr *StrideExpr = OASE->getStride();
+          if (StrideExpr) {
+            if (const auto Constant =
+                    StrideExpr->getIntegerConstantExpr(CGF.getContext())) {
+              if (!Constant->isOne()) {
+                return true;
+              }
+            }
+          }
+        }
+      }
+      return false;
+    }();
+
     bool IsPrevMemberReference = false;
 
     bool IsPartialMapped =
@@ -7550,8 +7577,8 @@ class MappableExprsHandler {
                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
               TypeSize.getQuantity() - 1);
           PartialStruct.HighestElem = {
-              std::numeric_limits<decltype(
-                  PartialStruct.HighestElem.first)>::max(),
+              std::numeric_limits<
+                  decltype(PartialStruct.HighestElem.first)>::max(),
               HB};
           PartialStruct.Base = BP;
           PartialStruct.LB = LB;
@@ -8966,7 +8993,8 @@ class MappableExprsHandler {
       generateInfoForComponentList(
           MapType, MapModifiers, {}, Components, CombinedInfo,
           StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
-          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
+          IsImplicit,
+          /*GenerateAllInfoForClauses*/ false, Mapper,
           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
       IsFirstComponentList = false;
     }
@@ -10133,7 +10161,7 @@ bool 
CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
     return false;
   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
-  switch(A->getAllocatorType()) {
+  switch (A->getAllocatorType()) {
   case OMPAllocateDeclAttr::OMPNullMemAlloc:
   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
   // Not supported, fallback to the default mem space.
@@ -10237,7 +10265,8 @@ void 
CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
           : CGF.Builder.getInt32(0);
 
-  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, 
thread_limit)
+  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams,
+  // thread_limit)
   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                      ThreadLimitVal};
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
@@ -10546,7 +10575,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
 }
 
 namespace {
-  /// Kind of parameter in a function with 'declare simd' directive.
+/// Kind of parameter in a function with 'declare simd' directive.
 enum ParamKindTy {
   Linear,
   LinearRef,
@@ -10672,18 +10701,10 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, 
llvm::Function *Fn,
     unsigned VecRegSize;
   };
   ISADataTy ISAData[] = {
-      {
-          'b', 128
-      }, // SSE
-      {
-          'c', 256
-      }, // AVX
-      {
-          'd', 256
-      }, // AVX2
-      {
-          'e', 512
-      }, // AVX512
+      {'b', 128}, // SSE
+      {'c', 256}, // AVX
+      {'d', 256}, // AVX2
+      {'e', 512}, // AVX512
   };
   llvm::SmallVector<char, 2> Masked;
   switch (State) {
@@ -11675,7 +11696,8 @@ Address 
CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
     RD->completeDefinition();
     NewType = C.getRecordType(RD);
-    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), 
VD->getName());
+    Address Addr =
+        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
   } else {
@@ -11684,8 +11706,7 @@ Address 
CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
     FiredField = std::get<2>(VI->getSecond());
     BaseLVal = std::get<3>(VI->getSecond());
   }
-  LValue FiredLVal =
-      CGF.EmitLValueForField(BaseLVal, FiredField);
+  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
   CGF.EmitStoreOfScalar(
       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
       FiredLVal);
@@ -11872,7 +11893,7 @@ void 
CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
            "Lastprivate conditional is not found in outer region.");
     QualType StructTy = std::get<0>(It->getSecond());
-    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
+    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
     LValue PrivLVal = CGF.EmitLValue(FoundE);
     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
         PrivLVal.getAddress(),
@@ -12225,9 +12246,7 @@ bool 
CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }
 
-bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
-  return false;
-}
+bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; }
 
 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
diff --git a/offload/test/offloading/strided_multiple_update.c 
b/offload/test/offloading/strided_multiple_update.c
new file mode 100644
index 0000000000000..b089746d56e84
--- /dev/null
+++ b/offload/test/offloading/strided_multiple_update.c
@@ -0,0 +1,61 @@
+// This test checks that #pragma omp target update from(data1[0:3:4],
+// data2[0:2:5]) correctly updates disjoint strided sections of multiple arrays
+// from the device to the host.
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+  int len = 12;
+  double data1[len], data2[len];
+
+// Initial values
+#pragma omp target map(tofrom : data1[0 : len], data2[0 : len])
+  {
+    for (int i = 0; i < len; i++) {
+      data1[i] = i;
+      data2[i] = i * 10;
+    }
+  }
+
+  printf("original host array values:\n");
+  printf("data1: ");
+  for (int i = 0; i < len; i++)
+    printf("%.1f ", data1[i]);
+  printf("\ndata2: ");
+  for (int i = 0; i < len; i++)
+    printf("%.1f ", data2[i]);
+  printf("\n\n");
+
+#pragma omp target data map(to : data1[0 : len], data2[0 : len])
+  {
+// Modify arrays on device
+#pragma omp target
+    {
+      for (int i = 0; i < len; i++)
+        data1[i] += i;
+      for (int i = 0; i < len; i++)
+        data2[i] += 100;
+    }
+
+// data1[0:3:4]  // indices 0,4,8
+// data2[0:2:5]  // indices 0,5
+#pragma omp target update from(data1[0 : 3 : 4], data2[0 : 2 : 5])
+  }
+
+  printf("device array values after update from:\n");
+  printf("data1: ");
+  for (int i = 0; i < len; i++)
+    printf("%.1f ", data1[i]);
+  printf("\ndata2: ");
+  for (int i = 0; i < len; i++)
+    printf("%.1f ", data2[i]);
+  printf("\n\n");
+
+  // CHECK: data1: 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0
+  // CHECK: data2: 0.0 10.0 20.0 30.0 40.0 50.0 60.0 70.0 80.0 90.0 100.0 110.0
+
+  // CHECK: data1: 0.0 1.0 2.0 3.0 8.0 5.0 6.0 7.0 16.0 9.0 10.0 11.0
+  // CHECK: data2: 100.0 10.0 20.0 30.0 40.0 150.0 60.0 70.0 80.0 90.0 100.0 
110.0
+}
diff --git a/offload/test/offloading/strided_partial_update.c 
b/offload/test/offloading/strided_partial_update.c
new file mode 100644
index 0000000000000..0a28caf4cb401
--- /dev/null
+++ b/offload/test/offloading/strided_partial_update.c
@@ -0,0 +1,63 @@
+// This test checks that #pragma omp target update from(data[0:4:3]) correctly 
updates
+// every third element (stride 3) from the device to the host, partially across
+// the array
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+  int len = 11;
+  double data[len];
+
+#pragma omp target map(tofrom : data[0 : len])
+  {
+    for (int i = 0; i < len; i++)
+      data[i] = i;
+  }
+
+  // initial values
+  printf("original host array values:\n");
+  for (int i = 0; i < len; i++)
+    printf("%f\n", data[i]);
+  printf("\n");
+
+#pragma omp target data map(to : data[0 : len])
+  {
+// Modify arrays on device
+#pragma omp target
+    for (int i = 0; i < len; i++)
+      data[i] += i;
+
+#pragma omp target update from(data[0 : 4 : 3]) // indices 0,3,6,9
+  }
+
+  printf("device array values after update from:\n");
+  for (int i = 0; i < len; i++)
+    printf("%f\n", data[i]);
+  printf("\n");
+
+  // CHECK: 0.000000
+  // CHECK: 1.000000
+  // CHECK: 2.000000
+  // CHECK: 3.000000
+  // CHECK: 4.000000
+  // CHECK: 5.000000
+  // CHECK: 6.000000
+  // CHECK: 7.000000
+  // CHECK: 8.000000
+  // CHECK: 9.000000
+  // CHECK: 10.000000
+
+  // CHECK: 0.000000
+  // CHECK: 1.000000
+  // CHECK: 2.000000
+  // CHECK: 6.000000
+  // CHECK: 4.000000
+  // CHECK: 5.000000
+  // CHECK: 12.000000
+  // CHECK: 7.000000
+  // CHECK: 8.000000
+  // CHECK: 18.000000
+  // CHECK: 10.000000
+}
diff --git a/offload/test/offloading/strided_update.c 
b/offload/test/offloading/strided_update.c
new file mode 100644
index 0000000000000..6626a3286063d
--- /dev/null
+++ b/offload/test/offloading/strided_update.c
@@ -0,0 +1,54 @@
+// This test checks that "update from" clause in OpenMP is supported when the
+// elements are updated in a non-contiguous manner. This test checks that
+// #pragma omp target update from(data[0:4:2]) correctly updates only every
+// other element (stride 2) from the device to the host
+
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+  int len = 8;
+  double data[len];
+#pragma omp target map(tofrom : len, data[0 : len])
+  {
+    for (int i = 0; i < len; i++) {
+      data[i] = i;
+    }
+  }
+  // Initial values
+  printf("original host array values:\n");
+  for (int i = 0; i < len; i++)
+    printf("%f\n", data[i]);
+  printf("\n");
+
+#pragma omp target data map(to : len, data[0 : len])
+  {
+// Modify arrays on device
+#pragma omp target
+    for (int i = 0; i < len; i++) {
+      data[i] += i;
+    }
+
+#pragma omp target update from(data[0 : 4 : 2])
+  }
+  // CHECK: 0.000000
+  // CHECK: 1.000000
+  // CHECK: 4.000000
+  // CHECK: 3.000000
+  // CHECK: 8.000000
+  // CHECK: 5.000000
+  // CHECK: 12.000000
+  // CHECK: 7.000000
+  // CHECK-NOT: 2.000000
+  // CHECK-NOT: 6.000000
+  // CHECK-NOT: 10.000000
+  // CHECK-NOT: 14.000000
+
+  printf("from target array results:\n");  
+  for (int i = 0; i < len; i++)  
+    printf("%f\n", data[i]);  
+  printf("\n");  
+  
+  return 0;  
+}  

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to