[clang] [llvm] [OpenMP] Fix non-contiguous array omp target update (PR #156889)

via cfe-commits Sun, 07 Sep 2025 07:04:09 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Ivan R. Ivanov (ivanradanov)

<details>
<summary>Changes</summary>

The existing implementation has three issues which this patch addresses.

1. The last dimension which represents the bytes in the type, has the wrong 
stride and count. For example, for a 4 byte int, count=1 and stride=4. The 
correct representation here is count=4 and stride=1 because there are 4 bytes 
(count=4) that we need to copy and we do not skip any bytes (stride=1).

2. The size of the data copy was computed using the last dimension. However, 
this is incorrect in cases where some of the final dimensions get merged into 
one. In this case we need to take the combined size of the merged dimensions, 
which is (Count * Stride) of the first merged dimension.

3. The Offset into a dimension was computed as a multiple of its Stride. 
However, this Stride which is in bytes, already includes the stride multiplier 
given by the user. This means that when the user specified 1:3:2, i.e. elements 
1, 3, 5, the runtime incorrectly copied elements 2, 4, 6. Fix this by 
precomputing at compile time the Offset to be in bytes by correctly multiplying 
the offset by the stride of the dimension without the user-specified multiplier.

---

Patch is 21.76 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/156889.diff


4 Files Affected:

- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+12-8) 
- (modified) clang/test/OpenMP/target_update_codegen.cpp (+22-21) 
- (modified) offload/libomptarget/omptarget.cpp (+11-3) 
- (modified) offload/test/offloading/non_contiguous_update.cpp (+71-36) 


``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f98339d472fa9..691dd62bf549a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7874,8 +7874,8 @@ class MappableExprsHandler {
     // For supporting stride in array section, we need to initialize the first
     // dimension size as 1, first offset as 0, and first count as 1
     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
-    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
-    MapValuesArrayTy CurStrides;
+    MapValuesArrayTy CurCounts;
+    MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
     uint64_t ElementTypeSize;
 
@@ -7899,8 +7899,8 @@ class MappableExprsHandler {
              "Should be either ConstantArray or VariableArray if not the "
              "first Component");
 
-      // Get element size if CurStrides is empty.
-      if (CurStrides.empty()) {
+      // Get element size if CurCounts is empty.
+      if (CurCounts.empty()) {
         const Type *ElementType = nullptr;
         if (CAT)
           ElementType = CAT->getElementType().getTypePtr();
@@ -7920,7 +7920,7 @@ class MappableExprsHandler {
             ElementType = ElementType->getPointeeOrArrayElementType();
           ElementTypeSize =
               Context.getTypeSizeInChars(ElementType).getQuantity();
-          CurStrides.push_back(
+          CurCounts.push_back(
               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
         }
       }
@@ -7980,7 +7980,6 @@ class MappableExprsHandler {
                                            CGF.Int64Ty,
                                            /*isSigned=*/false);
       }
-      CurOffsets.push_back(Offset);
 
       // Count
       const Expr *CountExpr = OASE->getLength();
@@ -8017,11 +8016,12 @@ class MappableExprsHandler {
       CurCounts.push_back(Count);
 
       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
+      // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
       //              Offset      Count     Stride
-      //    D0          0           1         4    (int)    <- dummy dimension
+      //    D0          0           4         1    (int)    <- dummy dimension
       //    D1          0           2         8    (2 * (1) * 4)
-      //    D2          1           2         20   (1 * (1 * 5) * 4)
+      //    D2          100         2         20   (1 * (1 * 5) * 4)
       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
       const Expr *StrideExpr = OASE->getStride();
       llvm::Value *Stride =
@@ -8034,6 +8034,10 @@ class MappableExprsHandler {
         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
       else
         CurStrides.push_back(DimProd);
+
+      Offset = CGF.Builder.CreateNUWMul(DimProd, Offset);
+      CurOffsets.push_back(Offset);
+
       if (DI != DimSizes.end())
         ++DI;
     }
diff --git a/clang/test/OpenMP/target_update_codegen.cpp 
b/clang/test/OpenMP/target_update_codegen.cpp
index c8211f475c7fc..648ad58787660 100644
--- a/clang/test/OpenMP/target_update_codegen.cpp
+++ b/clang/test/OpenMP/target_update_codegen.cpp
@@ -1134,7 +1134,7 @@ void foo(int arg) {
   // CK20: store i64 {{32|64}}, ptr [[STRIDE]],
   // CK20: [[DIM_2:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS]], {{.+}} 0, {{.+}} 1
   // CK20: [[OFFSET_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 0
-  // CK20: store i64 1, ptr [[OFFSET_2]],
+  // CK20: store i64 {{8|16}}, ptr [[OFFSET_2]],
   // CK20: [[COUNT_2:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_2]], {{.+}} 0, {{.+}} 1
   // CK20: store i64 4, ptr [[COUNT_2]],
   // CK20: [[STRIDE_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 2
@@ -1143,9 +1143,9 @@ void foo(int arg) {
   // CK20: [[OFFSET_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_3]], {{.+}} 0, {{.+}} 0
   // CK20: store i64 0, ptr [[OFFSET_3]],
   // CK20: [[COUNT_3:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_3]], {{.+}} 0, {{.+}} 1
-  // CK20: store i64 1, ptr [[COUNT_3]],
+  // CK20: store i64 {{8|16}}, ptr [[COUNT_3]],
   // CK20: [[STRIDE_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_3]], {{.+}} 0, {{.+}} 2
-  // CK20: store i64 {{8|16}}, ptr [[STRIDE_3]],
+  // CK20: store i64 1, ptr [[STRIDE_3]],
   // CK20-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 -1, 
i32 1, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[MSIZE]], ptr [[MTYPE]]{{.+}})
   // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -1202,7 +1202,7 @@ struct ST {
     // CK21: store i64 {{400|800}}, ptr [[STRIDE_1]],
     // CK21: [[DIM_2:%.+]] = getelementptr inbounds [4 x 
[[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 1
     // CK21: [[OFFSET_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 0
-    // CK21: store i64 1, ptr [[OFFSET_2]],
+    // CK21: store i64 {{40|80}}, ptr [[OFFSET_2]],
     // CK21: [[COUNT_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 1
     // CK21: store i64 3, ptr [[COUNT_2]],
     // CK21: [[STRIDE_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 2
@@ -1218,9 +1218,9 @@ struct ST {
     // CK21: [[OFFSET_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 0
     // CK21: store i64 0, ptr [[OFFSET_4]],
     // CK21: [[COUNT_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 1
-    // CK21: store i64 1, ptr [[COUNT_4]],
+    // CK21: store i64 {{4|8}}, ptr [[COUNT_4]],
     // CK21: [[STRIDE_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 2
-    // CK21: store i64 {{4|8}}, ptr [[STRIDE_4]],
+    // CK21: store i64 1, ptr [[STRIDE_4]],
     // CK21-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 
-1, i32 2, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[GEPSZ:%.+]], ptr 
[[MTYPE]]{{.+}})
     // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
     // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -1276,7 +1276,7 @@ struct ST {
     // CK22: store i64 200, ptr [[STRIDE]],
     // CK22: [[DIM_2:%.+]] = getelementptr inbounds [4 x 
[[STRUCT_DESCRIPTOR]]], ptr [[DIMS]], {{.+}} 0, {{.+}} 1
     // CK22: [[OFFSET:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 0
-    // CK22: store i64 1, ptr [[OFFSET]],
+    // CK22: store i64 40, ptr [[OFFSET]],
     // CK22: [[COUNT:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_2]], {{.+}} 0, {{.+}} 1
     // CK22: store i64 3, ptr [[COUNT]],
     // CK22: [[STRIDE:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 2
@@ -1292,9 +1292,9 @@ struct ST {
     // CK22: [[OFFSET:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 0
     // CK22: store i64 0, ptr [[OFFSET]],
     // CK22: [[COUNT:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_4]], {{.+}} 0, {{.+}} 1
-    // CK22: store i64 1, ptr [[COUNT]],
+    // CK22: store i64 4, ptr [[COUNT]],
     // CK22: [[STRIDE:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 2
-    // CK22: store i64 4, ptr [[STRIDE]],
+    // CK22: store i64 1, ptr [[STRIDE]],
     // CK22-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 
-1, i32 1, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[MSIZE]], ptr 
[[MTYPE]]{{.+}})
     // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
     // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -1357,7 +1357,7 @@ void foo(int arg) {
   // CK23: store i64 200, ptr [[STRIDE]],
   // CK23: [[DIM_2:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS]], {{.+}} 0, {{.+}} 1
   // CK23: [[OFFSET_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 0
-  // CK23: store i64 1, ptr [[OFFSET_2]],
+  // CK23: store i64 20, ptr [[OFFSET_2]],
   // CK23: [[COUNT_2:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_2]], {{.+}} 0, {{.+}} 1
   // CK23: store i64 2, ptr [[COUNT_2]],
   // CK23: [[STRIDE_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 2
@@ -1373,9 +1373,9 @@ void foo(int arg) {
   // CK23: [[OFFSET_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 0
   // CK23: store i64 0, ptr [[OFFSET_4]],
   // CK23: [[COUNT_4:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_4]], {{.+}} 0, {{.+}} 1
-  // CK23: store i64 1, ptr [[COUNT_4]],
+  // CK23: store i64 4, ptr [[COUNT_4]],
   // CK23: [[STRIDE_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 2
-  // CK23: store i64 4, ptr [[STRIDE_4]],
+  // CK23: store i64 1, ptr [[STRIDE_4]],
   // CK23-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 -1, 
i32 1, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[MSIZE]], ptr [[MTYPE]]{{.+}})
   // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -1419,6 +1419,7 @@ void foo(int arg) {
   // CK24: [[MUL:%.+]] = mul nuw i64 8,
   // CK24: [[SUB:%.+]] = sub nuw i64 4, [[ARG:%.+]]
   // CK24: [[LEN:%.+]] = udiv {{.+}} [[SUB]], 1
+  // CK24: [[MUL_ARG:%.+]] = mul nuw i64 40, [[ARG]]
   // CK24: [[BP0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[BP:%.+]], 
{{.+}} 0, {{.+}} 0
   // CK24: store ptr [[ARR]], ptr [[BP0]],
   // CK24: [[P0:%.+]] = getelementptr inbounds [1 x ptr], ptr [[P:%.+]], 
{{.+}} 0, {{.+}} 0
@@ -1432,7 +1433,7 @@ void foo(int arg) {
   // CK24: store i64 320, ptr [[STRIDE]],
   // CK24: [[DIM_2:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS]], {{.+}} 0, {{.+}} 1
   // CK24: [[OFFSET_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 0
-  // CK24: store i64 [[ARG]], ptr [[OFFSET_2]],
+  // CK24: store i64 [[MUL_ARG]], ptr [[OFFSET_2]],
   // CK24: [[COUNT_2:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_2]], {{.+}} 0, {{.+}} 1
   // CK24: store i64 [[LEN]], ptr [[COUNT_2]],
   // CK24: [[STRIDE_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_2]], {{.+}} 0, {{.+}} 2
@@ -1448,9 +1449,9 @@ void foo(int arg) {
   // CK24: [[OFFSET_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 0
   // CK24: store i64 0, ptr [[OFFSET_4]],
   // CK24: [[COUNT_4:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_4]], {{.+}} 0, {{.+}} 1
-  // CK24: store i64 1, ptr [[COUNT_4]],
+  // CK24: store i64 8, ptr [[COUNT_4]],
   // CK24: [[STRIDE_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 2
-  // CK24: store i64 8, ptr [[STRIDE_4]],
+  // CK24: store i64 1, ptr [[STRIDE_4]],
   // CK24-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 -1, 
i32 1, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[MSIZE]], ptr [[MTYPE]]{{.+}})
   // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
@@ -1515,7 +1516,7 @@ void foo(int arg) {
   // CK25: store i64 20, ptr [[STRIDE_2]],
   // CK25: [[DIM_3:%.+]] = getelementptr inbounds [4 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS]], {{.+}} 0, {{.+}} 2
   // CK25: [[OFFSET_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_3]], {{.+}} 0, {{.+}} 0
-  // CK25: store i64 1, ptr [[OFFSET_3]],
+  // CK25: store i64 4, ptr [[OFFSET_3]],
   // CK25: [[COUNT_3:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_3]], {{.+}} 0, {{.+}} 1
   // CK25: store i64 4, ptr [[COUNT_3]],
   // CK25: [[STRIDE_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_3]], {{.+}} 0, {{.+}} 2
@@ -1524,9 +1525,9 @@ void foo(int arg) {
   // CK25: [[OFFSET_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 0
   // CK25: store i64 0, ptr [[OFFSET_4]],
   // CK25: [[COUNT_4:%.+]] = getelementptr inbounds nuw [[STRUCT_DESCRIPTOR]], 
ptr [[DIM_4]], {{.+}} 0, {{.+}} 1
-  // CK25: store i64 1, ptr [[COUNT_4]],
+  // CK25: store i64 4, ptr [[COUNT_4]],
   // CK25: [[STRIDE_4:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_4]], {{.+}} 0, {{.+}} 2
-  // CK25: store i64 4, ptr [[STRIDE_4]],
+  // CK25: store i64 1, ptr [[STRIDE_4]],
   // CK25: [[PTRS:%.+]] = getelementptr inbounds [3 x ptr], ptr 
%.offload_ptrs, i32 0, i32 0
   // CK25: store ptr [[DIMS]], ptr [[PTRS]],
   // CK25: [[DIM_5:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS_2]], {{.+}} 0, {{.+}} 0
@@ -1538,7 +1539,7 @@ void foo(int arg) {
   // CK25: store i64 12, ptr [[STRIDE_2_1]],
   // CK25: [[DIM_6:%.+]] = getelementptr inbounds [3 x [[STRUCT_DESCRIPTOR]]], 
ptr [[DIMS_2]], {{.+}} 0, {{.+}} 1
   // CK25: [[OFFSET_2_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_6]], {{.+}} 0, {{.+}} 0
-  // CK25: store i64 1, ptr [[OFFSET_2_2]],
+  // CK25: store i64 4, ptr [[OFFSET_2_2]],
   // CK25: [[COUNT_2_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_6]], {{.+}} 0, {{.+}} 1
   // CK25: store i64 2, ptr [[COUNT_2_2]],
   // CK25: [[STRIDE_2_2:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_6]], {{.+}} 0, {{.+}} 2
@@ -1547,9 +1548,9 @@ void foo(int arg) {
   // CK25: [[OFFSET_2_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_7]], {{.+}} 0, {{.+}} 0
   // CK25: store i64 0, ptr [[OFFSET_2_3]],
   // CK25: [[COUNT_2_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_7]], {{.+}} 0, {{.+}} 1
-  // CK25: store i64 1, ptr [[COUNT_2_3]],
+  // CK25: store i64 4, ptr [[COUNT_2_3]],
   // CK25: [[STRIDE_2_3:%.+]] = getelementptr inbounds nuw 
[[STRUCT_DESCRIPTOR]], ptr [[DIM_7]], {{.+}} 0, {{.+}} 2
-  // CK25: store i64 4, ptr [[STRIDE_2_3]],
+  // CK25: store i64 1, ptr [[STRIDE_2_3]],
   // CK25: [[PTRS_2:%.+]] = getelementptr inbounds [3 x ptr], ptr 
%.offload_ptrs, i32 0, i32 2
   // CK25: store ptr [[DIMS_2]], ptr [[PTRS_2]],
   // CK25-DAG: call void @__tgt_target_data_update_mapper(ptr @{{.+}}, i64 -1, 
i32 3, ptr [[GEPBP:%.+]], ptr [[GEPP:%.+]], ptr [[MSIZE]], ptr [[MTYPE]]{{.+}})
diff --git a/offload/libomptarget/omptarget.cpp 
b/offload/libomptarget/omptarget.cpp
index 32e89cc75efc9..f2e01993938b0 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -1221,7 +1221,7 @@ static int targetDataNonContiguous(ident_t *Loc, DeviceTy 
&Device,
   if (CurrentDim < DimSize) {
     for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
       uint64_t CurOffset =
-          (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
+          NonContig[CurrentDim].Offset + I * NonContig[CurrentDim].Stride;
       // we only need to transfer the first element for the last dimension
       // since we've already got a contiguous piece.
       if (CurrentDim != DimSize - 1 || I == 0) {
@@ -1293,9 +1293,17 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, 
int32_t ArgNum,
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
       __tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
       int32_t DimSize = ArgSizes[I];
-      uint64_t Size =
-          NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
+      DP("Non contig descriptor:\n");
+      for (int I = 0; I < DimSize; I++)
+        DP("  Dim %" PRId32 " : Offset %" PRIu64 " Count %" PRIu64
+           " Stride %" PRIu64 "\n",
+           I, NonContig[I].Offset, NonContig[I].Count, NonContig[I].Stride);
       int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
+      DP("Merged %" PRId32 " dimensions\n", MergedDim);
+      __tgt_target_non_contig &FirstMergedDim =
+          NonContig[DimSize - MergedDim - 1];
+      uint64_t Size = FirstMergedDim.Count * FirstMergedDim.Stride;
+      DP("Transfer size %" PRIu64 "\n", Size);
       Ret = targetDataNonContiguous(
           Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
           /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo);
diff --git a/offload/test/offloading/non_contiguous_update.cpp 
b/offload/test/offloading/non_contiguous_update.cpp
index 609f0f967fb17..3973174bf2c5e 100644
--- a/offload/test/offloading/non_contiguous_update.cpp
+++ b/offload/test/offloading/non_contiguous_update.cpp
@@ -7,9 +7,9 @@
 
 // Data structure definitions copied from OpenMP RTL.
 struct __tgt_target_non_contig {
-  int64_t offset;
-  int64_t width;
-  int64_t stride;
+  int64_t Offset;
+  int64_t Count;
+  int64_t Stride;
 };
 
 enum tgt_map_type { OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000 };
@@ -18,21 +18,22 @@ enum tgt_map_type { OMP_TGT_MAPTYPE_NON_CONTIG = 
0x100000000000 };
 #ifdef __cplusplus
 extern "C" {
 #endif
-void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
-                              void **args_base, void **args, int64_t 
*arg_sizes,
-                              int64_t *arg_types);
+  void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+                                void **args_base, void **args, int64_t 
*arg_sizes,
+                                int64_t *arg_types);
 #ifdef __cplusplus
 }
 #endif
 
 int main() {
+  {
   // case 1
   // int arr[3][4][5][6];
   // #pragma omp target update to(arr[0:2][1:3][1:2][:])
   // set up descriptor
   __tgt_target_non_contig non_contig[5] = {
-      {0, 2, 480}, {1, 3, 120}, {1, 2, 24}, {0, 6, 4}, {0, 1, 4}};
-  int64_t size = 4, type = OMP_TGT_MAPTYPE_NON_CONTIG;
+    {0, 2, 480}, {120, 3, 120}, {24, 2, 24}, {0, 6, 4}, {0, 4, 1}};
+  int64_t size = sizeof(non_contig) / sizeof(non_contig[0]), type = 
OMP_TGT_MAPTYPE_NON_CONTIG;
 
   void *base;
   void *begin = &non_contig;
@@ -40,9 +41,9 @@ int main() {
   int64_t *types = &type;
 
   // The below diagram is the visualization of the non-contiguous transfer 
after
-  // optimization. Note that each element represent the innermost dimension
-  // (unit size = 24) since the stride * count of last dimension is equal to 
the
-  // stride of second last dimension.
+  // optimization. Note that each element represent the merged innermost
+  // dimension (unit size = 24) since the stride * count of last dimension is
+  // equal to the stride of second last dimension.
   //
   // OOOOO OOOOO OOOOO
   // OXXOO OXXOO OOOOO
@@ -50,44 +51,78 @@ int main() {
   // OXXOO OXXOO OOOOO
   __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base, &begin,
                            sizes, types);
-  // DEBUG: offset 144
-  // DEBUG: offset 264
-  // DEBUG: offset 384
-  // DEBUG: offset 624
-  // DEBUG: offset 744
-  // DEBUG: offset 864
+  // DEBUG: offset 144 len 48
+  // DEBUG: offset 264 len 48
+  // DEBUG: offset 384 len 48
+  // DEBUG: offset 624 len 48
+  // DEBUG: offset 744 len 48
+  // DEBUG: offset 864 len 48
+  }
 
+  {
   // case 2
   // double darr[3][4][5];
   // #pragma omp target update to(darr[0:2:2][2:2][:2:2])
   // set up descriptor
-  __tgt_target_non_contig non_contig_2[4] = {
-      {0, 2, 320}, {2, 2, 40}, {0, 2, 16}, {0, 1, 8}};
-  int64_t size_2 = 4, type_2 = OMP_TGT_MAPTYPE_NON_CONTIG;
+  __tgt_target_non_contig non_contig[4] = {
+    {0, 2, 320}, {80, 2, 40}, {0, 2, 16}, {0, 8, 1}};
+  int64_t size = sizeof(non_contig) / sizeof(non_contig[0]), type = 
OMP_TGT_MAPTYPE_NON_CONTIG;
 
-  void *base_2;
-  void *begin_2 = &non_contig_2;
-  int64_t *sizes_2 = &size_2;
-  int64_t *types_2 = &type_2;
+  void *base;
+  void *begin = &non_contig;
+  int64_t *sizes = &size;
+  int64_t *types = &type;
 
   // The below diagram is the visualization of the non-contiguous transfer 
after
   // optimization. Note that each element represent the innermost dimension
-  // (uni...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/156889
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [OpenMP] Fix non-contiguous array omp target update (PR #156889)

Reply via email to