Hi Tobias!

On 2024-07-26T20:05:43+0200, Tobias Burnus <tbur...@baylibre.com> wrote:
> The main idea of 'link' is to permit putting only a subset of a
> huge array on the device. Well, in order to make this work properly,
> it requires that one can map an array section, which does not
> start with the first element.
>
> This patch adjusts the pointers such, that this actually works.
>
> (Tested on x86-64-gnu-linux with Nvptx offloading.)
> Comments, suggestions, remarks before I commit it?

> libgomp: Fix declare target link with offset array-section mapping [PR116107]
>
> Assume that 'int var[100]' is 'omp declare target link(var)'. When now
> mapping an array section with offset such as 'map(to:var[20:10])',
> the device-side link pointer has to store &<device-storage-data>[0] minus
> the offset such that var[20] will access <device-storage-data>[0]. But
> the offset calculation was missed such that the device-side 'var' pointed
> to the first element of the mapped data - and var[20] points beyond at
> some invalid memory.
>
>       PR middle-end/116107
>
> libgomp/ChangeLog:
>
>       * target.c (gomp_map_vars_internal): Honor array mapping offsets
>       with declare-target 'link' variables.
>       * testsuite/libgomp.c-c++-common/target-link-2.c: New test.
>
>  libgomp/target.c                                   |  7 ++-
>  .../testsuite/libgomp.c-c++-common/target-link-2.c | 59 
> ++++++++++++++++++++++
>  2 files changed, 64 insertions(+), 2 deletions(-)

The new test case 'libgomp.c-c++-common/target-link-2.c' generally PASSes
on one-GPU systems, but on a multi-GPU system (tested nvidia5):

    $ nvidia-smi -L
    GPU 0: Tesla K80 (UUID: [...])
    GPU 1: Tesla K80 (UUID: [...])

..., I see:

    +PASS: libgomp.c/../libgomp.c-c++-common/target-link-2.c (test for excess 
errors)
    +FAIL: libgomp.c/../libgomp.c-c++-common/target-link-2.c execution test

    +PASS: libgomp.c++/../libgomp.c-c++-common/target-link-2.c (test for excess 
errors)
    +FAIL: libgomp.c++/../libgomp.c-c++-common/target-link-2.c execution test

    [...]
    #2  0x00007ffff7b548fc in __GI_abort () at abort.c:79
    #3  0x0000000010000bd4 in main () at 
[...]/libgomp.c-c++-common/target-link-2.c:38
    (gdb) frame 3
    #3  0x0000000010000bd4 in main () at 
[...]/libgomp.c-c++-common/target-link-2.c:38
    38              __builtin_abort ();
    (gdb) list
    33
    34            #pragma omp target map(from: res2) device(dev)
    35              res2 = arr[5];
    36
    37            if (res2 != 6)
    38              __builtin_abort ();
    [...]
    (gdb) print res2
    $1 = 60

I first thought that maybe just:

    --- libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
    +++ libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
    @@ -54,6 +54,8 @@ int main()
           for (int i = 0; i < 10; i++)
            if (res[i] != (4 + i)*10)
              __builtin_abort ();
    +
    +      #pragma omp target exit data map(release:arr[3:10]) device(dev)
         }
       return 0;
     }

... was missing, but that doesn't resolve the issue: same error state.
Could you please have a look what other state needs to be reset, in which
way?


Grüße
 Thomas


> diff --git a/libgomp/target.c b/libgomp/target.c
> index aa01c1367b9..e3e648f5443 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr 
> *devicep,
>               if (k->aux && k->aux->link_key)
>                 {
>                   /* Set link pointer on target to the device address of the
> -                    mapped object.  */
> -                 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> +                    mapped object. Also deal with offsets due to
> +                    array-section mapping. */
> +                 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
> +                                            - (k->host_start
> +                                               - 
> k->aux->link_key->host_start));
>                   /* We intentionally do not use coalescing here, as it's not
>                      data allocated by the current call to this function.  */
>                   gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
> diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c 
> b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> new file mode 100644
> index 00000000000..4ff4080da76
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> @@ -0,0 +1,59 @@
> +/* PR middle-end/116107  */
> +
> +#include <omp.h>
> +
> +int arr[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> +#pragma omp declare target link(arr)
> +
> +#pragma omp begin declare target
> +void f(int *res)
> +{
> +  __builtin_memcpy (res, &arr[5], sizeof(int)*10);
> +}
> +
> +void g(int *res)
> +{
> +  __builtin_memcpy (res, &arr[3], sizeof(int)*10);
> +}
> +#pragma omp end declare target
> +
> +int main()
> +{
> +  int res[10], res2;
> +  for (int dev = 0; dev < omp_get_num_devices(); dev++)
> +    {
> +      __builtin_memset (res, 0, sizeof (res));
> +      res2 = 99;
> +
> +      #pragma omp target enter data map(arr[5:10]) device(dev)
> +
> +      #pragma omp target map(from: res) device(dev)
> +     f (res);
> +
> +      #pragma omp target map(from: res2) device(dev)
> +     res2 = arr[5];
> +
> +      if (res2 != 6)
> +     __builtin_abort ();
> +      for (int i = 0; i < 10; i++)
> +     if (res[i] != 6 + i)
> +       __builtin_abort ();
> +
> +      #pragma omp target exit data map(release:arr[5:10]) device(dev)
> +
> +      for (int i = 0; i < 15; i++)
> +     res[i] *= 10;
> +       __builtin_abort ();
> +
> +      #pragma omp target enter data map(arr[3:10]) device(dev)
> +      __builtin_memset (res, 0, sizeof (res));
> +
> +      #pragma omp target map(from: res) device(dev)
> +     g (res);
> +
> +      for (int i = 0; i < 10; i++)
> +     if (res[i] != (4 + i)*10)
> +       __builtin_abort ();
> +    }
> +  return 0;
> +}

Reply via email to