https://gcc.gnu.org/g:2d14b0787c3f5acbbcd642ebf0352cb120e3012d
commit 2d14b0787c3f5acbbcd642ebf0352cb120e3012d
Author: Julian Brown <jul...@codesourcery.com>
Date:   Wed Sep 13 13:31:48 2023 +0000

    OpenMP: Support accelerated 2D/3D memory copies for AMD GCN [OG14-only part]

    This patch only adds the bits missing from mainline:

    Support is also added for 1-dimensional strided accesses: these are
    treated as a special case of 2-dimensional transfers, where the innermost
    dimension is formed from the stride length (in bytes).

    2023-09-19  Julian Brown  <jul...@codesourcery.com>

    libgomp/
            * target.c (omp_target_memcpy_rect_worker): Add 1D strided transfer
            support.

Diff:
---
 libgomp/ChangeLog.omp |  5 +++++
 libgomp/target.c      | 31 +++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index cdb3b42be14..a053803a9ef 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,8 @@
+2023-09-19  Julian Brown  <jul...@codesourcery.com>
+
+        * target.c (omp_target_memcpy_rect_worker): Add 1D strided transfer
+        support.
+
 2023-08-10  Julian Brown  <jul...@codesourcery.com>
 
         * testsuite/libgomp.c-c++-common/declare-mapper-18.c: New test.
diff --git a/libgomp/target.c b/libgomp/target.c
index c28c3e1e5bb..23dc72476ec 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5180,6 +5180,37 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
       if (__builtin_mul_overflow (span, strides[0], &stride))
         return EINVAL;
 
+      if (((src_devicep && src_devicep->memcpy2d_func)
+           || (dst_devicep && dst_devicep->memcpy2d_func))
+          && (stride % element_size) == 0)
+        {
+          /* Try using memcpy2d for a 1-dimensional strided access.  Here we
+             treat the transfer as a 2-dimensional array, where the inner
+             dimension is calculated to be (stride in bytes) / element_size.
+             Indices/offsets are adjusted so the source/destination pointers
+             point to the first element to be transferred, to make the sums
+             easier.  (There are some configurations of 2D strided accesses
+             that memcpy3d could handle similarly, but those are probably rare
+             and are unimplemented for now.)  */
+
+          /* If stride is element size, this is a contiguous transfer and
+             should have been handled above.  */
+          assert (stride > element_size);
+
+          int dst_id = dst_devicep ? dst_devicep->target_id : -1;
+          int src_id = src_devicep ? src_devicep->target_id : -1;
+          void *subarray_src = (char *) src + src_off;
+          void *subarray_dst = (char *) dst + dst_off;
+
+          struct gomp_device_descr *devp = dst_devicep ? dst_devicep
+                                                       : src_devicep;
+          ret = devp->memcpy2d_func (dst_id, src_id, element_size, volume[0],
+                                     subarray_dst, 0, 0, stride, subarray_src,
+                                     0, 0, stride);
+          if (ret != -1)
+            return ret ? 0 : EINVAL;
+        }
+
       for (i = 0, ret = 1; i < volume[0] && ret; i++)
         {
           if (src_devicep == NULL)