On 09/11/15 16:35, Tom de Vries wrote:
Hi,this patch series for stage1 trunk adds support to: - parallelize oacc kernels regions using parloops, and - map the loops onto the oacc gang dimension. The patch series contains these patches: 1 Insert new exit block only when needed in transform_to_exit_first_loop_alt 2 Make create_parallel_loop return void 3 Ignore reduction clause on kernels directive 4 Implement -foffload-alias 5 Add in_oacc_kernels_region in struct loop 6 Add pass_oacc_kernels 7 Add pass_dominator_oacc_kernels 8 Add pass_ch_oacc_kernels 9 Add pass_parallelize_loops_oacc_kernels 10 Add pass_oacc_kernels pass group in passes.def 11 Update testcases after adding kernels pass group 12 Handle acc loop directive 13 Add c-c++-common/goacc/kernels-*.c 14 Add gfortran.dg/goacc/kernels-*.f95 15 Add libgomp.oacc-c-c++-common/kernels-*.c 16 Add libgomp.oacc-fortran/kernels-*.f95 The first 9 patches are more or less independent, but patches 10-16 are intended to be committed at the same time. Bootstrapped and reg-tested on x86_64. Build and reg-tested with nvidia accelerator, in combination with a patch that enables accelerator testing (which is submitted at https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ). I'll post the individual patches in reply to this message.
This patch adds Fortran oacc kernels compilation tests. Thanks, - Tom
Add gfortran.dg/goacc/kernels-*.f95 2015-11-09 Tom de Vries <[email protected]> * gfortran.dg/goacc/kernels-loop-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: New test. * gfortran.dg/goacc/kernels-loop-data-update.f95: New test. * gfortran.dg/goacc/kernels-loop-data.f95: New test. * gfortran.dg/goacc/kernels-loop.f95: New test. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: New test. --- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 | 45 +++++++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-2.f95 | 51 ++++++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit-2.f95 | 51 ++++++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit.f95 | 49 +++++++++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-update.f95 | 48 ++++++++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data.f95 | 49 +++++++++++++++++++++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 | 39 +++++++++++++++++ .../kernels-parallel-loop-data-enter-exit.f95 | 50 +++++++++++++++++++++ 8 files changed, 382 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 new file mode 100644 index 0000000..7fd6d4e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 @@ -0,0 +1,45 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc kernels copyout (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels copyout (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 new file mode 100644 index 0000000..f788f67 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 @@ -0,0 +1,51 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc end data + + !$acc data copyout (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc end data + + !$acc data copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 new file mode 100644 index 0000000..3599052 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 @@ -0,0 +1,51 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc exit data copyout (a(0:n-1)) + + !$acc enter data create (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc exit data copyout (b(0:n-1)) + + !$acc enter data copyin (a(0:n-1), b(0:n-1)) create (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc exit data copyout (c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 new file mode 100644 index 0000000..562422e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 @@ -0,0 +1,49 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 new file mode 100644 index 0000000..ed18fe1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 @@ -0,0 +1,48 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + do i = 0, n -1 + b(i) = i * 4 + end do + + !$acc update device (b(0:n-1)) + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 2 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 new file mode 100644 index 0000000..177aa64 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 @@ -0,0 +1,49 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 new file mode 100644 index 0000000..c9364dd --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 @@ -0,0 +1,39 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + do i = 0, n - 1 + a(i) = i * 2 + end do + + do i = 0, n -1 + b(i) = i * 4 + end do + + !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only one loop is analyzed, and that it can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 new file mode 100644 index 0000000..d805938 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 @@ -0,0 +1,50 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! { dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc parallel present (b(0:n-1)) + !$acc loop + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end parallel + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 2 "parloops_oacc_kernels" } } -- 1.9.1
