On Thu, Apr 12, 2018 at 11:14:45PM +0200, Thomas Koenig wrote:
> 2018-04-12  Thomas Koenig  <tkoe...@gcc.gnu.org>
> 
>         PR fortran/83064
>         PR testsuite/85346
>         * trans-stmt.c (gfc_trans_forall_loop): Use annot_expr_ivdep_kind
>         for annotation and remove dependence on -ftree-parallelize-loops.
> 
> 2018-04-12  Thomas Koenig  <tkoe...@gcc.gnu.org>
> 
>         PR fortran/83064
>         PR testsuite/85346
>         * gfortran.dg/do_concurrent_5.f90: Dynamically allocate main work
>         array and move test to libgomp/testsuite/libgomp.fortran.
>         * gfortran.dg/do_concurrent_6.f90: New test.
> 
> 2018-04-12  Thomas Koenig  <tkoe...@gcc.gnu.org>
> 
>         PR fortran/83064
>         PR testsuite/85346
>         * testsuite/libgomp.fortran: Move modified test from gfortran.dg
>         to here.

Please use full filename here, like:
        * testsuite/libgomp.fortran/do_concurrent_5.f90: New test, moved
        from gfortran.dg.  Make edof array allocatable.

Ok with that change.

> Index: trans-stmt.c
> ===================================================================
> --- trans-stmt.c      (Revision 259326)
> +++ trans-stmt.c      (Arbeitskopie)
> @@ -3643,12 +3643,12 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr
>        cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
>                             count, build_int_cst (TREE_TYPE (count), 0));
>  
> -      /* PR 83064 means that we cannot use the annotation if the
> -      autoparallelizer is active.  */
> -      if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops)
> +      /* PR 83064 means that we cannot use annot_expr_parallel_kind until
> +       the autoparallelizer can hande this.  */
> +      if (forall_tmp->do_concurrent)
>       cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
>                      build_int_cst (integer_type_node,
> -                                   annot_expr_parallel_kind),
> +                                   annot_expr_ivdep_kind),
>                      integer_zero_node);
>  
>        tmp = build1_v (GOTO_EXPR, exit_label);

> ! { dg-do  run }
> ! PR 83064 - this used to give wrong results.
> ! { dg-additional-options "-O1 -ftree-parallelize-loops=2" }
> ! Original test case by Christian Felter
> 
> program main
>     use, intrinsic :: iso_fortran_env
>     implicit none
> 
>     integer, parameter :: nsplit = 4
>     integer(int64), parameter :: ne = 2**20
>     integer(int64) :: stride, low(nsplit), high(nsplit), i
>     integer(int64), dimension(:), allocatable :: edof
>     real(real64), dimension(nsplit) :: pi
>     
>     allocate (edof(ne))
>     edof(1::4) = 1
>     edof(2::4) = 2
>     edof(3::4) = 3
>     edof(4::4) = 4
>     
>     stride = ceiling(real(ne)/nsplit)
>     do i = 1, nsplit
>         high(i) = stride*i
>     end do
>     do i = 2, nsplit
>         low(i) = high(i-1) + 1
>     end do
>     low(1) = 1
>     high(nsplit) = ne
> 
>     pi = 0
>     do concurrent (i = 1:nsplit)
>         pi(i) = sum(compute( low(i), high(i) ))
>     end do
>     if (abs (sum(pi) - atan(1.0d0)) > 1e-5) STOP 1
>     
> contains
>     
>     pure function compute( low, high ) result( ttt )        
>         integer(int64), intent(in) :: low, high
>         real(real64), dimension(nsplit) :: ttt
>         integer(int64) :: j, k
>         
>         ttt = 0
> 
>         ! Unrolled loop
> !         do j = low, high, 4
> !             k = 1
> !             ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )                   
>          
> !             k = 2
> !             ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 )                   
>          
> !             k = 3
> !             ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 )                   
>          
> !             k = 4
> !             ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 )                   
>          
> !         end do
>         
>         ! Loop with modulo operation
> !         do j = low, high
> !             k = mod( j, nsplit ) + 1
> !             ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 )                   
>                      
> !         end do
>         
>         ! Loop with subscripting via host association
>         do j = low, high
>             k = edof(j)
>             ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 )            
>                             
>         end do
>     end function
>     
> end program main

> ! { dg-do compile }
> ! { dg-additional-options "-fdump-tree-original" }
> 
> program main
>   real, dimension(100) :: a,b
>   call random_number(a)
>   do concurrent (i=1:100)
>      b(i) = a(i)*a(i)
>   end do
>   print *,sum(a)
> end program main
> 
> ! { dg-final { scan-tree-dump-times "ivdep" 1 "original" } }


        Jakub

Reply via email to