https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98391
Bug ID: 98391 Summary: Wrong results with small matrix size Product: gcc Version: 10.2.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: libgomp Assignee: unassigned at gcc dot gnu.org Reporter: mehdi.chinoune at hotmail dot com CC: jakub at gcc dot gnu.org Target Milestone: --- $ cat matmul.f90
program main
  ! Reproducer: multiplies two random n-by-n matrices twice -- once with a
  ! plain triple loop (result cc) and once with the same loop nest under
  ! OpenACC/OpenMP offload directives (result c) -- then prints the timings
  ! and the element-wise absolute differences between the two results.
  implicit none
  ! Real kind with at least 6 decimal digits and a decimal exponent range of at least 37.
  integer, parameter :: sp = selected_real_kind(6,37)
  integer :: i, j, k
  integer :: n
  character(len=5) :: var
  real(sp), allocatable :: a(:,:), b(:,:), c(:,:), cc(:,:)
  integer :: t1, t2, t3
  real(sp) :: tic
  ! Read the matrix order n from the first command-line argument and echo it.
  call get_command_argument( 1, var )
  read( var, '(i5.3)') n
  print*, n
  ! Allocate the operands and both result matrices; a and b get random values.
  allocate( a(n,n), b(n,n), c(n,n), cc(n,n) )
  call random_number(a)
  call random_number(b)
  c = 0._sp
  ! Reference product cc = a*b, computed with no parallel directive.
  ! tic receives the clock count rate, so (count difference)/tic is in seconds.
  call system_clock(t1, tic)
  cc = 0._sp
  do j = 1, n
    do k = 1, n
      do i = 1, n
        cc(i,j) = a(i,k)*b(k,j) + cc(i,j)
      end do
    end do
  end do
  call system_clock( t2 )
  print*, "T1 = ", (t2-t1)/tic
  ! Same loop nest, accumulating into c, under the offload directives below.
  ! NOTE(review): collapse(3) also parallelises the k loop, so iterations with
  ! different k update the same c(i,j) and no atomic/reduction is specified --
  ! this looks like a data race in the reproducer itself; confirm against the
  ! OpenMP/OpenACC specifications before treating the mismatch as a libgomp bug.
  !$acc parallel loop collapse(3) copyin(a,b) copy(c)
  !$omp target teams distribute parallel do collapse(3) map( to:a,b) map( tofrom:c )
  do j = 1, n
    do k = 1, n
      do i = 1, n
        c(i,j) = a(i,k)*b(k,j) + c(i,j)
      end do
    end do
  end do
  call system_clock(t3)
  print*, "T2 = ", (t3-t2)/tic
  ! Report the largest absolute difference, then every |cc - c| entry, eight per output line.
  print*, "Max(|C2-C1|) = ", maxval( abs(cc-c) )
  print '(8(f8.6,x))', abs(cc-c)
  !
end program main
$ gfortran-10 -fopenmp -foffload=nvptx-none matmul.f90 -o matmul.x $ ./matmul.x 16 16 T1 = 0.00000000 T2 = 0.129999995 Max(|C2-C1|) = 1.94201875 1.431175 1.329625 0.592309 1.422428 0.535594 0.723758 0.365623 0.492199 0.108950 0.489844 1.100990 0.736585 1.182946 0.372061 0.185741 1.237249 0.629846 0.706569 0.913222 1.189827 0.617232 0.973922 0.290312 0.755902 0.886491 0.874653 1.642155 1.499317 1.506070 1.541327 1.228426 1.011003 0.967729 0.587790 0.353565 0.241555 0.283207 0.498868 0.628635 0.259884 0.469164 0.637377 0.428355 0.807801 0.409959 0.630940 0.384264 0.590675 0.805984 1.565366 0.850695 1.189638 1.602054 0.152907 0.648516 0.345356 0.261692 0.754292 0.995521 1.108027 0.837113 0.498883 1.011412 0.765647 0.741106 0.335977 0.721143 0.176006 0.169367 0.256758 0.311771 0.158323 0.401097 0.566629 0.787195 0.316571 0.179871 0.129046 0.326540 0.400939 1.898347 0.547532 1.015191 0.659491 0.223342 1.052599 1.158586 0.745466 0.491430 0.677603 0.854077 0.635704 0.475488 0.692096 0.749846 1.144690 0.712732 0.894958 0.874708 0.775818 0.674985 1.034302 1.350598 0.503635 0.082981 0.620052 0.888901 1.243073 1.147498 0.785260 0.260857 0.790078 0.571918 0.326864 0.090428 0.353512 0.167201 0.553904 0.378190 0.368513 0.238174 0.247996 0.038351 0.118958 0.041398 0.009377 0.049255 0.042368 1.942019 0.577147 1.252101 1.314539 0.845633 0.898366 1.743726 0.886740 0.674877 0.428364 0.431692 0.143040 0.551206 0.222548 0.058435 0.401517 0.172672 0.128829 0.063327 0.320080 0.120494 0.111500 0.341694 0.348082 0.408216 0.414768 0.706280 0.923327 0.265845 0.318310 0.082875 0.091210 1.165700 1.020596 0.282284 0.086616 0.350870 0.860692 0.299911 0.278247 0.756335 0.357223 0.633629 1.136913 0.815934 0.602087 0.790322 0.821650 1.006154 1.494015 0.950610 1.597616 0.962432 0.411526 1.268245 1.035166 0.755580 0.614513 1.165176 0.863545 0.478808 1.186599 1.245776 1.010780 1.108782 0.462348 0.980049 0.717201 1.594621 1.345250 1.080317 1.295755 0.406487 0.916595 1.325978 1.354560 
1.209628 0.775334 1.611006 0.734939 0.604795 0.831590 0.316839 0.720204 0.739102 0.406983 0.807365 0.439079 0.654506 0.749812 1.219932 1.221502 0.534549 0.366653 0.989139 1.047143 0.643886 0.319602 0.621458 0.171583 0.052870 0.410973 0.266432 0.346806 0.124081 0.744631 0.876374 0.983852 1.121478 0.226588 0.390455 0.458977 0.646022 0.169284 0.593737 0.848242 1.612478 1.024757 1.028395 1.715509 1.202510 1.336850 1.219257 0.633523 0.988450 1.170813 0.852149 0.870198 It happens with both OpenACC and OpenMP.