https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64247
--- Comment #5 from Joost VandeVondele <Joost.VandeVondele at mat dot ethz.ch> --- The following is a test program that illustrates the issue: > cat test.f90 SUBROUTINE gemm(C,A,B,N) REAL*8 :: A(N,N), B(N,N),C(N,N) C=0 DO i=1,N DO j=1,N DO k=1,N C(i,j)=C(i,j)+A(k,i)*B(k,j) ENDDO ENDDO ENDDO END SUBROUTINE ! ! simple test driver, computing the same matrix multiplication ! with the same data, but with arrays aligned differently, ! once at x, once at x+16 bytes. ! Illustrates that results will vary, ! depending randomly on what alignment malloc returns. ! PROGRAM TEST REAL*8, DIMENSION(:,:), POINTER, CONTIGUOUS :: A,B,C REAL*8, DIMENSION(:), POINTER, CONTIGUOUS :: Av1,Bv1,Cv1 REAL*8, DIMENSION(:), POINTER, CONTIGUOUS :: Av2,Bv2,Cv2 REAL*8, DIMENSION(:), POINTER, CONTIGUOUS :: Av3,Bv3,Cv3 INTEGER :: N READ(5,*) N ALLOCATE(Av1(N*N+2),Av2(N*N+2),Bv1(N*N+2), & Bv2(N*N+2),Cv1(N*N+2),Cv2(N*N+2)) CALL RANDOM_NUMBER(Av1) CALL RANDOM_NUMBER(Bv1) ! try one: results as if allocated at boundary x Av2(1:N*N)=Av1(1:N*N) Bv2(1:N*N)=Bv1(1:N*N) A(1:N,1:N)=>Av2(1:N*N) B(1:N,1:N)=>Bv2(1:N*N) C(1:N,1:N)=>Cv2(1:N*N) CALL gemm(C,A,B,N) Cv1(1:N*N)=Cv2(1:N*N) ! try two: results as if allocated at boundary x+16 Av2(1+2:N*N+2)=Av1(1:N*N) Bv2(1+2:N*N+2)=Bv1(1:N*N) A(1:N,1:N)=>Av2(1+2:N*N+2) B(1:N,1:N)=>Bv2(1+2:N*N+2) C(1:N,1:N)=>Cv2(1+2:N*N+2) CALL gemm(C,A,B,N) IF (ANY(Cv1(1:N*N).NE.Cv2(1+2:N*N+2))) CALL ABORT() END PROGRAM TEST > gfortran -g -march=sandybridge -mavx -O3 -ffast-math test.f90 && echo 10 | > ./a.out Program aborted. Backtrace: #0 0x7F5C53950387 #1 0x7F5C53951A72 #2 0x7F5C53A23378 #3 0x4012D6 in test at test.f90:48 Aborted