Package: libscalapack-openmpi1
Version: 1.8.0-13
Severity: important

Dear Maintainer,
After upgrading to the latest Debian testing, my code started leaking
memory at a horrendous pace. The valgrind output traced the leak to
Cpdgemr2d and Cpzgemr2d (see below). It could be that the actual leak
is in OpenMPI, but somehow it is triggered only by pXgemr2d. The code
works fine on jessie and on an older testing installation (not updated
since November). I wrote a sample code which reproduces the bug
(attached). I played around with different processor grids, and it
leaked every time.

Many thanks,
Grigory

valgrind output:

==2843== 217,280 bytes in 194 blocks are definitely lost in loss record 904 of 908
==2843==    at 0x4C2BBAF: malloc (vg_replace_malloc.c:299)
==2843==    by 0x1B076856: ???
==2843==    by 0x70015FA: mca_coll_base_comm_select (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x6FA19AD: ??? (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x6FA3C92: ??? (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x90A909B: opal_progress (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.2.0)
==2843==    by 0x6FA3894: ompi_comm_activate (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x6F9FA96: ompi_comm_split (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x6FD5F5D: PMPI_Comm_split (in /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==    by 0x5986CF7: Cblacs_gridmap (in /usr/lib/libblacs-openmpi.so.1.1)
==2843==    by 0x6038D8C: Cpdgemr2d (in /usr/lib/libscalapack-openmpi.so.1.8.0)
==2843==    by 0x362823: __m_evol4_lz_MOD_evol4_pade (evol4_lz.F:956)
==2843==
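The trace bottoms out in PMPI_Comm_split, which Cblacs_gridmap performs
on every pXgemr2d call. One way to separate the two suspects is to
exercise the same pattern with plain MPI. Below is a minimal sketch of
such a check (my own diagnostic idea, not part of the attached
reproducer; the program name and iteration count are made up): if
split/free cycles alone leak under valgrind, the problem is in OpenMPI
itself; if memory stays flat, the missing MPI_Comm_free is more likely
on the BLACS/ScaLAPACK side.

! Minimal sketch (assumption, not from the original report):
! repeatedly split MPI_COMM_WORLD and free the result, mimicking
! the communicator churn Cblacs_gridmap causes inside each
! pXgemr2d call.
program comm_split_check
  use mpi
  implicit none
  integer :: ierr, myrank, newcomm, i

  call MPI_Init(ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
  do i = 1, 100000
     ! split and release immediately; a leak-free MPI should keep
     ! memory usage flat across iterations
     call MPI_Comm_split(MPI_COMM_WORLD, mod(myrank, 2), myrank, &
                         newcomm, ierr)
     call MPI_Comm_free(newcomm, ierr)
  enddo
  call MPI_Finalize(ierr)
end program comm_split_check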
!*********** Sample code (fortran) **************************************!
! compiled with
! mpif90 -O2 -o mx_pdgemr2d mx_pdgemr2d.f90 -llapack -lblas -lblacs-openmpi -lblacsF77init-openmpi /usr/lib/libscalapack-openmpi.so.1
! run, e.g.: mpirun -np 4 ./mx_pdgemr2d (watch the memory usage grow)
!************************************************************************!
program main
  use mpi
  implicit none

  integer, parameter :: Msz = 9
  integer :: myrank, np
  integer :: errorflag, info
  integer :: i, j, k
  double precision, allocatable :: M(:,:)
  double precision, allocatable :: M0(:,:)
  integer :: blacs_grid, nprows, npcols, myrow, mycol, numr, numc
  integer :: desc(9), desc0(9), blacs_grid0
  integer :: Mb, Nb
  integer, external :: numroc, indxl2g, indxg2p, indxg2l, izamax

  call MPI_Init(errorflag)
  call MPI_Comm_rank(MPI_COMM_WORLD, myrank, errorflag)
  call MPI_Comm_size(MPI_COMM_WORLD, np, errorflag)

  ! use the largest square processor grid that fits
  nprows = int( sqrt( real(np) ) )
  npcols = np / nprows
  if (nprows .ne. npcols) then
    npcols = nprows
  endif
  !if(myrank == 0) then
  !  print *, "nprows=", nprows, "npcols=", npcols
  !endif

  call Blacs_get( 0, 0, blacs_grid )
  call Blacs_gridinit( blacs_grid, 'Column-major', nprows, npcols )
  call Blacs_gridinfo( blacs_grid, nprows, npcols, myrow, mycol )

  ! the nodes that do not fit into the square are idle
  if( myrow < 0 .or. mycol < 0 ) go to 10

  ! calculate the number of rows/columns on the current node
  Mb = 4
  Nb = 4
  numr = Numroc(Msz, Mb, myrow, 0, nprows)
  numc = Numroc(Msz, Nb, mycol, 0, npcols)

  allocate(M(numr,numc))
  M(:,:) = 0.d0
  call Descinit(desc, Msz, Msz, Mb, Nb, &
                0, 0, blacs_grid, numr, info)

  ! the full matrix lives on rank 0 only
  if(myrank .eq. 0) then
    allocate(M0(Msz,Msz))
    M0(:,:) = 3.14d0
  endif

  call Blacs_get( 0, 0, blacs_grid0 )
  if( nprows .ne. 1 .and. npcols .ne. 1) then
    !call Blacs_gridinit(blacs_grid0, 'Column-major', nprows, npcols )
    call Blacs_gridinit(blacs_grid0, 'Column-major', 1, 1 )
  else
    blacs_grid0 = blacs_grid
  endif

  !call Descinit(desc0, Msz, Msz, Msz, Msz, 0, 0, &
  !              blacs_grid0, Msz, info)
  desc0 = (/ 1, blacs_grid0, Msz, Msz, Msz, Msz, 0, 0, Msz /)
  if(myrank .ne. 0) desc0(1) = -1

  ! redistribute back and forth; every pdgemr2d call leaks
  do i = 1, 9999999
    call pdgemr2d(Msz, Msz, M0, 1, 1, desc0, M, 1, 1, desc, &
                  blacs_grid)
    call pdgemr2d(Msz, Msz, M, 1, 1, desc, &
                  M0, 1, 1, desc0, blacs_grid)
  enddo

10 call blacs_exit(1)
  call MPI_Finalize(errorflag)
  call exit(0)

end program main

-- System Information:
Debian Release: 9.0
  APT prefers testing
  APT policy: (500, 'testing')
Architecture: amd64 (x86_64)

Kernel: Linux 4.8.0-2-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)

Versions of packages libscalapack-openmpi1 depends on:
ii  libatlas3-base [liblapack.so.3]  3.10.3-1+b1
ii  libblacs-openmpi1                1.1-38
ii  libblas3 [libblas.so.3]          3.7.0-1
ii  libc6                            2.24-10
ii  libgfortran3                     6.3.0-18
ii  liblapack3 [liblapack.so.3]      3.7.0-1
ii  libopenmpi2                      2.0.2-2
ii  mpi-default-bin                  1.8

libscalapack-openmpi1 recommends no packages.

Versions of packages libscalapack-openmpi1 suggests:
ii  scalapack-doc  1.5-11

-- no debconf information