------- Comment #43 from pthaugen at gcc dot gnu dot org  2008-04-30 18:49 -------
Created an attachment (id=15553)
 --> (http://gcc.gnu.org/bugzilla/attachment.cgi?id=15553&action=view)
Testcase

I tried a mainline with the latest patch.  While we no longer have problems
with the prior testcases, there is no improvement for leslie3d on ppc64. I can
still double the performance of the benchmark by specifying --param
max-aliased-vops=10000.

I'm including a new trimmed-down testcase from the benchmark, compiled with
'gfortran -m32 -O2', where I'm still seeing poor code when max-aliased-vops is
not increased.

Refer to the first nested loop in procedure FLUXK():

            DO I = I1, I2
               QS(I) = WAV(I,J,K) * ZAREA
            END DO


Base:

.L150:
        lwz 0,24(18)     # <variable>.stride, <variable>.stride
        lwz 9,36(18)     # <variable>.stride, <variable>.stride
        lwz 11,12(18)    # <variable>.stride, <variable>.stride
        lwz 10,4(18)     # wav.offset, wav.offset
        mullw 0,17,0     # tmp660, ivtmp.602, <variable>.stride
        lwz 8,0(18)      # wav.data, wav.data
        mullw 9,30,9     # tmp666, ivtmp.590, <variable>.stride
        mullw 11,6,11    # tmp670, i, <variable>.stride
        add 0,0,9        # tmp672, tmp660, tmp666
        addi 6,6,1       # i, i,
        add 0,0,11       # tmp673, tmp672, tmp670
        add 0,0,10       # tmp674, tmp673, wav.offset
        slwi 0,0,3       # tmp676, tmp674,
        lfdx 0,8,0       #, tmp678
        fmul 0,0,8       # tmp679, tmp678, zarea.64
        stfdx 0,15,7     #* ivtmp.589, tmp679
        addi 7,7,8       # ivtmp.589, ivtmp.589,
        bdnz .L150       #



With --param max-aliased-vops=1000:

.L150:
        lfd 0,0(11)      #* ivtmp.599, tmp739
        add 11,11,30     # ivtmp.599, ivtmp.599, D.2783
        fmul 0,0,8       # tmp740, tmp739, zarea.64
        stfdx 0,22,9     #* ivtmp.602, tmp740
        addi 9,9,8       # ivtmp.602, ivtmp.602,
        bdnz .L150       #


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32921

Reply via email to