Hello Kugan,
Thanks a lot for your support.

I have attached the source code and the corresponding assembly code, generated
with gcc-4.8 and gcc-5.1. The compiler flag is "-O3".


Best Regards
Ron

-----Original Message-----
From: Kugan [mailto:kugan.vivekanandara...@linaro.org] 
Sent: Tuesday, January 05, 2016 6:51 PM
To: Xiaofeng Ren <xiaofeng....@nxp.com>; Bernie Ogden <bernie.og...@linaro.org>
Cc: linaro-toolchain@lists.linaro.org
Subject: Re: gcc-linaro-5.1 vs gcc-linaro-4.8

Hi Ron,
> 
> Following part of assembly code for fcp function: 
> 
> Gcc-5.1:
>   40110c:       3dc00c6c        ldr     q12, [x3,#48]
>   401110:       3dc0106b        ldr     q11, [x3,#64]
>   401114:       3dc0146a        ldr     q10, [x3,#80]
>   401118:       3dc01869        ldr     q9, [x3,#96]
>   40111c:       3dc01c68        ldr     q8, [x3,#112]
>   401120:       3dc0207f        ldr     q31, [x3,#128]
>   401124:       3dc0247e        ldr     q30, [x3,#144]
>   401128:       3dc0287d        ldr     q29, [x3,#160]
>   40112c:       3dc02c7c        ldr     q28, [x3,#176]
>   401130:       3dc0307b        ldr     q27, [x3,#192]
>   401134:       3dc0347a        ldr     q26, [x3,#208]
>   401138:       3dc03879        ldr     q25, [x3,#224]
>   40113c:       3dc03c78        ldr     q24, [x3,#240]
>   401140:       3dc04077        ldr     q23, [x3,#256]
>   401144:       3dc04476        ldr     q22, [x3,#272]
>   401148:       3dc04875        ldr     q21, [x3,#288]
>   40114c:       3dc04c74        ldr     q20, [x3,#304]
>   401150:       3dc05073        ldr     q19, [x3,#320]
>   401154:       3dc05472        ldr     q18, [x3,#336]
>   401158:       3dc05871        ldr     q17, [x3,#352]
>   40115c:       3dc05c70        ldr     q16, [x3,#368]
>   401160:       3dc06067        ldr     q7, [x3,#384]
>   401164:       3dc06466        ldr     q6, [x3,#400]
>   401168:       3dc06865        ldr     q5, [x3,#416]
>   40116c:       3dc06c64        ldr     q4, [x3,#432]
>   401170:       3dc07063        ldr     q3, [x3,#448]
>   401174:       3dc07462        ldr     q2, [x3,#464]
>   401178:       3dc07861        ldr     q1, [x3,#480]
>   40117c:       3dc07c60        ldr     q0, [x3,#496]
>   401180:       3dc0006f        ldr     q15, [x3]
>   401184:       91080063        add     x3, x3, #0x200
> 
> Gcc-4.8:
>   40135c:       4cdf78af        ld1     {v15.4s}, [x5], #16
>   401360:       4c40790d        ld1     {v13.4s}, [x8]
>   401364:       4c4078ae        ld1     {v14.4s}, [x5]
>   401368:       9100c048        add     x8, x2, #0x30
>   40136c:       91010045        add     x5, x2, #0x40
>   401370:       4c40790c        ld1     {v12.4s}, [x8]
>   401374:       4c4078ab        ld1     {v11.4s}, [x5]
>   401378:       91014048        add     x8, x2, #0x50
>   40137c:       91018045        add     x5, x2, #0x60
>   401380:       4c40790a        ld1     {v10.4s}, [x8]
>   401384:       4c4078a9        ld1     {v9.4s}, [x5]
>   401388:       9101c048        add     x8, x2, #0x70
>   40138c:       91020045        add     x5, x2, #0x80
>   401390:       4c407908        ld1     {v8.4s}, [x8]
>   401394:       4c4078bf        ld1     {v31.4s}, [x5]
>   401398:       91024048        add     x8, x2, #0x90
>   40139c:       91028045        add     x5, x2, #0xa0
>   4013a0:       4c40791e        ld1     {v30.4s}, [x8]
>   4013a4:       4c4078bd        ld1     {v29.4s}, [x5]
>   4013a8:       9102c048        add     x8, x2, #0xb0
>   4013ac:       91030045        add     x5, x2, #0xc0
> 

Is it possible to create a compilable testcase with "fcp" so that we can 
reproduce the above? It need not be an executable test-case.

Thanks,
Kugan

Attachment: fcp_gcc51_O3.s
Description: fcp_gcc51_O3.s

/* Element type copied by the benchmark kernel below. */
#define TYPE    int


/*
 * State handed to fcp() through its opaque `cookie` argument.
 *
 * fcp() reads only buf, buf2 and lastone; the remaining fields are not
 * referenced by any code in this test case.
 */
typedef struct _state {
        double  overhead;
        int     nbytes;
        int     need_buf2;
        int     aligned;
        TYPE    *buf;           /* source buffer read by fcp() */
        TYPE    *buf2;          /* destination buffer written by fcp() */
        TYPE    *buf2_orig;
        TYPE    *lastone;       /* highest address in buf at which a block may start */
        int     N;
} state_t;


/* Copy element i of the current source block to the destination block. */
#define DOIT(i) dst[i] = p[i];

/*
 * Copy state->buf to state->buf2 in hand-unrolled blocks of 128 elements,
 * repeating the whole copy `iterations` times.
 *
 * iterations: number of full passes over the buffer; nothing is copied
 *             when it is <= 0.
 * cookie:     pointer to a state_t supplying buf, buf2 and lastone.
 *
 * A block of 128 elements is copied for every position p <= lastone, so the
 * caller must guarantee that lastone[0..127] lies inside buf and that buf2
 * has room for the same number of elements.
 *
 * The unrolling is intentional: this function exists to reproduce the code
 * the compiler generates for this exact shape, so it is not rewritten as a
 * counted loop.
 */
void
fcp(int iterations, void *cookie)
{
        state_t *state = (state_t *) cookie;
        register TYPE *lastone = state->lastone;

        while (iterations-- > 0) {
            /*
             * p must have the buffer's element type. Declaring it as
             * char * is an incompatible-pointer assignment and comparison,
             * and would copy sign-extended bytes while advancing only
             * 128 bytes per block instead of 128 elements.
             */
            register TYPE *p = state->buf;
            register TYPE *dst = state->buf2;

            while (p <= lastone) {
                DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) DOIT(5) DOIT(6)
                DOIT(7) DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12)
                DOIT(13) DOIT(14) DOIT(15) DOIT(16) DOIT(17) DOIT(18)
                DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23) DOIT(24)
                DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30)
                DOIT(31) DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36)
                DOIT(37) DOIT(38) DOIT(39) DOIT(40) DOIT(41) DOIT(42)
                DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47) DOIT(48)
                DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54)
                DOIT(55) DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60)
                DOIT(61) DOIT(62) DOIT(63) DOIT(64) DOIT(65) DOIT(66)
                DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71) DOIT(72)
                DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78)
                DOIT(79) DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84)
                DOIT(85) DOIT(86) DOIT(87) DOIT(88) DOIT(89) DOIT(90)
                DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95) DOIT(96)
                DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102)
                DOIT(103) DOIT(104) DOIT(105) DOIT(106) DOIT(107)
                DOIT(108) DOIT(109) DOIT(110) DOIT(111) DOIT(112)
                DOIT(113) DOIT(114) DOIT(115) DOIT(116) DOIT(117)
                DOIT(118) DOIT(119) DOIT(120) DOIT(121) DOIT(122)
                DOIT(123) DOIT(124) DOIT(125) DOIT(126) DOIT(127)
                p += 128;
                dst += 128;
            }
        }
}

Attachment: fcp_gcc48_O3.s
Description: fcp_gcc48_O3.s

_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to