m68k - GCC 4.4.0 generates not so good code from asm inline
Hi, Here is a C source code which I compiled with GCC 3.4.0 and GCC 4.4.0. GCC 3.4.0 output looks a lot better. #include #include #define umul_ppmm(xh, xl, a, b) \ __asm__ ("| Inlined umul_ppmm\n" \ " move.l %0,%/d5\n" \ " move.l %1,%/d4\n" \ " moveq #16,%/d3\n" \ " move.l %0,%/d2\n" \ " mulu %1,%0\n" \ " lsr.l %/d3,%/d4\n" \ " lsr.l %/d3,%/d5\n" \ " mulu %/d4,%/d2\n" \ " mulu %/d5,%1\n" \ " mulu %/d5,%/d4\n" \ " move.l %/d2,%/d5\n" \ " lsr.l %/d3,%/d2\n" \ " add.w %1,%/d5\n" \ " addx.l %/d2,%/d4\n" \ " lsl.l %/d3,%/d5\n" \ " lsr.l %/d3,%1\n" \ " add.l %/d5,%0\n" \ " addx.l %/d4,%1" \ : "=d" ((uint32_t) (xl)), "=d" ((uint32_t) (xh)) \ : "0" ((uint32_t) (a)), "1" ((uint32_t) (b)) \ : "d2", "d3", "d4", "d5") inline int64_t MUL64(int a, int b) { uint32_t au = a; uint32_t bu = b; uint32_t resh, resl; uint64_t res; umul_ppmm(resh, resl, au, bu); if (a < 0) resh -= bu; if (b < 0) resh -= au; res = ((uint64_t)resh << 32) | resl; return res; } GCC 4.4.0 asm output: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 36 "mul642.c" 1 | Inlined umul_ppmm move.l d6,d5 move.l d1,d4 moveq #16,d3 move.l d6,d2 mulu d1,d6 lsr.l d3,d4 lsr.l d3,d5 mulu d4,d2 mulu d5,d1 mulu d5,d4 move.l d2,d5 lsr.l d3,d2 add.w d1,d5 addx.l d2,d4 lsl.l d3,d5 lsr.l d3,d1 add.l d5,d6 addx.l d4,d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 3.4.0 asm output: #NO_APP .text .even .globl _MUL64 _MUL64: moveml #0x3f00,s...@- movel sp@(28),d1 movel sp@(32),d0 movel d1,d7 movel d0,d6 #APP | Inlined umul_ppmm move.l d7,d5 move.l d6,d4 moveq #16,d3 move.l d7,d2 mulu d6,d7 lsr.l d3,d4 lsr.l d3,d5 mulu d4,d2 mulu d5,d6 mulu d5,d4 move.l 
d2,d5 lsr.l d3,d2 add.w d6,d5 addx.l d2,d4 lsl.l d3,d5 lsr.l d3,d6 add.l d5,d7 addx.l d4,d6 #NO_APP tstl d1 jlt L5 tstl d0 jge L3 jra L6 .even L5: subl d0,d6 tstl d0 jge L3 .even L6: subl d1,d6 .even L3: movel d6,d0 clrl d1 orl d7,d1 moveml sp@+,#0xfc rts Is it a regression? Regards
Re: m68k - GCC 4.4.0 generates not so good code from asm inline
When I use -O1 with GCC 4.4.0 (-m68060 -fomit-frame-pointer), I get better code. #include #include inline int64_t MUL64(int a, int b) { uint32_t resh, resl; uint32_t au = a; uint32_t bu = b; __asm__ ("move.l %0, d5\n\t" "move.l %1, d4\n\t" "moveq #16, d3\n\t" "move.l %0, d2\n\t" "mulu %1, %0\n\t" "lsr.l d3, d4\n\t" "lsr.l d3, d5\n\t" "mulu d4, d2\n\t" "mulu d5, %1\n\t" "mulu d5, d4\n\t" "move.l d2, d5\n\t" "lsr.l d3, d2\n\t" "add.w %1, d5\n\t" "addx.l d2, d4\n\t" "lsl.l d3, d5\n\t" "lsr.l d3, %1\n\t" "add.l d5, %0\n\t" "addx.l d4, %1\n\t" : "=d"(resl), "=d"(resh) : "0"(au), "1"(bu) : "d2", "d3", "d4", "d5"); if (a < 0) resh -= bu; if (b < 0) resh -= au; return ((uint64_t)resh << 32) | resl; } GCC 4.4.0 -O3: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 11 "mul645.c" 1 move.l d6, d5 move.l d1, d4 moveq #16, d3 move.l d6, d2 mulu d1, d6 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d1 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d1, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d1 add.l d5, d6 addx.l d4, d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 4.4.0 -O2: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16128,-(sp) move.l 28(sp),d0 move.l 32(sp),a0 move.l d0,d6 move.l a0,d1 #APP ;# 11 "mul645.c" 1 move.l d6, d5 move.l d1, d4 moveq #16, d3 move.l d6, d2 mulu d1, d6 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d1 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d1, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d1 add.l d5, d6 addx.l d4, d1 #NO_APP tst.l d0 jlt L6 tst.l a0 jlt L7 L3: move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l 
(sp)+,#252 rts L7: sub.l d0,d1 move.l d1,d2 clr.l d3 move.l d2,d0 move.l d3,d1 or.l d6,d1 move.l d0,d6 move.l d1,d7 move.l d7,d1 movem.l (sp)+,#252 rts L6: sub.l a0,d1 tst.l a0 jge L3 jra L7 GCC 4.4.0 -O1: #NO_APP .text .even .globl _MUL64 _MUL64: movem.l #16176,-(sp) move.l 40(sp),d0 move.l 36(sp),a2 move.l a2,d7 move.l d0,d6 #APP ;# 11 "mul645.c" 1 move.l d7, d5 move.l d6, d4 moveq #16, d3 move.l d7, d2 mulu d6, d7 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d6 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d6, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d6 add.l d5, d7 addx.l d4, d6 #NO_APP tst.l a2 jge L2 sub.l d0,d6 L2: tst.l d0 jge L3 sub.l a2,d6 L3: move.l d6,d1 clr.l d2 or.l d7,d2 move.l d1,d0 move.l d2,d1 movem.l (sp)+,#3324 rts GCC 4.4.0 -O0: #NO_APP .text .even .globl _MUL64 _MUL64: lea (-16,sp),sp movem.l #16128,-(sp) move.l 44(sp),32(sp) move.l 48(sp),36(sp) move.l 32(sp),d1 move.l 36(sp),d0 #APP ;# 11 "mul645.c" 1 move.l d1, d5 move.l d0, d4 moveq #16, d3 move.l d1, d2 mulu d0, d1 lsr.l d3, d4 lsr.l d3, d5 mulu d4, d2 mulu d5, d0 mulu d5, d4 move.l d2, d5 lsr.l d3, d2 add.w d0, d5 addx.l d2, d4 lsl.l d3, d5 lsr.l d3, d0 add.l d5, d1 addx.l d4, d0 #NO_APP move.l d1,28(sp) move.l d0,24(sp) tst.l 44(sp) jge L2 move.l 36(sp),d0 sub.l d0,24(sp) L2: tst.l 48(sp) jge L3 move.l 32(sp),d2 sub.l d2,24(sp) L3: move.l 24(sp),d7 clr.l d6 move.l d7,d0 clr.l d1 move.l 28(sp),a1 lea 0.w,a0 move.l a0,d2 move.l a1,d3 or.l d2,d0 or.l d3,d1 movem.l (sp)+,#252 lea (16,sp),sp rts Regards
GCC's bug reports not available for search
Hi, Please someone change the status of these bug reports to be available in the search engine of bugtracker: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40819 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40977 Thanks Regards
Re: GCC's bug reports not available for search
Hi, > You have to be more specific on what you refer to with "search engine > of bugtracker". http://gcc.gnu.org/bugzilla/query.cgi Sorry, these bugs are available for search, but not with "m68k" keyword. Maybe this can be fixed. Regards
Re: GCC's bug reports not available for search
Hi, OK, I didn't use "advanced search". Problem solved. Regards
GCC 4.4.x speed regression - help?
Hi, I found out that GCC 4.4.x build of minigzip from zlib package is a lot slower compared to GCC 3.4.0 build. Maybe someone can compile minigzip for his system with GCC 3.4.x and GCC 4.4.x and compare time of compression with bigger file? This way we would know if this regression only happens on the m68k GCC or maybe on other platforms too. I really don't like regressions like this :/ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40454 Regards
Re: GCC 4.4.x speed regression - help?
Hi, The problematic source code is deflate.c from libz. CFLAGS=-O3 -DUSE_MMAP -m68060 -fomit-frame-pointer When I compile all source code with GCC 4.4.1, I get slow minigzip binary. When I compile all source code with GCC 4.4.1 except deflate.c (this one I compile with GCC 3.4.0), I get minigzip binary with normal speed. Regards
"-fno-unswitch-loops" option have no effect?
Hi, I found out that when I use the "-fno-unswitch-loops" option, it has no effect - loops are unswitched anyway: -O3 -fno-unswitch-loops Because of that, to avoid the -funswitch-loops optimization I must use something like this: -O2 -finline-functions -fpredictive-commoning -fgcse-after-reload -ftree-vectorize Is that a bug? Tested with GCC 4.4.1 (m68k). Regards
Re: "-fno-unswitch-loops" option have no effect?
Hi, > Hundreds and hundreds of people read this list, so every low-detail > "I think there may be a bug" message you send wastes hours of other > people's time. OK, OK, but if someone can reproduce the same problem on their system I can file a bug report; otherwise I will only waste my time, hours of my time, starting a detailed bug report. Maybe this is only the fault of my GCC build? > If it *is* a bug, on the bug tracker, it will be picked up by people > interested in that specific area It may take months before someone looks at the bug report. If it's really a bug, IMHO it's a fairly critical one. > though you don't seem to do much > in-depth research, nor do you supply any detail of what you did and > what you expected. There is no need for much detail: just compile any larger source file with -O3 -fno-.. and after that with -O2. If the object files are not identical, something is wrong and I can start a bug report. Regards
Re: "-fno-unswitch-loops" option have no effect?
Hi, > Works fine for me. gcc.dg/tree-ssa/loop-6.c is unswitched with -O3 > but not with -O3 -fno-unswitch-loops. This one works for me too. Could you try to compile "deflate.c" from libz? Here are my results: -m68060 -O3 -fno-unswitch-loops - 12,9kb -m68060 -O3 - 12,9kb -m68060 -O2 -finline-functions -fgcse-after-reload -ftree-vectorize -fpredictive-commoning - 12,4kb but it's a lot more visible with "libavcodec/dsputil.c" from FFmpeg package: OPTFLAGS= -mnobitfield -m68060 -std=c99 -Wdeclaration-after-statement -Wdisabled-optimization -fno-math-errno -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-common -fomit-frame-pointer -Wall -Wno-switch -Wpointer-arith -Wredundant-decls -Wcast-qual -Wwrite-strings -Wundef -O2 -finline-functions -fgcse-after-reload -ftree-vectorize -fpredictive-commoning: 306kb OPTFLAGS= -mnobitfield -m68060 -std=c99 -Wdeclaration-after-statement -Wdisabled-optimization -fno-math-errno -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-common -fomit-frame-pointer -Wall -Wno-switch -Wpointer-arith -Wredundant-decls -Wcast-qual -Wwrite-strings -Wundef -O3 -fno-unswitch-loops: 1,18mb OPTFLAGS= -mnobitfield -m68060 -std=c99 -Wdeclaration-after-statement -Wdisabled-optimization -fno-math-errno -D_ISOC99_SOURCE -D_POSIX_C_SOURCE=200112 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -fno-common -fomit-frame-pointer -Wall -Wno-switch -Wpointer-arith -Wredundant-decls -Wcast-qual -Wwrite-strings -Wundef -O3 1,18mb Regards
Re: "-fno-unswitch-loops" option have no effect?
Hi, > > > > Here are my results: > > > > -m68060 -O3 -fno-unswitch-loops > >- 12,9kb > > -m68060 -O3 > >- 12,9kb > > -m68060 -O2 -finline-functions -fgcse-after-reload -ftree-vectorize > > -fpredictive-commoning - 12,4kb > > And why do you think that loop unswitching has anything to do with the > size growth? Shouldn't I get the same file sizes with: -m68060 -O3 -fno-unswitch-loops and -m68060 -O2 -finline-functions -fgcse-after-reload -ftree-vectorize -fpredictive-commoning ? I don't know, but I think the same optimizations should be used in these cases, so the file size should be the same. Am I wrong?
Re: "-fno-unswitch-loops" option have no effect?
Hi, > gcc > 4 AOS 68k builds are build from offical gcc sourcetree and there are > no changes Yes, I know, but some problems may happen anyway, as has happened before (problems with defines). I think that if there is a problem, the best approach is to compare GCC builds from different sources. > how much is size diffrent ? 306kb vs 1,18mb - "dsputil.c" > .but i > think you can easy test, compile the ffmpeg, if it give no internal > compiler error with -fno then it work. I get the same ICE with "-O3" and "-O3 -fno-unswitch-loops" - "mpegvideo.c" file. Regards
Speed regression (m68k)
Hi, I noticed about a 20% speed regression with the GCC 4.4.0 build of PNGCrush compared to the GCC 3.4.0 build (Amiga 68060@50MHz). CFLAGS = -I. -DNO_FSEEKO -O2 -fomit-frame-pointer -Wall -m68060 -s Here are the results: GCC 3.4.0: CPU time used = 267.340 seconds (decoding 16.940, encoding 247.800, other 2.600 seconds) GCC 4.4.0: CPU time used = 328.360 seconds (decoding 16.800, encoding 309.260, other 2.300 seconds) Maybe someone with m68k Debian/PPC/x86 can compile PNGCrush with GCC 3.4.0 and GCC 4.4.0, so we will know if this regression happens there too? Regards
Re: Speed regression (m68k)
Hi, > I assume that you are measuring the speed of the compiled code, not the > speed of the compiler itself. Yes. > Can you open a bug report about this, > with enough information for other people to reproduce the results (e.g., > pointer to source code being compiled and input file(s)). Done. Bug #40454. Regards