On Mon, 2012-10-08 at 09:45 +0900, Kaz Kojima wrote:
> Oleg Endo <oleg.e...@t-online.de> wrote:
> > The attached patch improves comparisons such as
> > 'unsigned int <= 0x7FFFFFFF' on SH.
> > As mentioned in the PR, for some reason, those comparisons do not go
> > through the cstore expander. As a consequence the comparison doesn't
> > get the chance to be canonicalized by the target code and ends up as
> > '(~x) >> 31'.
> > I've not investigated this further and just fixed the symptoms on SH. I
> > don't know whether it's also an issue on other targets.
> >
> > Tested on rev 192142 with
> > make -k check RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> >
> > and no new failures.
> > OK?
>
> I've run CSiBE with and without the patch for sh4-unknown-linux-gnu
> at -O2. Only one difference in the resulted sizes: jpeg-6b/jcphuff
> increases 5336 bytes to 5340 bytes with the patch. Could you look
> into it?

Yep, that's actually the only place in the CSiBE set where this case
hits. The function in question is encode_mcu_AC_refine. The increase
seems to be due to different register allocation and different spill
code :T
I've attached the asm diff.

Cheers,
Oleg

--- CSiBE/m4-single-ml-O2-trunk/jpeg-6b/jcphuff.s +++ CSiBE/m4-single-ml-O2/jpeg-6b/jcphuff.s @@ -2147,7 +2147,7 @@ bt/s .L611 mov.l r2,@(24,r15) bra .L612 - mov.l @(44,r15),r0 + mov.l @(44,r15),r3 .L611: mov.l .L565,r4 mov r2,r5 @@ -2513,21 +2513,21 @@ mov r0,r1 mov r9,r0 and r2,r1 - mov.l @(24,r15),r3 + mov.l @(28,r15),r3 mov.b r1,@(r0,r8) mov r9,r11 - mov.l @(28,r15),r0 - add #1,r3 - mov.l @(36,r15),r1 + mov.l @(24,r15),r2 + add #4,r3 + mov.l @(36,r15),r0 add #1,r11 - mov.l @(40,r15),r2 + mov.l @(40,r15),r1 + add #1,r2 add #4,r0 - add #4,r1 - mov.l r3,@(24,r15) - mov.l r0,@(28,r15) - cmp/ge r3,r2 + mov.l r2,@(24,r15) + mov.l r3,@(28,r15) + cmp/ge r2,r1 bt/s .L599 - mov.l r1,@(36,r15) + mov.l r0,@(36,r15) tst r11,r11 bt/s .L555 mov r12,r14 @@ -2545,21 +2545,23 @@ mov.w .L578,r3 cmp/hi r3,r2 bf/s .L612 - mov.l @(44,r15),r0 + mov.l @(44,r15),r3 .L515: mov.l .L582,r2 jsr @r2 mov r14,r4 .L459: + mov.l @(44,r15),r3 +.L612: mov.l @(44,r15),r0 -.L612: + mov.l @(24,r3),r2 mov.l @(16,r14),r3 - mov.l @(24,r0),r2 mov.l r3,@r2 mov.l @(20,r14),r3 mov.l r3,@(4,r2) mov.w .L580,r2 - mov.l @(r0,r2),r2 + add r0,r2 + mov.l @(8,r2),r2 tst r2,r2 bt .L544 add #64,r14 @@ -2594,18 +2596,18 @@ add #1,r2 mov.l r2,@(16,r15) .L467: - mov.l @(24,r15),r3 - mov.l @(28,r15),r0 - mov.l @(36,r15),r1 - add #1,r3 - mov.l @(40,r15),r2 + mov.l @(24,r15),r2 + mov.l @(28,r15),r3 + mov.l @(36,r15),r0 + add #1,r2 + mov.l @(40,r15),r1 + add #4,r3 add #4,r0 - add #4,r1 - mov.l r3,@(24,r15) - mov.l r0,@(28,r15) - cmp/ge r3,r2 + mov.l r2,@(24,r15) + mov.l r3,@(28,r15) + cmp/ge r2,r1 bf/s .L603 - mov.l r1,@(36,r15) + mov.l r0,@(36,r15) .L599: bra .L617 mov.l @(28,r15),r1 @@ -2614,8 +2616,8 @@ bf/s .L523 mov r12,r14 .L555: - mov.l @(16,r15),r3 - cmp/pl r3 + mov.l @(16,r15),r2 + cmp/pl r2 bf .L459 mov.l @(56,r14),r3 bra .L625 @@ -2642,13 +2644,13 @@ add #1,r2 mov.l r2,@r3 .L511: - mov.l @(20,r15),r1 + mov.l @(20,r15),r0 .L620: - mov #0,r2 + mov #0,r1 mov #0,r11 - mov.l r2,@(16,r15) + mov.l r1,@(16,r15) bra 
.L467 - mov.l @(0,r1),r8 + mov.l @(0,r0),r8 .align 1 .L522: bra .L619 @@ -2659,7 +2661,7 @@ .L578: .short 937 .L580: - .short 196 + .short 188 .L581: .short 312 .L583: @@ -2728,16 +2730,15 @@ tst r3,r3 mov.l r14,@(28,r12) mov.l @r1,r0 - mov.l @(52,r15),r2 + mov.l @(52,r15),r1 add r0,r0 mov.l r11,@(24,r12) bf/s .L511 - mov.w @(r0,r2),r1 - not r1,r1 - mov r14,r10 - shll r1 + mov.w @(r0,r1),r2 + cmp/pz r2 neg r14,r3 movt r1 + mov r14,r10 add #23,r3 shld r3,r1 add #1,r10 @@ -2784,7 +2785,7 @@ mov r9,r6 .L601: bra .L620 - mov.l @(20,r15),r1 + mov.l @(20,r15),r0 .align 1 .L556: mov.l .L589,r1 @@ -2812,9 +2813,9 @@ add #-8,r10 .align 1 .L558: - mov.l .L589,r3 + mov.l .L589,r2 mov r12,r4 - jsr @r3 + jsr @r2 mov.l r1,@(4,r15) mov.l @(4,r15),r1 cmp/eq r13,r1 @@ -2830,8 +2831,8 @@ dt r2 bf/s .L507 mov.l r2,@(20,r12) - mov.l .L589,r0 - jsr @r0 + mov.l .L589,r3 + jsr @r3 mov r12,r4 bra .L622 add #-8,r10