Re: SH optimized software floating point routines

2010-07-21 Thread Christian Bruel

Hi Kaz,

Kaz Kojima wrote:



BTW, it looks that softfp __unord?f2 routines check signaling NaNs
only.  This makes __builtin_isnan return false for quiet NaNs for
which current fp-bit ones return true when -mieee enabled.  Perhaps
that change of behavior might be OK for software FP.


I use the attached patch to handle the QNaNs in the assembly soft-fp.
It needs to be updated for trunk (and the dates in the changelogs updated). Will do.


Cheers

Christian

2010-04-20  Christian Bruel  

* gcc.dg/builtins-nan.c: New test.

2010-04-20  Christian Bruel  

* config/sh/ieee-754-df.S (nedf2f): Don't check Qbit for NaNs.
* config/sh/ieee-754-sf.S (nesf2f): Likewise.
* config/sh/sh.md (cmpunsf_i1, cmpundf_i1): Likewise. Clobber R2.




2010-04-20  Christian Bruel  

* gcc.dg/builtins-nan.c: New test.

2010-04-20  Christian Bruel  

* config/sh/ieee-754-df.S (nedf2f): Don't check Qbit for NaNs.
* config/sh/ieee-754-sf.S (nesf2f): Likewise.
* config/sh/sh.md (cmpunsf_i1, cmpundf_i1): Likewise. Clobber R2.

Index: gcc/config/sh/ieee-754-df.S
===
--- gcc/config/sh/ieee-754-df.S (revision 1352)
+++ gcc/config/sh/ieee-754-df.S (revision 1373)
@@ -88,11 +88,12 @@
HIDDEN_FUNC(GLOBAL(nedf2f))
 GLOBAL(nedf2f):
cmp/eq  DBL0L,DBL1L
+   bf.sLOCAL(ne)
+   mov #1,r0
+   cmp/eq  DBL0H,DBL1H
mov.l   LOCAL(c_DF_NAN_MASK),r1
-   bf LOCAL(ne)
-   cmp/eq  DBL0H,DBL1H
-   not DBL0H,r0
-   bt  LOCAL(check_nan)
+   bt.sLOCAL(check_nan)
+   not DBL0H,r0
mov DBL0H,r0
or  DBL1H,r0
add r0,r0
@@ -100,11 +101,17 @@
or  DBL0L,r0
 LOCAL(check_nan):
tst r1,r0
-   rts
+   bt.sLOCAL(nan)
+   mov #12,r2
+   shll16  r2
+   xor r2,r1
+   tst r1,r0
+LOCAL(nan):
movtr0
 LOCAL(ne):
rts
-   mov #1,r0
+   nop
+   
.balign 4
 LOCAL(c_DF_NAN_MASK):
.long DF_NAN_MASK

Index: gcc/config/sh/ieee-754-sf.S
===
--- gcc/config/sh/ieee-754-sf.S (revision 1352)
+++ gcc/config/sh/ieee-754-sf.S (revision 1373)
@@ -55,19 +55,27 @@
   the values are NaN.  */
cmp/eq  r4,r5
mov.l   LOCAL(c_SF_NAN_MASK),r1
+   bt.sLOCAL(check_nan)
not r4,r0
-   bt  LOCAL(check_nan)
mov r4,r0
or  r5,r0
rts
add r0,r0
 LOCAL(check_nan):
tst r1,r0
+   bt.sLOCAL(nan)
+   mov #96,r2
+   shll16  r2
+   xor r2,r1
+   tst r1,r0   
+LOCAL(nan):
rts
movtr0
+   
.balign 4
 LOCAL(c_SF_NAN_MASK):
.long SF_NAN_MASK
+LOCAL(c_SF_SNAN_MASK):
ENDFUNC(GLOBAL(nesf2f))
 #endif /* L_nesf2f */
 
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md (revision 1352)
+++ gcc/config/sh/sh.md (revision 1373)
@@ -11182,6 +11182,7 @@
 (clobber (reg:SI T_REG))
 (clobber (reg:SI PR_REG))
 (clobber (reg:SI R1_REG))
+(clobber (reg:SI R2_REG))
 (use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_SH1 && ! TARGET_SH2E"
"jsr@%1%#"
@@ -11257,13 +11258,18 @@
 
  (define_insn "cmpunsf_i1"
[(set (reg:SI T_REG)
-   (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r,r")
- (match_operand:SF 1 "arith_reg_operand" "r,r")))
-(use (match_operand:SI 2 "arith_reg_operand" "r,r"))
-(clobber (match_scratch:SI 3 "=0,&r"))]
+   (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r")
+ (match_operand:SF 1 "arith_reg_operand" "r")))
+(use (match_operand:SI 2 "arith_reg_operand" "r"))
+(clobber (match_scratch:SI 3 "=&r"))]
"TARGET_SH1 && ! TARGET_SH2E"
-   "not\t%0,%3\;tst\t%2,%3\;not\t%1,%3\;bt\t0f\;tst\t%2,%3\;0:"
-   [(set_attr "length" "10")])
+   "not\t%0,%3\;tst\t%2,%3\;bt.s\t0f
+\tnot\t%1,%3\;tst\t%2,%3\;bt.s\t0f
+\tmov\t#96,%3\;shll16\t%3\;xor\t%3,%2
+\tnot\t%0,%3\;tst\t%2,%3\;bt.s\t0f
+\tnot\t%1,%3\;tst\t%2,%3
+0:"
+   [(set_attr "length" "28")])
 
  ;; ??? This is a lot of code with a lot of branches; a library function
  ;; might be better.
@@ -11967,6 +11973,7 @@
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
   "TARGET_SH1_SOFTFP"
   "jsr @%1%#"
@@ -12008,13 +12015,18 @@
 
 (define_insn "cmpundf_i1"
   [(set (reg:SI T_REG)
-   (unordered:SI (match_operand:DF 0 "arith_reg_operand" "r,r")
- (match_operand:DF 1 "arith_reg_operand" "r,r")))
-   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
-   (clobber (match_scratch:SI 3 "=0,&r"))]
+   (unordered:SI (match_operand:DF 0 "

Re: Reload problems with only one base reg for "base + offset" addressing mode

2010-07-21 Thread redriver jiang
Hi,

You mean I should define insn like this:

(define_insn "*iorqi3_imm"
 [(set (mem:QI (match_operand:HI 0 "register_operand"   "b"))
(ior:QI (mem:QI (match_operand:HI 1 "register_operand"   "b")
  (mem:QI (plus: HI (match_operand:HI 2 "register_operand"  
"f")
(match_operand: 3 HI "immediate_operand" 
"K")   ]
""
"..."
[( set_attr "length" "1" )])

"b" for R16,R17,R18, and "f" for R18, "K" for immediate operand with
range "0-127"?


Thanks!



2010/7/20 Ian Lance Taylor :
> redriver jiang  writes:
>
>> I am porting GCC to a 8bit architecture, and now I have problem on
>> reload problem on addressing mode.
>> Besides 15 general registers, it has three 16bit address registers,
>> R16,R17,R18.
>> R16,R17,R18 are able to be as base register in "base" address mode,
>> but only R18 can be base regs for "base+offse(immediate)t" address
>> mode.
>> I make "BASE_REGS" class  for "R16,R17,R18", and "POINTER_REG" class
>> for R18, and frame pointer is R18, the maxim "offset" in "base+offset"
>> is 127.
>>
>> And now the test compiler sometimes generate following errors:
>>
>> test3.c: In function 'OS_EventTaskWait':
>> test3.c:62: error: unable to find a register to spill in class 'POINTER_REG'
>> test3.c:62: error: this is the insn:
>> (insn 58 57 61 2 (set (mem/s:QI (plus:HI (reg:HI 16 R16 [51])
>>                 (const_int 5 [0x5])) [0 .OSEventTbl S1 A8])
>>         (ior:QI (mem/s:QI (plus:HI (reg:HI 16 R16 [51])
>>                     (const_int 5 [0x5])) [0 .OSEventTbl S1 A8])
>>             (mem/s:QI (plus:HI (reg:HI 17 R17 [orig:38 OSTCBCur.0 ] [38])
>>                     (const_int 14 [0xe])) [0 .OSTCBBitX+0 S1
>> A8]))) 25 {*iorqi3_noimm} (insn_list:REG_DEP_TRUE 51 (nil))
>>     (expr_list:REG_DEAD (reg:HI 17 R17 [orig:38 OSTCBCur.0 ] [38])
>>         (expr_list:REG_DEAD (reg:HI 16 R16 [51])
>> test3.c:62: confused by earlier errors, bailing out.
>
> You should be able to fix this by using constraints.  Define a
> constraint which uses the base register and define one which permits one
> of the indirect registers.  Write different alternatives such that only
> one operand uses the base register in each alternative.  Then reload
> should be able to pick the best one, and reload the other addresses into
> the indirect register.
>
> Ian
>


Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Steven Bosscher
Hello,

Back in 2001, GCC could disambiguate almost no MEMs on ia64 because
ia64 has no (reg+offs) addressing modes. Bernd added a trick to alias
and to sched-ebb to use cselib, to substitute a reg address with a
reg+offs address recorded by cselib (see
http://gcc.gnu.org/viewcvs?view=revision&revision=32538 and
http://gcc.gnu.org/viewcvs?view=revision&revision=44716).

There are a couple of problems with this approach:

1. This feature gets almost no testing. By default, the sched-ebb.c
code is only used by ia64 and picochip. A special flag
"-fsched2-use-superblocks" must be given by the user to enable
sched-ebb on other targets. (A bit of searching with Google Code
Search shows that this is not a very popular option ;-). For ia64,
sched-ebb is even only used at -O1 and -O2. At -O3 the selective
scheduler is used instead. All this considered, I think we may
conclude that this code is not used all that much.
As a result, the code appears to be buggy. See
http://gcc.gnu.org/PR43494, and http://gcc.gnu.org/35658 is almost
certainly the same issue.

2. sched-ebb is the only scheduler that uses this. The new "ia64
scheduler" (selective scheduler) does not use cselib but still
outperforms sched-ebb in terms of code generation.

3. GCC now has better alias analysis than it used to, especially with
the alias-exporting stuff that exports the GIMPLE points-to analysis
results, but also just all the other little things that were
contributed over the last 10 years (little things like tree-ssa :)


It is unclear whether the use of cselib brings any benefit. I tried to
measure this by instrumenting true_dependence() and compiling a set of
cc1-i files.

It looks like ~9% extra !true_dependence cases are found with cselib,
which is not insignificant:

situationcalls   depends ratio
with_cselib  186764  70463   0.377284
asis 186764  76375   0.408939 (i.e. no cselib)

On the other hand, the difference in instruction count is really small
(408 instructions more, or 0.02%):

situation# insns  # bundles  # stops
with_cselib  1984191  611991 530006
asis 1984599  612127 530337
 +0.021%   +0.022%   +0.062%
(insns counted with egrep "^\s+[a-z].*" *.s
bundles counted with egrep "^\s+\.[a-z]{3}\s*$" *.s
stops counted with egrep "\s+;;\s*$" *.s)

The "asis" results are the numbers with this patch applied, to disable
cselib in sched-ebb:

Index: sched-ebb.c
===
--- sched-ebb.c (revision 162278)
+++ sched-ebb.c (working copy)
@@ -275,7 +275,7 @@
 ebb_compute_jump_reg_dependencies,
 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 NULL,
-1, 0, 0
+0, 0, 0
   };

 static struct haifa_sched_info ebb_sched_info =


I would like to remove the cselib code from the scheduler (and,
perhaps, later also from alias.c). It seems to me that:

* the benefit is really small
* there is a better alternative available (selective scheduler)
* that it is just safer to try to minimize the amount of code that
receives so little testing

The latter is especially true for such complicated code as the
scheduler and alias analysis.

What do you all think about this?

Ciao!
Steven


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Bernd Schmidt
On 07/21/2010 03:06 PM, Steven Bosscher wrote:

> It looks like ~9% extra !true_dependence cases are found with cselib,
> with is not insignificant:
> 
> situationcalls   depends ratio
> with_cselib  186764  70463   0.377284
> asis 186764  76375   0.408939 (i.e. no cselib)
> 
> On the other hand, the difference in instruction count is really small
> (408 instructions more, or 0.02%):
> 
> situation# insns  # bundles  # stops
> with_cselib  1984191  611991 530006
> asis 1984599  612127 530337
>  +0.021%   +0.022%   +0.062%
> (insns counted with egrep "^\s+[a-z].* *.s
> bundles counted with egrep "^\s+\.[a-z]{3}\s*$" *.s
> stops counted with egrep "\s+;;\s*$" *.s)

Isn't that completely the wrong thing to measure when examining
schedules?  The former is meaningful, as it shows how much room the
scheduler has to move memory accesses around.

> I would like to remove the cselib code from the scheduler (and,
> perhaps, later also from alias.c).

I do not like removing working, useful code.  If there weren't any users
of sched-ebb left, then yes.


Bernd


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Bernd Schmidt
On 07/21/2010 03:06 PM, Steven Bosscher wrote:
> 3. GCC now has better alias analysis than it used to, especially with
> the alias-exporting stuff that exports the GIMPLE points-to analysis
> results, but also just all the other little things that were
> contributed over the last 10 years (little things like tree-ssa :)
[...]
> It looks like ~9% extra !true_dependence cases are found with cselib,
> with is not insignificant:

So, if you want to do something useful in this area, try finding out why
cselib is still useful despite your point 3 above.  Maybe alias analysis
can be improved?

If that can't be improved, I think that rather than remove cselib from
the scheduler, the question should be: if it's useful, why don't we use
it for other schedulers rather than only sched-ebb?


Bernd


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Steven Bosscher
On Wed, Jul 21, 2010 at 4:44 PM, Bernd Schmidt  wrote:
> On 07/21/2010 03:06 PM, Steven Bosscher wrote:
>> 3. GCC now has better alias analysis than it used to, especially with
>> the alias-exporting stuff that exports the GIMPLE points-to analysis
>> results, but also just all the other little things that were
>> contributed over the last 10 years (little things like tree-ssa :)
> [...]
>> It looks like ~9% extra !true_dependence cases are found with cselib,
>> with is not insignificant:
>
> So, if you want to do something useful in this area, try finding out why
> cselib is still useful despite your point 3 above.  Maybe alias analysis
> can be improved?

Yes, this is what I planned to do anyway.

> If that can't be improved, I think that rather than remove cselib from
> the scheduler, the question should be: if it's useful, why don't we use
> it for other schedulers rather than only sched-ebb?

Well, for one thing: It currently breaks things. See PRs I referenced.
We end up not translating the VALUE rtx'en to normal addresses, and
missing real true dependencies.

Ciao!
Steven


Re: gcc command line exceeds 8191 when building in XP

2010-07-21 Thread IceColdBeer



Cedric Roux-4 wrote:
> 
> Tim Prince wrote:
>> On 7/19/2010 4:13 PM, IceColdBeer wrote:
>>> Hi,
>>>
>>> I'm building a project using GNU gcc, but the command line used to build
>>> each source file sometimes exceeds 8191 characters, which is the maximum
>>> supported command line length under Win XP.Even worst under Win
>>> 2000,
>>> where the maximum command line length is limited to 2047 characters.
>>>
>>> Can the GNU gcc read the build options from a file instead ?I have
>>> searched, but cannot find an option in the documentation.
>>>
>>> Thanks in advance,
>>> ICB
>>>
>>>
>>>
>> redirecting to gcc-help.
>> The gcc builds for Windows themselves use a scheme for splitting the 
>> link into multiple steps in order to deal with command line length 
>> limits.  I would suggest adapting that.  Can't study it myself now while 
>> travelling.
>> 
> 
> @file says the manpage. "Read command-line options from file."
> 
> 

Thanks, yes this is available in version 4.5.0 (tdm-1) of gcc. Correct
me if I'm wrong, but this feature is not available in gcc version 3.4.5.


-- 
View this message in context: 
http://old.nabble.com/gcc-command-line-exceeds-8191-when-building-in-XP-tp29205136p29226909.html
Sent from the gcc - Dev mailing list archive at Nabble.com.



Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Jakub Jelinek
On Wed, Jul 21, 2010 at 04:57:10PM +0200, Steven Bosscher wrote:
> On Wed, Jul 21, 2010 at 4:44 PM, Bernd Schmidt  
> wrote:
> > On 07/21/2010 03:06 PM, Steven Bosscher wrote:
> >> 3. GCC now has better alias analysis than it used to, especially with
> >> the alias-exporting stuff that exports the GIMPLE points-to analysis
> >> results, but also just all the other little things that were
> >> contributed over the last 10 years (little things like tree-ssa :)
> > [...]
> >> It looks like ~9% extra !true_dependence cases are found with cselib,
> >> with is not insignificant:
> >
> > So, if you want to do something useful in this area, try finding out why
> > cselib is still useful despite your point 3 above.  Maybe alias analysis
> > can be improved?
> 
> Yes, this is what I planned to do anyway.
> 
> > If that can't be improved, I think that rather than remove cselib from
> > the scheduler, the question should be: if it's useful, why don't we use
> > it for other schedulers rather than only sched-ebb?
> 
> Well, for one thing: It currently breaks things. See PRs I referenced.

Just because some part of GCC contains a bug doesn't mean we need to nuke
everything related to that bug.  There are bugs in lots of parts of GCC.
There were several bugs fixed in -fsched2-use-superblocks support in the
past year or two, this bug is just something that needs analysing and
fixing.

> We end up not translating the VALUE rtx'en to normal addresses, and
> missing real true dependencies.

I don't think that is a bug, if a VALUE lost all locations, what else
should get_addr return?  I guess the bug is how we are handling such
location-less VALUE.

Jakub


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Steven Bosscher
On Wed, Jul 21, 2010 at 5:14 PM, Jakub Jelinek  wrote:
>> > If that can't be improved, I think that rather than remove cselib from
>> > the scheduler, the question should be: if it's useful, why don't we use
>> > it for other schedulers rather than only sched-ebb?
>>
>> Well, for one thing: It currently breaks things. See PRs I referenced.
>
> Just because some part of GCC contains a bug doesn't mean we need to nuke
> everything related to that bug.

Agreed, and I never suggested that.

However, IMHO the situation *may* be different for code that has known
bugs, *and* is de facto target specific,  *and* receives little
testing, *and* gives no measurable benefit in generated code,  *and*
receives little attention from developers (both bugs I referenced are
wrong-code bugs that took very long even to just confirm). I know this
opinion is not shared by most people in the GCC community.

Bernd's suggestion to enable this in other schedulers is a good idea
IMHO because at least that way the code gets much wider test coverage.
There are very few people who fix ia64-specific problems, but if this
feature is enabled in other schedulers then it's suddenly not
ia64-specific anymore. Good.

(NB: Alexander Monakov already explained why it's hard for sel-sched
to use cselib, see http://gcc.gnu.org/PR43494#c21)

But there is a bug to be fixed first.

I'll just sit back, enjoy myself, and watch to see if anyone besides
me cares enough to have this bug fixed in the not-too-distant future
:-)


>  There are bugs in lots of parts of GCC.
> There were several bugs fixed in -fsched2-use-superblocks support in the
> past year or two, this bug is just something that needs analysing and
> fixing.

Note, I already did my best to analyze the bug, or this whole
discussion wouldn't have come up in the first place. But I don't fully
understand the problem and so I also don't know how to fix it.


>> We end up not translating the VALUE rtx'en to normal addresses, and
>> missing real true dependencies.
>
> I don't think that is a bug, if a VALUE lost all locations, what else
> should get_addr return?  I guess the bug is how we are handling such
> location-less VALUE.

Well, I don't understand how we can end up with a VALUE in sched-deps
that has lost all its locations in the first place. The VALUE without
locations is in a MEM of the insn that sched_analyze_insn is working
on, so there should be a location for this VALUE (i.e. in the insn
that's being analyzed).

Ciao!
Steven


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Maxim Kuvyrkov

On 7/21/10 6:44 PM, Bernd Schmidt wrote:

On 07/21/2010 03:06 PM, Steven Bosscher wrote:

3. GCC now has better alias analysis than it used to, especially with
the alias-exporting stuff that exports the GIMPLE points-to analysis
results, but also just all the other little things that were
contributed over the last 10 years (little things like tree-ssa :)

[...]

It looks like ~9% extra !true_dependence cases are found with cselib,
with is not insignificant:

...

If that can't be improved, I think that rather than remove cselib from
the scheduler, the question should be: if it's useful, why don't we use
it for other schedulers rather than only sched-ebb?


Cselib can /always/ be used during second scheduling pass and on 
single-block regions during the first scheduling pass (after RA 
sched-rgn operates on single-block regions).


Modulo the bugs enabling cselib might surface, the only reason not to 
enable cselib for single-block regions in sched-rgn may be increased 
compile time.  That requires some benchmarking, but my gut feeling is 
that the benefits would outweigh the compile-time cost.


--
Maxim Kuvyrkov
CodeSourcery
ma...@codesourcery.com
(650) 331-3385 x724


Re: Revisiting the use of cselib in alias.c for scheduling

2010-07-21 Thread Steven Bosscher
On Wed, Jul 21, 2010 at 10:09 PM, Maxim Kuvyrkov  wrote:
> Cselib can /always/ be used during second scheduling pass

Except with the selective scheduler when it works on regions that are
not extended basic blocks, I suppose?

> and on
> single-block regions during the first scheduling pass (after RA sched-rgn
> operates on single-block regions).
>
> Modulo the bugs enabling cselib might surface, the only reason not to enable
> cselib for single-block regions in sched-rgn may be increased compile time.
>  That requires some benchmarking, but my gut feeling is that the benefits
> would outweigh the compile-time cost.

So something like the following _should_ work? If so, I'll give it a
try on x86*.

Ciao!
Steven

Index: sched-rgn.c
===
--- sched-rgn.c (revision 162355)
+++ sched-rgn.c (working copy)
@@ -3285,8 +3285,11 @@
 rgn_setup_sched_infos (void)
 {
   if (!sel_sched_p ())
-memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
-   sizeof (rgn_sched_deps_info));
+{
+  memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
+ sizeof (rgn_sched_deps_info));
+  rgn_sched_deps_info.use_cselib = reload_completed;
+}
   else
 memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
sizeof (rgn_sched_deps_info));


Re: SH optimized software floating point routines

2010-07-21 Thread Kaz Kojima
> I'm trying the attached patch over sh-softfp-20100718-2131 patch.
> All regressions go away with it on cross sh4-unknown-linux-gnu,
> though the native bootstrap will take a few days more.

There are a few warnings in bootstrap:

../trunk/gcc/config/sh/sh.c: In function 'sh_soft_fp_cmp':
../trunk/gcc/config/sh/sh.c:2193:8: error: enum conversion in initialization is 
invalid in C++ [-Werror=c++-compat]
../trunk/gcc/config/sh/sh.c:2248:3: error: enum conversion when passing 
argument 1 of 'gen_rtx_fmt_ee_stat' is invalid in C++ [-Werror=c++-compat]
./genrtl.h:24:1: note: expected 'enum rtx_code' but argument is of type 'int'
../trunk/gcc/config/sh/sh.c: In function 'expand_sfunc_op':
../trunk/gcc/config/sh/sh.c:8744:19: error: variable 'insn' set but not used 
[-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

The attached is a fixup for them.

Unfortunately, the bootstrap has failed with a stage2/stage3
comparison failure, though it might be unrelated to the softfp
patches.  I'll try again with some older revision.

Regards,
kaz
--

--- ORIG/trunk/gcc/config/sh/sh.c   2010-07-19 10:58:36.0 +0900
+++ trunk/gcc/config/sh/sh.c2010-07-21 06:45:18.0 +0900
@@ -2185,12 +2185,13 @@ sh_emit_cheap_store_flag (enum machine_m
 }
 
 static rtx
-sh_soft_fp_cmp (int code, enum machine_mode op_mode, rtx op0, rtx op1)
+sh_soft_fp_cmp (enum rtx_code code, enum machine_mode op_mode, rtx op0,
+   rtx op1)
 {
   const char *name = NULL;
   rtx (*fun) (rtx, rtx), addr, tmp, last, equiv;
   int df = op_mode == DFmode;
-  enum machine_mode mode = CODE_FOR_nothing; /* shut up warning.  */
+  enum machine_mode mode = MAX_MACHINE_MODE; /* shut up warning.  */
 
   switch (code)
 {
@@ -8741,13 +8742,13 @@ expand_sfunc_op (int nargs, enum machine
 const char *name, rtx equiv, rtx *operands)
 {
   int next_reg = FIRST_PARM_REG, i;
-  rtx addr, last, insn;
+  rtx addr, last;
 
   addr = gen_reg_rtx (Pmode);
   function_symbol (addr, name, SFUNC_FREQUENT);
   for ( i = 1; i <= nargs; i++)
 {
-  insn = emit_move_insn (gen_rtx_REG (mode, next_reg), operands[i]);
+  emit_move_insn (gen_rtx_REG (mode, next_reg), operands[i]);
   next_reg += GET_MODE_SIZE (mode) / UNITS_PER_WORD;
 }
   last = emit_insn ((*fun) (operands[0], addr));


Re: SH optimized software floating point routines

2010-07-21 Thread Joern Rennecke

Quoting Kaz Kojima :


I've got some regressions with "make check" on sh4-unknown-linux-gnu.
It looks that all of them are failed with the undefined references to
__unorddf2/__unordsf2 when -mieee enabled.


That's a bug, then; we shouldn't use a library function there,
but the cmpordered[sd]f_t_4 patterns.


I'm trying the attached patch over sh-softfp-20100718-2131 patch.
All regressions go away with it on cross sh4-unknown-linux-gnu,
though the native bootstrap will take a few days more.


But it's really the instruction expansion that needs to be fixed.


BTW, it looks that softfp __unord?f2 routines check signaling NaNs
only.  This makes __builtin_isnan return false for quiet NaNs for
which current fp-bit ones return true when -mieee enabled.  Perhaps
that change of behavior might be OK for software FP.


The SH port so far has been using fp-bit.c, which does not actually
support floating point signals, and neither does this optimized software
floating point.
So in essence, we only have quiet NaNs.  We might as well choose a bit pattern
that is easy to process, to keep code size down and improve performance.
Having a mantissa bit set that is adjacent to the exponent makes for easier
testing.
There is precedent for having the signalling bit in different places and
with different values (i.e. some have 1 == signalling, others 0 ==  
signalling).

So we could say that the bit two below the exponent is the signalling bit,
and is active-low.  Thus a 0x in the high or only word is a  quiet
NaN.
Tests that feed specific NaN hex values could be disabled or feed modified
values for the SH[123].

OTOH for unorddf / unordsf support with sh4, you would want to keep the
distinction between signalling / quiet NaNs.
(Although I doubt many use signalling support, considering the cost when
 you take a trap on every floating point instruction).

The sh.md cmpordered* patterns should do the right thing there, we just
have to keep emitting them.