In my fix for PR target/65240, I removed the special -ffast-math code that delayed dealing with constants until reload time. With that change, constants are now pushed to memory earlier, and the compiler uses LFS (load floating point single) to load double precision constants. When you use the LRA register allocator (-mlra), it uses the Altivec registers for scalar data more frequently, and there appear to be interactions involving values that were loaded as single precision constants and then moved to the Altivec registers via XXLOR.
This patch makes (float_extend (mem)) slightly more costly than just (mem), so that the code in expr.c will not compress the constant. In addition, for scalar single precision moves it uses copy sign (XSCPSGNDP) instead of OR (XXLOR) to move the data. The copy sign instruction deals with single precision values that would create denormals, clearing out the dirty bits created when such denormals are generated. While working in the code, I also noticed that truncdfsf2 did not have support for ISA 2.07, so I added support for it. I have done bootstraps and make check with no regressions (after fixing the two tests that were checking that LFS was used). I have also built and run the SPEC 2006 benchmark bwaves with the patch, and it now runs when compiled with -mlra and upper register support. Is the patch OK to commit to trunk? [gcc] 2015-04-03 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/65614 * config/rs6000/rs6000.c (rs6000_rtx_costs): Make FLOAT_EXTEND more expensive, so that LFD is used to load double constants, and not LFS. * config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP instead of XXLOR to copy SFmode to clear out dirty bits created when SFmode denormals are generated. (mov<mode>_hardfloat, FMOVE32 case): Likewise. (truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction. [gcc/testsuite] 2015-04-03 Michael Meissner <meiss...@linux.vnet.ibm.com> PR target/65614 * gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5 to get floating point compression. * gcc.target/powerpc/compress-float-ppc.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 221802) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -30479,8 +30479,10 @@ rs6000_rtx_costs (rtx x, int code, int o return false; case FLOAT_EXTEND: + /* Make converts on newer machines slightly more expensive to encourage + expr.c to not use a LFS instead of LFD to load constants. */ if (mode == DFmode) - *total = 0; + *total = (TARGET_VSX || TARGET_POPCNTD) ? 1 : 0; else *total = rs6000_cost->fp; return false; Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 221802) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -5222,7 +5222,7 @@ (define_insn_and_split "*extendsfdf2_fpr fmr %0,%1 lfs%U1%X1 %0,%1 # - xxlor %x0,%x1,%x1 + xscpsgndp %x0,%x1,%x1 lxsspx %x0,%y1" "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])" [(const_int 0)] @@ -5230,7 +5230,7 @@ (define_insn_and_split "*extendsfdf2_fpr emit_note (NOTE_INSN_DELETED); DONE; } - [(set_attr "type" "fp,fp,fpload,fp,vecsimple,fpload")]) + [(set_attr "type" "fp,fp,fpload,fp,fp,fpload")]) (define_expand "truncdfsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") @@ -5239,10 +5239,12 @@ (define_expand "truncdfsf2" "") (define_insn "*truncdfsf2_fpr" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy") + (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" - "frsp %0,%1" + "@ + frsp %0,%1 + xsrsp %x0,%x1" [(set_attr "type" "fp")]) ;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in @@ -8058,7 +8060,7 @@ (define_insn "mov<mode>_hardfloat" lwz%U1%X1 %0,%1 stw%U0%X0 %1,%0 fmr %0,%1 - xxlor %x0,%x1,%x1 + xscpsgndp %x0,%x1,%x1 xxlxor %x0,%x0,%x0 
li %0,0 <f32_li> @@ -8070,7 +8072,7 @@ (define_insn "mov<mode>_hardfloat" mt%0 %1 mf%1 %0 nop" - [(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*") + [(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*") (set_attr "length" "4")]) (define_insn "*mov<mode>_softfloat" Index: gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c (revision 221802) +++ gcc/testsuite/gcc.target/powerpc/compress-float-ppc.c (working copy) @@ -1,5 +1,7 @@ /* { dg-do compile { target powerpc_fprs } } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -mcpu=power5" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ + double foo (double x) { return x + 1.75; } Index: gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c (revision 221802) +++ gcc/testsuite/gcc.target/powerpc/compress-float-ppc-pic.c (working copy) @@ -1,5 +1,7 @@ /* { dg-do compile { target powerpc_fprs } } */ -/* { dg-options "-O2 -fpic" } */ +/* { dg-options "-O2 -fpic -mcpu=power5" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ + double foo (double x) { return x + 1.75; }