After my upper regs patches went in, I noticed that the gcc.dg/c11-atomic-2.c
test fails on a power8 host running in little endian mode.  This particular
test only fails when it is compiled with no optimization and with power8
selected as the CPU.  Ultimately, it fails in reload when an array index is
way out of bounds.

Looking into it, the failure is due to rs6000_emit_move creating two separate
moves of SUBREGs of the TFmode value to assign a constant during RTL
generation.  I fixed this so that this 'optimization' is only done if DFmode
values can only go in the traditional registers (i.e. not in the Altivec
registers).  While I was at it, I optimized setting TFmode variables to 0.0L
to use xxlxor rather than loading two doublewords from memory.

I have done bootstraps on big endian power7, big endian power8, and little
endian power8 with no regressions in the test suite.  I have also built the
SPEC 2006 test suite for power7.  Can I install these patches?

2014-12-05  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        PR target/64204
        * config/rs6000/rs6000.c (rs6000_emit_move): Do not split TFmode
        constant moves if -mupper-regs-df.

        * config/rs6000/rs6000.md (mov<mode>_64bit_dm): Optimize moving
        0.0L to TFmode.
        (movtd_64bit_nodm): Likewise.
        (mov<mode>_32bit, FMOVE128 case): Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 218388)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -8396,9 +8396,11 @@ rs6000_emit_move (rtx dest, rtx source, 
          || ! nonimmediate_operand (operands[0], mode)))
     goto emit_set;
 
-  /* 128-bit constant floating-point values on Darwin should really be
-     loaded as two parts.  */
+  /* 128-bit constant floating-point values on Darwin should really be loaded
+     as two parts.  However, this premature splitting is a problem when DFmode
+     values can go into Altivec registers.  */
   if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
+      && !reg_addr[DFmode].scalar_in_vmx_p
       && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
     {
       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 218388)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -8086,8 +8086,8 @@ (define_expand "mov<mode>"
 ;; problematical.  Don't allow direct move for this case.
 
 (define_insn_and_split "*mov<mode>_64bit_dm"
-  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r,r,wm")
-       (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r,wm,r"))]
+  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
+       (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r,wm,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
    && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
    && (gpc_reg_operand (operands[0], <MODE>mode)
@@ -8096,11 +8096,11 @@ (define_insn_and_split "*mov<mode>_64bit
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,12,12,8,8,8")])
+  [(set_attr "length" "8,8,8,8,12,12,8,8,8")])
 
 (define_insn_and_split "*movtd_64bit_nodm"
-  [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
-       (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
+  [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+       (match_operand:TD 1 "input_operand" "d,m,d,j,r,jYGHF,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN
    && (gpc_reg_operand (operands[0], TDmode)
        || gpc_reg_operand (operands[1], TDmode))"
@@ -8108,11 +8108,11 @@ (define_insn_and_split "*movtd_64bit_nod
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,12,12,8")])
+  [(set_attr "length" "8,8,8,8,12,12,8")])
 
 (define_insn_and_split "*mov<mode>_32bit"
-  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
-       (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))]
+  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+       (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8120,7 +8120,7 @@ (define_insn_and_split "*mov<mode>_32bit
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,20,20,16")])
+  [(set_attr "length" "8,8,8,8,20,20,16")])
 
 (define_insn_and_split "*mov<mode>_softfloat"
   [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r")

Reply via email to