On Wed, May 13, 2020 at 1:05 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Tue, May 12, 2020 at 10:07 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > Update STV pass to properly count cost of XMM register push.  In 32-bit
> > mode, to convert XMM register push in DImode, we do an XMM store in
> > DImode, followed by 2 memory pushes in SImode, instead of 2 integer
> > register pushes in SImode.  To convert XMM register push in SImode, we
> > do an XMM register to integer register move in SImode, followed by an
> > integer register push in SImode, instead of an integer register push in
> > SImode.  In 64-bit mode, we do an XMM register to integer register move
> > in SImode or DImode, followed by an integer register push in SImode or
> > DImode, instead of an integer register push in SImode or DImode.
> >
> > Tested on Linux/x86 and Linux/x86-64.
>
> I think it is better to implement XMM register pushes, and split them
> after reload to a sequence of:
>
> (set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
> (set (match_dup 0) (match_dup 1))
>
> This is definitely better than a trip through memory to the stack.

The attached (untested) patch allows fake pushes from XMM registers, so
the STV pass can allow pushes.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 722eb9b5ec8..9f741ce7602 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1049,6 +1049,9 @@
 ;; SWI and DWI together.
 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
 
+;; SWI48 and DWI together.
+(define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
+
 ;; GET_MODE_SIZE for selected modes.  As GET_MODE_SIZE is not
 ;; compile time constant, it is faster to use <MODE_SIZE> than
 ;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on
@@ -1670,8 +1673,8 @@
 ;; Push/pop instructions.
 
 (define_insn "*push<mode>2"
-  [(set (match_operand:DWI 0 "push_operand" "=<")
-       (match_operand:DWI 1 "general_no_elim_operand" "riF*o"))]
+  [(set (match_operand:DWI 0 "push_operand" "=<,<")
+       (match_operand:DWI 1 "general_no_elim_operand" "riF*o,v"))]
   ""
   "#"
   [(set_attr "type" "multi")
@@ -1685,13 +1688,14 @@
   "ix86_split_long_move (operands); DONE;")
 
 (define_insn "*pushdi2_rex64"
-  [(set (match_operand:DI 0 "push_operand" "=<,!<")
-       (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+  [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
+       (match_operand:DI 1 "general_no_elim_operand" "re*m,v,n"))]
   "TARGET_64BIT"
   "@
    push{q}\t%1
+   #
    #"
-  [(set_attr "type" "push,multi")
+  [(set_attr "type" "push,multi,multi")
    (set_attr "mode" "DI")])
 
 ;; Convert impossible pushes of immediate to existing instructions.
@@ -1726,11 +1730,13 @@
 })
 
 (define_insn "*pushsi2"
-  [(set (match_operand:SI 0 "push_operand" "=<")
-       (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+  [(set (match_operand:SI 0 "push_operand" "=<,<")
+       (match_operand:SI 1 "general_no_elim_operand" "ri*m,v"))]
   "!TARGET_64BIT"
-  "push{l}\t%1"
-  [(set_attr "type" "push")
+  "@
+   push{l}\t%1
+   #"
+  [(set_attr "type" "push,multi")
    (set_attr "mode" "SI")])
 
 ;; emit_push_insn when it calls move_by_pieces requires an insn to
@@ -1739,11 +1745,13 @@
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
 (define_insn "*push<mode>2_rex64"
-  [(set (match_operand:SWI124 0 "push_operand" "=X")
-       (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))]
+  [(set (match_operand:SWI124 0 "push_operand" "=X,X")
+       (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>,v"))]
   "TARGET_64BIT"
-  "push{q}\t%q1"
-  [(set_attr "type" "push")
+  "@
+   push{q}\t%q1
+   #"
+  [(set_attr "type" "push,multi")
    (set_attr "mode" "DI")])
 
 (define_insn "*push<mode>2"
@@ -1754,6 +1762,18 @@
   [(set_attr "type" "push")
    (set_attr "mode" "SI")])
 
+(define_split
+  [(set (match_operand:SWI48DWI 0 "push_operand")
+       (match_operand:SWI48DWI 1 "sse_reg_operand"))]
+  "TARGET_SSE && reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+    (set (match_dup 0) (match_dup 1))]
+{
+  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
+  /* Preserve memory attributes. */
+  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
+
 (define_insn "*push<mode>2_prologue"
   [(set (match_operand:W 0 "push_operand" "=<")
        (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))

Reply via email to