On Wed, May 13, 2020 at 1:05 PM Uros Bizjak <[email protected]> wrote:
>
> On Tue, May 12, 2020 at 10:07 PM H.J. Lu <[email protected]> wrote:
> >
> > Update STV pass to properly count cost of XMM register push. In 32-bit
> > mode, to convert XMM register push in DImode, we do an XMM store in
> > DImode, followed by 2 memory pushes in SImode, instead of 2 integer
> > register pushes in SImode. To convert XMM register push in SImode, we
> > do an XMM register to integer register move in SImode, followed by an
> > integer register push in SImode, instead of an integer register push in
> > SImode. In 64-bit mode, we do an XMM register to integer register move
> > in SImode or DImode, followed by an integer register push in SImode or
> > DImode, instead of an integer register push in SImode or DImode.
> >
> > Tested on Linux/x86 and Linux/x86-64.
>
> I think it is better to implement XMM register pushes, and split them
> after reload to a sequence of:
>
> (set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
> (set (match_dup 0) (match_dup 1))
>
> This is definitely better than trips through memory to stack.
The attached (untested) patch allows fake pushes from XMM registers, so
the STV pass can allow pushes.
Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 722eb9b5ec8..9f741ce7602 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1049,6 +1049,9 @@
;; SWI and DWI together.
(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
+;; SWI48 and DWI together.
+(define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
+
;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not
;; compile time constant, it is faster to use <MODE_SIZE> than
;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on
@@ -1670,8 +1673,8 @@
;; Push/pop instructions.
(define_insn "*push<mode>2"
- [(set (match_operand:DWI 0 "push_operand" "=<")
- (match_operand:DWI 1 "general_no_elim_operand" "riF*o"))]
+ [(set (match_operand:DWI 0 "push_operand" "=<,<")
+ (match_operand:DWI 1 "general_no_elim_operand" "riF*o,v"))]
""
"#"
[(set_attr "type" "multi")
@@ -1685,13 +1688,14 @@
"ix86_split_long_move (operands); DONE;")
(define_insn "*pushdi2_rex64"
- [(set (match_operand:DI 0 "push_operand" "=<,!<")
- (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+ [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m,v,n"))]
"TARGET_64BIT"
"@
push{q}\t%1
+ #
#"
- [(set_attr "type" "push,multi")
+ [(set_attr "type" "push,multi,multi")
(set_attr "mode" "DI")])
;; Convert impossible pushes of immediate to existing instructions.
@@ -1726,11 +1730,13 @@
})
(define_insn "*pushsi2"
- [(set (match_operand:SI 0 "push_operand" "=<")
- (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+ [(set (match_operand:SI 0 "push_operand" "=<,<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m,v"))]
"!TARGET_64BIT"
- "push{l}\t%1"
- [(set_attr "type" "push")
+ "@
+ push{l}\t%1
+ #"
+ [(set_attr "type" "push,multi")
(set_attr "mode" "SI")])
;; emit_push_insn when it calls move_by_pieces requires an insn to
@@ -1739,11 +1745,13 @@
;; For TARGET_64BIT we always round up to 8 bytes.
(define_insn "*push<mode>2_rex64"
- [(set (match_operand:SWI124 0 "push_operand" "=X")
- (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))]
+ [(set (match_operand:SWI124 0 "push_operand" "=X,X")
+ (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>,v"))]
"TARGET_64BIT"
- "push{q}\t%q1"
- [(set_attr "type" "push")
+ "@
+ push{q}\t%q1
+ #"
+ [(set_attr "type" "push,multi")
(set_attr "mode" "DI")])
(define_insn "*push<mode>2"
@@ -1754,6 +1762,18 @@
[(set_attr "type" "push")
(set_attr "mode" "SI")])
+(define_split
+ [(set (match_operand:SWI48DWI 0 "push_operand")
+ (match_operand:SWI48DWI 1 "sse_reg_operand"))]
+ "TARGET_SSE && reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
+ /* Keep the MEM's attributes, but address it via the adjusted SP. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+})
+
(define_insn "*push<mode>2_prologue"
[(set (match_operand:W 0 "push_operand" "=<")
(match_operand:W 1 "general_no_elim_operand" "r<i>*m"))