On Wed, May 13, 2020 at 1:05 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Tue, May 12, 2020 at 10:07 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > Update STV pass to properly count cost of XMM register push. In 32-bit > > mode, to convert XMM register push in DImode, we do an XMM store in > > DImode, followed by 2 memory pushes in SImode, instead of 2 integer > > register pushes in SImode. To convert XMM register push in SImode, we > > do an XMM register to integer register move in SImode, followed by an > > integer register push in SImode, instead of an integer register push in > > SImode. In 64-bit mode, we do an XMM register to integer register move > > in SImode or DImode, followed by an integer register push in SImode or > > DImode, instead of an integer register push in SImode or DImode. > > > > Tested on Linux/x86 and Linux/x86-64. > > I think it is better to implement XMM register pushes, and split them > after reload to a sequence of: > > (set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) > (set (match_dup 0) (match_dup 1)) > > This is definitely better than trips through memory to the stack.
The attached (untested) patch allows fake pushes from XMM registers, so the STV pass can allow pushes. Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 722eb9b5ec8..9f741ce7602 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1049,6 +1049,9 @@ ;; SWI and DWI together. (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")]) +;; SWI48 and DWI together. +(define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")]) + ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not ;; compile time constant, it is faster to use <MODE_SIZE> than ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on @@ -1670,8 +1673,8 @@ ;; Push/pop instructions. (define_insn "*push<mode>2" - [(set (match_operand:DWI 0 "push_operand" "=<") - (match_operand:DWI 1 "general_no_elim_operand" "riF*o"))] + [(set (match_operand:DWI 0 "push_operand" "=<,<") + (match_operand:DWI 1 "general_no_elim_operand" "riF*o,v"))] "" "#" [(set_attr "type" "multi") @@ -1685,13 +1688,14 @@ "ix86_split_long_move (operands); DONE;") (define_insn "*pushdi2_rex64" - [(set (match_operand:DI 0 "push_operand" "=<,!<") - (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + [(set (match_operand:DI 0 "push_operand" "=<,<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,v,n"))] "TARGET_64BIT" "@ push{q}\t%1 + # #" - [(set_attr "type" "push,multi") + [(set_attr "type" "push,multi,multi") (set_attr "mode" "DI")]) ;; Convert impossible pushes of immediate to existing instructions. @@ -1726,11 +1730,13 @@ }) (define_insn "*pushsi2" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + [(set (match_operand:SI 0 "push_operand" "=<,<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m,v"))] "!TARGET_64BIT" - "push{l}\t%1" - [(set_attr "type" "push") + "@ + push{l}\t%1 + #" + [(set_attr "type" "push,multi") (set_attr "mode" "SI")]) ;; emit_push_insn when it calls move_by_pieces requires an insn to @@ -1739,11 +1745,13 @@ ;; For TARGET_64BIT we always round up to 8 bytes. 
(define_insn "*push<mode>2_rex64" - [(set (match_operand:SWI124 0 "push_operand" "=X") - (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))] + [(set (match_operand:SWI124 0 "push_operand" "=X,X") + (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>,v"))] "TARGET_64BIT" - "push{q}\t%q1" - [(set_attr "type" "push") + "@ + push{q}\t%q1 + #" + [(set_attr "type" "push,multi") (set_attr "mode" "DI")]) (define_insn "*push<mode>2" @@ -1754,6 +1762,18 @@ [(set_attr "type" "push") (set_attr "mode" "SI")]) +(define_split + [(set (match_operand:SWI48DWI 0 "push_operand") + (match_operand:SWI48DWI 1 "sse_reg_operand"))] + "TARGET_SSE && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode))); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + (define_insn "*push<mode>2_prologue" [(set (match_operand:W 0 "push_operand" "=<") (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))