1. Update PUSH_ARGS to accept an argument.  When the PUSH instruction
usage is optional, pass the number of bytes to push to PUSH_ARGS so that
the backend can decide if PUSH instructions should be generated.
2. Change x86 PUSH_ARGS to return 0 when the number of bytes to push is
more than a word to avoid generating non-existent PUSH instructions.
3. Remove target PUSH_ARGS definitions which return 0 as it is the same
as the default.

gcc/

        PR target/100704
        * calls.c (expand_call): Add 0 to PUSH_ARGS.
        (emit_library_call_value_1): Likewise.
        * defaults.h (PUSH_ARGS): Add npush.
        (PUSH_ARGS_REVERSED): Add 0 to PUSH_ARGS.
        * expr.c (block_move_libcall_safe_for_call_parm): Add 0 to
        PUSH_ARGS.
        (emit_push_insn): Pass the number of bytes to push to PUSH_ARGS.
        Pass 0 to PUSH_ARGS if ARGS_ADDR is 0.
        * rtlanal.c (nonzero_bits1): Add 0 to PUSH_ARGS.
        * config/bpf/bpf.h (PUSH_ARGS): Removed.
        * config/cr16/cr16.h (PUSH_ARGS): Updated.
        * config/i386/i386.h (PUSH_ARGS): Return 0 if the number of
        bytes to push is more than UNITS_PER_WORD.
        * config/m32c/m32c.h (PUSH_ARGS): Updated.
        * config/nios2/nios2.h (PUSH_ARGS): Removed.
        * config/pru/pru.h (PUSH_ARGS): Removed.
        * doc/tm.texi.in: Update PUSH_ARGS documentation.
        * doc/tm.texi: Regenerated.

gcc/testsuite/

        PR target/100704
        * gcc.target/i386/pr100704-1.c: New test.
        * gcc.target/i386/pr100704-2.c: Likewise.
---
 gcc/calls.c                                |  6 +++---
 gcc/config/bpf/bpf.h                       |  3 ---
 gcc/config/cr16/cr16.h                     |  2 +-
 gcc/config/i386/i386.h                     | 12 +++++++----
 gcc/config/m32c/m32c.h                     |  2 +-
 gcc/config/nios2/nios2.h                   |  1 -
 gcc/config/pru/pru.h                       |  1 -
 gcc/defaults.h                             |  6 +++---
 gcc/doc/tm.texi                            |  8 +++++---
 gcc/doc/tm.texi.in                         |  8 +++++---
 gcc/expr.c                                 | 14 ++++++++++---
 gcc/rtlanal.c                              |  2 +-
 gcc/testsuite/gcc.target/i386/pr100704-1.c | 24 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr100704-2.c | 23 +++++++++++++++++++++
 14 files changed, 85 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100704-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100704-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index f3da1839dc5..0cb7c23d310 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3727,7 +3727,7 @@ expand_call (tree exp, rtx target, int ignore)
      So the entire argument block must then be preallocated (i.e., we
      ignore PUSH_ROUNDING in that case).  */
 
-  int must_preallocate = !PUSH_ARGS;
+  int must_preallocate = !PUSH_ARGS (0);
 
   /* Size of the stack reserved for parameter registers.  */
   int reg_parm_stack_space = 0;
@@ -3836,7 +3836,7 @@ expand_call (tree exp, rtx target, int ignore)
 #endif
 
   if (! OUTGOING_REG_PARM_STACK_SPACE ((!fndecl ? fntype : TREE_TYPE (fndecl)))
-      && reg_parm_stack_space > 0 && PUSH_ARGS)
+      && reg_parm_stack_space > 0 && PUSH_ARGS (0))
     must_preallocate = 1;
 
   /* Set up a place to return a structure.  */
@@ -5477,7 +5477,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
     }
   else
     {
-      if (!PUSH_ARGS)
+      if (!PUSH_ARGS (0))
        argblock = push_block (gen_int_mode (args_size.constant, Pmode), 0, 0);
     }
 
diff --git a/gcc/config/bpf/bpf.h b/gcc/config/bpf/bpf.h
index 4c5b19e262b..80195cea5b2 100644
--- a/gcc/config/bpf/bpf.h
+++ b/gcc/config/bpf/bpf.h
@@ -288,9 +288,6 @@ enum reg_class
    never used when passing arguments.  However, we still have to
    define the constants below.  */
 
-/* If nonzero, push insns will be used to pass outgoing arguments.  */
-#define PUSH_ARGS 0
-
 /* If nonzero, function arguments will be evaluated from last to
    first, rather than from first to last.  */
 #define PUSH_ARGS_REVERSED 1
diff --git a/gcc/config/cr16/cr16.h b/gcc/config/cr16/cr16.h
index 4ce9e81b0e3..68db73348bf 100644
--- a/gcc/config/cr16/cr16.h
+++ b/gcc/config/cr16/cr16.h
@@ -376,7 +376,7 @@ enum reg_class
 
 #define ACCUMULATE_OUTGOING_ARGS 0
 
-#define PUSH_ARGS 1
+#define PUSH_ARGS(npush) 1
 
 #define PUSH_ROUNDING(BYTES) cr16_push_rounding (BYTES)
 
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 53d503fc6e0..48b99033c28 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1462,10 +1462,14 @@ enum reg_class
    || TARGET_64BIT_MS_ABI \
    || (TARGET_MACHO && crtl->profile))
 
-/* If defined, a C expression whose value is nonzero when we want to use PUSH
-   instructions to pass outgoing arguments.  */
-
-#define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS)
+/* If defined, a C expression whose value is nonzero when we want to
+   use PUSH instructions to pass outgoing arguments.  NPUSH is the
+   number of bytes to push.  */
+
+#define PUSH_ARGS(npush) \
+  ((npush) <= UNITS_PER_WORD \
+   && TARGET_PUSH_ARGS \
+   && !ACCUMULATE_OUTGOING_ARGS)
 
 /* We want the stack and args grow in opposite directions, even if
    PUSH_ARGS is 0.  */
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
index 635f5924c20..33b96d973c8 100644
--- a/gcc/config/m32c/m32c.h
+++ b/gcc/config/m32c/m32c.h
@@ -472,7 +472,7 @@ enum reg_class
 
 /* Passing Function Arguments on the Stack */
 
-#define PUSH_ARGS 1
+#define PUSH_ARGS(npush) 1
 #define PUSH_ROUNDING(N) m32c_push_rounding (N)
 #define CALL_POPS_ARGS(C) 0
 
diff --git a/gcc/config/nios2/nios2.h b/gcc/config/nios2/nios2.h
index 1840a466f96..dfca12cc525 100644
--- a/gcc/config/nios2/nios2.h
+++ b/gcc/config/nios2/nios2.h
@@ -297,7 +297,6 @@ typedef struct nios2_args
   ((REGNO) >= FIRST_ARG_REGNO && (REGNO) <= LAST_ARG_REGNO)
 
 /* Passing function arguments on stack.  */
-#define PUSH_ARGS 0
 #define ACCUMULATE_OUTGOING_ARGS 1
 
 /* We define TARGET_RETURN_IN_MEMORY, so set to zero.  */
diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
index 4c35a7d7ee3..9b6be323e6d 100644
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -339,7 +339,6 @@ typedef struct pru_args
   ((REGNO) >= FIRST_ARG_REGNUM && (REGNO) <= LAST_ARG_REGNUM)
 
 /* Passing function arguments on stack.  */
-#define PUSH_ARGS 0
 #define ACCUMULATE_OUTGOING_ARGS 1
 
 /* We define TARGET_RETURN_IN_MEMORY, so set to zero.  */
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 91216593e75..09446981d3d 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -804,9 +804,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 /* Supply a default definition for PUSH_ARGS.  */
 #ifndef PUSH_ARGS
 #ifdef PUSH_ROUNDING
-#define PUSH_ARGS      !ACCUMULATE_OUTGOING_ARGS
+#define PUSH_ARGS(npush)       !ACCUMULATE_OUTGOING_ARGS
 #else
-#define PUSH_ARGS      0
+#define PUSH_ARGS(npush)       0
 #endif
 #endif
 
@@ -820,7 +820,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #ifndef PUSH_ARGS_REVERSED
 #if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD)
-#define PUSH_ARGS_REVERSED  PUSH_ARGS
+#define PUSH_ARGS_REVERSED  PUSH_ARGS(0)
 #endif
 #endif
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index e3a080e4a7c..03586e2a1fe 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3807,9 +3807,11 @@ cases of mismatch, it also makes for better code on certain machines.
 The default is to not promote prototypes.
 @end deftypefn
 
-@defmac PUSH_ARGS
-A C expression.  If nonzero, push insns will be used to pass
-outgoing arguments.
+@defmac PUSH_ARGS (@var{npush})
+A C expression.  If nonzero, push insns will be used to pass outgoing
+arguments.  When the push instruction usage is optional, @var{npush} is
+nonzero to indicate the number of bytes to push.  Otherwise,
+@var{npush} is zero.
 If the target machine does not have a push instruction, set it to zero.
 That directs GCC to use an alternate strategy: to
 allocate the entire argument block and then store the arguments into
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d9fbbe20e6f..62e3456a10a 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3100,9 +3100,11 @@ control passing certain arguments in registers.
 
 @hook TARGET_PROMOTE_PROTOTYPES
 
-@defmac PUSH_ARGS
-A C expression.  If nonzero, push insns will be used to pass
-outgoing arguments.
+@defmac PUSH_ARGS (@var{npush})
+A C expression.  If nonzero, push insns will be used to pass outgoing
+arguments.  When the push instruction usage is optional, @var{npush} is
+nonzero to indicate the number of bytes to push.  Otherwise,
+@var{npush} is zero.
 If the target machine does not have a push instruction, set it to zero.
 That directs GCC to use an alternate strategy: to
 allocate the entire argument block and then store the arguments into
diff --git a/gcc/expr.c b/gcc/expr.c
index e4660f0e90a..1b565d9be3a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1823,7 +1823,7 @@ block_move_libcall_safe_for_call_parm (void)
   tree fn;
 
   /* If arguments are pushed on the stack, then they're safe.  */
-  if (PUSH_ARGS)
+  if (PUSH_ARGS (0))
     return true;
 
   /* If registers go on the stack anyway, any argument is sure to clobber
@@ -4639,11 +4639,19 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
       skip = (reg_parm_stack_space == 0) ? 0 : used;
 
 #ifdef PUSH_ROUNDING
+      /* NB: Let the backend know the number of bytes to push and
+        decide if push insns should be generated.  */
+      unsigned int push_size;
+      if (CONST_INT_P (size))
+       push_size = INTVAL (size);
+      else
+       push_size = 0;
+
       /* Do it with several push insns if that doesn't take lots of insns
         and if there is no difficulty with push insns that skip bytes
         on the stack for alignment purposes.  */
       if (args_addr == 0
-         && PUSH_ARGS
+         && PUSH_ARGS (push_size)
          && CONST_INT_P (size)
          && skip == 0
          && MEM_ALIGN (xinner) >= align
@@ -4848,7 +4856,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
        anti_adjust_stack (gen_int_mode (extra, Pmode));
 
 #ifdef PUSH_ROUNDING
-      if (args_addr == 0 && PUSH_ARGS)
+      if (args_addr == 0 && PUSH_ARGS (0))
        emit_single_push_insn (mode, x, type);
       else
 #endif
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 712c2c28578..1082d8cbb30 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -4870,7 +4870,7 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
          /* If PUSH_ROUNDING is defined, it is possible for the
             stack to be momentarily aligned only to that amount,
             so we pick the least alignment.  */
-         if (x == stack_pointer_rtx && PUSH_ARGS)
+         if (x == stack_pointer_rtx && PUSH_ARGS (0))
            {
              poly_uint64 rounded_1 = PUSH_ROUNDING (poly_int64 (1));
              alignment = MIN (known_alignment (rounded_1), alignment);
diff --git a/gcc/testsuite/gcc.target/i386/pr100704-1.c b/gcc/testsuite/gcc.target/i386/pr100704-1.c
new file mode 100644
index 00000000000..02461db9695
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100704-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+struct S
+{
+  long long s1 __attribute__ ((aligned (8)));
+  unsigned s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
+};
+
+extern struct S a[];
+
+void bar (struct S);
+
+void
+foo (void)
+{
+  bar (a[0]);
+}
+
+/* { dg-final { scan-assembler-not "pushq" } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 48\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100704-2.c b/gcc/testsuite/gcc.target/i386/pr100704-2.c
new file mode 100644
index 00000000000..07b9bd18c7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100704-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+struct S
+{
+  char array[64];
+};
+
+extern struct S a[];
+
+void bar (struct S);
+
+void
+foo (void)
+{
+  bar (a[0]);
+}
+
+/* { dg-final { scan-assembler-not "pushq" } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, 48\\(%\[\^,\]+\\)" 1 } } */
-- 
2.31.1

Reply via email to