On Fri, May 15, 2020 at 11:21:30AM +0200, Uros Bizjak wrote:
> On Wed, May 13, 2020 at 5:58 PM H.J. Lu <[email protected]> wrote:
>
> > > > > The question is, why STV pass creates its funny sequence? The original
> > > > > sequence should be easily solved by storing DImode from XMM register
> > > > > and (with patched gcc) pushing DImode value from the same XMM
> > > > > register.
> > > >
> > > > STV pass is mostly OK since it has to use XMM to move upper and lower
> > > > 32 bits of a 64-bit integer. The problem is that push XMM becomes very
> > > > expensive later.
> > >
> > > As shown in the patch, XMM push should be just decrement of SP reg and
> > > move to the created stack slot.
> >
> > OK for master if there are no regressions?
>
> diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
> index 78fb373db6e..6cad125fa83 100644
> --- a/gcc/config/i386/i386-features.c
> +++ b/gcc/config/i386/i386-features.c
> @@ -1264,7 +1264,8 @@ has_non_address_hard_reg (rtx_insn *insn)
> FOR_EACH_INSN_DEF (ref, insn)
> if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
> && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
> - && DF_REF_REGNO (ref) != FLAGS_REG)
> + && DF_REF_REGNO (ref) != FLAGS_REG
> + && DF_REF_REGNO (ref) != SP_REG)
> return true;
>
> I don't think this part is correct. The function comment says:
>
> /* Return 1 if INSN uses or defines a hard register.
> Hard register uses in a memory address are ignored.
> Clobbers and flags definitions are ignored. */
>
> and you are putting SP_REG into clobber part.
>
> OTOH, SP_REG in:
>
> (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64])
> (reg/v:DI 85 [ target ])) "x.i":19:5 40 {*pushdi2}
>
> is inside memory, so REG_SP should already be ignored. Please
> investigate, why it is not the case.
Push isn't a simple memory access since it also updates stack pointer.
Fixed to handle pseudo register push.
>
> +(define_insn "*pushv1ti2"
> + [(set (match_operand:V1TI 0 "push_operand" "=<")
> + (match_operand:V1TI 1 "general_no_elim_operand" "v"))]
> + ""
> + "#"
> + [(set_attr "type" "multi")
> + (set_attr "mode" "TI")])
> +
> +(define_split
> + [(set (match_operand:V1TI 0 "push_operand" "")
> + (match_operand:V1TI 1 "sse_reg_operand" ""))]
> + "TARGET_64BIT && reload_completed"
> + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
> + (set (match_dup 0) (match_dup 1))]
> +{
> + operands[2] = GEN_INT (-PUSH_ROUNDING (16));
> + /* Preserve memory attributes. */
> + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
> +})
>
> The above part is wrong on many levels, e.g. using wrong predicate,
> unnecessary rounding, it should be defined as define_insn_and_split,
> conditionalized on TARGET_64BIT && TARGET_STV and put nearby existing
> pushti patterns.
Fixed.
>
> I will implement push changes (modulo V1T1mode) by myself, since they
> are independent of STV stuff.
>
Here is the updated patch. Tested on Linux/x86 and Linux/x86-64. OK
for master?
Thanks.
H.J.
---
Add V1TI vector register push and split it after reload to a sequence
of:
(set (reg:P SP_REG) (plus:P SP_REG) (const_int -8)))
(set (match_dup 0) (match_dup 1))
so that STV pass can convert TI mode integer push to V1TI vector register
push. Rename has_non_address_hard_reg to pseudo_reg_set, combine calls
of single_set and has_non_address_hard_reg to pseudo_reg_set, to ignore
pseudo register push.
Remove c-c++-common/dfp/func-vararg-mixed-2.c since it is compiled with
-mpreferred-stack-boundary=2 and leads to segfault:
Dump of assembler code for function __bid_nesd2:
0x08049210 <+0>: endbr32
0x08049214 <+4>: push %esi
0x08049215 <+5>: push %ebx
0x08049216 <+6>: call 0x8049130 <__x86.get_pc_thunk.bx>
0x0804921b <+11>: add $0x8de5,%ebx
0x08049221 <+17>: sub $0x20,%esp
0x08049224 <+20>: mov 0x30(%esp),%esi
0x08049228 <+24>: pushl 0x2c(%esp)
0x0804922c <+28>: call 0x804e600 <__bid32_to_bid64>
0x08049231 <+33>: mov %esi,(%esp)
0x08049234 <+36>: movd %edx,%xmm1
0x08049238 <+40>: movd %eax,%xmm0
0x0804923c <+44>: punpckldq %xmm1,%xmm0
=> 0x08049240 <+48>: movaps %xmm0,0x10(%esp)
0x08049245 <+53>: call 0x804e600 <__bid32_to_bid64>
0x0804924a <+58>: push %edx
0x0804924b <+59>: push %eax
0x0804924c <+60>: pushl 0x1c(%esp)
0x08049250 <+64>: pushl 0x1c(%esp)
0x08049254 <+68>: call 0x804b260 <__bid64_quiet_not_equal>
0x08049259 <+73>: add $0x34,%esp
0x0804925c <+76>: pop %ebx
0x0804925d <+77>: pop %esi
0x0804925e <+78>: ret
when libgcc is compiled with -msse2. According to GCC manual:
'-mpreferred-stack-boundary=NUM'
Attempt to keep the stack boundary aligned to a 2 raised to NUM
byte boundary. If '-mpreferred-stack-boundary' is not specified,
the default is 4 (16 bytes or 128-bits).
*Warning:* If you use this switch, then you must build all modules
with the same value, including any libraries. This includes the
system libraries and startup modules.
c-c++-common/dfp/func-vararg-mixed-2.c, which was added by
commit 3b2488ca6ece182f2136a20ee5fa0bb92f935b0f
Author: H.J. Lu <[email protected]>
Date: Wed Jul 30 19:24:02 2008 +0000
func-vararg-alternate-d128-2.c: New.
2008-07-30 H.J. Lu <[email protected]>
Joey Ye <[email protected]>
* gcc.dg/dfp/func-vararg-alternate-d128-2.c: New.
* gcc.dg/dfp/func-vararg-mixed-2.c: Likewise.
isn't expected to work with libgcc.
gcc/
PR target/95021
* config/i386/i386-features.c (has_non_address_hard_reg):
Renamed to ...
(pseudo_reg_set): This. Return the SET expression. Ignore
pseudo register push.
(general_scalar_to_vector_candidate_p): Combine single_set and
has_non_address_hard_reg calls to pseudo_reg_set.
(timode_scalar_to_vector_candidate_p): Likewise.
* config/i386/i386.md (*pushv1ti2): New pattern.
gcc/testsuite/
PR target/95021
* c-c++-common/dfp/func-vararg-mixed-2.c: Removed.
* gcc.target/i386/pr95021-1.c: New test.
* gcc.target/i386/pr95021-2.c: Likewise.
* gcc.target/i386/pr95021-3.c: Likewise.
* gcc.target/i386/pr95021-4.c: Likewise.
* gcc.target/i386/pr95021-5.c: Likewise.
---
gcc/config/i386/i386-features.c | 37 +++---
gcc/config/i386/i386.md | 16 +++
.../c-c++-common/dfp/func-vararg-mixed-2.c | 105 ------------------
gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++
gcc/testsuite/gcc.target/i386/pr95021-2.c | 53 +++++++++
gcc/testsuite/gcc.target/i386/pr95021-3.c | 25 +++++
gcc/testsuite/gcc.target/i386/pr95021-4.c | 25 +++++
gcc/testsuite/gcc.target/i386/pr95021-5.c | 59 ++++++++++
8 files changed, 224 insertions(+), 121 deletions(-)
delete mode 100644 gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-5.c
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index 78fb373db6e..b9b764c092a 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -1253,25 +1253,36 @@ scalar_chain::convert ()
return converted_insns;
}
-/* Return 1 if INSN uses or defines a hard register.
- Hard register uses in a memory address are ignored.
- Clobbers and flags definitions are ignored. */
+/* Return the SET expression if INSN doesn't reference hard register.
+ Return NULL if INSN uses or defines a hard register, excluding
+ pseudo register pushes, hard register uses in a memory address,
+ clobbers and flags definitions. */
-static bool
-has_non_address_hard_reg (rtx_insn *insn)
+static rtx
+pseudo_reg_set (rtx_insn *insn)
{
+ rtx set = single_set (insn);
+ if (!set)
+ return NULL;
+
+ /* Check pseudo register push first. */
+ if (REG_P (SET_SRC (set))
+ && !HARD_REGISTER_P (SET_SRC (set))
+ && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set))))
+ return set;
+
df_ref ref;
FOR_EACH_INSN_DEF (ref, insn)
if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
&& DF_REF_REGNO (ref) != FLAGS_REG)
- return true;
+ return NULL;
FOR_EACH_INSN_USE (ref, insn)
if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
- return true;
+ return NULL;
- return false;
+ return set;
}
/* Check if comparison INSN may be transformed
@@ -1345,14 +1356,11 @@ convertible_comparison_p (rtx_insn *insn, enum
machine_mode mode)
static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
- rtx def_set = single_set (insn);
+ rtx def_set = pseudo_reg_set (insn);
if (!def_set)
return false;
- if (has_non_address_hard_reg (insn))
- return false;
-
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
@@ -1442,14 +1450,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn,
enum machine_mode mode)
static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
- rtx def_set = single_set (insn);
+ rtx def_set = pseudo_reg_set (insn);
if (!def_set)
return false;
- if (has_non_address_hard_reg (insn))
- return false;
-
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1bf0c1a7f01..bcb44a3fb46 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1700,6 +1700,22 @@ (define_insn "*pushdi2_rex64"
[(set_attr "type" "push,multi,multi")
(set_attr "mode" "DI")])
+(define_insn_and_split "*pushv1ti2"
+ [(set (match_operand:V1TI 0 "push_operand" "=<")
+ (match_operand:V1TI 1 "register_operand" "v"))]
+ "TARGET_64BIT && TARGET_STV"
+ "#"
+ "&& reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
+ /* Preserve memory attributes. */
+ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "TI")])
+
;; Convert impossible pushes of immediate to existing instructions.
;; First try to get scratch register and go through it. In case this
;; fails, push sign extended lower part first and then overwrite
diff --git a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c
b/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c
deleted file mode 100644
index 02cafb016d1..00000000000
--- a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
-/* { dg-options "-mpreferred-stack-boundary=2" } */
-
-/* C99 6.5.2.2 Function calls.
- Test passing varargs of the combination of decimal float types and
- other types. */
-
-#include <stdarg.h>
-#include "dfp-dbg.h"
-
-/* Supposing the list of varying number of arguments is:
- unsigned int, _Decimal128, double, _Decimal32, _Decimal64. */
-
-static _Decimal32
-vararg_d32 (unsigned arg, ...)
-{
- va_list ap;
- _Decimal32 result;
-
- va_start (ap, arg);
-
- va_arg (ap, unsigned int);
- va_arg (ap, _Decimal128);
- va_arg (ap, double);
- result = va_arg (ap, _Decimal32);
-
- va_end (ap);
- return result;
-}
-
-static _Decimal32
-vararg_d64 (unsigned arg, ...)
-{
- va_list ap;
- _Decimal64 result;
-
- va_start (ap, arg);
-
- va_arg (ap, unsigned int);
- va_arg (ap, _Decimal128);
- va_arg (ap, double);
- va_arg (ap, _Decimal32);
- result = va_arg (ap, _Decimal64);
-
- va_end (ap);
- return result;
-}
-
-static _Decimal128
-vararg_d128 (unsigned arg, ...)
-{
- va_list ap;
- _Decimal128 result;
-
- va_start (ap, arg);
-
- va_arg (ap, unsigned int);
- result = va_arg (ap, _Decimal128);
-
- va_end (ap);
- return result;
-}
-
-static unsigned int
-vararg_int (unsigned arg, ...)
-{
- va_list ap;
- unsigned int result;
-
- va_start (ap, arg);
-
- result = va_arg (ap, unsigned int);
-
- va_end (ap);
- return result;
-}
-
-static double
-vararg_double (unsigned arg, ...)
-{
- va_list ap;
- float result;
-
- va_start (ap, arg);
-
- va_arg (ap, unsigned int);
- va_arg (ap, _Decimal128);
- result = va_arg (ap, double);
-
- va_end (ap);
- return result;
-}
-
-
-int
-main ()
-{
- if (vararg_d32 (3, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 3.0df) FAILURE
- if (vararg_d64 (4, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 4.0dd) FAILURE
- if (vararg_d128 (1, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 1.0dl) FAILURE
- if (vararg_int (0, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 0) FAILURE
- if (vararg_double (2, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 2.0) FAILURE
-
- FINISH
-}
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c
b/gcc/testsuite/gcc.target/i386/pr95021-1.c
new file mode 100644
index 00000000000..218776240ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } */
+/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
+
+typedef long long a;
+struct __jmp_buf_tag {
+};
+typedef struct __jmp_buf_tag sigjmp_buf[1];
+sigjmp_buf jmp_buf;
+int __sigsetjmp (sigjmp_buf, int);
+typedef struct {
+ a target;
+} b;
+extern a *target_p;
+b *c;
+extern void foo (a);
+void
+d(void)
+{
+ if (__sigsetjmp(jmp_buf, 1)) {
+ a target = *target_p;
+ c->target = target;
+ foo(target);
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c
b/gcc/testsuite/gcc.target/i386/pr95021-2.c
new file mode 100644
index 00000000000..473b5f81664
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run { target ia32 } } */
+/* { dg-require-effective-target sse2_runtime } */
+/* { dg-options "-O2 -msse2 -mstv -W" } */
+
+#include <stdlib.h>
+#include <setjmp.h>
+
+jmp_buf buf;
+
+long long *target_p;
+long long *c;
+
+int count;
+
+__attribute__ ((noclone, noinline))
+void
+foo (long long x)
+{
+ if (x != *c)
+ abort ();
+ if (!count)
+ {
+ count++;
+ longjmp (buf, 1);
+ }
+}
+
+__attribute__ ((noclone, noinline))
+void
+bar (void)
+{
+ if (setjmp (buf))
+ {
+ long long target = *target_p;
+ *c = target;
+ foo (target);
+ }
+ else
+ foo (0);
+}
+
+int
+main ()
+{
+ long long val = 30;
+ long long local = 0;
+ target_p = &val;
+ c = &local;
+ bar ();
+ if (val != local)
+ abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c
b/gcc/testsuite/gcc.target/i386/pr95021-3.c
new file mode 100644
index 00000000000..b213b5db072
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
+/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
+
+typedef long long a;
+struct __jmp_buf_tag {
+};
+typedef struct __jmp_buf_tag sigjmp_buf[1];
+sigjmp_buf jmp_buf;
+int __sigsetjmp (sigjmp_buf, int);
+typedef struct {
+ a target;
+} b;
+extern a *target_p;
+b *c;
+extern void foo (a);
+void
+d(void)
+{
+ if (__sigsetjmp(jmp_buf, 1)) {
+ a target = *target_p;
+ c->target = target;
+ foo(target);
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-4.c
b/gcc/testsuite/gcc.target/i386/pr95021-4.c
new file mode 100644
index 00000000000..a080d7b289f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95021-4.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse2 -mstv -W" } */
+/* { dg-final { scan-assembler "movdqa\[ \t\]+\[^\n\]*, %xmm" } } */
+
+typedef __int128 a;
+struct __jmp_buf_tag {
+};
+typedef struct __jmp_buf_tag sigjmp_buf[1];
+sigjmp_buf jmp_buf;
+int __sigsetjmp (sigjmp_buf, int);
+typedef struct {
+ a target;
+} b;
+extern a *target_p;
+b *c;
+extern void foo (int, int, int, int, int, int, a);
+void
+d(void)
+{
+ if (__sigsetjmp(jmp_buf, 1)) {
+ a target = *target_p;
+ c->target = target;
+ foo(1, 2, 3, 4, 5, 6, target);
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr95021-5.c
b/gcc/testsuite/gcc.target/i386/pr95021-5.c
new file mode 100644
index 00000000000..3fcf2c3922e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95021-5.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target int128 } } */
+/* { dg-require-effective-target sse2_runtime } */
+/* { dg-options "-O2 -msse2 -mstv -W" } */
+
+#include <stdlib.h>
+#include <setjmp.h>
+
+jmp_buf buf;
+
+__int128 *target_p;
+__int128 *c;
+
+int count;
+
+__attribute__ ((noclone, noinline))
+void
+foo (__int128 i1, __int128 i2, __int128 i3, __int128 x)
+{
+ if (i1 != 0xbadbeef1)
+ abort ();
+ if (i2 != 0x2badbeef)
+ abort ();
+ if (i3 != 0xbad3beef)
+ abort ();
+ if (x != *c)
+ abort ();
+ if (!count)
+ {
+ count++;
+ longjmp (buf, 1);
+ }
+}
+
+__attribute__ ((noclone, noinline))
+void
+bar (void)
+{
+ if (setjmp (buf))
+ {
+ __int128 target = *target_p;
+ *c = target;
+ foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, target);
+ }
+ else
+ foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, 0);
+}
+
+int
+main ()
+{
+ __int128 val = 30;
+ __int128 local = 0;
+ target_p = &val;
+ c = &local;
+ bar ();
+ if (val != local)
+ abort ();
+ return 0;
+}
--
2.26.2