On Sun, May 17, 2020 at 9:07 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Sat, May 16, 2020 at 8:13 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Fri, May 15, 2020 at 11:21:30AM +0200, Uros Bizjak wrote: > > > On Wed, May 13, 2020 at 5:58 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > > > > > The question is, why STV pass creates its funny sequence? The > > > > > > > original > > > > > > > sequence should be easily solved by storing DImode from XMM > > > > > > > register > > > > > > > and (with patched gcc) pushing DImode value from the same XMM > > > > > > > register. > > > > > > > > > > > > STV pass is mostly OK since it has to use XMM to move upper and > > > > > > lower > > > > > > 32 bits of a 64-bit integer. The problem is that push XMM becomes > > > > > > very > > > > > > expensive later. > > > > > > > > > > As shown in the patch, XMM push should be just decrement of SP reg and > > > > > move to the created stack slot. > > > > > > > > OK for master if there are no regressions? > > > > > > diff --git a/gcc/config/i386/i386-features.c > > > b/gcc/config/i386/i386-features.c > > > index 78fb373db6e..6cad125fa83 100644 > > > --- a/gcc/config/i386/i386-features.c > > > +++ b/gcc/config/i386/i386-features.c > > > @@ -1264,7 +1264,8 @@ has_non_address_hard_reg (rtx_insn *insn) > > > FOR_EACH_INSN_DEF (ref, insn) > > > if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) > > > && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) > > > - && DF_REF_REGNO (ref) != FLAGS_REG) > > > + && DF_REF_REGNO (ref) != FLAGS_REG > > > + && DF_REF_REGNO (ref) != SP_REG) > > > return true; > > > > > > I don't think this part is correct. The function comment says: > > > > > > /* Return 1 if INSN uses or defines a hard register. > > > Hard register uses in a memory address are ignored. > > > Clobbers and flags definitions are ignored. */ > > > > > > and you are putting SP_REG into clobber part. > > > > > > OTOH, SP_REG in: > > > > > > (insn 28 27 29 3 (set (mem:DI (pre_dec:SI (reg/f:SI 7 sp)) [2 S8 A64]) > > > (reg/v:DI 85 [ target ])) "x.i":19:5 40 {*pushdi2} > > > > > > is inside memory, so REG_SP should already be ignored. Please > > > investigate, why it is not the case. > > > > Push isn't a simple memory access since it also updates stack pointer. > > Fixed to handle pseudo register push. > > > > > > > > +(define_insn "*pushv1ti2" > > > + [(set (match_operand:V1TI 0 "push_operand" "=<") > > > + (match_operand:V1TI 1 "general_no_elim_operand" "v"))] > > > + "" > > > + "#" > > > + [(set_attr "type" "multi") > > > + (set_attr "mode" "TI")]) > > > + > > > +(define_split > > > + [(set (match_operand:V1TI 0 "push_operand" "") > > > + (match_operand:V1TI 1 "sse_reg_operand" ""))] > > > + "TARGET_64BIT && reload_completed" > > > + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) > > > + (set (match_dup 0) (match_dup 1))] > > > +{ > > > + operands[2] = GEN_INT (-PUSH_ROUNDING (16)); > > > + /* Preserve memory attributes. */ > > > + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); > > > +}) > > > > > > The above part is wrong on many levels, e.g. using wrong predicate, > > > unnecessary rounding, it should be defined as define_insn_and_split, > > > conditionalized on TARGET_64BIT && TARGET_STV and put nearby existing > > > pushti patterns. > > > > Fixed. > > > > > > > > I will implement push changes (modulo V1T1mode) by myself, since they > > > are independent of STV stuff. > > > > > > > Here is the updated patch. Tested on Linux/x86 and Linux/x86-64. OK > > for master? > > > > Thanks. > > > > H.J. > > --- > > Add V1TI vector register push and split it after reload to a sequence > > of: > > > > (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) > > (set (match_dup 0) (match_dup 1)) > > > > so that STV pass can convert TI mode integer push to V1TI vector register > > push. Rename has_non_address_hard_reg to pseudo_reg_set, combine calls > > of single_set and has_non_address_hard_reg to pseudo_reg_set, to ignore > > pseudo register push. > > > > Remove c-c++-common/dfp/func-vararg-mixed-2.c since it is compiled with > > -mpreferred-stack-boundary=2 and leads to segfault: > > > > Dump of assembler code for function __bid_nesd2: > > 0x08049210 <+0>: endbr32 > > 0x08049214 <+4>: push %esi > > 0x08049215 <+5>: push %ebx > > 0x08049216 <+6>: call 0x8049130 <__x86.get_pc_thunk.bx> > > 0x0804921b <+11>: add $0x8de5,%ebx > > 0x08049221 <+17>: sub $0x20,%esp > > 0x08049224 <+20>: mov 0x30(%esp),%esi > > 0x08049228 <+24>: pushl 0x2c(%esp) > > 0x0804922c <+28>: call 0x804e600 <__bid32_to_bid64> > > 0x08049231 <+33>: mov %esi,(%esp) > > 0x08049234 <+36>: movd %edx,%xmm1 > > 0x08049238 <+40>: movd %eax,%xmm0 > > 0x0804923c <+44>: punpckldq %xmm1,%xmm0 > > => 0x08049240 <+48>: movaps %xmm0,0x10(%esp) > > 0x08049245 <+53>: call 0x804e600 <__bid32_to_bid64> > > 0x0804924a <+58>: push %edx > > 0x0804924b <+59>: push %eax > > 0x0804924c <+60>: pushl 0x1c(%esp) > > 0x08049250 <+64>: pushl 0x1c(%esp) > > 0x08049254 <+68>: call 0x804b260 <__bid64_quiet_not_equal> > > 0x08049259 <+73>: add $0x34,%esp > > 0x0804925c <+76>: pop %ebx > > 0x0804925d <+77>: pop %esi > > 0x0804925e <+78>: ret > > > > when libgcc is compiled with -msse2. According to GCC manual: > > > > '-mpreferred-stack-boundary=NUM' > > Attempt to keep the stack boundary aligned to a 2 raised to NUM > > byte boundary. If '-mpreferred-stack-boundary' is not specified, > > the default is 4 (16 bytes or 128-bits). > > > > *Warning:* If you use this switch, then you must build all modules > > with the same value, including any libraries. This includes the > > system libraries and startup modules. > > > > c-c++-common/dfp/func-vararg-mixed-2.c, which was added by > > > > commit 3b2488ca6ece182f2136a20ee5fa0bb92f935b0f > > Author: H.J. Lu <hongjiu...@intel.com> > > Date: Wed Jul 30 19:24:02 2008 +0000 > > > > func-vararg-alternate-d128-2.c: New. > > > > 2008-07-30 H.J. Lu <hongjiu...@intel.com> > > Joey Ye <joey...@intel.com> > > > > * gcc.dg/dfp/func-vararg-alternate-d128-2.c: New. > > * gcc.dg/dfp/func-vararg-mixed-2.c: Likewise. > > > > isn't expected to work with libgcc. > > > > gcc/ > > > > PR target/95021 > > * config/i386/i386-features.c (has_non_address_hard_reg): > > Renamed to ... > > (pseudo_reg_set): This. Return the SET expression. Ignore > > pseudo register push. > > (general_scalar_to_vector_candidate_p): Combine single_set and > > has_non_address_hard_reg calls to pseudo_reg_set. > > (timode_scalar_to_vector_candidate_p): Likewise. > > * config/i386/i386.md (*pushv1ti2): New pattern. > > > > gcc/testsuite/ > > > > PR target/95021 > > * c-c++-common/dfp/func-vararg-mixed-2.c: Removed. > > * gcc.target/i386/pr95021-1.c: New test. > > * gcc.target/i386/pr95021-2.c: Likewise. > > * gcc.target/i386/pr95021-3.c: Likewise. > > * gcc.target/i386/pr95021-4.c: Likewise. > > * gcc.target/i386/pr95021-5.c: Likewise. > > OK with a small nit in i386.md and testcase changes, as explained inline. > > Thanks, > Uros. > > > --- > > gcc/config/i386/i386-features.c | 37 +++--- > > gcc/config/i386/i386.md | 16 +++ > > .../c-c++-common/dfp/func-vararg-mixed-2.c | 105 ------------------ > > gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++ > > gcc/testsuite/gcc.target/i386/pr95021-2.c | 53 +++++++++ > > gcc/testsuite/gcc.target/i386/pr95021-3.c | 25 +++++ > > gcc/testsuite/gcc.target/i386/pr95021-4.c | 25 +++++ > > gcc/testsuite/gcc.target/i386/pr95021-5.c | 59 ++++++++++ > > 8 files changed, 224 insertions(+), 121 deletions(-) > > delete mode 100644 gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-3.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-4.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-5.c > > > > diff --git a/gcc/config/i386/i386-features.c > > b/gcc/config/i386/i386-features.c > > index 78fb373db6e..b9b764c092a 100644 > > --- a/gcc/config/i386/i386-features.c > > +++ b/gcc/config/i386/i386-features.c > > @@ -1253,25 +1253,36 @@ scalar_chain::convert () > > return converted_insns; > > } > > > > -/* Return 1 if INSN uses or defines a hard register. > > - Hard register uses in a memory address are ignored. > > - Clobbers and flags definitions are ignored. */ > > +/* Return the SET expression if INSN doesn't reference hard register. > > + Return NULL if INSN uses or defines a hard register, excluding > > + pseudo register pushes, hard register uses in a memory address, > > + clobbers and flags definitions. */ > > > > -static bool > > -has_non_address_hard_reg (rtx_insn *insn) > > +static rtx > > +pseudo_reg_set (rtx_insn *insn) > > { > > + rtx set = single_set (insn); > > + if (!set) > > + return NULL; > > + > > + /* Check pseudo register push first. */ > > + if (REG_P (SET_SRC (set)) > > + && !HARD_REGISTER_P (SET_SRC (set)) > > + && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))) > > + return set; > > + > > df_ref ref; > > FOR_EACH_INSN_DEF (ref, insn) > > if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) > > && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) > > && DF_REF_REGNO (ref) != FLAGS_REG) > > - return true; > > + return NULL; > > > > FOR_EACH_INSN_USE (ref, insn) > > if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) > > - return true; > > + return NULL; > > > > - return false; > > + return set; > > } > > > > /* Check if comparison INSN may be transformed > > @@ -1345,14 +1356,11 @@ convertible_comparison_p (rtx_insn *insn, enum > > machine_mode mode) > > static bool > > general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode > > mode) > > { > > - rtx def_set = single_set (insn); > > + rtx def_set = pseudo_reg_set (insn); > > > > if (!def_set) > > return false; > > > > - if (has_non_address_hard_reg (insn)) > > - return false; > > - > > rtx src = SET_SRC (def_set); > > rtx dst = SET_DEST (def_set); > > > > @@ -1442,14 +1450,11 @@ general_scalar_to_vector_candidate_p (rtx_insn > > *insn, enum machine_mode mode) > > static bool > > timode_scalar_to_vector_candidate_p (rtx_insn *insn) > > { > > - rtx def_set = single_set (insn); > > + rtx def_set = pseudo_reg_set (insn); > > > > if (!def_set) > > return false; > > > > - if (has_non_address_hard_reg (insn)) > > - return false; > > - > > rtx src = SET_SRC (def_set); > > rtx dst = SET_DEST (def_set); > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 1bf0c1a7f01..bcb44a3fb46 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -1700,6 +1700,22 @@ (define_insn "*pushdi2_rex64" > > [(set_attr "type" "push,multi,multi") > > (set_attr "mode" "DI")]) > > > > +(define_insn_and_split "*pushv1ti2" > > + [(set (match_operand:V1TI 0 "push_operand" "=<") > > + (match_operand:V1TI 1 "register_operand" "v"))] > > + "TARGET_64BIT && TARGET_STV" > > + "#" > > + "&& reload_completed" > > + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) > > + (set (match_dup 0) (match_dup 1))] > > +{ > > + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode))); > > + /* Preserve memory attributes. */ > > + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); > > +} > > + [(set_attr "type" "multi") > > + (set_attr "mode" "TI")]) > > Please put the above pattern just above DWI "*push<mode>2".
Fixed. > > + > > ;; Convert impossible pushes of immediate to existing instructions. > > ;; First try to get scratch register and go through it. In case this > > ;; fails, push sign extended lower part first and then overwrite > > diff --git a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c > > b/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c > > deleted file mode 100644 > > index 02cafb016d1..00000000000 > > --- a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c > > +++ /dev/null > > @@ -1,105 +0,0 @@ > > -/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ > > -/* { dg-options "-mpreferred-stack-boundary=2" } */ > > - > > -/* C99 6.5.2.2 Function calls. > > - Test passing varargs of the combination of decimal float types and > > - other types. */ > > - > > -#include <stdarg.h> > > -#include "dfp-dbg.h" > > - > > -/* Supposing the list of varying number of arguments is: > > - unsigned int, _Decimal128, double, _Decimal32, _Decimal64. */ > > - > > -static _Decimal32 > > -vararg_d32 (unsigned arg, ...) > > -{ > > - va_list ap; > > - _Decimal32 result; > > - > > - va_start (ap, arg); > > - > > - va_arg (ap, unsigned int); > > - va_arg (ap, _Decimal128); > > - va_arg (ap, double); > > - result = va_arg (ap, _Decimal32); > > - > > - va_end (ap); > > - return result; > > -} > > - > > -static _Decimal32 > > -vararg_d64 (unsigned arg, ...) > > -{ > > - va_list ap; > > - _Decimal64 result; > > - > > - va_start (ap, arg); > > - > > - va_arg (ap, unsigned int); > > - va_arg (ap, _Decimal128); > > - va_arg (ap, double); > > - va_arg (ap, _Decimal32); > > - result = va_arg (ap, _Decimal64); > > - > > - va_end (ap); > > - return result; > > -} > > - > > -static _Decimal128 > > -vararg_d128 (unsigned arg, ...) > > -{ > > - va_list ap; > > - _Decimal128 result; > > - > > - va_start (ap, arg); > > - > > - va_arg (ap, unsigned int); > > - result = va_arg (ap, _Decimal128); > > - > > - va_end (ap); > > - return result; > > -} > > - > > -static unsigned int > > -vararg_int (unsigned arg, ...) > > -{ > > - va_list ap; > > - unsigned int result; > > - > > - va_start (ap, arg); > > - > > - result = va_arg (ap, unsigned int); > > - > > - va_end (ap); > > - return result; > > -} > > - > > -static double > > -vararg_double (unsigned arg, ...) > > -{ > > - va_list ap; > > - float result; > > - > > - va_start (ap, arg); > > - > > - va_arg (ap, unsigned int); > > - va_arg (ap, _Decimal128); > > - result = va_arg (ap, double); > > - > > - va_end (ap); > > - return result; > > -} > > - > > - > > -int > > -main () > > -{ > > - if (vararg_d32 (3, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 3.0df) FAILURE > > - if (vararg_d64 (4, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 4.0dd) FAILURE > > - if (vararg_d128 (1, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 1.0dl) FAILURE > > - if (vararg_int (0, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 0) FAILURE > > - if (vararg_double (2, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 2.0) FAILURE > > - > > - FINISH > > -} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c > > b/gcc/testsuite/gcc.target/i386/pr95021-1.c > > new file mode 100644 > > index 00000000000..218776240ee > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c > > @@ -0,0 +1,25 @@ > > +/* { dg-do compile { target ia32 } } */ > > +/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } * > > No need for -mregparm=0, it is the the default. Also, do we really need -W? Fixed. -W is added for [hjl@gnu-cfl-2 i386]$ gcc -m32 pr95021-1.c -msse2 -mstv -W -O2 -S -o /tmp/x.s pr95021-1.c: In function ‘bar’: pr95021-1.c:21:17: warning: variable ‘target’ might be clobbered by ‘longjmp’ or ‘vfork’ [-Wclobbered] 21 | long long target = *target_p; | ^~~~~~ [hjl@gnu-cfl-2 i386]$ > > +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ > > Please rather scan for "movq %xmmX, (%esp)", this is what we are > looking for, the bad code has: > > movd %xmm1, 20(%esp) > pushl 20(%esp) Fixed. > Also, we can check that "psrlq $32, %xmm1" is not there, which means > that DImode value wasn't broken to SImode. Please improve this test in > all 32bit testcases. Fixed. I consolidated compile-time tests with run-time tests. > > + > > +typedef long long a; > > +struct __jmp_buf_tag { > > +}; > > +typedef struct __jmp_buf_tag sigjmp_buf[1]; > > +sigjmp_buf jmp_buf; > > +int __sigsetjmp (sigjmp_buf, int); > > +typedef struct { > > + a target; > > +} b; > > +extern a *target_p; > > +b *c; > > +extern void foo (a); > > +void > > +d(void) > > +{ > > + if (__sigsetjmp(jmp_buf, 1)) { > > + a target = *target_p; > > + c->target = target; > > + foo(target); > > + } > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c > > b/gcc/testsuite/gcc.target/i386/pr95021-2.c > > new file mode 100644 > > index 00000000000..473b5f81664 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c > > @@ -0,0 +1,53 @@ > > +/* { dg-do run { target ia32 } } */ > > +/* { dg-require-effective-target sse2_runtime } */ > > +/* { dg-options "-O2 -msse2 -mstv -W" } */ > > This is the same testcase as the one above. The default is -mregparm=0. I added this run-time to verify that the new *pushv1ti2 pattern works correctly. > > + > > + > > +#include <stdlib.h> > > +#include <setjmp.h> > > + > > +jmp_buf buf; > > + > > +long long *target_p; > > +long long *c; > > + > > +int count; > > + > > +__attribute__ ((noclone, noinline)) > > +void > > +foo (long long x) > > +{ > > + if (x != *c) > > + abort (); > > + if (!count) > > + { > > + count++; > > + longjmp (buf, 1); > > + } > > +} > > + > > +__attribute__ ((noclone, noinline)) > > +void > > +bar (void) > > +{ > > + if (setjmp (buf)) > > + { > > + long long target = *target_p; > > + *c = target; > > + foo (target); > > + } > > + else > > + foo (0); > > +} > > + > > +int > > +main () > > +{ > > + long long val = 30; > > + long long local = 0; > > + target_p = &val; > > + c = &local; > > + bar (); > > + if (val != local) > > + abort (); > > + return 0; > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c > > b/gcc/testsuite/gcc.target/i386/pr95021-3.c > > new file mode 100644 > > index 00000000000..b213b5db072 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c > > @@ -0,0 +1,25 @@ > > +/* { dg-do compile { target ia32 } } */ > > +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */ > > +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ > > Just #include "pr95021-1.c". No need to dupicate the same code. Fixed. > > +typedef long long a; > > +struct __jmp_buf_tag { > > +}; > > +typedef struct __jmp_buf_tag sigjmp_buf[1]; > > +sigjmp_buf jmp_buf; > > +int __sigsetjmp (sigjmp_buf, int); > > +typedef struct { > > + a target; > > +} b; > > +extern a *target_p; > > +b *c; > > +extern void foo (a); > > +void > > +d(void) > > +{ > > + if (__sigsetjmp(jmp_buf, 1)) { > > + a target = *target_p; > > + c->target = target; > > + foo(target); > > + }> +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-4.c > > b/gcc/testsuite/gcc.target/i386/pr95021-4.c > > new file mode 100644 > > index 00000000000..a080d7b289f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95021-4.c > > @@ -0,0 +1,25 @@ > > +/* { dg-do compile { target int128 } } */ > > +/* { dg-options "-O2 -msse2 -mstv -W" } */ > > +/* { dg-final { scan-assembler "movdqa\[ \t\]+\[^\n\]*, %xmm" } } */ > > Please scan for "movaps %xmmX, (%rsp)" here. Fixed. > > + > > +typedef __int128 a; > > +struct __jmp_buf_tag { > > +}; > > +typedef struct __jmp_buf_tag sigjmp_buf[1]; > > +sigjmp_buf jmp_buf; > > +int __sigsetjmp (sigjmp_buf, int); > > +typedef struct { > > + a target; > > +} b; > > +extern a *target_p; > > +b *c; > > +extern void foo (int, int, int, int, int, int, a); > > +void > > +d(void) > > +{ > > + if (__sigsetjmp(jmp_buf, 1)) { > > + a target = *target_p; > > + c->target = target; > > + foo(1, 2, 3, 4, 5, 6, target); > > + } > > +} > > diff --git a/gcc/testsuite/gcc.target/i386/pr95021-5.c > > b/gcc/testsuite/gcc.target/i386/pr95021-5.c > > new file mode 100644 > > index 00000000000..3fcf2c3922e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr95021-5.c > > @@ -0,0 +1,59 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-require-effective-target sse2_runtime } */ > > +/* { dg-options "-O2 -msse2 -mstv -W" } */ > > + > > +#include <stdlib.h> > > +#include <setjmp.h> > > + > > +jmp_buf buf; > > + > > +__int128 *target_p; > > +__int128 *c; > > + > > +int count; > > + > > +__attribute__ ((noclone, noinline)) > > +void > > +foo (__int128 i1, __int128 i2, __int128 i3, __int128 x) > > +{ > > + if (i1 != 0xbadbeef1) > > + abort (); > > + if (i2 != 0x2badbeef) > > + abort (); > > + if (i3 != 0xbad3beef) > > + abort (); > > + if (x != *c) > > + abort (); > > + if (!count) > > + { > > + count++; > > + longjmp (buf, 1); > > + } > > +} > > + > > +__attribute__ ((noclone, noinline)) > > +void > > +bar (void) > > +{ > > + if (setjmp (buf)) > > + { > > + __int128 target = *target_p; > > + *c = target; > > + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, target); > > + } > > + else > > + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, 0); > > +} > > + > > +int > > +main () > > +{ > > + __int128 val = 30; > > + __int128 local = 0; > > + target_p = &val; > > + c = &local; > > + bar (); > > + if (val != local) > > + abort (); > > + return 0; > > +} > > -- > > 2.26.2 > > This is the updated patch I am checking in. Thanks. -- H.J.
From 3d24d6f2a9bac6a348a731e6df80ce60995214a9 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Tue, 12 May 2020 11:30:29 -0700 Subject: [PATCH] x86: Allow V1TI vector register pushes Add V1TI vector register push and split it after reload to a sequence of: (set (reg:P SP_REG) (plus:P SP_REG) (const_int -8))) (set (match_dup 0) (match_dup 1)) so that STV pass can convert TI mode integer push to V1TI vector register push. Rename has_non_address_hard_reg to pseudo_reg_set, combine calls of single_set and has_non_address_hard_reg to pseudo_reg_set, to ignore pseudo register push. Remove c-c++-common/dfp/func-vararg-mixed-2.c since it is compiled with -mpreferred-stack-boundary=2 and leads to segfault: Dump of assembler code for function __bid_nesd2: 0x08049210 <+0>: endbr32 0x08049214 <+4>: push %esi 0x08049215 <+5>: push %ebx 0x08049216 <+6>: call 0x8049130 <__x86.get_pc_thunk.bx> 0x0804921b <+11>: add $0x8de5,%ebx 0x08049221 <+17>: sub $0x20,%esp 0x08049224 <+20>: mov 0x30(%esp),%esi 0x08049228 <+24>: pushl 0x2c(%esp) 0x0804922c <+28>: call 0x804e600 <__bid32_to_bid64> 0x08049231 <+33>: mov %esi,(%esp) 0x08049234 <+36>: movd %edx,%xmm1 0x08049238 <+40>: movd %eax,%xmm0 0x0804923c <+44>: punpckldq %xmm1,%xmm0 => 0x08049240 <+48>: movaps %xmm0,0x10(%esp) 0x08049245 <+53>: call 0x804e600 <__bid32_to_bid64> 0x0804924a <+58>: push %edx 0x0804924b <+59>: push %eax 0x0804924c <+60>: pushl 0x1c(%esp) 0x08049250 <+64>: pushl 0x1c(%esp) 0x08049254 <+68>: call 0x804b260 <__bid64_quiet_not_equal> 0x08049259 <+73>: add $0x34,%esp 0x0804925c <+76>: pop %ebx 0x0804925d <+77>: pop %esi 0x0804925e <+78>: ret when libgcc is compiled with -msse2. According to GCC manual: '-mpreferred-stack-boundary=NUM' Attempt to keep the stack boundary aligned to a 2 raised to NUM byte boundary. If '-mpreferred-stack-boundary' is not specified, the default is 4 (16 bytes or 128-bits). *Warning:* If you use this switch, then you must build all modules with the same value, including any libraries. This includes the system libraries and startup modules. c-c++-common/dfp/func-vararg-mixed-2.c, which was added by commit 3b2488ca6ece182f2136a20ee5fa0bb92f935b0f Author: H.J. Lu <hongjiu...@intel.com> Date: Wed Jul 30 19:24:02 2008 +0000 func-vararg-alternate-d128-2.c: New. 2008-07-30 H.J. Lu <hongjiu...@intel.com> Joey Ye <joey...@intel.com> * gcc.dg/dfp/func-vararg-alternate-d128-2.c: New. * gcc.dg/dfp/func-vararg-mixed-2.c: Likewise. isn't expected to work with libgcc. gcc/ PR target/95021 * config/i386/i386-features.c (has_non_address_hard_reg): Renamed to ... (pseudo_reg_set): This. Return the SET expression. Ignore pseudo register push. (general_scalar_to_vector_candidate_p): Combine single_set and has_non_address_hard_reg calls to pseudo_reg_set. (timode_scalar_to_vector_candidate_p): Likewise. * config/i386/i386.md (*pushv1ti2): New pattern. gcc/testsuite/ PR target/95021 * c-c++-common/dfp/func-vararg-mixed-2.c: Removed. * gcc.target/i386/pr95021-1.c: New test. * gcc.target/i386/pr95021-2.c: Likewise. * gcc.target/i386/pr95021-3.c: Likewise. * gcc.target/i386/pr95021-4.c: Likewise. * gcc.target/i386/pr95021-5.c: Likewise. --- gcc/config/i386/i386-features.c | 37 +++--- gcc/config/i386/i386.md | 16 +++ .../c-c++-common/dfp/func-vararg-mixed-2.c | 105 ------------------ gcc/testsuite/gcc.target/i386/pr95021-1.c | 27 +++++ gcc/testsuite/gcc.target/i386/pr95021-2.c | 39 +++++++ gcc/testsuite/gcc.target/i386/pr95021-3.c | 5 + gcc/testsuite/gcc.target/i386/pr95021-4.c | 28 +++++ gcc/testsuite/gcc.target/i386/pr95021-5.c | 45 ++++++++ 8 files changed, 181 insertions(+), 121 deletions(-) delete mode 100644 gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-5.c diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 78fb373db6e..b9b764c092a 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -1253,25 +1253,36 @@ scalar_chain::convert () return converted_insns; } -/* Return 1 if INSN uses or defines a hard register. - Hard register uses in a memory address are ignored. - Clobbers and flags definitions are ignored. */ +/* Return the SET expression if INSN doesn't reference hard register. + Return NULL if INSN uses or defines a hard register, excluding + pseudo register pushes, hard register uses in a memory address, + clobbers and flags definitions. */ -static bool -has_non_address_hard_reg (rtx_insn *insn) +static rtx +pseudo_reg_set (rtx_insn *insn) { + rtx set = single_set (insn); + if (!set) + return NULL; + + /* Check pseudo register push first. */ + if (REG_P (SET_SRC (set)) + && !HARD_REGISTER_P (SET_SRC (set)) + && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))) + return set; + df_ref ref; FOR_EACH_INSN_DEF (ref, insn) if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) && DF_REF_REGNO (ref) != FLAGS_REG) - return true; + return NULL; FOR_EACH_INSN_USE (ref, insn) if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) - return true; + return NULL; - return false; + return set; } /* Check if comparison INSN may be transformed @@ -1345,14 +1356,11 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode) static bool general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) { - rtx def_set = single_set (insn); + rtx def_set = pseudo_reg_set (insn); if (!def_set) return false; - if (has_non_address_hard_reg (insn)) - return false; - rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); @@ -1442,14 +1450,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) static bool timode_scalar_to_vector_candidate_p (rtx_insn *insn) { - rtx def_set = single_set (insn); + rtx def_set = pseudo_reg_set (insn); if (!def_set) return false; - if (has_non_address_hard_reg (insn)) - return false; - rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1bf0c1a7f01..9fd32f28bf3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1674,6 +1674,22 @@ (define_insn "*cmpi<unord><MODEF:mode>" ;; Push/pop instructions. +(define_insn_and_split "*pushv1ti2" + [(set (match_operand:V1TI 0 "push_operand" "=<") + (match_operand:V1TI 1 "register_operand" "v"))] + "TARGET_64BIT && TARGET_STV" + "#" + "&& reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode))); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +} + [(set_attr "type" "multi") + (set_attr "mode" "TI")]) + (define_insn "*push<mode>2" [(set (match_operand:DWI 0 "push_operand" "=<,<") (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))] diff --git a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c b/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c deleted file mode 100644 index 02cafb016d1..00000000000 --- a/gcc/testsuite/c-c++-common/dfp/func-vararg-mixed-2.c +++ /dev/null @@ -1,105 +0,0 @@ -/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ -/* { dg-options "-mpreferred-stack-boundary=2" } */ - -/* C99 6.5.2.2 Function calls. - Test passing varargs of the combination of decimal float types and - other types. */ - -#include <stdarg.h> -#include "dfp-dbg.h" - -/* Supposing the list of varying number of arguments is: - unsigned int, _Decimal128, double, _Decimal32, _Decimal64. */ - -static _Decimal32 -vararg_d32 (unsigned arg, ...) -{ - va_list ap; - _Decimal32 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - va_arg (ap, double); - result = va_arg (ap, _Decimal32); - - va_end (ap); - return result; -} - -static _Decimal32 -vararg_d64 (unsigned arg, ...) -{ - va_list ap; - _Decimal64 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - va_arg (ap, double); - va_arg (ap, _Decimal32); - result = va_arg (ap, _Decimal64); - - va_end (ap); - return result; -} - -static _Decimal128 -vararg_d128 (unsigned arg, ...) -{ - va_list ap; - _Decimal128 result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - result = va_arg (ap, _Decimal128); - - va_end (ap); - return result; -} - -static unsigned int -vararg_int (unsigned arg, ...) -{ - va_list ap; - unsigned int result; - - va_start (ap, arg); - - result = va_arg (ap, unsigned int); - - va_end (ap); - return result; -} - -static double -vararg_double (unsigned arg, ...) -{ - va_list ap; - float result; - - va_start (ap, arg); - - va_arg (ap, unsigned int); - va_arg (ap, _Decimal128); - result = va_arg (ap, double); - - va_end (ap); - return result; -} - - -int -main () -{ - if (vararg_d32 (3, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 3.0df) FAILURE - if (vararg_d64 (4, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 4.0dd) FAILURE - if (vararg_d128 (1, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 1.0dl) FAILURE - if (vararg_int (0, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 0) FAILURE - if (vararg_double (2, 0, 1.0dl, 2.0, 3.0df, 4.0dd) != 2.0) FAILURE - - FINISH -} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c b/gcc/testsuite/gcc.target/i386/pr95021-1.c new file mode 100644 index 00000000000..a0b9a262a87 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]%xmm\[0-9\]+, \\(%esp\\)" } } */ +/* { dg-final { scan-assembler-not "psrlq" } } */ + +#include <setjmp.h> + +extern jmp_buf buf; + +extern long long *target_p; +extern long long *c; + +extern void foo (long long); + +__attribute__ ((noclone, noinline)) +void +bar (void) +{ + if (setjmp (buf)) + { + long long target = *target_p; + *c = target; + foo (target); + } + else + foo (0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c b/gcc/testsuite/gcc.target/i386/pr95021-2.c new file mode 100644 index 00000000000..53247e52784 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c @@ -0,0 +1,39 @@ +/* { dg-do run { target ia32 } } */ +/* { dg-require-effective-target sse2_runtime } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ + +#include <stdlib.h> +#include "pr95021-1.c" + +jmp_buf buf; + +long long *target_p; +long long *c; + +int count; + +__attribute__ ((noclone, noinline)) +void +foo (long long x) +{ + if (x != *c) + abort (); + if (!count) + { + count++; + longjmp (buf, 1); + } +} + +int +main () +{ + long long val = 30; + long long local = 0; + target_p = &val; + c = &local; + bar (); + if (val != local) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-3.c b/gcc/testsuite/gcc.target/i386/pr95021-3.c new file mode 100644 index 00000000000..1748161a77c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-3.c @@ -0,0 +1,5 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */ +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */ + +#include "pr95021-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr95021-4.c b/gcc/testsuite/gcc.target/i386/pr95021-4.c new file mode 100644 index 00000000000..d5bb28cc01a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-4.c @@ -0,0 +1,28 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ +/* { dg-final { scan-assembler "(movaps|vmovdqa)\[ \t\]%xmm\[0-9\]+, \\(%rsp\\)" } } */ + +#include <setjmp.h> + +extern jmp_buf buf; + +extern __int128 *target_p; +__int128 *c; + +extern int count; + +extern void foo (__int128, __int128, __int128, __int128); + +__attribute__ ((noclone, noinline)) +void +bar (void) +{ + if (setjmp (buf)) + { + __int128 target = *target_p; + *c = target; + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, target); + } + else + foo (0xbadbeef1, 0x2badbeef, 0xbad3beef, 0); +} diff --git a/gcc/testsuite/gcc.target/i386/pr95021-5.c b/gcc/testsuite/gcc.target/i386/pr95021-5.c new file mode 100644 index 00000000000..d8658095bc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95021-5.c @@ -0,0 +1,45 @@ +/* { dg-do run { target int128 } } */ +/* { dg-require-effective-target sse2_runtime } */ +/* { dg-options "-O2 -msse2 -mstv -W" } */ + +#include <stdlib.h> +#include "pr95021-4.c" + +jmp_buf buf; + +__int128 *target_p; +__int128 *c; + +int count; + +__attribute__ ((noclone, noinline)) +void +foo (__int128 i1, __int128 i2, __int128 i3, __int128 x) +{ + if (i1 != 0xbadbeef1) + abort (); + if (i2 != 0x2badbeef) + abort (); + if (i3 != 0xbad3beef) + abort (); + if (x != *c) + abort (); + if (!count) + { + count++; + longjmp (buf, 1); + } +} + +int +main () +{ + __int128 val = 30; + __int128 local = 0; + target_p = &val; + c = &local; + bar (); + if (val != local) + abort (); + return 0; +} -- 2.26.2