Re: [PATCH v3] i386: Check AX input in any_mul_highpart peepholes

2021-12-26 Thread Uros Bizjak via Gcc-patches
On Sat, Dec 25, 2021 at 3:28 PM H.J. Lu  wrote:
>
> When applying peephole optimization to transform
>
> mov imm, %reg0
> mov %reg1, %AX_REG
> imul %reg0
>
> to
>
> mov imm, %AX_REG
> imul %reg1
>
> disable peephole optimization if reg1 == AX_REG.
>
> gcc/
>
> PR bootstrap/103785
> * config/i386/i386.md: Swap operand order in comments and check
> AX input in any_mul_highpart peepholes.
>
> gcc/testsuite/
>
> PR bootstrap/103785
> * gcc.target/i386/pr103785.c: New test.
> ---
>  gcc/config/i386/i386.md  |  9 --
>  gcc/testsuite/gcc.target/i386/pr103785.c | 38 
>  2 files changed, 44 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 284b9507466..1c1ec227f71 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -8578,7 +8578,7 @@
> (set_attr "mode" "SI")])
>
>  ;; Highpart multiplication peephole2s to tweak register allocation.
> -;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx  ->  mov %rax,imm; imulq %rdi
> +;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx  ->  mov imm,%rax; imulq %rdi
>  (define_peephole2
>[(set (match_operand:SWI48 0 "general_reg_operand")
> (match_operand:SWI48 1 "immediate_operand"))
> @@ -8588,7 +8588,8 @@
>(any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
>   (clobber (match_dup 2))
>   (clobber (reg:CC FLAGS_REG))])]
> -  "REGNO (operands[0]) != REGNO (operands[2])
> +  "REGNO (operands[3]) != AX_REG
> +   && REGNO (operands[0]) != REGNO (operands[2])
> && REGNO (operands[0]) != REGNO (operands[3])
> && (REGNO (operands[0]) == REGNO (operands[4])
> || peep2_reg_dead_p (3, operands[0]))"
> @@ -8608,7 +8609,9 @@
>  (any_mul_highpart:SI (match_dup 2) (match_dup 0
>   (clobber (match_dup 2))
>   (clobber (reg:CC FLAGS_REG))])]
> -  "REGNO (operands[0]) != REGNO (operands[2])
> +  "REGNO (operands[3]) != AX_REG

Please also add TARGET_64BIT here, this DImode zero-extend pattern
applies to 64bit targets only.

OK with the above change.

Thanks,
Uros.

> +   && REGNO (operands[0]) != REGNO (operands[2])
> +   && REGNO (operands[2]) != REGNO (operands[3])
> && REGNO (operands[0]) != REGNO (operands[3])
> && (REGNO (operands[0]) == REGNO (operands[4])
> || peep2_reg_dead_p (3, operands[0]))"
> diff --git a/gcc/testsuite/gcc.target/i386/pr103785.c 
> b/gcc/testsuite/gcc.target/i386/pr103785.c
> new file mode 100644
> index 000..5503b965256
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103785.c
> @@ -0,0 +1,38 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include 
> +
> +struct wrapper_t
> +{
> +  long k;
> +  long e;
> +};
> +
> +struct wrapper_t **table;
> +
> +__attribute__ ((weak, regparm (2)))
> +void
> +update (long k, long e)
> +{
> +  struct wrapper_t *elmt;
> +
> +  elmt = table[k % 3079];
> +  if (elmt == 0)
> +return;
> +  elmt->e = e;
> +}
> +
> +int
> +main ()
> +{
> +  table = (struct wrapper_t **) malloc (20 * sizeof (struct wrapper_t *));
> +  for (int i = 0; i < 20; i++)
> +table[i] = (struct wrapper_t *) calloc (sizeof (struct wrapper_t), 1);
> +  if (table[10]->e != 0)
> +abort ();
> +  update (10, 20);
> +  if (table[10]->e != 20)
> +abort ();
> +  return 0;
> +}
> --
> 2.33.1
>


Re: [PATCH v3] i386: Check AX input in any_mul_highpart peepholes

2021-12-26 Thread H.J. Lu via Gcc-patches
On Sun, Dec 26, 2021 at 2:31 AM Uros Bizjak  wrote:
>
> On Sat, Dec 25, 2021 at 3:28 PM H.J. Lu  wrote:
> >
> > When applying peephole optimization to transform
> >
> > mov imm, %reg0
> > mov %reg1, %AX_REG
> > imul %reg0
> >
> > to
> >
> > mov imm, %AX_REG
> > imul %reg1
> >
> > disable peephole optimization if reg1 == AX_REG.
> >
> > gcc/
> >
> > PR bootstrap/103785
> > * config/i386/i386.md: Swap operand order in comments and check
> > AX input in any_mul_highpart peepholes.
> >
> > gcc/testsuite/
> >
> > PR bootstrap/103785
> > * gcc.target/i386/pr103785.c: New test.
> > ---
> >  gcc/config/i386/i386.md  |  9 --
> >  gcc/testsuite/gcc.target/i386/pr103785.c | 38 
> >  2 files changed, 44 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 284b9507466..1c1ec227f71 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -8578,7 +8578,7 @@
> > (set_attr "mode" "SI")])
> >
> >  ;; Highpart multiplication peephole2s to tweak register allocation.
> > -;; mov %rdx,imm; mov %rax,%rdi; imulq %rdx  ->  mov %rax,imm; imulq %rdi
> > +;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx  ->  mov imm,%rax; imulq %rdi
> >  (define_peephole2
> >[(set (match_operand:SWI48 0 "general_reg_operand")
> > (match_operand:SWI48 1 "immediate_operand"))
> > @@ -8588,7 +8588,8 @@
> >(any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
> >   (clobber (match_dup 2))
> >   (clobber (reg:CC FLAGS_REG))])]
> > -  "REGNO (operands[0]) != REGNO (operands[2])
> > +  "REGNO (operands[3]) != AX_REG
> > +   && REGNO (operands[0]) != REGNO (operands[2])
> > && REGNO (operands[0]) != REGNO (operands[3])
> > && (REGNO (operands[0]) == REGNO (operands[4])
> > || peep2_reg_dead_p (3, operands[0]))"
> > @@ -8608,7 +8609,9 @@
> >  (any_mul_highpart:SI (match_dup 2) (match_dup 0
> >   (clobber (match_dup 2))
> >   (clobber (reg:CC FLAGS_REG))])]
> > -  "REGNO (operands[0]) != REGNO (operands[2])
> > +  "REGNO (operands[3]) != AX_REG
>
> Please also add TARGET_64BIT here, this DImode zero-extend pattern
> applies to 64bit targets only.
>
> OK with the above change.

This is the patch I am checking in.

Thanks.

> Thanks,
> Uros.
>
> > +   && REGNO (operands[0]) != REGNO (operands[2])
> > +   && REGNO (operands[2]) != REGNO (operands[3])
> > && REGNO (operands[0]) != REGNO (operands[3])
> > && (REGNO (operands[0]) == REGNO (operands[4])
> > || peep2_reg_dead_p (3, operands[0]))"
> > diff --git a/gcc/testsuite/gcc.target/i386/pr103785.c 
> > b/gcc/testsuite/gcc.target/i386/pr103785.c
> > new file mode 100644
> > index 000..5503b965256
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr103785.c
> > @@ -0,0 +1,38 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include 
> > +
> > +struct wrapper_t
> > +{
> > +  long k;
> > +  long e;
> > +};
> > +
> > +struct wrapper_t **table;
> > +
> > +__attribute__ ((weak, regparm (2)))
> > +void
> > +update (long k, long e)
> > +{
> > +  struct wrapper_t *elmt;
> > +
> > +  elmt = table[k % 3079];
> > +  if (elmt == 0)
> > +return;
> > +  elmt->e = e;
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  table = (struct wrapper_t **) malloc (20 * sizeof (struct wrapper_t *));
> > +  for (int i = 0; i < 20; i++)
> > +table[i] = (struct wrapper_t *) calloc (sizeof (struct wrapper_t), 1);
> > +  if (table[10]->e != 0)
> > +abort ();
> > +  update (10, 20);
> > +  if (table[10]->e != 20)
> > +abort ();
> > +  return 0;
> > +}
> > --
> > 2.33.1
> >



-- 
H.J.
From 53300b05644c9cc0ed4067d1128118e98a3584d8 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Fri, 24 Dec 2021 12:50:21 -0800
Subject: [PATCH] i386: Check AX input in any_mul_highpart peepholes

When applying peephole optimization to transform

	mov imm, %reg0
	mov %reg1, %AX_REG
	imul %reg0

to

	mov imm, %AX_REG
	imul %reg1

disable peephole optimization if reg1 == AX_REG.

gcc/

	PR target/103785
	* config/i386/i386.md: Swap operand order in comments and check
	AX input in any_mul_highpart peepholes.

gcc/testsuite/

	PR target/103785
	* gcc.target/i386/pr103785.c: New test.
---
 gcc/config/i386/i386.md  | 10 +--
 gcc/testsuite/gcc.target/i386/pr103785.c | 38 
 2 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103785.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 284b9507466..eea5d5d3f01 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8578,7 +8578,7 @@
(set_attr "mode" "SI")])
 
 ;; Highpart multiplication peephole2s to tweak register allocation.
-;; mov %rdx,imm; mov

Re: [PATCH] Make integer output faster in libgfortran

2021-12-26 Thread FX via Gcc-patches
Hi,

> I tested this on x86_64-pc-linux-gnu with
> make -k -j8 check-fortran RUNTESTFLAGS="--target_board=unix'{-m32,-m64}'"
> and didn't see any problems.

Thanks Thomas! Pushed.


> (We could also do something like that for a 32-bit system, but
> that is another kettle of fish).

We probably wouldn’t get a speed-up that big. Even on 32-bit targets (at least 
common ones), the 64-bit type and its operations (notably division) are 
implemented via CPU instructions, not library calls.

At this point, the output of integers is probably bound by the many layers of 
indirection of libgfortran's I/O system (which are necessary because of the 
rich I/O formatting allowed by the standard).

Best,
FX

Re: [PATCH] Make integer output faster in libgfortran

2021-12-26 Thread Thomas Koenig via Gcc-patches

Hi FX,



(We could also do something like that for a 32-bit system, but
that is another kettle of fish).


We probably wouldn’t get a speed-up that big. Even on 32-bit targets
(at least common ones), the 64-bit type and its operations (notably
division) are implemented via CPU instructions, not library calls.


I'll look at this a bit more closely and report :-)


 At this point, the output of integers is probably bound by the
many layers of indirection of libgfortran's I/O system (which
are necessary because of the rich I/O formatting allowed by
the standard).


There are a few things we could do.  Getting a LTO-capable version
of libgfortran would be a huge step, because the compiler could
then strip out all of these layers.  The speed of

  character:: c


  write (*,'(A)', advance="no") c

could stand some improvement :-)

Regards

Thomas


Re: [PATCH v2] ix86: Don't use the 'm' constraint for x86_64_general_operand

2021-12-26 Thread H.J. Lu via Gcc-patches
On Fri, Dec 24, 2021 at 12:19 AM Uros Bizjak  wrote:
>
> On Thu, Dec 23, 2021 at 3:42 PM H.J. Lu  wrote:
> >
> > On Mon, Dec 20, 2021 at 2:22 PM H.J. Lu  wrote:
> > >
> > > On Mon, Dec 20, 2021 at 12:38 PM Jakub Jelinek  wrote:
> > > >
> > > > On Mon, Dec 20, 2021 at 11:44:08AM -0800, H.J. Lu wrote:
> > > > > The problem is in
> > > > >
> > > > > (define_memory_constraint "TARGET_MEM_CONSTRAINT"
> > > > >   "Matches any valid memory."
> > > > >   (and (match_code "mem")
> > > > >(match_test "memory_address_addr_space_p (GET_MODE (op), XEXP 
> > > > > (op, 0),
> > > > >  MEM_ADDR_SPACE 
> > > > > (op))")))
> > > > >
> > > > > define_register_constraint allows LRA to convert the operand to the 
> > > > > form
> > > > > '(mem (reg X))', where X is a base register.  I am testing the v2 
> > > > > patch with
> > > >
> > > > If you mean replacing an immediate with a MEM containing that immediate,
> > > > isn't that often the right thing though?
> > > > I mean, if the register pressure is high and options are either spill 
> > > > some
> > > > register, set it to immediate, use it in one instruction and then fill 
> > > > the
> > > > spilled register (i.e. 2 memory loads), compared to a MEM use on the
> > > > arithmetic instruction one MEM seems cheaper to me.  With -fPIC and the
> > > > cst needing runtime relocation slightly less so of course.
> > > >
> > >
> > > We will check the performance impact on SPEC CPU 2017.
> > > Here is the v2 patch.  Liwei,  can you help collect SPEC CPU 2017
> > > impact of the enclosed patch?  Thanks.
> >
> > We checked SPEC CPU 2017 performance with -O2 and -Ofast.
> > There is no performance regression.   OK for master?
>
> OK if there are no further comments from Jakub.

I will check it tomorrow if there are further comments from Jakub.

Thanks.

> Thanks,
> Uros.
>
> > > > The code due to ivopts is trying to have something like
> > > >   size_t a = (size_t) &tunable_list;
> > > >   size_t b = 0xffa8 - a;
> > > >   size_t c = x + b;
> > > > and for that cst - &symbol one needs actually 2 registers, one to hold 
> > > > the
> > > > constant and one to hold the (%rip) based address.
> > > > (insn 790 789 791 111 (set (reg:DI 292)
> > > > (const_int -88 [0xffa8])) "dl-tunables.c":304:15 76 
> > > > {*movdi_internal}
> > > >  (nil))
> > > > (insn 791 790 792 111 (set (reg:DI 293)
> > > > (symbol_ref:DI ("tunable_list") [flags 0x2]   > > > 0x7f3460aa9cf0 tunable_list>)) "dl-tunables.c":304:15 76 
> > > > {*movdi_internal}
> > > >  (nil))
> > > > (insn 792 791 793 111 (parallel [
> > > > (set (reg:DI 291)
> > > > (minus:DI (reg:DI 292)
> > > > (reg:DI 293)))
> > > > (clobber (reg:CC 17 flags))
> > > > ]) "dl-tunables.c":304:15 299 {*subdi_1}
> > > >  (nil))
> > > > (insn 793 792 794 111 (parallel [
> > > > (set (reg:DI 294)
> > > > (plus:DI (reg:DI 291)
> > > > (reg:DI 198 [ ivtmp.176 ])))
> > > > (clobber (reg:CC 17 flags))
> > > > ]) "dl-tunables.c":304:15 226 {*adddi_1}
> > > >  (nil))
> > > > It would be smarter to rewrite the above into a lea 
> > > > 88+tunable_list(%rip), %temp1
> > > > and use a subtraction instead of addition in the last insn above, or of
> > > > course in the particular case even consider the following 2 instructions
> > > > that do:
> > > > (insn 794 793 795 111 (set (reg:DI 296)
> > > > (symbol_ref:DI ("tunable_list") [flags 0x2]   > > > 0x7f3460aa9cf0 tunable_list>)) "dl-tunables.c":304:15 76 
> > > > {*movdi_internal}
> > > >  (nil))
> > > > (insn 795 794 796 111 (parallel [
> > > > (set (reg:DI 295 [ cur ])
> > > > (plus:DI (reg:DI 294)
> > > > (reg:DI 296)))
> > > > (clobber (reg:CC 17 flags))
> > > > ]) "dl-tunables.c":304:15 226 {*adddi_1}
> > > >  (nil))
> > > > and find out that &tuneble_list - &tuneble_list is 0 and we don't need 
> > > > it at
> > > > all.  Guess we don't figure that out due to the cast of one of those
> > > > addresses to size_t and the other one used in POINTER_PLUS_EXPR as 
> > > > normal
> > > > pointer.
> > > >
> > > > Jakub
> > > >
> > >
> > >
> > > --
> > > H.J.
> >
> > Thanks.
> >
> > --
> > H.J.



-- 
H.J.


[PATCH] Emit correct types for CHARACTER(C_CHAR), VALUE

2021-12-26 Thread FX via Gcc-patches
Hi,

I’ve spent many hours going down a rabbit hole, namely that we emit incorrect 
types for C interoperable procedure parameters of type CHARACTER(C_CHAR), 
VALUE. The full details can be found here: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103828 I will try here to be as 
brief as I can.

This bug manifests on the new target aarche64-apple-darwin, and there only if a 
CHARACTER(C_CHAR), VALUE argument is the 11th (or greater) argument to the 
function. Then, we would pass it as a 32-bit value, instead of an 8-bit value 
(as should a char). The front-end emits the wrong type information about the 
argument, but it turns out for all other targets this simply does not matter, 
due to their ABI.

Interoperable CHARACTER(C_CHAR), VALUE arguments are weird, because deep down 
they’re not actually character types (in the Fortran sense of the word), i.e. 
arrays, but scalar C char (i.e., integers!). So they’ve always been kind-of 
second class citizens in the front-end. A function was introduced in 2007 that 
basically patches their type on the fly: gfc_conv_scalar_char_value(). The 
problem is that function is simply wrong. It says (from comments):

  /* Modify the tree type for scalar character dummy arguments of bind(c)
procedures if they are passed by value.  The tree type for them will
be promoted to INTEGER_TYPE for the middle end, which appears to be
what C would do with characters passed by-value.  The value attribute
 implies the dummy is a scalar.  */

The error is a misunderstanding of the C standard. char (and integer types 
smaller than int) are promoted to int for argument passing in unprototyped 
(old-school K&R) functions. In prototyped functions, they are most definitely 
not promoted. Therefore, the function is wrong.

More importantly, we can actually emit the right type much earlier in the 
front-end, and we already have a place to do so, in gfc_sym_type(). We already 
handle the result variables, but by-value arguments should have the same type, 
so the fix is easy.

In fact, once we emit the right type in gfc_sym_type(), we actually never need 
to fix it up in gfc_conv_scalar_char_value(). Removing the hack, I decided to 
leave it as a series of assertions, because we have seen in the past that this 
case is difficult to catch, and can easily regress due to changes elsewhere in 
the front-end.

Then, once the hack is removed, it becomes clear that the two callers of 
gfc_conv_scalar_char_value() expected different things from it: it’s called 
from generate_local_decl() to hack the sym->backend_decl; and it’s called from 
gfc_conv_procedure_call() for its true purpose, as a real conv_* function. For 
the first case, we can inline the checks (which have replaced the hack) into 
the caller in generate_local_decl(). Once only the second use case remains, we 
can make the function local to that file (make static, remove gfc_ prefix).


The patch comes with a couple of extra testcases, exercising the passing of 
char by value, and as integer, and their interoperability.
It was regtested on x86_64-pc-gnu-linux, on aarch64-apple-darwin (because its 
ABI is picky).

OK to commit?

FX



0001-Fortran-Emit-correct-types-for-CHARACTER-C_CHAR-VALU.patch
Description: Binary data


[PATCH] Prune LTO warnings that some tests fail

2021-12-26 Thread FX via Gcc-patches
The testcase g++.dg/torture/pr31863.C fails on *-apple-darwin with:

FAIL: g++.dg/torture/pr31863.C   -O2 -flto  (test for excess errors)
Excess errors:
lto-wrapper: warning: using serial compilation of 2 LTRANS jobs

The test otherwise works fine, but those lto warnings should be pruned.

OK to commit?
FX




0001-LTO-Prune-some-warnings-in-the-testsuite.patch
Description: Binary data


[PATCH] Set __FLT_EVAL_METHOD__/__FLT_EVAL_METHOD_TS_18661_3__ to FLT_EVAL_METHOD_PROMOTE_TO_FLOAT when FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.

2021-12-26 Thread liuhongt via Gcc-patches
Since FLT_EVAL_METHOD only accepts negative value, 0, 1 or 2.

gcc/c-family/ChangeLog:

PR c/100854
* c-common.c (excess_precision_mode_join): Return
FLT_EVAL_METHOD_PROMOTE_TO_FLOAT when both x and y are
FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.
---
 gcc/c-family/c-common.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index a25d59fa77b..4dab4d60773 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8842,6 +8842,10 @@ excess_precision_mode_join (enum flt_eval_method x,
   || y == FLT_EVAL_METHOD_UNPREDICTABLE)
 return FLT_EVAL_METHOD_UNPREDICTABLE;
 
+  /* FLT_EVAL_METHOD only accepts negative values, 0, 1 or 2, but
+ FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 is 16.  */
+if (x == y && x == FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16)
+  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   /* GCC only supports one interchange type right now, _Float16.  If
  we're evaluating _Float16 in 16-bit precision, then flt_eval_method
  will be FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.  */
-- 
2.18.1



Re: [PATCH] fixed testcase riscv/pr103302.c

2021-12-26 Thread Kito Cheng via Gcc-patches
Hi ShiHua:

Thanks for the fix, committed with Andrew's comment and minor fix for
commit comment.

On Thu, Dec 23, 2021 at 3:41 PM Andrew Pinski via Gcc-patches
 wrote:
>
> On Wed, Dec 22, 2021 at 11:37 PM  wrote:
> >
> > From: LiaoShihua 
> >
> > because riscv32 not support __int128, so skip if -march=rv32*.
> >
> > gcc/testsuite\ChangeLog:
> > * gcc.target/riscv/pr103302.c: skip if -march=rv32*
> > ---
> >  gcc/testsuite/gcc.target/riscv/pr103302.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/gcc/testsuite/gcc.target/riscv/pr103302.c 
> > b/gcc/testsuite/gcc.target/riscv/pr103302.c
> > index 822c4087416..2cfb12498a2 100644
> > --- a/gcc/testsuite/gcc.target/riscv/pr103302.c
> > +++ b/gcc/testsuite/gcc.target/riscv/pr103302.c
> > @@ -1,4 +1,5 @@
> >  /* { dg-do run } */
> > +/* { dg-skip-if "rv32 not support _int128" { *-*-* } { "-march=rv32*" } } 
> > */
>
> Better fix:
> /* { dg-do run { target int128 } } */
>
> Thanks,
> Andrew Pinski
>
> >  /* { dg-options "-Og -fharden-compares -fno-tree-dce -fno-tree-fre " } */
> >
> >  typedef unsigned char u8;
> > --
> > 2.31.1.windows.1
> >