date:20220607

[PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in mov{si, di}_internal.

2022-06-07 Thread liuhongt via Gcc-patches

So alternative v won't be ignored in record_reg_classes.

Similar for *r alternatives in some vector patterns.

It helps testcase in the PR, also RA now makes better decisions for
gcc.target/i386/extract-insert-combining.c

    movd    %esi, %xmm0
    movd    %edi, %xmm1
-   movl    %esi, -12(%rsp)
    paddd   %xmm0, %xmm1
    pinsrd  $0, %esi, %xmm0
    paddd   %xmm1, %xmm0

The patch has no big impact on SPEC2017 for both O2 and Ofast
march=native run.

And I noticed there are some changes in SPEC2017

Before:
mov mem, %eax
vmovd %eax, %xmm0
..
mov %eax, 64(%rsp)

After:
vmovd mem, %xmm0
..
vmovd %xmm0, 64(%rsp)

Which should be exactly what we want?

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.md (*movsi_internal): Change alternative
from *v to ?v.
(*movdi_internal): Ditto.
* config/i386/sse.md (vec_set_0): Change alternative *r
to ?r.
(*vec_extractv4sf_mem): Ditto.
(*vec_extracthf): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr105513-1.c: New test.
* gcc.target/i386/extract-insert-combining.c: Add new
scan-assembler-not for spill.
---
 gcc/config/i386/i386.md  |  8 
 gcc/config/i386/sse.md   |  8 
 .../gcc.target/i386/extract-insert-combining.c   |  1 +
 gcc/testsuite/gcc.target/i386/pr105513-1.c   | 16 
 4 files changed, 25 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 48a98e1b68b..5b538413942 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2251,9 +2251,9 @@ (define_split
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r 
,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
+"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r 
,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
(match_operand:DI 1 "general_operand"
-"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  ,*x 
,*y ,*r,*kBk,*k,*k,CBC"))]
+"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  ,*x 
,*y ,*r,*kBk,*k,*k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
@@ -2472,9 +2472,9 @@ (define_peephole2
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-"=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
+"=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
(match_operand:SI 1 "general_operand"
-"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
+"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 62688f8e29d..d41ce2e1a9b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
 ;; see comment above inline_secondary_memory_needed function in i386.cc
 (define_insn "vec_set_0"
   [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
+ "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
(vec_merge:VI4F_128
  (vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
+ " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
  (const_int 1)))]
@@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
(set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
 
 (define_insn_and_split "*vec_extractv4sf_mem"
-  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
+  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
(vec_select:SF
  (match_operand:V4SF 1 "memory_operand" "o,o,o")
  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
@@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract_0"
   "operands[1] = gen_lowpart (HFmode, operands[1]);")
 
 (define_insn "*vec_extracthf"
-  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
+  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
(vec_select:HF
  (match_operand:V8HF 1 "register_operand" "v,v,0,v")
  (parallel
diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c 
b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
index 32d951e6832..5a53d4cbf06 100644
--- a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
+++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
@@ -4,6 +4,7

Re: [PATCH] configure: arrange to use appropriate objcopy

2022-06-07 Thread Jakub Jelinek via Gcc-patches

On Tue, Jun 07, 2022 at 08:12:26AM +0200, Jan Beulich via Gcc-patches wrote:
> > This regressed
> > Executing on host: /home/jakub/src/gcc/obj44/gcc/xgcc 
> > -B/home/jakub/src/gcc/obj44/gcc/ -fdiagnostics-plain-output   -flto -g 
> > -gsplit-dwarf   -c -o c_lto_pr83719_0.o 
> > /home/jakub/src/gcc/gcc/testsuite/gcc.dg/lto/pr83719_0.c(timeout = 300)
> > spawn -ignore SIGHUP /home/jakub/src/gcc/obj44/gcc/xgcc 
> > -B/home/jakub/src/gcc/obj44/gcc/ -fdiagnostics-plain-output -flto -g 
> > -gsplit-dwarf -c -o c_lto_pr83719_0.o 
> > /home/jakub/src/gcc/gcc/testsuite/gcc.dg/lto/pr83719_0.c
> > cc1: note: '-gsplit-dwarf' is not supported with LTO, disabling
> > /home/jakub/src/gcc/obj44/gcc/objcopy: line 120: exec: --: invalid option
> > exec: usage: exec [-cl] [-a name] [command [argument ...]] [redirection ...]
> > compiler exited with status 1
> > FAIL: gcc.dg/lto/pr83719 c_lto_pr83719_0.o assemble,  -flto -g 
> > -gsplit-dwarf 
> > for me, both on x86_64-linux and i686-linux.
> 
> Hmm, it surely worked for me for both, with and without in-tree binutils
> (you don't say which variant you saw the failure with).

System binutils.
grep ORIGINAL_ gcc/Makefile 
ORIGINAL_AS_FOR_TARGET = /usr/bin/as
ORIGINAL_LD_FOR_TARGET = /usr/bin/ld
ORIGINAL_NM_FOR_TARGET = /usr/bin/nm
ORIGINAL_OBJCOPY_FOR_TARGET = 
ls -l /usr/bin/{as,ld,nm,objcopy}
-rwxr-xr-x. 1 root root 439192 Mar 10 12:51 /usr/bin/as
lrwxrwxrwx. 1 root root 20 May 20 13:28 /usr/bin/ld -> /etc/alternatives/ld
-rwxr-xr-x. 1 root root  47928 Mar 10 12:51 /usr/bin/nm
-rwxr-xr-x. 1 root root 184304 Mar 10 12:51 /usr/bin/objcopy
(but ditto grep ORIGINAL_ stage1-gcc/Makefile
or grep ORIGINAL_ prev-gcc/Makefile).

> > For some reason, I have
> > grep OBJCOPY *gcc/Makefile
> > gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
> > prev-gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
> > stage1-gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
> 
> What about the corresponding ORIGINAL_NM_FOR_TARGET? And could you provide
> one of the config.log instances? 

config.log has:
configure:23317: checking what linker to use
configure:23351: result: /usr/bin/ld
configure:23379: checking for nm
configure:23397: found /usr/bin/nm
configure:23409: result: /usr/bin/nm
configure:23420: checking what nm to use
configure:23428: result: /usr/bin/nm
configure:23498: checking what objcopy to use
configure:23506: result:
configure:23536: checking for objdump
configure:23554: found /usr/bin/objdump
configure:23566: result: /usr/bin/objdump
configure:23577: checking what objdump to use
configure:23587: result: /usr/bin/objdump

It is a bootstrapped compiler:
../configure --enable-languages=default,obj-c++,lto,go,d 
--enable-checking=yes,rtl,extra --enable-libstdcxx-backtrace=yes && make -j32 
bootstrap > LOG 2>&1
on Fedora 36 x86_64-linux (ada left out because it is currently broken).

Comparing the toplevel Makefile, I see some differences:
grep NM_FOR_TARGET Makefile 
NM_FOR_TARGET="$(NM_FOR_TARGET)"; export NM_FOR_TARGET; \
NM="$(COMPILER_NM_FOR_TARGET)"; export NM; \
NM_FOR_TARGET=$(NM)
COMPILER_NM_FOR_TARGET=$$r/$(HOST_SUBDIR)/gcc/nm
"NM_FOR_TARGET=$(NM_FOR_TARGET)" \
'NM=$(COMPILER_NM_FOR_TARGET)' \
grep OBJCOPY_FOR_TARGET Makefile 
OBJCOPY_FOR_TARGET="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY_FOR_TARGET; \
OBJCOPY="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY; \
OBJCOPY_FOR_TARGET=$(OBJCOPY)
"OBJCOPY_FOR_TARGET=$(OBJCOPY_FOR_TARGET)" \
'OBJCOPY=$$(OBJCOPY_FOR_TARGET)' \
E.g. the COMPILER_*_FOR_TARGET line is missing completely for OBJCOPY
and the last line is different too.
Also:
grep ^NM[[:space:]]*= Makefile; echo end
NM = nm
end
grep ^OBJCOPY[[:space:]]*= Makefile; echo end
end

Note, I see
S["OBJDUMP"]="objdump"
S["OBJCOPY"]="objcopy"
S["WINDMC"]="windmc"
S["WINDRES"]="windres"
S["STRIP"]="strip"
S["RANLIB"]="ranlib"
S["NM"]="nm"
in toplevel status, it is just that toplevel:
grep ^NM Makefile.tpl 
NM_FOR_BUILD = @NM_FOR_BUILD@
NM = @NM@
NM_FOR_TARGET=@NM_FOR_TARGET@
grep ^OBJCOPY Makefile.tpl 
OBJCOPY_FOR_TARGET=@OBJCOPY_FOR_TARGET@
doesn't have the OBJCOPY = @OBJCOPY@ line, and perhaps the
COMPILER_OBJCOPY_FOR_TARGET stuff.

Jakub

Re: [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in mov{si, di}_internal.

2022-06-07 Thread Hongtao Liu via Gcc-patches

On Tue, Jun 7, 2022 at 3:41 PM liuhongt via Gcc-patches
 wrote:
>
> So alternative v won't be ignored in record_reg_classes.
>
> Similar for *r alternatives in some vector patterns.
>
> It helps testcase in the PR, also RA now makes better decisions for
> gcc.target/i386/extract-insert-combining.c
>
> movd%esi, %xmm0
> movd%edi, %xmm1
> -   movl%esi, -12(%rsp)
> paddd   %xmm0, %xmm1
> pinsrd  $0, %esi, %xmm0
> paddd   %xmm1, %xmm0
>
> The patch has no big impact on SPEC2017 for both O2 and Ofast
> march=native run.
>
> And I noticed there's some changes in SPEC2017
>
> Before:
> mov mem, %eax
> vmovd %eax, %xmm0
> ..
> mov %eax, 64(%rsp)
>
> After:
> vmovd mem, %xmm0
> ..
> vmovd %xmm0, 64(%rsp)
>
> Which should be exactly what we want?
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*movsi_internal): Change alternative
> from *v to ?v.
> (*movdi_internal): Ditto.
> * config/i386/sse.md (vec_set_0): Change alternative *r
> to ?r.
> (*vec_extractv4sf_mem): Ditto.
> (*vec_extracthf): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr105513-1.c: New test.
> * gcc.target/i386/extract-insert-combining.c: Add new
> scan-assembler-not for spill.
> ---
>  gcc/config/i386/i386.md  |  8 
>  gcc/config/i386/sse.md   |  8 
>  .../gcc.target/i386/extract-insert-combining.c   |  1 +
>  gcc/testsuite/gcc.target/i386/pr105513-1.c   | 16 
>  4 files changed, 25 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 48a98e1b68b..5b538413942 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2251,9 +2251,9 @@ (define_split
>
>  (define_insn "*movdi_internal"
>[(set (match_operand:DI 0 "nonimmediate_operand"
> -"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r 
> ,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
> +"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r 
> ,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
> (match_operand:DI 1 "general_operand"
> -"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  
> ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
> +"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  
> ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
>"!(MEM_P (operands[0]) && MEM_P (operands[1]))
> && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> @@ -2472,9 +2472,9 @@ (define_peephole2
>
>  (define_insn "*movsi_internal"
>[(set (match_operand:SI 0 "nonimmediate_operand"
> -"=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
> +"=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
> (match_operand:SI 1 "general_operand"
> -"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
> +"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
>"!(MEM_P (operands[0]) && MEM_P (operands[1]))
> && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 62688f8e29d..d41ce2e1a9b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
>  ;; see comment above inline_secondary_memory_needed function in i386.cc
>  (define_insn "vec_set_0"
>[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
> - "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
> + "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
> (vec_merge:VI4F_128
>   (vec_duplicate:VI4F_128
> (match_operand: 2 "general_operand"
> - " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
> + " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
>   (match_operand:VI4F_128 1 "nonimm_or_0_operand"
>   " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
>   (const_int 1)))]
> @@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
> (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
>
>  (define_insn_and_split "*vec_extractv4sf_mem"
> -  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
> +  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
> (vec_select:SF
>   (match_operand:V4SF 1 "memory_operand" "o,o,o")
>   (parallel [(match_operand 2 "const_0_to_3_operand")])))]
> @@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract_0"
>"operands[1] = gen_lowpart (HFmode, operands[1]);")
>
>  (define_insn "*vec_extracthf"
> -  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
> +  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
> (vec_select:HF
>   (match_operand:V8HF 1 "register_operand" "v,v,0,v")
>   (parallel
> diff --gi

Modula-2: merge followup re: patch-set 01..09

2022-06-07 Thread Gaius Mulley via Gcc-patches



Thank you to the reviewers of the patch sets 01 through 09.  As a brief
followup/summary I propose rewriting linking as per Richard's
suggestion:

https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595782.html

Aside from the many technical advantages, logistically it also means:

  *  the gm2 driver code becomes similar to existing front end drivers.
  *  allowing the next set of patches to be smaller and easier to review.
 [The driver will be posted separate to the gimple interface]

Would it be ok to open up a short term branch devel/m2link to accommodate
these disruptive code changes, or would a private branch be more
appropriate?  Currently the runtime changes have been completed.

regards,
Gaius

[PATCH] Simplify (B * v + C) * D -> BD* v + CD when B, C, D are all INTEGER_CST.

2022-06-07 Thread liuhongt via Gcc-patches

>> + (mult:c (plus:c@4 (mult:c@5 @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3)

>since canonicalization puts INTEGER_CSTs last the :c should not be necessary.

Changed.

>> + (if (single_use (@4)
>> +  && single_use (@5))

>since the resulting expression is not simple using :s instead of
>single_use (..) should
>work as well.

Changed.

> when we go from (a + CST1) * CST2 to a * CST2 + CST1*CST2 we have
> to worry about CST1 == -a which would make (a+CST1) * INT_MAX
> not overflow but a * INT_MAX + CST1 * INT_MAX might.  Is the
> overflow check for CST1 * INT_MAX sufficient to rule out
> that a * CST2 does not overflow when (a + CST1) * CST2 does not
> overflow?  Consider a == 2, CST1 == -1, CST2 == INT_MAX,
> here 1 * INT_MAX does not overflow, nor does -1 * INT_MAX, but
> 2 * INT_MAX overflows and thus the resulting expression invokes
> undefined behavior.
>
> The same issue probably arises for the first pattern outer half
> which looks like (a' + CST2) * CST3 with a' = a * CST1?
>
> The appropriate solution might be to perform the arithmetic
> in an unsigned type with the implication that has on value-range
> analysis.

Yes, the patch is now based on value-range analysis.

Updated the patch.

Similar for (v + B) * C + D -> C * v + BCD.
Don't simplify it when there's overflow and overflow is UB for type v.

gcc/ChangeLog:

PR tree-optimization/53533
* match.pd: Simplify (B * v + C) * D -> BD * v + CD and
(v + B) * C + D -> C * v + BCD when B,C,D are all INTEGER_CST,
and there's no overflow or !TYPE_OVERFLOW_UNDEFINED.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr53533-1.c: New test.
* gcc.target/i386/pr53533-2.c: New test.
* gcc.target/i386/pr53533-3.c: New test.
* gcc.target/i386/pr53533-4.c: New test.
* gcc.target/i386/pr53533-5.c: New test.
* gcc.dg/vect/slp-11a.c: Adjust testcase.
---
 gcc/match.pd  | 82 +++
 gcc/testsuite/gcc.dg/vect/slp-11a.c   | 10 +--
 gcc/testsuite/gcc.target/i386/pr53533-1.c | 23 +++
 gcc/testsuite/gcc.target/i386/pr53533-2.c | 46 +
 gcc/testsuite/gcc.target/i386/pr53533-3.c | 24 +++
 gcc/testsuite/gcc.target/i386/pr53533-4.c | 46 +
 gcc/testsuite/gcc.target/i386/pr53533-5.c | 22 ++
 7 files changed, 248 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53533-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53533-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53533-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53533-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr53533-5.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 44a385b912d..54f53a1f988 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -489,6 +489,88 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (!overflow || TYPE_OVERFLOW_WRAPS (type))
(mult @0 { wide_int_to_tree (type, mul); }
 
+/* Similar to above, but there could be an extra add/sub between
+   successive multiplications.  */
+(simplify
+ (mult (plus:s (mult:s@4 @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3)
+ (with {
+   bool overflowed = true;
+   wi::overflow_type ovf1, ovf2;
+   wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@3),
+  TYPE_SIGN (type), &ovf1);
+   wide_int add = wi::mul (wi::to_wide (@2), wi::to_wide (@3),
+  TYPE_SIGN (type), &ovf2);
+  if (TYPE_OVERFLOW_UNDEFINED (type))
+{
+#if GIMPLE
+  value_range vr0;
+  if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE
+ && get_global_range_query ()->range_of_expr (vr0, @4)
+ && vr0.kind () == VR_RANGE)
+   {
+ wide_int wmin0 = vr0.lower_bound ();
+ wide_int wmax0 = vr0.upper_bound ();
+ wmin0 = wi::mul (wmin0, wi::to_wide (@3), TYPE_SIGN (type), &ovf1);
+ wmax0 = wi::mul (wmax0, wi::to_wide (@3), TYPE_SIGN (type), &ovf2);
+ if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
+   {
+ wi::add (wmin0, add, TYPE_SIGN (type), &ovf1);
+ wi::add (wmax0, add, TYPE_SIGN (type), &ovf2);
+ if (ovf1 == wi::OVF_NONE && ovf2 == wi::OVF_NONE)
+   overflowed = false;
+   }
+   }
+#endif
+}
+  else
+   overflowed = false;
+ }
+  /* Skip folding on overflow.  */
+  (if (!overflowed)
+   (plus (mult @0 { wide_int_to_tree (type, mul); })
+{ wide_int_to_tree (type, add); }
+
+/* Similar to above, but a multiplication between successive additions.  */
+(simplify
+ (plus (mult:s (plus:s @0 INTEGER_CST@1) INTEGER_CST@2) INTEGER_CST@3)
+ (with {
+   bool overflowed = true;
+   wi::overflow_type ovf1;
+   wi::overflow_type ovf2;
+   wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),
+  TYPE_SIGN (type), &ovf1);
+   wide_int add = wi::add (mul, wi::to_wide (@3),
+  TYPE_SIGN (type), &ovf2);
+  if (TYPE_OVERFLOW_UNDEFINED (ty

[PATCH V2] RISC-V:Fix a bug that is the CMO builtins are missing parameter

2022-06-07 Thread shiyulong

From: yulong 

We changed the builtins format for the zicbom and zicboz subextensions and added
test cases.
Differences from the previous version:
1. We deleted the RTL mode's second input operand.
2. We changed the type of the builtins from RISCV_BUILTIN_DIRECT to
RISCV_BUILTIN_DIRECT_NO_TARGET.
3. We modified the test cases and added more parameter tests.

Thanks, Simon and Kito.

gcc/ChangeLog:

* config/riscv/riscv-cmo.def (RISCV_BUILTIN): changed BUILTIN_TYPE and 
FUNCTION_TYPE
* config/riscv/riscv-ftypes.def (0): changed "DEF_RISCV_FTYPE (0, 
(SI/DI))" to "DEF_RISCV_FTYPE (1, (VOID, SI/DI))"
(1):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicbom-1.c: added parameter and modified the 
fun's type
* gcc.target/riscv/cmo-zicbom-2.c: added parameter and modified the 
fun's type
* gcc.target/riscv/cmo-zicboz-1.c: added parameter and modified the 
fun's type
* gcc.target/riscv/cmo-zicboz-2.c: added parameter and modified the 
fun's type

---
 gcc/config/riscv/riscv-cmo.def| 16 ++--
 gcc/config/riscv/riscv-ftypes.def |  4 +--
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c | 25 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c | 25 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c |  9 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c |  9 ---
 6 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/riscv-cmo.def b/gcc/config/riscv/riscv-cmo.def
index b30ecf96ec1..7b3da7c24ef 100644
--- a/gcc/config/riscv/riscv-cmo.def
+++ b/gcc/config/riscv/riscv-cmo.def
@@ -1,16 +1,16 @@
 // zicbom
-RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, clean32),
-RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, clean64),
+RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_SI, clean32),
+RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_DI, clean64),
 
-RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, flush32),
-RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, flush64),
+RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_SI, flush32),
+RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_DI, flush64),
 
-RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, inval32),
-RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, inval64),
+RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_SI, inval32),
+RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_DI, inval64),
 
 // zicboz
-RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, zero32),
-RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, zero64),
+RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_SI, zero32),
+RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_DI, zero64),
 
 // zicbop
 RISCV_BUILTIN (prefetchi_si, "zicbop_cbo_prefetchi", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, prefetchi32),
diff --git a/gcc/config/riscv/riscv-ftypes.def 
b/gcc/config/riscv/riscv-ftypes.def
index 62421292ce7..77ac6ea3f66 100644
--- a/gcc/config/riscv/riscv-ftypes.def
+++ b/gcc/config/riscv/riscv-ftypes.def
@@ -28,7 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 
 DEF_RISCV_FTYPE (0, (USI))
 DEF_RISCV_FTYPE (1, (VOID, USI))
-DEF_RISCV_FTYPE (0, (SI))
-DEF_RISCV_FTYPE (0, (DI))
+DEF_RISCV_FTYPE (1, (VOID, SI))
+DEF_RISCV_FTYPE (1, (VOID, DI))
 DEF_RISCV_FTYPE (1, (SI, SI))
 DEF_RISCV_FTYPE (1, (DI, DI))
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
index e2ba2183511..2bf5b77380e 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
@@ -1,21 +1,28 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc_zicbom -mabi=lp64" } */
+int var;
 
-int foo1()
+void foo1()
 {
-return __builtin_riscv_zicbom_cbo_clean();
+__builtin_riscv_zicbom_cbo_clean(0);
+__builtin_riscv_zicbom_cbo_clean(var);
+__builtin_riscv_zicbom_cbo_clean(0x111);
 }
 
-int foo2()
+void foo2()
 {
-return __builtin_riscv_zicbom_cbo_flush();
+__builtin_riscv_zicbom_cbo_flush(0);
+__builtin_riscv_zicbom_cbo_flush(var);
+__builtin_riscv_zicbom_cbo_flush(0x111);
 }
 
-int foo3()
+void foo3()
 {
-return __builtin_riscv_zicbom_cbo_inval();
+__builtin_riscv_zicbom_cbo_inval(0);
+__builtin_riscv_zicbom_cbo_inval(var);
+__builtin_riscv_zicbom_cbo_inval(0x111);
 }
 
-/* { dg-final { scan-assembler-times "cbo.clean" 1 } } *

Re: [PATCH] PR middle-end/105853: Call store_constructor directly from calls.cc.

2022-06-07 Thread Eric Botcazou via Gcc-patches

> The (proposed) solution is to export store_constructor (and its helper
> function int_expr_size) from expr.cc, by removing their static qualifier
> and prototyping both functions in expr.h, so they can be called directly
> from load_register_parameters in calls.cc.  This cures both ICEs, but
> almost as important produces much better code generation than GCC 12.
> 
> For PR 105853, GCC 12 generates:
> 
> compose_nd_na_ipv6_src:
> movzx eax, WORD PTR eth_addr_zero[rip+2]
> movzx edx, WORD PTR eth_addr_zero[rip]
> movzx edi, WORD PTR eth_addr_zero[rip+4]
> sal rax, 16
> or rax, rdx
> sal rdi, 32
> or rdi, rax
> xor eax, eax
> jmp packet_set_nd
> eth_addr_zero:  .zero 6
> 
> where now (with this fix) GCC 13 generates:
> compose_nd_na_ipv6_src:
> xorl%edi, %edi
> xorl%eax, %eax
> jmp packet_set_nd
> 
> Likewise, for PR 105856 on ARM, we'd previously generate:
> g_329_3:
> movw r3, #:lower16:.LANCHOR0
> movt r3, #:upper16:.LANCHOR0
> ldr r0, [r3]
> b func_19
> 
> but with this optimization we now generate:
> g_329_3:
> mov r0, #6
> b   func_19

Nice work!  Fiddling with RTL expansion like this is never easy...

> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  I've also confirmed that on a
> cross-compiler to arm-linux-gnueabihf --with-arch=armv6 this fixes the
> target specific ICE in PR105856.  The make check is currently running
> with --target_board=unix{-m32}, OK for mainline if that also passes?

Yes, it looks quite uncontroversial if you ask me and will unblock people.

-- 
Eric Botcazou

[PATCH-1 v2, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-07 Thread HAO CHEN GUI via Gcc-patches

Hi,
  This patch replaces shift and ior insns with one rotate and mask
insn for the split patterns which are for DI byte swap on Power6. The
test case shows the optimization.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-07 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (define_split for bswapdi load): Merge shift
and ior insns to one rotate and mask insn.
(define_split for bswapdi register): Likewise.

gcc/testsuite/
* gcc.target/powerpc/pr93453-1.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..83800df12aa 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2828,8 +2828,8 @@ (define_split
   emit_insn (gen_bswapsi2 (dest_32, word2));
 }

-  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT (0xffffffff)));
   DONE;
 })

@@ -2914,10 +2914,10 @@ (define_split
   rtx op3_si  = simplify_gen_subreg (SImode, op3, DImode, lo_off);

   emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
-  emit_insn (gen_bswapsi2 (dest_si, src_si));
-  emit_insn (gen_bswapsi2 (op3_si, op2_si));
-  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_bswapsi2 (op3_si, src_si));
+  emit_insn (gen_bswapsi2 (dest_si, op2_si));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+  GEN_INT (0xffffffff)));
   DONE;
 })

diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
new file mode 100644
index 000..4271886561f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+
+unsigned long load_byte_reverse (unsigned long *in)
+{
+   return __builtin_bswap64 (*in);
+}
+
+unsigned long byte_reverse (unsigned long in)
+{
+   return __builtin_bswap64 (in);
+}
+
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */

Re: [PATCH] configure: arrange to use appropriate objcopy

2022-06-07 Thread Jan Beulich via Gcc-patches

On 07.06.2022 09:41, Jakub Jelinek wrote:
> On Tue, Jun 07, 2022 at 08:12:26AM +0200, Jan Beulich via Gcc-patches wrote:
>>> This regressed
>>> Executing on host: /home/jakub/src/gcc/obj44/gcc/xgcc 
>>> -B/home/jakub/src/gcc/obj44/gcc/ -fdiagnostics-plain-output   -flto -g 
>>> -gsplit-dwarf   -c -o c_lto_pr83719_0.o 
>>> /home/jakub/src/gcc/gcc/testsuite/gcc.dg/lto/pr83719_0.c(timeout = 300)
>>> spawn -ignore SIGHUP /home/jakub/src/gcc/obj44/gcc/xgcc 
>>> -B/home/jakub/src/gcc/obj44/gcc/ -fdiagnostics-plain-output -flto -g 
>>> -gsplit-dwarf -c -o c_lto_pr83719_0.o 
>>> /home/jakub/src/gcc/gcc/testsuite/gcc.dg/lto/pr83719_0.c
>>> cc1: note: '-gsplit-dwarf' is not supported with LTO, disabling
>>> /home/jakub/src/gcc/obj44/gcc/objcopy: line 120: exec: --: invalid option
>>> exec: usage: exec [-cl] [-a name] [command [argument ...]] [redirection ...]
>>> compiler exited with status 1
>>> FAIL: gcc.dg/lto/pr83719 c_lto_pr83719_0.o assemble,  -flto -g 
>>> -gsplit-dwarf 
>>> for me, both on x86_64-linux and i686-linux.
>>
>> Hmm, it surely worked for me for both, with and without in-tree binutils
>> (you don't say which variant you saw the failure with).
> 
> System binutils.
> grep ORIGINAL_ gcc/Makefile 
> ORIGINAL_AS_FOR_TARGET = /usr/bin/as
> ORIGINAL_LD_FOR_TARGET = /usr/bin/ld
> ORIGINAL_NM_FOR_TARGET = /usr/bin/nm
> ORIGINAL_OBJCOPY_FOR_TARGET = 
> ls -l /usr/bin/{as,ld,nm,objcopy}
> -rwxr-xr-x. 1 root root 439192 Mar 10 12:51 /usr/bin/as
> lrwxrwxrwx. 1 root root 20 May 20 13:28 /usr/bin/ld -> 
> /etc/alternatives/ld
> -rwxr-xr-x. 1 root root  47928 Mar 10 12:51 /usr/bin/nm
> -rwxr-xr-x. 1 root root 184304 Mar 10 12:51 /usr/bin/objcopy
> (but ditto grep ORIGINAL_ stage1-gcc/Makefile
> or grep ORIGINAL_ prev-gcc/Makefile).
> 
>>> For some reason, I have
>>> grep OBJCOPY *gcc/Makefile
>>> gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
>>> prev-gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
>>> stage1-gcc/Makefile:ORIGINAL_OBJCOPY_FOR_TARGET = 
>>
>> What about the corresponding ORIGINAL_NM_FOR_TARGET? And could you provide
>> one of the config.log instances? 
> 
> config.log has:
> configure:23317: checking what linker to use
> configure:23351: result: /usr/bin/ld
> configure:23379: checking for nm
> configure:23397: found /usr/bin/nm
> configure:23409: result: /usr/bin/nm
> configure:23420: checking what nm to use
> configure:23428: result: /usr/bin/nm
> configure:23498: checking what objcopy to use
> configure:23506: result:
> configure:23536: checking for objdump
> configure:23554: found /usr/bin/objdump
> configure:23566: result: /usr/bin/objdump
> configure:23577: checking what objdump to use
> configure:23587: result: /usr/bin/objdump
> 
> It is a bootstrapped compiler:
> ../configure --enable-languages=default,obj-c++,lto,go,d 
> --enable-checking=yes,rtl,extra --enable-libstdcxx-backtrace=yes && make -j32 
> bootstrap > LOG 2>&1
> on Fedora 36 x86_64-linux (ada left out because it is currently broken).
> 
> Comparing the toplevel Makefile, I see some differences:
> grep NM_FOR_TARGET Makefile 
>   NM_FOR_TARGET="$(NM_FOR_TARGET)"; export NM_FOR_TARGET; \
>   NM="$(COMPILER_NM_FOR_TARGET)"; export NM; \
> NM_FOR_TARGET=$(NM)
> COMPILER_NM_FOR_TARGET=$$r/$(HOST_SUBDIR)/gcc/nm
>   "NM_FOR_TARGET=$(NM_FOR_TARGET)" \
>   'NM=$(COMPILER_NM_FOR_TARGET)' \
> grep OBJCOPY_FOR_TARGET Makefile 
>   OBJCOPY_FOR_TARGET="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY_FOR_TARGET; \
>   OBJCOPY="$(OBJCOPY_FOR_TARGET)"; export OBJCOPY; \
> OBJCOPY_FOR_TARGET=$(OBJCOPY)
>   "OBJCOPY_FOR_TARGET=$(OBJCOPY_FOR_TARGET)" \
>   'OBJCOPY=$$(OBJCOPY_FOR_TARGET)' \
> E.g. the COMPILER_*_FOR_TARGET line is missing completely for OBJCOPY
> and the last line is different too.
> Also:
> grep ^NM[[:space:]]*= Makefile; echo end
> NM = nm
> end
> grep ^OBJCOPY[[:space:]]*= Makefile; echo end
> end
> 
> Note, I see
> S["OBJDUMP"]="objdump"
> S["OBJCOPY"]="objcopy"
> S["WINDMC"]="windmc"
> S["WINDRES"]="windres"
> S["STRIP"]="strip"
> S["RANLIB"]="ranlib"
> S["NM"]="nm"
> in toplevel status, it is just that toplevel:
> grep ^NM Makefile.tpl 
> NM_FOR_BUILD = @NM_FOR_BUILD@
> NM = @NM@
> NM_FOR_TARGET=@NM_FOR_TARGET@
> grep ^OBJCOPY Makefile.tpl 
> OBJCOPY_FOR_TARGET=@OBJCOPY_FOR_TARGET@
> doesn't have the OBJCOPY = @OBJCOPY@ line, and perhaps the
> COMPILER_OBJCOPY_FOR_TARGET stuff.

Let me revert the change - I've just realized that I only thought I would
have tested this with system binutils as well. I'm sorry for the breakage.

Jan

Re: [PATCH] Disparages SSE_REGS alternatives sligntly with ?v instead of v in mov{si,di}_internal.

2022-06-07 Thread Uros Bizjak via Gcc-patches

On Tue, Jun 7, 2022 at 9:41 AM liuhongt  wrote:
>
> So alternative v won't be igored in record_reg_classess.
>
> Similar for *r alternatives in some vector patterns.
>
> It helps testcase in the PR, also RA now makes better decisions for
> gcc.target/i386/extract-insert-combining.c
>
> movd%esi, %xmm0
> movd%edi, %xmm1
> -   movl%esi, -12(%rsp)
> paddd   %xmm0, %xmm1
> pinsrd  $0, %esi, %xmm0
> paddd   %xmm1, %xmm0
>
> The patch has no big impact on SPEC2017 for both O2 and Ofast
> march=native run.
>
> And I noticed there's some changes in SPEC2017
>
> Before:
> mov mem, %eax
> vmovd %eax, %xmm0
> ..
> mov %eax, 64(%rsp)
>
> After:
> vmovd mem, %xmm0
> ..
> vmovd %xmm0, 64(%rsp)
>
> Which should be exactly what we want?
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (*movsi_internal): Change alternative
> from *v to ?v.
> (*movdi_internal): Ditto.
> * config/i386/sse.md (vec_set_0): Change alternative *r
> to ?r.
> (*vec_extractv4sf_mem): Ditto.
> (*vec_extracthf): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr105513-1.c: New test.
> * gcc.target/i386/extract-insert-combining.c: Add new
> scan-assembler-not for spill.

Let's experiment with this approach. The above is also
better for TUNE_INTER_UNIT_MOVES_{TO,FROM}_VEC, since moves between
%eax and %xmm will again go through memory (I'm not sure how much we
care for these targets anyway).

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md  |  8 
>  gcc/config/i386/sse.md   |  8 
>  .../gcc.target/i386/extract-insert-combining.c   |  1 +
>  gcc/testsuite/gcc.target/i386/pr105513-1.c   | 16 
>  4 files changed, 25 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 48a98e1b68b..5b538413942 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2251,9 +2251,9 @@ (define_split
>
>  (define_insn "*movdi_internal"
>[(set (match_operand:DI 0 "nonimmediate_operand"
> -"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r 
> ,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
> +"=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r 
> ,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
> (match_operand:DI 1 "general_operand"
> -"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  
> ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
> +"riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  
> ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
>"!(MEM_P (operands[0]) && MEM_P (operands[1]))
> && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> @@ -2472,9 +2472,9 @@ (define_peephole2
>
>  (define_insn "*movsi_internal"
>[(set (match_operand:SI 0 "nonimmediate_operand"
> -"=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
> +"=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
> (match_operand:SI 1 "general_operand"
> -"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
> +"g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
>"!(MEM_P (operands[0]) && MEM_P (operands[1]))
> && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 62688f8e29d..d41ce2e1a9b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
>  ;; see comment above inline_secondary_memory_needed function in i386.cc
>  (define_insn "vec_set_0"
>[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
> - "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
> + "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
> (vec_merge:VI4F_128
>   (vec_duplicate:VI4F_128
> (match_operand: 2 "general_operand"
> - " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
> + " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
>   (match_operand:VI4F_128 1 "nonimm_or_0_operand"
>   " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
>   (const_int 1)))]
> @@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
> (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
>
>  (define_insn_and_split "*vec_extractv4sf_mem"
> -  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
> +  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
> (vec_select:SF
>   (match_operand:V4SF 1 "memory_operand" "o,o,o")
>   (parallel [(match_operand 2 "const_0_to_3_operand")])))]
> @@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract_0"
>"operands[1] = gen_lowpart (HFmode, operands[1]);")
>
>  (define_insn "*vec_extracthf"
> -  [(set (match_operand:HF 0

Re: [PATCH] Mips: Fix the ASAN shadow offset hook for the n32 ABI

2022-06-07 Thread Xi Ruoyao via Gcc-patches

On Mon, 2022-06-06 at 09:28 +0000, Dimitrije Milosevic wrote:
> Fix the ASAN shadow offset hook for the n32 ABI.
> 
> gcc/ChangeLog:
> 
>     * config/mips/mips.cc (mips_asan_shadow_offset): Reformat
>     to handle the N32 ABI.
>     * config/mips/mips.h (SUBTARGET_SHADOW_OFFSET): Remove
>     the macro, as it is not needed anymore.
> 
> ---
> 
>  gcc/config/mips/mips.cc | 7 ++-
>  gcc/config/mips/mips.h  | 7 ---
>  2 files changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
> index 2dce4007678..91e651c458e 100644
> --- a/gcc/config/mips/mips.cc
> +++ b/gcc/config/mips/mips.cc
> @@ -22745,7 +22745,12 @@ mips_constant_alignment (const_tree exp,
> HOST_WIDE_INT align)
>  static unsigned HOST_WIDE_INT
>  mips_asan_shadow_offset (void)
>  {
> -  return SUBTARGET_SHADOW_OFFSET;
> +  if (mips_abi == ABI_N32)
> +    return (HOST_WIDE_INT_1 << 29);
> +  if (POINTER_SIZE == 64)
> +    return (HOST_WIDE_INT_1 << 37);
> +  else
> +    return HOST_WIDE_INT_C (0x0aaa0000);
>  }
> 
>  /* Implement TARGET_STARTING_FRAME_OFFSET.  See
> mips_compute_frame_info
> diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
> index 858bbba3a36..0029864fdcd 100644
> --- a/gcc/config/mips/mips.h
> +++ b/gcc/config/mips/mips.h
> @@ -3463,10 +3463,3 @@ struct GTY(())  machine_function {
>     && !TARGET_MICROMIPS && !TARGET_FIX_24K)
> 
>  #define NEED_INDICATE_EXEC_STACK 0
> -
> -/* Define the shadow offset for asan. Other OS's can override in the
> -   respective tm.h files.  */
> -#ifndef SUBTARGET_SHADOW_OFFSET
> -#define SUBTARGET_SHADOW_OFFSET \
> -  (POINTER_SIZE == 64 ? HOST_WIDE_INT_1 << 37 : HOST_WIDE_INT_C
> (0x0aaa0000))
> -#endif
> 
> ---

I think this depends on https://reviews.llvm.org/D127096 (not committed
yet)?

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [ping][vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2022-06-07 Thread Richard Sandiford via Gcc-patches

Joel Hutton  writes:
>> > Patches attached. They already incorporated the .cc rename, now
>> > rebased to be after the change to tree.h
>>
>> @@ -1412,8 +1412,7 @@ vect_recog_widen_op_pattern (vec_info *vinfo,
>>2, oprnd, half_type, unprom, vectype);
>>
>>tree var = vect_recog_temp_ssa_var (itype, NULL);
>> -  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
>> - oprnd[0], oprnd[1]);
>> +  gimple *pattern_stmt = gimple_build (var, wide_code, oprnd[0],
>> oprnd[1]);
>>
>>
>> you should be able to do without the new gimple_build overload
>> by using
>>
>>gimple_seq stmts = NULL;
>>gimple_build (&stmts, wide_code, itype, oprnd[0], oprnd[1]);
>>gimple *pattern_stmt = gimple_seq_last_stmt (stmts);
>>
>> because 'gimple_build' is an existing API.
>
> Done.
>
> The gimple_build overload was at the request of Richard Sandiford, I assume 
> removing it is ok with you Richard S?
> From Richard Sandiford:
>> For example, I think we should hide this inside a new:
>>
>>   gimple_build (var, wide_code, oprnd[0], oprnd[1]);
>>
>> that works directly on code_helper, similarly to the new code_helper
>> gimple_build interfaces.

I thought the potential problem with the above is that gimple_build
is a folding interface, so in principle it's allowed to return an
existing SSA_NAME set by an existing statement (or even a constant).
I think in this context we do need to force a new statement to be
created.

Of course, the hope is that there wouldn't still be such folding
opportunities at this stage, but I don't think it's guaranteed
(especially with options fuzzing).

Since I was mentioned :-) ...

Could you run the patch through contrib/check_GNU_style.py?
There seem to be a few long lines.

> +  if (res_op.code.is_tree_code ())

Do you need this is_tree_code ()?  These comparisons…

> +  {
> +  widen_arith = (code == WIDEN_PLUS_EXPR
> +  || code == WIDEN_MINUS_EXPR
> +  || code == WIDEN_MULT_EXPR
> +  || code == WIDEN_LSHIFT_EXPR);

…ought to be safe unconditionally.

> + }
> +  else
> +  widen_arith = false;
> +
> +  if (!widen_arith
> +  && !CONVERT_EXPR_CODE_P (code)
> +  && code != FIX_TRUNC_EXPR
> +  && code != FLOAT_EXPR)
> +return false;
>  
>/* Check types of lhs and rhs.  */
> -  scalar_dest = gimple_assign_lhs (stmt);
> +  scalar_dest = gimple_get_lhs (stmt);
>lhs_type = TREE_TYPE (scalar_dest);
>vectype_out = STMT_VINFO_VECTYPE (stmt_info);
>  
> @@ -4938,10 +4951,14 @@ vectorizable_conversion (vec_info *vinfo,
>  
>if (op_type == binary_op)
>  {
> -  gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
> -   || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
> +  gcc_assert (code == WIDEN_MULT_EXPR
> +   || code == WIDEN_LSHIFT_EXPR
> +   || code == WIDEN_PLUS_EXPR
> +   || code == WIDEN_MINUS_EXPR);
>  
> -  op1 = gimple_assign_rhs2 (stmt);
> +
> +  op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
> +  gimple_call_arg (stmt, 0);
>tree vectype1_in;
>if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
>  &op1, &slp_op1, &dt[1], &vectype1_in))
> […]
> @@ -12181,7 +12235,6 @@ supportable_widening_operation (vec_info *vinfo,
>return false;
>  }
>  
> -
>  /* Function supportable_narrowing_operation
>  
> Check whether an operation represented by the code CODE is a

Seems like a spurious change.

> @@ -12205,7 +12258,7 @@ supportable_widening_operation (vec_info *vinfo,
>  bool
>  supportable_narrowing_operation (enum tree_code code,
>tree vectype_out, tree vectype_in,
> -  enum tree_code *code1, int *multi_step_cvt,
> +  tree_code* _code1, int *multi_step_cvt,

The original formatting (space before the “*”) was correct.
Names beginning with _ are reserved, so I think we need a different
name here.  Also, the name in the comment should stay in sync with
the name in the code.

That said though, I'm not sure…

>   vec *interm_types)
>  {
>machine_mode vec_mode;
> @@ -12217,8 +12270,8 @@ supportable_narrowing_operation (enum tree_code code,
>tree intermediate_type, prev_type;
>machine_mode intermediate_mode, prev_mode;
>int i;
> -  unsigned HOST_WIDE_INT n_elts;
>bool uns;
> +  tree_code * code1 = (tree_code*) _code1;

…the combination of these two changes makes sense on their own.

>  
>*multi_step_cvt = 0;
>switch (code)
> @@ -12227,9 +12280,8 @@ supportable_narrowing_operation (enum tree_code code,
>c1 = VEC_PACK_TRUNC_EXPR;
>if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
> && VECTOR_BOOLEAN_TYPE_P (vectype)
> -   && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
> -   && TYPE_VECTOR_SUBPARTS (

[committed] openmp: Add support for OpenMP 5.2 linear clause syntax for C/C++

2022-06-07 Thread Jakub Jelinek via Gcc-patches

Hi!

The syntax for linear clause changed in 5.2, the original syntax
which is still valid is:
linear (var1, var2)
linear (var3, var4 : step1)
The 4.5 syntax with modifiers like:
linear (val (var5, var6))
linear (val (var7, var8) : step2)
is still supported in 5.2, but is deprecated there.
Instead, one can use a new syntax:
linear (var9, var10 : val)
linear (var11, var12 : step (step3), val)
As val, ref, uval or step (someexpr) can be valid expressions (and especially
in C++ can be const / constexpr / consteval), the spec says that
when the whole step expression is val (or ref or uval) or step ( ... )
then it is the new modifier syntax, one can use + 0 or 0 + or 1 * or * 1
or ()s to say it is the old step expression.
Also, 5.2 now allows val modifier to be specified even outside of declare simd
(but not the other modifiers).  I've implemented this for the new modifier
syntax only, the old one keeps the old restriction (which is why
OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER flag has been introduced).

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2022-06-07  Jakub Jelinek  

gcc/
* tree.h (OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER): Define.
* tree-pretty-print.cc (dump_omp_clause) :
Adjust clause printing style depending on
OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER.
gcc/c/
* c-parser.cc (c_parser_omp_clause_linear): Parse OpenMP 5.2
style linear clause modifiers.  Set
OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER flag on the clauses when
old style modifiers are used.
* c-typeck.cc (c_finish_omp_clauses): Only reject linear clause
with val modifier on simd or for if the old style modifiers are
used.
gcc/cp/
* parser.cc (cp_parser_omp_clause_linear): Parse OpenMP 5.2
style linear clause modifiers.  Set
OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER flag on the clauses when
old style modifiers are used.
* semantics.cc (finish_omp_clauses): Only reject linear clause
with val modifier on simd or for if the old style modifiers are
used.
gcc/fortran/
* trans-openmp.cc (gfc_trans_omp_clauses): Set
OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER on OMP_CLAUSE_LINEAR
clauses unconditionally for now.
gcc/testsuite/
* c-c++-common/gomp/linear-2.c: New test.
* c-c++-common/gomp/linear-3.c: New test.
* g++.dg/gomp/linear-3.C: New test.
* g++.dg/gomp/linear-4.C: New test.
* g++.dg/gomp/linear-5.C: New test.

--- gcc/tree.h.jj   2022-05-30 14:07:02.261303633 +0200
+++ gcc/tree.h  2022-06-06 15:28:02.413728905 +0200
@@ -1841,6 +1841,11 @@ class auto_suppress_location_wrappers
 #define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \
   TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
 
+/* True for a LINEAR clause with old style modifier syntax
+   linear(modifier(list)) or linear(modifier(list):step).  */
+#define OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR)->base.addressable_flag)
+
 /* True if a LINEAR clause is for an array or allocatable variable that
needs special handling by the frontend.  */
 #define OMP_CLAUSE_LINEAR_ARRAY(NODE) \
--- gcc/tree-pretty-print.cc.jj 2022-05-30 14:07:02.223304039 +0200
+++ gcc/tree-pretty-print.cc2022-06-06 12:52:18.774204323 +0200
@@ -707,29 +707,50 @@ dump_omp_clause (pretty_printer *pp, tre
 
 case OMP_CLAUSE_LINEAR:
   pp_string (pp, "linear(");
-  switch (OMP_CLAUSE_LINEAR_KIND (clause))
-   {
-   case OMP_CLAUSE_LINEAR_DEFAULT:
- break;
-   case OMP_CLAUSE_LINEAR_REF:
- pp_string (pp, "ref(");
- break;
-   case OMP_CLAUSE_LINEAR_VAL:
- pp_string (pp, "val(");
- break;
-   case OMP_CLAUSE_LINEAR_UVAL:
- pp_string (pp, "uval(");
- break;
-   default:
- gcc_unreachable ();
-   }
+  if (OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER (clause))
+   switch (OMP_CLAUSE_LINEAR_KIND (clause))
+ {
+ case OMP_CLAUSE_LINEAR_DEFAULT:
+   break;
+ case OMP_CLAUSE_LINEAR_REF:
+   pp_string (pp, "ref(");
+   break;
+ case OMP_CLAUSE_LINEAR_VAL:
+   pp_string (pp, "val(");
+   break;
+ case OMP_CLAUSE_LINEAR_UVAL:
+   pp_string (pp, "uval(");
+   break;
+ default:
+   gcc_unreachable ();
+ }
   dump_generic_node (pp, OMP_CLAUSE_DECL (clause),
 spc, flags, false);
-  if (OMP_CLAUSE_LINEAR_KIND (clause) != OMP_CLAUSE_LINEAR_DEFAULT)
+  if (OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER (clause)
+ && OMP_CLAUSE_LINEAR_KIND (clause) != OMP_CLAUSE_LINEAR_DEFAULT)
pp_right_paren (pp);
   pp_colon (pp);
+  if (!OMP_CLAUSE_LINEAR_OLD_LINEAR_MODIFIER (clause)
+ && OMP_CLAUSE_LINEAR_KIND (clause) != OMP_CLAUSE_LINEAR_DEFAULT)
+   switch (OMP_CLAUSE_LINEA

Re: [PATCH] Mips: Enable asynchronous unwind tables with both ASAN and TSAN

2022-06-07 Thread Xi Ruoyao via Gcc-patches

On Mon, 2022-05-30 at 07:10 +0000, Dimitrije Milosevic wrote:
> Hi Xi, thanks for pointing this out. I'd definitely say that the
> https://clang.llvm.org/docs/ThreadSanitizer.html documentation is
> outdated. According
> to https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms
> TSAN is supported on Mips64. Furthermore, there are
> actual code segments (in compiler-rt/lib/tsan/rtl/tsan_platforms.h,
> for example) related to Mips64.
> I didn't add the 64-bit target check, however. Here is the updated
> version of the patch.

Well, so should we add TSAN_SUPPORTED=yes for MIPS64 in
libsanitizer/configure.tgt first?  I'll try this on my MIPS64 in a few
days.


-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

RE: [ping][vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2022-06-07 Thread Joel Hutton via Gcc-patches

Thanks Richard,

> I thought the potential problem with the above is that gimple_build is a
> folding interface, so in principle it's allowed to return an existing SSA_NAME
> set by an existing statement (or even a constant).
> I think in this context we do need to force a new statement to be created.

Before I make any changes, I'd like to check we're all on the same page.

richi, are you ok with the gimple_build function, perhaps with a different name 
if you are concerned with overloading? We could use gimple_ch_build or 
gimple_code_helper_build?

Similarly are you ok with the use of gimple_extract_op? I would lean towards 
using it as it is cleaner, but I don't have strong feelings.

Joel

> -Original Message-
> From: Richard Sandiford 
> Sent: 07 June 2022 09:18
> To: Joel Hutton 
> Cc: Richard Biener ; gcc-patches@gcc.gnu.org
> Subject: Re: [ping][vect-patterns] Refactor widen_plus/widen_minus as
> internal_fns
> 
> Joel Hutton  writes:
> >> > Patches attached. They already incorporated the .cc rename, now
> >> > rebased to be after the change to tree.h
> >>
> >> @@ -1412,8 +1412,7 @@ vect_recog_widen_op_pattern (vec_info *vinfo,
> >>2, oprnd, half_type, unprom, vectype);
> >>
> >>tree var = vect_recog_temp_ssa_var (itype, NULL);
> >> -  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
> >> - oprnd[0], oprnd[1]);
> >> +  gimple *pattern_stmt = gimple_build (var, wide_code, oprnd[0],
> >> oprnd[1]);
> >>
> >>
> >> you should be able to do without the new gimple_build overload by
> >> using
> >>
> >>gimple_seq stmts = NULL;
> >>gimple_build (&stmts, wide_code, itype, oprnd[0], oprnd[1]);
> >>gimple *pattern_stmt = gimple_seq_last_stmt (stmts);
> >>
> >> because 'gimple_build' is an existing API.
> >
> > Done.
> >
> > The gimple_build overload was at the request of Richard Sandiford, I
> assume removing it is ok with you Richard S?
> > From Richard Sandiford:
> >> For example, I think we should hide this inside a new:
> >>
> >>   gimple_build (var, wide_code, oprnd[0], oprnd[1]);
> >>
> >> that works directly on code_helper, similarly to the new code_helper
> >> gimple_build interfaces.
> 
> I thought the potential problem with the above is that gimple_build is a
> folding interface, so in principle it's allowed to return an existing SSA_NAME
> set by an existing statement (or even a constant).
> I think in this context we do need to force a new statement to be created.
> 
> Of course, the hope is that there wouldn't still be such folding opportunities
> at this stage, but I don't think it's guaranteed (especially with options
> fuzzing).
> 
> Sind I was mentioned :-) ...
> 
> Could you run the patch through contrib/check_GNU_style.py?
> There seem to be a few long lines.
> 
> > +  if (res_op.code.is_tree_code ())
> 
> Do you need this is_tree_code ()?  These comparisons…
> 
> > +  {
> > +  widen_arith = (code == WIDEN_PLUS_EXPR
> > +|| code == WIDEN_MINUS_EXPR
> > +|| code == WIDEN_MULT_EXPR
> > +|| code == WIDEN_LSHIFT_EXPR);
> 
> …ought to be safe unconditionally.
> 
> > + }
> > +  else
> > +  widen_arith = false;
> > +
> > +  if (!widen_arith
> > +  && !CONVERT_EXPR_CODE_P (code)
> > +  && code != FIX_TRUNC_EXPR
> > +  && code != FLOAT_EXPR)
> > +return false;
> >
> >/* Check types of lhs and rhs.  */
> > -  scalar_dest = gimple_assign_lhs (stmt);
> > +  scalar_dest = gimple_get_lhs (stmt);
> >lhs_type = TREE_TYPE (scalar_dest);
> >vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> >
> > @@ -4938,10 +4951,14 @@ vectorizable_conversion (vec_info *vinfo,
> >
> >if (op_type == binary_op)
> >  {
> > -  gcc_assert (code == WIDEN_MULT_EXPR || code ==
> WIDEN_LSHIFT_EXPR
> > - || code == WIDEN_PLUS_EXPR || code ==
> WIDEN_MINUS_EXPR);
> > +  gcc_assert (code == WIDEN_MULT_EXPR
> > + || code == WIDEN_LSHIFT_EXPR
> > + || code == WIDEN_PLUS_EXPR
> > + || code == WIDEN_MINUS_EXPR);
> >
> > -  op1 = gimple_assign_rhs2 (stmt);
> > +
> > +  op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
> > +gimple_call_arg (stmt, 0);
> >tree vectype1_in;
> >if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
> >&op1, &slp_op1, &dt[1], &vectype1_in)) […] @@
> -12181,7
> > +12235,6 @@ supportable_widening_operation (vec_info *vinfo,
> >return false;
> >  }
> >
> > -
> >  /* Function supportable_narrowing_operation
> >
> > Check whether an operation represented by the code CODE is a
> 
> Seems like a spurious change.
> 
> > @@ -12205,7 +12258,7 @@ supportable_widening_operation (vec_info
> > *vinfo,  bool  supportable_narrowing_operation (enum tree_code code,
> >  tree vectype_out, tree vectype_in,
> > -enum tree_code *code1

Re: [PATCH] Fix insn does not satisfy its constraints: sse2_lshrv1ti3

2022-06-07 Thread Uros Bizjak via Gcc-patches

On Tue, Jun 7, 2022 at 6:56 AM liuhongt via Gcc-patches
 wrote:
>
> 21114(define_insn_and_split "ssse3_palignrdi"
> 21115  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
> 21116(unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
> 21117(match_operand:DI 2 "register_mmxmem_operand" 
> "ym,x,Yv")
> 21118(match_operand:SI 3 "const_0_to_255_mul_8_operand")]
> 21119   UNSPEC_PALIGNR))]
> 21120  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
>
> Alternative 2 requires Yw instead of Yv since it's splitted to vpsrldq
> which requires AVX512VL & AVX512BW for evex version.
>
> Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
> Ready to push to trunk.
>
> gcc/ChangeLog:
>
> PR target/105854
> * config/i386/sse.md (ssse3_palignrdi): Change alternative 2
> from Yv to Yw.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr105854.c: New test.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/sse.md   |  6 ++---
>  gcc/testsuite/gcc.target/i386/pr105854.c | 32 
>  2 files changed, 35 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105854.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 62688f8e29d..200308445db 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -21123,9 +21123,9 @@ (define_insn "_palignr"
> (set_attr "mode" "")])
>
>  (define_insn_and_split "ssse3_palignrdi"
> -  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
> -   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
> -   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
> +  [(set (match_operand:DI 0 "register_operand" "=y,x,Yw")
> +   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yw")
> +   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yw")
> (match_operand:SI 3 "const_0_to_255_mul_8_operand")]
>UNSPEC_PALIGNR))]
>"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
> diff --git a/gcc/testsuite/gcc.target/i386/pr105854.c 
> b/gcc/testsuite/gcc.target/i386/pr105854.c
> new file mode 100644
> index 00000000000..28abef67915
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105854.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fcaller-saves -mavx512vl -mno-avx512bw" } */
> +
> +typedef int __attribute__((__vector_size__ (8))) T;
> +typedef signed char __attribute__((__vector_size__ (64))) U;
> +typedef int __attribute__((__vector_size__ (16))) V;
> +typedef long long __attribute__((__vector_size__ (8))) W;
> +typedef int __attribute__((__vector_size__ (64))) X;
> +typedef _Decimal128 __attribute__((__vector_size__ (64))) D;
> +
> +D d;
> +T t;
> +U u;
> +V v;
> +W w;
> +
> +void
> +foo (void)
> +{
> +  T t0 = t;
> +  T t1 = (T) __builtin_ia32_palignr (w, (W) { }, 0);
> +  U u1 = __builtin_shufflevector (u, u, 7, 6, 2, 3, 6, 4, 5, 2, 3, 8, 3, 2, 
> 0,
> + 4, 0, 6, 2, 2, 5, 3, 1, 0, 7, 5, 3, 3, 7, 6,
> + 2, 0, 4, 5, 4, 1, 7, 7, 0, 6, 1, 9, 3, 0, 3,
> + 5, 5, 0, 0, 2, 1, 5, 4, 8, 7,
> + 2, 1, 1, 6, 4, 9, 9, 1, 5, 0, 2);
> +  V v1 = v;
> +  d += 0.;
> +  U u0 = u + u + u1 + (U) d;
> +  V v0 = ((X)u0)[0] + v + v;
> +  t = (T) (long) (__int128) v0 + t + t + t1;
> +}
> --
> 2.18.1
>

[PATCH, CFE] N2863: Improved Rules for Tag Compatibility

2022-06-07 Thread Martin Uecker



Hello Joseph and all,


here is a preliminary patch that implements the proposed
tag compatibility rules for C23 in GCC (N2863). It works
by tweaking the diagnostics in the FE and by recording
structs/union types to be able to set TYPE_CANONICAL to
a canonical definition (as per previous discussions).

 
Overall, this seems to work very well when testing
on my own projects. There are still some issues
left that I want to point out:

- at the moment, all struct/union types are collected
in a vector. This needs to be replaced by a hash table.

- the feature has a flag (-ftag-compat) which is now turned
on by default in all language modes to facilitate testing
and to identify backwards compatibility problems. With the flag
turned on, bootstrapping and regression testing pass; only a
few tests that check for diagnostics which no longer appear
had to be changed to turn it off.

- The new rules are not applied to structs with variable
sized members (which are a GNU extension).

- In contrast to the published proposal, structs without
tags are now treated as incompatible as requested by WG14.

- There is still one assertion in ipa-free-lang-data I had
to conditionally turn off and did not have time to 
investigate.  Otherwise, there are only C FE changes.
LTO may still need some more testing.

- It fixes some bugs in (formerly) unused FE code
and removes some other dead code. This could be
moved into its own patch.

- If adopted into C, I assume we need some
compatibility warnings. From testing, I could
not identify any backwards compatibility problems.

- There are certainly some issues I may have
overlooked.


Martin


gcc/
* c-family/c.opt: Add -ftag-compat flag.
* c/c-decl.cc (pop_scope): Remove dead code. 
(diagnose_mismatched_decls): Support for 
new tag compatibility rules.
(start_struct): Ditto.
(finish_struct): Ditto.
(start_enum): Ditto.
(finish_enum): Ditto.
(build_enumerator): Pass enumtype to build_decl.
(c_simulate_enum_decl): Pass enumtype to 
build_enumerator.
* c/c-parser.cc (c_parser_enum_specifier): Ditto.
* c/c-tree.h (build_enumerator): Add enumtype 
argument.
* c/c-typeck.cc (comptypes_internal): Support
for new tag compatibility rules.
(same_translation_unit_p): Removed.
(tagged_types_tu_compatible_p): Bug fixes and
support for new tag compatibility rules.
(convert_for_assignment): Support for new tag
compatibility rules
(digest_init): Ditto.
* ipa-free-lang-data.cc (fld_incomplete_type_of):
Conditionally turn off assertion related to
TYPE_CANONICAL if -ftag-compat is on.
doc/
* invoke.texi: Document -ftag-compat flag.
testsuite/
* gcc.dg/asan/pr81460.c: Add -fno-tag-compat.
* gcc.dg/c99-tag-1.c: Add -fno-tag-compat.
* gcc.dg/c99-tag-2.c: Add -fno-tag-compat. 
* gcc.dg/decl-3.c: Add -fno-tag-compat.
* gcc.dg/enum-redef-1.c: Add -fno-tag-compat.
* gcc.dg/parm-incomplete-1.c: Add -fno-tag-compat.
* gcc.dg/pr17188-1.c: Add -fno-tag-compat.
* gcc.dg/pr18809-1.c: Add -pedantic-errors and
-fno-tag-compat.
* gcc.dg/pr27953.c: Add -fno-tag-compat.
* gcc.dg/pr39084.c: Add -fno-tag-compat.
* gcc.dg/pr68533.c: Add -fno-tag-compat.
* gcc.dg/pr79983.c: Add -fno-tag-compat.
* gcc.dg/pr89211.c: Add -fno-tag-compat.
* gcc.dg/tag-compat.c: New test.
* gcc.dg/tag-compat10.c: New test.
* gcc.dg/tag-compat11.c: New test.
* gcc.dg/tag-compat12.c: New test.
* gcc.dg/tag-compat2.c: New test.
* gcc.dg/tag-compat3.c: New test.
* gcc.dg/tag-compat4.c: New test.
* gcc.dg/tag-compat5.c: New test.
* gcc.dg/tag-compat6.c: New test.
* gcc.dg/tag-compat7.c: New test.
* gcc.dg/tag-compat8.c: New test.
* gcc.dg/tag-compat9.c: New test.
* gcc.dg/vla-11.c: Add -fno-tag-compat.
* gcc.dg/vla-stexp-2.c: Add -fno-tag-compat.



diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 41a20bc625e..cd3164018f2 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2108,6 +2108,9 @@ Enum(strong_eval_order) String(some) Value(1)
 EnumValue
 Enum(strong_eval_order) String(all) Value(2)
 
+ftag-compat
+C Var(flag_tag_compat) Init(1)
+
 ftemplate-backtrace-limit=
 C++ ObjC++ Joined RejectNegative UInteger
Var(template_backtrace_limit) Init(10)
 Set the maximum number of template instantiation notes for a single
warning or error.
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 5266a61b859..df208a310f9 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -599,6 +599,10 @@ public:
   auto_vec typedefs_seen;
 };
 
+
+/* All tagged types so that TYPE_CANONICAL can be set correctly.  */
+static auto_vec all_structs;
+
 /* Information for the struct or union currently being parsed, or
NULL if not parsing a struct or union.  */
 static class c_struct_parse_info *struct_parse_info;
@@ -1354,8 +1358,8 @@ pop_scope (void)
  BLOCK_VARS (block) = extp;
}
  /* If this is the file scope set DECL_CONTEXT of each decl
to
-the TRANSLATION_UNIT_DECL.  This makes
same_translation_unit_p
-work.  */
+the TRANSLATION_UNIT_DECL.  */
+
  if (scope == file_scope)
{
  DECL_CON

Re: [patch] libgompd: Add thread handles

2022-06-07 Thread Jakub Jelinek via Gcc-patches

On Tue, Jun 07, 2022 at 12:21:25AM +0200, Ahmed Sayed Mousse via Gcc-patches 
wrote:
> This patch is the initial implementation of OpenMP-API specs book section
> 20.5.5 with title "Thread Handles"
> 
> libgomp/ChangeLog
> 
> 2022-05-06 Ahmed Sayed 

Two spaces should separate the date and name and name and email.
> 
> * Makefile.am (libgompd_la_SOURCES): Add ompd-threads.c.
> 
> * Makefile.in: Regenerate.
> 

No empty lines in between (and all ChangeLog lines start with a tab; I
assume your mailer ate that).

> * ompd-support.h ( gompd_thread_initial_tls_bias ): New Variable.

No spaces after ( or before )

> * ompd-support.c ( gompd_thread_initial_tls_bias ): New Variable.
> 
> ( gompd_load ): ( gompd_thread_initial_tls_bias ): Initialized with
> &gomp_tls_data - pthread_self ().

It is just gompd_load you are changing, so it should be:
(gompd_load): Initialize gompd_thread_initial_tls_bias.
or so.

> --- a/libgomp/ompd-support.c
> +++ b/libgomp/ompd-support.c
> @@ -36,6 +36,10 @@
>  const char **ompd_dll_locations = NULL;
>  __UINT64_TYPE__ gompd_state;
> 
> +#if (defined HAVE_TLS || defined USE_EMUTLS)
> +__UINT64_TYPE__ gompd_thread_initial_tls_bias;

In reality it isn't these conditions, but
#ifdef GOMP_NEEDS_THREAD_HANDLE that determines if there is
a TLS bias possible.
But the point of those gompd_sizeof* and gompd_access* vars
was to make libgompd slightly more independent from the exact
libgomp version, otherwise one could just use sizeof and offsetof
values directly in libgompd.
So, even using similar ifdefs on the libgompd side looks wrong,
the var should be there unconditionally and just use some special
value (e.g. -1 which isn't a possible TLS bias because the
struct has some alignment requirements) to say that the TLS bias
can't be used and one needs to use struct gomp_thread's handle
member instead.

Also, as I mentioned yesterday, using __UINT64_TYPE__ for everything
is very wasteful, use the right type for each information.
As for TLS bias, in reality it will be up to +- a few hundreds of bytes,
worst case kilobytes, but in theory it could be on 64-bit targets even
larger than 4GB, but on 32-bit arches it can't, so size_t would
be the right type.  Except I think the interfaces don't cover size_t size,
but long would be a usable replacement (not the same thing size-wise on
Windows, but Windows will always GOMP_NEEDS_THREAD_HANDLE).

> +#endif
> +
>  void
>  gompd_load (void)
>  {
> @@ -61,7 +65,11 @@ gompd_load (void)
>= (__UINT64_TYPE__) & (((struct gomp_thread *) NULL)->handle);
>  __UINT64_TYPE__ gompd_sizeof_gomp_thread_handle
>= sizeof (((struct gomp_thread *) NULL)->handle);

There is a preexisting bug above:
  #ifdef GOMP_NEEDS_THREAD_HANDLE
__UINT64_TYPE__ gompd_access_gomp_thread_handle
  = (__UINT64_TYPE__) & (((struct gomp_thread *) NULL)->handle);
__UINT64_TYPE__ gompd_sizeof_gomp_thread_handle
  = sizeof (((struct gomp_thread *) NULL)->handle);
just defines automatic variables in the function and sets them to
those values.  They need to be global vars, ideally const
initialized at file scope.  But, as the field is sometimes present
and sometimes it is not, I think best would be to initialize
it to offsetof/sizeof #ifdef GOMP_NEEDS_THREAD_HANDLE and
otherwise to 0 and 0.
Then we don't even need a magic value for when the TLS bias can't be used,
and can instead always GET_VALUE gompd_sizeof_gomp_thread_handle:
if it is 0, then use TLS bias, otherwise load the handle.

Again, comment more about the already committed patch now, besides
trying to shrink the values from __UINT64_TYPE__ to probably short int
and making them const and initialized at file scope initializers and
using offsetof, there is a big question when do we expect OMPD to work.
Seems the gompd_{sizeof,access}* symbols aren't exported from the
library, so they are present (say on ELF) just in .symtab/.strtab
sections and debug info.  Those sections can be stripped or stripped to
file, so that would mean OMPD would work only if the libgomp.so.1 library
is not stripped or has separate debug info installed.
Also, if one builds the library with LTO, I think the linker with the
compiler will happily remove all those symbols, as nothing uses them.
To fix this latter thing, one can just add __attribute__((used)) to
all those vars.
But if we want to make those work somehow even without debug info
and .symtab/.strtab sections around, I think we want to force the
symbols into .dynsym/.dynstr too (i.e. export in libgomp.map).
Exporting dozens of such symbols would be quite costly though.
So if we go that route, I think it would be best if we had just
1-2 of such variables with data for libgompd (probably 2 where
one is const and can be in .rodata and the other for vars that might need
changing).  As most if not all of the const data can be represented in
unsigned short, I think it should be an array of const unsigned short,
with macros that say what each element means and those macros we'd

Re: [PATCH V2] RISC-V:Fix a bug that is the CMO builtins are missing parameter

2022-06-07 Thread Kito Cheng via Gcc-patches

On Tue, Jun 7, 2022 at 4:04 PM  wrote:
>
> From: yulong 
>
> We changed builtins format about zicbom and zicboz subextensions and added 
> test cases.
> diff with the previous version:
> 1.We deleted the RTL mode's second input operand.
> 2.We modified the type of builtins from RISCV_BUILTIN_DIRECT to 
> RISCV_BUILTIN_DIRECT_NO_TARGET.
> 3.We modified the test cases and added more parameter tests.
>
> Thanks, Simon and Kito.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-cmo.def (RISCV_BUILTIN): changed BUILTIN_TYPE 
> and FUNCTION_TYPE
> * config/riscv/riscv-ftypes.def (0): changed "DEF_RISCV_FTYPE (0, 
> (SI/DI))" to "DEF_RISCV_FTYPE (1, (VOID, SI/DI))"
> (1):
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/cmo-zicbom-1.c: added parameter and modified the 
> fun's type
> * gcc.target/riscv/cmo-zicbom-2.c: added parameter and modified the 
> fun's type
> * gcc.target/riscv/cmo-zicboz-1.c: added parameter and modified the 
> fun's type
> * gcc.target/riscv/cmo-zicboz-2.c: added parameter and modified the 
> fun's type
>
> ---
>  gcc/config/riscv/riscv-cmo.def| 16 ++--
>  gcc/config/riscv/riscv-ftypes.def |  4 +--
>  gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c | 25 ---
>  gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c | 25 ---
>  gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c |  9 ---
>  gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c |  9 ---
>  6 files changed, 54 insertions(+), 34 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-cmo.def b/gcc/config/riscv/riscv-cmo.def
> index b30ecf96ec1..7b3da7c24ef 100644
> --- a/gcc/config/riscv/riscv-cmo.def
> +++ b/gcc/config/riscv/riscv-cmo.def
> @@ -1,16 +1,16 @@
>  // zicbom
> -RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, clean32),
> -RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, clean64),
> +RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_SI, clean32),
> +RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_DI, clean64),

Seems like that could be void * type rather than SI or DI here

RISCV_VOID_FTYPE_SI -> RISCV_VOID_FTYPE_VOID_PTR
RISCV_VOID_FTYPE_DI -> RISCV_VOID_FTYPE_VOID_PTR

And add RISCV_ATYPE_VOID_PTR here

diff --git a/gcc/config/riscv/riscv-builtins.cc
b/gcc/config/riscv/riscv-builtins.cc
index 795132a0c16..1218fdfc67d 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -133,6 +133,7 @@ AVAIL (prefetchi64, TARGET_ZICBOP && TARGET_64BIT)
#define RISCV_ATYPE_USI unsigned_intSI_type_node
#define RISCV_ATYPE_SI intSI_type_node
#define RISCV_ATYPE_DI intDI_type_node
+#define RISCV_ATYPE_VOID_PTR ptr_type_node

/* RISCV_FTYPE_ATYPESN takes N RISCV_FTYPES-like type codes and lists
   their associated RISCV_ATYPEs.  */


And function type here.

diff --git a/gcc/config/riscv/riscv-ftypes.def
b/gcc/config/riscv/riscv-ftypes.def
index 77ac6ea3f66..7c40ac89a4a 100644
--- a/gcc/config/riscv/riscv-ftypes.def
+++ b/gcc/config/riscv/riscv-ftypes.def
@@ -30,5 +30,6 @@ DEF_RISCV_FTYPE (0, (USI))
DEF_RISCV_FTYPE (1, (VOID, USI))
DEF_RISCV_FTYPE (1, (VOID, SI))
DEF_RISCV_FTYPE (1, (VOID, DI))
+DEF_RISCV_FTYPE (1, (VOID, VOID_PTR))
DEF_RISCV_FTYPE (1, (SI, SI))
DEF_RISCV_FTYPE (1, (DI, DI))



>
> -RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, flush32),
> -RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, flush64),
> +RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_SI, flush32),
> +RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_DI, flush64),
>
> -RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, inval32),
> -RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, inval64),
> +RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_SI, inval32),
> +RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_DI, inval64),
>
>  // zicboz
> -RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE, zero32),
> -RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
> RISCV_DI_FTYPE, zero64),
> +RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_SI, zero32),
> +RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
> RISCV_VOID_FTYPE_DI, zero64),
>
>  // zicbop
>  RISCV_BUILTIN (prefetchi_si, "zicbop_cbo_prefetchi", RISCV_BUILTIN_DIRECT, 
> RISCV_SI_FTYPE_SI, prefetchi32),
> diff --git a/gcc/config/riscv/riscv-ftypes.def 
> b/gcc/config/riscv/riscv-ftypes.def
> index 62421292ce7..77ac6ea3f66 100644
> --- a/gcc/config/ris

Re: [PATCH] Mips: Fix the ASAN shadow offset hook for the n32 ABI

2022-06-07 Thread Dimitrije Milosevic

Correct, it should be committed very soon.

From: Xi Ruoyao 
Sent: Tuesday, June 7, 2022 10:17 AM
To: Dimitrije Milosevic ; 
gcc-patches@gcc.gnu.org 
Cc: Djordje Todorovic 
Subject: Re: [PATCH] Mips: Fix the ASAN shadow offset hook for the n32 ABI

On Mon, 2022-06-06 at 09:28 +0000, Dimitrije Milosevic wrote:
> Fix the ASAN shadow offset hook for the n32 ABI.
>
> gcc/ChangeLog:
>
> * config/mips/mips.cc (mips_asan_shadow_offset): Reformat
> to handle the N32 ABI.
> * config/mips/mips.h (SUBTARGET_SHADOW_OFFSET): Remove
> the macro, as it is not needed anymore.
>
> ---
>
>  gcc/config/mips/mips.cc | 7 ++-
>  gcc/config/mips/mips.h  | 7 ---
>  2 files changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
> index 2dce4007678..91e651c458e 100644
> --- a/gcc/config/mips/mips.cc
> +++ b/gcc/config/mips/mips.cc
> @@ -22745,7 +22745,12 @@ mips_constant_alignment (const_tree exp,
> HOST_WIDE_INT align)
>  static unsigned HOST_WIDE_INT
>  mips_asan_shadow_offset (void)
>  {
> -  return SUBTARGET_SHADOW_OFFSET;
> +  if (mips_abi == ABI_N32)
> +return (HOST_WIDE_INT_1 << 29);
> +  if (POINTER_SIZE == 64)
> +return (HOST_WIDE_INT_1 << 37);
> +  else
> +return HOST_WIDE_INT_C (0x0aaa0000);
>  }
>
>  /* Implement TARGET_STARTING_FRAME_OFFSET.  See
> mips_compute_frame_info
> diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
> index 858bbba3a36..0029864fdcd 100644
> --- a/gcc/config/mips/mips.h
> +++ b/gcc/config/mips/mips.h
> @@ -3463,10 +3463,3 @@ struct GTY(())  machine_function {
> && !TARGET_MICROMIPS && !TARGET_FIX_24K)
>
>  #define NEED_INDICATE_EXEC_STACK 0
> -
> -/* Define the shadow offset for asan. Other OS's can override in the
> -   respective tm.h files.  */
> -#ifndef SUBTARGET_SHADOW_OFFSET
> -#define SUBTARGET_SHADOW_OFFSET \
> -  (POINTER_SIZE == 64 ? HOST_WIDE_INT_1 << 37 : HOST_WIDE_INT_C
> (0x0aaa0000))
> -#endif
>
> ---

I think this depends on https://reviews.llvm.org/D127096 (not committed
yet)?

--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH] Mips: Enable asynchronous unwind tables with both ASAN and TSAN

2022-06-07 Thread Dimitrije Milosevic

Definitely, a patch is on the way.

From: Xi Ruoyao 
Sent: Tuesday, June 7, 2022 10:20 AM
To: Dimitrije Milosevic ; 
gcc-patches@gcc.gnu.org 
Cc: Djordje Todorovic 
Subject: Re: [PATCH] Mips: Enable asynchronous unwind tables with both ASAN and 
TSAN

On Mon, 2022-05-30 at 07:10 +0000, Dimitrije Milosevic wrote:
> Hi Xi, thanks for pointing this out. I'd definitely say that the
> https://clang.llvm.org/docs/ThreadSanitizer.html documentation is
> outdated. According
> to https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms
> TSAN is supported on Mips64. Furthermore, there are
> actual code segments (in compiler-rt/lib/tsan/rtl/tsan_platforms.h,
> for example) related to Mips64.
> I didn't add the 64-bit target check, however. Here is the updated
> version of the patch.

Well, so should we add TSAN_SUPPORTED=yes for MIPS64 in
libsanitizer/configure.tgt first?  I'll try this on my MIPS64 in a few
days.


--
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

Re: [PATCH v1 3/3] RISC-V: Replace zero_extendsidi2_shifted with generalized split

2022-06-07 Thread Kito Cheng via Gcc-patches

On Wed, May 25, 2022 at 5:47 AM Philipp Tomsich
 wrote:
>
> The current method of treating shifts of extended values on RISC-V
> frequently causes sequences of 3 shifts, despite the presence of the
> 'zero_extendsidi2_shifted' pattern.
>
> Consider:
> unsigned long f(unsigned int a, unsigned long b)
> {
> a = a << 1;
> unsigned long c = (unsigned long) a;
> c = b + (c<<4);
> return c;
> }
> which will present at combine-time as:
> Trying 7, 8 -> 9:
> 7: r78:SI=r81:DI#0<<0x1
>   REG_DEAD r81:DI
> 8: r79:DI=zero_extend(r78:SI)
>   REG_DEAD r78:SI
> 9: r72:DI=r79:DI<<0x4
>   REG_DEAD r79:DI
> Failed to match this instruction:
> (set (reg:DI 72 [ _1 ])
> (and:DI (ashift:DI (reg:DI 81)
> (const_int 5 [0x5]))
> (const_int 68719476704 [0xfffffffe0])))
> and produce the following (optimized) assembly:
> f:
> slliw   a5,a0,1
> slli    a5,a5,32
> srli    a5,a5,28
> add a0,a5,a1
> ret
>
> The current way of handling this (in 'zero_extendsidi2_shifted')
> doesn't apply for two reasons:
> - this is seen before reload, and
> - (more importantly) the constant mask is not 0xfffffffful.
>
> To address this, we introduce a generalized version of shifting
> zero-extended values that supports any mask of consecutive ones as
> long as the number of trailing zeros is the inner shift-amount.
>
> With this new split, we generate the following assembly for the
> aforementioned function:
> f:
> slli    a0,a0,33
> srli    a0,a0,28
> add a0,a0,a1
> ret
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.md (zero_extendsidi2_shifted): Replace
>   with a generalized split that requires no clobber, runs
>   before reload and works for smaller masks.
>
> Signed-off-by: Philipp Tomsich 
> ---
>
>  gcc/config/riscv/riscv.md | 37 -
>  1 file changed, 20 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index b8ab0cf169a..cc10cd90a74 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -2119,23 +2119,26 @@ (define_split
>  ;; occur when unsigned int is used for array indexing.  Split this into two
>  ;; shifts.  Otherwise we can get 3 shifts.
>
> -(define_insn_and_split "zero_extendsidi2_shifted"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> -   (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
> -  (match_operand:QI 2 "immediate_operand" "I"))
> -   (match_operand 3 "immediate_operand" "")))
> -   (clobber (match_scratch:DI 4 "=&r"))]
> -  "TARGET_64BIT && !TARGET_ZBA
> -   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 4)
> -   (ashift:DI (match_dup 1) (const_int 32)))
> -   (set (match_dup 0)
> -   (lshiftrt:DI (match_dup 4) (match_dup 5)))]
> -  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
> -  [(set_attr "type" "shift")
> -   (set_attr "mode" "DI")])
> +(define_split
> +  [(set (match_operand:DI 0 "register_operand")
> +   (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
> +  (match_operand:QI 2 "immediate_operand"))
> +   (match_operand:DI 3 "consecutive_bits_operand")))]
> +  "TARGET_64BIT"
> +  [(set (match_dup 0) (ashift:DI (match_dup 1) (match_dup 4)))
> +   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (match_dup 5)))]

I would prefer to keep using another register if possible:

like this:
+  [(set (match_dup 6) (ashift:DI (match_dup 1) (match_dup 4)))
+   (set (match_dup 0) (lshiftrt:DI (match_dup 6) (match_dup 5)))]

if (can_create_pseudo_p ())
  operands[6] = gen_reg_rtx (DImode);
else
  operands[6] = operands[0];


> +{
> +   unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]);
> +   int leading = clz_hwi (mask);
> +   int trailing = ctz_hwi (mask);
> +
> +   /* The shift-amount must match the number of trailing bits */
> +   if (trailing != UINTVAL (operands[2]))
> +  FAIL;
> +
> +   operands[4] = GEN_INT (leading + trailing);
> +   operands[5] = GEN_INT (leading);
> +})
>
>  ;;
>  ;;  
> --
> 2.34.1
>

Re: [PATCH v1 2/3] RISC-V: Split slli+sh[123]add.uw opportunities to avoid zext.w

2022-06-07 Thread Kito Cheng via Gcc-patches

LGTM, you can commit that without [3/3] if you like :)

On Wed, May 25, 2022 at 5:47 AM Philipp Tomsich
 wrote:
>
> When encountering a prescaled (biased) value as a candidate for
> sh[123]add.uw, the combine pass will present this as shifted by the
> aggregate amount (prescale + shift-amount) with an appropriately
> adjusted mask constant that has fewer than 32 bits set.
>
> E.g., here's the failing expression seen in combine for a prescale of
> 1 and a shift of 2 (note how 0x3fffffff8 >> 3 is 0x7fffffff).
>   Trying 7, 8 -> 10:
>   7: r78:SI=r81:DI#0<<0x1
> REG_DEAD r81:DI
>   8: r79:DI=zero_extend(r78:SI)
> REG_DEAD r78:SI
>  10: r80:DI=r79:DI<<0x2+r82:DI
> REG_DEAD r79:DI
> REG_DEAD r82:DI
>   Failed to match this instruction:
>   (set (reg:DI 80 [ cD.1491 ])
>   (plus:DI (and:DI (ashift:DI (reg:DI 81)
>(const_int 3 [0x3]))
>(const_int 17179869176 [0x3fffffff8]))
>   (reg:DI 82)))
>
> To address this, we introduce a splitter handling these cases.
>
> gcc/ChangeLog:
>
> * config/riscv/bitmanip.md: Add split to handle opportunities
>   for slli + sh[123]add.uw
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zba-shadd.c: New test.
>
> Signed-off-by: Philipp Tomsich 
> Co-developed-by: Manolis Tsamis 
>
> ---
>
>  gcc/config/riscv/bitmanip.md   | 44 ++
>  gcc/testsuite/gcc.target/riscv/zba-shadd.c | 13 +++
>  2 files changed, 57 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zba-shadd.c
>
> diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
> index 0ab9ffe3c0b..6c1ccc6f8c5 100644
> --- a/gcc/config/riscv/bitmanip.md
> +++ b/gcc/config/riscv/bitmanip.md
> @@ -79,6 +79,50 @@ (define_insn "*shNadduw"
>[(set_attr "type" "bitmanip")
> (set_attr "mode" "DI")])
>
> +;; During combine, we may encounter an attempt to combine
> +;;   slli rtmp, rs, #imm
> +;;   zext.w rtmp, rtmp
> +;;   sh[123]add rd, rtmp, rs2
> +;; which will lead to the immediate not satisfying the above constraints.
> +;; By splitting the compound expression, we can simplify to a slli and a
> +;; sh[123]add.uw.
> +(define_split
> +  [(set (match_operand:DI 0 "register_operand")
> +   (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
> +   (match_operand:QI 2 "immediate_operand"))
> +(match_operand:DI 3 "consecutive_bits_operand"))
> +(match_operand:DI 4 "register_operand")))
> +   (clobber (match_operand:DI 5 "register_operand"))]
> +  "TARGET_64BIT && TARGET_ZBA"
> +  [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 6)))
> +   (set (match_dup 0) (plus:DI (and:DI (ashift:DI (match_dup 5)
> + (match_dup 7))
> +  (match_dup 8))
> +  (match_dup 4)))]
> +{
> +   unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]);
> +   /* scale: shift within the sh[123]add.uw */
> +   int scale = 32 - clz_hwi (mask);
> +   /* bias:  pre-scale amount (i.e. the prior shift amount) */
> +   int bias = ctz_hwi (mask) - scale;
> +
> +   /* If the bias + scale don't add up to operand[2], reject. */
> +   if ((scale + bias) != UINTVAL (operands[2]))
> +  FAIL;
> +
> +   /* If the shift-amount is out-of-range for sh[123]add.uw, reject. */
> +   if ((scale < 1) || (scale > 3))
> +  FAIL;
> +
> +   /* If there's no bias, the '*shNadduw' pattern should have matched. */
> +   if (bias == 0)
> +  FAIL;
> +
> +   operands[6] = GEN_INT (bias);
> +   operands[7] = GEN_INT (scale);
> +   operands[8] = GEN_INT (0xffffffffULL << scale);
> +})
> +
>  (define_insn "*add.uw"
>[(set (match_operand:DI 0 "register_operand" "=r")
> (plus:DI (zero_extend:DI
> diff --git a/gcc/testsuite/gcc.target/riscv/zba-shadd.c 
> b/gcc/testsuite/gcc.target/riscv/zba-shadd.c
> new file mode 100644
> index 00000000000..33da2530f3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zba-shadd.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=rv64gc_zba -mabi=lp64" } */
> +
> +unsigned long foo(unsigned int a, unsigned long b)
> +{
> +a = a << 1;
> +unsigned long c = (unsigned long) a;
> +unsigned long d = b + (c<<2);
> +return d;
> +}
> +
> +/* { dg-final { scan-assembler "sh2add.uw" } } */
> +/* { dg-final { scan-assembler-not "zext" } } */
> \ No newline at end of file
> --
> 2.34.1
>

Re: [PATCH v1 1/3] RISC-V: add consecutive_bits_operand predicate

2022-06-07 Thread Kito Cheng via Gcc-patches

LGTM


On Wed, May 25, 2022 at 5:48 AM Philipp Tomsich
 wrote:
>
> Provide an easy way to constrain for constants that are a single,
> consecutive run of ones.
>
> gcc/ChangeLog:
>
> * config/riscv/predicates.md (consecutive_bits_operand):
>   Implement new predicate.
>
> Signed-off-by: Philipp Tomsich 
> ---
>
>  gcc/config/riscv/predicates.md | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index c37caa2502b..90db5dfcdd5 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -243,3 +243,14 @@ (define_predicate "const63_operand"
>  (define_predicate "imm5_operand"
>(and (match_code "const_int")
> (match_test "INTVAL (op) < 5")))
> +
> +;; A CONST_INT operand that consists of a single run of consecutive set bits.
> +(define_predicate "consecutive_bits_operand"
> +  (match_code "const_int")
> +{
> +   unsigned HOST_WIDE_INT val = UINTVAL (op);
> +   if (exact_log2 ((val >> ctz_hwi (val)) + 1) < 0)
> +   return false;
> +
> +   return true;
> +})
> --
> 2.34.1
>

Re: [PATCH v2] RISC-V: bitmanip: improve constant-loading for (1ULL << 31) in DImode

2022-06-07 Thread Kito Cheng via Gcc-patches

> OK for backport?

OK, it seems no issue after a week :)


>
> On Thu, 2 Jun 2022 at 21:23, Philipp Tomsich  wrote:
> >
> > Thanks, applied to trunk!
> >
> > On Thu, 2 Jun 2022 at 15:17, Kito Cheng  wrote:
> > >
> > > LGTM
> > >
> > > On Mon, May 30, 2022 at 5:52 AM Philipp Tomsich
> > >  wrote:
> > > >
> > > > The SINGLE_BIT_MASK_OPERAND() is overly restrictive, triggering for
> > > > bits above 31 only (to side-step any issues with the negative SImode
> > > > value 0x80000000/(-1ull << 31)/(1 << 31)).  This moves the special
> > > > handling of this SImode value (i.e. the check for (-1ull << 31) to
> > > > riscv.cc and relaxes the SINGLE_BIT_MASK_OPERAND() test.
> > > >
> > > > With this, the code-generation for loading (1ULL << 31) from:
> > > > li  a0,1
> > > > slli    a0,a0,31
> > > > to:
> > > > bseti   a0,zero,31
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > * config/riscv/riscv.cc (riscv_build_integer_1): Rewrite value 
> > > > as
> > > > (-1 << 31) for the single-bit case, when operating on (1 << 31)
> > > > in SImode.
> > > > * gcc/config/riscv/riscv.h (SINGLE_BIT_MASK_OPERAND): Allow for
> > > > any single-bit value, moving the special case for (1 << 31) to
> > > > riscv_build_integer_1 (in riscv.c).
> > > >
> > > > Signed-off-by: Philipp Tomsich 
> > > >
> > > > ---
> > > >
> > > > Changes in v2:
> > > > - Use HOST_WIDE_INT_1U/HOST_WIDE_INT_M1U instead of constants.
> > > > - Fix some typos in the comment above the rewrite of the value.
> > > > - Update the comment to clarify that we expect a LUI to be emitted for
> > > >   the SImode case (i.e. sign-extended for RV64) of (1 << 31).
> > > >
> > > >  gcc/config/riscv/riscv.cc |  9 +
> > > >  gcc/config/riscv/riscv.h  | 11 ---
> > > >  2 files changed, 13 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> > > > index f83dc796d88..2e83ca07394 100644
> > > > --- a/gcc/config/riscv/riscv.cc
> > > > +++ b/gcc/config/riscv/riscv.cc
> > > > @@ -420,6 +420,15 @@ riscv_build_integer_1 (struct riscv_integer_op 
> > > > codes[RISCV_MAX_INTEGER_OPS],
> > > >/* Simply BSETI.  */
> > > >codes[0].code = UNKNOWN;
> > > >codes[0].value = value;
> > > > +
> > > > +  /* RISC-V sign-extends all 32bit values that live in a 32bit
> > > > +register.  To avoid paradoxes, we thus need to use the
> > > > +sign-extended (negative) representation (-1 << 31) for the
> > > > +value, if we want to build (1 << 31) in SImode.  This will
> > > > +then expand to an LUI instruction.  */
> > > > +  if (mode == SImode && value == (HOST_WIDE_INT_1U << 31))
> > > > +   codes[0].value = (HOST_WIDE_INT_M1U << 31);
> > > > +
> > > >return 1;
> > > >  }
> > > >
> > > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> > > > index 5083a1c24b0..6f7f4d3fbdc 100644
> > > > --- a/gcc/config/riscv/riscv.h
> > > > +++ b/gcc/config/riscv/riscv.h
> > > > @@ -528,13 +528,10 @@ enum reg_class
> > > >(((VALUE) | ((1UL<<31) - IMM_REACH)) == ((1UL<<31) - IMM_REACH)  
> > > > \
> > > > || ((VALUE) | ((1UL<<31) - IMM_REACH)) + IMM_REACH == 0)
> > > >
> > > > -/* If this is a single bit mask, then we can load it with bseti.  But 
> > > > this
> > > > -   is not useful for any of the low 31 bits because we can use addi or 
> > > > lui
> > > > -   to load them.  It is wrong for loading SImode 0x8000 on rv64 
> > > > because it
> > > > -   needs to be sign-extended.  So we restrict this to the upper 32-bits
> > > > -   only.  */
> > > > -#define SINGLE_BIT_MASK_OPERAND(VALUE) \
> > > > -  (pow2p_hwi (VALUE) && (ctz_hwi (VALUE) >= 32))
> > > > +/* If this is a single bit mask, then we can load it with bseti.  
> > > > Special
> > > > +   handling of SImode 0x8000 on RV64 is done in 
> > > > riscv_build_integer_1. */
> > > > +#define SINGLE_BIT_MASK_OPERAND(VALUE) 
> > > > \
> > > > +  (pow2p_hwi (VALUE))
> > > >
> > > >  /* Stack layout; function entry, exit and calling.  */
> > > >
> > > > --
> > > > 2.34.1
> > > >

Re: [1/2] PR96463 - aarch64 specific changes

2022-06-07 Thread Prathamesh Kulkarni via Gcc-patches

On Mon, 6 Jun 2022 at 16:29, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> >> >  {
> >> >/* The pattern matching functions above are written to look for a 
> >> > small
> >> >   number to begin the sequence (0, 1, N/2).  If we begin with an 
> >> > index
> >> > @@ -24084,6 +24112,12 @@ aarch64_expand_vec_perm_const_1 (struct 
> >> > expand_vec_perm_d *d)
> >> > || d->vec_flags == VEC_SVE_PRED)
> >> >&& known_gt (nelt, 1))
> >> >  {
> >> > +  /* If operand and result modes differ, then only check
> >> > +  for dup case.  */
> >> > +  if (d->vmode != op_mode)
> >> > + return (d->vec_flags == VEC_SVE_DATA)
> >> > + ? aarch64_evpc_sve_dup (d, op_mode) : false;
> >> > +
> >>
> >> I think it'd be more future-proof to format this as:
> >>
> >> if (d->vmode == d->op_mode)
> >>   {
> >> …existing code…
> >>   }
> >> else
> >>   {
> >> if (aarch64_evpc_sve_dup (d))
> >>   return true;
> >>   }
> >>
> >> with the d->vec_flags == VEC_SVE_DATA check being in aarch64_evpc_sve_dup,
> >> alongside the op_mode check.  I think we'll be adding more checks here
> >> over time.
> > Um I was wondering if we should structure it as:
> > if (d->vmode == d->op_mode)
> >   {
> >  ...existing code...
> >   }
> > if (aarch64_evpc_sve_dup (d))
> >   return true;
> >
> > So we check for dup irrespective of  d->vmode == d->op_mode ?
>
> Yeah, I can see the attraction of that.  I think the else is better
> though because the fallback TBL handling will (rightly) come at the end
> of the existing code.  Without the else, we'd have specific tests like
> DUP after generic ones like TBL, so the reader would have to work out
> for themselves that DUP and TBL handle disjoint cases.
>
> >> >if (aarch64_evpc_rev_local (d))
> >> >   return true;
> >> >else if (aarch64_evpc_rev_global (d))
> >> > @@ -24105,7 +24139,12 @@ aarch64_expand_vec_perm_const_1 (struct 
> >> > expand_vec_perm_d *d)
> >> >else if (aarch64_evpc_reencode (d))
> >> >   return true;
> >> >if (d->vec_flags == VEC_SVE_DATA)
> >> > - return aarch64_evpc_sve_tbl (d);
> >> > + {
> >> > +   if (aarch64_evpc_sve_tbl (d))
> >> > + return true;
> >> > +   else if (aarch64_evpc_sve_dup (d, op_mode))
> >> > + return true;
> >> > + }
> >> >else if (d->vec_flags == VEC_ADVSIMD)
> >> >   return aarch64_evpc_tbl (d);
> >> >  }
> >>
> >> Is this part still needed, given the above?
> >>
> >> Thanks,
> >> Richard
> >>
> >> > @@ -24119,9 +24158,6 @@ aarch64_vectorize_vec_perm_const (machine_mode 
> >> > vmode, machine_mode op_mode,
> >> > rtx target, rtx op0, rtx op1,
> >> > const vec_perm_indices &sel)
> >> >  {
> >> > -  if (vmode != op_mode)
> >> > -return false;
> >> > -
> >> >struct expand_vec_perm_d d;
> >> >
> >> >/* Check whether the mask can be applied to a single vector.  */
> >> > @@ -24154,10 +24190,10 @@ aarch64_vectorize_vec_perm_const (machine_mode 
> >> > vmode, machine_mode op_mode,
> >> >d.testing_p = !target;
> >> >
> >> >if (!d.testing_p)
> >> > -return aarch64_expand_vec_perm_const_1 (&d);
> >> > +return aarch64_expand_vec_perm_const_1 (&d, op_mode);
> >> >
> >> >rtx_insn *last = get_last_insn ();
> >> > -  bool ret = aarch64_expand_vec_perm_const_1 (&d);
> >> > +  bool ret = aarch64_expand_vec_perm_const_1 (&d, op_mode);
> >> >gcc_assert (last == get_last_insn ());
> >> >
> >> >return ret;
> >
> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
> > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > index bee410929bd..1a804b1ab73 100644
> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > @@ -44,6 +44,7 @@
> >  #include "aarch64-sve-builtins-shapes.h"
> >  #include "aarch64-sve-builtins-base.h"
> >  #include "aarch64-sve-builtins-functions.h"
> > +#include "ssa.h"
> >
> >  using namespace aarch64_sve;
> >
> > @@ -1207,6 +1208,64 @@ public:
> >  insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
> >  return e.use_contiguous_load_insn (icode);
> >}
> > +
> > +  gimple *
> > +  fold (gimple_folder &f) const override
> > +  {
> > +tree arg0 = gimple_call_arg (f.call, 0);
> > +tree arg1 = gimple_call_arg (f.call, 1);
> > +
> > +/* Transform:
> > +   lhs = svld1rq ({-1, -1, ... }, arg1)
> > +   into:
> > +   tmp = mem_ref [(int * {ref-all}) arg1]
> > +   lhs = vec_perm_expr.
> > +   on little endian target.
> > +   vectype is the corresponding ADVSIMD type.  */
> > +
> > +if (!BYTES_BIG_ENDIAN
> > + && integer_all_onesp (arg0))
> > +  {
> > + tree lhs = gimple_call_lhs (f.call);
> > + tree lhs_type = TREE_TYPE (lhs);
> > + poly_uint64 lhs_len = TYPE_VECTOR_SUBPARTS (lhs_type);
> > + tree eltype = TRE

Re: [PATCH v1 3/3] RISC-V: Replace zero_extendsidi2_shifted with generalized split

2022-06-07 Thread Philipp Tomsich

On Tue, 7 Jun 2022 at 12:24, Kito Cheng  wrote:
>
> On Wed, May 25, 2022 at 5:47 AM Philipp Tomsich
>  wrote:
> >
> > The current method of treating shifts of extended values on RISC-V
> > frequently causes sequences of 3 shifts, despite the presence of the
> > 'zero_extendsidi2_shifted' pattern.
> >
> > Consider:
> > unsigned long f(unsigned int a, unsigned long b)
> > {
> > a = a << 1;
> > unsigned long c = (unsigned long) a;
> > c = b + (c<<4);
> > return c;
> > }
> > which will present at combine-time as:
> > Trying 7, 8 -> 9:
> > 7: r78:SI=r81:DI#0<<0x1
> >   REG_DEAD r81:DI
> > 8: r79:DI=zero_extend(r78:SI)
> >   REG_DEAD r78:SI
> > 9: r72:DI=r79:DI<<0x4
> >   REG_DEAD r79:DI
> > Failed to match this instruction:
> > (set (reg:DI 72 [ _1 ])
> > (and:DI (ashift:DI (reg:DI 81)
> > (const_int 5 [0x5]))
> > > (const_int 68719476704 [0xfffffffe0])))
> > and produce the following (optimized) assembly:
> > f:
> > slliw   a5,a0,1
> > > slli    a5,a5,32
> > > srli    a5,a5,28
> > add a0,a5,a1
> > ret
> >
> > The current way of handling this (in 'zero_extendsidi2_shifted')
> > doesn't apply for two reasons:
> > - this is seen before reload, and
> > > - (more importantly) the constant mask is not 0xfffffffful.
> >
> > To address this, we introduce a generalized version of shifting
> > zero-extended values that supports any mask of consecutive ones as
> > > long as the number of trailing zeros is the inner shift-amount.
> >
> > With this new split, we generate the following assembly for the
> > aforementioned function:
> > f:
> > > slli    a0,a0,33
> > > srli    a0,a0,28
> > add a0,a0,a1
> > ret
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/riscv.md (zero_extendsidi2_shifted): Replace
> >   with a generalized split that requires no clobber, runs
> >   before reload and works for smaller masks.
> >
> > Signed-off-by: Philipp Tomsich 
> > ---
> >
> >  gcc/config/riscv/riscv.md | 37 -
> >  1 file changed, 20 insertions(+), 17 deletions(-)
> >
> > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > index b8ab0cf169a..cc10cd90a74 100644
> > --- a/gcc/config/riscv/riscv.md
> > +++ b/gcc/config/riscv/riscv.md
> > @@ -2119,23 +2119,26 @@ (define_split
> >  ;; occur when unsigned int is used for array indexing.  Split this into two
> >  ;; shifts.  Otherwise we can get 3 shifts.
> >
> > -(define_insn_and_split "zero_extendsidi2_shifted"
> > -  [(set (match_operand:DI 0 "register_operand" "=r")
> > -   (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
> > -  (match_operand:QI 2 "immediate_operand" "I"))
> > -   (match_operand 3 "immediate_operand" "")))
> > -   (clobber (match_scratch:DI 4 "=&r"))]
> > -  "TARGET_64BIT && !TARGET_ZBA
> > -   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0x)"
> > -  "#"
> > -  "&& reload_completed"
> > -  [(set (match_dup 4)
> > -   (ashift:DI (match_dup 1) (const_int 32)))
> > -   (set (match_dup 0)
> > -   (lshiftrt:DI (match_dup 4) (match_dup 5)))]
> > -  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
> > -  [(set_attr "type" "shift")
> > -   (set_attr "mode" "DI")])
> > +(define_split
> > +  [(set (match_operand:DI 0 "register_operand")
> > +   (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
> > +  (match_operand:QI 2 "immediate_operand"))
> > +   (match_operand:DI 3 "consecutive_bits_operand")))]
> > +  "TARGET_64BIT"
> > +  [(set (match_dup 0) (ashift:DI (match_dup 1) (match_dup 4)))
> > +   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (match_dup 5)))]
>
> I would prefer to keep using another register if possible:
>
> like this:
> +  [(set (match_dup 6) (ashift:DI (match_dup 1) (match_dup 4)))
> +   (set (match_dup 0) (lshiftrt:DI (match_dup 6) (match_dup 5)))]
>
> if (can_create_pseudo_p)
>   operands[6] = gen_reg_rtx (DImode);
> else
>   operands[6] = operands[0];

I don't see the benefit to this (unless you expect opportunities for
CSE), as there will be a linear dependency chain anyway.  I'd like to
understand your reasoning behind this a bit better, as our style
currently generally tries to avoid introducing temporaries when they
are avoidable.

Thanks,
Philipp.

>
> > +{
> > +   unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]);
> > +   int leading = clz_hwi (mask);
> > +   int trailing = ctz_hwi (mask);
> > +
> > +   /* The shift-amount must match the number of trailing bits */
> > +   if (trailing != UINTVAL (operands[2]))
> > +  FAIL;
> > +
> > +   operands[4] = GEN_INT (leading + trailing);
> > +   operands[5] = GEN_INT (leading);
> > +})
> >
> >  ;;
> >  ;;  
> > --
> > 2.34.1
> >

Re: [1/2] PR96463 - aarch64 specific changes

2022-06-07 Thread Richard Sandiford via Gcc-patches

Prathamesh Kulkarni  writes:
> On Mon, 6 Jun 2022 at 16:29, Richard Sandiford
>  wrote:
>>
>> Prathamesh Kulkarni  writes:
>> >> >  {
>> >> >/* The pattern matching functions above are written to look for a 
>> >> > small
>> >> >   number to begin the sequence (0, 1, N/2).  If we begin with an 
>> >> > index
>> >> > @@ -24084,6 +24112,12 @@ aarch64_expand_vec_perm_const_1 (struct 
>> >> > expand_vec_perm_d *d)
>> >> > || d->vec_flags == VEC_SVE_PRED)
>> >> >&& known_gt (nelt, 1))
>> >> >  {
>> >> > +  /* If operand and result modes differ, then only check
>> >> > +  for dup case.  */
>> >> > +  if (d->vmode != op_mode)
>> >> > + return (d->vec_flags == VEC_SVE_DATA)
>> >> > + ? aarch64_evpc_sve_dup (d, op_mode) : false;
>> >> > +
>> >>
>> >> I think it'd be more future-proof to format this as:
>> >>
>> >> if (d->vmode == d->op_mode)
>> >>   {
>> >> …existing code…
>> >>   }
>> >> else
>> >>   {
>> >> if (aarch64_evpc_sve_dup (d))
>> >>   return true;
>> >>   }
>> >>
>> >> with the d->vec_flags == VEC_SVE_DATA check being in aarch64_evpc_sve_dup,
>> >> alongside the op_mode check.  I think we'll be adding more checks here
>> >> over time.
>> > Um I was wondering if we should structure it as:
>> > if (d->vmode == d->op_mode)
>> >   {
>> >  ...existing code...
>> >   }
>> > if (aarch64_evpc_sve_dup (d))
>> >   return true;
>> >
>> > So we check for dup irrespective of  d->vmode == d->op_mode ?
>>
>> Yeah, I can see the attraction of that.  I think the else is better
>> though because the fallback TBL handling will (rightly) come at the end
>> of the existing code.  Without the else, we'd have specific tests like
>> DUP after generic ones like TBL, so the reader would have to work out
>> for themselves that DUP and TBL handle disjoint cases.
>>
>> >> >if (aarch64_evpc_rev_local (d))
>> >> >   return true;
>> >> >else if (aarch64_evpc_rev_global (d))
>> >> > @@ -24105,7 +24139,12 @@ aarch64_expand_vec_perm_const_1 (struct 
>> >> > expand_vec_perm_d *d)
>> >> >else if (aarch64_evpc_reencode (d))
>> >> >   return true;
>> >> >if (d->vec_flags == VEC_SVE_DATA)
>> >> > - return aarch64_evpc_sve_tbl (d);
>> >> > + {
>> >> > +   if (aarch64_evpc_sve_tbl (d))
>> >> > + return true;
>> >> > +   else if (aarch64_evpc_sve_dup (d, op_mode))
>> >> > + return true;
>> >> > + }
>> >> >else if (d->vec_flags == VEC_ADVSIMD)
>> >> >   return aarch64_evpc_tbl (d);
>> >> >  }
>> >>
>> >> Is this part still needed, given the above?
>> >>
>> >> Thanks,
>> >> Richard
>> >>
>> >> > @@ -24119,9 +24158,6 @@ aarch64_vectorize_vec_perm_const (machine_mode 
>> >> > vmode, machine_mode op_mode,
>> >> > rtx target, rtx op0, rtx op1,
>> >> > const vec_perm_indices &sel)
>> >> >  {
>> >> > -  if (vmode != op_mode)
>> >> > -return false;
>> >> > -
>> >> >struct expand_vec_perm_d d;
>> >> >
>> >> >/* Check whether the mask can be applied to a single vector.  */
>> >> > @@ -24154,10 +24190,10 @@ aarch64_vectorize_vec_perm_const 
>> >> > (machine_mode vmode, machine_mode op_mode,
>> >> >d.testing_p = !target;
>> >> >
>> >> >if (!d.testing_p)
>> >> > -return aarch64_expand_vec_perm_const_1 (&d);
>> >> > +return aarch64_expand_vec_perm_const_1 (&d, op_mode);
>> >> >
>> >> >rtx_insn *last = get_last_insn ();
>> >> > -  bool ret = aarch64_expand_vec_perm_const_1 (&d);
>> >> > +  bool ret = aarch64_expand_vec_perm_const_1 (&d, op_mode);
>> >> >gcc_assert (last == get_last_insn ());
>> >> >
>> >> >return ret;
>> >
>> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
>> > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > index bee410929bd..1a804b1ab73 100644
>> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
>> > @@ -44,6 +44,7 @@
>> >  #include "aarch64-sve-builtins-shapes.h"
>> >  #include "aarch64-sve-builtins-base.h"
>> >  #include "aarch64-sve-builtins-functions.h"
>> > +#include "ssa.h"
>> >
>> >  using namespace aarch64_sve;
>> >
>> > @@ -1207,6 +1208,64 @@ public:
>> >  insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
>> >  return e.use_contiguous_load_insn (icode);
>> >}
>> > +
>> > +  gimple *
>> > +  fold (gimple_folder &f) const override
>> > +  {
>> > +tree arg0 = gimple_call_arg (f.call, 0);
>> > +tree arg1 = gimple_call_arg (f.call, 1);
>> > +
>> > +/* Transform:
>> > +   lhs = svld1rq ({-1, -1, ... }, arg1)
>> > +   into:
>> > +   tmp = mem_ref [(int * {ref-all}) arg1]
>> > +   lhs = vec_perm_expr.
>> > +   on little endian target.
>> > +   vectype is the corresponding ADVSIMD type.  */
>> > +
>> > +if (!BYTES_BIG_ENDIAN
>> > + && integer_all_onesp (arg0))
>> > +  {
>> > + tre

Re: [PATCH] libgomp, openmp: pinned memory

2022-06-07 Thread Andrew Stubbs

Following some feedback from users of the OG11 branch I think I need to 
withdraw this patch, for now.


The memory pinned via the mlock call does not give the expected 
performance boost. I had not expected that it would do much in my test 
setup, given that the machine has a lot of RAM and my benchmarks are 
small, but others have tried more and on varying machines and architectures.


It seems that it isn't enough for the memory to be pinned, it has to be 
pinned using the Cuda API to get the performance boost. I had not done 
this because it was difficult to resolve the code abstraction 
difficulties and anyway the implementation was supposed to be device 
independent, but it seems we need a specific pinning mechanism for each 
device.


I will resubmit this patch with some kind of Cuda/plugin hook soonish, 
keeping the existing implementation for other device types. I don't know 
how that'll handle heterogeneous systems, but those ought to be rare.


I don't think libmemkind will resolve this performance issue, although 
certainly it can be used for host implementations of low-latency 
memories, etc.


Andrew

On 13/01/2022 13:53, Andrew Stubbs wrote:

On 05/01/2022 17:07, Andrew Stubbs wrote:
I don't believe 64KB will be anything like enough for any real HPC 
application. Is it really worth optimizing for this case?


Anyway, I'm working on an implementation using mmap instead of malloc 
for pinned allocations. I figure that will simplify the unpin 
algorithm (because it'll be munmap) and optimize for large allocations 
such as I imagine HPC applications will use. It won't fix the ulimit 
issue.


Here's my new patch.

This version is intended to apply on top of the latest version of my 
low-latency allocator patch, although the dependency is mostly textual.


Pinned memory is allocated via mmap + mlock, and allocation fails 
(returns NULL) if the lock fails and there's no fallback configured.


This means that large allocations will now be page aligned and therefore 
pin the smallest number of pages for the size requested, and that that 
memory will be unpinned automatically when freed via munmap, or moved 
via mremap.


Obviously this is not ideal for allocations much smaller than one page. 
If that turns out to be a problem in the real world then we can add a 
special case fairly straight-forwardly, and incur the extra page 
tracking expense in those cases only, or maybe implement our own 
pinned-memory heap (something like already proposed for low-latency 
memory, perhaps).


Also new is a realloc implementation that works better when reallocation 
fails. This is confirmed by the new testcases.


OK for stage 1?

Thanks

Andrew

[committed] arm: Improve code generation for BFI and BFC [PR105090]

2022-06-07 Thread Richard Earnshaw via Gcc-patches


This patch, in response to PR105090, makes some general improvements
to the code generation when BFI and BFC instructions are available.
Firstly we handle more cases where the RTL does not generate an INSV
operation due to a lack of a tie between the input and output, but we
nevertheless need to emit BFI later on; we handle this by requiring
the register allocator to tie the operands.  Secondly we handle some
cases where we were previously emitting BFC, but AND with an immediate
would be better; we do this by converting all BFC patterns into AND
using a split pattern.  And finally, we handle some cases where
previously we would emit multiple BIC operations to clear a value, but
could instead use a single BFC instruction.

BFC and BFI express the mask as a pair of values, one for the number
of bits to clear and another for the location of the least significant
bit.  We handle these with a single new output modifier letter that
causes both values to be printed; we use an 'inverted' value so that
it can be used directly with the constant used in an AND rtl
construct.  We've run out of 'new' letters, so to do this we re-use
one of the long-obsoleted Maverick output modifiers.

gcc/ChangeLog:

PR target/105090
* config/arm/arm.cc (arm_bfi_1_p): New function.
(arm_bfi_p): New function.
(arm_rtx_costs_internal): Add costs for BFI idioms.
(arm_print_operand [case 'V']): Format output for BFI/BFC masks.
* config/arm/constraints.md (Dj): New constraint.
* config/arm/arm.md (arm_andsi3_insn): Add alternative to use BFC.
(insv_zero): Convert to an insn with a split.
(*bfi, *bfi_alt1, *bfi_alt2, *bfi_alt3): New patterns.
---
 gcc/config/arm/arm.cc | 113 ++
 gcc/config/arm/arm.md | 106 ---
 gcc/config/arm/constraints.md |  10 ++-
 3 files changed, 208 insertions(+), 21 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 7ecf7b74ffa..2a76c7b9a72 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -10201,6 +10201,61 @@ arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
   return true;
 }
 
+/* Helper for arm_bfi_p.  */
+static bool
+arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
+{
+  unsigned HOST_WIDE_INT const1;
+  unsigned HOST_WIDE_INT const2 = 0;
+
+  if (!CONST_INT_P (XEXP (op0, 1)))
+return false;
+
+  const1 = XUINT (XEXP (op0, 1), 0);
+  if (!CONST_INT_P (XEXP (op1, 1))
+  || ~XUINT (XEXP (op1, 1), 0) != const1)
+return false;
+
+  if (GET_CODE (XEXP (op0, 0)) == ASHIFT
+  && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
+{
+  const2 = XUINT (XEXP (XEXP (op0, 0), 1), 0);
+  *sub0 = XEXP (XEXP (op0, 0), 0);
+}
+  else
+*sub0 = XEXP (op0, 0);
+
+  if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
+return false;
+
+  *sub1 = XEXP (op1, 0);
+  return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
+}
+
+/* Recognize a BFI idiom.  Helper for arm_rtx_costs_internal.  The
+   format looks something like:
+
+   (IOR (AND (reg1) (~const1))
+	(AND (ASHIFT (reg2) (const2))
+	 (const1)))
+
+   where const1 is a consecutive sequence of 1-bits with the
+   least-significant non-zero bit starting at bit position const2.  If
+   const2 is zero, then the shift will not appear at all, due to
+   canonicalization.  The two arms of the IOR expression may be
+   flipped.  */
+static bool
+arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
+{
+  if (GET_CODE (x) != IOR)
+return false;
+  if (GET_CODE (XEXP (x, 0)) != AND
+  || GET_CODE (XEXP (x, 1)) != AND)
+return false;
+  return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
+	  || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
+}
+
 /* RTX costs.  Make an estimate of the cost of executing the operation
X, which is contained within an operation with code OUTER_CODE.
SPEED_P indicates whether the cost desired is the performance cost,
@@ -10959,14 +11014,28 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
   *cost = LIBCALL_COST (2);
   return false;
 case IOR:
-  if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
-{
-  if (speed_p)
-*cost += extra_cost->alu.rev;
+  {
+	rtx sub0, sub1;
+	if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
+	  {
+	if (speed_p)
+	  *cost += extra_cost->alu.rev;
 
-  return true;
-}
-/* Fall through.  */
+	return true;
+	  }
+	else if (mode == SImode && arm_arch_thumb2
+		 && arm_bfi_p (x, &sub0, &sub1))
+	  {
+	*cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
+	*cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
+	if (speed_p)
+	  *cost += extra_cost->alu.bfi;
+
+	return true;
+	  }
+  }
+
+  /* Fall through.  */
 case AND: case XOR:
   if (mode == SImode)
 	{
@@ -23780,8 +23849,8 @@ arm_print_condition (FILE *stream)

Re: [PATCH] libgomp, openmp: pinned memory

2022-06-07 Thread Jakub Jelinek via Gcc-patches

On Tue, Jun 07, 2022 at 12:05:40PM +0100, Andrew Stubbs wrote:
> Following some feedback from users of the OG11 branch I think I need to
> withdraw this patch, for now.
> 
> The memory pinned via the mlock call does not give the expected performance
> boost. I had not expected that it would do much in my test setup, given that
> the machine has a lot of RAM and my benchmarks are small, but others have
> tried more and on varying machines and architectures.

I don't understand why there should be any expected performance boost (at
least not unless the machine starts swapping out pages),
{ omp_atk_pinned, true } is solely about the requirement that the memory
can't be swapped out.

> It seems that it isn't enough for the memory to be pinned, it has to be
> pinned using the Cuda API to get the performance boost. I had not done this

For performance boost of what kind of code?
I don't understand how Cuda API could be useful (or can be used at all) if
offloading to NVPTX isn't involved.  The fact that somebody asks for host
memory allocation with omp_atk_pinned set to true doesn't mean it will be
in any way related to NVPTX offloading (unless it is in NVPTX target region
obviously, but then mlock isn't available, so sure, if there is something
CUDA can provide for that case, nice).

> I don't think libmemkind will resolve this performance issue, although
> certainly it can be used for host implementations of low-latency memories,
> etc.

The reason for libmemkind is primarily its support of HBW memory (but
admittedly I need to find out what kind of such memory it does support),
or the various interleaving etc. the library has.
Plus, when we have such support, as it has its own customizable allocator,
it could be used to allocate larger chunks of memory that can be mlocked
and then just allocate from that pinned memory if user asks for small
allocations from that memory.

Jakub

Re: [PATCH] libgomp, openmp: pinned memory

2022-06-07 Thread Andrew Stubbs


On 07/06/2022 13:10, Jakub Jelinek wrote:

On Tue, Jun 07, 2022 at 12:05:40PM +0100, Andrew Stubbs wrote:

Following some feedback from users of the OG11 branch I think I need to
withdraw this patch, for now.

The memory pinned via the mlock call does not give the expected performance
boost. I had not expected that it would do much in my test setup, given that
the machine has a lot of RAM and my benchmarks are small, but others have
tried more and on varying machines and architectures.


I don't understand why there should be any expected performance boost (at
least not unless the machine starts swapping out pages),
{ omp_atk_pinned, true } is solely about the requirement that the memory
can't be swapped out.


It seems like it takes a faster path through the NVidia drivers. This is 
a black box, for me, but that seems like a plausible explanation. The 
results are different on x86_64 and powerpc hosts (such as the Summit 
supercomputer).



It seems that it isn't enough for the memory to be pinned, it has to be
pinned using the Cuda API to get the performance boost. I had not done this


For performance boost of what kind of code?
I don't understand how Cuda API could be useful (or can be used at all) if
offloading to NVPTX isn't involved.  The fact that somebody asks for host
memory allocation with omp_atk_pinned set to true doesn't mean it will be
in any way related to NVPTX offloading (unless it is in NVPTX target region
obviously, but then mlock isn't available, so sure, if there is something
CUDA can provide for that case, nice).


This is specifically for NVPTX offload, of course, but then that's what 
our customer is paying for.


The expectation, from users, is that memory pinning will give the 
benefits specific to the active device. We can certainly make that 
happen when there is only one (flavour of) offload device present. I had 
hoped it could be one way for all, but it looks like not.





I don't think libmemkind will resolve this performance issue, although
certainly it can be used for host implementations of low-latency memories,
etc.


The reason for libmemkind is primarily its support of HBW memory (but
admittedly I need to find out what kind of such memory it does support),
or the various interleaving etc. the library has.
Plus, when we have such support, as it has its own customizable allocator,
it could be used to allocate larger chunks of memory that can be mlocked
and then just allocate from that pinned memory if user asks for small
allocations from that memory.


It should be straight-forward to switch the no-offload implementation to 
libmemkind when the time comes (the changes would be contained within 
config/linux/allocator.c), but I have no plans to do so myself (and no 
hardware to test it with). I'd prefer that it didn't impede the offload 
solution in the meantime.


Andrew

Re: [PATCH] libgomp, openmp: pinned memory

2022-06-07 Thread Jakub Jelinek via Gcc-patches

On Tue, Jun 07, 2022 at 01:28:33PM +0100, Andrew Stubbs wrote:
> > For performance boost of what kind of code?
> > I don't understand how Cuda API could be useful (or can be used at all) if
> > offloading to NVPTX isn't involved.  The fact that somebody asks for host
> > memory allocation with omp_atk_pinned set to true doesn't mean it will be
> > in any way related to NVPTX offloading (unless it is in NVPTX target region
> > obviously, but then mlock isn't available, so sure, if there is something
> > CUDA can provide for that case, nice).
> 
> This is specifically for NVPTX offload, of course, but then that's what our
> customer is paying for.
> 
> The expectation, from users, is that memory pinning will give the benefits
> specific to the active device. We can certainly make that happen when there
> is only one (flavour of) offload device present. I had hoped it could be one
> way for all, but it looks like not.

I think that is just an expectation that isn't backed by anything in the
standard.
When users need something like that (but it would be good to describe what
it is: memory that will be primarily used for interfacing the offloading
device 0 (or some specific device given by some number), or memory that
can be used without remapping on some offloading device, or something else?),
and when we know what exactly that is (e.g. what Cuda APIs or GCN APIs etc.
can provide), discuss on omp-lang whether there shouldn't be some standard
way to ask for such an allocator.  Or there is always the possibility of
extensions.  Not sure if one can just define ompx_atv_whatever, use some
large value for it (but the spec doesn't have a vendor range which would be
safe to use) and support it that way.

Plus a different thing is allocators in the offloading regions.
I think we should translate some omp_alloc etc. calls in such regions
when they use constant expression standard allocators to doing the
allocation through other means, or allocators.c can be overridden or
amended for the needs or possibilities of the offloading targets.

Jakub

Re: [PATCH v1 3/3] RISC-V: Replace zero_extendsidi2_shifted with generalized split

2022-06-07 Thread Kito Cheng via Gcc-patches

Using the same pseudo register makes one longer live range instead of
two shorter live ranges, which is not good when the instruction scheduler
tries to separate those two instructions.  I think the register allocator
has more complete knowledge to decide which way is better - using the same
register or a different one - so I prefer to use another pseudo here if
possible.

That's also what the AArch64/ARM/x86 ports do - use a new pseudo as tmp if
possible.


On Tue, Jun 7, 2022 at 6:50 PM Philipp Tomsich  wrote:
>
> On Tue, 7 Jun 2022 at 12:24, Kito Cheng  wrote:
> >
> > On Wed, May 25, 2022 at 5:47 AM Philipp Tomsich
> >  wrote:
> > >
> > > The current method of treating shifts of extended values on RISC-V
> > > frequently causes sequences of 3 shifts, despite the presence of the
> > > 'zero_extendsidi2_shifted' pattern.
> > >
> > > Consider:
> > > unsigned long f(unsigned int a, unsigned long b)
> > > {
> > > a = a << 1;
> > > unsigned long c = (unsigned long) a;
> > > c = b + (c<<4);
> > > return c;
> > > }
> > > which will present at combine-time as:
> > > Trying 7, 8 -> 9:
> > > 7: r78:SI=r81:DI#0<<0x1
> > >   REG_DEAD r81:DI
> > > 8: r79:DI=zero_extend(r78:SI)
> > >   REG_DEAD r78:SI
> > > 9: r72:DI=r79:DI<<0x4
> > >   REG_DEAD r79:DI
> > > Failed to match this instruction:
> > > (set (reg:DI 72 [ _1 ])
> > > (and:DI (ashift:DI (reg:DI 81)
> > > (const_int 5 [0x5]))
> > > > (const_int 68719476704 [0xfffffffe0])))
> > > and produce the following (optimized) assembly:
> > > f:
> > > slliw   a5,a0,1
> > > > slli    a5,a5,32
> > > > srli    a5,a5,28
> > > add a0,a5,a1
> > > ret
> > >
> > > The current way of handling this (in 'zero_extendsidi2_shifted')
> > > doesn't apply for two reasons:
> > > - this is seen before reload, and
> > > > - (more importantly) the constant mask is not 0xfffffffful.
> > >
> > > To address this, we introduce a generalized version of shifting
> > > zero-extended values that supports any mask of consecutive ones as
> > > > long as the number of trailing zeros is the inner shift-amount.
> > >
> > > With this new split, we generate the following assembly for the
> > > aforementioned function:
> > > f:
> > > > slli    a0,a0,33
> > > > srli    a0,a0,28
> > > add a0,a0,a1
> > > ret
> > >
> > > gcc/ChangeLog:
> > >
> > > * config/riscv/riscv.md (zero_extendsidi2_shifted): Replace
> > >   with a generalized split that requires no clobber, runs
> > >   before reload and works for smaller masks.
> > >
> > > Signed-off-by: Philipp Tomsich 
> > > ---
> > >
> > >  gcc/config/riscv/riscv.md | 37 -
> > >  1 file changed, 20 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > > index b8ab0cf169a..cc10cd90a74 100644
> > > --- a/gcc/config/riscv/riscv.md
> > > +++ b/gcc/config/riscv/riscv.md
> > > @@ -2119,23 +2119,26 @@ (define_split
> > >  ;; occur when unsigned int is used for array indexing.  Split this into 
> > > two
> > >  ;; shifts.  Otherwise we can get 3 shifts.
> > >
> > > -(define_insn_and_split "zero_extendsidi2_shifted"
> > > -  [(set (match_operand:DI 0 "register_operand" "=r")
> > > -   (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
> > > -  (match_operand:QI 2 "immediate_operand" "I"))
> > > -   (match_operand 3 "immediate_operand" "")))
> > > -   (clobber (match_scratch:DI 4 "=&r"))]
> > > -  "TARGET_64BIT && !TARGET_ZBA
> > > -   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0x)"
> > > -  "#"
> > > -  "&& reload_completed"
> > > -  [(set (match_dup 4)
> > > -   (ashift:DI (match_dup 1) (const_int 32)))
> > > -   (set (match_dup 0)
> > > -   (lshiftrt:DI (match_dup 4) (match_dup 5)))]
> > > -  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
> > > -  [(set_attr "type" "shift")
> > > -   (set_attr "mode" "DI")])
> > > +(define_split
> > > +  [(set (match_operand:DI 0 "register_operand")
> > > +   (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
> > > +  (match_operand:QI 2 "immediate_operand"))
> > > +   (match_operand:DI 3 "consecutive_bits_operand")))]
> > > +  "TARGET_64BIT"
> > > +  [(set (match_dup 0) (ashift:DI (match_dup 1) (match_dup 4)))
> > > +   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (match_dup 5)))]
> >
> > I would prefer to keep using another register if possible:
> >
> > like this:
> > +  [(set (match_dup 6) (ashift:DI (match_dup 1) (match_dup 4)))
> > +   (set (match_dup 0) (lshiftrt:DI (match_dup 6) (match_dup 5)))]
> >
> > if (can_create_pseudo_p)
> >   operands[6] = gen_reg_rtx (DImode);
> > else
> >   operands[6] = operands[0];
>
> I don't see the benefit to this (unless you expect opportunities for
> CSE), as th

Re: [PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-07 Thread Patrick Palka via Gcc-patches

On Mon, 6 Jun 2022, Jason Merrill wrote:

> On 6/6/22 14:27, Patrick Palka wrote:
> > On Thu, 7 Oct 2021, Jason Merrill wrote:
> > 
> > > On 10/7/21 11:17, Patrick Palka wrote:
> > > > On Wed, 6 Oct 2021, Jason Merrill wrote:
> > > > 
> > > > > On 10/6/21 15:52, Patrick Palka wrote:
> > > > > > On Wed, 6 Oct 2021, Patrick Palka wrote:
> > > > > > 
> > > > > > > On Tue, 5 Oct 2021, Jason Merrill wrote:
> > > > > > > 
> > > > > > > > On 10/5/21 15:17, Patrick Palka wrote:
> > > > > > > > > On Mon, 4 Oct 2021, Patrick Palka wrote:
> > > > > > > > > 
> > > > > > > > > > When passing a function template as the argument to a
> > > > > > > > > > function
> > > > > > > > > > NTTP
> > > > > > > > > > inside a template, we resolve it to the right specialization
> > > > > > > > > > ahead
> > > > > > > > > > of
> > > > > > > > > > time via resolve_address_of_overloaded_function, though the
> > > > > > > > > > call
> > > > > > > > > > to
> > > > > > > > > > mark_used within defers odr-using it until instantiation
> > > > > > > > > > time
> > > > > > > > > > (as
> > > > > > > > > > usual).
> > > > > > > > > > But at instantiation time we end up never calling mark_used
> > > > > > > > > > on
> > > > > > > > > > the
> > > > > > > > > > specialization.
> > > > > > > > > > 
> > > > > > > > > > This patch fixes this by adding a call to mark_used in
> > > > > > > > > > convert_nontype_argument_function.
> > > > > > > > > > 
> > > > > > > > > > PR c++/53164
> > > > > > > > > > 
> > > > > > > > > > gcc/cp/ChangeLog:
> > > > > > > > > > 
> > > > > > > > > > * pt.c (convert_nontype_argument_function): Call
> > > > > > > > > > mark_used.
> > > > > > > > > > 
> > > > > > > > > > gcc/testsuite/ChangeLog:
> > > > > > > > > > 
> > > > > > > > > > * g++.dg/template/non-dependent16.C: New test.
> > > > > > > > > > ---
> > > > > > > > > >  gcc/cp/pt.c |  3
> > > > > > > > > > +++
> > > > > > > > > >  gcc/testsuite/g++.dg/template/non-dependent16.C | 16
> > > > > > > > > > 
> > > > > > > > > >  2 files changed, 19 insertions(+)
> > > > > > > > > >  create mode 100644
> > > > > > > > > > gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > 
> > > > > > > > > > diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> > > > > > > > > > index f950f4a21b7..5e819c9598c 100644
> > > > > > > > > > --- a/gcc/cp/pt.c
> > > > > > > > > > +++ b/gcc/cp/pt.c
> > > > > > > > > > @@ -6668,6 +6668,9 @@ convert_nontype_argument_function
> > > > > > > > > > (tree
> > > > > > > > > > type,
> > > > > > > > > > tree
> > > > > > > > > > expr,
> > > > > > > > > >return NULL_TREE;
> > > > > > > > > >  }
> > > > > > > > > >  +  if (!mark_used (fn_no_ptr, complain) && !(complain &
> > > > > > > > > > tf_error))
> > > > > > > > > > +return NULL_TREE;
> > > > > > > > > > +
> > > > > > > > > >linkage = decl_linkage (fn_no_ptr);
> > > > > > > > > >if (cxx_dialect >= cxx11 ? linkage == lk_none :
> > > > > > > > > > linkage !=
> > > > > > > > > > lk_external)
> > > > > > > > > >  {
> > > > > > > > > > diff --git a/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > new file mode 100644
> > > > > > > > > > index 000..b7dca8f6752
> > > > > > > > > > --- /dev/null
> > > > > > > > > > +++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > @@ -0,0 +1,16 @@
> > > > > > > > > > +// PR c++/53164
> > > > > > > > > > +
> > > > > > > > > > +template
> > > > > > > > > > +void f(T) {
> > > > > > > > > > +  T::fail; // { dg-error "not a member" }
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +template
> > > > > > > > > > +struct A { };
> > > > > > > > > > +
> > > > > > > > > > +template
> > > > > > > > > > +void g() {
> > > > > > > > > > +  A a;
> > > > > > > > > > +}
> > > > > > > > > 
> > > > > > > > > I should mention that the original testcase in the PR was
> > > > > > > > > slightly
> > > > > > > > > different than this one in that it also performed a call to
> > > > > > > > > the
> > > > > > > > > NTTP,
> > > > > > > > > e.g.
> > > > > > > > > 
> > > > > > > > >   template
> > > > > > > > >   struct A {
> > > > > > > > > static void h() {
> > > > > > > > >   p(0);
> > > > > > > > > }
> > > > > > > > >   };
> > > > > > > > > 
> > > > > > > > >   template
> > > > > > > > >   void g() {
> > > > > > > > > A::h();
> > > > > > > > >   }
> > > > > > > > > 
> > > > > > > > >   templated void g<0>();
> > > > > > > > > 
> > > > > > > > > and not even the call was enough to odr-use f, apparently
> > > > > > > > > because
> > > > > > > > > the
> > > > > > > > > CALL_EXPR case of tsubst_expr calls mark_used on the callee
> > > > > > > > > only
> > > > > > > > > when
> > > > > > > > > it's a FUNCTION_DECL, but in this case after substitution it's
> > > > > > > > > an
> > > >

[PATCH] c++: remove single-parameter version of mark_used

2022-06-07 Thread Patrick Palka via Gcc-patches

gcc/cp/ChangeLog:

* cp-tree.h (mark_used): Remove single-parameter overload.  Add
default argument to the two-parameter overload.
* decl2.cc (mark_used): Likewise.
---
 gcc/cp/cp-tree.h | 3 +--
 gcc/cp/decl2.cc  | 8 +---
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index cc13809f38a..ac5b3eef118 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6932,8 +6932,7 @@ extern bool decl_constant_var_p   (tree);
 extern bool decl_maybe_constant_var_p  (tree);
 extern void no_linkage_error   (tree);
 extern void check_default_args (tree);
-extern bool mark_used  (tree);
-extern bool mark_used  (tree, tsubst_flags_t);
+extern bool mark_used  (tree, tsubst_flags_t = 
tf_warning_or_error);
 extern bool mark_single_function   (tree, tsubst_flags_t);
 extern void finish_static_data_member_decl (tree, tree, bool, tree, int);
 extern tree cp_build_parm_decl (tree, tree, tree);
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 974afe798b6..f15381be5f1 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -5730,7 +5730,7 @@ mark_single_function (tree expr, tsubst_flags_t complain)
wrong, true otherwise.  */
 
 bool
-mark_used (tree decl, tsubst_flags_t complain)
+mark_used (tree decl, tsubst_flags_t complain /* = tf_warning_or_error */)
 {
   /* If we're just testing conversions or resolving overloads, we
  don't want any permanent effects like forcing functions to be
@@ -5974,12 +5974,6 @@ mark_used (tree decl, tsubst_flags_t complain)
   return true;
 }
 
-bool
-mark_used (tree decl)
-{
-  return mark_used (decl, tf_warning_or_error);
-}
-
 tree
 vtv_start_verification_constructor_init_function (void)
 {
-- 
2.36.1.299.gab336e8f1c

Re: [PATCH, CFE] N2863: Improved Rules for Tag Compatibility

2022-06-07 Thread Joseph Myers

On Tue, 7 Jun 2022, Martin Uecker wrote:

> here is a preliminary patch that implements the proposed
> tag compatibility rules for C23 in GCC (N2863). It works

I don't see any response on the reflector to my comments on that proposal 
(message 21374, Fri, 14 Jan 2022 23:32:47 +0000).  Nor do I see any tests 
in this patch dealing with the questions of exactly when struct and union 
types are complete or incomplete, as in my first comment there (if there 
are any tests concerning that, it's not apparent for lack of comments 
explaining what exactly the tests are trying to test).  I think we'll need 
a version of the proposal without known issues before the patch is fully 
reviewable.

> - the feature has a flag (-ftag-compat) which is now turned
> on by default in all language modes to facilitate testing
> and to identify backwards compatibility problems. Turned on,
> it survives bootstrapping and regression testing with
> only a few cases that test for diagnostics that go
> away changed to turn it off.

Turning on by default in past language modes seems questionable other than 
for this sort of preliminary testing (in any case, incompatible with 
previous standard requirements so can't be enabled for strict conformance 
modes).  (And in general I'd discourage adding options for individual 
language feature like that, with the resulting proliferation of dialects 
with different combinations of features - again, it may be useful for 
testing purposes, especially before we know whether the feature gets into 
C23, and it may be useful within the compiler sources to distinguish in 
some way which places are checking for this feature rather than just 
testing flag_isoc2x, but actually releasing with a command-line option for 
it is more problematic.)

> diff --git a/gcc/testsuite/gcc.dg/tag-compat2.c
> b/gcc/testsuite/gcc.dg/tag-compat2.c
> new file mode 100644
> index 000..20dc1a9c894
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tag-compat2.c
> @@ -0,0 +1,47 @@
> +/*
> + * { dg-do compile }
> + * { dg-options "-ftag-compat" }
> + */
> +
> +typedef struct bar { int x; } X;
> +typedef struct bar { float x; } Y; /* { dg-warning "redefinition of
> struct or union" } */

I'd expect conflicting definitions of a type in the same scope to remain 
errors, not warnings, regardless of this feature.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH, CFE] N2863: Improved Rules for Tag Compatibility

2022-06-07 Thread Martin Uecker

Am Dienstag, den 07.06.2022, 14:22 +0000 schrieb Joseph Myers:
> On Tue, 7 Jun 2022, Martin Uecker wrote:
> 
> > here is a preliminary patch that implements the proposed
> > tag compatibility rules for C23 in GCC (N2863). It works
> 
> I don't see any response on the reflector to my comments on that proposal 
> (message 21374, Fri, 14 Jan 2022 23:32:47 +0000).  Nor do I see any tests 
> in this patch dealing with the questions of exactly when struct and union 
> types are complete or incomplete, as in my first comment there (if there 
> are any tests concerning that, it's not apparent for lack of comments 
> explaining what exactly the tests are trying to test).  I think we'll need 
> a version of the proposal without known issues before the patch is fully 
> reviewable.

Thanks Joseph! I will revisit the wording next and then resend
the patch with the corresponding tests.

Martin


> 
> > - the feature has a flag (-ftag-compat) which is now turned
> > on by default in all language modes to facilitate testing
> > and to identify backwards compatibility problems. Turned on,
> > it survives bootstrapping and regression testing with
> > only a few cases that test for diagnostics that go
> > away changed to turn it off.
> 
> Turning on by default in past language modes seems questionable other than 
> for this sort of preliminary testing (in any case, incompatible with 
> previous standard requirements so can't be enabled for strict conformance 
> modes). 
> (And in general I'd discourage adding options for individual 
> language feature like that, with the resulting proliferation of dialects 
> with different combinations of features - again, it may be useful for 
> testing purposes, especially before we know whether the feature gets into 
> C23, and it may be useful within the compiler sources to distinguish in 
> some way which places are checking for this feature rather than just 
> testing flag_isoc2x, but actually releasing with a command-line option for 
> it is more problematic.)
> 
> > diff --git a/gcc/testsuite/gcc.dg/tag-compat2.c
> > b/gcc/testsuite/gcc.dg/tag-compat2.c
> > new file mode 100644
> > index 000..20dc1a9c894
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tag-compat2.c
> > @@ -0,0 +1,47 @@
> > +/*
> > + * { dg-do compile }
> > + * { dg-options "-ftag-compat" }
> > + */
> > +
> > +typedef struct bar { int x; } X;
> > +typedef struct bar { float x; } Y; /* { dg-warning "redefinition of
> > struct or union" } */
> 
> I'd expect conflicting definitions of a type in the same scope to remain 
> errors, not warnings, regardless of this feature.
>

Re: [committed] diagnostics: add SARIF output format

2022-06-07 Thread Thomas Schwinge

Hi David, and Markus!

On 2022-06-02T15:46:20-0400, David Malcolm via Gcc-patches 
 wrote:
> This patch adds support to gcc's diagnostic subsystem for emitting
> diagnostics in SARIF, aka the Static Analysis Results Interchange Format:
>   https://sarifweb.azurewebsites.net/
> by extending -fdiagnostics-format= to add two new options:
>   -fdiagnostics-format=sarif-stderr
> and:
>   -fdiagnostics-format=sarif-file
>
> The patch targets SARIF v2.1.0

Now that's "funny": on that very day that you pushed to GCC
"diagnostics: add SARIF output format", I'd been attending at ISC 2022
the "Compiler-assisted Correctness Checking and Performance Optimization
for HPC" (C3PO) workshop, ,
where in his interesting keynote "On the Benefits of Software
Verification Competitions for HPC", Markus Schordan (in CC just for your
information) had a number of generally positive :-) mentions of GCC's
Static Analyzer -- just also did comment that it doesn't support the
standard SARIF output format.  Seems that issue is now resolved.  :-)

He generally also covered other fundamental aspects, such as the
difference between "sound" vs. "complete" analysis.  See

"What is soundness (in static analysis)?", or

"Soundness and completeness: with precision", for example.  As I
remember, it was stated that it's unclear which one GCC's Static Analyzer
strives for; may want to clarify that, in the manual:
, I
suppose?

Plus, probably a few more things relevant for GCC's Static Analyzer, that
I don't currently remember; I didn't take notes.  Maybe Markus is going
to upload his presentation on , or
would like to make it available to you in another way?

Note that I'm really just relaying information here, but other than
general interest, I'm myself not too familiar with the details of Static
Analysis.  Just thought that you would appreciate hearing about GCC's
Static Analyzer "spotted in the wild".

Grüße
 Thomas

> This is a JSON-based format suited for capturing the results of static
> analysis tools (like GCC's -fanalyzer), but it can also be used for plain
> GCC warnings and errors.
>
> SARIF supports per-event metadata in diagnostic paths such as
> ["acquire", "resource"] and ["release", "lock"] (specifically, the
> threadFlowLocation "kinds" property: SARIF v2.1.0 section 3.38.8), so
> the patch extends GCC's diagnostic_event subclass with a "struct meaning"
> with similar purpose.  The patch implements this for -fanalyzer so that
> the various state-machine-based warnings set these in the SARIF output.
>
> The heart of the implementation is in the new file
> diagnostic-format-sarif.cc.  Much of the rest of the patch is interface
> classes, isolating the diagnostic subsystem (which has no knowledge of
> e.g. tree or langhook) from the "client" code in the compiler proper
> (cc1 etc).
>
> The patch adds a langhook for specifying the SARIF v2.1.0
> "artifact.sourceLanguage" property, based on the list in
> SARIF v2.1.0 Appendix J.
>
> The patch adds automated DejaGnu tests to our testsuite via new
> scan-sarif-file and scan-sarif-file-not directives (although these
> merely use regexps, rather than attempting to use a proper JSON parser).
>
> I've tested the patch by hand using the validator at:
>   https://sarifweb.azurewebsites.net/Validation
> and the react-based viewer at:
>   https://microsoft.github.io/sarif-web-component/
> which successfully shows most of the information (although not paths,
> and not CWE IDs), and I've fixed all validation errors I've seen (though
> bugs no doubt remain).
>
> I've also tested the generated SARIF using the VS Code extension linked
> to from the SARIF website; I'm a novice with VS Code, but it seems to be
> able to handle my generated SARIF files (e.g. showing the data in the
> SARIF tab, and showing squiggly underlines under issues, and when I
> click on them, it visualizes the events in the path inline within the
> source window).
>
> Has anyone written an Emacs mode for SARIF files? (pretty please)
>
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> Pushed to trunk as r13-967-g6cf276ddf22066.
>
> [...]
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

[PING] nvptx: forward '-v' command-line option to assembler, linker

2022-06-07 Thread Thomas Schwinge

Hi!

On 2022-05-30T09:06:21+0200, Tobias Burnus  wrote:
> On 29.05.22 22:49, Thomas Schwinge wrote:
>> Not sure if that's what you had in mind, but what do you think about the
>> attached "nvptx: forward '-v' command-line option to assembler, linker"?
>> OK to push to GCC master branch (after merging
>> 
>> "Put '-v' verbose output onto stderr instead of stdout")?
>
> I was mainly thinking of some way to have it available — which
> '-foffload-options=-Wa,-v' already permits on the GCC side. (Once the
> nvptx-tools patch actually makes use of the '-v'.)

(Merged a week ago.)

> If I understand your patch correctly, this patch now causes 'gcc -v' to
> imply 'gcc -v -Wa,-v'. I think that's okay, since 'gcc -v' already
> outputs a lot of lines and those lines can be helpful to understand what
> happens and what not.

ACK.

> Tom, your thoughts on this?

Ping.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 17c35607d4927299b0c4bd19dd6fd205c85c4a4b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Sun, 29 May 2022 22:31:43 +0200
Subject: [PATCH] nvptx: forward '-v' command-line option to assembler, linker

For example, for offloading compilation with '-save-temps -v', before vs. after
word-diff then looks like:

[...]
 [...]/build-gcc-offload-nvptx-none/gcc/as {+-v -v+} -o ./a.xnvptx-none.mkoffload.o ./a.xnvptx-none.mkoffload.s
{+Verifying sm_30 code with sm_35 code generation.+}
{+ ptxas -c -o /dev/null ./a.xnvptx-none.mkoffload.o --gpu-name sm_35 -O0+}
[...]
 [...]/build-gcc-offload-nvptx-none/gcc/collect2 {+-v -v+} -o ./a.xnvptx-none.mkoffload [...] @./a.xnvptx-none.mkoffload.args.1 -lgomp -lgcc -lc -lgcc
{+collect2 version 12.0.1 20220428 (experimental)+}
{+[...]/build-gcc-offload-nvptx-none/gcc/collect-ld -v -v -o ./a.xnvptx-none.mkoffload [...] ./a.xnvptx-none.mkoffload.o -lgomp -lgcc -lc -lgcc+}
{+Linking ./a.xnvptx-none.mkoffload.o as 0+}
{+trying lib libc.a+}
{+trying lib libgcc.a+}
{+trying lib libgomp.a+}
{+Resolving abort+}
{+Resolving acc_on_device+}
{+Linking libgomp.a::oacc-init.o/ as 1+}
{+Linking libc.a::lib_a-abort.o/   as 2+}
[...]

(This depends on 
"Put '-v' verbose output onto stderr instead of stdout".)

	gcc/
	* config/nvptx/nvptx.h (ASM_SPEC, LINK_SPEC): Define.
---
 gcc/config/nvptx/nvptx.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index ed72c253191..b184f1d0150 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -27,6 +27,13 @@
 
 /* Run-time Target.  */
 
+/* Assembler supports '-v' option; handle similar to
+   '../../gcc.cc:asm_options', 'HAVE_GNU_AS'.  */
+#define ASM_SPEC "%{v}"
+
+/* Linker supports '-v' option.  */
+#define LINK_SPEC "%{v}"
+
 #define STARTFILE_SPEC "%{mmainkernel:crt0.o}"
 
 #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins ()
-- 
2.25.1

Re: [PATCH-1 v2, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-07 Thread Segher Boessenkool

Hi!

On Tue, Jun 07, 2022 at 04:07:58PM +0800, HAO CHEN GUI wrote:
>   This patch replaces shift and ior insns with one rotate and mask
> insn for the split patterns which are for DI byte swap on Power6. The
> test cases show the optimization.

Nice :-)

> -  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
> -  emit_insn (gen_iordi3 (dest, dest, op3));
> +  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
> +GEN_INT (0xffffffff)));

You could make some define_expand to make this easier to use.  But not
sure what to call it.  The goal would be to make this easier to read and
use, not to make it harder :-)  Something with duplicate-si-to-di or
such?  Is that pattern somewhere else already, maybe vectors, maybe some
other target even?

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-mdejagnu-cpu=power6 -O2" } */

It doesn't require -m64, only -mpowerpc64.  You can use has_arch_ppc64
to test for the latter.

Okay for trunk, even without that improvement.  Thanks!

Segher

Re: [PATCH-1 v2, rs6000] Replace shift and ior insns with one rotate and mask insn for bswap pattern [PR93453]

2022-06-07 Thread Segher Boessenkool

On Tue, Jun 07, 2022 at 04:07:58PM +0800, HAO CHEN GUI wrote:
>   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.

One further thing: please make sure you have tested on -m32 as well,
especially for integer stuff like this, it is easy to accidentally get
the conditions on a define_* and one of its uses out of synch.  But it
looks to be okay in this case :-)

Segher

aarch64: Fix bitfield alignment in param passing [PR105549]

2022-06-07 Thread Christophe Lyon via Gcc-patches

While working on enabling DFP for AArch64, I noticed new failures in
gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
caused by DFP types handling. These tests are generated during 'make
check' and enabling DFP made generation different (not sure if new
non-DFP tests are generated, or if existing ones are generated
differently, the tests in question are huge and difficult to compare).

Anyway, I reduced the problem to what I attach at the end of the new
gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
scheme as other va_arg* AArch64 tests.  Richard Sandiford further
reduced this to a non-vararg function, added as a second testcase.

This is a tough case mixing bitfields and alignment, where
aarch64_function_arg_alignment did not follow what its descriptive
comment says: we want to use the natural alignment of the bitfield
type only if the user didn't override the alignment for the bitfield
itself.

The fix is thus very small, and this patch adds two new tests
(va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
offending testcase from struct-layout-1.exp for reference.

We also take the opportunity to fix the comment above
aarch64_function_arg_alignment since the value of the abi_break
parameter was changed in a previous commit and no longer matches the
description.

2022-06-02  Christophe Lyon  

gcc/
PR target/105549
* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
Check DECL_USER_ALIGN for bitfield.

gcc/testsuite/
PR target/105549
* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
* gcc.target/aarch64/pr105549.c: New.


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 
40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff
 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, 
machine_mode mode,
 /* Given MODE and TYPE of a function argument, return the alignment in
bits.  The idea is to suppress any stronger alignment requested by
the user and opt for the natural alignment (specified in AAPCS64 \S
-   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
-   calculated in versions of GCC prior to GCC-9.  This is a helper
-   function for local use only.  */
+   4.1).  ABI_BREAK is set to the old alignment if the alignment was
+   incorrectly calculated in versions of GCC prior to GCC-9.  This is
+   a helper function for local use only.  */
 
 static unsigned int
 aarch64_function_arg_alignment (machine_mode mode, const_tree type,
@@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, 
const_tree type,
   "s" contains only one Fundamental Data Type (the int field)
   but gains 8-byte alignment and size thanks to "e".  */
alignment = std::max (alignment, DECL_ALIGN (field));
-   if (DECL_BIT_FIELD_TYPE (field))
+
+   /* Take bit-field type's alignment into account only if the
+  user didn't override this field's alignment.  */
+   if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
  bitfield_alignment
= std::max (bitfield_alignment,
TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
new file mode 100644
index 
..24895c3ab48309b601f6f22c176f1e52350c2257
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
@@ -0,0 +1,105 @@
+/* Test AAPCS64 layout and __builtin_va_arg.
+
+   This test covers a corner case where a composite type parameter fits in one
+   register: we do not need a double-word alignment when accessing it in the
+   va_arg stack area.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-17.c"
+#include "type-def.h"
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+#define HAS_DATA_INIT_FUNC
+void init_data ()
+{
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+}
+
+#include "abitest.h"
+#else
+  ARG   (int  , 1, W0 , LAST_NAMED_ARG_ID)
+  DOTS
+  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
+  ANON  (union S2844  , s2844, X1 , 2)
+  ANON  (long long, 2LL  , X2 , 3)
+  ANON  (union  S2844 , a2844[2] , X3 , 4)
+  LAST_ANON (union  S2844

Re: [PATCH] aarch64: Fix va_arg alignment handling (PR target/105549)

2022-06-07 Thread Christophe Lyon via Gcc-patches


I've reworked my patch for this PR, so this one is obsolete.

The new one is:
https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596326.html


On 5/10/22 17:02, Christophe Lyon wrote:

While working on enabling DFP for AArch64, I noticed new failures in
gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
caused by DFP types handling. These tests are generated during 'make
check' and enabling DFP made generation different (not sure if new
non-DFP tests are generated, or if existing ones are generated
differently, the tests in question are huge and difficult to compare).

Anyway, I reduced the problem to what I attach at the end of the new
gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
scheme as other va_arg* AArch64 tests.

This is a tough case mixing bitfields and alignment, where
aarch64_gimplify_va_arg_expr did not follow the exact same rule as
aarch64_layout_arg. When the va_arg parameter uses only one general
register, we do not want to introduce double-word alignment.

The fix is thus very small, and this patch adds a new test
(va_arg-17.c), which contains the reduced offending testcase from
struct-layout-1.exp for reference.

2022-04-25  Christophe Lyon  

gcc/
PR target/105549
* config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Fix
alignment of single-register parameters.

gcc/testsuite/
PR target/105549
* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
---
  gcc/config/aarch64/aarch64.cc |   4 +-
  .../gcc.target/aarch64/aapcs64/va_arg-17.c| 105 ++
  2 files changed, 108 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f650abbc4ce..bd855758778 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19667,7 +19667,9 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
gimple_seq *pre_p,
rsize = ROUND_UP (size, UNITS_PER_WORD);
nregs = rsize / UNITS_PER_WORD;
  
-  if (align > 8)

+  /* Align on double-word only if we need 2 registers, like in
+aarch64_layout_arg.  */
+  if (align > 8 && nregs == 2)
{
  if (abi_break && warn_psabi)
inform (input_location, "parameter passing for argument of type "
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
new file mode 100644
index 000..24895c3ab48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
@@ -0,0 +1,105 @@
+/* Test AAPCS64 layout and __builtin_va_arg.
+
+   This test covers a corner case where a composite type parameter fits in one
+   register: we do not need a double-word alignment when accessing it in the
+   va_arg stack area.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-17.c"
+#include "type-def.h"
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+#define HAS_DATA_INIT_FUNC
+void init_data ()
+{
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+}
+
+#include "abitest.h"
+#else
+  ARG   (int  , 1, W0 , LAST_NAMED_ARG_ID)
+  DOTS
+  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
+  ANON  (union S2844  , s2844, X1 , 2)
+  ANON  (long long, 2LL  , X2 , 3)
+  ANON  (union  S2844 , a2844[2] , X3 , 4)
+  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
+#endif
+
+#if 0
+  /* This test is derived from a case generated by struct-layout-1.exp:  */
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+typedef __builtin_va_list __gnuc_va_list;
+typedef __gnuc_va_list va_list;
+
+void check2844va (int z, ...) {
+  union S2844 arg, *p;
+  va_list ap;
+
+  __builtin_va_start(ap,z);
+  if (__builtin_va_arg(ap,double) != 1.0)
+printf ("fail %d.%d\n", 2844, 0), ++fails;
+
+  p = &s2844;
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+printf ("fail %d.%d\n", 2844, 1), ++fails;
+
+  if (__builtin_va_arg(ap,long long) != 3LL)
+printf ("fail %d.%d\n", 2844, 2), ++fails;
+
+  p = &a2844[2];
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (

Re: [PATCH,RS6000 2/5] Rework the RS6000_BTM defines.

2022-06-07 Thread will schmidt via Gcc-patches

On Tue, 2022-06-07 at 10:50 +0800, Kewen.Lin wrote:
> Hi Will,


Hi!

> 
> The whole series looks good to me, thanks!

:-)

> IMHO one place can be
> further
> refactored, not sure if it's worth to updating together in
> this series, it's ...

Additional comments below.  
I've made note of the comments, and request (ask) that this be
approved, with a pinky promise that I intend to follow up on the
suggestions in my next patch series.


> 
> on 2022/6/7 06:05, will schmidt wrote:
> > [PATCH,RS6000 2/5] Rework the RS6000_BTM defines.
> > 
> > The RS6000_BTM_ definitions are mostly unused after the
> > rs6000
> > builtin code was reworked.  The remaining references can be
> > replaced
> > with the OPTION_MASK_ and MASK_ equivalents.
> > 
> > This patch removes the defines:
> > RS6000_BTM_FRES, RS6000_BTM_FRSQRTE, RS6000_BTM_FRSQRTES,
> > RS6000_BTM_POPCNTD, RS6000_BTM_CELL, RS6000_BTM_DFP,
> > RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128, RS6000_BTM_64BIT,
> > RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128, RS6000_BTM_FLOAT128_HW
> > RS6000_BTM_MMA, RS6000_BTM_P10.
> > 
> > I note that the BTM -> OPTION_MASK mappings are not always 1-to-1.
> > in particular the BTM_FRES and BTM_FRSQRTE values were both mapped
> > to
> > OPTION_MASK_PPC_GFXOPT, while the BTM_FRE and BTM_FRSQRTES both
> > mapped
> > to OPTION_MASK_POPCNTB.  In total I spent quite a bit of time
> > double-checking these since it looked like copy/paste errors.  I
> > split
> > some of these changes out into a subsequent patch to limit the
> > amount
> > of potential confusion in any particular patch.
> > 
> > gcc/
> > * config/rs6000/rs6000-c.cc: Update comments.
> > * config/rs6000/rs6000.cc (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
> > RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
> > RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_DFP,
> > RS6000_BTM_HARD_FLOAT,RS6000_BTM_LDBL128, RS6000_BTM_FLOAT128,
> > RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10):
> > Replace
> > with OPTION_MASK_PPC_GFXOPT, OPTION_MASK_PPC_GFXOPT,
> > OPTION_MASK_POPCNTB, OPTION_MASK_POPCNTD,
> > OPTION_MASK_FPRND, MASK_64BIT, MASK_POWERPC64,
> > OPTION_MASK_DFP, OPTION_MASK_SOFT_FLOAT, OPTION_MASK_MULTIPLE,
> > OPTION_MASK_FLOAT128_KEYWORD, OPTION_MASK_FLOAT128_HW,
> > OPTION_MASK_MMA, OPTION_MASK_POWER10.
> > * config/rs6000/rs6000.h (RS6000_BTM_FRES, RS6000_BTM_FRSQRTE,
> > RS6000_BTM_FRSQRTES, RS6000_BTM_POPCNTD, RS6000_BTM_CELL,
> > RS6000_BTM_DFP, RS6000_BTM_HARD_FLOAT, RS6000_BTM_LDBL128,
> > RS6000_BTM_64BIT, RS6000_BTM_POWERPC64, RS6000_BTM_FLOAT128,
> > RS6000_BTM_FLOAT128_HW, RS6000_BTM_MMA, RS6000_BTM_P10):
> > Delete.
> > 
> > diff --git a/gcc/config/rs6000/rs6000-c.cc
> > b/gcc/config/rs6000/rs6000-c.cc
> > index 9c8cbd7a66e4..4c99afc761ae 100644
> > --- a/gcc/config/rs6000/rs6000-c.cc
> > +++ b/gcc/config/rs6000/rs6000-c.cc
> > @@ -594,13 +594,13 @@ rs6000_target_modify_macros (bool define_p,
> > HOST_WIDE_INT flags,
> >   via the target attribute/pragma.  */
> >if ((flags & OPTION_MASK_FLOAT128_HW) != 0)
> >  rs6000_define_or_undefine_macro (define_p,
> > "__FLOAT128_HARDWARE__");
> >  
> >/* options from the builtin masks.  */
> > -  /* Note that RS6000_BTM_CELL is enabled only if (rs6000_cpu ==
> > - PROCESSOR_CELL) (e.g. -mcpu=cell).  */
> > -  if ((bu_mask & RS6000_BTM_CELL) != 0)
> > +  /* Note that OPTION_MASK_FPRND is enabled only if
> > + (rs6000_cpu == PROCESSOR_CELL) (e.g. -mcpu=cell).  */
> > +  if ((bu_mask & OPTION_MASK_FPRND) != 0)
> >  rs6000_define_or_undefine_macro (define_p, "__PPU__");
> >  
> 
> ... here.  In function rs6000_target_modify_macros, bu_mask is used
> by
> two places, the beginning debug outputting and the above
> OPTION_MASK_FPRND
> check.  I wonder if we can get rid of bu_mask and just use sth. like:
> 
> (rs6000_cpu == PROCESSOR_CELL) && (flags & OPTION_MASK_FPRND)
> 

Agreed.

> // the others are using "flags &", it's passed by rs6000_isa_flags,
> // should be the same as just using OPTION_MASK_FPRND.
> 
> If we drop bu_mask in function rs6000_target_modify_macros, function

> rs6000_builtin_mask_calculate will have only one use place in
> function
> rs6000_option_override_internal.  IMHO this function
> rs6000_builtin_mask_calculate also becomes stale after built-in
> function
> rewriting and needs some updates with new bif framework later.

The DEBUG output using the builtin_mask still appeared to have some
potential value, but I can make a point to investigate that further.

I do have in my queue to try to resolve PR 101865, that is the bug with
ARCH_PWR8.  I got into this OPTION_MASK side-quest as part of the
investigation into that bug.   I can make a point to investigate and
clean up the bu_mask usage as part of that series.

Thanks
-Will

> 
> BR,
> Kewen

[PATCH] aarch64: Lower vcombine to GIMPLE

2022-06-07 Thread Andrew Carlotti via Gcc-patches

Hi all,

This lowers vcombine intrinsics to a GIMPLE vector constructor, which enables 
better optimisation during GIMPLE passes.

Bootstrapped and tested on aarch64-none-linux-gnu, and tested for 
aarch64_be-none-linux-gnu via cross-compilation.


gcc/

* config/aarch64/aarch64-builtins.cc
(aarch64_general_gimple_fold_builtin): Add combine.

gcc/testsuite/

* gcc.target/aarch64/advsimd-intrinsics/combine.c:
New test.

---

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
5217dbdb2ac78bba0a669d22af6d769d1fe91a3d..9d52fb8c5a48c9b743defb340a85fb20a1c8f014
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2827,6 +2827,18 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, 
gcall *stmt,
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;

+ BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
+ BUILTIN_VD_I (BINOPU, combine, 0, NONE)
+ BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
+   {
+ if (BYTES_BIG_ENDIAN)
+   std::swap(args[0], args[1]);
+ tree ret_type = TREE_TYPE (gimple_call_lhs (stmt));
+ tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, args[0], 
NULL_TREE, args[1]);
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor);
+   }
+   break;
+
  /*lower store and load neon builtins to gimple.  */
  BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
  BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c
new file mode 100644
index 
..d08faf7a4a160a1e83428ed9b270731bbf7b8c8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/combine.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo:
+**	umov	w0, v1\.s\[1\]
+** ret
+*/
+
+int32_t foo (int32x2_t a, int32x2_t b)
+{
+  int32x4_t c = vcombine_s32(a, b);
+  return vgetq_lane_s32(c, 3);
+}
+

Re: aarch64: Fix bitfield alignment in param passing [PR105549]

2022-06-07 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> While working on enabling DFP for AArch64, I noticed new failures in
> gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
> caused by DFP types handling. These tests are generated during 'make
> check' and enabling DFP made generation different (not sure if new
> non-DFP tests are generated, or if existing ones are generated
> differently, the tests in question are huge and difficult to compare).
>
> Anyway, I reduced the problem to what I attach at the end of the new
> gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
> scheme as other va_arg* AArch64 tests.  Richard Sandiford further
> reduced this to a non-vararg function, added as a second testcase.
>
> This is a tough case mixing bitfields and alignment, where
> aarch64_function_arg_alignment did not follow what its descriptive
> comment says: we want to use the natural alignment of the bitfield
> type only if the user didn't override the alignment for the bitfield
> itself.
>
> The fix is thus very small, and this patch adds two new tests
> (va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
> offending testcase from struct-layout-1.exp for reference.
>
> We also take the opportunity to fix the comment above
> aarch64_function_arg_alignment since the value of the abi_break
> parameter was changed in a previous commit and no longer matches the
> description.
>
> 2022-06-02  Christophe Lyon  
>
>   gcc/
>   PR target/105549
>   * config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
>   Check DECL_USER_ALIGN for bitfield.
>
>   gcc/testsuite/
>   PR target/105549
>   * gcc.target/aarch64/aapcs64/va_arg-17.c: New.
>   * gcc.target/aarch64/pr105549.c: New.
>
>
> ### Attachment also inlined for ease of reply
> ###
>
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 
> 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff
>  100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t 
> pcum_v, machine_mode mode,
>  /* Given MODE and TYPE of a function argument, return the alignment in
> bits.  The idea is to suppress any stronger alignment requested by
> the user and opt for the natural alignment (specified in AAPCS64 \S
> -   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
> -   calculated in versions of GCC prior to GCC-9.  This is a helper
> -   function for local use only.  */
> +   4.1).  ABI_BREAK is set to the old alignment if the alignment was
> +   incorrectly calculated in versions of GCC prior to GCC-9.  This is
> +   a helper function for local use only.  */
>  
>  static unsigned int
>  aarch64_function_arg_alignment (machine_mode mode, const_tree type,
> @@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, 
> const_tree type,
>  "s" contains only one Fundamental Data Type (the int field)
>  but gains 8-byte alignment and size thanks to "e".  */
>   alignment = std::max (alignment, DECL_ALIGN (field));
> - if (DECL_BIT_FIELD_TYPE (field))
> +
> + /* Take bit-field type's alignment into account only if the
> +user didn't override this field's alignment.  */
> + if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))

I think we need to check DECL_PACKED instead.  On its own, an alignment
attribute on the field can only increase alignment, not decrease it.
In contrast, the packed attribute effectively forces the alignment to
1 byte, so has an effect even without an alignment attribute.  Adding an
explicit alignment on top can then increase the alignment from 1 to any
value (bigger or smaller than the original underlying type).

E.g. for:

-
typedef unsigned long long ull __attribute__((aligned(ALIGN)));

#ifndef EXTRA
#define EXTRA unsigned long long x;
#endif

struct S1 { __attribute__((aligned(1))) ull i : 1; EXTRA };
struct S2 { __attribute__((aligned(2))) ull i : 1; EXTRA };
struct S4 { __attribute__((aligned(4))) ull i : 1; EXTRA };
struct S8 { __attribute__((aligned(8))) ull i : 1; EXTRA };
struct S16 { __attribute__((aligned(16))) ull i : 1; EXTRA };

struct Sp { ull i : 1; EXTRA }__attribute__((packed));
struct S1p { __attribute__((packed, aligned(1))) ull i : 1; EXTRA };
struct S2p { __attribute__((packed, aligned(2))) ull i : 1; EXTRA };
struct S4p { __attribute__((packed, aligned(4))) ull i : 1; EXTRA };
struct S8p { __attribute__((packed, aligned(8))) ull i : 1; EXTRA };
struct S16p { __attribute__((packed, aligned(16))) ull i : 1; EXTRA };

int f1(int a, struct S1 s) { return s.i; }
int f2(int a, struct S2 s) { return s.i; }
int f4(int a, struct S4 s) { return s.i; }
int f8(int a, struct S8 s) { return s.i; }
int f16(int a, struct S16 s) { return s.i; }

int fp(int

Re: [PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-07 Thread Jason Merrill via Gcc-patches


On 6/7/22 09:24, Patrick Palka wrote:

On Mon, 6 Jun 2022, Jason Merrill wrote:


On 6/6/22 14:27, Patrick Palka wrote:

On Thu, 7 Oct 2021, Jason Merrill wrote:


On 10/7/21 11:17, Patrick Palka wrote:

On Wed, 6 Oct 2021, Jason Merrill wrote:


On 10/6/21 15:52, Patrick Palka wrote:

On Wed, 6 Oct 2021, Patrick Palka wrote:


On Tue, 5 Oct 2021, Jason Merrill wrote:


On 10/5/21 15:17, Patrick Palka wrote:

On Mon, 4 Oct 2021, Patrick Palka wrote:


When passing a function template as the argument to a
function
NTTP
inside a template, we resolve it to the right specialization
ahead
of
time via resolve_address_of_overloaded_function, though the
call
to
mark_used within defers odr-using it until instantiation
time
(as
usual).
But at instantiation time we end up never calling mark_used
on
the
specialization.

This patch fixes this by adding a call to mark_used in
convert_nontype_argument_function.

PR c++/53164

gcc/cp/ChangeLog:

* pt.c (convert_nontype_argument_function): Call
mark_used.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent16.C: New test.
---
  gcc/cp/pt.c |  3
+++
  gcc/testsuite/g++.dg/template/non-dependent16.C | 16

  2 files changed, 19 insertions(+)
  create mode 100644
gcc/testsuite/g++.dg/template/non-dependent16.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index f950f4a21b7..5e819c9598c 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -6668,6 +6668,9 @@ convert_nontype_argument_function
(tree
type,
tree
expr,
return NULL_TREE;
  }
  +  if (!mark_used (fn_no_ptr, complain) && !(complain &
tf_error))
+return NULL_TREE;
+
linkage = decl_linkage (fn_no_ptr);
if (cxx_dialect >= cxx11 ? linkage == lk_none :
linkage !=
lk_external)
  {
diff --git a/gcc/testsuite/g++.dg/template/non-dependent16.C
b/gcc/testsuite/g++.dg/template/non-dependent16.C
new file mode 100644
index 000..b7dca8f6752
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
@@ -0,0 +1,16 @@
+// PR c++/53164
+
+template
+void f(T) {
+  T::fail; // { dg-error "not a member" }
+}
+
+template
+struct A { };
+
+template
+void g() {
+  A a;
+}


I should mention that the original testcase in the PR was
slightly
different than this one in that it also performed a call to
the
NTTP,
e.g.

   template
   struct A {
 static void h() {
   p(0);
 }
   };

   template
   void g() {
 A::h();
   }

   template void g<0>();

and not even the call was enough to odr-use f, apparently
because
the
CALL_EXPR case of tsubst_expr calls mark_used on the callee
only
when
it's a FUNCTION_DECL, but in this case after substitution it's
an
ADDR_EXPR of a FUNCTION_DECL.  Fixing this by looking through
the
ADDR_EXPR
worked, but IIUC the call isn't necessary for f to be
odr-used,
simply
using f as a template argument should be sufficient, so it
seems
the
above is better fix.


I agree that pedantically the use happens when substituting into
the
use
of
A, but convert_nontype_argument_function seems like a weird
place
to
implement that; it's only called again during instantiation of
A,
when we
instantiate the injected-class-name.  If A isn't
instantiated,
e.g.
if 'a'
is a pointer to A, we again don't instantiate f.


I see, makes sense..  I'm not sure where else we can mark the use,
then.
Since we resolve the OVERLOAD f to the FUNCTION_DECL f ahead
of
time (during which mark_used doesn't actually instantiate f
because
we're inside a template), at instantiation time the type A is
already
non-dependent so tsubst_aggr_type avoids doing the work that would
end
up calling convert_nontype_argument_function.



I see that clang doesn't reject your testcase, either, but MSVC
and
icc
do
(even with 'a' a pointer): https://godbolt.org/z/MGE6TcMch


FWIW although Clang doesn't reject 'A a;', it does reject
'using type = A;' weirdly enough:
https://godbolt.org/z/T9qEn6bWW


Shall we just go with the other more specific approach, that makes
sure
the CALL_EXPR case of tsubst_expr calls mark_used when the callee
is
an
ADDR_EXPR?  Something like (bootstrapped and regtested):


Err, this approach is wrong because by stripping the ADDR_EXPR here
we
end up checking access of the unwrapped FUNCTION_DECL again after
substituting into the call.  So we incorrectly reject e.g.

  template
  void g() {
P(); // error: ‘static void A::h()’ is private within this
context
  }

  struct A {
void f() {
  g();
}
  private:
static void h();
  };

since A::h isn't accessible from g.


I guess you could call mark_used directly instead of stripping the
ADDR_EXPR.


That seems to work nicely, how does the below look?  Bootstrapped and
regtested on x86_64-pc-linux-gnu.



Or for the general problem, perhaps we could mark the TEMPLATE_INFO or
TI_ARGS
to indicate that we still need to mark_used the arguments

Re: [PATCH] c++: remove single-parameter version of mark_used

2022-06-07 Thread Jason Merrill via Gcc-patches


On 6/7/22 10:16, Patrick Palka wrote:

gcc/cp/ChangeLog:

* cp-tree.h (mark_used): Remove single-parameter overload.  Add
default argument to the two-parameter overload.
* decl2.cc (mark_used): Likewise.


OK.


---
  gcc/cp/cp-tree.h | 3 +--
  gcc/cp/decl2.cc  | 8 +---
  2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index cc13809f38a..ac5b3eef118 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6932,8 +6932,7 @@ extern bool decl_constant_var_p   (tree);
  extern bool decl_maybe_constant_var_p (tree);
  extern void no_linkage_error  (tree);
  extern void check_default_args(tree);
-extern bool mark_used  (tree);
-extern bool mark_used  (tree, tsubst_flags_t);
+extern bool mark_used  (tree, tsubst_flags_t = 
tf_warning_or_error);
  extern bool mark_single_function  (tree, tsubst_flags_t);
  extern void finish_static_data_member_decl(tree, tree, bool, tree, int);
  extern tree cp_build_parm_decl(tree, tree, tree);
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 974afe798b6..f15381be5f1 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -5730,7 +5730,7 @@ mark_single_function (tree expr, tsubst_flags_t complain)
 wrong, true otherwise.  */
  
  bool

-mark_used (tree decl, tsubst_flags_t complain)
+mark_used (tree decl, tsubst_flags_t complain /* = tf_warning_or_error */)
  {
/* If we're just testing conversions or resolving overloads, we
   don't want any permanent effects like forcing functions to be
@@ -5974,12 +5974,6 @@ mark_used (tree decl, tsubst_flags_t complain)
return true;
  }
  
-bool

-mark_used (tree decl)
-{
-  return mark_used (decl, tf_warning_or_error);
-}
-
  tree
  vtv_start_verification_constructor_init_function (void)
  {

Re: [PATCH] c++: alias template arguments are evaluated [PR101906]

2022-06-07 Thread Patrick Palka via Gcc-patches

On Wed, 23 Mar 2022, Jason Merrill wrote:

> On 3/22/22 14:31, Patrick Palka wrote:
> > On Tue, 22 Mar 2022, Patrick Palka wrote:
> > 
> > > Here we're neglecting to clear cp_unevaluated_operand when substituting
> > > into the arguments of the alias template-id skip<(T(), 0), T> with T=A,
> > > which means cp_unevaluated_operand remains set during mark_used for
> > > A::A() and so we never synthesize it.  Later constant evaluation for
> > > the substituted template argument (A(), 0) (during coerce_template_parms)
> > > fails with "'constexpr A::A()' used before its definition" since it was
> > > never synthesized.
> > 
> > It occurred to me to check the case where 'skip' is a function/variable
> > template instead of an alias template, and unfortunately it seems we run
> > into the same issue:
> > 
> >template T skip();  // Function template
> >// template T skip; // Variable template
> > 
> >template
> >constexpr unsigned sizeof_() {
> >  return sizeof(skip<(T(), 0), T>());
> >  // return sizeof(skip<(T(), 0), T>);
> >}
> > 
> >struct A {
> >  int m = -1;
> >};
> > 
> >static_assert(sizeof_() == sizeof(A), "");
> > 
> > : In instantiation of ‘constexpr unsigned int sizeof_() [with T =
> > A]’:
> > :14:25:   required from here
> > :6:34: error: ‘constexpr A::A()’ used before its definition
> > 
> > We can fix this similarly by clearing cp_unevaluated_operand when
> > substituting into the arguments of a TEMPLATE_ID_EXPR, but now I'm
> > worried this cp_unevaluated_operand business might not be the best
> > approach (despite it being consistent with what tsubst_aggr_type does).
> > 
> > Maybe instantiate_cx_fn_r should be responsible for making sure A::A()
> > gets synthesized?
> 
> Or cxx_eval_call_expression, but just as a workaround:
> manifestly-constant-evaluated expressions are evaluated even in an unevaluated
> operand, so I think adjusting cp_unevaluated_operand is correct.
> 
> Perhaps tsubst_template_args should use cp_evaluated,

Makes sense.

> and places that use plain tsubst for substituting template args should
> use it instead?

Even though tsubst already uses tsubst_template_args to substitute
TREE_VEC?  AFAICT this change would have no effect except when
args is NULL_TREE, in which case tsubst exits early but
tsubst_template_args doesn't.

Here's what I have so far, which survives bootstrap and regtest.

-- >8 --

Subject: [PATCH] c++: template-id arguments are evaluated [PR101906]

Here we're neglecting to clear cp_unevaluated_operand when substituting
into the arguments of the alias template-id skip<(T(), 0), T> with T=A,
which means cp_unevaluated_operand remains set during mark_used for
A::A() and so we never synthesize it.  Later constant evaluation for
the substituted template argument (A(), 0) (during coerce_template_parms)
fails with "'constexpr A::A()' used before its definition" since it was
never synthesized.

This doesn't happen with a class template because tsubst_aggr_type
clears cp_unevaluated_operand during substitution thereof.  But
since template arguments are generally manifestly constant-evaluated,
which in turn are evaluated even in an unevaluated operand, we
should be clearing cp_unevaluated_operand more broadly whenever
substituting any set of template arguments.  Thus this patch makes us
clear cp_unevaluated_operand during tsubst_template_args.

PR c++/101906

gcc/cp/ChangeLog:

* pt.cc (tsubst_template_args): Set cp_evaluated here.
(tsubst_aggr_type): Not here.

gcc/testsuite/ChangeLog:

* g++.dg/template/evaluated1.C: New test.
* g++.dg/template/evaluated1a.C: New test.
* g++.dg/template/evaluated1b.C: New test.
* g++.dg/template/evaluated1c.C: New test.
---
 gcc/cp/pt.cc|  6 +++---
 gcc/testsuite/g++.dg/template/evaluated1.C  | 17 +
 gcc/testsuite/g++.dg/template/evaluated1a.C | 16 
 gcc/testsuite/g++.dg/template/evaluated1b.C | 17 +
 gcc/testsuite/g++.dg/template/evaluated1c.C | 17 +
 5 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/evaluated1.C
 create mode 100644 gcc/testsuite/g++.dg/template/evaluated1a.C
 create mode 100644 gcc/testsuite/g++.dg/template/evaluated1b.C
 create mode 100644 gcc/testsuite/g++.dg/template/evaluated1c.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index ee7d2c935cc..7fe1c7653aa 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -13475,6 +13475,9 @@ tsubst_template_args (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
   if (t == error_mark_node)
 return error_mark_node;
 
+  /* In "sizeof(X<I>)" we need to evaluate "I".  */
+  cp_evaluated ev;
+
   len = TREE_VEC_LENGTH (t);
   elts = XALLOCAVEC (tree, len);
 
@@ -13709,9 +13712,6 @@ tsubst_aggr_type (tree t,
  tree context;
  tree r;
 
- /* In "sizeof(X<I>)" we need to evaluate "I".  */
- cp_evaluated ev;

Re: [PATCH] RISC-V: Compute default ABI from -mcpu or -march

2022-06-07 Thread Palmer Dabbelt


On Mon, 06 Jun 2022 19:51:20 PDT (-0700), gcc-patches@gcc.gnu.org wrote:

If -mcpu or -march is specified and there is no -mabi, we will calculate
default ABI from arch string provided by -march or defined in CPU info.


IMO this is generally a good idea and we've talked about it before, but 
just setting the ABI from the ISA isn't quite the right way to go.  IIRC 
we came up with something slightly more complicated, like picking the 
closest supported multilib.  That's probably more in line with what 
users are asking for, which IIUC is sort of just "I don't care that much 
about ABI, just make my stuff build".


Whatever we do here, we should document in the RISC-V specs as we'll 
want to make sure LLVM does the same thing.  We probably also want some 
sort of "-mabi=auto" argument, as it's always best to have an argument 
that changes back to the no-argument behavior.





gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (compute_default_abi): 
Implementation
to calculate -mabi from arch string.
(riscv_expand_abi_from_arch): New spec function to calculate -mabi from 
arch
string provided by -march option.
(riscv_expand_abi_from_cpu): New spec function to find CPU info and 
calculate
-mabi from arch string defined in CPU info.
* config/riscv/riscv.h (EXTRA_SPEC_FUNCTIONS): Add above spec functions.
(OPTION_DEFAULT_SPECS): Use new spec functions to calculate -mabi and 
-march
has higher priority than -mcpu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/mabi-1.c: ilp32e test.
* gcc.target/riscv/mabi-2.c: ilp32 test.
* gcc.target/riscv/mabi-3.c: ilp32f test.
* gcc.target/riscv/mabi-4.c: ilp32d test.
* gcc.target/riscv/mabi-5.c: lp64 test.
* gcc.target/riscv/mabi-6.c: lp64f test.
* gcc.target/riscv/mabi-7.c: lp64d test.
* gcc.target/riscv/mabi-8.c: -march override -mcpu.
---
 gcc/common/config/riscv/riscv-common.cc | 66 +
 gcc/config/riscv/riscv.h| 15 --
 gcc/testsuite/gcc.target/riscv/mabi-1.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-2.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-3.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-4.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-5.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-6.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-7.c |  7 +++
 gcc/testsuite/gcc.target/riscv/mabi-8.c |  7 +++
 10 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/mabi-8.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0e5be2ce105..f8e40549d18 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1266,6 +1266,72 @@ riscv_default_mtune (int argc, const char **argv)
 return default_mtune;
 }

+/* Compute default -mabi option from arch string.  */
+
+static const char *
+compute_default_abi (const char* arch_str)
+{
+  location_t loc = UNKNOWN_LOCATION;
+
+  riscv_parse_arch_string (arch_str, NULL, loc);
+
+  if (current_subset_list->xlen () == 64)
+{
+  if (current_subset_list->lookup ("d", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "lp64d";
+  if (current_subset_list->lookup ("f", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "lp64f";
+  return "lp64";
+}
+  else
+{
+  if (current_subset_list->lookup ("e", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32e";
+  if (current_subset_list->lookup ("d", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32d";
+  if (current_subset_list->lookup ("f", RISCV_DONT_CARE_VERSION,
+RISCV_DONT_CARE_VERSION))
+   return "ilp32f";
+  return "ilp32";
+}
+}
+
+/* Expand default -mabi option from -march option.  */
+
+const char *
+riscv_expand_abi_from_arch (int argc, const char **argv)
+{
+  gcc_assert (argc == 1);
+  return xasprintf ("-mabi=%s", compute_default_abi (argv[0]));
+}
+
+/* Expand default -mabi option from -mcpu option.  */
+
+const char *
+riscv_expand_abi_from_cpu (int argc, const char **argv)
+{
+  gcc_assert (argc > 0 && argc <= 2);
+  const char *default_abi_str = NULL;
+  if (argc >= 2)
+default_abi_str = argv[1];
+
+  const riscv_cpu_

[PATCH] Add -fextra-libc-function=memcmpeq for __memcmpeq

2022-06-07 Thread H.J. Lu via Gcc-patches

Add -fextra-libc-function=memcmpeq to map

extern int __memcmpeq (const void *, const void *, size_t);

which was added to GLIBC 2.35, to __builtin_memcmp_eq.

gcc/

* builtins.cc: Include "opts.h".
(expand_builtin): Generate BUILT_IN_MEMCMP_EQ if __memcmpeq is
available.
* builtins.def (BUILT_IN___MEMCMPEQ): New.
* common.opt: Add -fextra-libc-function=.
* opts.cc (extra_libc_functions): New.
(parse_extra_libc_function): New function.
(common_handle_option): Handle -fextra-libc-function=.
* opts.h (extra_libc_function_list): New.
(extra_libc_functions): Likewise.
* doc/invoke.texi: Document -fextra-libc-function=memcmpeq.

gcc/testsuite/

* c-c++-common/memcmpeq-1.c: New test.
* c-c++-common/memcmpeq-2.c: Likewise.
* c-c++-common/memcmpeq-3.c: Likewise.
* c-c++-common/memcmpeq-4.c: Likewise.
* c-c++-common/memcmpeq-5.c: Likewise.
* c-c++-common/memcmpeq-6.c: Likewise.
* c-c++-common/memcmpeq-7.c: Likewise.
---
 gcc/builtins.cc |  5 -
 gcc/builtins.def|  4 
 gcc/common.opt  |  4 
 gcc/doc/invoke.texi |  6 ++
 gcc/opts.cc | 23 +++
 gcc/opts.h  |  7 +++
 gcc/testsuite/c-c++-common/memcmpeq-1.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-2.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-3.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-4.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-5.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-6.c | 11 +++
 gcc/testsuite/c-c++-common/memcmpeq-7.c | 11 +++
 13 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-1.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-2.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-3.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-4.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-5.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-6.c
 create mode 100644 gcc/testsuite/c-c++-common/memcmpeq-7.c

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index b9d89b409b8..22269318e8c 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "demangle.h"
 #include "gimple-range.h"
 #include "pointer-query.h"
+#include "opts.h"
 
 struct target_builtins default_target_builtins;
 #if SWITCHABLE_TARGET
@@ -7410,7 +7411,9 @@ expand_builtin (tree exp, rtx target, rtx subtarget, 
machine_mode mode,
return target;
   if (fcode == BUILT_IN_MEMCMP_EQ)
{
- tree newdecl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+ tree newdecl = builtin_decl_explicit
+   (extra_libc_functions.has_memcmpeq
+? BUILT_IN___MEMCMPEQ : BUILT_IN_MEMCMP);
  TREE_OPERAND (exp, 1) = build_fold_addr_expr (newdecl);
}
   break;
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 005976f34e9..eb8d33b16e9 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -965,6 +965,10 @@ DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX, 
"__builtin_alloca_with_ali
equality with zero.  */
 DEF_BUILTIN_STUB (BUILT_IN_MEMCMP_EQ, "__builtin_memcmp_eq")
 
+/* Similar to BUILT_IN_MEMCMP_EQ, but is mapped to __memcmpeq only with
+   -fextra-libc-function=memcmpeq.  */
+DEF_EXT_LIB_BUILTIN (BUILT_IN___MEMCMPEQ, "__memcmpeq", 
BT_FN_INT_CONST_PTR_CONST_PTR_SIZE, ATTR_PURE_NOTHROW_NONNULL_LEAF)
+
 /* An internal version of strcmp/strncmp, used when the result is only 
tested for equality with zero.  */
 DEF_BUILTIN_STUB (BUILT_IN_STRCMP_EQ, "__builtin_strcmp_eq")
diff --git a/gcc/common.opt b/gcc/common.opt
index 7ca0cceed82..7a7631682b0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1587,6 +1587,10 @@ Enum(excess_precision) String(standard) 
Value(EXCESS_PRECISION_STANDARD)
 EnumValue
 Enum(excess_precision) String(16) Value(EXCESS_PRECISION_FLOAT16)
 
+fextra-libc-function=
+Common Driver Joined
+Specify the extra function in the C library.
+
 ; Whether we permit the extended set of values for FLT_EVAL_METHOD
 ; introduced in ISO/IEC TS 18661-3, or limit ourselves to those in C99/C11.
 fpermitted-flt-eval-methods=
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8cd5bdddc5d..fe1e3709953 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -676,6 +676,7 @@ Objective-C and Objective-C++ Dialects}.
 -ffixed-@var{reg}  -fexceptions @gol
 -fnon-call-exceptions  -fdelete-dead-exceptions  -funwind-tables @gol
 -fasynchronous-unwind-tables @gol
+-fextra-libc-function=memcmpeq @gol
 -fno-gnu-unique @gol
 -finhibit-size-directive  -fcommon  -fno-ident @gol
 -fpcc-struct-return  -fpic  -fPIC  -fpie  -fPIE  -fno-plt @gol
@@ -17250,6 +17251,11 @@ Generate unwin

Re: [PATCH,RS6000 2/5] Rework the RS6000_BTM defines.

2022-06-07 Thread Segher Boessenkool

On Tue, Jun 07, 2022 at 11:45:13AM -0500, will schmidt wrote:
> Additional comments below.  
> I've made note of the comments, and request (ask) that this be
> approved, with a pinky promise that I intend to follow up on the
> suggestions in my next patch series.

Suggestions aren't requirements :-)

> > If we drop bu_mask in function rs6000_target_modify_macros, function
> > rs6000_builtin_mask_calculate will have only one use place in
> > function
> > rs6000_option_override_internal.  IMHO this function
> > rs6000_builtin_mask_calculate also becomes stale after built-in
> > function
> > rewriting and needs some updates with new bif framework later.
> 
> The DEBUG output using the builtin_mask still appeared to have some
> potential value, but I can make a point to investigate that further.

"Potential value" is a value of zero, if not a negative value.  If some
debug output has real and current value (which are two sides of the same
coin), it will be apparent to every reader.  Debug output that isn't
useful currently is throw-away, and should be thrown away.  It is easy
to recreate (it is a totally boring number of print statements after
all), and you can pull it from git history anyway.

Segher

Re: [PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-07 Thread Patrick Palka via Gcc-patches

On Tue, 7 Jun 2022, Jason Merrill wrote:

> On 6/7/22 09:24, Patrick Palka wrote:
> > On Mon, 6 Jun 2022, Jason Merrill wrote:
> > 
> > > On 6/6/22 14:27, Patrick Palka wrote:
> > > > On Thu, 7 Oct 2021, Jason Merrill wrote:
> > > > 
> > > > > On 10/7/21 11:17, Patrick Palka wrote:
> > > > > > On Wed, 6 Oct 2021, Jason Merrill wrote:
> > > > > > 
> > > > > > > On 10/6/21 15:52, Patrick Palka wrote:
> > > > > > > > On Wed, 6 Oct 2021, Patrick Palka wrote:
> > > > > > > > 
> > > > > > > > > On Tue, 5 Oct 2021, Jason Merrill wrote:
> > > > > > > > > 
> > > > > > > > > > On 10/5/21 15:17, Patrick Palka wrote:
> > > > > > > > > > > On Mon, 4 Oct 2021, Patrick Palka wrote:
> > > > > > > > > > > 
> > > > > > > > > > > > When passing a function template as the argument to a
> > > > > > > > > > > > function
> > > > > > > > > > > > NTTP
> > > > > > > > > > > > inside a template, we resolve it to the right
> > > > > > > > > > > > specialization
> > > > > > > > > > > > ahead
> > > > > > > > > > > > of
> > > > > > > > > > > > time via resolve_address_of_overloaded_function, though
> > > > > > > > > > > > the
> > > > > > > > > > > > call
> > > > > > > > > > > > to
> > > > > > > > > > > > mark_used within defers odr-using it until instantiation
> > > > > > > > > > > > time
> > > > > > > > > > > > (as
> > > > > > > > > > > > usual).
> > > > > > > > > > > > But at instantiation time we end up never calling
> > > > > > > > > > > > mark_used
> > > > > > > > > > > > on
> > > > > > > > > > > > the
> > > > > > > > > > > > specialization.
> > > > > > > > > > > > 
> > > > > > > > > > > > This patch fixes this by adding a call to mark_used in
> > > > > > > > > > > > convert_nontype_argument_function.
> > > > > > > > > > > > 
> > > > > > > > > > > > PR c++/53164
> > > > > > > > > > > > 
> > > > > > > > > > > > gcc/cp/ChangeLog:
> > > > > > > > > > > > 
> > > > > > > > > > > > * pt.c (convert_nontype_argument_function): Call
> > > > > > > > > > > > mark_used.
> > > > > > > > > > > > 
> > > > > > > > > > > > gcc/testsuite/ChangeLog:
> > > > > > > > > > > > 
> > > > > > > > > > > > * g++.dg/template/non-dependent16.C: New test.
> > > > > > > > > > > > ---
> > > > > > > > > > > >   gcc/cp/pt.c |
> > > > > > > > > > > > 3
> > > > > > > > > > > > +++
> > > > > > > > > > > >   gcc/testsuite/g++.dg/template/non-dependent16.C |
> > > > > > > > > > > > 16
> > > > > > > > > > > > 
> > > > > > > > > > > >   2 files changed, 19 insertions(+)
> > > > > > > > > > > >   create mode 100644
> > > > > > > > > > > > gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > > > 
> > > > > > > > > > > > diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
> > > > > > > > > > > > index f950f4a21b7..5e819c9598c 100644
> > > > > > > > > > > > --- a/gcc/cp/pt.c
> > > > > > > > > > > > +++ b/gcc/cp/pt.c
> > > > > > > > > > > > @@ -6668,6 +6668,9 @@ convert_nontype_argument_function
> > > > > > > > > > > > (tree
> > > > > > > > > > > > type,
> > > > > > > > > > > > tree
> > > > > > > > > > > > expr,
> > > > > > > > > > > > return NULL_TREE;
> > > > > > > > > > > >   }
> > > > > > > > > > > >   +  if (!mark_used (fn_no_ptr, complain) &&
> > > > > > > > > > > > !(complain &
> > > > > > > > > > > > tf_error))
> > > > > > > > > > > > +return NULL_TREE;
> > > > > > > > > > > > +
> > > > > > > > > > > > linkage = decl_linkage (fn_no_ptr);
> > > > > > > > > > > > if (cxx_dialect >= cxx11 ? linkage == lk_none :
> > > > > > > > > > > > linkage !=
> > > > > > > > > > > > lk_external)
> > > > > > > > > > > >   {
> > > > > > > > > > > > diff --git
> > > > > > > > > > > > a/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > > > b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > > > new file mode 100644
> > > > > > > > > > > > index 000..b7dca8f6752
> > > > > > > > > > > > --- /dev/null
> > > > > > > > > > > > +++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
> > > > > > > > > > > > @@ -0,0 +1,16 @@
> > > > > > > > > > > > +// PR c++/53164
> > > > > > > > > > > > +
> > > > > > > > > > > > +template
> > > > > > > > > > > > +void f(T) {
> > > > > > > > > > > > +  T::fail; // { dg-error "not a member" }
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > > > +template
> > > > > > > > > > > > +struct A { };
> > > > > > > > > > > > +
> > > > > > > > > > > > +template
> > > > > > > > > > > > +void g() {
> > > > > > > > > > > > +  A a;
> > > > > > > > > > > > +}
> > > > > > > > > > > 
> > > > > > > > > > > I should mention that the original testcase in the PR was
> > > > > > > > > > > slightly
> > > > > > > > > > > different than this one in that it also performed a call
> > > > > > > > > > > to
> > > > > > > > > > > the
> > > > > > > > > > > NTTP,
> > > > > > > > > > > e.g.
> > > > > > > > > > > 
> > > > > > > > > > >template
> > > > > > > > > > >struct A {

Re: [PATCH, V3] Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target 99293

2022-06-07 Thread will schmidt via Gcc-patches

On Mon, 2022-06-06 at 20:31 -0400, Michael Meissner wrote:
> Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target
> 99293.
> 
> This is version 3 of the patch.  The original patch was:
> 
> > Date: Mon, 28 Mar 2022 12:26:02 -0400
> > Subject: [PATCH 1/4] Optimize vec_splats of constant vec_extract
> > for V2DI/V2DF, PR target 99293.
> > Message-ID: 
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-March/592420.html
> 
> Version 2 of the patch was:
> 
> > Date: Fri, 13 May 2022 10:49:26 -0400
> > Subject: [PATCH] Optimize vec_splats of constant V2DI/V2DF
> > vec_extract, PR target/99293
> > Message-ID: 
> > https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594797.html
> 
> The differences between version 2 and version 3 were to clean up the
> description
> of what the patch does, and to make the example test case clear.
> 
> In PR target/99293, it was pointed out that doing:
> 
>   vector long long dest0, dest1, src;
>   /* ... */
>   dest0 = vec_splats (vec_extract (src, 0));
>   dest1 = vec_splats (vec_extract (src, 1));
> 
> would generate slower code.
> 
> It generates the following code on power8:
> 
>   ;; vec_splats (vec_extract (src, 0))
>   xxpermdi 0,34,34,3
>   xxpermdi 34,0,0,0
> 
>   ;; vec_splats (vec_extract (src, 1))
>   xxlor 0,34,34
>   xxpermdi 34,0,0,0
> 
> However on power9 and power10 it generates:
> 
>   ;; vec_splats (vec_extract (src, 0))
>   mfvsrld 3,34
>   mtvsrdd 34,9,9
> 
>   ;; vec_splats (vec_extract (src, 1))
>   mfvsrd 9,34
>   mtvsrdd 34,9,9
> 
> This is due to the power9 having the mfvsrld instruction which can
> extract
> either 64-bit element into a GPR.  While there are alternatives for
> both
> vector registers and GPR registers, the register allocator prefers to
> put
> DImode into GPR registers.
> 
> In this case, it is better to have a single combiner pattern that can
> generate
> a single xxpermdi, instead of 2 insns (the extract and then the
> concat).
> This is true if the two operations are move from vector register and
> move to
> vector register.  As Segher pointed out in a previous version of the
> patch, the
> combiner already tries creating a (vec_duplicate (vec_select
> ...))
> pattern, but we didn't provide one.
> 
> This patch reworks vsx_xxspltd_<mode> for V2DImode and V2DFmode so
> that it now
> uses VEC_DUPLICATE, which the combiner checks for.

Ok.

> 
> I have built Spec 2017 with this patch installed, and the cam4_r
> benchmark
> is the only benchmark that generated different code (3
> mfvsrld/mtvsrdd
> pairs of instructions were replaced with xxpermdi).
> 
> I have built bootstrap versions on the following systems and I have
> run
> the regression tests.  There were no regressions in the runs:
> 
>   Power9 little endian, --with-cpu=power9
>   Power10 little endian, --with-cpu=power10
>   Power8 big endian, --with-cpu=power8 (both 32-bit & 64-bit
> tests)

Ok.


> 
> Can I install this into the trunk?  After a burn-in period, can I
> backport
> and install this into GCC 11 and GCC 10 branches?
> 
> 2022-06-06   Michael Meissner  
> 
> gcc/
>   PR target/99293
>   * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
>   UNSPEC_VSX_XXSPLTD case.
>   * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
>   (vsx_xxspltd_): Rewrite to use VEC_DUPLICATE.
> 
> gcc/testsuite:
>   PR target/99293
>   * gcc.target/powerpc/builtins-1.c: Update insn count.
>   * gcc.target/powerpc/pr99293.c: New test.
> ---
>  gcc/config/rs6000/rs6000-p8swap.cc|  1 -
>  gcc/config/rs6000/vsx.md  | 19 +++
>  gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr99293.c| 51
> +++
>  4 files changed, 62 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99293.c
> 
> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc
> b/gcc/config/rs6000/rs6000-p8swap.cc
> index 275702fee1b..3160fcbdeca 100644
> --- a/gcc/config/rs6000/rs6000-p8swap.cc
> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
> @@ -807,7 +807,6 @@ rtx_is_swappable_p (rtx op, unsigned int
> *special)
> case UNSPEC_VUPKLU_V4SF:
>   return 0;
> case UNSPEC_VSPLT_DIRECT:
> -   case UNSPEC_VSX_XXSPLTD:
>   *special = SH_SPLAT;
>   return 1;
> case UNSPEC_REDUC_PLUS:
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 1b75538f42f..a1a1ce95195 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -296,7 +296,6 @@ (define_c_enum "unspec"
> UNSPEC_VSX_XXPERM
> 
> UNSPEC_VSX_XXSPLTW
> -   UNSPEC_VSX_XXSPLTD
> UNSPEC_VSX_DIVSD
> UNSPEC_VSX_DIVUD
> UNSPEC_VSX_DIVSQ

Ok.

> @@ -4673,16 +4672,18 @@ (define_insn "vsx_vsplt_di"
>  ;; V2DF/V2DI splat for use by vec_splat builtin
>  (define_insn "vsx_xxspltd_"
>[(set (match_operand:VSX_D 0 "vsx_register_operand" "

[r13-998 Regression] FAIL: gcc.target/i386/xop-pcmov3.c scan-assembler vpcmov on Linux/x86_64

2022-06-07 Thread skpandey--- via Gcc-patches

On Linux/x86_64,

c4320bde42c6497b701e2e6b8f1c5069bed19818 is the first bad commit
commit c4320bde42c6497b701e2e6b8f1c5069bed19818
Author: Roger Sayle 
Date:   Tue Jun 7 07:49:40 2022 +0100

Recognize vpcmov in combine with -mxop on x86.

caused

FAIL: gcc.target/i386/xop-pcmov3.c scan-assembler vpcmov

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r13-998/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/xop-pcmov3.c --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/xop-pcmov3.c --target_board='unix{-m64\ 
-march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me 
at skpgkp2 at gmail dot com)

[r13-1005 Regression] FAIL: gcc.target/i386/pr105854.c (test for excess errors) on Linux/x86_64

2022-06-07 Thread skpandey--- via Gcc-patches

On Linux/x86_64,

cd22395457f063824c839fd1c0077d15d3dccd6d is the first bad commit
commit cd22395457f063824c839fd1c0077d15d3dccd6d
Author: liuhongt 
Date:   Mon Jun 6 13:39:19 2022 +0800

Fix insn does not satisfy its constraints: sse2_lshrv1ti3

caused

FAIL: gcc.target/i386/pr105854.c (test for excess errors)

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r13-1005/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/pr105854.c --target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/pr105854.c --target_board='unix{-m32\ 
-march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me 
at skpgkp2 at gmail dot com)

Re: [PATCH][_Hashtable] Fix insertion of range of type convertible to value_type PR 105714

2022-06-07 Thread François Dumont via Gcc-patches


Hi

Is this less ambitious version ok ?

Thanks

On 25/05/22 07:09, François Dumont wrote:

Here is the patch to fix just what is described in PR 105714.

    libstdc++: [_Hashtable] Insert range of types convertible to 
value_type PR 105714


    Fix insertion of range of types convertible to value_type.

    libstdc++-v3/ChangeLog:

    PR libstdc++/105714
    * include/bits/hashtable_policy.h (_ValueTypeEnforcer): New.
    * include/bits/hashtable.h 
(_Hashtable<>::_M_insert_unique_aux): New.
    (_Hashtable<>::_M_insert(_Arg&&, const _NodeGenerator&, 
true_type)): Use latters.
    (_Hashtable<>::_M_insert(_Arg&&, const _NodeGenerator&, 
false_type)): Likewise.
    (_Hashtable(_InputIterator, _InputIterator, size_type, 
const _Hash&, const _Equal&,

    const allocator_type&, true_type)): Use this.insert range.
    (_Hashtable(_InputIterator, _InputIterator, size_type, 
const _Hash&, const _Equal&,

    const allocator_type&, false_type)): Use _M_insert.
    * testsuite/23_containers/unordered_map/cons/56112.cc: 
Check how many times conversion

    is done.
    * testsuite/23_containers/unordered_map/insert/105714.cc: 
New test.
    * testsuite/23_containers/unordered_set/insert/105714.cc: 
New test.


Tested under Linux x64, ok to commit ?

François

On 24/05/22 12:31, Jonathan Wakely wrote:

On Tue, 24 May 2022 at 11:22, Jonathan Wakely wrote:

On Tue, 24 May 2022 at 11:18, Jonathan Wakely wrote:

On Thu, 5 May 2022 at 18:38, François Dumont via Libstdc++
 wrote:

Hi

Renewing my patch to fix PR 56112 but for the insert methods, I 
totally

change it, now works also with move-only key types.

I let you Jonathan find a better name than _ValueTypeEnforcer as 
usual :-)


libstdc++: [_Hashtable] Insert range of types convertible to 
value_type

PR 56112

Fix insertion of range of types convertible to value_type. Fix 
also when

this value_type
has a move-only key_type which also allows converted values to be 
moved.


libstdc++-v3/ChangeLog:

  PR libstdc++/56112
  * include/bits/hashtable_policy.h (_ValueTypeEnforcer): 
New.

  * include/bits/hashtable.h
(_Hashtable<>::_M_insert_unique_aux): New.
  (_Hashtable<>::_M_insert(_Arg&&, const _NodeGenerator&,
true_type)): Use latters.
  (_Hashtable<>::_M_insert(_Arg&&, const _NodeGenerator&,
false_type)): Likewise.
  (_Hashtable(_InputIterator, _InputIterator, size_type, 
const

_Hash&, const _Equal&,
  const allocator_type&, true_type)): Use this.insert range.
  (_Hashtable(_InputIterator, _InputIterator, size_type, 
const

_Hash&, const _Equal&,
  const allocator_type&, false_type)): Use _M_insert.
  * testsuite/23_containers/unordered_map/cons/56112.cc: 
Check

how many times conversion
  is done.
  (test02): New test case.
  * testsuite/23_containers/unordered_set/cons/56112.cc: 
New test.


Tested under Linux x86_64.

Ok to commit ?

No, sorry.

The new test02 function in 23_containers/unordered_map/cons/56112.cc
doesn't compile with libc++ or MSVC either, are you sure that test is
valid? I don't think it is, because S2 is not convertible to
pair. None of the pair constructors are
viable, because the move constructor would require two user-defined
conversions (from S2 to pair and then from
pair to pair). A conversion
sequence cannot have more than one user-defined conversion using a
constructor or conversion operator. So if your patch makes that
compile, it's a bug in the new code. I haven't analyzed that code to
see where the problem is, I'm just looking at the test results and the
changes in behaviour.

I meant to include this link showing that libc++ and MSVC reject
test02() as well:

https://godbolt.org/z/j7E9f6bd4

I've created https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105717 for
the insertion bug, rather than reopening PR 56112.

Re: [PATCH] c++: function NTTP argument considered unused [PR53164, PR105848]

2022-06-07 Thread Jason Merrill via Gcc-patches


On 6/7/22 15:18, Patrick Palka wrote:

On Tue, 7 Jun 2022, Jason Merrill wrote:


On 6/7/22 09:24, Patrick Palka wrote:

On Mon, 6 Jun 2022, Jason Merrill wrote:


On 6/6/22 14:27, Patrick Palka wrote:

On Thu, 7 Oct 2021, Jason Merrill wrote:


On 10/7/21 11:17, Patrick Palka wrote:

On Wed, 6 Oct 2021, Jason Merrill wrote:


On 10/6/21 15:52, Patrick Palka wrote:

On Wed, 6 Oct 2021, Patrick Palka wrote:


On Tue, 5 Oct 2021, Jason Merrill wrote:


On 10/5/21 15:17, Patrick Palka wrote:

On Mon, 4 Oct 2021, Patrick Palka wrote:


When passing a function template as the argument to a
function
NTTP
inside a template, we resolve it to the right
specialization
ahead
of
time via resolve_address_of_overloaded_function, though
the
call
to
mark_used within defers odr-using it until instantiation
time
(as
usual).
But at instantiation time we end up never calling
mark_used
on
the
specialization.

This patch fixes this by adding a call to mark_used in
convert_nontype_argument_function.

PR c++/53164

gcc/cp/ChangeLog:

* pt.c (convert_nontype_argument_function): Call
mark_used.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent16.C: New test.
---
   gcc/cp/pt.c |
3
+++
   gcc/testsuite/g++.dg/template/non-dependent16.C |
16

   2 files changed, 19 insertions(+)
   create mode 100644
gcc/testsuite/g++.dg/template/non-dependent16.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index f950f4a21b7..5e819c9598c 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -6668,6 +6668,9 @@ convert_nontype_argument_function
(tree
type,
tree
expr,
 return NULL_TREE;
   }
   +  if (!mark_used (fn_no_ptr, complain) &&
!(complain &
tf_error))
+return NULL_TREE;
+
 linkage = decl_linkage (fn_no_ptr);
 if (cxx_dialect >= cxx11 ? linkage == lk_none :
linkage !=
lk_external)
   {
diff --git
a/gcc/testsuite/g++.dg/template/non-dependent16.C
b/gcc/testsuite/g++.dg/template/non-dependent16.C
new file mode 100644
index 000..b7dca8f6752
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent16.C
@@ -0,0 +1,16 @@
+// PR c++/53164
+
+template
+void f(T) {
+  T::fail; // { dg-error "not a member" }
+}
+
+template
+struct A { };
+
+template
+void g() {
+  A a;
+}


I should mention that the original testcase in the PR was
slightly
different than this one in that it also performed a call
to
the
NTTP,
e.g.

template
struct A {
  static void h() {
p(0);
  }
};

template
void g() {
  A::h();
}

templated void g<0>();

and not even the call was enough to odr-use f, apparently
because
the
CALL_EXPR case of tsubst_expr calls mark_used on the
callee
only
when
it's a FUNCTION_DECL, but in this case after substitution
it's
an
ADDR_EXPR of a FUNCTION_DECL.  Fixing this by looking
through
the
ADDR_EXPR
worked, but IIUC the call isn't necessary for f to be
odr-used,
simply
using f as a template argument should be sufficient, so it
seems
the
above is a better fix.


I agree that pedantically the use happens when substituting
into
the
use
of
A, but convert_nontype_argument_function seems like a
weird
place
to
implement that; it's only called again during instantiation
of
A,
when we
instantiate the injected-class-name.  If A isn't
instantiated,
e.g.
if 'a'
is a pointer to A, we again don't instantiate f.


I see, makes sense..  I'm not sure where else we can mark the
use,
then.
Since we resolve the OVERLOAD f to the FUNCTION_DECL f
ahead
of
time (during which mark_used doesn't actually instantiate
f
because
we're inside a template), at instantiation time the type A
is
already
non-dependent so tsubst_aggr_type avoids doing the work that
would
end
up calling convert_nontype_argument_function.



I see that clang doesn't reject your testcase, either, but
MSVC
and
icc
do
(even with 'a' a pointer): https://godbolt.org/z/MGE6TcMch


FWIW although Clang doesn't reject 'A a;', it does reject
'using type = A;' weirdly enough:
https://godbolt.org/z/T9qEn6bWW


Shall we just go with the other more specific approach, that
makes
sure
the CALL_EXPR case of tsubst_expr calls mark_used when the
callee
is
an
ADDR_EXPR?  Something like (bootstrapped and regtested):


Err, this approach is wrong because by stripping the ADDR_EXPR
here
we
end up checking access of the unwrapped FUNCTION_DECL again
after
substituting into the call.  So we incorrectly reject e.g.

   template
   void g() {
 P(); // error: ‘static void A::h()’ is private within
this
context
   }

   struct A {
 void f() {
   g();
 }
   private:
 static void h();
   };

since A::h isn't accessible from g.


I guess you could call mark_used directly instead of stripping the
ADDR_EXPR.


That seems to work nicely, how does the below look?  Bootstrapped
and
regtested on x86_64-pc-linux-gnu.



Or for the general probl

Re: [PATCH] c++: alias template arguments are evaluated [PR101906]

2022-06-07 Thread Jason Merrill via Gcc-patches


On 6/7/22 14:25, Patrick Palka wrote:

On Wed, 23 Mar 2022, Jason Merrill wrote:


On 3/22/22 14:31, Patrick Palka wrote:

On Tue, 22 Mar 2022, Patrick Palka wrote:


Here we're neglecting to clear cp_unevaluated_operand when substituting
into the arguments of the alias template-id skip<(T(), 0), T> with T=A,
which means cp_unevaluated_operand remains set during mark_used for
A::A() and so we never synthesize it.  Later constant evaluation for
the substituted template argument (A(), 0) (during coerce_template_parms)
fails with "'constexpr A::A()' used before its definition" since it was
never synthesized.


It occurred to me to check the case where 'skip' is a function/variable
template instead of an alias template, and unfortunately seems we run
into the same issue:

template T skip();  // Function template
// template T skip; // Variable template

template
constexpr unsigned sizeof_() {
  return sizeof(skip<(T(), 0), T>());
  // return sizeof(skip<(T(), 0), T>);
}

struct A {
  int m = -1;
};

static_assert(sizeof_() == sizeof(A), "");

: In instantiation of ‘constexpr unsigned int sizeof_() [with T =
A]’:
:14:25:   required from here
:6:34: error: ‘constexpr A::A()’ used before its definition

We can fix this similarly by clearing cp_unevaluated_operand when
substituting into the arguments of a TEMPLATE_ID_EXPR, but now I'm
worried this cp_unevaluated_operand business might not be the best
approach (despite it being consistent with what tsubst_aggr_type does).

Maybe instantiate_cx_fn_r should be responsible for making sure A::A()
gets synthesized?


Or cxx_eval_call_expression, but just as a workaround:
manifestly-constant-evaluated expressions are evaluated even in an unevaluated
operand, so I think adjusting cp_unevaluated_operand is correct.

Perhaps tsubst_template_args should use cp_evaluated,


Makes sense.


and places that use plain tsubst for substituting template args should
use it instead?


Even though tsubst already uses tsubst_template_args to substitute
TREE_VEC?  AFAICT this change would have no effect except when
args is NULL_TREE, in which case tsubst exits early but
tsubst_template_args doesn't.


Never mind, then.


Here's what I have so far, which survives bootstrap and regtest.


OK.


-- >8 --

Subject: [PATCH] c++: template-id arguments are evaluated [PR101906]

Here we're neglecting to clear cp_unevaluated_operand when substituting
into the arguments of the alias template-id skip<(T(), 0), T> with T=A,
which means cp_unevaluated_operand remains set during mark_used for
A::A() and so we never synthesize it.  Later constant evaluation for
the substituted template argument (A(), 0) (during coerce_template_parms)
fails with "'constexpr A::A()' used before its definition" since it was
never synthesized.

This doesn't happen with a class template because tsubst_aggr_type
clears cp_unevaluated_operand during substitution thereof.  But
since template arguments are generally manifestly constant-evaluated,
which in turn are evaluated even in an unevaluated operand, we
should be clearing cp_unevaluated_operand more broadly whenever
substituting any set of template arguments.  Thus this patch makes us
clear cp_unevaluated_operand during tsubst_template_args.

PR c++/101906

gcc/cp/ChangeLog:

* pt.cc (tsubst_template_args): Set cp_evaluated here.
(tsubst_aggr_type): Not here.

gcc/testsuite/ChangeLog:

* g++.dg/template/evaluated1.C: New test.
* g++.dg/template/evaluated1a.C: New test.
* g++.dg/template/evaluated1b.C: New test.
* g++.dg/template/evaluated1c.C: New test.
---
  gcc/cp/pt.cc|  6 +++---
  gcc/testsuite/g++.dg/template/evaluated1.C  | 17 +
  gcc/testsuite/g++.dg/template/evaluated1a.C | 16 
  gcc/testsuite/g++.dg/template/evaluated1b.C | 17 +
  gcc/testsuite/g++.dg/template/evaluated1c.C | 17 +
  5 files changed, 70 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/evaluated1.C
  create mode 100644 gcc/testsuite/g++.dg/template/evaluated1a.C
  create mode 100644 gcc/testsuite/g++.dg/template/evaluated1b.C
  create mode 100644 gcc/testsuite/g++.dg/template/evaluated1c.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index ee7d2c935cc..7fe1c7653aa 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -13475,6 +13475,9 @@ tsubst_template_args (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
if (t == error_mark_node)
  return error_mark_node;
  
+  /* In "sizeof(X<I>)" we need to evaluate "I".  */

+  cp_evaluated ev;
+
len = TREE_VEC_LENGTH (t);
elts = XALLOCAVEC (tree, len);
  
@@ -13709,9 +13712,6 @@ tsubst_aggr_type (tree t,

  tree context;
  tree r;
  
-	  /* In "sizeof(X<I>)" we need to evaluate "I".  */

- cp_evaluated ev;
-
  /* First, determine the context for the type we are looking
 up.  */

Re: [PATCH, V3] Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target 99293

2022-06-07 Thread Segher Boessenkool

On Tue, Jun 07, 2022 at 02:26:17PM -0500, will schmidt wrote:
> On Mon, 2022-06-06 at 20:31 -0400, Michael Meissner wrote:
> >  (define_insn "vsx_xxspltd_"
> >[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
> > -(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand"
> > "wa")

Someone (you?) uses format=flawed.  You cannot reply to emails that
contain patches that way, it messes up everything :-(

> > -  (match_operand:QI 2 "u5bit_cint_operand" "i")]
> > -  UNSPEC_VSX_XXSPLTD))]
> > +   (vec_duplicate:VSX_D
> > +(vec_select:
> > + (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
> > + (parallel [(match_operand:QI 2 "const_0_to_1_operand"
> > "i")]]
> >"VECTOR_MEM_VSX_P (mode)"
> 
> Noting that
> (define_mode_iterator VSX_D [V2DF V2DI])
> (define_mode_attr VS_scalar [(V1TI"TI")
>(V2DF  "DF")
>(V2DI  "DI")
>(V4SF  "SF")
>(V4SI  "SI")
>(V8HI  "HI")
>(V16QI "QI")])

Yeah, the comment
;; Map the scalar mode for a vector type
is misleading, in more ways than one :-(

And the whole thing is just the same as VEC_base anyway, so it is much
better to just use that.


Segher

Re: [PATCH 2/3] Disable generating load/store vector pairs for block copies.

2022-06-07 Thread will schmidt via Gcc-patches

On Mon, 2022-06-06 at 20:55 -0400, Michael Meissner wrote:
> [PATCH 2/3] Disable generating load/store vector pairs for block copies.
> 
> If the store vector pair instruction is disabled, do not generate block
> copies that use load and store vector pair instructions.
> 
> I have built bootstrap compilers and run the regression tests on three
> different systems:
> 
> 1)Little endian power10 using the --with-cpu=power10 option.
> 
> 2)Little endian power9 using the --with-cpu=power9 option.
> 
> 3)Big endian power8 using the --with-cpu=power8 option.  On this 
> system,
>   both 64-bit and 32-bit code generation was tested.
> 
> There were no regressions in the runs.  Can I check this patch into the
> trunk?  If there are no changes needed for the backports, can I check this
> code into the active branches after a burn-in period?
> 
> 2022-06-06   Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/rs6000-string.cc (expand_block_move): If the store
>   vector pair instructions are disabled, do not generate block
>   copies using load and store vector pairs.
> ---
>  gcc/config/rs6000/rs6000-string.cc | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-string.cc 
> b/gcc/config/rs6000/rs6000-string.cc
> index 59d901ac68d..1b18e043269 100644
> --- a/gcc/config/rs6000/rs6000-string.cc
> +++ b/gcc/config/rs6000/rs6000-string.cc
> @@ -2787,14 +2787,16 @@ expand_block_move (rtx operands[], bool might_overlap)
>rtx src, dest;
>bool move_with_length = false;
> 
> -  /* Use OOmode for paired vsx load/store.  Use V2DI for single
> -  unaligned vsx load/store, for consistency with what other
> -  expansions (compare) already do, and so we can use lxvd2x on
> -  p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
> -  with length < 16 (if allowed), then gpr load/store.  */
> +  /* Use OOmode for paired vsx load/store unless the store vector pair
> +  instructions are disabled.  Use V2DI for single unaligned vsx
> +  load/store, for consistency with what other expansions (compare)
> +  already do, and so we can use lxvd2x on p8.  Order is VSX pair
> +  unaligned, VSX unaligned, Altivec, VSX with length < 16 (if allowed),
> +  then gpr load/store.  */
> 
>if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
> && TARGET_BLOCK_OPS_VECTOR_PAIR
> +   && TARGET_STORE_VECTOR_PAIR
> && bytes >= 32
> && (align >= 256 || !STRICT_ALIGNMENT))


Seems straightforward.  LGTM, 
Thanks
-Will




>   {
> -- 
> 2.35.3
> 
>

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread will schmidt via Gcc-patches

On Mon, 2022-06-06 at 20:55 -0400, Michael Meissner wrote:
> [PATCH 1/3] Disable generating store vector pair.
> 
> Testing has revealed that the power10 has some slowdowns if the store
> vector pair instruction is generated in some cases.  This patch disables
> generating the store vector pair instructions (stxvp, pstxvp, and stxvpx)
> unless an undocumented switch is used.  It is anticipated that perhaps
> with future machines we can generate the store vector pair instruction.
> 
> This patch does a split after reload to convert a store vector pair
> instruction into a pair of store vector instructions.
> 
> We do continue to generate the load vector pair instructions (lxvp, plxvp,
> and lxvpx), since we have found that in code that heavily uses MMA, it is
> still a win to generate the load vector pair instructions.
> 
> There are two future patches planned:
> 
> 1)Disable block moves from generating load/store vector pair
>   instructions unless the store vector pair instructions are
>   being generated.
> 
> 2)Make the built-in functions for generating store vector pair
>   always generate those instructions even if store vector pair
>   instructions are disabled.
> 
> I have built bootstrap compilers and run the regression tests on three
> different systems:
> 
> 1)Little endian power10 using the --with-cpu=power10 option.
> 
> 2)Little endian power9 using the --with-cpu=power9 option.
> 
> 3)Big endian power8 using the --with-cpu=power8 option.  On this 
> system,
>   both 64-bit and 32-bit code generation was tested.
> 
> There were no regressions in the runs except for the tests that are
> modified in patch #3 in these series of patches.  Can I check this patch
> into the trunk?  If there are no changes needed for the backports, can I
> check this code into the active branches after a burn-in period?
> 
> 2022-06-06   Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/mma.md (movoo): Disable generating store vector
>   pair instructions unless these are enabled by the user.
>   (movxo): Likewise.
>   * config/rs6000/rs6000.cc (rs6000_setup_reg_addr_masks): If store
>   vector pair instructions are disabled, do not allow vector pair
>   addresses to be indexed.
>   (rs6000_split_multireg_move): Do not split XOmode stores into two
>   store vector pair instructions unless store vector pair
>   instructions are enabled.
>   * config/rs6000/rs6000.md (isa attribute): Add stxvp attribute.
>   (enabled attribute): Disable alternative using store vector pair
>   instructions unless they are enabled.
>   * config/rs6000/rs6000.opt (-mstore-vector-pair): New option.
> 
> gcc/testsuite/
> 
>   * gcc.target/powerpc/p10-store-vector-pair-1.c: New test.
>   * gcc.target/powerpc/p10-store-vector-pair-2.c: New test.
> ---
>  gcc/config/rs6000/mma.md  | 41 ++
>  gcc/config/rs6000/rs6000.cc   |  9 +-
>  gcc/config/rs6000/rs6000.md   |  8 +-
>  gcc/config/rs6000/rs6000.opt  |  4 +
>  .../powerpc/p10-store-vector-pair-1.c | 82 +++
>  .../powerpc/p10-store-vector-pair-2.c | 81 ++
>  6 files changed, 206 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index a183b6a168a..9b5f243b88d 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -274,26 +274,35 @@ (define_expand "movoo"
>DONE;
>  })
> 
> +;; By default for power10, do not generate the stxvp/pstxvp/stxvpx
> +;; instructions.  Instead, split these instructions into two separate store
> +;; vector instructions.  We do always generate a lxvp/plxvp/lxvpx 
> instruction.
> +;; We leave in the support for generating stxvp/pstxvp/stxvpx in future
> +;; machines.

... and if (undocumented) STORE_VECTOR_PAIR option is indicated ?

Nothing else jumps out at me.  

Thanks
-Will

Re: [PATCH 3/3] Adjust MMA tests to account for no store vector pair.

2022-06-07 Thread will schmidt via Gcc-patches

On Mon, 2022-06-06 at 20:56 -0400, Michael Meissner wrote:
> [PATCH 3/3] Adjust MMA tests to account for no store vector pair.
> 
> In changing the default for generating the store vector pair instructions,
> I had to adjust several of the MMA tests to remove checking for these
> instructions.  Mostly I just deleted the scan-assembler lines checking for
> stxvp.  In two of the tests, I added the -mstore-vector-pair option since
> the point of the test was to check for specific cases with store vector
> pair instructions.
> 
> I have built bootstrap compilers and run the regression tests on three
> different systems:
> 
> 1)Little endian power10 using the --with-cpu=power10 option.
> 
> 2)Little endian power9 using the --with-cpu=power9 option.
> 
> 3)Big endian power8 using the --with-cpu=power8 option.  On this 
> system,
>   both 64-bit and 32-bit code generation was tested.
> 
> There were no regressions in the runs.  Can I check this patch into the
> trunk?  If there are no changes needed for the backports, can I check this
> code into the active branches after a burn-in period?
> 
> 2022-06-06   Michael Meissner  
> 
> gcc/testsuite/
> 
>   * gcc.target/powerpc/mma-builtin-1.c: Eliminate checking for store
>   vector pair instructions.
>   * gcc.target/powerpc/mma-builtin-10-pair.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-10-quad.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-2.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-3.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-4.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-5.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-6.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-7.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-9.c: Likewise.
>   * gcc.target/powerpc/mma-builtin-8.c: Add -mstore-vector-pair.
>   * gcc.target/powerpc/pr102976.c: Likewise.
> ---
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c   | 1 -
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c | 2 --
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c | 2 --
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-2.c   | 1 -
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-3.c   | 1 -
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c   | 2 --
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c   | 2 --
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-6.c   | 1 -
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-7.c   | 2 --
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-8.c   | 2 +-
>  gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c   | 2 --
>  gcc/testsuite/gcc.target/powerpc/pr102976.c| 6 +-
>  12 files changed, 6 insertions(+), 18 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c 
> b/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
> index 69ee826e1be..47b45b00403 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-1.c
> @@ -260,7 +260,6 @@ foo13b (__vector_quad *dst, __vector_quad *src, vec_t 
> *vec)
> 
>  /* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
>  /* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
> -/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
>  /* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
>  /* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
>  /* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */




This all seems straightforward.   LGTM, thanks. 
-Will

Re: [PATCH, V3] Optimize vec_splats of constant vec_extract for V2DI/V2DF, PR target 99293

2022-06-07 Thread will schmidt via Gcc-patches

On Tue, 2022-06-07 at 15:21 -0500, Segher Boessenkool wrote:
> On Tue, Jun 07, 2022 at 02:26:17PM -0500, will schmidt wrote:
> > On Mon, 2022-06-06 at 20:31 -0400, Michael Meissner wrote:
> > >  (define_insn "vsx_xxspltd_"
> > >[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
> > > -(unspec:VSX_D [(match_operand:VSX_D 1
> > > "vsx_register_operand"
> > > "wa")
> 
> Someone (you?) uses format=flawed.  You cannot reply to emails that
> contain patches that way, it messes up everything :-(

Right..  Something on my end may be possessed, several of my emails
today have tried to go all HTML on me, and/or otherwise gone
format-wonky, which I do not want.  ;-) 


> 
> > > -(match_operand:QI 2 "u5bit_cint_operand" "i")]
> > > -  UNSPEC_VSX_XXSPLTD))]
> > > + (vec_duplicate:VSX_D
> > > +  (vec_select:
> > > +   (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
> > > +   (parallel [(match_operand:QI 2 "const_0_to_1_operand"
> > > "i")]]
> > >"VECTOR_MEM_VSX_P (mode)"
> > 
> > Noting that
> > (define_mode_iterator VSX_D [V2DF V2DI])
> > (define_mode_attr VS_scalar [(V1TI  "TI")
> >  (V2DF  "DF")
> >  (V2DI  "DI")
> >  (V4SF  "SF")
> >  (V4SI  "SI")
> >  (V8HI  "HI")
> >  (V16QI "QI")])
> 
> Yeah, the comment
> ;; Map the scalar mode for a vector type
> is misleading, in more ways than one :-(
> 
> And the whole thing is just the same as VEC_base anyway, so it is
> much
> better to just use that.
> 
> 
> Segher

Re: [PATCH v3, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-07 Thread Segher Boessenkool

Hi!

On Tue, Jun 07, 2022 at 09:51:43AM +0800, HAO CHEN GUI wrote:
> On 2/6/2022 上午 5:01, Segher Boessenkool wrote:
> > On Wed, May 18, 2022 at 04:52:26PM +0800, HAO CHEN GUI wrote:
> >>const double __builtin_vsx_xsmaxdp (double, double);
> >> -XSMAXDP smaxdf3 {}
> >> +XSMAXDP fmaxdf3 {}
> >>
> >>const double __builtin_vsx_xsmindp (double, double);
> >> -XSMINDP smindf3 {}
> >> +XSMINDP fmindf3 {}
> > 
> > Are s{min,max}df3 still used after this?
> 
> Expands reduc_s[min|max]_scal are still using s[min|max]df3.

Ah, and reduction is undefined for float unless fast-math anyway.  Okay.

> Also we could implement reduc_f[min|max]_scal after committing
> this patch.

Because fmin and fmax treat NaNs as missing data.  But you have to be
careful with SNaNs here, probably just skip most of this when
-fsignaling-nans is used.  It also still needs -fno-signed-zeros btw,
but not full -ffast-math indeed.


Segher

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread Peter Bergner via Gcc-patches

On 6/6/22 7:55 PM, Michael Meissner wrote:
> gcc/
[snip]
>   * config/rs6000/rs6000.opt (-mstore-vector-pair): New option.
[snip]
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 4931d781c4e..79ceec6e6a5 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -624,6 +624,10 @@ mieee128-constant
>  Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
>  Generate (do not generate) code that uses the LXVKQ instruction.
>  
> +; Generate (do not generate) code that uses the store vector pair 
> instruction.
> +mstore-vector-pair
> +Target Undocumented Var(TARGET_STORE_VECTOR_PAIR) Init(0) Save
> +
>  -param=rs6000-density-pct-threshold=
>  Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) 
> Init(85) IntegerRange(0, 100) Param
>  When costing for loop vectorization, we probably need to penalize the loop 
> body

I think I mentioned this offline, but I'd prefer a negative target flag,
something like TARGET_NO_STORE_VECTOR_PAIR that defaults to off, meaning we'd
generate stxvp by default.  Then I'd like to see MASK_NO_STORE_VECTOR_PAIR
added to power10's rs6000-cpu.def definition.  That way, stxvp isn't generated
on Power10, but would be by default on any possible future cpus without
having to add a flag to those cpus rs6000-cpu.def entries.

Peter

Re: [committed] diagnostics: add SARIF output format

2022-06-07 Thread David Malcolm via Gcc-patches

On Tue, 2022-06-07 at 17:10 +0200, Thomas Schwinge wrote:
> Hi David, and Markus!
> 
> On 2022-06-02T15:46:20-0400, David Malcolm via Gcc-patches <
> gcc-patches@gcc.gnu.org> wrote:
> > This patch adds support to gcc's diagnostic subsystem for emitting
> > diagnostics in SARIF, aka the Static Analysis Results Interchange
> > Format:
> >   https://sarifweb.azurewebsites.net/
> > by extending -fdiagnostics-format= to add two new options:
> >   -fdiagnostics-format=sarif-stderr
> > and:
> >   -fdiagnostics-format=sarif-file
> > 
> > The patch targets SARIF v2.1.0
> 
> Now that's "funny": on that very day that you pushed to GCC
> "diagnostics: add SARIF output format", I'd been attending at ISC 2022
> the "Compiler-assisted Correctness Checking and Performance
> Optimization
> for HPC" (C3PO) workshop, <
> https://c3po-workshop.github.io/2022/program>,
> where in his interesting keynote "On the Benefits of Software
> Verification Competitions for HPC", Markus Schordan (in CC just for
> your
> information) had a number of generally positive :-) mentions of GCC's
> Static Analyzer -- just also did comment that it doesn't support the
> standard SARIF output format.  Seems that issue is now resolved.  :-)

Thanks for the heads-up!

> 
> He generally also covered other fundamental aspects, such as the
> difference between "sound" vs. "complete" analysis.  See
> < 
> http://www.pl-enthusiast.net/2017/10/23/what-is-soundness-in-static-analysis/
> >
> "What is soundness (in static analysis)?", or
> < 
> https://bertrandmeyer.com/2019/04/21/soundness-completeness-precision/>
> "Soundness and completeness: with precision", for example.  As I
> remember, it was stated that it's unclear which one GCC's Static
> Analyzer
> strives for; may want to clarify that, in the manual:
> , I
> suppose?

Thanks.  I'd thought I'd mentioned it in the official docs, but I think
I've only ever said it in presentations: GCC's -fanalyzer is *neither*
sound nor complete: it can fail to find real problems, and it can
report false positives.

Specifically, the analyzer's program_state classes contain various
approximations, and further, there are various places in the
representation of program state where the analyzer merges "sufficiently
similar" states at a program point, which can lead to false positives. 

Also, the analyzer tracks state in a very coarse-grained way, which can
lead to "state explosions", and so I have various heuristics where the
analysis simply gives up if it's seen too many unmergable states
(either at a given program point, or globally), leading to failing to
fully explore the software-under-test.  Also, the way I check
feasibility of constraints along execution paths is another
approximation, which can falsely reject a path as infeasible (leading
to false negatives).  Plus there are bugs...

I tend to think of -fanalyzer as a family of more expensive GCC
warnings, rather than a formal verification tool; I try to run it on
real-world C code, and try to ensure it generates helpful results with
a decent "signal:noise ratio", but it's not going to be suitable for
formally proving the absence of undefined behaviors.

> 
> 
> Plus, probably a few more things relevant for GCC's Static Analyzer,
> that
> I don't currently remember; I didn't take notes.  Maybe Markus is
> going
> to upload his presentation on , or
> would like to make it available to you in another way?

The talk sounds very interesting, so yes, I'm keen on seeing it if it
was recorded (or are there slides?).

> 
> 
> Note that I'm really just relaying information here, but other than
> general interest, I'm myself not too familiar with the details of
> Static
> Analysis.  Just thought that you would appreciate hearing about GCC's
> Static Analyzer "spotted in the wild".


Thanks!
Dave

> 
> 
> Grüße
>  Thomas
> 
> 
> > This is a JSON-based format suited for capturing the results of
> > static
> > analysis tools (like GCC's -fanalyzer), but it can also be used for
> > plain
> > GCC warnings and errors.
> > 
> > SARIF supports per-event metadata in diagnostic paths such as
> > ["acquire", "resource"] and ["release", "lock"] (specifically, the
> > threadFlowLocation "kinds" property: SARIF v2.1.0 section 3.38.8),
> > so
> > the patch extends GCC's diagnostic_event subclass with a "struct
> > meaning"
> > with similar purpose.  The patch implements this for -fanalyzer so
> > that
> > the various state-machine-based warnings set these in the SARIF
> > output.
> > 
> > The heart of the implementation is in the new file
> > diagnostic-format-sarif.cc.  Much of the rest of the patch is
> > interface
> > classes, isolating the diagnostic subsystem (which has no knowledge
> > of
> > e.g. tree or langhook) from the "client" code in the compiler
> > proper
> > cc1 etc).
> > 
> > The patch adds a langhook for specifying the SARIF v2.1.0
> > "artifact.sourceLanguage" propert

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread Segher Boessenkool

On Tue, Jun 07, 2022 at 04:17:04PM -0500, Peter Bergner wrote:
> I think I mentioned this offline, but I'd prefer a negative target flag,
> something like TARGET_NO_STORE_VECTOR_PAIR that defaults to off, meaning we'd
> generate stxvp by default.

NAK.  All negatives should be -mno-xxx with -mxxx the corresponding
positive.  All of them.

>  Then I'd like to see MASK_NO_STORE_VECTOR_PAIR
> added to power10's rs6000-cpu.def definition.  That way, stxvp isn't generated
> on Power10, but would be by default on any possible future cpus without
> having to add a flag to those cpus rs6000-cpu.def entries.

The p10 cpu support should simply not enable the option by default.
There is no reason to play games.


Segher

Re: [ping2][PATCH 0/8][RFC] Support BTF decl_tag and type_tag annotations

2022-06-07 Thread David Faust via Gcc-patches




On 6/2/22 19:04, Yonghong Song wrote:
> 
> 
> On 5/27/22 12:56 PM, David Faust wrote:
>>
>>
>> On 5/26/22 00:29, Yonghong Song wrote:
>>>
>>>
>>> On 5/24/22 10:04 AM, David Faust wrote:


 On 5/24/22 09:03, Yonghong Song wrote:
>
>
> On 5/24/22 8:53 AM, David Faust wrote:
>>
>>
>> On 5/24/22 04:07, Jose E. Marchesi wrote:
>>>
 On 5/11/22 11:44 AM, David Faust wrote:
>
> On 5/10/22 22:05, Yonghong Song wrote:
>>
>>
>> On 5/10/22 8:43 PM, Yonghong Song wrote:
>>>
>>>
>>> On 5/6/22 2:18 PM, David Faust wrote:


 On 5/5/22 16:00, Yonghong Song wrote:
>
>
> On 5/4/22 10:03 AM, David Faust wrote:
>>
>>
>> On 5/3/22 15:32, Joseph Myers wrote:
>>> On Mon, 2 May 2022, David Faust via Gcc-patches wrote:
>>>
 Consider the following example:

  #define __typetag1 
 __attribute__((btf_type_tag("tag1")))
  #define __typetag2 
 __attribute__((btf_type_tag("tag2")))
  #define __typetag3 
 __attribute__((btf_type_tag("tag3")))

  int __typetag1 * __typetag2 __typetag3 * g;

 The expected behavior is that 'g' is "a pointer with tags
 'tag2' and
 'tag3',
 to a pointer with tag 'tag1' to an int". i.e.:
>>>
>>> That's not a correct expectation for either GNU __attribute__ or
>>> C2x [[]]
>>> attribute syntax.  In either syntax, __typetag2 __typetag3 
>>> should
>>> apply to
>>> the type to which g points, not to g or its type, just as if
>>> you had a
>>> type qualifier there.  You'd need to put the attributes (or
>>> qualifier)
>>> after the *, not before, to make them apply to the pointer
>>> type.  See
>>> "Attribute Syntax" in the GCC manual for how the syntax is
>>> defined for
>>> GNU
>>> attributes and deduce in turn, for each subsequence of the 
>>> tokens
>>> matching
>>> the syntax for some kind of declarator, what the type for "T D1"
>>> would be
>>> as defined there and in the C standard, as deduced from the 
>>> type for
>>> "T D"
>>> for a sub-declarator D.
>>>   >> But GCC's attribute parsing produces a variable 'g'
>>> which is "a
>> pointer with
 tag 'tag1' to a pointer with tags 'tag2' and 'tag3' to an
 int", i.e.
>>>
>>> In GNU syntax, __typetag1 applies to the declaration, whereas 
>>> in C2x
>>> syntax it applies to int.  Again, if you wanted it to apply to 
>>> the
>>> pointer
>>> type it would need to go after the * not before.
>>>
>>> If you are concerned with the fine details of what construct an
>>> attribute
>>> appertains to, I recommend using C2x syntax not GNU syntax.
>>>
>>
>> Joseph, thank you! This is very helpful. My understanding of
>> the syntax
>> was not correct.
>>
>> (Actually, I made a bad mistake in paraphrasing this example 
>> from the
>> discussion of it in the series cover letter. But, the reason
>> why it is
>> incorrect is the same.)
>>
>>
>> Yonghong, is the specific ordering an expectation in BPF 
>> programs or
>> other users of the tags?
>
> This is probably a language writing issue. We are saying tags only
> apply to pointer. We probably should say it only apply to pointee.
>
> $ cat t.c
> int const *ptr;
>
> the llvm ir debuginfo:
>
> !5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 
> 64)
> !6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7)
> !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
>
> We could replace 'const' with a tag like below:
>
> int __attribute__((btf_type_tag("tag"))) *ptr;
>
> !5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 
> 64,
> annotations: !7)
> !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
> !7 = !{!8}
> !8

[PATCH 1/9] dwarf: add dw_get_die_parent function

2022-06-07 Thread David Faust via Gcc-patches

gcc/

* dwarf2out.cc (dw_get_die_parent): New function.
* dwarf2out.h (dw_get_die_parent): Declare it here.
---
 gcc/dwarf2out.cc | 8 
 gcc/dwarf2out.h  | 1 +
 2 files changed, 9 insertions(+)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 29f32ec6939..9c61026bb34 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -5235,6 +5235,14 @@ dw_get_die_sib (dw_die_ref die)
   return die->die_sib;
 }
 
+/* Return a reference to the parent of a given DIE.  */
+
+dw_die_ref
+dw_get_die_parent (dw_die_ref die)
+{
+  return die->die_parent;
+}
+
 /* Add an address constant attribute value to a DIE.  When using
dwarf_split_debug_info, address attributes in dies destined for the
final executable should be direct references--setting the parameter
diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h
index 656ef94afde..e6962fb4848 100644
--- a/gcc/dwarf2out.h
+++ b/gcc/dwarf2out.h
@@ -455,6 +455,7 @@ extern dw_die_ref lookup_type_die (tree);
 
 extern dw_die_ref dw_get_die_child (dw_die_ref);
 extern dw_die_ref dw_get_die_sib (dw_die_ref);
+extern dw_die_ref dw_get_die_parent (dw_die_ref);
 extern enum dwarf_tag dw_get_die_tag (dw_die_ref);
 
 /* Data about a single source file.  */
-- 
2.36.1

[PATCH 0/9] Add debug_annotate attributes

2022-06-07 Thread David Faust via Gcc-patches

Hello,

This patch series adds support for:

- Two new C-language-level attributes that allow to associate (to "annotate" or
  to "tag") particular declarations and types with arbitrary strings. As
  explained below, this is intended to be used to, for example, characterize
  certain pointer types.

- The conveyance of that information in the DWARF output in the form of a new
  DIE: DW_TAG_GNU_annotation.

- The conveyance of that information in the BTF output in the form of two new
  kinds of BTF objects: BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG.

All of these facilities are being added to the eBPF ecosystem, and support for
them exists in some form in LLVM.

Purpose
===

1)  Addition of C-family language constructs (attributes) to specify free-text
tags on certain language elements, such as struct fields.

The purpose of these annotations is to provide additional information about
types, variables, and function parameters of interest to the kernel. A
driving use case is to tag pointer types within the linux kernel and eBPF
programs with additional semantic information, such as '__user' or '__rcu'.

For example, consider the linux kernel function do_execve with the
following declaration:

  static int do_execve(struct filename *filename,
 const char __user *const __user *__argv,
 const char __user *const __user *__envp);

Here, __user could be defined with these annotations to record semantic
information about the pointer parameters (e.g., they are user-provided) in
DWARF and BTF information. Other kernel facilities such as the eBPF verifier
can read the tags and make use of the information.

2)  Conveying the tags in the generated DWARF debug info.

The main motivation for emitting the tags in DWARF is that the Linux kernel
generates its BTF information via pahole, using DWARF as a source:

++  BTF  BTF   +--+
| pahole |---> vmlinux.btf --->| verifier |
++ +--+
^^
||
  DWARF |BTF |
||
 vmlinux  +-+
 module1.ko   | BPF program |
 module2.ko   +-+
   ...

This is because:

a)  Unlike GCC, LLVM will only generate BTF for BPF programs.

b)  GCC can generate BTF for whatever target with -gbtf, but there is no
support for linking/deduplicating BTF in the linker.

In the scenario above, the verifier needs access to the pointer tags of
both the kernel types/declarations (conveyed in the DWARF and translated
to BTF by pahole) and those of the BPF program (available directly in BTF).

Another motivation for having the tag information in DWARF, unrelated to
BPF and BTF, is that the drgn project (another DWARF consumer) also wants
to benefit from these tags in order to differentiate between different
kinds of pointers in the kernel.

3)  Conveying the tags in the generated BTF debug info.

This is easy: the main purpose of having this info in BTF is for the
compiled eBPF programs. The kernel verifier can then access the tags
of pointers used by the eBPF programs.


For more information about these tags and the motivation behind them, please
refer to the following linux kernel discussions:

  https://lore.kernel.org/bpf/20210914223004.244411-1-...@fb.com/
  https://lore.kernel.org/bpf/20211012164838.3345699-1-...@fb.com/
  https://lore.kernel.org/bpf/2022012604.1504583-1-...@fb.com/


Implementation Overview
===

To enable these annotations, two new C language attributes are added:
__attribute__((debug_annotate_decl("foo"))) and
__attribute__((debug_annotate_type("bar"))). Both attributes accept a single
arbitrary string constant argument, which will be recorded in the generated
DWARF and/or BTF debug information. They have no effect on code generation.

Note that we are not using the same attribute names as LLVM (btf_decl_tag and
btf_type_tag, respectively). While these attributes are functionally very
similar, they have grown beyond purely BTF-specific uses, so inclusion of "btf"
in the attribute name seems misleading.

DWARF support is enabled via a new DW_TAG_GNU_annotation. When generating DWARF,
declarations and types will be checked for the corresponding attributes. If
present, a DW_TAG_GNU_annotation DIE will be created as a child of the DIE for
the annotated type or declaration, one for each tag. These DIEs link the
arbitrary tag value to the item they annotate.

For example, the following variable declaration:

  #define __typetag1 __attribute__((debug_annotate_type ("typetag1")))

  #define __decltag1 __attribute__((debug_a

[PATCH 2/9] include: Add new definitions

2022-06-07 Thread David Faust via Gcc-patches

include/

* btf.h: Add BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG defines. Update
comments.
(struct btf_decl_tag): New.
* dwarf2.def: Add new DWARF extension DW_TAG_GNU_annotation.
---
 include/btf.h  | 17 +++--
 include/dwarf2.def |  4 
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/include/btf.h b/include/btf.h
index 78b551ced23..37deaef8b48 100644
--- a/include/btf.h
+++ b/include/btf.h
@@ -69,7 +69,7 @@ struct btf_type
 
   /* SIZE is used by INT, ENUM, STRUCT, UNION, DATASEC kinds.
  TYPE is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, FUNC,
- FUNC_PROTO and VAR kinds.  */
+ FUNC_PROTO, VAR and DECL_TAG kinds.  */
   union
   {
 uint32_t size; /* Size of the entire type, in bytes.  */
@@ -109,7 +109,9 @@ struct btf_type
 #define BTF_KIND_VAR   14  /* Variable.  */
 #define BTF_KIND_DATASEC   15  /* Section such as .bss or .data.  */
 #define BTF_KIND_FLOAT 16  /* Floating point.  */
-#define BTF_KIND_MAX   BTF_KIND_FLOAT
+#define BTF_KIND_DECL_TAG  17  /* Decl Tag.  */
+#define BTF_KIND_TYPE_TAG  18  /* Type Tag.  */
+#define BTF_KIND_MAX   BTF_KIND_TYPE_TAG
 #define NR_BTF_KINDS   (BTF_KIND_MAX + 1)
 
 /* For some BTF_KINDs, struct btf_type is immediately followed by
@@ -190,6 +192,17 @@ struct btf_var_secinfo
   uint32_t size;   /* Size (in bytes) of variable.  */
 };
 
+/* BTF_KIND_DECL_TAG is followed by a single struct btf_decl_tag, which
+   describes the tag location:
+   - If component_idx == -1, then the tag is applied to a struct, union,
+ variable or function.
+   - Otherwise it is applied to a struct/union member or function argument
+ with the given index numbered 0..vlen-1.  */
+struct btf_decl_tag
+{
+  int32_t component_idx;
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/dwarf2.def b/include/dwarf2.def
index 530c6f849f9..a1f7a47a036 100644
--- a/include/dwarf2.def
+++ b/include/dwarf2.def
@@ -174,6 +174,10 @@ DW_TAG (DW_TAG_GNU_formal_parameter_pack, 0x4108)
are properly part of DWARF 5.  */
 DW_TAG (DW_TAG_GNU_call_site, 0x4109)
 DW_TAG (DW_TAG_GNU_call_site_parameter, 0x410a)
+
+/* Extension for BTF annotations.  */
+DW_TAG (DW_TAG_GNU_annotation, 0x6000)
+
 /* Extensions for UPC.  See: http://dwarfstd.org/doc/DWARF4.pdf.  */
 DW_TAG (DW_TAG_upc_shared_type, 0x8765)
 DW_TAG (DW_TAG_upc_strict_type, 0x8766)
-- 
2.36.1

[PATCH 3/9] c-family: Add debug_annotate attribute handlers

2022-06-07 Thread David Faust via Gcc-patches

This patch adds attribute handlers for two new attributes:
"debug_annotate_decl" and "debug_annotate_type". Both attributes accept
a single string argument, and are used to add arbitrary annotations to
debug information generated for the decls or types to which they apply.

gcc/c-family/

* c-attribs.cc (c_common_attribute_table): Add new attributes
debug_annotate_decl and debug_annotate_type.
(handle_debug_annotate_decl_attribute): New.
(handle_debug_annotate_type_attribute): Likewise.
---
 gcc/c-family/c-attribs.cc | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index c8d96723f4c..50e8fc1b695 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -174,6 +174,9 @@ static tree handle_signed_bool_precision_attribute (tree *, 
tree, tree, int,
bool *);
 static tree handle_retain_attribute (tree *, tree, tree, int, bool *);
 
+static tree handle_debug_annotate_decl_attribute (tree *, tree, tree, int, 
bool *);
+static tree handle_debug_annotate_type_attribute (tree *, tree, tree, int, 
bool *);
+
 /* Helper to define attribute exclusions.  */
 #define ATTR_EXCL(name, function, type, variable)  \
   { name, function, type, variable }
@@ -555,6 +558,10 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_dealloc_attribute, NULL },
   { "tainted_args",  0, 0, true,  false, false, false,
  handle_tainted_args_attribute, NULL },
+  { "debug_annotate_decl",1, 1, false, false, false, false,
+ handle_debug_annotate_decl_attribute, NULL },
+  { "debug_annotate_type",1, 1, false, true, false, false,
+ handle_debug_annotate_type_attribute, NULL },
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
 };
 
@@ -5868,6 +5875,42 @@ handle_tainted_args_attribute (tree *node, tree name, 
tree, int,
   return NULL_TREE;
 }
 
+/* Handle a "debug_annotate_decl" attribute; arguments as in
+   struct attribute_spec.handler.   */
+
+static tree
+handle_debug_annotate_decl_attribute (tree *, tree name, tree args, int,
+ bool *no_add_attrs)
+{
+  if (!args)
+*no_add_attrs = true;
+  else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+{
+  error ("%qE attribute requires a string", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
+
+/* Handle a "debug_annotate_type" attribute; arguments as in
+   struct attribute_spec.handler.   */
+
+static tree
+handle_debug_annotate_type_attribute (tree *, tree name, tree args, int,
+ bool *no_add_attrs)
+{
+  if (!args)
+*no_add_attrs = true;
+  else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+{
+  error ("%qE attribute requires a string", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
+
 /* Attempt to partially validate a single attribute ATTR as if
it were to be applied to an entity OPER.  */
 
-- 
2.36.1

[PATCH 5/9] ctfc: pass through debug annotations to BTF

2022-06-07 Thread David Faust via Gcc-patches

BTF generation currently relies on the internal CTF representation to
convert debug info from DWARF dies. This patch adds a new internal
header, "ctf-int.h", which defines CTF kinds to be used internally to
represent BTF tags which must pass through the CTF container. It also
adds a new type for representing information specific to those tags, and
a member for that type in ctf_dtdef.

This patch also updates ctf_add_reftype to accept a const char * name,
and to pass it on to ctf_add_generic so that the newly added type is named.

gcc/

* ctf-int.h: New file.
* ctfc.cc (ctf_add_reftype): Add NAME parameter. Pass it to
ctf_add_generic call.
(ctf_add_pointer): Update ctf_add_reftype call accordingly.
* ctfc.h (ctf_add_reftype): Analogous change.
(ctf_btf_annotation): New.
(ctf_dtdef): Add member for it.
(enum ctf_dtu_d_union_enum): Likewise.
* dwarf2ctf.cc (gen_ctf_modifier_type): Update call to
ctf_add_reftype accordingly.
---
 gcc/ctf-int.h| 29 +
 gcc/ctfc.cc  | 11 +++
 gcc/ctfc.h   | 17 ++---
 gcc/dwarf2ctf.cc |  2 +-
 4 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 gcc/ctf-int.h

diff --git a/gcc/ctf-int.h b/gcc/ctf-int.h
new file mode 100644
index 000..fb5f4aacad6
--- /dev/null
+++ b/gcc/ctf-int.h
@@ -0,0 +1,29 @@
+/* ctf-int.h - GCC internal definitions used for CTF debug info.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_CTF_INT_H
+#define GCC_CTF_INT_H 1
+
+/* These CTF kinds only exist as a bridge to generating BTF types for
+   BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG. They do not correspond to any
+   representable type kind in CTF.  */
+#define CTF_K_DECL_TAG  62
+#define CTF_K_TYPE_TAG  63
+
+#endif /* GCC_CTF_INT_H */
diff --git a/gcc/ctfc.cc b/gcc/ctfc.cc
index f24e7bff948..a0404520b2a 100644
--- a/gcc/ctfc.cc
+++ b/gcc/ctfc.cc
@@ -107,6 +107,9 @@ ctf_dtu_d_union_selector (ctf_dtdef_ref ctftype)
   return CTF_DTU_D_ARGUMENTS;
 case CTF_K_SLICE:
   return CTF_DTU_D_SLICE;
+case CTF_K_DECL_TAG:
+case CTF_K_TYPE_TAG:
+  return CTF_DTU_D_BTFNOTE;
 default:
   /* The largest member as default.  */
   return CTF_DTU_D_ARRAY;
@@ -428,15 +431,15 @@ ctf_add_encoded (ctf_container_ref ctfc, uint32_t flag, 
const char * name,
 }
 
 ctf_id_t
-ctf_add_reftype (ctf_container_ref ctfc, uint32_t flag, ctf_id_t ref,
-uint32_t kind, dw_die_ref die)
+ctf_add_reftype (ctf_container_ref ctfc, uint32_t flag, const char * name,
+ctf_id_t ref, uint32_t kind, dw_die_ref die)
 {
   ctf_dtdef_ref dtd;
   ctf_id_t type;
 
   gcc_assert (ref <= CTF_MAX_TYPE);
 
-  type = ctf_add_generic (ctfc, flag, NULL, &dtd, die);
+  type = ctf_add_generic (ctfc, flag, name, &dtd, die);
   dtd->dtd_data.ctti_info = CTF_TYPE_INFO (kind, flag, 0);
   /* Caller of this API must guarantee that a CTF type with id = ref already
  exists.  This will also be validated for us at link-time.  */
@@ -548,7 +551,7 @@ ctf_id_t
 ctf_add_pointer (ctf_container_ref ctfc, uint32_t flag, ctf_id_t ref,
 dw_die_ref die)
 {
-  return (ctf_add_reftype (ctfc, flag, ref, CTF_K_POINTER, die));
+  return (ctf_add_reftype (ctfc, flag, NULL, ref, CTF_K_POINTER, die));
 }
 
 ctf_id_t
diff --git a/gcc/ctfc.h b/gcc/ctfc.h
index 001e544ef08..fab18f024d7 100644
--- a/gcc/ctfc.h
+++ b/gcc/ctfc.h
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dwarf2ctf.h"
 #include "ctf.h"
 #include "btf.h"
+#include "ctf-int.h"
 
 /* Invalid CTF type ID definition.  */
 
@@ -151,6 +152,13 @@ typedef struct GTY (()) ctf_func_arg
 
 #define ctf_farg_list_next(elem) ((ctf_func_arg_t *)((elem)->farg_next))
 
+/* BTF support: a BTF type tag or decl tag.  */
+
+typedef struct GTY (()) ctf_btf_annotation
+{
+  uint32_t component_idx;
+} ctf_btf_annotation_t;
+
 /* Type definition for CTF generation.  */
 
 struct GTY ((for_user)) ctf_dtdef
@@ -173,6 +181,8 @@ struct GTY ((for_user)) ctf_dtdef
 ctf_func_arg_t * GTY ((tag ("CTF_DTU_D_ARGUMENTS"))) dtu_argv;
 /* slice.  */
 ctf_sliceinfo_t GTY ((tag ("CTF_DTU_D_SLICE"))) dtu_slice;
+/* btf annotation.  */
+ctf_btf_annotation_t GTY ((tag ("CTF_DTU_D_BTFNOTE"))) dtu_btfnote;
   } dtd_u;
 };
 
@@ -212,7 +222,8 @@ enum ctf_dtu_d_union

[PATCH 7/9] btf: output decl_tag and type_tag records

2022-06-07 Thread David Faust via Gcc-patches

This patch updates btfout.cc to be aware of debug annotations, convert
them to BTF_KIND_DECL_TAG and BTF_KIND_TYPE_TAG records, and output them
appropriately.

gcc/

* btfout.cc (get_btf_kind): Handle TYPE_TAG and DECL_TAG kinds.
(btf_calc_num_vbytes): Likewise.
(btf_asm_type): Likewise.
(output_asm_btf_vlen_bytes): Likewise.
---
 gcc/btfout.cc | 28 
 1 file changed, 28 insertions(+)

diff --git a/gcc/btfout.cc b/gcc/btfout.cc
index 31af50521da..f291cd925be 100644
--- a/gcc/btfout.cc
+++ b/gcc/btfout.cc
@@ -136,6 +136,8 @@ get_btf_kind (uint32_t ctf_kind)
 case CTF_K_VOLATILE: return BTF_KIND_VOLATILE;
 case CTF_K_CONST:return BTF_KIND_CONST;
 case CTF_K_RESTRICT: return BTF_KIND_RESTRICT;
+case CTF_K_TYPE_TAG: return BTF_KIND_TYPE_TAG;
+case CTF_K_DECL_TAG: return BTF_KIND_DECL_TAG;
 default:;
 }
   return BTF_KIND_UNKN;
@@ -201,6 +203,7 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
 case BTF_KIND_CONST:
 case BTF_KIND_RESTRICT:
 case BTF_KIND_FUNC:
+case BTF_KIND_TYPE_TAG:
 /* These kinds have no vlen data.  */
   break;
 
@@ -238,6 +241,10 @@ btf_calc_num_vbytes (ctf_dtdef_ref dtd)
   vlen_bytes += vlen * sizeof (struct btf_var_secinfo);
   break;
 
+case BTF_KIND_DECL_TAG:
+  vlen_bytes += sizeof (struct btf_decl_tag);
+  break;
+
 default:
   break;
 }
@@ -636,6 +643,22 @@ btf_asm_type (ctf_container_ref ctfc, ctf_dtdef_ref dtd)
   dw2_asm_output_data (4, dtd->dtd_data.ctti_size, "btt_size: %uB",
   dtd->dtd_data.ctti_size);
   return;
+case BTF_KIND_DECL_TAG:
+  {
+   /* A decl tag might refer to (be the child DIE of) a variable. Try to
+  lookup the parent DIE's CTF variable, and if it exists point to the
+  corresponding BTF variable. This is an odd construction - we have a
+  'type' which refers to a variable, rather than the reverse.  */
+   dw_die_ref parent = dw_get_die_parent (dtd->dtd_key);
+   ctf_dvdef_ref dvd = ctf_dvd_lookup (ctfc, parent);
+   if (dvd)
+ {
+   unsigned int var_id =
+ *(btf_var_ids->get (dvd)) + num_types_added + 1;
+   dw2_asm_output_data (4, var_id, "btt_type");
+   return;
+ }
+  }
 default:
   break;
 }
@@ -949,6 +972,11 @@ output_asm_btf_vlen_bytes (ctf_container_ref ctfc, 
ctf_dtdef_ref dtd)
 at this point.  */
   gcc_unreachable ();
 
+case BTF_KIND_DECL_TAG:
+  dw2_asm_output_data (4, dtd->dtd_u.dtu_btfnote.component_idx,
+  "decltag_compidx");
+  break;
+
 default:
   /* All other BTF type kinds have no variable length data.  */
   break;
-- 
2.36.1

[PATCH 4/9] dwarf: generate annotation DIEs

2022-06-07 Thread David Faust via Gcc-patches

The "debug_annotate_decl" and "debug_annotate_type" attributes are
handled by constructing DW_TAG_GNU_annotation DIEs. These DIEs are
children of the declarations or types which they annotate, and convey
the information via a string constant.

gcc/

* dwarf2out.cc (gen_decl_annotation_dies): New function.
(gen_type_annotation_dies): Likewise.
(modified_type_die): Call them here, if appropriate.
(gen_formal_parameter_die): Likewise.
(gen_typedef_die): Likewise.
(gen_type_die): Likewise.
(gen_decl_die): Likewise.
---
 gcc/dwarf2out.cc | 94 
 1 file changed, 94 insertions(+)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 9c61026bb34..aff9f72bd55 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -13611,6 +13611,78 @@ long_double_as_float128 (tree type)
   return NULL_TREE;
 }
 
+/* Given a tree T, which may be a decl or a type, process any
+   "debug_annotate_decl" attributes on T. Construct
+   DW_TAG_GNU_annotation DIEs appropriately as children of TARGET, usually
+   the DIE for T.  */
+
+static void
+gen_decl_annotation_dies (tree t, dw_die_ref target)
+{
+  dw_die_ref die;
+  tree attr;
+
+  if (t == NULL_TREE || !target)
+return;
+
+  if (TYPE_P (t))
+attr = lookup_attribute ("debug_annotate_decl", TYPE_ATTRIBUTES (t));
+  else if (DECL_P (t))
+attr = lookup_attribute ("debug_annotate_decl", DECL_ATTRIBUTES (t));
+  else
+/* This is an error.  */
+gcc_unreachable ();
+
+  while (attr != NULL_TREE)
+{
+  die = new_die (DW_TAG_GNU_annotation, target, t);
+  add_name_attribute (die, IDENTIFIER_POINTER (get_attribute_name (attr)));
+  add_AT_string (die, DW_AT_const_value,
+TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
+  attr = TREE_CHAIN (attr);
+}
+
+  /* Strip the decl tag attribute to avoid creating multiple copies if we hit
+ this tree node again in some recursive call.  */
+  if (TYPE_P (t))
+TYPE_ATTRIBUTES (t) =
+  remove_attribute ("debug_annotate_decl", TYPE_ATTRIBUTES (t));
+  else if (DECL_P (t))
+DECL_ATTRIBUTES (t) =
+  remove_attribute ("debug_annotate_decl", DECL_ATTRIBUTES (t));
+}
+
+/* Given a tree TYPE, process any "debug_annotate_type" attributes on
+   TYPE. Construct DW_TAG_GNU_annotation DIEs appropriately as children of
+   TARGET, usually the DIE for TYPE.  */
+
+static void
+gen_type_annotation_dies (tree type, dw_die_ref target)
+{
+  dw_die_ref die;
+  tree attr;
+
+  if (type == NULL_TREE || !target)
+return;
+
+  gcc_assert (TYPE_P (type));
+
+  attr = lookup_attribute ("debug_annotate_type", TYPE_ATTRIBUTES (type));
+  while (attr != NULL_TREE)
+{
+  die = new_die (DW_TAG_GNU_annotation, target, type);
+  add_name_attribute (die, IDENTIFIER_POINTER (get_attribute_name (attr)));
+  add_AT_string (die, DW_AT_const_value,
+TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
+  attr = TREE_CHAIN (attr);
+}
+
+  /* Strip the type tag attribute to avoid creating multiple copies if we hit
+ this type again in some recursive call.  */
+  TYPE_ATTRIBUTES (type) =
+remove_attribute ("debug_annotate_type", TYPE_ATTRIBUTES (type));
+}
+
 /* Given a pointer to an arbitrary ..._TYPE tree node, return a debugging
entry that chains the modifiers specified by CV_QUALS in front of the
given type.  REVERSE is true if the type is to be interpreted in the
@@ -14009,6 +14081,9 @@ modified_type_die (tree type, int cv_quals, bool 
reverse,
   if (TYPE_ARTIFICIAL (type))
 add_AT_flag (mod_type_die, DW_AT_artificial, 1);
 
+  /* Generate any annotation DIEs on this type.  */
+  gen_type_annotation_dies (type, mod_type_die);
+
   return mod_type_die;
 }
 
@@ -23002,6 +23077,9 @@ gen_formal_parameter_die (tree node, tree origin, bool 
emit_name_p,
   gcc_unreachable ();
 }
 
+  /* Generate any annotation DIEs for this decl.  */
+  gen_decl_annotation_dies (node, parm_die);
+
   return parm_die;
 }
 
@@ -26076,6 +26154,9 @@ gen_typedef_die (tree decl, dw_die_ref context_die)
 
   if (get_AT (type_die, DW_AT_name))
 add_pubtype (decl, type_die);
+
+  /* Generate any annotation DIEs for the typedef.  */
+  gen_decl_annotation_dies (decl, type_die);
 }
 
 /* Generate a DIE for a struct, class, enum or union type.  */
@@ -26389,6 +26470,16 @@ gen_type_die (tree type, dw_die_ref context_die)
  if (die)
check_die (die);
}
+
+  /* Generate any annotation DIEs on the type.  */
+  dw_die_ref die = lookup_type_die (type);
+  if (die)
+   {
+ gen_type_annotation_dies (type, die);
+
+ /* "decl" annotations may also be attached to a type.  */
+ gen_decl_annotation_dies (type, die);
+   }
 }
 }
 
@@ -27145,6 +27236,9 @@ gen_decl_die (tree decl, tree origin, struct 
vlr_context *ctx,
   break;
 }
 
+  /* Generate any annotation D

[PATCH 6/9] dwarf2ctf: convert annotation DIEs to CTF types

2022-06-07 Thread David Faust via Gcc-patches

This patch makes the DWARF-to-CTF conversion process aware of the new
DW_TAG_GNU_annotation DIEs. The DIEs are converted to an internal-only
CTF representation as appropriate and added to the compilation unit CTF
container.

gcc/

* dwarf2ctf.cc (handle_debug_annotations): New function.
(gen_ctf_sou_type): Call it here, if appropriate. Don't try to
create member types for children that are not DW_TAG_member.
(gen_ctf_function_type): Call handle_debug_annotations if
appropriate.
(gen_ctf_variable): Likewise.
(gen_ctf_function): Likewise.
(gen_ctf_type): Likewise.
---
 gcc/dwarf2ctf.cc | 112 ++-
 1 file changed, 111 insertions(+), 1 deletion(-)

diff --git a/gcc/dwarf2ctf.cc b/gcc/dwarf2ctf.cc
index 393aa92d71d..65714e5d3b9 100644
--- a/gcc/dwarf2ctf.cc
+++ b/gcc/dwarf2ctf.cc
@@ -32,6 +32,12 @@ along with GCC; see the file COPYING3.  If not see
 static ctf_id_t
 gen_ctf_type (ctf_container_ref, dw_die_ref);
 
+static void
+gen_ctf_variable (ctf_container_ref, dw_die_ref);
+
+static void
+handle_debug_annotations (ctf_container_ref, dw_die_ref, ctf_id_t, int);
+
 /* All the DIE structures we handle come from the DWARF information
generated by GCC.  However, there are three situations where we need
to create our own created DIE structures because GCC doesn't
@@ -547,6 +553,7 @@ gen_ctf_sou_type (ctf_container_ref ctfc, dw_die_ref sou, 
uint32_t kind)
   /* Now process the struct members.  */
   {
 dw_die_ref c;
+int idx = 0;
 
 c = dw_get_die_child (sou);
 if (c)
@@ -559,6 +566,12 @@ gen_ctf_sou_type (ctf_container_ref ctfc, dw_die_ref sou, 
uint32_t kind)
 
  c = dw_get_die_sib (c);
 
+ if (dw_get_die_tag (c) != DW_TAG_member)
+   continue;
+
+ if (c == dw_get_die_child (sou))
+   idx = 0;
+
  field_name = get_AT_string (c, DW_AT_name);
  field_type = ctf_get_AT_type (c);
  field_location = ctf_get_AT_data_member_location (c);
@@ -626,6 +639,12 @@ gen_ctf_sou_type (ctf_container_ref ctfc, dw_die_ref sou, 
uint32_t kind)
 field_name,
 field_type_id,
 field_location);
+
+ /* Handle BTF tags on the member.  */
+ if (btf_debuginfo_p ())
+   handle_debug_annotations (ctfc, c, sou_type_id, idx);
+
+ idx++;
}
   while (c != dw_get_die_child (sou));
   }
@@ -716,6 +735,9 @@ gen_ctf_function_type (ctf_container_ref ctfc, dw_die_ref 
function,
  arg_type = gen_ctf_type (ctfc, ctf_get_AT_type (c));
  /* Add the argument to the existing CTF function type.  */
  ctf_add_function_arg (ctfc, function, arg_name, arg_type);
+
+ if (btf_debuginfo_p ())
+   handle_debug_annotations (ctfc, c, function_type_id, i - 1);
}
  else
/* This is a local variable.  Ignore.  */
@@ -828,6 +850,10 @@ gen_ctf_variable (ctf_container_ref ctfc, dw_die_ref die)
   /* Skip updating the number of global objects at this time.  This is updated
  later after pre-processing as some CTF variable records although
  generated now, will not be emitted later.  [PR105089].  */
+
+  /* Handle any BTF tags on the variable.  */
+  if (btf_debuginfo_p ())
+handle_debug_annotations (ctfc, die, CTF_NULL_TYPEID, -1);
 }
 
 /* Add a CTF function record for the given input DWARF DIE.  */
@@ -845,8 +871,12 @@ gen_ctf_function (ctf_container_ref ctfc, dw_die_ref die)
  counter.  Note that DWARF encodes function types in both
  DW_TAG_subroutine_type and DW_TAG_subprogram in exactly the same
  way.  */
-  (void) gen_ctf_function_type (ctfc, die, true /* from_global_func */);
+  function_type_id = gen_ctf_function_type (ctfc, die, true /* 
from_global_func */);
   ctfc->ctfc_num_global_funcs += 1;
+
+  /* Handle any BTF tags on the function itself.  */
+  if (btf_debuginfo_p ())
+handle_debug_annotations (ctfc, die, function_type_id, -1);
 }
 
 /* Add CTF type record(s) for the given input DWARF DIE and return its type id.
@@ -923,6 +953,10 @@ gen_ctf_type (ctf_container_ref ctfc, dw_die_ref die)
   break;
 }
 
+  /* Handle any BTF tags on the type.  */
+  if (btf_debuginfo_p () && !unrecog_die)
+handle_debug_annotations (ctfc, die, type_id, -1);
+
   /* For all types unrepresented in CTF, use an explicit CTF type of kind
  CTF_K_UNKNOWN.  */
   if ((type_id == CTF_NULL_TYPEID) && (!unrecog_die))
@@ -931,6 +965,82 @@ gen_ctf_type (ctf_container_ref ctfc, dw_die_ref die)
   return type_id;
 }
 
+/* BTF support. Handle any annotations attached to a given DIE, and generate
+   intermediate CTF types for them. BTF tags are inserted into the type chain
+   at this point. The return value is the CTF type ID of the last type tag
+   created (for type chaining), or the same as the argument TYPE_ID if there 
are
+

[PATCH 9/9] testsuite: add debug annotation tests

2022-06-07 Thread David Faust via Gcc-patches

This patch adds tests for debug annotations, in BTF and in DWARF.

gcc/testsuite/

* gcc.dg/debug/btf/btf-decltag-func.c: New test.
* gcc.dg/debug/btf/btf-decltag-sou.c: Likewise.
* gcc.dg/debug/btf/btf-decltag-typedef.c: Likewise.
* gcc.dg/debug/btf/btf-typetag-1.c: Likewise.
* gcc.dg/debug/dwarf2/annotation-1.c: Likewise.
* gcc.dg/debug/dwarf2/annotation-2.c: Likewise.
* gcc.dg/debug/dwarf2/annotation-3.c: Likewise.
* gcc.dg/debug/dwarf2/annotation-4.c: Likewise.
---
 .../gcc.dg/debug/btf/btf-decltag-func.c   | 18 ++
 .../gcc.dg/debug/btf/btf-decltag-sou.c| 34 +++
 .../gcc.dg/debug/btf/btf-decltag-typedef.c| 15 
 .../gcc.dg/debug/btf/btf-typetag-1.c  | 20 +++
 .../gcc.dg/debug/dwarf2/annotation-1.c| 20 +++
 .../gcc.dg/debug/dwarf2/annotation-2.c| 17 ++
 .../gcc.dg/debug/dwarf2/annotation-3.c| 20 +++
 .../gcc.dg/debug/dwarf2/annotation-4.c| 34 +++
 8 files changed, 178 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decltag-func.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decltag-sou.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-decltag-typedef.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/btf/btf-typetag-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/annotation-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/annotation-2.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/annotation-3.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/annotation-4.c

diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-func.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-func.c
new file mode 100644
index 000..b2d6820cf23
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-func.c
@@ -0,0 +1,18 @@
+
+/* { dg-do compile } */
+/* { dg-options "-O0 -gbtf -dA" } */
+
+/* { dg-final { scan-assembler-times "\[\t \]0x1100\[\t 
\]+\[^\n\]*btt_info" 4 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x\[\t 
\]+\[^\n\]*decltag_compidx" 3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x1\[\t 
\]+\[^\n\]*decltag_compidx" 1 } } */
+
+#define __tag1 __attribute__((debug_annotate_decl("decl-tag-1")))
+#define __tag2 __attribute__((debug_annotate_decl("decl-tag-2")))
+#define __tag3 __attribute__((debug_annotate_decl("decl-tag-3")))
+
+extern int bar (int __tag1, int __tag2) __tag3;
+
+int __tag1 __tag2 foo (int arg1, int *arg2 __tag2)
+  {
+return bar (arg1 + 1, *arg2 + 2);
+  }
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-sou.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-sou.c
new file mode 100644
index 000..bb125b53ce7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-sou.c
@@ -0,0 +1,34 @@
+
+/* { dg-do compile } */
+/* { dg-options "-O0 -gbtf -dA" } */
+
+/* { dg-final { scan-assembler-times "\[\t \]0x1100\[\t 
\]+\[^\n\]*btt_info" 16 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0\[\t \]+\[^\n\]*decltag_compidx" 
2 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x1\[\t 
\]+\[^\n\]*decltag_compidx" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x2\[\t 
\]+\[^\n\]*decltag_compidx" 3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x3\[\t 
\]+\[^\n\]*decltag_compidx" 3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x4\[\t 
\]+\[^\n\]*decltag_compidx" 1 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x\[\t 
\]+\[^\n\]*decltag_compidx" 6 } } */
+
+#define __tag1 __attribute__((debug_annotate_decl("decl-tag-1")))
+#define __tag2 __attribute__((debug_annotate_decl("decl-tag-2")))
+#define __tag3 __attribute__((debug_annotate_decl("decl-tag-3")))
+
+struct t {
+  int a;
+  long b __tag3;
+  char c __tag2 __tag3;
+} __tag1 __tag2;
+
+struct t my_t __tag1 __tag3;
+
+
+union u {
+  char one __tag1 __tag2;
+  short two;
+  int three __tag1;
+  long four __tag1 __tag2 __tag3;
+  long long five __tag2;
+} __tag3;
+
+union u my_u __tag2;
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-typedef.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-typedef.c
new file mode 100644
index 000..6a44aaf9623
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-decltag-typedef.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -gbtf -dA" } */
+
+/* { dg-final { scan-assembler-times "\[\t \]0x1100\[\t 
\]+\[^\n\]*btt_info" 3 } } */
+/* { dg-final { scan-assembler-times "\[\t \]0x\[\t 
\]+\[^\n\]*decltag_compidx" 3 } } */
+
+#define __tag1 __attribute__((debug_annotate_decl("decl-tag-1")))
+#define __tag2 __attribute__((debug_annotate_decl("decl-tag-2")))
+#define __tag3 __attribute__((debug_annotate_decl("decl-tag-3")))
+
+struct s { int a; } __tag1;
+
+typedef struct s * sptr __tag2;
+
+sptr my_sptr __tag3;
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-typetag-1.c 
b/gcc/testsuite/gcc.dg/debug/btf/btf-typetag-

[PATCH 8/9] doc: document new attributes

2022-06-07 Thread David Faust via Gcc-patches

gcc/

* doc/extend.texi (Common Function Attributes): Document
debug_annotate_decl attribute.
(Common Variable Attributes): Likewise.
(Common Type Attributes): Likewise. Also document
debug_annotate_type attribute.
---
 gcc/doc/extend.texi | 106 
 1 file changed, 106 insertions(+)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index a2e2a303ff1..a4c114f0e81 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2931,6 +2931,30 @@ extern __attribute__ ((alloc_size (1), malloc, nothrow))
 StrongAlias (allocate, alloc);
 @end smallexample
 
+@item debug_annotate_decl (@var{annotation})
+@cindex @code{debug_annotate_decl} function attribute
+The @code{debug_annotate_decl} attribute is used to add arbitrary
+string annotations to the debugging information produced for a given
+declaration. The attribute accepts a single string argument, and may be
+specified multiple times for a single declaration. The behavior is
+to record the string argument in debug information generated for the
+declaration. Currently, DWARF and BTF debug information are supported.
+There is no effect on code generation; the attribute has no effect at
+all if neither DWARF nor BTF are output.
+
+@smallexample
+int foo (int a, int b) __attribute__((debug_annotate_decl ("my_tag")));
+@end smallexample
+
+@noindent
+results in a DW_TAG_GNU_annotation DIE associating the string ``my_tag''
+to the function ``foo'', and/or a BTF_KIND_DECL_TAG BTF record to the
+same effect.
+
+The @code{debug_annotate_decl} attribute can also be used for
+variables and types (@pxref{Common Variable Attributes},
+@pxref{Common Type Attributes}.)
+
 @item deprecated
 @itemx deprecated (@var{msg})
 @cindex @code{deprecated} function attribute
@@ -7510,6 +7534,42 @@ but not attributes that affect a symbol's linkage or 
visibility such as
 attribute is also not copied.  @xref{Common Function Attributes}.
 @xref{Common Type Attributes}.
 
+@item debug_annotate_decl (@var{annotation})
+@cindex @code{debug_annotate_decl} variable attribute
+The @code{debug_annotate_decl} attribute is used to add arbitrary
+string annotations to the debugging information produced for a given
+declaration. The attribute accepts a single string argument, and may be
+specified multiple times for a single declaration. The behavior is
+to record the string argument in debug information generated for the
+declaration. Currently, DWARF and BTF debug information are supported.
+There is no effect on code generation; the attribute has no effect at
+all if neither DWARF nor BTF are output.
+
+@smallexample
+int my_var __attribute__((debug_annotate_decl ("my_tag")));
+@end smallexample
+
+@noindent
+results in a DW_TAG_GNU_annotation DIE associating the string ``my_tag''
+to the variable ``my_var'', and/or a BTF_KIND_DECL_TAG BTF record to the
+same effect.
+
+Annotations can be specified for declarations other than variables,
+such as struct fields. For example:
+
+@smallexample
+struct foo @{
+  int * x __attribute__ ((debug_annotate_decl ("my_tag")));
+@};
+@end smallexample
+has similar results, producing debug info which associates the string
+``my_tag'' to the struct field ``x''.
+
+@noindent
+The @code{debug_annotate_decl} attribute can also be used for
+functions and types (@pxref{Common Function Attributes},
+@pxref{Common Type Attributes}.)
+
 @item deprecated
 @itemx deprecated (@var{msg})
 @cindex @code{deprecated} variable attribute
@@ -8593,6 +8653,52 @@ A @{ /* @r{@dots{}} */ @};
 struct __attribute__ ((copy ( (struct A *)0)) B @{ /* @r{@dots{}} */ @};
 @end smallexample
 
+@item debug_annotate_decl (@var{annotation})
+@cindex @code{debug_annotate_decl} type attribute
+The @code{debug_annotate_decl} attribute is used to add arbitrary
+string annotations to the debugging information produced for a given
+type declaration. The attribute accepts a single string argument, and
+may be specified multiple times for a type declaration. The behavior
+is to record the string argument in the debug information generated
+for the declaration. Currently, DWARF and BTF debug information are
+supported. There is no effect on code generation; the attribute has no
+effect at all if neither DWARF nor BTF are output.
+
+@smallexample
+struct t @{
+/* @r{@dots{}} */
+@} __attribute__((debug_annotate_decl ("my_tag")));
+@end smallexample
+
+@noindent
+results in a DW_TAG_GNU_annotation DIE associating the string
+``my_tag'' to the ``struct t'', and/or a BTF_KIND_DECL_TAG BTF record
+to the same effect.
+
+The @code{debug_annotate_decl} attribute can also be used for
+variables and functions (@pxref{Common Variable Attributes},
+@pxref{Common Function Attributes}.)
+
+@item debug_annotate_type (@var{annotation})
+@cindex @code{debug_annotate_type} type attribute
+The @code{debug_annotate_type} attribute is used to add arbitrary
+string annotations to the debugging information produced for a given
+type. The attribute

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread Michael Meissner via Gcc-patches

On Tue, Jun 07, 2022 at 04:17:04PM -0500, Peter Bergner wrote:
> On 6/6/22 7:55 PM, Michael Meissner wrote:
> > gcc/
> [snip]
> > * config/rs6000/rs6000.opt (-mstore-vector-pair): New option.
> [snip]
> > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> > index 4931d781c4e..79ceec6e6a5 100644
> > --- a/gcc/config/rs6000/rs6000.opt
> > +++ b/gcc/config/rs6000/rs6000.opt
> > @@ -624,6 +624,10 @@ mieee128-constant
> >  Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
> >  Generate (do not generate) code that uses the LXVKQ instruction.
> >  
> > +; Generate (do not generate) code that uses the store vector pair 
> > instruction.
> > +mstore-vector-pair
> > +Target Undocumented Var(TARGET_STORE_VECTOR_PAIR) Init(0) Save
> > +
> >  -param=rs6000-density-pct-threshold=
> >  Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) 
> > Init(85) IntegerRange(0, 100) Param
> >  When costing for loop vectorization, we probably need to penalize the loop 
> > body
> 
> I think I mentioned this offline, but I'd prefer a negative target flag,
> something like TARGET_NO_STORE_VECTOR_PAIR that defaults to off, meaning we'd
> generate stxvp by default.  Then I'd like to see MASK_NO_STORE_VECTOR_PAIR
> added to power10's rs6000-cpu.def definition.  That way, stxvp isn't generated
> on Power10, but would be by default on any possible future cpus without
> having to add a flag to those cpus rs6000-cpu.def entries.

I don't much care how the option is spelled, but I'm happy to go with whatever
name people want.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread Peter Bergner via Gcc-patches

On 6/7/22 4:24 PM, Segher Boessenkool wrote:
> On Tue, Jun 07, 2022 at 04:17:04PM -0500, Peter Bergner wrote:
>> I think I mentioned this offline, but I'd prefer a negative target flag,
>> something like TARGET_NO_STORE_VECTOR_PAIR that defaults to off, meaning we'd
>> generate stxvp by default.
> 
> NAK.  All negatives should be -mno-xxx with -mxxx the corresponding
> positive.  All of them.

That's not what I was asking for.  I totally agree that -mno-store-vector-pair
should disable generating stxvp and that -mstore-vector-pair should enable
generating it.  What I asked for was that the internal flag we use to enable
and disable it should be a negative flag, where TARGET_NO_STORE_VECTOR_PAIR is
true when we use -mno-store-vector-pair and false when using 
-mstore-vector-pair.
That way we can add that flag to power10's rs6000-cpu.def entry and then we're
done.  What I don't want to have to do is that if/when power87 is released, we
still have to add TARGET_STORE_VECTOR_PAIR to its rs6000-cpu.def entry just to
get stxvp insns generated.  That adds a cost to every cpu after power10 since
we'd have to remember to add that flag to every follow-on cpu.

Peter

[PATCH V3] RISC-V:Fix a bug that is the CMO builtins are missing parameter

2022-06-07 Thread shiyulong

From: yulong 

We changed the format of the builtins for the zicbom and zicboz subextensions
and modified the test cases.
Differences from the previous version:
1.We modified the FUNCTION_TYPE from RISCV_VOID_FTYPE_SI/DI to 
RISCV_VOID_FTYPE_VOID_PTR.
2.We added a new RISCV_ATYPE_VOID_PTR in riscv-builtins.cc and a new 
DEF_RISCV_FTYPE (1, (VOID, VOID_PTR)) in riscv-ftypes.def.
3.We deleted DEF_RISCV_FTYPE (1, (VOID, SI/DI)).
4.We modified the input parameters of the test cases.

Thanks, Simon and Kito.

gcc/ChangeLog:

* config/riscv/riscv-builtins.cc (RISCV_ATYPE_VOID_PTR): New.
* config/riscv/riscv-cmo.def (RISCV_BUILTIN): changed the FUNCTION_TYPE 
of RISCV_BUILTIN.
* config/riscv/riscv-ftypes.def (0): New.
(1):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicbom-1.c: modified the input parameters.
* gcc.target/riscv/cmo-zicbom-2.c: modified the input parameters.
* gcc.target/riscv/cmo-zicboz-1.c: modified the input parameters.
* gcc.target/riscv/cmo-zicboz-2.c: modified the input parameters.

---
 gcc/config/riscv/riscv-builtins.cc|  1 +
 gcc/config/riscv/riscv-cmo.def| 16 ++--
 gcc/config/riscv/riscv-ftypes.def |  3 +--
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c | 26 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c | 26 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c | 10 ---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c | 10 ---
 7 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index 795132a0c16..1218fdfc67d 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -133,6 +133,7 @@ AVAIL (prefetchi64, TARGET_ZICBOP && TARGET_64BIT)
 #define RISCV_ATYPE_USI unsigned_intSI_type_node
 #define RISCV_ATYPE_SI intSI_type_node
 #define RISCV_ATYPE_DI intDI_type_node
+#define RISCV_ATYPE_VOID_PTR ptr_type_node
 
 /* RISCV_FTYPE_ATYPESN takes N RISCV_FTYPES-like type codes and lists
their associated RISCV_ATYPEs.  */
diff --git a/gcc/config/riscv/riscv-cmo.def b/gcc/config/riscv/riscv-cmo.def
index b30ecf96ec1..9fe5094ce1a 100644
--- a/gcc/config/riscv/riscv-cmo.def
+++ b/gcc/config/riscv/riscv-cmo.def
@@ -1,16 +1,16 @@
 // zicbom
-RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, clean32),
-RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, clean64),
+RISCV_BUILTIN (clean_si, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, clean32),
+RISCV_BUILTIN (clean_di, "zicbom_cbo_clean", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, clean64),
 
-RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, flush32),
-RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, flush64),
+RISCV_BUILTIN (flush_si, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, flush32),
+RISCV_BUILTIN (flush_di, "zicbom_cbo_flush", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, flush64),
 
-RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, inval32),
-RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, inval64),
+RISCV_BUILTIN (inval_si, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, inval32),
+RISCV_BUILTIN (inval_di, "zicbom_cbo_inval", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, inval64),
 
 // zicboz
-RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE, zero32),
-RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT, 
RISCV_DI_FTYPE, zero64),
+RISCV_BUILTIN (zero_si, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, zero32),
+RISCV_BUILTIN (zero_di, "zicboz_cbo_zero", RISCV_BUILTIN_DIRECT_NO_TARGET, 
RISCV_VOID_FTYPE_VOID_PTR, zero64),
 
 // zicbop
 RISCV_BUILTIN (prefetchi_si, "zicbop_cbo_prefetchi", RISCV_BUILTIN_DIRECT, 
RISCV_SI_FTYPE_SI, prefetchi32),
diff --git a/gcc/config/riscv/riscv-ftypes.def 
b/gcc/config/riscv/riscv-ftypes.def
index 62421292ce7..c2b45c63ea1 100644
--- a/gcc/config/riscv/riscv-ftypes.def
+++ b/gcc/config/riscv/riscv-ftypes.def
@@ -28,7 +28,6 @@ along with GCC; see the file COPYING3.  If not see
 
 DEF_RISCV_FTYPE (0, (USI))
 DEF_RISCV_FTYPE (1, (VOID, USI))
-DEF_RISCV_FTYPE (0, (SI))
-DEF_RISCV_FTYPE (0, (DI))
+DEF_RISCV_FTYPE (1, (VOID, VOID_PTR))
 DEF_RISCV_FTYPE (1, (SI, SI))
 DEF_RISCV_FTYPE (1, (DI, DI))
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
index e2ba2183511..6341f7874d3 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
@@ -1,21 +1,29 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc_zicbom -mabi=lp64" } */
 
-int f

[PATCH] Adjust testcase to avoid compile failure under -m32.

2022-06-07 Thread liuhongt via Gcc-patches

Pushed as an obvious patch.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr105854.c: Add target int128 and dfp.
---
 gcc/testsuite/gcc.target/i386/pr105854.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr105854.c 
b/gcc/testsuite/gcc.target/i386/pr105854.c
index 28abef67915..36a8080b8a7 100644
--- a/gcc/testsuite/gcc.target/i386/pr105854.c
+++ b/gcc/testsuite/gcc.target/i386/pr105854.c
@@ -1,4 +1,5 @@
-/* { dg-do compile } */
+/* { dg-do compile { target int128 } } */
+/* { dg-require-effective-target dfp } */
 /* { dg-options "-O -fcaller-saves -mavx512vl -mno-avx512bw" } */
 
 typedef int __attribute__((__vector_size__ (8))) T;
-- 
2.18.1

Re: [PATCH 1/3] Disable generating store vector pair.

2022-06-07 Thread Michael Meissner via Gcc-patches

On Tue, Jun 07, 2022 at 07:59:34PM -0500, Peter Bergner wrote:
> On 6/7/22 4:24 PM, Segher Boessenkool wrote:
> > On Tue, Jun 07, 2022 at 04:17:04PM -0500, Peter Bergner wrote:
> >> I think I mentioned this offline, but I'd prefer a negative target flag,
> >> something like TARGET_NO_STORE_VECTOR_PAIR that defaults to off, meaning 
> >> we'd
> >> generate stxvp by default.
> > 
> > NAK.  All negatives should be -mno-xxx with -mxxx the corresponding
> > positive.  All of them.
> 
> That's not what I was asking for.  I totally agree that -mno-store-vector-pair
> should disable generating stxvp and that -mstore-vector-pair should enable
> generating it.  What I asked for was that the internal flag we use to enable
> and disable it should be a negative flag, where TARGET_NO_STORE_VECTOR_PAIR is
> true when we use -mno-store-vector-pair and false when using 
> -mstore-vector-pair.
> That way we can add that flag to power10's rs6000-cpu.def entry and then we're
> done.  What I don't want to have to do is that if/when power87 is released, we
> still have to add TARGET_STORE_VECTOR_PAIR its rs6000-cpu.def entry just to
> get stxvp insns generated.  That adds a cost to every cpu after power10 since
> we'd have to remember to add that flag to every follow-on cpu.

FWIW, I really dislike having negative flags like that (just talking about the
option mask internals, not the user option).

I don't view the cost to add one positive flag to the next CPU as bad, as it
will be a one time cost.  Presumably it would be set also next++ CPU.  This is
like power8 is all of the power7 flags + new flags.  Power9 is all of the
power8 flags + new flags.  I.e. in general it is cumulative.  Yes, I'm aware
there are times when there are breaks, but hopefully those are rare.

Otherwise it is like the mess with -mpower8-fusion, where going from power8 to
power9 we have to clear the fusion flag.  If store vector pair is a positive
flag, then it isn't set in power10 flags, but it might be set in next cpu
flags.  But if it is a negative flag, we have to explicitly clear it.

We can do it, but I just prefer to go with the positive flag approach.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com

[PATCH v4, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]

2022-06-07 Thread HAO CHEN GUI via Gcc-patches

Hi,
  This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000.
Tests show that outputs of xs[min/max]dp are consistent with the standard
of C99 fmin/max.

  This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead
of smin/max. So the builtins always generate xs[min/max]dp on all
platforms.

  Compared with the previous version, the main change is to fix an indentation
  problem.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-31 Haochen Gui 

gcc/
PR target/103605
* config/rs6000/rs6000.md (FMINMAX): New.
(minmax_op): New.
(f3): New pattern by UNSPEC_FMAX and UNSPEC_FMIN.
* config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Set
pattern to fmaxdf3.
(__builtin_vsx_xsmindp): Set pattern to fmindf3.

gcc/testsuite/
PR target/103605
* gcc.dg/powerpc/pr103605.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f4a9f24bcc5..8b735493b40 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -1613,10 +1613,10 @@
 XSCVSPDP vsx_xscvspdp {}

   const double __builtin_vsx_xsmaxdp (double, double);
-XSMAXDP smaxdf3 {}
+XSMAXDP fmaxdf3 {}

   const double __builtin_vsx_xsmindp (double, double);
-XSMINDP smindf3 {}
+XSMINDP fmindf3 {}

   const double __builtin_vsx_xsrdpi (double);
 XSRDPI vsx_xsrdpi {}
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..42d3edf2eca 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,8 @@ (define_c_enum "unspec"
UNSPEC_HASHCHK
UNSPEC_XXSPLTIDP_CONST
UNSPEC_XXSPLTIW_CONST
+   UNSPEC_FMAX
+   UNSPEC_FMIN
   ])

 ;;
@@ -5341,6 +5343,22 @@ (define_insn_and_split "*s3_fpr"
   DONE;
 })

+
+(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+
+(define_int_attr  minmax_op [(UNSPEC_FMAX "max")
+(UNSPEC_FMIN "min")])
+
+(define_insn "f3"
+  [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa")
+   (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
+ (match_operand:SFDF 2 "vsx_register_operand" "wa")]
+FMINMAX))]
+  "TARGET_VSX"
+  "xsdp %x0,%x1,%x2"
+  [(set_attr "type" "fp")]
+)
+
 (define_expand "movcc"
[(set (match_operand:GPR 0 "gpc_reg_operand")
 (if_then_else:GPR (match_operand 1 "comparison_operator")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c 
b/gcc/testsuite/gcc.target/powerpc/pr103605.c
new file mode 100644
index 000..e43ac40c2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O1 -mvsx" } */
+/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */
+
+#include 
+
+double test1 (double d0, double d1)
+{
+  return fmin (d0, d1);
+}
+
+float test2 (float d0, float d1)
+{
+  return fmin (d0, d1);
+}
+
+double test3 (double d0, double d1)
+{
+  return fmax (d0, d1);
+}
+
+float test4 (float d0, float d1)
+{
+  return fmax (d0, d1);
+}
+
+double test5 (double d0, double d1)
+{
+  return __builtin_vsx_xsmindp (d0, d1);
+}
+
+double test6 (double d0, double d1)
+{
+  return __builtin_vsx_xsmaxdp (d0, d1);
+}

RE: [PATCH] Update {skylake,icelake,alderlake}_cost to add a bit preference to vector store.

2022-06-07 Thread Cui, Lili via Gcc-patches

> -Original Message-
> From: Hongtao Liu 
> Sent: Monday, June 6, 2022 1:25 PM
> To: H.J. Lu 
> Cc: Cui, Lili ; Liu, Hongtao ; GCC
> Patches 
> Subject: Re: [PATCH] Update {skylake,icelake,alderlake}_cost to add a bit
> preference to vector store.
> >
> > Should we add some tests to verify improvements?
> We can take pr99881.c as a unit test.
> 
> Ok for the trunk.
> >
> > --
> > H.J.
> 
Hi hongtao,

1. I added test case pr105493.c for 525.x264_r. For 538.imagick_r we have 
pr99881.c.
2. I changed the dg-final check in pr105638.c due to code generation changes.

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. Ok for trunk?

Thanks,
Lili.

> 
> --
> BR,
> Hongtao


0001-Update-skylake-icelake-alderlake-_cost-to-add-a-bit-.patch
Description: 0001-Update-skylake-icelake-alderlake-_cost-to-add-a-bit-.patch

[Patch] OpenMP: Move omp requires checks to libgomp

2022-06-07 Thread Tobias Burnus


This is based on Chung-Lin's patch at 
https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563393.html

This is about code like:
  #pragma omp requires unified_shared_memory
  !$omp requires reverse_offload
which before was rejected with a sorry during parsing and is now
handled in libgomp (by excluding the devices from the available
device list).

Note: The requires-directive consistency check is nonfatal, i.e.
the program continues after the
 libgomp: requires-directive clause inconsistency between compilation units 
detected: 'unified_shared_memory' vs. 'reverse_offload'
message.

Changes compared to Chung-Lin's patch:
- I take the omp_* device API calls into account
- Rediffed because of changes done in the past year
- Included Thomas' fix for !ENABLE_OFFLOADING + intelmic, i.e. OG11 commit
  https://gcc.gnu.org/g:6da4ffd4a790f5f0abf290147217ca46a36f981e

On the libgomp side: The devices which do not fulfill the requirements are
now filtered out. That's in line with how I understood the spec – and to make it
clearer, I spelled this out explicitly when adding (for other reasons) two
glossary items (passed 2nd vote but not yet in a released OpenMP spec):
- "accessible devices
   The host device and all non-host devices accessible for execution."
- "supported devices
   The host device and all non-host devices supported by the implementation
   for execution of target code for which the device-related requirements
   of the 'requires' directive are fulfilled."

Note:
* The FE only generates the requirement clauses when there is at least
  one declare target variable or function and offloading is used
  (target region, API call etc.)
  In particular, this implies that for !ENABLE_OFFLOADING, none is
  generated.
* libgomp only checks whether those values are consistent when
  env var OMP_TARGET_OFFLOAD != disable.

=> Thus, I protected the check for this (libgomp.c-c++-common/requires-1.c)
   by { dg-skip-if "" { ! offloading_enabled } }
   (and assume that OMP_TARGET_OFFLOAD is not set).

If env var OMP_TARGET_OFFLOAD != disable, it then runs for all configured
plugins and checks first whether devices are actually available and then
whether the requirement mask is fulfilled. Currently, none of the clauses
is supported (neither unified_shared_memory nor unified_address nor
reverse_offload) even though there are patches submitted (and being worked on),
which add support for those.

I then unconditionally print a note like:
  libgomp: note: nvptx devices present but 'omp requires unified_shared_memory' 
cannot be fulfilled

This note is printed if env var OMP_TARGET_OFFLOAD != disable,
libgomp supports the device type, a device was found but omp requires
could not be fulfilled.
This means that this message is also printed when compiled with
  -foffload=disable
or 'omp target if(0)' was used throughout or ...

I think that's acceptable, but it could also be optimized further; however,
the initialization (e.g. GOMP_offload_register_ver) happens much later such
that the knowledge that a device is not needed (as with -foffload=disable)
is not available.

I hope the note is not too confusing, but otherwise:
* it could be postponed and then printed in context
  (requires device type <-> name association)
* it could only be printed with GOMP_DEBUG set
  but for the common case (why did it not run?), outputting it
  unconditionally surely helps to understand what went "wrong".

Thoughts? Comments? OK?

Tobias

PS: I have not fully tested the intelmic version.
PPS: I have not tried to implement the compile-time check to impose
consistent 'omp requires' – as proposed in the last review. I think I will
open a PR to track that proposal.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Move omp requires checks to libgomp

Handle reverse_offload, unified_address, and unified_shared_memory
requirements in libgomp by putting them into the .gnu.gomp_requires section.

For all in-principle supported devices, if a requirement cannot be fulfilled,
the device is excluded from the (supported) devices list. Currently, none of
those requirements are marked as supported for any of the non-host devices.

Additionally, libgomp checks for consistency across the entire
.gnu.gomp_requires section, matching the requirements set by the OpenMP spec.

gcc/c/ChangeLog:

	* c-parser.cc (c_parser_declaration_or_fndef): Set
	OMP_REQUIRES_TARGET_USED in omp_requires_mask if function has
	"omp declare target" attribute.
	(c_parser_omp_target_data): Set	OMP_REQUIRES_TARGET_USED in
	omp_requires_mask.
	(c_parser_omp_target_enter_data): Likewise.
	(c_parser_omp_target_exit_data): Likewise.
	(c_parser_omp_requires): Remove sorry.

gcc/cp/ChangeLog:

	* parser.cc (cp_parser_simple_declaration): Set
	OMP_REQUIRES_TARGET_USED in omp_re

[PATCH] c++: Fix up ICE on __builtin_shufflevector constexpr evaluation [PR105871]

2022-06-07 Thread Jakub Jelinek via Gcc-patches

Hi!

As the following testcase shows, BIT_FIELD_REF result doesn't have to have
just integral type, it can also have vector type.  And in that case
cxx_eval_bit_field_ref just ICEs on it because it is unprepared for that
case, creates the initial value with build_int_cst (sure, that one could be
easily replaced with build_zero_cst) and then expects it can through shifts,
ands and ors come up with the final value, but that doesn't work for
vectors.

We already call fold_ternary if whole is a VECTOR_CST, this patch does the
same if the result doesn't have integral type.  And, there is no guarantee
fold_ternary will succeed and the callers certainly don't expect NULL
being returned, so it also diagnoses those as non-constant and returns
original t in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-06-08  Jakub Jelinek  

PR c++/105871
* constexpr.cc (cxx_eval_bit_field_ref): For BIT_FIELD_REF with
non-integral result type use fold_ternary too like for BIT_FIELD_REFs
from VECTOR_CST.  If fold_ternary returns NULL, diagnose non-constant
expression, set *non_constant_p and return t, instead of returning
NULL.

* g++.dg/pr105871.C: New test.

--- gcc/cp/constexpr.cc.jj  2022-06-03 11:20:13.0 +0200
+++ gcc/cp/constexpr.cc 2022-06-07 13:43:13.157127740 +0200
@@ -4198,9 +4198,16 @@ cxx_eval_bit_field_ref (const constexpr_
   if (*non_constant_p)
 return t;
 
-  if (TREE_CODE (whole) == VECTOR_CST)
-return fold_ternary (BIT_FIELD_REF, TREE_TYPE (t), whole,
-TREE_OPERAND (t, 1), TREE_OPERAND (t, 2));
+  if (TREE_CODE (whole) == VECTOR_CST || !INTEGRAL_TYPE_P (TREE_TYPE (t)))
+{
+  if (tree r = fold_ternary (BIT_FIELD_REF, TREE_TYPE (t), whole,
+TREE_OPERAND (t, 1), TREE_OPERAND (t, 2)))
+   return r;
+  if (!ctx->quiet)
+   error ("%qE is not a constant expression", orig_whole);
+  *non_constant_p = true;
+  return t;
+}
 
   start = TREE_OPERAND (t, 2);
   istart = tree_to_shwi (start);
--- gcc/testsuite/g++.dg/pr105871.C.jj  2022-06-07 13:56:02.743241969 +0200
+++ gcc/testsuite/g++.dg/pr105871.C 2022-06-07 13:56:29.042975525 +0200
@@ -0,0 +1,12 @@
+// PR c++/105871
+// { dg-do compile }
+// { dg-options "-Wno-psabi" }
+
+typedef __attribute__((__vector_size__ ( 1))) unsigned char U;
+typedef __attribute__((__vector_size__ (16))) unsigned char V;
+
+U
+foo (void)
+{
+  return __builtin_shufflevector ((U){}, (V){}, 0);
+}

Jakub

Re: [PATCH v2] libgccjit: allow common objects in $(EXTRA_GCC_OBJS) and $(EXTRA_OBJS)

2022-06-07 Thread Xi Ruoyao via Gcc-patches

On Mon, 2022-06-06 at 18:33 -0400, David Malcolm wrote:
> > On Thu, 2022-05-19 at 16:10 +0800, Yang Yujie wrote:
> > > This patch does not affect any other target architecture than
> > > loongarch,
> > > and has been bootstrapped and regression-tested on loongarch64-
> > > linux-
> > > gnuf64
> > > an x86_64-pc-linux-gnu.
> > > 
> > > Any recommendations? Please review. Thanks a lot.
> 
> The patch looks good to me.

Pushed as r13-1010.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

90 matches

Mail list logo