[PATCH] Update documentation for -ftree-loop-vectorize and -ftree-slp-vectorize, which are enabled by default at -O2.

2021-11-05 Thread liuhongt via Gcc-patches
Bootstrapped on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
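
As a quick illustration (not part of the patch; the function name and flags
below are just an example), this is the kind of loop that the now-default
-ftree-loop-vectorize handles at -O2; compiling with "gcc -O2 -fopt-info-vec"
should report it as vectorized on targets with vector support:

    /* Hypothetical example, not from the patch.  */
    void
    saxpy (float *restrict y, const float *restrict x, float a, int n)
    {
      for (int i = 0; i < n; i++)
        y[i] += a * x[i];
    }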

gcc/ChangeLog:

PR tree-optimization/103077
* doc/invoke.texi (Options That Control Optimization):
Update documentation for -ftree-loop-vectorize and
-ftree-slp-vectorize, which are enabled by default at -O2.
---
 gcc/doc/invoke.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c5730228821..22d17090641 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11958,13 +11958,13 @@ and @option{-ftree-slp-vectorize} if not explicitly 
specified.
 @item -ftree-loop-vectorize
 @opindex ftree-loop-vectorize
 Perform loop vectorization on trees. This flag is enabled by default at
-@option{-O3} and by @option{-ftree-vectorize}, @option{-fprofile-use},
+@option{-O2} and by @option{-ftree-vectorize}, @option{-fprofile-use},
 and @option{-fauto-profile}.
 
 @item -ftree-slp-vectorize
 @opindex ftree-slp-vectorize
 Perform basic block vectorization on trees. This flag is enabled by default at
-@option{-O3} and by @option{-ftree-vectorize}, @option{-fprofile-use},
+@option{-O2} and by @option{-ftree-vectorize}, @option{-fprofile-use},
 and @option{-fauto-profile}.
 
 @item -ftrivial-auto-var-init=@var{choice}
-- 
2.18.1



[PATCH] i386: Support complex fma/conj_fma for _Float16.

2021-11-05 Thread Kong, Lingling via Gcc-patches
Hi,

This patch adds support for cmla_optab, cmul_optab, cmla_conj_optab and
cmul_conj_optab for vector _Float16 modes.
Ok for master?
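
For reference (not from the patch; the scalar function names here are purely
illustrative), the semantics exposed by these optabs correspond per element to
a complex multiply-add, optionally conjugating one operand; the GNU C '~'
operator on _Complex values denotes conjugation:

    /* Scalar sketch of what cmla / cmla_conj compute; cmul / cmul_conj are
       the same without the accumulation.  */
    _Complex _Float16
    cmla_ref (_Complex _Float16 a, _Complex _Float16 b, _Complex _Float16 c)
    {
      return c + a * b;     /* maps to vfmaddcph on AVX512FP16 */
    }

    _Complex _Float16
    cmla_conj_ref (_Complex _Float16 a, _Complex _Float16 b,
                   _Complex _Float16 c)
    {
      return c + a * ~b;    /* maps to vfcmaddcph */
    }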

gcc/ChangeLog:

* config/i386/sse.md (cmul3): Add new define_expand.
(cmla4): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fp16-vector-complex-float.c: New test.
---
 gcc/config/i386/sse.md| 23 +++
 .../i386/avx512fp16-vector-complex-float.c| 40 +++
 2 files changed, 63 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 
0a7f5b178f9..8d3fef0a31a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5922,6 +5922,12 @@
 (UNSPEC_COMPLEX_FMUL "fmulc")
 (UNSPEC_COMPLEX_FCMUL "fcmulc")])
 
+(define_int_attr conj_op
+   [(UNSPEC_COMPLEX_FMA "")
+(UNSPEC_COMPLEX_FCMA "_conj")
+(UNSPEC_COMPLEX_FMUL "")
+(UNSPEC_COMPLEX_FCMUL "_conj")])
+
 (define_mode_attr complexmove
   [(V32HF "avx512f_loadv16sf")
(V16HF "avx512vl_loadv8sf")
@@ -6003,6 +6009,15 @@
   DONE;
 })
 
+(define_expand "cmla4"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (unspec:VF_AVX512FP16VL
+   [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+(match_operand:VF_AVX512FP16VL 2 "vector_operand")
+(match_operand:VF_AVX512FP16VL 3 "vector_operand")]
+UNSPEC_COMPLEX_F_C_MA))]
+  "TARGET_AVX512FP16")
+
 (define_insn "fma__"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
(unspec:VF_AVX512FP16VL
@@ -6084,6 +6099,14 @@
   [(set_attr "type" "ssemuladd")
(set_attr "mode" "")])
 
+(define_expand "cmul3"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+  (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+  UNSPEC_COMPLEX_F_C_MUL))]
+  "TARGET_AVX512FP16")
+
 (define_insn "__"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
  (unspec:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
new file mode 100644
index 000..bcb957f0de0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */
+
+#include
+#define TYPE _Float16
+#define N 16
+
+void fma0 (_Complex TYPE *a, _Complex TYPE *b,
+   _Complex TYPE *c)
+{
+  for (int i = 0; i < N; i++)
+c[i] += a[i] * b[i];
+}
+
+void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+ _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+c[i] += a[i] * ~b[i];
+}
+
+void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+  _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+c[i] = a[i] * b[i];
+}
+
+void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+  _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+c[i] = a[i] * ~b[i];
+}
--
2.18.1



[PATCH] i386: Optimization for _mm512_set1_pch.

2021-11-05 Thread Kong, Lingling via Gcc-patches
Hi,

This patch folds _mm512_fmadd_pch (a, _mm512_set1_pch(*(b)), c) into a single
instruction: vfmaddcph (%rsp){1to16}, %zmm1, %zmm2.
OK for master?
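
For illustration (a reduced variant of the new test, not part of the patch
itself; the function name is made up), the folded source pattern looks like
this when compiled with -O2 -mavx512fp16:

    #include <immintrin.h>

    /* With the patch, the broadcast coming from _mm512_set1_pch is folded
       into the {1to16} embedded-broadcast memory operand of vfmaddcph
       instead of being emitted as a separate broadcast instruction.  */
    __m512h
    fold_example (__m512h a, __m512h c, _Float16 const *b)
    {
      return _mm512_fmadd_pch (a, _mm512_set1_pch (*b), c);
    }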

gcc/ChangeLog:

* config/i386/sse.md (fma___pair):
Add new define_insn.
(fma__fmaddc_bcst): Add new define_insn_and_split.
(fma__fcmaddc_bcst): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fp16vl-complex-broadcast-1.c: New test.
---
 gcc/config/i386/sse.md| 62 +++
 .../i386/avx512fp16vl-complex-broadcast-1.c   | 25 
 2 files changed, 87 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 
0a7f5b178f9..eba8e77515f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -193,7 +193,9 @@
 
   ;; For AVX512FP16 suppport
   UNSPEC_COMPLEX_FMA
+  UNSPEC_COMPLEX_FMA_PAIR
   UNSPEC_COMPLEX_FCMA
+  UNSPEC_COMPLEX_FCMA_PAIR
   UNSPEC_COMPLEX_FMUL
   UNSPEC_COMPLEX_FCMUL
   UNSPEC_COMPLEX_MASK
@@ -5913,6 +5915,9 @@
 (define_int_iterator UNSPEC_COMPLEX_F_C_MA
[UNSPEC_COMPLEX_FMA UNSPEC_COMPLEX_FCMA])
 
+(define_int_iterator UNSPEC_COMPLEX_F_C_MA_PAIR
+   [UNSPEC_COMPLEX_FMA_PAIR UNSPEC_COMPLEX_FCMA_PAIR])
+
 (define_int_iterator UNSPEC_COMPLEX_F_C_MUL
[UNSPEC_COMPLEX_FMUL UNSPEC_COMPLEX_FCMUL])
 
@@ -5922,6 +5927,10 @@
 (UNSPEC_COMPLEX_FMUL "fmulc")
 (UNSPEC_COMPLEX_FCMUL "fcmulc")])
 
+(define_int_attr complexpairopname
+   [(UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
+(UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")])
+
 (define_mode_attr complexmove
   [(V32HF "avx512f_loadv16sf")
(V16HF "avx512vl_loadv8sf")
@@ -6067,6 +6076,59 @@
  [(match_dup 1) (match_dup 2) (match_dup 4)]
   UNSPEC_COMPLEX_F_C_MA))])
 
+(define_insn "fma___pair"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v")
+   (unspec:VF1_AVX512VL
+[(match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
+ (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
+ (match_operand:VF1_AVX512VL 3 "vector_operand" "0")]
+ UNSPEC_COMPLEX_F_C_MA_PAIR))]
+ "TARGET_AVX512FP16"
+ "vph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")])
+
+(define_insn_and_split "fma__fmaddc_bcst"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+  (subreg:VF_AVX512FP16VL
+(match_operand: 2 "bcst_vector_operand") 0)
+  (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
+  UNSPEC_COMPLEX_FMA))]
+  "TARGET_AVX512FP16"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:
+ [(match_dup 1) (match_dup 2) (match_dup 3)]
+  UNSPEC_COMPLEX_FMA_PAIR))]
+  {
+operands[0] = lowpart_subreg (mode, operands[0], mode);
+operands[1] = lowpart_subreg (mode, operands[1], mode);
+operands[3] = lowpart_subreg (mode, operands[3], 
+mode);
+  })
+
+(define_insn_and_split "fma__fcmaddc_bcst"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+   (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+  (subreg:VF_AVX512FP16VL
+(match_operand: 2 "bcst_vector_operand") 0)
+  (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
+  UNSPEC_COMPLEX_FCMA))]
+  "TARGET_AVX512FP16"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:
+ [(match_dup 1) (match_dup 2) (match_dup 3)]
+  UNSPEC_COMPLEX_FCMA_PAIR))]
+  {
+operands[0] = lowpart_subreg (mode, operands[0], mode);
+operands[1] = lowpart_subreg (mode, operands[1], mode);
+operands[3] = lowpart_subreg (mode, operands[3], 
+mode);
+  })
+
 (define_insn "___mask"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
(vec_merge:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c
new file mode 100644
index 000..3c8e84230f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 2 } }  */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 2 } }  */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to16\\\}" 2 } }  */
+
+#include 
+
+volatile __m512h res0, a0, c0;
+volatile __m256h res1, a1, c1;
+volatile __m128h res2, a2, c2;
+volatile _Float16 *b;
+
+void extern
+avx_test(void)
+{
+  res0 = _mm512_fmadd_pch (a0, _mm512_set1_pch(*(b + 2 * 6)), c0);
+  res0 = _mm512_fcmadd_pch (a0, _mm512_set1_pch(*(b + 2 * 6)), c0);
+
+  res1 = _mm256_fmadd_pch (a1, _mm256_set1_pch(*(b + 2 * 6)), c1);
+  res1 = _mm256_fcmadd_pch (a1, _mm256_set1_pch(*(b + 2 * 6)), c1);
+
+  res2 =  _mm_f

Re: [PATCH] gcc: vx-common.h: fix test for VxWorks7

2021-11-05 Thread Olivier Hainque via Gcc-patches
Hi Rasmus,

> On 3 Nov 2021, at 14:18, Rasmus Villemoes  wrote:
> 
> The macro TARGET_VXWORKS7 is always defined (see vxworks-dummy.h).
> Thus we need to test its value, not its definedness.
> 
> Fixes aca124df (define NO_DOT_IN_LABEL only in vxworks6).
> 
> gcc/ChangeLog:
> 
>   * config/vx-common.h: Test value of TARGET_VXWORKS7 rather
>   than definedness.

Indeed. Ok, thanks!







Re: [PATCH] IBM Z: ldist-{rawmemchr, strlen} tests require vector extensions

2021-11-05 Thread Stefan Schulze Frielinghaus via Gcc-patches
On Tue, Nov 02, 2021 at 04:20:01PM +0100, Andreas Schwab wrote:
> On Nov 02 2021, Stefan Schulze Frielinghaus via Gcc-patches wrote:
> 
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
> > index 6abfd278351..bf6335f6360 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
> > @@ -1,5 +1,6 @@
> >  /* { dg-do run { target s390x-*-* } } */
> >  /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } 
> > */
> > +/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
> 
> I think that should use an effective_target check.

Thanks for the hint.  I wasn't aware of those checks.  I replaced all
"target s390x-*-*" checks with "target s390_vx".  The latter tests
whether the toolchain and the actual machine are capable of dealing with
vector extensions.

Ok for mainline?
>From 86b46ae8cb3c014739f783a88043951c996deb61 Mon Sep 17 00:00:00 2001
From: Stefan Schulze Frielinghaus 
Date: Fri, 5 Nov 2021 09:05:01 +0100
Subject: [PATCH] IBM Z: ldist-{rawmemchr,strlen} tests require vector
 extensions fixup

This is a fixup for 64bf0c835f8918adf7e4140a04ac79c2963204aa.  Using
the effective-target check s390_vx is more robust, e.g. when trying to run
the test on a machine older than z13.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ldist-rawmemchr-1.c: Replace s390x-*-* by
s390_vx.
* gcc.dg/tree-ssa/ldist-rawmemchr-2.c: Likewise.
* gcc.dg/tree-ssa/ldist-strlen-1.c: Likewise.
* gcc.dg/tree-ssa/ldist-strlen-3.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c | 10 +-
 gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c | 10 +-
 gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c|  6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c|  4 ++--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
index bf6335f6360..8e7f1f868fe 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
@@ -1,9 +1,9 @@
-/* { dg-do run { target s390x-*-* } } */
+/* { dg-do run { target s390_vx } } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
-/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390x-*-* } } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390x-*-* } } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390x-*-* } } } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390_vx } } } */
 
 /* Rawmemchr pattern: reduction stmt and no store */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
index 83f5a35a322..0959d4b8f2a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
@@ -1,9 +1,9 @@
-/* { dg-do run { target s390x-*-* } } */
+/* { dg-do run { target s390_vx } } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
-/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390x-*-* } } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390x-*-* } } } */
-/* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390x-*-* } } } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390_vx } } } */
+/* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390_vx } } } */
 
 /* Rawmemchr pattern: reduction stmt and store */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
index aeb04b91f6b..dff573cb35f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
@@ -1,9 +1,9 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
-/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */
 /* { dg-final { scan-tree-dump-tim

Re: [PATCH] gcc: vx-common.h: fix test for VxWorks7

2021-11-05 Thread Rasmus Villemoes via Gcc-patches
On 05/11/2021 09.08, Olivier Hainque wrote:
> Hi Rasmus,
> 
>> On 3 Nov 2021, at 14:18, Rasmus Villemoes  wrote:
>>
>> The macro TARGET_VXWORKS7 is always defined (see vxworks-dummy.h).
>> Thus we need to test its value, not its definedness.
>>
>> Fixes aca124df (define NO_DOT_IN_LABEL only in vxworks6).
>>
>> gcc/ChangeLog:
>>
>>  * config/vx-common.h: Test value of TARGET_VXWORKS7 rather
>>  than definedness.
> 
> Indeed. Ok, thanks!

Applied to master and pushed - hope I've done it right.

How about the gcc-11 branch, can it be applied there as well, and if so,
should I do a "git cherry-pick -x" and push it to that branch? From
looking at the git history it seems to be the way things are done.

Thanks,
Rasmus


[PATCH] [1/2] arm: Implement cortex-M return signing address codegen

2021-11-05 Thread Andrea Corallo via Gcc-patches
Hi all,

this patch enables return address signing and verification based on
Armv8.1-M Pointer Authentication [1].

To sign the return address, we use the PAC R12, LR, SP instruction
upon function entry.  This signs LR using SP and stores the result
in R12, which is then pushed onto the stack.

During the function epilogue R12 is popped and AUT R12, LR, SP is
used to verify that the content of LR is still valid before returning.

Here an example of PAC instrumented function prologue and epilogue:

pac r12, lr, sp
push{r3, r7, lr}
push{r12}
sub sp, sp, #4
[...] function body
add sp, sp, #4
pop {r12}
pop {r3, r7, lr}
aut r12, lr, sp
bx  lr

The patch also takes care of generating a PACBTI instruction in place
of the sequence BTI+PAC when Branch Target Identification is enabled
contextually.

These two patches apply on top of Tejas series posted here [2].

Regression tested and bootstrapped on arm-linux-gnu and aarch64-linux-gnu.

Best Regards

  Andrea

[1] 

[2] 

>From 605970bdef506d749bbe9650ee469f41b1d7377f Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Fri, 24 Sep 2021 14:50:29 +0200
Subject: [PATCH 1/2] [PATCH] [1/2] arm: Implement cortex-M return signing
 address codegen

gcc/Changelog

2021-11-03  Andrea Corallo  

* config/arm/arm.c: (arm_compute_frame_layout)
(arm_expand_prologue, thumb2_expand_return, arm_expand_epilogue)
(arm_conditional_register_usage): Update for pac codegen.
(arm_pac_enabled_for_curr_function_p): New function.
* config/arm/arm.md (pac_ip_lr_sp, pacbti_ip_lr_sp, aut_ip_lr_sp):
Add new patterns.
* config/arm/unspecs.md (UNSPEC_PAC_IP_LR_SP)
(UNSPEC_PACBTI_IP_LR_SP, UNSPEC_AUT_IP_LR_SP): Add unspecs.

gcc/testsuite/Changelog

2021-11-03  Andrea Corallo  

* gcc.target/arm/pac-1.c : New test case.
* gcc.target/arm/pac-2.c : Likewise.
* gcc.target/arm/pac-3.c : Likewise.
* gcc.target/arm/pac-4.c : Likewise.
* gcc.target/arm/pac-5.c : Likewise.
---
 gcc/config/arm/arm.c | 85 
 gcc/config/arm/arm.md| 20 +++
 gcc/config/arm/unspecs.md|  3 +
 gcc/testsuite/gcc.target/arm/pac-1.c | 25 
 gcc/testsuite/gcc.target/arm/pac-2.c | 25 
 gcc/testsuite/gcc.target/arm/pac-3.c | 25 
 gcc/testsuite/gcc.target/arm/pac-4.c | 25 
 gcc/testsuite/gcc.target/arm/pac-5.c | 26 +
 8 files changed, 224 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pac-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/pac-2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/pac-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/pac-4.c
 create mode 100644 gcc/testsuite/gcc.target/arm/pac-5.c

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a87bcb298f9..2889a471fa5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -302,6 +302,7 @@ static bool arm_vectorize_vec_perm_const (machine_mode, 
rtx, rtx, rtx,
  const vec_perm_indices &);
 
 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
+static bool arm_pac_enabled_for_curr_function_p (void);
 
 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt 
type_of_cost,
   tree vectype,
@@ -22696,6 +22697,10 @@ arm_compute_frame_layout (void)
 nonecure entry functions with VSTR/VLDR.  */
   if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
saved += 4;
+
+  /* Allocate space for saving R12 */
+  if (arm_pac_enabled_for_curr_function_p ())
+   saved += 4;
 }
   else /* TARGET_THUMB1 */
 {
@@ -23288,11 +23293,12 @@ arm_expand_prologue (void)
   /* The static chain register is the same as the IP register.  If it is
  clobbered when creating the frame, we need to save and restore it.  */
   clobber_ip = IS_NESTED (func_type)
-  && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
-  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
-   || flag_stack_clash_protection)
-  && !df_regs_ever_live_p (LR_REGNUM)
-  && arm_r3_live_at_start_p ()));
+&& (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+|| ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ || flag_stack_clash_protection)
+&& !df_regs_ever_live_p (LR_REGNUM)
+&& arm_r3_live_at_start_p ()))
+   || (arm_pac_enabled_for_curr_function_p ()));
 
   /* Find somewhere to store IP whilst the frame is being cr

[PATCH] [2/2] arm: add arm bti pass

2021-11-05 Thread Andrea Corallo via Gcc-patches
Hi all,

this patch enables the Armv8.1-M Branch Target Identification (BTI)
mechanism [1].

This is achieved by moving and generalizing the AArch64 "bti" pass so
that it can also be used by the Arm backend.

The pass iterates over the instructions and adds the necessary BTI
instructions at the beginning of every function and at every landing
pad targeted by indirect jumps.
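
As a hypothetical sketch (not part of the patch; names are invented), the
kind of landing pad the pass instruments, besides function entries, is a
computed-goto target, since it can be reached through an indirect jump:

    /* Each label whose address is taken may be reached via an indirect
       jump, so with BTI enabled it needs a BTI instruction, in addition
       to the one placed at the function entry.  */
    int
    run (int op)
    {
      static void *const table[] = { &&do_add, &&do_sub };
      int acc = 10;
      goto *table[op & 1];
    do_add:
      return acc + op;
    do_sub:
      return acc - op;
    }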

Regression tested and bootstrapped on arm-linux-gnu and aarch64-linux-gnu.

Best Regards

  Andrea

[1] 


>From 3fc8c64d20efd618b72e4527b7486d779fdaf13b Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Wed, 28 Jul 2021 15:49:16 +0200
Subject: [PATCH 2/2] [PATCH] [2/2] arm: add arm bti pass

gcc/ChangeLog

2021-11-03  Andrea Corallo  

* config.gcc (aarch64*-*-*): Rename 'aarch64-bti-insert.o' into
'aarch-bti-insert.o'.
(arm*-*-*): Add 'aarch-bti-insert.o'.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
Update.
* config/aarch64/aarch64-protos.h (aarch64_bti_enabled): Remove
proto.
* config/aarch64/aarch64.c (aarch_bti_enabled): Rename from
'aarch64_bti_enabled'.
(aarch_bti_j_insn_p, aarch_pac_insn_p, aarch_gen_bti_c)
(aarch_gen_bti_j): New functions.
(aarch64_output_mi_thunk)
(aarch64_print_patchable_function_entry)
(aarch64_file_end_indicate_exec_stack): Update.
* config/aarch64/t-aarch64 (aarch-bti-insert.o): Rename from
'aarch64-bti-insert.o' and account for new folder.
* config/arm/aarch-bti-insert.c: New file, rename from
'gcc/config/aarch64/aarch64-bti-insert.c' and generalize.
* config/arm/aarch-common-protos.h (aarch_bti_enabled)
(aarch_bti_j_insn_p, aarch_pac_insn_p, aarch_gen_bti_c)
(aarch_gen_bti_j): New protos.
* config/arm/arm-passes.def: New file.
* config/arm/arm-protos.h (make_pass_insert_bti): New proto.
* config/arm/arm.c (aarch_bti_enabled, aarch_bti_j_insn_p)
(aarch_pac_insn_p, aarch_gen_bti_c, aarch_pac_insn_p): New
functions.
* config/arm/arm.md (bti): Add pattern.
* config/arm/t-arm (aarch-bti-insert.o): Add rule.
* config/arm/unspecs.md (UNSPECV_BTI): New unspec.

gcc/testsuite/ChangeLog

2021-09-15  Andrea Corallo  

* gcc.target/arm/bti-1.c: New testcase.
* gcc.target/arm/bti-2.c: Likewise.
---
 gcc/config.gcc|  4 +-
 gcc/config/aarch64/aarch64-c.c|  2 +-
 gcc/config/aarch64/aarch64-protos.h   |  1 -
 gcc/config/aarch64/aarch64.c  | 58 ++--
 gcc/config/aarch64/t-aarch64  |  4 +-
 .../aarch-bti-insert.c}   | 66 ---
 gcc/config/arm/aarch-common-protos.h  |  5 ++
 gcc/config/arm/arm-passes.def | 21 ++
 gcc/config/arm/arm-protos.h   |  2 +
 gcc/config/arm/arm.c  | 50 ++
 gcc/config/arm/arm.md |  6 ++
 gcc/config/arm/t-arm  | 10 +++
 gcc/config/arm/unspecs.md |  1 +
 gcc/testsuite/gcc.target/arm/bti-1.c  | 12 
 gcc/testsuite/gcc.target/arm/bti-2.c  | 58 
 15 files changed, 237 insertions(+), 63 deletions(-)
 rename gcc/config/{aarch64/aarch64-bti-insert.c => arm/aarch-bti-insert.c} 
(80%)
 create mode 100644 gcc/config/arm/arm-passes.def
 create mode 100644 gcc/testsuite/gcc.target/arm/bti-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bti-2.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index fb1f06f3da8..5e3114c7f0c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -328,7 +328,7 @@ aarch64*-*-*)
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
-   extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o 
aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o 
falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
+   extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o 
aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o 
falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c 
\$(srcdir)/config/aarch64/aarch64-sve-builtins.h 
\$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
target_has_targetm_common=yes
;;
@@ -352,7 +352,7 @@ arc*-*-*)
;;
 arm*-*-*)
cpu_type=arm
-   extra_objs="arm-builtins.o aarch-common.o"
+   extra_objs="arm-builtins.o aarch-common.o aarch-bti-insert.o"
extra_header

Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 8:12 PM Segher Boessenkool
 wrote:
>
> On Thu, Nov 04, 2021 at 01:22:24PM +0100, Martin Liška wrote:
> > On 11/4/21 12:55, Segher Boessenkool wrote:
> > >On Fri, Oct 29, 2021 at 09:32:21AM +0200, Richard Biener via Gcc-patches
> > >wrote:
> > >>On Fri, Oct 29, 2021 at 2:42 AM Bernhard Reutner-Fischer via
> > >>Gcc-patches  wrote:
> > >>>
> > >>>From: Bernhard Reutner-Fischer 
> > >>>
> > >>>Bump required DejaGnu version to 1.5.3 (or later).
> > >>>Ok for trunk?
> > >>
> > >>OK.
> > >
> > >If we really want to require such a new version of DejaGnu (most
> > >machines I use have 1.5.1 or older), can we include it with GCC please?
> >
> > Do you mean in contrib/download_prerequisites?
>
> I was thinking as actual code, so we can make modifications where we
> need to / want to as well.  But your idea is much less contentious :-)
>
> > Note the version 1.5.1 is 8 years old, what legacy system do you use that
> > has such
> > an old version?
>
> CentOS 7.  Some of those systems cannot run CentOS 8.  And CentOS 8 will
> reach EoL in less than two months, and CentOS Stream is not an option at
> all (and even if it were, it cannot work on many of the machines).
>
> Everything else on CentOS 7 is supported by GCC (it is the oldest
> supported for pretty much everything, but still).  It would be bad for
> DejaGnu to be the limiting factor :-/

So just contribute updated dejagnu packages to CentOS 7 "backports" or
whatever means exists there?  Btw, openSUSE Tumbleweed still has
ppc64 (non-le) support and I bet Debian has that as well.

Richard.

>
> Segher


Re: [PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 9:03 PM Iain Sandoe via Gcc-patches
 wrote:
>
> GCC (currently) has an implementation of pre-compiled headers that relies
> on being able to launch the compiler executable at the same address each
> time.  This constraint is not permitted by some system security models.
>
> The facility is an optimisation; saving the output of parsing a covering
> header file (that may include many others) so that the parsing need not be
> repeated when the same set of headers is needed in many places in a project.
>
> The patch series disables the operation of the PCH-related command-line options,
> but does not cause an error to be emitted.  The intent is that build
> recipes that expect PCH to work will continue to operate, but the compiler
> no longer acts on them and therefore is no longer bound to the requirement
> to launch at a fixed address.
>
>  * When invoked to "generate PCH" the compiler will carry out the parsing
>as before - producing any diagnostics if relevant and then saving a
>stub file (to satisfy build recipe targets).  The stub file is marked as
>invalid PCH.
>
>  * When an include directive is encountered, the compiler no longer checks
>to see if a PCH header is available.
>
>  * The top-level configure option (--disable-host-pch-support) is also
>propagated to libstdc++ where it causes the automatic invocation of the
>existing --disable-libstdxx-pch.
>
> tested on x86_64-darwin, aarch64-darwin, and on x86_64, powerpc64le-linux,
> OK for master?

I had the impression we have support for PCH file relocation to deal with ASLR
at least on some platforms.  But it's IMHO nice to have a way to disable PCH
and that paves the way to have it disabled by default for a release before we
eventually nuke support completely (and then provide a backward-compatible
stub implementation).

So - OK if there are no complaints from reviewers of their respective area the
series touches.

Thanks,
Richard.

> thanks
> Iain
>
> Iain Sandoe (4):
>   config: Add top-level flag to disable host PCH.
>   libstdc++: Adjust build of PCH files accounting configured host
> support.
>   libcpp: Honour a configuration without host support for PCH.
>   c-family, gcc: Allow configuring without support for PCH.
>
>  Makefile.def  |  9 ++--
>  Makefile.in   | 87 +--
>  configure | 42 +++
>  configure.ac  | 35 
>  gcc/c-family/c-pch.c  | 23 ++-
>  gcc/config.in |  6 +++
>  gcc/config/host-darwin.c  | 18 
>  gcc/configure | 29 -
>  gcc/configure.ac  | 17 
>  gcc/doc/install.texi  |  6 +++
>  libcpp/config.in  |  3 ++
>  libcpp/configure  | 24 +++
>  libcpp/configure.ac   | 16 +++
>  libcpp/files.c| 14 +++
>  libcpp/pch.c  | 12 ++
>  libstdc++-v3/acinclude.m4 | 49 +-
>  libstdc++-v3/configure| 71 +---
>  libstdc++-v3/configure.ac | 11 -
>  18 files changed, 391 insertions(+), 81 deletions(-)
>
> --
> 2.24.3 (Apple Git-128)
>


Re: [PATCH] Add !flag_signaling_nans to simplification: (trunc)copysign((extend)a, (extend)b) to copysign (a, b).

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 3:20 AM liuhongt  wrote:
>
> > Note that this is not safe with -fsignaling-nans, so needs to be disabled
> > for that option (if there isn't already logic somewhere with that effect),
> > because the extend will convert a signaling NaN to quiet (raising
> > "invalid"), but copysign won't, so this transformation could result in a
> > signaling NaN being wrongly returned when the original code would never
> > have returned a signaling NaN.
> >
> > --
> > Joseph S. Myers
> > jos...@codesourcery.com
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog
>
> * match.pd
> (Simplifcation (trunc)copysign((extend)a, (extend)b) to
> .COPYSIGN (a, b)): Add !flag_signaling_nans.
> ---
>  gcc/match.pd | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fb1065dc0e6..d6a8dd0dd20 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6176,6 +6176,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (simplify
>(convert (copysigns (convert@2 @0) (convert @1)))
> (if (optimize
> +   && !flag_signaling_nans

Please use !HONOR_SNANS (@2)

OK with that change.

> && types_match (type, TREE_TYPE (@0))
> && types_match (type, TREE_TYPE (@1))
> && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@2))
> --
> 2.18.1
>
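
(For illustration, not from the patch: the fold in question rewrites code like
the following, where narrow_copysign is just an example name.  With
-fno-signaling-nans the whole expression can become copysignf (a, b); when
signaling NaNs are honoured the (double) conversion must stay, because it
quiets an sNaN in 'a' while copysignf would pass it through unchanged.)

    #include <math.h>

    float
    narrow_copysign (float a, float b)
    {
      return (float) copysign ((double) a, (double) b);
    }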


Re: [PATCH 1/2] [Gimple] Simplify (trunc)fmax/fmin((extend)a, (extend)b) to MAX/MIN(a,b)

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 6:38 AM liuhongt  wrote:
>
> a and b are same type as trunc type and has less precision than
> extend type, the transformation is guarded by flag_finite_math_only.
>
> Bootstrapped and regtested under x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR target/102464
> * match.pd: Simplify (trunc)fmax/fmin((extend)a, (extend)b) to
> MAX/MIN(a,b)
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr102464-maxmin.c: New test.
> ---
>  gcc/match.pd  | 14 ++
>  .../gcc.target/i386/pr102464-maxmin.c | 44 +++
>  2 files changed, 58 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-maxmin.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f63079023d0..857ce7f712a 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6182,6 +6182,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && direct_internal_fn_supported_p (IFN_COPYSIGN,
>   type, OPTIMIZE_FOR_BOTH))
>  (IFN_COPYSIGN @0 @1
> +
> +(for maxmin (max min)
> + (simplify
> +  (convert (maxmin (convert@2 @0) (convert @1)))
> +   (if (flag_finite_math_only

I suppose you are concerned about infinities, not about NaNs.
Please use !HONOR_INFINITIES (@2) then (in general testing
flag_* is frowned upon).  You may want to do the FLOAT_TYPE_P
tests first.

> +   && optimize
> +   && FLOAT_TYPE_P (type)
> +   && FLOAT_TYPE_P (TREE_TYPE (@2))
> +   && types_match (type, TREE_TYPE (@0))
> +   && types_match (type, TREE_TYPE (@1))
> +   && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@2))
> +   && optab_handler (maxmin == MAX_EXPR ? smax_optab : smin_optab,
> +   TYPE_MODE (type)) != CODE_FOR_nothing)
> +(maxmin @0 @1
>  #endif
>
>  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
> diff --git a/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c 
> b/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c
> new file mode 100644
> index 000..37867235a6c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102464-maxmin.c
> @@ -0,0 +1,44 @@
> +/* PR target/102464.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ffast-math -ftree-vectorize 
> -mtune=generic -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times "vmaxph" 3 } }  */
> +/* { dg-final { scan-assembler-times "vminph" 3 } }  */
> +/* { dg-final { scan-assembler-times "vmaxsh" 3 } }  */
> +/* { dg-final { scan-assembler-times "vminsh" 3 } }  */
> +/* { dg-final { scan-assembler-times "vmaxps" 2 } }  */
> +/* { dg-final { scan-assembler-times "vminps" 2 } }  */
> +/* { dg-final { scan-assembler-times "vmaxss" 2 } }  */
> +/* { dg-final { scan-assembler-times "vminss" 2 } }  */
> +/* { dg-final { scan-assembler-times "vmaxpd" 1 } }  */
> +/* { dg-final { scan-assembler-times "vminpd" 1 } }  */
> +/* { dg-final { scan-assembler-times "vmaxsd" 1 } }  */
> +/* { dg-final { scan-assembler-times "vminsd" 1 } }  */
> +
> +#include
> +#define FOO(CODE,TYPE,SUFFIX)  \
> +  void \
> +  foo_vect_##CODE##TYPE##SUFFIX (TYPE* __restrict a, TYPE* b, TYPE* c) \
> +  {\
> +for (int i = 0; i != 8; i++)   \
> +  a[i] = CODE##SUFFIX (b[i], c[i]);  
>   \
> +  }\
> +  TYPE \
> +  foo_##CODE##TYPE##SUFFIX (TYPE b, TYPE c)\
> +  {\
> +return CODE##l (b, c); \
> +  }
> +
> +FOO (fmax, _Float16, f);
> +FOO (fmax, _Float16,);
> +FOO (fmax, _Float16, l);
> +FOO (fmin, _Float16, f);
> +FOO (fmin, _Float16,);
> +FOO (fmin, _Float16, l);
> +
> +FOO (fmax, float,);
> +FOO (fmax, float, l);
> +FOO (fmin, float,);
> +FOO (fmin, float, l);
> +
> +FOO (fmax, double, l);
> +FOO (fmin, double, l);
> --
> 2.18.1
>


Re: [PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 05, 2021 at 10:42:05AM +0100, Richard Biener via Gcc-patches wrote:
> I had the impression we have support for PCH file relocation to deal with ASLR
> at least on some platforms.

Unfortunately we do not, e.g. if you build cc1/cc1plus as PIE on
x86_64-linux, PCH will stop working unless one always invokes it with
disabled ASLR through personality.

I think this is related to function pointers and pointers to .rodata/.data
etc. variables in GC memory, we currently do not relocate that.

What we perhaps could do (at least assuming all the ELF PT_LOAD segments
are adjacent, with a single load base for them - I think at least ia64
non-PIE binaries were violating this by having .text and .data PT_LOAD
segments many terabytes apart with a hole in between not protected in any
way, but dunno if that applies to PIEs too) is try, in a host-specific
way, to remember the address range in which the function pointers and
.rodata/.data can exist, record the extent start and end at PCH generation,
and on PCH load query those addresses for the current compiler and relocate
everything in that extent by the load bias from the last run.
But, the assumption for this is that those function and data/rodata pointers
in GC memory are actually marked at least as pointers...
Do we e.g. have objects with virtual classes in GC memory and if so, do we
catch their virtual table pointers?

Jakub



Re: [PATCH 2/2] [Gimple] Simplify (trunc)fma ((extend)a, (extend)b, (extend)c) to IFN_FMA (a,b, c).

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 6:38 AM liuhongt  wrote:
>
> a, b, c are same type as truncation type and has less precision than
> extend type, the optimization is guarded under
> flag_unsafe_math_optimizations.
>
> Bootstrapped and regtested under x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?

OK.

Thanks,
Richard.

> gcc/ChangeLog:
> PR target/102464
> * match.pd: Simplify
> (trunc)fma ((extend)a, (extend)b, (extend)c) to IFN_FMA (a, b,
> c) under flag_unsafe_math_optimizations.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr102464-fma.c: New test.
> ---
>  gcc/match.pd | 16 ++
>  gcc/testsuite/gcc.target/i386/pr102464-fma.c | 32 
>  2 files changed, 48 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464-fma.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 857ce7f712a..fb1065dc0e6 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6196,6 +6196,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && optab_handler (maxmin == MAX_EXPR ? smax_optab : smin_optab,
> TYPE_MODE (type)) != CODE_FOR_nothing)
>  (maxmin @0 @1
> +
> +(for froms (BUILT_IN_FMAF BUILT_IN_FMA BUILT_IN_FMAL)
> + tos (IFN_FMA IFN_FMA IFN_FMA)
> + (simplify
> +  (convert (froms (convert@3 @0) (convert @1) (convert @2)))
> +   (if (flag_unsafe_math_optimizations
> +   && optimize
> +   && FLOAT_TYPE_P (type)
> +   && FLOAT_TYPE_P (TREE_TYPE (@3))
> +   && types_match (type, TREE_TYPE (@0))
> +   && types_match (type, TREE_TYPE (@1))
> +   && types_match (type, TREE_TYPE (@2))
> +   && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (@3))
> +   && direct_internal_fn_supported_p (as_internal_fn (tos),
> + type, OPTIMIZE_FOR_BOTH))
> +(tos @0 @1 @2
>  #endif
>
>  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
> diff --git a/gcc/testsuite/gcc.target/i386/pr102464-fma.c 
> b/gcc/testsuite/gcc.target/i386/pr102464-fma.c
> new file mode 100644
> index 000..9c70d93d980
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102464-fma.c
> @@ -0,0 +1,32 @@
> +/* PR target/102464.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ffast-math -ftree-vectorize 
> -mtune=generic -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times "vfmadd...ph" 3 } }  */
> +/* { dg-final { scan-assembler-times "vfmadd...sh" 3 } }  */
> +/* { dg-final { scan-assembler-times "vfmadd...ps" 2 } }  */
> +/* { dg-final { scan-assembler-times "vfmadd...ss" 2 } }  */
> +/* { dg-final { scan-assembler-times "vfmadd...pd" 1 } }  */
> +/* { dg-final { scan-assembler-times "vfmadd...sd" 1 } }  */
> +
> +#include
> +#define FOO(TYPE,SUFFIX)   \
> +  void \
> +  foo_vect_##TYPE##SUFFIX (TYPE* __restrict a, TYPE* b, TYPE* c, TYPE* d) \
> +  {\
> +for (int i = 0; i != 8; i++)   \
> +  a[i] = fma##SUFFIX (b[i], c[i], d[i]);   \
> +  }\
> +  TYPE \
> +  foo_##TYPE##SUFFIX (TYPE b, TYPE c, TYPE d)  \
> +  {\
> +return fma##l (b, c, d);   \
> +  }
> +
> +FOO (_Float16, f);
> +FOO (_Float16,);
> +FOO (_Float16, l);
> +
> +FOO (float,);
> +FOO (float, l);
> +
> +FOO (double, l);
> --
> 2.18.1
>
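
(For illustration, not from the patch: the source pattern being folded looks
like the following, with narrow_fma just an example name.  Under
-funsafe-math-optimizations the widening, the double-precision fma and the
final truncation may be replaced by a single-precision fused multiply-add
(IFN_FMA, essentially fmaf (a, b, c)); the two can differ by a double
rounding, which is why the transformation needs the flag guard.)

    #include <math.h>

    float
    narrow_fma (float a, float b, float c)
    {
      return (float) fma ((double) a, (double) b, (double) c);
    }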


Re: [PATCH] x86: Make stringop_algs::stringop_strategy ctor constexpr [PR100246]

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 01:45:38PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Thu, Nov 04, 2021 at 12:39:34PM +, Iain Sandoe wrote:
> > Bootstrap succeeded with Apple clang-503.0.40 (Xcode 5.1.1) on macOS 10.8
> > which is the earliest version I expect to work (previous xcode impl. have 
> > more
> > C++11 incompatibilities).   So OK from a Darwin PoV.
> > 
> > The other reported toolchain with the issue was GCC-4.9.2 as discussed on
> > IRC - this also seems OK.
> 
> > > Especially because 11.x is not going to have the dyninit optimization for
> > > sure, it would be nice to do this on the 11 branch too.

Bootstrapped/regtested on x86_64-linux and i686-linux successfully too, with
slightly different formatting, as I think in our coding style constexpr
should go on the previous line, and the ctor didn't have a space before '('.

Ok for trunk and 11.3?

2021-11-05  Jakub Jelinek  

PR bootstrap/100246
* config/i386/i386.h
(stringop_algs::stringop_strategy::stringop_strategy): Make the ctor
constexpr.

--- gcc/config/i386/i386.h.jj   2021-09-28 23:18:35.282563395 +0200
+++ gcc/config/i386/i386.h  2021-11-04 10:48:47.165086806 +0100
@@ -78,8 +78,9 @@ struct stringop_algs
this issue.  Since this header is used by code compiled with the C
compiler we must guard the addition.  */
 #ifdef __cplusplus
-stringop_strategy(int _max = -1, enum stringop_alg _alg = libcall,
- int _noalign = false)
+constexpr
+stringop_strategy (int _max = -1, enum stringop_alg _alg = libcall,
+  int _noalign = false)
   : max (_max), alg (_alg), noalign (_noalign) {}
 #endif
 const int max;


Jakub



Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 7:54 AM Jakub Jelinek via Gcc-patches
 wrote:
>
> On Thu, Nov 04, 2021 at 11:05:35PM -0700, Andrew Pinski via Gcc-patches wrote:
> > > I noticed that the macro “WIDE_INT_MAX_ELTS” has different values in 
> > > GCC11 and GCC12 (on the same X86 machine)
> > >
> > > For gcc11:
> > >
> > > wide int max elts =3
> > >
> > > For gcc12:
> > >
> > > wide int max elts =9
> > >
> > > Does anyone know what’s the reason for this difference?
> > >
> > > Thanks a lot for any help.
> >
> > Yes originally, the x86 backend only used OI and XI modes for vectors
> > during data movement.
> > This changed with r10-5741-gc57b4c22089 which added the use of OI mode
> > for TImode adding with overflow and then MAX_BITSIZE_MODE_ANY_INT
> > changed from 128 to 160 (in r10-6178-gc124b345e46078) to fix the ICE
> > introduced by that change .
> > And then with r12-979-g782e57f2c09 removed the define of
> > MAX_BITSIZE_MODE_ANY_INT.
> > What was not mentioned in r12-979-g782e57f2c09 (or before) is why
> > MAX_BITSIZE_MODE_ANY_INT was defined in the first place for x86. HJL
> > assumed there was some problem with how it was defined, not
> > realizing memory usage was the reason.
> > It was defined to keep the memory usage down; as you can see, it is now
> > almost a 3x memory increase for all wi::wide_int.
> > I do think r12-979-g782e57f2c09 should be reverted with an added
> > comment on saying defining MAX_BITSIZE_MODE_ANY_INT here is to
> > decrease the memory footprint.
>
> I completely agree.

Do we have permanent objects embedding wide[st]_int?  I know of
class loop and loop_bound.  Btw, there are other targets with large
integer modes (aarch64 with XImode) and not defining
MAX_BITSIZE_MODE_ANY_INT

Richard.

> Jakub
>


Re: [PATCH] Update documentation for -ftree-loop-vectorize and -ftree-slp-vectorize which are enabled by default at -O2.

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 8:08 AM liuhongt via Gcc-patches
 wrote:
>
> Bootstrapped on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?

OK

> gcc/ChangeLog:
>
> PR tree-optimization/103077
> * doc/invoke.texi (Options That Control Optimization):
> Update documentation for -ftree-loop-vectorize and
> -ftree-slp-vectorize which are enabled by default at -O2.
> ---
>  gcc/doc/invoke.texi | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index c5730228821..22d17090641 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -11958,13 +11958,13 @@ and @option{-ftree-slp-vectorize} if not explicitly 
> specified.
>  @item -ftree-loop-vectorize
>  @opindex ftree-loop-vectorize
>  Perform loop vectorization on trees. This flag is enabled by default at
> -@option{-O3} and by @option{-ftree-vectorize}, @option{-fprofile-use},
> +@option{-O2} and by @option{-ftree-vectorize}, @option{-fprofile-use},
>  and @option{-fauto-profile}.
>
>  @item -ftree-slp-vectorize
>  @opindex ftree-slp-vectorize
>  Perform basic block vectorization on trees. This flag is enabled by default 
> at
> -@option{-O3} and by @option{-ftree-vectorize}, @option{-fprofile-use},
> +@option{-O2} and by @option{-ftree-vectorize}, @option{-fprofile-use},
>  and @option{-fauto-profile}.
>
>  @item -ftrivial-auto-var-init=@var{choice}
> --
> 2.18.1
>


[PATCH] c++, v2: Fix up -fstrong-eval-order handling of call arguments [PR70796]

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 04, 2021 at 03:07:57PM +0100, Jakub Jelinek via Gcc-patches wrote:
> For the METHOD_TYPE first argument
> I use a temporary always though, that should be always is_gimple_reg_type...

Doing so regressed
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++11  scan-tree-dump gimple "V::V 
.this, _1.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++11  scan-tree-dump gimple "Y::Y 
._2, _3.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++14  scan-tree-dump gimple "V::V 
.this, _1.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++14  scan-tree-dump gimple "Y::Y 
._2, _3.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++17  scan-tree-dump gimple "V::V 
.this, _1.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++17  scan-tree-dump gimple "Y::Y 
._2, _3.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++20  scan-tree-dump gimple "V::V 
.this, _1.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++20  scan-tree-dump gimple "Y::Y 
._2, _3.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++2b  scan-tree-dump gimple "V::V 
.this, _1.;"
+FAIL: g++.dg/cpp1z/inh-ctor23.C  -std=gnu++2b  scan-tree-dump gimple "Y::Y 
._2, _3.;"
because the testcase relies on this being passed directly in gimple dump,
rather than some SSA_NAME based on this.
Instead of changing the testcase, I've figured out that it is actually quite
easy to restore the previous behavior here, for two reasons.
One is that there are no side-effects in the ctor call arguments, so
forcing this into a temporary wasn't really needed; we can, as in the
other cases, quite cheaply check whether the call has any arguments with
side-effects.
The other reason is that in C++ this can't be modified, and similarly
vars with reference type can't be modified, so for those we don't need to
force them into a temporary either, even if there are side-effects.
This means e.g. on
struct S
{
  void foo (S &, int);
  void bar (int);
};

void S::foo (S &p, int x)
{
  this->bar (++x);
  p.bar (++x);
}
we can keep what we were emitting before even for -std=c++17.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
and after a while for 7.3 too?

2021-11-05  Jakub Jelinek  

PR c++/70796
* cp-gimplify.c (cp_gimplify_arg): New function.
(cp_gimplify_expr): Use cp_gimplify_arg instead of gimplify_arg,
pass true as last argument to it if there are any following
arguments in strong evaluation order with side-effects.

* g++.dg/cpp1z/eval-order11.C: New test.

--- gcc/cp/cp-gimplify.c.jj 2021-10-29 19:33:10.542344939 +0200
+++ gcc/cp/cp-gimplify.c2021-11-05 00:41:29.124227336 +0100
@@ -398,6 +398,47 @@ gimplify_to_rvalue (tree *expr_p, gimple
   return t;
 }
 
+/* Like gimplify_arg, but if ORDERED is set (which should be set if
+   any of the arguments this argument is sequenced before has
+   TREE_SIDE_EFFECTS set, make sure expressions with is_gimple_reg_type type
+   are gimplified into SSA_NAME or a fresh temporary and for
+   non-is_gimple_reg_type we don't optimize away TARGET_EXPRs.  */
+
+static enum gimplify_status
+cp_gimplify_arg (tree *arg_p, gimple_seq *pre_p, location_t call_location,
+bool ordered)
+{
+  enum gimplify_status t;
+  if (ordered
+  && !is_gimple_reg_type (TREE_TYPE (*arg_p))
+  && TREE_CODE (*arg_p) == TARGET_EXPR)
+{
+  /* gimplify_arg would strip away the TARGET_EXPR, but
+that can mean we don't copy the argument and some following
+argument with side-effect could modify it.  */
+  protected_set_expr_location (*arg_p, call_location);
+  return gimplify_expr (arg_p, pre_p, NULL, is_gimple_lvalue, fb_either);
+}
+  else
+{
+  t = gimplify_arg (arg_p, pre_p, call_location);
+  if (t == GS_ERROR)
+   return GS_ERROR;
+  else if (ordered
+  && is_gimple_reg_type (TREE_TYPE (*arg_p))
+  && is_gimple_variable (*arg_p)
+  && TREE_CODE (*arg_p) != SSA_NAME
+  /* No need to force references into register, references
+ can't be modified.  */
+  && !TYPE_REF_P (TREE_TYPE (*arg_p))
+  /* And this can't be modified either.  */
+  && *arg_p != current_class_ptr)
+   *arg_p = get_initialized_tmp_var (*arg_p, pre_p);
+  return t;
+}
+
+}
+
 /* Do C++-specific gimplification.  Args are as for gimplify_expr.  */
 
 int
@@ -613,7 +654,8 @@ cp_gimplify_expr (tree *expr_p, gimple_s
  gcc_assert (call_expr_nargs (*expr_p) == 2);
  gcc_assert (!CALL_EXPR_ORDERED_ARGS (*expr_p));
  enum gimplify_status t
-   = gimplify_arg (&CALL_EXPR_ARG (*expr_p, 1), pre_p, loc);
+   = cp_gimplify_arg (&CALL_EXPR_ARG (*expr_p, 1), pre_p, loc,
+  TREE_SIDE_EFFECTS (CALL_EXPR_ARG (*expr_p, 0)));
  if (t == GS_ERROR)
ret = GS_ERROR;
}
@@ -622,10 +664,18 @@ cp_gimplify_expr (tree *expr_p, gimple_s
  /* Leave the last argument for gimplify_call_expr, to avoid probl

[PATCH] Split vector loop analysis into main and epilogue analysis

2021-11-05 Thread Richard Biener via Gcc-patches
As discussed, this splits the analysis loop into two: first settling
on a vector mode used for the main loop, and only then analyzing
the epilogue of that for possible vectorization.  That makes it
easier to put in support for unrolled main loops.

On the way I've realized some cleanup opportunities, namely caching
n_stmts in vec_info_shared (it's computed by dataref analysis) to
avoid passing it around, and not setting/clearing loop->aux
during analysis - try_vectorize_loop_1 will ultimately set it
on those loops we vectorize.

This also gets rid of the previously introduced callback in
vect_analyze_loop_1 in favor of making that advance the mode iterator.
I'm now pushing VOIDmode explicitly into the vector_modes array,
which makes the re-start on the epilogue side a bit more
straightforward.  Note that we will now use auto-detection of the
vector mode in case the main loop used it and we want to try
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P, and the first mode from
the target array if not.  I've added a comment that says we may
want to make sure we don't try vectorizing the epilogue with a
bigger vector size than the main loop, but I guess that situation isn't
very likely to appear in practice (and it was also present
before this change).

In principle this change should not change vectorization decisions
but the way we handled re-analyzing epilogues as main loops makes
me only 99% sure that it does.

Bootstrapped and tested on x86_64-unkown-linux-gnu.

OK?

Thanks,
Richard.

2021-11-05  Richard Biener  

* tree-vectorizer.h (vec_info_shared::n_stmts): Add.
(LOOP_VINFO_N_STMTS): Likewise.
(vec_info_for_bb): Remove unused function.
* tree-vectorizer.c (vec_info_shared::vec_info_shared):
Initialize n_stmts member.
* tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
(vect_create_loop_vinfo): Do not set loop->aux.
(vect_analyze_loop_2): Do not get n_stmts as argument,
instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
here.
(vect_analyze_loop_1): Remove callback, get the mode iterator
and autodetected_vector_mode as argument, advancing the
iterator and initializing autodetected_vector_mode here.
(vect_analyze_loop): Split analysis loop into two, first
processing main loops only and then epilogues.
---
 gcc/tree-vect-loop.c  | 415 +-
 gcc/tree-vectorizer.c |   3 +-
 gcc/tree-vectorizer.h |  10 +-
 3 files changed, 212 insertions(+), 216 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 13a53436729..abf87f99d6d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #define INCLUDE_ALGORITHM
-#define INCLUDE_FUNCTIONAL
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared 
*shared,
  = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
 }
 
-  gcc_assert (!loop->aux);
-  loop->aux = loop_vinfo;
   return loop_vinfo;
 }
 
@@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info 
loop_vinfo,
for it.  The different analyses will record information in the
loop_vec_info struct.  */
 static opt_result
-vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
+vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
 {
   opt_result ok = opt_result::success ();
   int res;
@@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal, unsigned *n_stmts)
   opt_result res
= vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
 &LOOP_VINFO_DATAREFS (loop_vinfo),
-n_stmts);
+&LOOP_VINFO_N_STMTS (loop_vinfo));
   if (!res)
{
  if (dump_enabled_p ())
@@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal, unsigned *n_stmts)
   poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 
   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
-  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
+  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
   if (!ok)
 return ok;
 
@@ -2641,6 +2638,7 @@ start_over:
LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
 
   /* Ok to vectorize!  */
+  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
   return opt_result::success ();
 
 again:
@@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
   return true;
 }
 
-/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
-   not NULL.  Process the analyzed loop with PROCESS even if analysis
-   failed.  Sets *N_STMTS and FATAL according to the analysis.
+/* Analyze LOOP with VECTOR_MO

[PATCH] Fix PR103028

2021-11-05 Thread Andreas Krebbel via Gcc-patches
This prevents find_cond_trap from being invoked after reload.  It may
generate compares which would require reloading.

Bootstrapped and regression tested on s390x.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/103028
* ifcvt.c (find_if_header): Invoke find_cond_trap only before
reload.

gcc/testsuite/ChangeLog:

PR rtl-optimization/103028
* gcc.dg/pr103028.c: New test.
---
 gcc/ifcvt.c |  3 ++-
 gcc/testsuite/gcc.dg/pr103028.c | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103028.c

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 017944f4f79..1f5b9476ac2 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -4341,7 +4341,8 @@ find_if_header (basic_block test_bb, int pass)
   && cond_exec_find_if_block (&ce_info))
 goto success;
 
-  if (targetm.have_trap ()
+  if (!reload_completed
+  && targetm.have_trap ()
   && optab_handler (ctrap_optab, word_mode) != CODE_FOR_nothing
   && find_cond_trap (test_bb, then_edge, else_edge))
 goto success;
diff --git a/gcc/testsuite/gcc.dg/pr103028.c b/gcc/testsuite/gcc.dg/pr103028.c
new file mode 100644
index 000..e299ac5d5b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103028.c
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/103028 */
+/* { dg-do compile } */
+/* { dg-options "-Og -fif-conversion2 -fharden-conditional-branches" } */
+
+/* This used to fail on s390x only with -march=z9-109 and -march=z9-ec */
+/* { dg-additional-options "-march=z9-ec" { target s390*-*-* } } */
+
+unsigned char x;
+int foo(void)
+{
+  unsigned long long i = x;
+  i = i + 0x8000;
+  if (i > 0x)
+return x;
+  return 0;
+}
-- 
2.31.1



Re: [RFA] Minor optimization of variable bit testing

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 4:09 PM Jeff Law  wrote:
>
>
>
> On 11/3/2021 2:15 AM, Richard Biener via Gcc-patches wrote:
> > On Tue, Nov 2, 2021 at 4:53 PM Jeff Law  wrote:
> >>
>> I was wandering through SPEC chasing down instances where we should be
> >> generating bit-test, bit-set and bit-clear types of instructions for our
> >> target when I ran across a generic missed optimization in this space.
> >>
> >>
> >> (((1 << N) & C) != 0)  -> (N == C')
> >> (((1 << N) & C) == 0)  -> (N != C')
> >>
> >> Where C is a constant power of 2 and C' is log2 (C).
> >>
> >>
> >>
> >> That obviously avoids the shift by a variable amount and the bit masking
> >> which is the primary effect.  I did see cases where we were able to
> >> constant propagate into uses of N, but those were only in PHI nodes and
> >> never triggered any real secondary effects in the cases I looked at.
> >>
> >>
> >> Anyway, it's a fairly minor optimization, but with the analysis done and
> >> patch in hand, it's silly not to take the easy win.
> >>
> >>
> >> Bootstrapped and regression tested on x86_64 and verified that the
> >> affected spec benchmark (gcc itself) still passes on our target.
> >>
> >> OK for the trunk?  Note I added the patterns at the end of match.pd.
> >> Certainly open to moving them elsewhere.
> > There are related patterns like
> >
> > /* (CST1 << A) == CST2 -> A == ctz (CST2) - ctz (CST1)
> > (CST1 << A) != CST2 -> A != ctz (CST2) - ctz (CST1)
> >
> > please move the new patterns next to those.
> Will do.   FWIW, it feels like match.pd is getting a bit unwieldy in
> terms of being able to find things.  I wonder if we should be looking to
> break it up into multiple files.  Not critical of course, but it's grown
> to ~6k lines at this point.

Originally I had multiple .pd files and match.pd #including them.  But at
some point it was quite difficult to decide where to put a pattern which
resulted in a similarly messy state.

Btw, dwarf2out.c is still the largest file at 33k lines and match.pd isn't
amongst the 10 largest.

But yes, some visual separation of things might help.  I'll also note
that pattern order affects the generated matching code since we
try to preserve the invariant that earlier matching patterns need
to match first.

>
> >
> > +/* ((1 << n) & M) != 0  -> n == log2 (M) */
> > +(simplify
> > + (ne
> > +  (bit_and
> > +   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) 
> > integer_zerop@3)
> > + (eq @1 { build_int_cst (integer_type_node,
> > + wi::exact_log2 (wi::to_wide (@2))); }))
> > +
> > +/* ((1 << n) & M) == 0  -> n != log2 (M) */
> > +(simplify
> > + (eq
> > +  (bit_and
> > +   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) 
> > integer_zerop@3)
> > + (ne @1 { build_int_cst (integer_type_node,
> > + wi::exact_log2 (wi::to_wide (@2))); }))
> >
> > you don't need @3 or @0 so no need to specify them.
> Ah, I didn't know the language allowed us to do that.  Will do and
> adjust operand #s.
>
>
>
> >   You can merge the
> > patterns with
> >
> > (for cmp (ne eq)
> > icmp (eq ne)
> Thanks.  I was pretty sure we we had this kind of mapping capability,
> now that I know what to look for, it's easy to find.
>
>
> >(simplify
> >  (cmp
> > +  (bit_and
> >(nop_convert? (lshift integer_onep @1)) integer_pow2p@2) 
> > integer_zerop)
> >  (icmp @1 { wide_int_to_tree (TREE_TYPE (@1),
> > + wi::exact_log2 (wi::to_wide (@2))); }))
> >
> > I belive the integer constant you build should be of the type of @1 (I
> > fixed that above,
> > also using wide_int_to_tree.  The pattern is written in a way that _could_ 
> > match
> > vector operations and a vector by vector shift in which case the
> > wi::to_wide would
> > ICE - integer_pow2p currently does not match vector constants.  But maybe be
> > defensive and add
> >
> >(if (INTEGRAL_TYPE_P (TREE_TYPE (@1)))
> >
> > I think the patch is OK with those changes.
> I'll add that test as well and retest.
>
> Thanks,
> jeff
>


Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 4:11 PM Martin Liška  wrote:
>
> On 11/4/21 14:09, Richard Biener wrote:
> > But we shouldn't start with the current global options but with ones
> > we saved for
> > optimize attribute/pragma processing, no?
>
> We hit the issue when we combine cmdline and pragma optimize options.
>
> >
> >> Problem of -gtoggle is that it does not directly influence an option, but 
> >> it negates it.
> >>
> >> That said, I think my patch with gtoggle_used is a reasonable workaround.
> > Well, then we could as well unset flag_gtoggle after processing it, no?
>
> Yeah, that works! :)
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

OK if you add a comment like

 /* Make sure to process -gtoggle only once.  */

Richard.

> Thanks,
> Martin


Re: [AArch64] Fix NEON load/store gimple lowering and big-endian testisms

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, Nov 4, 2021 at 6:49 PM Richard Sandiford via Gcc-patches
 wrote:
>
> "Andre Vieira (lists)"  writes:
> > Hi,
> >
> > This should address the ubsan bootstrap build and big-endian testisms
> > reported against the last NEON load/store gimple lowering patch. I also
> > fixed a follow-up issue where the alias information was leading to a bad
> > codegen transformation. The NEON intrinsics specifications do not forbid
> > the use of memory accesses with different pointer types. In fact you
> > will see intrinsic user code loading an int16x8_t vector from an int
> > pointer, so we must make sure GCC is aware a NEON memory access of an
> > 'int' pointer can alias with a 'short' pointer.
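
(For illustration, a minimal sketch of the kind of user code being described;
hypothetical example, not from the patch:

  #include <arm_neon.h>

  int buf[4];

  int16x8_t load_after_store (void)
  {
    buf[0] = 1;                                /* store through an 'int' lvalue */
    return vld1q_s16 ((const int16_t *) buf);  /* the NEON load must be seen
                                                  to alias that store */
  }

Without a ref-all pointer type on the lowered MEM_REF, the load could be
treated as not aliasing the int store and be moved past it.)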
> >
> > Bootstrapped aarch64-linux-gnu (also did an ubsan bootstrap).
> >
> > Is this OK for trunk?
> >
> > gcc/ChangeLog:
> >
> >  * config/aarch64/aarch64-builtins.c
> > (aarch64_general_gimple_fold_builtin): Change pointer alignment and alias.
> >
> > gcc/testsuite/ChangeLog:
> >
> >  * gcc.target/aarch64/fmla_intrinsic_1.c: Fix big-endian testism.
> >  * gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
> >  * gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
> >
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> > b/gcc/config/aarch64/aarch64-builtins.c
> > index 
> > a815e4cfbccab692ca688ba87c71b06c304abbfb..fc8fcb02c55e22963d2a3bf77b4749eb5b1c1561
> >  100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -2486,16 +2486,22 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> > fcode, gcall *stmt,
> >   aarch64_simd_type_info simd_type
> > = aarch64_simd_types[mem_type];
> >   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> > + elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> > + TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
> >   tree zero = build_zero_cst (elt_ptr_type);
> >   gimple_seq stmts = NULL;
> >   tree base = gimple_convert (&stmts, elt_ptr_type,
> >   args[0]);
>
> This conversion seems redundant.  Do things work if we use args[0]
> directly?

Just use

   tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
VOIDmode, true);

that will build a ref-all pointer for you, appropriately shared.

>
> > + /* Use element type alignment.  */
> > + tree access_type
> > +   = build_aligned_type (simd_type.itype,
> > + TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
>
> I think simd_type.eltype is more natural than TREE_TYPE (simd_type.itype)
> here, to match the pointer target type.
>
> Same idea for the stores.
>
> >   if (stmts)
> > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >   new_stmt
> > = gimple_build_assign (gimple_get_lhs (stmt),
> >fold_build2 (MEM_REF,
> > -   simd_type.itype,
> > +   access_type,
> > base, zero));
> > }
> >   break;
> > @@ -2508,17 +2514,22 @@ aarch64_general_gimple_fold_builtin (unsigned int 
> > fcode, gcall *stmt,
> >   aarch64_simd_type_info simd_type
> > = aarch64_simd_types[mem_type];
> >   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> > + elt_ptr_type = build_distinct_type_copy (elt_ptr_type);
> > + TYPE_REF_CAN_ALIAS_ALL (elt_ptr_type) = true;
> >   tree zero = build_zero_cst (elt_ptr_type);
> >   gimple_seq stmts = NULL;
> >   tree base = gimple_convert (&stmts, elt_ptr_type,
> >   args[0]);
> > + /* Use element type alignment.  */
> > + tree access_type
> > +   = build_aligned_type (simd_type.itype,
> > + TYPE_ALIGN (TREE_TYPE (simd_type.itype)));
> >   if (stmts)
> > gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >   new_stmt
> > -   = gimple_build_assign (fold_build2 (MEM_REF,
> > -  simd_type.itype,
> > -  base,
> > -  zero), args[1]);
> > +   = gimple_build_assign (fold_build2 (MEM_REF, access_type,
> > +   base, zero),
> > +  args[1]);
> > }
> >   break;
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c 
> > b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > index 
> > adb787a8599af23847dd62dcd153d7cfe43dacc0..c1aeb06e74753052c2ee441b361b92148f1b4b0a
> >  100644
> > --- a/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c
> > @@ -107,10 +107,12 @@ main (int argc, char *

Re: [PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 10:54 AM Jakub Jelinek  wrote:
>
> On Fri, Nov 05, 2021 at 10:42:05AM +0100, Richard Biener via Gcc-patches 
> wrote:
> > I had the impression we have support for PCH file relocation to deal with 
> > ASLR
> > at least on some platforms.
>
> Unfortunately we do not, e.g. if you build cc1/cc1plus as PIE on
> x86_64-linux, PCH will stop working unless one always invokes it with
> disabled ASLR through personality.
>
> I think this is related to function pointers and pointers to .rodata/.data
> etc. variables in GC memory, we currently do not relocate that.
>
> What we perhaps could do is (at least assuming all the ELF PT_LOAD segments
> are adjacent with a single load base for them - I think at least ia64
> non-PIE binaries were violating this by having .text and .data PT_LOAD
> segments many terabytes apart with a hole in between not protected in any
> way, but dunno if that is for PIEs too), perhaps try in a host
> specific way remember the address range in which the function pointers and
> .rodata/.data can exist, remember the extent start and end from PCH generation
> and on PCH load query those addresses for the current compiler and relocate
> everything in that extent by the load bias from the last run.
> But, the assumption for this is that those function and data/rodata pointers
> in GC memory are actually marked at least as pointers...

If any such pointers exist they must be marked GTY((skip)) since they do not
point to GC memory...  So we'd need to invent special-handling for those.

> Do we e.g. have objects with virtual classes in GC memory and if so, do we
> catch their virtual table pointers?

Who knows, but then I don't remember adding stuff that should end in a PCH.

Honestly I don't think it's worth spending too much time in making this work.
Iff then disallow pointers to outside GC in PCH (maybe code abort() or
mark_invalid_pch calls into the pch walkers when they reach a GTY((skip)))

Richard.

> Jakub
>


Re: [PATCH] x86: Make stringop_algs::stringop_strategy ctor constexpr [PR100246]

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, Nov 5, 2021 at 10:59 AM Jakub Jelinek via Gcc-patches
 wrote:
>
> On Thu, Nov 04, 2021 at 01:45:38PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > On Thu, Nov 04, 2021 at 12:39:34PM +, Iain Sandoe wrote:
> > > Bootstrap succeeded with Apple clang-503.0.40 (Xcode 5.1.1) on macOS 10.8
> > > which is the earliest version I expect to work (previous xcode impl. have 
> > > more
> > > C++11 incompatibilities).   So OK from a Darwin PoV.
> > >
> > > The other reported toolchain with the issue was GCC-4.9.2 as discussed on
> > > IRC - this also seems OK.
> >
> > > > Especially because 11.x is not going to have the dyninit optimization 
> > > > for
> > > > sure, it would be nice to do this on the 11 branch too.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux successfully too, with
> slightly different formatting, as I think in our coding style constexpr
> should go on the previous line and the ctor didn't have space before (.
>
> Ok for trunk and 11.3?

OK.

> 2021-11-05  Jakub Jelinek  
>
> PR bootstrap/100246
> * config/i386/i386.h
> (stringop_algs::stringop_strategy::stringop_strategy): Make the ctor
> constexpr.
>
> --- gcc/config/i386/i386.h.jj   2021-09-28 23:18:35.282563395 +0200
> +++ gcc/config/i386/i386.h  2021-11-04 10:48:47.165086806 +0100
> @@ -78,8 +78,9 @@ struct stringop_algs
> this issue.  Since this header is used by code compiled with the C
> compiler we must guard the addition.  */
>  #ifdef __cplusplus
> -stringop_strategy(int _max = -1, enum stringop_alg _alg = libcall,
> - int _noalign = false)
> +constexpr
> +stringop_strategy (int _max = -1, enum stringop_alg _alg = libcall,
> +  int _noalign = false)
>: max (_max), alg (_alg), noalign (_noalign) {}
>  #endif
>  const int max;
>
>
> Jakub
>


Re: [PATCH] c++, dyninit: Optimize C++ dynamic initialization by constants into DECL_INITIAL adjustment [PR102876]

2021-11-05 Thread Richard Biener via Gcc-patches
On Thu, 4 Nov 2021, Jakub Jelinek wrote:

> On Thu, Nov 04, 2021 at 12:13:51PM +0100, Richard Biener wrote:
> > As a general comment I wonder whether doing this fully in the C++
> > frontend leveraging the constexpr support is a better approach, esp.
> > before we end up putting all initializers into a single function ...
> > even partly constexpr evaluating things might help in some case.
> 
> I initially thought that is what we should do, but I agree with Jason
> that it isn't either/or, while we should keep investigating the
> auto-constexpr handling for inline functions (curious about details for
> that, e.g. should those implicit constexpr be just a different flag
> from what we currently use, so that we e.g. ignore them during manifestly
> constant evaluation and only handle them when doing optimization only
> constant evaluation?  Do we want to copy their bodies early before all
> cp_fold like we do for real constexpr functions, or can we process
> them on their cp_folded bodies before gimplification (gimplification
> is destructive, so after that we couldn't use those obviously)?),
> that still won't handle cases of functions not marked inline, functions
> with bodies defined only after the variable with dynamic initialization,
> functions with bodies in different TUs with LTO, etc.
> Or e.g. strict C++ says something isn't valid in constant expressions,
> reinterpret_cast, etc., but our optimizers handle it fine and we still
> optimize into constant stores.

Agreed that we should attack it from both sides, I just had the
impression that most bugreports complain that clang++ can do it
and those mostly looked like opportunities that could be leveraged
by simply const-evaluating the initializer. So I wonder if we shouldn't
do that first.

> > On that note it might be worth experimenting with keeping each
> > initializer in a separate function until IPA where IPA could
> > then figure out dependences via IPA REFs (with LTO on the whole
> > program), a) diagnosing inter-CU undefined behavior, b) "fixing"
> > things by making sure the initialization happens init-before-use
> > (when there's no cycle), c) with local analysis do the promotion
> > to READONLY at IPA time and elide the function.
> 
> I thought about separate functions, but it isn't clear to me how those
> would actually help.  Because in order to optimize the dynamic initializers
> that weren't possible to optimize with constexpr machinery, we need
> inlining, not really sure if we can rely just on just early inlining, and then
> need some constant propagation etc.  But on the other side, we don't want
> to call hundreds of different functions from the *GLOBAL_*_I_* functions,
> so even if we used separate functions, we want IPA to inline it.

All true, but at least separate functions make it easier to see what
the initializer is without resorting to tricks like the internal functions
you add (just guessing a bit, didn't look at the patch yet).

Say, if the CTOR function has

  a = 2;
  b = foo ();
  c = 0;

coming from

int a = baz (); // returns constant 2
int b = foo (); // not resolvable
int c = bar (); // returns constant 0

then how do we know that foo () does not modify a[] or c[]?
At least modifying c from foo () should be UB?  modifying a
might be OK.  But with

  a = 2;
  b = foo ();
  c = 0;

we need to prove we can move the inits before any possible clobbers
to make them static inits?  Promoting a is OK I guess since foo ()
will simply re-initialize it.  But promoting c is only OK if
foo modifying it would be UB.

> For the diagnostics of UB, we have -fsanitize=address which should diagnose
> incorrect initialization ordering.

Ah, I see.  Of course that doesn't diagnose things that are UB but
happen to be "corrected" by link order?

Richard.


Re: [PATCH] c++, dyninit: Optimize C++ dynamic initialization by constants into DECL_INITIAL adjustment [PR102876]

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 05, 2021 at 11:44:53AM +0100, Richard Biener wrote:
> Agreed that we should attack it from both sides, I just had the
> impression that most bugreports complain that clang++ can do it
> and those mostly looked like opportunities that could be leveraged
> by simply const-evaluating the initializer. So I wonder if we shouldn't
> do that first.

Yes, clang++ can do it (apparently in a limited way, they can either
optimize all dynamic initializers in a TU or none, so kind of what
my patch would do without those internal functions), but they
clearly aren't doing it by const-evaluating the initializer;
from -mllvm -print-after-all (which seems to be a quite unreadable variant of
GCC's -fdump-{tree,ipa,rtl}-all-details with everything intermixed
on stdout) it seems to be done in a
Global Variable Optimizer
pass that seems to be before inlining but after
Interprocedural Sparse Conditional Constant Propagation
Called Value Propagation

They do seem to handle e.g.
int foo ();
int a = foo ();
int foo () { return 1; }
int bar (int);
int b = bar (foo ());
int bar (int x) { return x + 7; }
which we won't be able to optimize in the FE even if we wanted to
treat all functions as constexpr rather than only the inlines that Jason
was planning to handle like that; the bodies of
the functions aren't available when we process those variable initializers.

> All true, but at least separate functions make it easier to see what
> the initializer is without resorting to tricks like the internal functions
> you add (just guessing a bit, didn't look at the patch yet).

I think the internal function calls are actually cheaper than separate
functions and can be kept in the IL after IPA until we use them and
remove them.
If wanted, we could actually run the pass twice, once before IPA so that
it can optimize vars where early inlining optimized stuff into constants,
in that first pass we would remove the ifns wrapping only dynamic
initialization of vars that the early pass instance was able to optimize,
and then one after IPA and constant propagation, dce etc. which would
handle the rest (and that one would remove all the ifns).

> Say, if the CTOR function has
> 
>   a = 2;
>   b = foo ();
>   c = 0;
> 
> coming from
> 
> int a = baz (); // returns constant 2
> int b = foo (); // not resolvable
> int c = bar (); // returns constant 0
> 
> then how do we know that foo () does not modify a[] or c[]?
> At least modifying c from foo () should be UB?  modifying a

foo certainly can read and modify a no matter what type it has,
and it won't change anything: a has been initialized to 2 either
dynamically or statically, and both behave the same.
As for c, if it is not vacuously initialized (i.e. needs construction
with a non-trivial constructor), reading or storing it would, I believe,
be UB.  If it is vacuously initialized, then the
https://eel.is/c++draft/basic.start.static#3
I was referring to applies:
"An implementation is permitted to perform the initialization of a variable
with static or thread storage duration as a static initialization even if
such initialization is not required to be done statically, provided that

- the dynamic version of the initialization does not change the value of any
  other object of static or thread storage duration prior to its
  initialization, and

- the static version of the initialization produces the same value in the
  initialized variable as would be produced by the dynamic initialization if
  all variables not required to be initialized statically were initialized
  dynamically.

[Note 2: As a consequence, if the initialization of an object obj1 refers to
an object obj2 potentially requiring dynamic initialization and defined later
in the same translation unit, it is unspecified whether the value of obj2
used will be the value of the fully initialized obj2 (because obj2 was
statically initialized) or will be the value of obj2 merely zero-initialized.
For example, inline double fd() { return 1.0; }
extern double d1;
double d2 = d1; // unspecified:
// either statically initialized to 0.0 or
// dynamically initialized to 0.0 if d1 is
// dynamically initialized, or 1.0 otherwise
double d1 = fd();   // either initialized statically or dynamically to 1.0
- end note]"

My reading is that the first bullet talks about just the dynamic initialization
of the particular variable and not e.g. about all the dynamic initialization
of previous objects, so the optimization is covered when it uses those ifn markers
and checks something even stronger (that no other variables are modified
in that particular dynamic initialization).  The example shows that
at least reading c in foo is ok, but one needs to be prepared to see there
either the value that would be there if the optimization didn't happen or
the one where it did.  The example doesn't talk about writing the variable...

> > For the diagnostics of UB, we have -fsanitize=address which should diagnose
> > incorrect initialization orde

Re: [PATCH] Bump required minimum DejaGnu version to 1.5.3

2021-11-05 Thread Jonathan Wakely via Gcc-patches
On Fri, 5 Nov 2021 at 09:35, Richard Biener via Gcc  wrote:
> So just contribute updated dejagnu packages to CentOS 7 "backports" or
> whatever means exists there?

Yes, we could add a newer dejagnu to EPEL.


RE: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-05 Thread Tamar Christina via Gcc-patches


> -Original Message-
> From: Richard Biener 
> Sent: Tuesday, November 2, 2021 6:22 PM
> To: Richard Sandiford 
> Cc: Richard Biener via Gcc-patches ; Tamar
> Christina ; nd 
> Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> vectorization
> 
> On Tue, 2 Nov 2021, Richard Sandiford wrote:
> 
> > Richard Biener via Gcc-patches  writes:
> > > On Tue, 2 Nov 2021, Tamar Christina wrote:
> > >
> > >> > -Original Message-
> > >> > From: Richard Biener 
> > >> > Sent: Tuesday, November 2, 2021 2:24 PM
> > >> > To: Tamar Christina 
> > >> > Cc: gcc-patches@gcc.gnu.org; nd 
> > >> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> > >> > vectorization
> > >> >
> > >> > On Tue, 2 Nov 2021, Tamar Christina wrote:
> > >> >
> > >> > > Hi All,
> > >> > >
> > >> > > Following my current SVE predicate optimization series a
> > >> > > problem has presented itself in that the way vector masks are
> > >> > > generated for masked operations relies on CSE to share masks
> efficiently.
> > >> > >
> > >> > > The issue however is that masking is done using the & operand
> > >> > > and & is associative and so reassoc decides to reassociate the
> masked operations.
> > >> >
> > >> > But it does this for the purpose of canonicalization and thus CSE.
> > >>
> > >> Yes, but it turns something like
> > >>
> > >> (a & b) & mask into a & (b & mask).
> > >>
> > >> When (a & b) is used somewhere else you now lose the CSE.  So it's
> > >> actually hurting in this case.
> > >
> > > OK, so that's a known "issue" with reassoc, it doesn't consider
> > > global CSE opportunities and I guess it pushes 'mask' to leaf if it
> > > is loop carried.
> > >
> > >> >
> > >> > > This makes CSE then unable to CSE an unmasked and a masked
> > >> > > operation leading to duplicate operations being performed.
> > >> > >
> > >> > > To counter this we want to add an RPO pass over the vectorized
> > >> > > loop body when vectorization succeeds.  This makes it then no
> > >> > > longer reliant on the RTL level CSE.
> > >> > >
> > >> > > I have not added a testcase for this as it requires the changes
> > >> > > in my patch series, however the entire series relies on this
> > >> > > patch to work so all the tests there cover it.
> > >> > >
> > >> > > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > >> > > x86_64-linux-gnu and no issues.
> > >> > >
> > >> > > Ok for master?
> > >> >
> > >> > You are running VN over _all_ loop bodies rather only those
> vectorized.
> > >> > We loop over vectorized loops earlier for optimizing masked store
> sequences.
> > >> > I suppose you could hook in there.  I'll also notice that we have
> > >> > pass_pre_slp_scalar_cleanup which eventually runs plus we have a
> late FRE.
> > >> > So I don't understand why it doesn't work to CSE later.
> > >> >
> > >>
> > >> Atm, say you have the conditions a > b, and a > b & a > c
> > >>
> > >> We generate
> > >>
> > >> mask1 = (a > b) & loop_mask
> > >> mask2 = (a > b & a > c) & loop_mask
> > >>
> > >> with the intention that mask1 can be re-used in mask2.
> > >>
> > >> Reassoc changes this to mask2 = a > b & (a > c & loop_mask)
> > >>
> > >> Which has now unmasked (a > b) in mask2, which leaves us unable to
> > >> combine the mask1 and mask2.  It doesn't generate incorrect code, just
> inefficient.
> > >>
> > >> >   for (i = 1; i < number_of_loops (cfun); i++)
> > >> > {
> > >> >   loop_vec_info loop_vinfo;
> > >> >   bool has_mask_store;
> > >> >
> > >> >   loop = get_loop (cfun, i);
> > >> >   if (!loop || !loop->aux)
> > >> > continue;
> > >> >   loop_vinfo = (loop_vec_info) loop->aux;
> > >> >   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
> > >> >   delete loop_vinfo;
> > >> >   if (has_mask_store
> > >> >   && targetm.vectorize.empty_mask_is_expensive
> (IFN_MASK_STORE))
> > >> > optimize_mask_stores (loop);
> > >> >   loop->aux = NULL;
> > >> > }
> > >> >
> > >>
> > >> Ah thanks, I'll make the changes.
> > >
> > > Note I think that full-blown CSE is a bit overkill just to counter a
> > > deficient reassoc (or VN).  At least it is supposed to be "cheap"
> > > and can be conditionalized on loop masks being used as well.
> >
> > Not sure we should make this conditional on loop masks being used.
> > It seems either that:
> >
> > (a) the vectoriser is supposed to avoid creating code that has folding
> > or VN opportunities, in which case we need to generate the vectorised
> > code in a smarter way or
> >
> > (b) the vectoriser is allowed to create code that has folding or VN
> > opportunities, in which case it would be good to have a defined
> > place to get rid of them.
> 
> It's certainly (b), and the definitive place to get rid of those is the 
> post-loop
> optimizer FRE pass.  That just happens to be after a reassoc pass which
> makes FRE run into the pre-existing issue that we fail to capture all (or the
> best) possible CSE opportunit

Re: [PATCH] Split vector loop analysis into main and epilogue analysis

2021-11-05 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> As discussed this splits the analysis loop into two, first settling
> on a vector mode used for the main loop and only then analyzing
> the epilogue of that for possible vectorization.  That makes it
> easier to put in support for unrolled main loops.
>
> On the way I've realized some cleanup opportunities, namely caching
> n_stmts in vec_info_shared (it's computed by dataref analysis)
> avoiding to pass that around and setting/clearing loop->aux
> during analysis - try_vectorize_loop_1 will ultimatively set it
> on those we vectorize.
>
> This also gets rid of the previously introduced callback in
> vect_analyze_loop_1 in favor of making that advance the mode iterator.
> I'm now pushing VOIDmode explicitely into the vector_modes array
> which makes the re-start on the epilogue side a bit more
> straight-forward.  Note that we will now use auto-detection of the
> vector mode in case the main loop used it and we want to try
> LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
> the target array if not.  I've added a comment that says we may
> want to make sure we don't try vectorizing the epilogue with a
> bigger vector size than the main loop but the situation isn't
> very likely to appear in practice I guess (and it was also present
> before this change).
>
> In principle this change should not change vectorization decisions
> but the way we handled re-analyzing epilogues as main loops makes
> me only 99% sure that it does.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.

Comments inline.

>
> OK?
>
> Thanks,
> Richard.
>
> 2021-11-05  Richard Biener  
>
>   * tree-vectorizer.h (vec_info_shared::n_stmts): Add.
>   (LOOP_VINFO_N_STMTS): Likewise.
>   (vec_info_for_bb): Remove unused function.
>   * tree-vectorizer.c (vec_info_shared::vec_info_shared):
>   Initialize n_stmts member.
>   * tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
>   (vect_create_loop_vinfo): Do not set loop->aux.
>   (vect_analyze_loop_2): Do not get n_stmts as argument,
>   instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
>   here.
>   (vect_analyze_loop_1): Remove callback, get the mode iterator
>   and autodetected_vector_mode as argument, advancing the
>   iterator and initializing autodetected_vector_mode here.
>   (vect_analyze_loop): Split analysis loop into two, first
>   processing main loops only and then epilogues.
> ---
>  gcc/tree-vect-loop.c  | 415 +-
>  gcc/tree-vectorizer.c |   3 +-
>  gcc/tree-vectorizer.h |  10 +-
>  3 files changed, 212 insertions(+), 216 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 13a53436729..abf87f99d6d 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
>  .  */
>  
>  #define INCLUDE_ALGORITHM
> -#define INCLUDE_FUNCTIONAL
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> @@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, 
> vec_info_shared *shared,
> = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
>  }
>  
> -  gcc_assert (!loop->aux);
> -  loop->aux = loop_vinfo;
>return loop_vinfo;
>  }
>  
> @@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling 
> (loop_vec_info loop_vinfo,
> for it.  The different analyses will record information in the
> loop_vec_info struct.  */
>  static opt_result
> -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned 
> *n_stmts)
> +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
>  {
>opt_result ok = opt_result::success ();
>int res;
> @@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal, unsigned *n_stmts)
>opt_result res
>   = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
>&LOOP_VINFO_DATAREFS (loop_vinfo),
> -  n_stmts);
> +  &LOOP_VINFO_N_STMTS (loop_vinfo));
>if (!res)
>   {
> if (dump_enabled_p ())
> @@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal, unsigned *n_stmts)
>poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR 
> (loop_vinfo);
>  
>/* Check the SLP opportunities in the loop, analyze and build SLP trees.  
> */
> -  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
> +  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
>if (!ok)
>  return ok;
>  
> @@ -2641,6 +2638,7 @@ start_over:
>   LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
>  
>/* Ok to vectorize!  */
> +  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>return opt_result::success ();
>  
>  again:
> @@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
>return true;
>  }
>

Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-05 Thread Martin Liška

On 11/5/21 11:23, Richard Biener wrote:

OK if you add a comment like

  /* Make sure to process -gtoggle only once.  */


Sure, added and installed as 14c7041a1f00ef4ee9a036e0b369c97646db5b5c.

Cheers,
Martin


RE: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, 5 Nov 2021, Tamar Christina wrote:

> 
> 
> > -Original Message-
> > From: Richard Biener 
> > Sent: Tuesday, November 2, 2021 6:22 PM
> > To: Richard Sandiford 
> > Cc: Richard Biener via Gcc-patches ; Tamar
> > Christina ; nd 
> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> > vectorization
> > 
> > On Tue, 2 Nov 2021, Richard Sandiford wrote:
> > 
> > > Richard Biener via Gcc-patches  writes:
> > > > On Tue, 2 Nov 2021, Tamar Christina wrote:
> > > >
> > > >> > -Original Message-
> > > >> > From: Richard Biener 
> > > >> > Sent: Tuesday, November 2, 2021 2:24 PM
> > > >> > To: Tamar Christina 
> > > >> > Cc: gcc-patches@gcc.gnu.org; nd 
> > > >> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> > > >> > vectorization
> > > >> >
> > > >> > On Tue, 2 Nov 2021, Tamar Christina wrote:
> > > >> >
> > > >> > > Hi All,
> > > >> > >
> > > >> > > Following my current SVE predicate optimization series a
> > > >> > > problem has presented itself in that the way vector masks are
> > > >> > > generated for masked operations relies on CSE to share masks
> > efficiently.
> > > >> > >
> > > >> > > The issue however is that masking is done using the & operand
> > > >> > > and & is associative and so reassoc decides to reassociate the
> > masked operations.
> > > >> >
> > > >> > But it does this for the purpose of canonicalization and thus CSE.
> > > >>
> > > >> Yes, but it turns something like
> > > >>
> > > >> (a & b) & mask into a & (b & mask).
> > > >>
> > > >> When (a & b) is used somewhere else you now lose the CSE.  So it's
> > > >> actually hurting in this case.
> > > >
> > > > OK, so that's a known "issue" with reassoc, it doesn't consider
> > > > global CSE opportunities and I guess it pushes 'mask' to leaf if it
> > > > is loop carried.
> > > >
> > > >> >
> > > >> > > This makes CSE then unable to CSE an unmasked and a masked
> > > >> > > operation leading to duplicate operations being performed.
> > > >> > >
> > > >> > > To counter this we want to add an RPO pass over the vectorized
> > > >> > > loop body when vectorization succeeds.  This makes it then no
> > > >> > > longer reliant on the RTL level CSE.
> > > >> > >
> > > >> > > I have not added a testcase for this as it requires the changes
> > > >> > > in my patch series, however the entire series relies on this
> > > >> > > patch to work so all the tests there cover it.
> > > >> > >
> > > >> > > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > > >> > > x86_64-linux-gnu and no issues.
> > > >> > >
> > > >> > > Ok for master?
> > > >> >
> > > >> > You are running VN over _all_ loop bodies rather only those
> > vectorized.
> > > >> > We loop over vectorized loops earlier for optimizing masked store
> > sequences.
> > > >> > I suppose you could hook in there.  I'll also notice that we have
> > > >> > pass_pre_slp_scalar_cleanup which eventually runs plus we have a
> > late FRE.
> > > >> > So I don't understand why it doesn't work to CSE later.
> > > >> >
> > > >>
> > > >> Atm, say you have the conditions a > b, and a > b & a > c
> > > >>
> > > >> We generate
> > > >>
> > > >> mask1 = (a > b) & loop_mask
> > > >> mask2 = (a > b & a > c) & loop_mask
> > > >>
> > > >> with the intention that mask1 can be re-used in mask2.
> > > >>
> > > >> Reassoc changes this to mask2 = a > b & (a > c & loop_mask)
> > > >>
> > > >> Which has now unmasked (a > b) in mask2, which leaves us unable to
> > > >> combine the mask1 and mask2.  It doesn't generate incorrect code, just
> > inefficient.
> > > >>
> > > >> >   for (i = 1; i < number_of_loops (cfun); i++)
> > > >> > {
> > > >> >   loop_vec_info loop_vinfo;
> > > >> >   bool has_mask_store;
> > > >> >
> > > >> >   loop = get_loop (cfun, i);
> > > >> >   if (!loop || !loop->aux)
> > > >> > continue;
> > > >> >   loop_vinfo = (loop_vec_info) loop->aux;
> > > >> >   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
> > > >> >   delete loop_vinfo;
> > > >> >   if (has_mask_store
> > > >> >   && targetm.vectorize.empty_mask_is_expensive
> > (IFN_MASK_STORE))
> > > >> > optimize_mask_stores (loop);
> > > >> >   loop->aux = NULL;
> > > >> > }
> > > >> >
> > > >>
> > > >> Ah thanks, I'll make the changes.
> > > >
> > > > Note I think that full-blown CSE is a bit overkill just to counter a
> > > > deficient reassoc (or VN).  At least it is supposed to be "cheap"
> > > > and can be conditionalized on loop masks being used as well.
> > >
> > > Not sure we should make this conditional on loop masks being used.
> > > It seems either that:
> > >
> > > (a) the vectoriser is supposed to avoid creating code that has folding
> > > or VN opportunities, in which case we need to generate the vectorised
> > > code in a smarter way or
> > >
> > > (b) the vectoriser is allowed to create code that has folding or VN
> > > opportunities, in which case it would be good to have a defined
> > > pla

Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-05 Thread H.J. Lu via Gcc-patches
On Fri, Nov 5, 2021 at 3:01 AM Richard Biener
 wrote:
>
> On Fri, Nov 5, 2021 at 7:54 AM Jakub Jelinek via Gcc-patches
>  wrote:
> >
> > On Thu, Nov 04, 2021 at 11:05:35PM -0700, Andrew Pinski via Gcc-patches 
> > wrote:
> > > > I noticed that the macro “WIDE_INT_MAX_ELTS” has different values in 
> > > > GCC11 and GCC12 (on the same X86 machine)
> > > >
> > > > For gcc11:
> > > >
> > > > wide int max elts =3
> > > >
> > > > For gcc12:
> > > >
> > > > wide int max elts =9
> > > >
> > > > Does anyone know what’s the reason for this difference?
> > > >
> > > > Thanks a lot for any help.
> > >
> > > Yes, originally the x86 backend only used OI and XI modes for vectors
> > > during data movement.
> > > This changed with r10-5741-gc57b4c22089, which added the use of OI mode
> > > for TImode add with overflow, and then MAX_BITSIZE_MODE_ANY_INT was
> > > changed from 128 to 160 (in r10-6178-gc124b345e46078) to fix the ICE
> > > introduced by that change.
> > > And then r12-979-g782e57f2c09 removed the define of
> > > MAX_BITSIZE_MODE_ANY_INT.
> > > Now, what was not mentioned in r12-979-g782e57f2c09 (or before) is why
> > > MAX_BITSIZE_MODE_ANY_INT was defined in the first place for x86. HJL
> > > assumed there was some problem with defining it that way, not
> > > realizing that memory usage was the reason.
> > > It was defined to keep the memory usage down; as you can see, it is now
> > > almost a 3x memory increase for all wi::wide_int.
> > > I do think r12-979-g782e57f2c09 should be reverted, with an added
> > > comment saying that MAX_BITSIZE_MODE_ANY_INT is defined here to
> > > decrease the memory footprint.
> >
> > I completely agree.
>
> Do we have permanent objects embedding wide[st]_int?  I know of
> class loop and loop_bound.  Btw, there are other targets with large
> integer modes (aarch64 with XImode) and not defining
> MAX_BITSIZE_MODE_ANY_INT
>

MAX_BITSIZE_MODE_ANY_INT was removed so that YMM and ZMM
registers can be used for by_pieces operations.
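
For reference, the arithmetic behind the two values; this is my reading of
wide-int.h, so treat the formula as an assumption rather than a definitive
statement:

  /* WIDE_INT_MAX_ELTS
       = (MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT)
         / HOST_BITS_PER_WIDE_INT
     GCC 11:  (160 + 64) / 64 = 3   (integer division)
     GCC 12:  (512 + 64) / 64 = 9   (XImode raises MAX_BITSIZE_MODE_ANY_INT
                                     to 512 once the define is gone)  */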

-- 
H.J.


[committed] libstdc++: Add xfail to pretty printer tests that fail in C++20

2021-11-05 Thread Jonathan Wakely via Gcc-patches
Tested x86-linux, in C++17 and C++20 modes, with GDB 10 and GDB 12.
Pushed to trunk.


For some reason the type printer for std::string doesn't work in C++20
mode, so std::basic_string<char, char_traits<char>, allocator<char>> is
printed out in full rather than being shown as std::string. It's
probably related to the fact that the extern template declarations are
disabled for C++20, but I don't know why that affects GDB.

For now I'm just marking the relevant tests as XFAIL. That requires
adding support for target selectors to individual GDB directives such as
note-test and whatis-regexp-test.
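
A rough sketch of how a test could use the new optional selector argument
(hypothetical usage, not copied from the patch):

  std::string str = "hello";
  // { dg-final { note-test str "\"hello\"" } }
  // { dg-final { whatis-test str "std::string" { xfail c++20 } } }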

libstdc++-v3/ChangeLog:

* testsuite/lib/gdb-test.exp: Add target selector support to the
dg-final directives.
* testsuite/libstdc++-prettyprinters/80276.cc: Add xfail for
C++20.
* testsuite/libstdc++-prettyprinters/libfundts.cc: Likewise.
* testsuite/libstdc++-prettyprinters/prettyprinters.exp: Tweak
comment.
---
 libstdc++-v3/testsuite/lib/gdb-test.exp   | 63 ---
 .../libstdc++-prettyprinters/80276.cc |  2 +-
 .../libstdc++-prettyprinters/libfundts.cc |  4 +-
 .../prettyprinters.exp|  2 +-
 4 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/testsuite/lib/gdb-test.exp 
b/libstdc++-v3/testsuite/lib/gdb-test.exp
index f993355c2b4..db0fc2f0280 100644
--- a/libstdc++-v3/testsuite/lib/gdb-test.exp
+++ b/libstdc++-v3/testsuite/lib/gdb-test.exp
@@ -50,40 +50,48 @@ proc get_line_number {filename marker} {
 return $gdb_markers($filename,$marker)
 }
 
-# Make note of a gdb test.  A test consists of a variable name and an
-# expected result.
-proc note-test {var result} {
+proc register_gdb_test {var result kind rexp selector} {
 global gdb_tests
 
-lappend gdb_tests $var $result print 0
+set xfail 0
+if {[string length $selector] > 0} {
+   switch [dg-process-target $selector] {
+   "N" { return }
+   "S" { }
+   "P" { }
+   "F" { set xfail 1 }
+   }
+}
+
+lappend gdb_tests $var $result $kind $rexp $xfail
+}
+
+# Make note of a gdb test.  A test consists of a variable name and an
+# expected result, and an optional target selector.
+proc note-test {var result {selector {}}} {
+register_gdb_test $var $result print 0 $selector
 }
 
 # A test that uses a regular expression.  This is like note-test, but
 # the result is a regular expression that is matched against the
 # output.
-proc regexp-test {var result} {
-global gdb_tests
-
-lappend gdb_tests $var $result print 1
+proc regexp-test {var result {selector {}}} {
+register_gdb_test $var $result print 1 $selector
 }
 
 # A test of 'whatis'.  This tests a type rather than a variable.
-proc whatis-test {var result} {
-global gdb_tests
-
-lappend gdb_tests $var $result whatis 0
+proc whatis-test {var result {selector {}}} {
+register_gdb_test $var $result whatis 0 $selector
 }
 
 # A test of 'whatis' that uses a regular expression. This tests a type rather
 # than a variable.
-proc whatis-regexp-test {var result} {
-global gdb_tests
-
-lappend gdb_tests $var $result whatis 1
+proc whatis-regexp-test {var result {selector {}}} {
+register_gdb_test $var $result whatis 1 $selector
 }
 
 # Utility for testing variable values using gdb, invoked via dg-final.
-# Tests all tests indicated by note-test and regexp-test.
+# Tests all tests indicated by note-test, whatis-test, and the regexp versions.
 #
 # Argument 0 is the marker on which to put a breakpoint
 # Argument 2 handles expected failures and the like
@@ -144,7 +152,7 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} } {
 puts $fd "info share"
 
 set count 0
-foreach {var result kind rexp} $gdb_tests {
+foreach {var result kind rexp xfail} $gdb_tests {
incr count
set gdb_var($count) $var
set gdb_expected($count) $result
@@ -152,6 +160,7 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} } {
if {$do_whatis_tests} {
set gdb_is_type($count) 1
set gdb_is_regexp($count) $rexp
+   set gdb_is_xfail($count) $xfail
set gdb_command($count) "whatis $var"
} else {
unsupported "$testname"
@@ -161,6 +170,7 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} } {
} else {
set gdb_is_type($count) 0
set gdb_is_regexp($count) $rexp
+   set gdb_is_xfail($count) $xfail
set gdb_command($count) "print $var"
}
puts $fd $gdb_command($count)
@@ -198,11 +208,20 @@ proc gdb-test { marker {selector {}} {load_xmethods 0} } {
}
 
if {$match} {
-   pass "$testname $gdb_command($test_counter)"
+   if {$gdb_is_xfail($test_counter)} {
+   xpass "$testname $gdb_command($test_counter)"
+   verbose " matched =>$first<="
+   } else {
+  

Re: [PATCH] Split vector loop analysis into main and epilogue analysis

2021-11-05 Thread Richard Biener via Gcc-patches
On Fri, 5 Nov 2021, Richard Sandiford wrote:

> Richard Biener  writes:
> > As discussed this splits the analysis loop into two, first settling
> > on a vector mode used for the main loop and only then analyzing
> > the epilogue of that for possible vectorization.  That makes it
> > easier to put in support for unrolled main loops.
> >
> > On the way I've realized some cleanup opportunities, namely caching
> > n_stmts in vec_info_shared (it's computed by dataref analysis)
> > avoiding to pass that around and setting/clearing loop->aux
> > during analysis - try_vectorize_loop_1 will ultimatively set it
> > on those we vectorize.
> >
> > This also gets rid of the previously introduced callback in
> > vect_analyze_loop_1 in favor of making that advance the mode iterator.
> > I'm now pushing VOIDmode explicitely into the vector_modes array
> > which makes the re-start on the epilogue side a bit more
> > straight-forward.  Note that we will now use auto-detection of the
> > vector mode in case the main loop used it and we want to try
> > LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
> > the target array if not.  I've added a comment that says we may
> > want to make sure we don't try vectorizing the epilogue with a
> > bigger vector size than the main loop but the situation isn't
> > very likely to appear in practice I guess (and it was also present
> > before this change).
> >
> > In principle this change should not change vectorization decisions
> > but the way we handled re-analyzing epilogues as main loops makes
> > me only 99% sure that it does.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> Comments inline.
> 
> >
> > OK?
> >
> > Thanks,
> > Richard.
> >
> > 2021-11-05  Richard Biener  
> >
> > * tree-vectorizer.h (vec_info_shared::n_stmts): Add.
> > (LOOP_VINFO_N_STMTS): Likewise.
> > (vec_info_for_bb): Remove unused function.
> > * tree-vectorizer.c (vec_info_shared::vec_info_shared):
> > Initialize n_stmts member.
> > * tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
> > (vect_create_loop_vinfo): Do not set loop->aux.
> > (vect_analyze_loop_2): Do not get n_stmts as argument,
> > instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
> > here.
> > (vect_analyze_loop_1): Remove callback, get the mode iterator
> > and autodetected_vector_mode as argument, advancing the
> > iterator and initializing autodetected_vector_mode here.
> > (vect_analyze_loop): Split analysis loop into two, first
> > processing main loops only and then epilogues.
> > ---
> >  gcc/tree-vect-loop.c  | 415 +-
> >  gcc/tree-vectorizer.c |   3 +-
> >  gcc/tree-vectorizer.h |  10 +-
> >  3 files changed, 212 insertions(+), 216 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> > index 13a53436729..abf87f99d6d 100644
> > --- a/gcc/tree-vect-loop.c
> > +++ b/gcc/tree-vect-loop.c
> > @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
> >  .  */
> >  
> >  #define INCLUDE_ALGORITHM
> > -#define INCLUDE_FUNCTIONAL
> >  #include "config.h"
> >  #include "system.h"
> >  #include "coretypes.h"
> > @@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, 
> > vec_info_shared *shared,
> >   = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
> >  }
> >  
> > -  gcc_assert (!loop->aux);
> > -  loop->aux = loop_vinfo;
> >return loop_vinfo;
> >  }
> >  
> > @@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling 
> > (loop_vec_info loop_vinfo,
> > for it.  The different analyses will record information in the
> > loop_vec_info struct.  */
> >  static opt_result
> > -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned 
> > *n_stmts)
> > +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
> >  {
> >opt_result ok = opt_result::success ();
> >int res;
> > @@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> > &fatal, unsigned *n_stmts)
> >opt_result res
> > = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
> >  &LOOP_VINFO_DATAREFS (loop_vinfo),
> > -n_stmts);
> > +&LOOP_VINFO_N_STMTS (loop_vinfo));
> >if (!res)
> > {
> >   if (dump_enabled_p ())
> > @@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> > &fatal, unsigned *n_stmts)
> >poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR 
> > (loop_vinfo);
> >  
> >/* Check the SLP opportunities in the loop, analyze and build SLP trees. 
> >  */
> > -  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
> > +  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
> >if (!ok)
> >  return ok;
> >  
> > @@ -2641,6 +2638,7 @@ start_over:
> > LOOP_VINFO_VECT_FACTOR (loop_vinfo)))

Re: [PATCH v2] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-11-05 Thread Jonathan Wakely via Gcc-patches
On Thu, 4 Nov 2021 at 20:44, Bill Schmidt wrote:

> For posterity:  This was discussed briefly on IRC, and Segher approved
> with some
> simplifications and a request to implement a fail/retry check.
>
>
Here's what I have now. No more assembler check in configure, and it uses
the 64-bit __builtin_darn() and truncates it to 32-bit, or retries (up to
100 times) if it fails.

I'm doing some more testing now.
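
For reference, a user would select the new source with the "darn" token (or
the new "hw" token) roughly like this; a minimal sketch, assuming the usual
behaviour that an unusable token makes construction throw:

  #include <random>

  std::random_device rd("darn");   // or "hw" to accept rdrand/rdseed/darn
  unsigned int r = rd();
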
commit 77816264712923481db37a29d1638faa1d99aadc
Author: Jonathan Wakely 
Date:   Wed Oct 20 09:25:24 2021

libstdc++: Add support for POWER9 DARN instruction to std::random_device

The ISA-3.0 instruction set includes DARN ("deliver a random number")
which can be used similar to the existing support for RDRAND and RDSEED.

libstdc++-v3/ChangeLog:

* src/c++11/random.cc [__powerpc__] (USE_DARN): Define.
(__ppc_darn): New function to use POWER9 DARN instruction.
(Which): Add 'darn' enumerator.
(which_source): Check for __ppc_darn.
(random_device::_M_init): Support "darn" and "hw" tokens.
(random_device::_M_getentropy): Add darn to switch.
* testsuite/26_numerics/random/random_device/cons/token.cc:
Check "darn" token.
* testsuite/26_numerics/random/random_device/entropy.cc:
Likewise.

diff --git a/libstdc++-v3/src/c++11/random.cc b/libstdc++-v3/src/c++11/random.cc
index 4b64bde00ea..b0d88374d59 100644
--- a/libstdc++-v3/src/c++11/random.cc
+++ b/libstdc++-v3/src/c++11/random.cc
@@ -37,6 +37,8 @@
 # ifdef _GLIBCXX_X86_RDSEED
 #  define USE_RDSEED 1
 # endif
+#elif defined __powerpc__ && defined __BUILTIN_CPU_SUPPORTS__
+# define USE_DARN 1
 #endif
 
 #include 
@@ -69,7 +71,7 @@
 #if defined _GLIBCXX_USE_CRT_RAND_S || defined _GLIBCXX_USE_DEV_RANDOM
 // The OS provides a source of randomness we can use.
 # pragma GCC poison _M_mt
-#elif defined USE_RDRAND || defined USE_RDSEED
+#elif defined USE_RDRAND || defined USE_RDSEED || defined USE_DARN
 // Hardware instructions might be available, but use cpuid checks at runtime.
 # pragma GCC poison _M_mt
 // If the runtime cpuid checks fail we'll use a linear congruential engine.
@@ -135,6 +137,24 @@ namespace std _GLIBCXX_VISIBILITY(default)
 #endif
 #endif
 
+#ifdef USE_DARN
+unsigned int
+__attribute__((target("cpu=power9")))
+__ppc_darn(void*)
+{
+  const uint64_t failed = -1;
+  unsigned int retries = 100;
+  uint64_t val = __builtin_darn();
+  while (val == failed) [[__unlikely__]]
+   {
+ if (--retries == 0)
+   std::__throw_runtime_error(__N("random_device: darn failed"));
+ val = __builtin_darn();
+   }
+  return (uint32_t)val;
+}
+#endif
+
 #ifdef _GLIBCXX_USE_CRT_RAND_S
 unsigned int
 __winxp_rand_s(void*)
@@ -193,11 +213,16 @@ namespace std _GLIBCXX_VISIBILITY(default)
 }
 #endif
 
-enum Which {
-  rand_s = 1, rdseed = 2, rdrand = 4, device_file = 8, prng = 16,
+enum Which : unsigned {
+  device_file = 1, prng = 2, rand_s = 4,
+  rdseed = 64, rdrand = 128, darn = 256,
   any = 0x
 };
 
+constexpr Which
+operator|(Which l, Which r) noexcept
+{ return Which(unsigned(l) | unsigned(r)); }
+
 inline Which
 which_source(random_device::result_type (*func [[maybe_unused]])(void*),
 void* file [[maybe_unused]])
@@ -221,6 +246,11 @@ namespace std _GLIBCXX_VISIBILITY(default)
return rdrand;
 #endif
 
+#ifdef USE_DARN
+  if (func == &__ppc_darn)
+   return darn;
+#endif
+
 #ifdef _GLIBCXX_USE_DEV_RANDOM
   if (file != nullptr)
return device_file;
@@ -269,6 +299,14 @@ namespace std _GLIBCXX_VISIBILITY(default)
 else if (token == "rdrand" || token == "rdrnd")
   which = rdrand;
 #endif // USE_RDRAND
+#ifdef USE_DARN
+else if (token == "darn")
+  which = darn;
+#endif
+#if defined USE_RDRAND || defined USE_RDSEED || defined USE_DARN
+else if (token == "hw" || token == "hardware")
+  which = rdrand | rdseed | darn;
+#endif
 #ifdef _GLIBCXX_USE_CRT_RAND_S
 else if (token == "rand_s")
   which = rand_s;
@@ -346,6 +384,17 @@ namespace std _GLIBCXX_VISIBILITY(default)
 }
 #endif // USE_RDRAND
 
+#ifdef USE_DARN
+if (which & darn)
+  {
+   if (__builtin_cpu_supports("darn"))
+ {
+   _M_func = &__ppc_darn;
+   return;
+ }
+  }
+#endif // USE_DARN
+
 #ifdef _GLIBCXX_USE_DEV_RANDOM
 if (which & device_file)
 {
@@ -497,6 +546,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 case rdrand:
 case rdseed:
+case darn:
   return (double) max;
 case rand_s:
 case prng:
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/random_device/cons/token.cc 
b/libstdc++-v3/testsuite/26_numerics/random/random_device/cons/token.cc
index aeb7403e830..d6ac3a37c64 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/random_device/cons/token.cc
+++ b/lib

[PATCH v2 0/3] RISC-V: Support zfinx extension

2021-11-05 Thread jiawei
The Zfinx extension[1] has already finished public review. Here is the
implementation patch set, which reuses the floating-point patterns and bans
the use of FPRs when zfinx is the target.
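
As a quick illustration of what the series aims for (hypothetical example,
not part of the patches): with something like -march=rv32im_zfinx -mabi=ilp32,

  float fadd (float a, float b) { return a + b; }

is expected to compile to a single fadd.s operating on integer registers
(e.g. a0/a1) rather than on f-registers, since zfinx provides no separate
FP register file.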

Current work can be found at the following links; we will keep updating zhinx
and zhinxmin after the zfh extension goes upstream.
  https://github.com/pz9115/riscv-gcc/tree/zfinx-rebase
  https://github.com/pz9115/riscv-binutils-gdb/tree/zfinx-rebase

For testing you can use qemu or spike builds that support the zfinx extension;
the qemu support will go upstream soon and the spike support is still in review:
  https://github.com/plctlab/plct-qemu/tree/plct-zfinx-dev
  https://github.com/plctlab/plct-spike/tree/plct-upstream-zfinx  

Thanks to Tariq Kurd, Kito Cheng, Jim Willson, and Jeremy Bennett, who helped
us a lot with this work.

[1] https://github.com/riscv/riscv-zfinx/blob/main/zfinx-1.0.0-rc.pdf

Version log:

v2: Per Kito Cheng's comments, add ChangeLog entries to the patches, update
the implied-extension info in riscv-common.c, and remove a useless check and
update a comment in riscv.c.

jiawei (3):
  RISC-V: Minimal support of zfinx extension
  RISC-V: Target support for zfinx extension
  RISC-V: Limit regs use  for zfinx extension

 gcc/common/config/riscv/riscv-common.c |  7 +++
 gcc/config/riscv/arch-canonicalize |  1 +
 gcc/config/riscv/constraints.md|  3 +-
 gcc/config/riscv/riscv-builtins.c  |  4 +-
 gcc/config/riscv/riscv-c.c |  2 +-
 gcc/config/riscv/riscv-opts.h  |  6 +++
 gcc/config/riscv/riscv.c   | 14 -
 gcc/config/riscv/riscv.md  | 72 +-
 gcc/config/riscv/riscv.opt |  3 ++
 9 files changed, 71 insertions(+), 41 deletions(-)

-- 
2.25.1



[PATCH v2 2/3] RISC-V: Target support for zfinx extension

2021-11-05 Thread jiawei
Support 'TARGET_ZFINX' in the floating-point instruction patterns and builtin functions.

gcc/ChangeLog:

* config/riscv/riscv-builtins.c (AVAIL): Add TARGET_ZFINX.
(riscv_atomic_assign_expand_fenv): Ditto.
* config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): Add TARGET_ZFINX.
* config/riscv/riscv.md 
(TARGET_HARD_FLOAT || TARGET_ZFINX): Add TARGET_ZFINX.
(TARGET_DOUBLE_FLOAT || TARGET_ZDINX): Add TARGET_ZDINX.

Co-Authored-By: sinan 
---
 gcc/config/riscv/riscv-builtins.c |  4 +-
 gcc/config/riscv/riscv-c.c|  2 +-
 gcc/config/riscv/riscv.md | 72 +++
 3 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/gcc/config/riscv/riscv-builtins.c 
b/gcc/config/riscv/riscv-builtins.c
index 97b1480a15e..d892e6cdb26 100644
--- a/gcc/config/riscv/riscv-builtins.c
+++ b/gcc/config/riscv/riscv-builtins.c
@@ -85,7 +85,7 @@ struct riscv_builtin_description {
   unsigned int (*avail) (void);
 };
 
-AVAIL (hard_float, TARGET_HARD_FLOAT)
+AVAIL (hard_float, TARGET_HARD_FLOAT || TARGET_ZFINX)
 
 /* Construct a riscv_builtin_description from the given arguments.
 
@@ -279,7 +279,7 @@ riscv_expand_builtin (tree exp, rtx target, rtx subtarget 
ATTRIBUTE_UNUSED,
 void
 riscv_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 {
-  if (!TARGET_HARD_FLOAT)
+  if (!(TARGET_HARD_FLOAT || TARGET_ZFINX))
 return;
 
   tree frflags = GET_BUILTIN_DECL (CODE_FOR_riscv_frflags);
diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index efd4a61ea29..d064a7fc2b3 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -58,7 +58,7 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   if (TARGET_HARD_FLOAT)
 builtin_define_with_int_value ("__riscv_flen", UNITS_PER_FP_REG * 8);
 
-  if (TARGET_HARD_FLOAT && TARGET_FDIV)
+  if ((TARGET_HARD_FLOAT || TARGET_ZFINX) && TARGET_FDIV)
 {
   builtin_define ("__riscv_fdiv");
   builtin_define ("__riscv_fsqrt");
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 225e5b259c1..27c9c74c679 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -296,8 +296,8 @@
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
 ;; Iterator for hardware-supported floating-point modes.
-(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
-   (DF "TARGET_DOUBLE_FLOAT")])
+(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
+   (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")])
 
 ;; Iterator for floating-point modes that can be loaded into X registers.
 (define_mode_iterator SOFTF [SF (DF "TARGET_64BIT")])
@@ -444,7 +444,7 @@
   [(set (match_operand:ANYF0 "register_operand" "=f")
(plus:ANYF (match_operand:ANYF 1 "register_operand" " f")
   (match_operand:ANYF 2 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT || TARGET_ZFINX"
   "fadd.\t%0,%1,%2"
   [(set_attr "type" "fadd")
(set_attr "mode" "")])
@@ -575,7 +575,7 @@
   [(set (match_operand:ANYF 0 "register_operand" "=f")
(minus:ANYF (match_operand:ANYF 1 "register_operand" " f")
(match_operand:ANYF 2 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT || TARGET_ZFINX"
   "fsub.\t%0,%1,%2"
   [(set_attr "type" "fadd")
(set_attr "mode" "")])
@@ -745,7 +745,7 @@
   [(set (match_operand:ANYF   0 "register_operand" "=f")
(mult:ANYF (match_operand:ANYF1 "register_operand" " f")
  (match_operand:ANYF 2 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT || TARGET_ZFINX"
   "fmul.\t%0,%1,%2"
   [(set_attr "type" "fmul")
(set_attr "mode" "")])
@@ -1052,7 +1052,7 @@
   [(set (match_operand:ANYF   0 "register_operand" "=f")
(div:ANYF (match_operand:ANYF 1 "register_operand" " f")
  (match_operand:ANYF 2 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT && TARGET_FDIV"
+  "(TARGET_HARD_FLOAT || TARGET_ZFINX) && TARGET_FDIV"
   "fdiv.\t%0,%1,%2"
   [(set_attr "type" "fdiv")
(set_attr "mode" "")])
@@ -1067,7 +1067,7 @@
 (define_insn "sqrt2"
   [(set (match_operand:ANYF0 "register_operand" "=f")
(sqrt:ANYF (match_operand:ANYF 1 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT && TARGET_FDIV"
+  "(TARGET_HARD_FLOAT || TARGET_ZFINX) && TARGET_FDIV"
 {
 return "fsqrt.\t%0,%1";
 }
@@ -1082,7 +1082,7 @@
(fma:ANYF (match_operand:ANYF 1 "register_operand" " f")
  (match_operand:ANYF 2 "register_operand" " f")
  (match_operand:ANYF 3 "register_operand" " f")))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT || TARGET_ZFINX"
   "fmadd.\t%0,%1,%2,%3"
   [(set_attr "type" "fmadd")
(set_attr "mode" "")])
@@ -1093,7 +1093,7 @@
(fma:ANYF (match_operand:ANYF   1 "register_operand" " f")
  (match_operand:ANYF   

[PATCH v2 3/3] RISC-V: Limit regs use for zfinx extension

2021-11-05 Thread jiawei
Limit zfinx ABI support to 'ilp32', 'ilp32e' and 'lp64' only.

Use GPRs instead of FPRs when 'zfinx' is enabled.  Only use even-numbered
registers in RV32 when 'zdinx' is enabled.
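
As a rough illustration of the even-register rule (hypothetical example; the
arch string and register choices below are only illustrative, the actual
allocation is up to the compiler):

  /* rv32, ilp32 ABI, with something like -march=rv32ima_zdinx: a 'double'
     occupies an even/odd GPR pair and the FP instruction names the even
     register of each pair.  */
  double add (double x, double y) { return x + y; }

  /* might assemble to roughly:
       fadd.d  a0, a0, a2    # a0/a1 += a2/a3, both pairs even-aligned
       ret
     an odd starting register for a DFmode value would be rejected by the
     riscv_hard_regno_mode_ok change in this patch.  */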

gcc/ChangeLog:

* config/riscv/constraints.md
(TARGET_HARD_FLOAT ? FP_REGS : ((TARGET_ZFINX || TARGET_ZDINX) ? 
GR_REGS : NO_REGS)):
  Use gpr when zfinx or zdinx enable.
* config/riscv/riscv.c (riscv_hard_regno_mode_ok): Add TARGET_ZFINX.
(riscv_option_override): Ditto.
(riscv_abi): Add ABI limit for zfinx.

Co-Authored-By: sinan 
---
 gcc/config/riscv/constraints.md |  3 ++-
 gcc/config/riscv/riscv.c| 14 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index c87d5b796a5..a99b8ce277e 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -20,8 +20,9 @@
 ;; .
 
 ;; Register constraints
+;; Zfinx support need refuse FPR and use GPR
 
-(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
+(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : ((TARGET_ZFINX 
|| TARGET_ZDINX) ? GR_REGS : NO_REGS)"
   "A floating-point register (if available).")
 
 (define_register_constraint "j" "SIBCALL_REGS"
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index a545dbf66f7..f4e0e46e1a7 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -4789,6 +4789,13 @@ riscv_hard_regno_mode_ok (unsigned int regno, 
machine_mode mode)
!= call_used_or_fixed_reg_p (regno + i))
   return false;
 
+  /* Only use even registers in RV32 ZDINX */
+  if (!TARGET_64BIT && TARGET_ZDINX){
+if (GET_MODE_CLASS (mode) == MODE_FLOAT &&
+   GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
+  return !(regno & 1);
+  }
+
   return true;
 }
 
@@ -4980,7 +4987,7 @@ riscv_option_override (void)
 error ("%<-mdiv%> requires %<-march%> to subsume the % extension");
 
   /* Likewise floating-point division and square root.  */
-  if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0)
+  if ((TARGET_HARD_FLOAT || TARGET_ZFINX) && (target_flags_explicit & 
MASK_FDIV) == 0)
 target_flags |= MASK_FDIV;
 
   /* Handle -mtune, use -mcpu if -mtune is not given, and use default -mtune
@@ -5026,6 +5033,11 @@ riscv_option_override (void)
   if (TARGET_RVE && riscv_abi != ABI_ILP32E)
 error ("rv32e requires ilp32e ABI");
 
+  // Zfinx require abi ilp32,ilp32e or lp64.
+  if (TARGET_ZFINX && riscv_abi != ABI_ILP32
+&& riscv_abi != ABI_LP64 && riscv_abi != ABI_ILP32E)
+  error ("z*inx requires ABI ilp32, ilp32e or lp64");
+
   /* We do not yet support ILP32 on RV64.  */
   if (BITS_PER_WORD != POINTER_SIZE)
 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
-- 
2.25.1



[PATCH v2 1/3] RISC-V: Minimal support of zfinx extension

2021-11-05 Thread jiawei
Minimal support for the zfinx extension, including 'zfinx' and 'zdinx'
corresponding to 'f' and 'd'; 'zdinx' implies 'zfinx' just as 'd' implies 'f'.

gcc/ChangeLog:

* common/config/riscv/riscv-common.c(riscv_implied_info_t): Add zdinx 
imply zfinx.
  (riscv_ext_version_table): Add zfinx, zdinx.
* config/riscv/arch-canonicalize(IMPLIED_EXT): Add zdinx imply zfinx.
* config/riscv/riscv-opts.h
  (MASK_ZFINX): New.
  (MASK_ZDINX): Ditto.
  (TARGET_ZFINX): Ditto.
  (TARGET_ZDINX): Ditto.
* config/riscv/riscv.opt(riscv_zf_subext): New.

Co-Authored-By: sinan 
---
 gcc/common/config/riscv/riscv-common.c | 7 +++
 gcc/config/riscv/arch-canonicalize | 1 +
 gcc/config/riscv/riscv-opts.h  | 6 ++
 gcc/config/riscv/riscv.opt | 3 +++
 4 files changed, 17 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index 37b6ea80086..6db5a434257 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -50,6 +50,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},
+  {"zdinx", "zfinx"},
   {NULL, NULL}
 };
 
@@ -106,6 +107,9 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zbc", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbs", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfinx", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zdinx", ISA_SPEC_CLASS_NONE, 1, 0},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
@@ -916,6 +920,9 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] =
   {"zbc",&gcc_options::x_riscv_zb_subext, MASK_ZBC},
   {"zbs",&gcc_options::x_riscv_zb_subext, MASK_ZBS},
 
+  {"zfinx",&gcc_options::x_riscv_zf_subext, MASK_ZFINX},
+  {"zdinx",&gcc_options::x_riscv_zf_subext, MASK_ZDINX},
+
   {NULL, NULL, 0}
 };
 
diff --git a/gcc/config/riscv/arch-canonicalize 
b/gcc/config/riscv/arch-canonicalize
index c7df3c8a313..9197163d1c3 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -36,6 +36,7 @@ LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
 #
 IMPLIED_EXT = {
   "d" : ["f"],
+  "zdinx" : ["zfinx"],
 }
 
 def arch_canonicalize(arch):
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 2efc4b80f1f..5a790a028cf 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -83,4 +83,10 @@ enum stack_protector_guard {
 #define TARGET_ZBC((riscv_zb_subext & MASK_ZBC) != 0)
 #define TARGET_ZBS((riscv_zb_subext & MASK_ZBS) != 0)
 
+#define MASK_ZFINX  (1 << 0)
+#define MASK_ZDINX  (1 << 1)
+
+#define TARGET_ZFINX((riscv_zf_subext & MASK_ZFINX) != 0)
+#define TARGET_ZDINX((riscv_zf_subext & MASK_ZDINX) != 0)
+
 #endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 15bf89e17c2..54d27747eff 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -198,6 +198,9 @@ int riscv_zi_subext
 TargetVariable
 int riscv_zb_subext
 
+TargetVariable
+int riscv_zf_subext
+
 Enum
 Name(isa_spec_class) Type(enum riscv_isa_spec_class)
 Supported ISA specs (for use with the -misa-spec= option):
-- 
2.25.1



[PATCH v2] IPA: Provide a mechanism to register static DTORs via cxa_atexit.

2021-11-05 Thread Iain Sandoe via Gcc-patches


I tried enabling this on x86-64-linux (just for interest) and it seems to work
OK there too - but that testing revealed a thinko that didn’t show with
a normal regstrap.


 
 Changes from original post:
 1. amended a comment
 2. fixed a thinko where I was not allowing for functions declared as
*both* CTOR and DTOR.

For at least one target (Darwin) the platform convention is to
register static destructors (i.e. __attribute__((destructor)))
with __cxa_atexit rather than placing them into a list that is
run by some other mechanism.

This patch provides a target hook that allows a target to opt
into this and handling for the process in ipa_cdtor_merge ().

When the mode is enabled (dtors_from_cxa_atexit is set) we:

 * Generate new CTORs to register static destructors with
   __cxa_atexit and add them to the existing list of CTORs;
   we then process the revised CTORs list.

 * We sort the DTORs into priority and then TU order, this
   means that they are registered in that order with
   __cxa_atexit () and therefore will be run in the reverse
   order.

 * Likewise, CTORs are sorted into priority and then TU order,
   which means that they will run in that order.

This matches the behavior of using init/fini (or
mod_init_func/mod_term_func) sections.
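
In source terms, each synthesized per-priority constructor is conceptually
equivalent to something like the following hand-written sketch (this is not
the actual GIMPLE the pass builds; the function names and the priority value
are made up for illustration):

  extern "C" int __cxa_atexit (void (*) (void *), void *, void *);
  extern "C" void *__dso_handle;

  /* Two static destructors with the same (hypothetical) priority.  */
  static void my_dtor_1 (void) { /* ... */ }
  static void my_dtor_2 (void) { /* ... */ }

  /* Thunks with the signature __cxa_atexit expects.  */
  static void call_dtor_1 (void *) { my_dtor_1 (); }
  static void call_dtor_2 (void *) { my_dtor_2 (); }

  /* The constructor generated for this priority level.  Registering in
     priority/TU order means __cxa_atexit runs the destructors in the
     reverse order at program exit.  */
  static void __attribute__ ((constructor (101)))
  register_static_dtors_101 (void)
  {
    __cxa_atexit (call_dtor_1, (void *) 0, __dso_handle);
    __cxa_atexit (call_dtor_2, (void *) 0, __dso_handle);
  }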

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* config/darwin.h (TARGET_DTORS_FROM_CXA_ATEXIT): New.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Add TARGET_DTORS_FROM_CXA_ATEXIT hook.
* ipa.c (ipa_discover_variable_flags):
(cgraph_build_static_cdtor_1): Return the built function
decl.
(build_cxa_atexit_decl): New.
(build_dso_handle_decl): New.
(build_cxa_dtor_registrations): New.
(compare_cdtor_tu_order): New.
(build_cxa_atexit_fns): New.
(ipa_cdtor_merge): If dtors_from_cxa_atexit is set,
process the DTORs/CTORs accordingly.
(pass_ipa_cdtor_merge::gate): Also run if
dtors_from_cxa_atexit is set.
* target.def (dtors_from_cxa_atexit): New hook.
---
 gcc/config/darwin.h |   5 ++
 gcc/doc/tm.texi |   8 ++
 gcc/doc/tm.texi.in  |   2 +
 gcc/ipa.c   | 201 +++-
 gcc/target.def  |  10 +++
 5 files changed, 222 insertions(+), 4 deletions(-)

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 27cb3e4bb30..5202903f5b2 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -54,6 +54,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #define DO_GLOBAL_DTORS_BODY
 
+/* Register static destructors to run from __cxa_atexit instead of putting
+   them into a .mod_term_funcs section.  */
+
+#define TARGET_DTORS_FROM_CXA_ATEXIT true
+
 /* The string value for __SIZE_TYPE__.  */
 
 #ifndef SIZE_TYPE
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 78a1af1ad4d..6ec1d50b3e4 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -9210,6 +9210,14 @@ collecting constructors and destructors to be run at 
startup and exit.
 It is false if we must use @command{collect2}.
 @end deftypevr
 
+@deftypevr {Target Hook} bool TARGET_DTORS_FROM_CXA_ATEXIT
+This value is true if the target wants destructors to be queued to be
+run from __cxa_atexit.  If this is the case then, for each priority level,
+a new constructor will be entered that registers the destructors for that
+level with __cxa_atexit (and there will be no destructors emitted).
+It is false if the method implied by @code{have_ctors_dtors} is used.
+@end deftypevr
+
 @deftypefn {Target Hook} void TARGET_ASM_CONSTRUCTOR (rtx @var{symbol}, int 
@var{priority})
 If defined, a function that outputs assembler code to arrange to call
 the function referenced by @var{symbol} at initialization time.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4401550989e..2b9960b73d7 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6015,6 +6015,8 @@ encountering an @code{init_priority} attribute.
 
 @hook TARGET_HAVE_CTORS_DTORS
 
+@hook TARGET_DTORS_FROM_CXA_ATEXIT
+
 @hook TARGET_ASM_CONSTRUCTOR
 
 @hook TARGET_ASM_DESTRUCTOR
diff --git a/gcc/ipa.c b/gcc/ipa.c
index 4f62ac183ee..d234a69b9fe 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -837,7 +837,7 @@ ipa_discover_variable_flags (void)
FINAL specify whether the externally visible name for collect2 should
be produced. */
 
-static void
+static tree
 cgraph_build_static_cdtor_1 (char which, tree body, int priority, bool final,
 tree optimization,
 tree target)
@@ -916,6 +916,7 @@ cgraph_build_static_cdtor_1 (char which, tree body, int 
priority, bool final,
 
   set_cfun (NULL);
   current_function_decl = NULL;
+  return decl;
 }
 
 /* Generate and emit a static constructor or destructor.  WHICH must
@@ -1022,6 +1023,128 @@ build_cdtor (bool ctor_p, const vec &cdtors)
 }
 }
 
+/* Helper functions for build_cxa_dtor_registrations ().
+   Build a decl for __cxa_atexit ().  */
+
+sta

Re: [PATCH 0/5] Add Power10 XXSPLTI* and LXVKQ instructions

2021-11-05 Thread Michael Meissner via Gcc-patches
I mentioned that I would start a build/check on a big endian power8 system in
the last set of patches.  There were no regressions with this set of patches on
a big endian system, testing both 32-bit and 64-bit code generation.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH v2] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-11-05 Thread Bill Schmidt via Gcc-patches


On 11/5/21 7:44 AM, Jonathan Wakely wrote:
> On Thu, 4 Nov 2021 at 20:44, Bill Schmidt wrote:
>
> For posterity:  This was discussed briefly on IRC, and Segher approved 
> with some
> simplifications and a request to implement a fail/retry check.
>
>
> Here's what I have now. No more assembler check in configure, and it uses the 
> 64-bit __builtin_darn() and truncates it to 32-bit, or retries (up to 100 
> times) if it fails.
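
A sketch of the retry logic being described (not the actual libstdc++ code;
it assumes the darn instruction's documented all-ones failure result and the
64-bit __builtin_darn builtin available with POWER9):

  unsigned int
  darn32_with_retry ()
  {
    for (int retries = 0; retries < 100; ++retries)
      {
        unsigned long long raw = __builtin_darn ();
        if (raw != ~0ULL)                /* all ones signals failure */
          return (unsigned int) raw;     /* truncate to 32 bits */
      }
    __builtin_trap ();   /* out of retries; the real code reports failure */
  }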
>
> I'm doing some more testing now.
>
Those changes look good from my perspective.  Thanks again for this work!  (Not 
a maintainer, blah blah...)

Bill



[PATCH] ipa: Do not require RECORD_TYPE for ancestor jump functions

2021-11-05 Thread Martin Jambor
Hi,

the check this patch removes has remained from the times when ancestor
jump functions were only used for devirtualization and also
contained BINFOs.  It is not necessary now and should have been
removed a long time ago.

Pre-approved by Honza and bootstrapped and tested on x86_64-linux, I am
going to push it in a moment.

Martin


gcc/ChangeLog:

2021-11-04  Martin Jambor  

* ipa-prop.c (compute_complex_assign_jump_func): Remove
unnecessary check for RECORD_TYPE.
---
 gcc/ipa-prop.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 443f21ce61b..e85df0971fc 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -1421,8 +1421,6 @@ compute_complex_assign_jump_func (struct 
ipa_func_body_info *fbi,
   if (TREE_CODE (op1) != ADDR_EXPR)
 return;
   op1 = TREE_OPERAND (op1, 0);
-  if (TREE_CODE (TREE_TYPE (op1)) != RECORD_TYPE)
-return;
   base = get_ref_base_and_extent_hwi (op1, &offset, &size, &reverse);
   offset_int mem_offset;
   if (!base
-- 
2.33.0



Re: [PATCH v2] IPA: Provide a mechanism to register static DTORs via cxa_atexit.

2021-11-05 Thread Iain Sandoe via Gcc-patches
sheesh … EWRONGREVISEDPATCH

> On 5 Nov 2021, at 13:08, Iain Sandoe  wrote:
> 
> I tried enabling this on x86-64-linux (just for interest) and it seems to work
> OK there too - but that testing revealed a thinko that didn’t show with a
> a normal regstrap.

… now with the correct patch.

[PATCH v2] IPA: Provide a mechanism to register static DTORs via
 cxa_atexit.

For at least one target (Darwin) the platform convention is to
register static destructors (i.e. __attribute__((destructor)))
with __cxa_atexit rather than placing them into a list that is
run by some other mechanism.

This patch provides a target hook that allows a target to opt
into this and handling for the process in ipa_cdtor_merge ().

When the mode is enabled (dtors_from_cxa_atexit is set) we:

 * Generate new CTORs to register static destructors with
   __cxa_atexit and add them to the existing list of CTORs;
   we then process the revised CTORs list.

 * We sort the DTORs into priority and then TU order, this
   means that they are registered in that order with
   __cxa_atexit () and therefore will be run in the reverse
   order.

 * Likewise, CTORs are sorted into priority and then TU order,
   which means that they will run in that order.

This matches the behavior of using init/fini (or
mod_init_func/mod_term_func) sections.

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* config/darwin.h (TARGET_DTORS_FROM_CXA_ATEXIT): New.
* doc/tm.texi: Regenerated.
* doc/tm.texi.in: Add TARGET_DTORS_FROM_CXA_ATEXIT hook.
* ipa.c (ipa_discover_variable_flags):
(cgraph_build_static_cdtor_1): Return the built function
decl.
(build_cxa_atexit_decl): New.
(build_dso_handle_decl): New.
(build_cxa_dtor_registrations): New.
(compare_cdtor_tu_order): New.
(build_cxa_atexit_fns): New.
(ipa_cdtor_merge): If dtors_from_cxa_atexit is set,
process the DTORs/CTORs accordingly.
(pass_ipa_cdtor_merge::gate): Also run if
dtors_from_cxa_atexit is set.
* target.def (dtors_from_cxa_atexit): New hook.
---
 gcc/config/darwin.h |   7 +-
 gcc/doc/tm.texi |   8 ++
 gcc/doc/tm.texi.in  |   2 +
 gcc/ipa.c   | 200 +++-
 gcc/target.def  |  10 +++
 5 files changed, 222 insertions(+), 5 deletions(-)

diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 27cb3e4bb30..2b19fb7c085 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -54,6 +54,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #define DO_GLOBAL_DTORS_BODY
 
+/* Register static destructors to run from __cxa_atexit instead of putting
+   them into a .mod_term_funcs section.  */
+
+#define TARGET_DTORS_FROM_CXA_ATEXIT true
+
 /* The string value for __SIZE_TYPE__.  */
 
 #ifndef SIZE_TYPE
@@ -1160,7 +1165,7 @@ extern void darwin_driver_init (unsigned int *,struct 
cl_decoded_option **);
 
 /* The Apple assembler and linker do not support constructor priorities.  */
 #undef SUPPORTS_INIT_PRIORITY
-#define SUPPORTS_INIT_PRIORITY 0
+#define SUPPORTS_INIT_PRIORITY 1
 
 #undef STACK_CHECK_STATIC_BUILTIN
 #define STACK_CHECK_STATIC_BUILTIN 1
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 990152f5b15..0a4df18b825 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -9233,6 +9233,14 @@ collecting constructors and destructors to be run at 
startup and exit.
 It is false if we must use @command{collect2}.
 @end deftypevr
 
+@deftypevr {Target Hook} bool TARGET_DTORS_FROM_CXA_ATEXIT
+This value is true if the target wants destructors to be queued to be
+run from __cxa_atexit.  If this is the case then, for each priority level,
+a new constructor will be entered that registers the destructors for that
+level with __cxa_atexit (and there will be no destructors emitted).
+It is false if the method implied by @code{have_ctors_dtors} is used.
+@end deftypevr
+
 @deftypefn {Target Hook} void TARGET_ASM_CONSTRUCTOR (rtx @var{symbol}, int 
@var{priority})
 If defined, a function that outputs assembler code to arrange to call
 the function referenced by @var{symbol} at initialization time.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 193c9bdd853..c733f356fe4 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6021,6 +6021,8 @@ encountering an @code{init_priority} attribute.
 
 @hook TARGET_HAVE_CTORS_DTORS
 
+@hook TARGET_DTORS_FROM_CXA_ATEXIT
+
 @hook TARGET_ASM_CONSTRUCTOR
 
 @hook TARGET_ASM_DESTRUCTOR
diff --git a/gcc/ipa.c b/gcc/ipa.c
index 4f62ac183ee..325b658b55e 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -837,7 +837,7 @@ ipa_discover_variable_flags (void)
FINAL specify whether the externally visible name for collect2 should
be produced. */
 
-static void
+static tree
 cgraph_build_static_cdtor_1 (char which, tree body, int priority, bool final,
 tree optimization,
 tree target)
@@ -916,6 +916,7 @@ cgraph_build_static_

[PATCH][committed] Split vector loop analysis into main and epilogue analysis

2021-11-05 Thread Richard Biener via Gcc-patches
As discussed, this splits the analysis loop into two, first settling
on a vector mode used for the main loop and only then analyzing
the epilogue of that for possible vectorization.  That makes it
easier to put in support for unrolled main loops.

On the way I've noticed some cleanup opportunities, namely caching
n_stmts in vec_info_shared (it's computed by dataref analysis),
avoiding passing that around, and not setting/clearing loop->aux
during analysis - try_vectorize_loop_1 will ultimately set it
on those we vectorize.

This also gets rid of the previously introduced callback in
vect_analyze_loop_1 in favor of making that advance the mode iterator.
I'm now pushing VOIDmode explicitly into the vector_modes array,
which makes the re-start on the epilogue side a bit more
straightforward.  Note that we will now use auto-detection of the
vector mode in case the main loop used it and we want to try
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
the target array if not.  I've added a comment that says we may
want to make sure we don't try vectorizing the epilogue with a
bigger vector size than the main loop but the situation isn't
very likely to appear in practice I guess (and it was also present
before this change).

In principle this change should not change vectorization decisions
but the way we handled re-analyzing epilogues as main loops makes
me only 99% sure that it does.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2021-11-05  Richard Biener  

* tree-vectorizer.h (vec_info_shared::n_stmts): Add.
(LOOP_VINFO_N_STMTS): Likewise.
(vec_info_for_bb): Remove unused function.
* tree-vectorizer.c (vec_info_shared::vec_info_shared):
Initialize n_stmts member.
* tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
(vect_create_loop_vinfo): Do not set loop->aux.
(vect_analyze_loop_2): Do not get n_stmts as argument,
instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
here.
(vect_analyze_loop_1): Remove callback, get the mode iterator
and autodetected_vector_mode as argument, advancing the
iterator and initializing autodetected_vector_mode here.
(vect_analyze_loop): Split analysis loop into two, first
processing main loops only and then epilogues.
---
 gcc/tree-vect-loop.c  | 415 +-
 gcc/tree-vectorizer.c |   3 +-
 gcc/tree-vectorizer.h |  10 +-
 3 files changed, 212 insertions(+), 216 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 13a53436729..abf87f99d6d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
 .  */
 
 #define INCLUDE_ALGORITHM
-#define INCLUDE_FUNCTIONAL
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared 
*shared,
  = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
 }
 
-  gcc_assert (!loop->aux);
-  loop->aux = loop_vinfo;
   return loop_vinfo;
 }
 
@@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info 
loop_vinfo,
for it.  The different analyses will record information in the
loop_vec_info struct.  */
 static opt_result
-vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
+vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
 {
   opt_result ok = opt_result::success ();
   int res;
@@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal, unsigned *n_stmts)
   opt_result res
= vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
 &LOOP_VINFO_DATAREFS (loop_vinfo),
-n_stmts);
+&LOOP_VINFO_N_STMTS (loop_vinfo));
   if (!res)
{
  if (dump_enabled_p ())
@@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
&fatal, unsigned *n_stmts)
   poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 
   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
-  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
+  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
   if (!ok)
 return ok;
 
@@ -2641,6 +2638,7 @@ start_over:
LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
 
   /* Ok to vectorize!  */
+  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
   return opt_result::success ();
 
 again:
@@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
   return true;
 }
 
-/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
-   not NULL.  Process the analyzed loop with PROCESS even if analysis
-   failed.  Sets *N_STMTS and FATAL according to the analysis.
+/* Analyze LOOP with VECTOR_MODES[

[PATCH] AArch64: Fix PR103085

2021-11-05 Thread Wilco Dijkstra via Gcc-patches
The stack protector implementation hides symbols in a const unspec, which means
movdi/movsi patterns must always support const on symbol operands and explicitly
strip away the unspec. Do this for the recently added GOT alternatives. Add a
test to ensure stack-protector tests GOT accesses as well.

Passes bootstrap and regress. OK for commit?

2021-11-05  Wilco Dijkstra  

PR target/103085
* config/aarch64/aarch64.c (aarch64_mov_operand_p): Strip the salt 
first.
* config/aarch64/constraints.md: Support const in Usw.

* gcc.target/aarch64/pr103085.c: New test
---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
430ea036a7be4da842fd08998923a3462457dbfd..39de231d8ac6d10362cdd2b48eb9bd9de60c6703
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20155,12 +20155,14 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_immediate (x, NULL);
 }
 
+  /* Remove UNSPEC_SALT_ADDR before checking symbol reference.  */
+  x = strip_salt (x);
+
   /* GOT accesses are valid moves.  */
   if (SYMBOL_REF_P (x)
   && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
 return true;
 
-  x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
 return true;
 
diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 
70ca66070217d06f974b18f7690326bc329fed89..da5438e7ce254272eb10e9ac60d5367c46635e84
 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -152,10 +152,11 @@ (define_constraint "Usa"
(match_test "aarch64_symbolic_address_p (op)")
(match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
 
+;; const is needed here to support UNSPEC_SALT_ADDR.
 (define_constraint "Usw"
   "@internal
A constraint that matches a small GOT access."
-  (and (match_code "symbol_ref")
+  (and (match_code "const,symbol_ref")
(match_test "aarch64_classify_symbolic_expression (op)
 == SYMBOL_SMALL_GOT_4G")))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103085.c 
b/gcc/testsuite/gcc.target/aarch64/pr103085.c
new file mode 100644
index 
..dbc9c15b71f224b3c7dec0cca5655a31adc207f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103085.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-protector-strong -fPIC" } */
+
+void g(int*);
+void
+f (int x)
+{
+  int arr[10];
+  g (arr);
+}
+


[PATCH] Amend split vector loop analysis into main and epilogue analysis

2021-11-05 Thread Richard Biener via Gcc-patches
I forgot to commit the changes made in response to Richard's review
before committing.

Will push after bootstrap.

2021-11-05  Richard Biener  

* tree-vect-loop.c (vect_analyze_loop): Remove obsolete
comment and expand on another one.  Combine nested if.
---
 gcc/tree-vect-loop.c | 48 +---
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index abf87f99d6d..b56b7a4a386 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2998,35 +2998,6 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
*shared)
   return opt_loop_vec_info::propagate_failure (res);
 }
 
-  /* When pick_lowest_cost_p is true, we should in principle iterate
- over all the loop_vec_infos that LOOP_VINFO could replace and
- try to vectorize LOOP_VINFO under the same conditions.
- E.g. when trying to replace an epilogue loop, we should vectorize
- LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
- to replace the main loop, we should vectorize LOOP_VINFO as a main
- loop too.
-
- However, autovectorize_vector_modes is usually sorted as follows:
-
- - Modes that naturally produce lower VFs usually follow modes that
- naturally produce higher VFs.
-
- - When modes naturally produce the same VF, maskable modes
- usually follow unmaskable ones, so that the maskable mode
- can be used to vectorize the epilogue of the unmaskable mode.
-
- This order is preferred because it leads to the maximum
- epilogue vectorization opportunities.  Targets should only use
- a different order if they want to make wide modes available while
- disparaging them relative to earlier, smaller modes.  The assumption
- in that case is that the wider modes are more expensive in some
- way that isn't reflected directly in the costs.
-
- There should therefore be few interesting cases in which
- LOOP_VINFO fails when treated as an epilogue loop, succeeds when
- treated as a standalone loop, and ends up being genuinely cheaper
- than FIRST_LOOP_VINFO.  */
-
   auto_vector_modes vector_modes;
   /* Autodetect first vector size we try.  */
   vector_modes.safe_push (VOIDmode);
@@ -3042,7 +3013,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
*shared)
   unsigned int first_loop_next_i = 0;
   unsigned HOST_WIDE_INT simdlen = loop->simdlen;
 
-  /* First determine the main loop vectorization mode.  */
+  /* First determine the main loop vectorization mode, either the first
+ one that works, starting with auto-detecting the vector mode and then
+ following the targets order of preference, or the one with the
+ lowest cost if pick_lowest_cost_p.  */
   while (1)
 {
   unsigned int loop_vinfo_i = mode_i;
@@ -3065,15 +3039,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
*shared)
  first_loop_vinfo = opt_loop_vec_info::success (NULL);
  simdlen = 0;
}
- else if (pick_lowest_cost_p && first_loop_vinfo)
+ else if (pick_lowest_cost_p
+  && first_loop_vinfo
+  && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
{
- /* Keep trying to roll back vectorization attempts while the
-loop_vec_infos they produced were worse than this one.  */
- if (vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
-   {
- delete first_loop_vinfo;
- first_loop_vinfo = opt_loop_vec_info::success (NULL);
-   }
+ /* Pick loop_vinfo over first_loop_vinfo.  */
+ delete first_loop_vinfo;
+ first_loop_vinfo = opt_loop_vec_info::success (NULL);
}
  if (first_loop_vinfo == NULL)
{
-- 
2.31.1


Re: [PATCH] gcc: vx-common.h: fix test for VxWorks7

2021-11-05 Thread Olivier Hainque via Gcc-patches



> On 5 Nov 2021, at 09:48, Rasmus Villemoes  wrote:
> Applied to master and pushed - hope I've done it right.

AFAICS, yes.

> How about the gcc-11 branch, can it be applied there as well,

Yes, I think so. The builds you do used to work before
the change that introduced the ifdef, IIUC, and the adjustment
you propose is close to being in the "obvious" category.

Very small, clearly vxworks only and in line with
every other use of that macro.

> and if so,
> should I do a "git cherry-pick -x" and push it to that branch?

> From
> looking at the git history it seems to be the way things are done.

That's my understanding as well.

Thanks,

Olivier



Re: [PATCH] gcc: vx-common.h: fix test for VxWorks7

2021-11-05 Thread Rasmus Villemoes via Gcc-patches
On 05/11/2021 15.08, Olivier Hainque wrote:
> 
> 
>> On 5 Nov 2021, at 09:48, Rasmus Villemoes  wrote:
>> Applied to master and pushed - hope I've done it right.
> 
> AFAICS, yes.
> 
>> How about the gcc-11 branch, can it be applied there as well,
> 
> Yes, I think so. The builds you do used to work before
> the change that introduced the ifdef, 

Well, apart from all the other fixups, some of which are not
upstreamable, that I also need to apply :)

> IIUC, and the adjustment
> you propose is close to being in the "obvious" category.

Indeed. I'll cherry-pick it into releases/gcc-11.

Have you had a chance to look at the other four patches I've sent
recently? They are also vxworks-only, and shouldn't be very controversial.

Rasmus


Re: [PATCH 3/N] Come up with casm global state.

2021-11-05 Thread Martin Liška

On 10/26/21 09:45, Richard Biener wrote:

On Mon, Oct 25, 2021 at 6:32 PM Segher Boessenkool
 wrote:


Hi!

On Mon, Oct 25, 2021 at 03:36:25PM +0200, Martin Liška wrote:

--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -189,4 +189,13 @@ extern bool rs6000_passes_vector;
  extern bool rs6000_returns_struct;
  extern bool cpu_builtin_p;

+struct rs6000_asm_out_state : public asm_out_state
+{
+  /* Initialize ELF sections. */
+  void init_elf_sections ();
+
+  /* Initialize XCOFF sections. */
+  void init_xcoff_sections ();
+};


Our coding convention says to use "class", not "struct" (since this
isn't valid C code at all).


-  sdata2_section
+ sec.sdata2
  = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
  SDATA2_SECTION_ASM_OP);


(broken indentation)


+/* Implement TARGET_ASM_INIT_SECTIONS.  */


That comment is out-of-date.


+static asm_out_state *
+rs6000_elf_asm_init_sections (void)
+{
+  rs6000_asm_out_state *target_state
+= new (ggc_alloc ()) rs6000_asm_out_state ();


Hrm, maybe we can have a macro or function that does this, ggc_new or
something?
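
Something like the following minimal sketch, perhaps (ggc_new is only a
suggested name here, not an existing helper; it just wraps placement new
over ggc_alloc and assumes ggc.h and <utility> are available):

  /* Sketch only: allocate GC storage for T and construct it in place.  */
  template <typename T, typename... Args>
  inline T *
  ggc_new (Args&&... args)
  {
    return new (ggc_alloc<T> ()) T (std::forward<Args> (args)...);
  }

  /* ... so the hook body above would become roughly:
     rs6000_asm_out_state *target_state = ggc_new<rs6000_asm_out_state> ();  */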


+/* Implement TARGET_ASM_INIT_SECTIONS.  */
+
+static asm_out_state *
+rs6000_xcoff_asm_init_sections (void)


Here, too.  Both implementations are each one of several functions that
together implement the target macro.


+/* The section that holds the DWARF2 frame unwind information, when known.
+   The section is set either by the target's init_sections hook or by the
+   first call to switch_to_eh_frame_section.  */
+section *eh_frame;
+
+/* RS6000 sections.  */


Nothing here?  Just remove the comment header?

The idea looks fine to me.


Yeah, of course then the target hook does not need to do the allocation
and we could simply keep the current init_sections hook but change it
to take the asm_out_state to initialize as argument.


Makes sense.



Note that I'd put

+/* RS6000 sections.  */
+
+/* ELF sections.  */
+section *toc;
+section *sdata2;
+
+/* XCOFF sections.  */
+section *read_only_data;
+section *private_data;
+section *tls_data;
+section *tls_private_data;
+section *read_only_private_data;

into a union, thus

union {
struct /* RS6000 sections */ {
/* ELF sections.  */
   section *toc;
...
} rs6000;
struct /* darwin sections */ {
  ...
};


Union is a bit tricky for GGC marking script, but we can manage that.



not sure whether we need some magic GTY marking here to make
it pick up the 'correct' set.  Another alternative would be

  section *target[MAX_TARGET_SECTIONS];

and #defines in the targets mapping the former global variables to
indices in that array.

All of this isn't "nice C++" of course, but well ... I'm not the one
to insist ;)


Anyway, I took a look at targets that do call the init_sections hook and I 
noticed
Darwin uses pretty many sections and comes up with an array that is defined 
here:
./gcc/config/darwin-sections.def

I tend to creating all sections in asm_out_state with DEF_SECTION, where the 
list
will be extensible with a target-specific definition list.

What do you think Richi?

Thanks,
Martin



Richard.



Segher




[PATCH] gcov-profile: Filter test only for some targets [PR102945]

2021-11-05 Thread Martin Liška

Pushed.

Martin

PR gcov-profile/102945

gcc/testsuite/ChangeLog:

* gcc.dg/gcov-info-to-gcda.c: Filter supported targets.
---
 gcc/testsuite/gcc.dg/gcov-info-to-gcda.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c 
b/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c
index a42a768326c..4583360feef 100644
--- a/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c
+++ b/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c
@@ -1,5 +1,4 @@
-/* { dg-do run } */
-/* { dg-skip-if "profile-info-section" { powerpc-ibm-aix* } } */
+/* { dg-do run { target *-*-linux* *-*-gnu* } } */
 /* { dg-options "-fprofile-arcs -fprofile-info-section" } */
 
 #define assert(expr)\

--
2.33.1



[PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread Martin Liška

The code intentionally uses braced groups within expressions:

({\
  uptr pc;\
  asm("lea 0(%%rip), %0" : "=r"(pc)); \
  pc; \
})

And we now emit a gazillion warnings:

/home/marxin/Programming/gcc/libsanitizer/tsan/tsan_interceptors_posix.cpp: In 
function ‘int sigaction_impl(int, const __sanitizer::__sanitizer_sigaction*, 
__sanitizer::__sanitizer_sigaction*)’:
/home/marxin/Programming/gcc/libsanitizer/sanitizer_common/sanitizer_stacktrace.h:212:5:
 warning: ISO C++ forbids braced-groups within expressions [-Wpedantic]
  212 | ({\
  | ^
/home/marxin/Programming/gcc/libsanitizer/tsan/tsan_interceptors.h:44:26: note: 
in expansion of macro ‘GET_CURRENT_PC’
   44 |   UNUSED const uptr pc = GET_CURRENT_PC();
  |  ^~

Ready to be installed?
Thanks,
Martin

libsanitizer/ChangeLog:

* asan/Makefile.am: Remove -pedantic option.
* asan/Makefile.in: Likewise.
* hwasan/Makefile.am: Likewise.
* hwasan/Makefile.in: Likewise.
* interception/Makefile.am: Likewise.
* interception/Makefile.in: Likewise.
* lsan/Makefile.am: Likewise.
* lsan/Makefile.in: Likewise.
* sanitizer_common/Makefile.am: Likewise.
* sanitizer_common/Makefile.in: Likewise.
* tsan/Makefile.am: Likewise.
* tsan/Makefile.in: Likewise.
* ubsan/Makefile.am: Likewise.
* ubsan/Makefile.in: Likewise.
---
 libsanitizer/asan/Makefile.am | 2 +-
 libsanitizer/asan/Makefile.in | 2 +-
 libsanitizer/hwasan/Makefile.am   | 2 +-
 libsanitizer/hwasan/Makefile.in   | 2 +-
 libsanitizer/interception/Makefile.am | 2 +-
 libsanitizer/interception/Makefile.in | 2 +-
 libsanitizer/lsan/Makefile.am | 2 +-
 libsanitizer/lsan/Makefile.in | 2 +-
 libsanitizer/sanitizer_common/Makefile.am | 2 +-
 libsanitizer/sanitizer_common/Makefile.in | 2 +-
 libsanitizer/tsan/Makefile.am | 2 +-
 libsanitizer/tsan/Makefile.in | 2 +-
 libsanitizer/ubsan/Makefile.am| 2 +-
 libsanitizer/ubsan/Makefile.in| 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/libsanitizer/asan/Makefile.am b/libsanitizer/asan/Makefile.am
index 4f802f723d6..7270116cf71 100644
--- a/libsanitizer/asan/Makefile.am
+++ b/libsanitizer/asan/Makefile.am
@@ -7,7 +7,7 @@ DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS 
-D__STDC_FORMAT_MACROS -D
 if USING_MAC_INTERPOSE
 DEFS += -DMAC_INTERPOSE_FUNCTIONS -DMISSING_BLOCKS_SUPPORT
 endif
-AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic 
-Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti 
-fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros 
-fno-ipa-icf
+AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -Wno-long-long  
-fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer 
-funwind-tables -fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
 AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 AM_CXXFLAGS += -std=gnu++14
 AM_CXXFLAGS += $(EXTRA_CXXFLAGS)
diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in
index 528ab61312c..26971051b82 100644
--- a/libsanitizer/asan/Makefile.in
+++ b/libsanitizer/asan/Makefile.in
@@ -416,7 +416,7 @@ AM_CPPFLAGS = -I $(top_srcdir)/include -I $(top_srcdir)
 
 # May be used by toolexeclibdir.

 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
-AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic \
+AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings \
-Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \
-fomit-frame-pointer -funwind-tables -fvisibility=hidden \
-Wno-variadic-macros -fno-ipa-icf \
diff --git a/libsanitizer/hwasan/Makefile.am b/libsanitizer/hwasan/Makefile.am
index e12c0a0ce71..9fd39953789 100644
--- a/libsanitizer/hwasan/Makefile.am
+++ b/libsanitizer/hwasan/Makefile.am
@@ -4,7 +4,7 @@ AM_CPPFLAGS = -I $(top_srcdir)/include -I $(top_srcdir)
 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
 
 DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DCAN_SANITIZE_UB=0 -DHWASAN_WITH_INTERCEPTORS=1

-AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic 
-Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti -funwind-tables 
-fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
+AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -Wno-long-long  
-fPIC -fno-builtin -fno-exceptions -fno-rtti -funwind-tables 
-fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
 AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 AM_CXXFLAGS += -std=gnu++14
 AM_CXXFLAGS += $(EXTRA_CXXFLAGS)
diff --git a/libsanitizer/hwasan/M

[PATCH] Cleanup back_threader::find_path_to_names.

2021-11-05 Thread Aldy Hernandez via Gcc-patches
The main path discovery function was due for a cleanup.  First,
there's a nagging goto and second, my bitmap use was sloppy.  Hopefully
this makes the code easier for others to read.

Regstrapped on x86-64 Linux.  I also made sure there was no difference
in the number of threads with this patch.

No functional changes.

OK?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::find_paths_to_names):
Remove gotos and other cleanups.
---
 gcc/tree-ssa-threadbackward.c | 52 ++-
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index b7eaff94567..d6a5b0b8da2 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -402,26 +402,18 @@ back_threader::find_paths_to_names (basic_block bb, 
bitmap interesting)
 
   m_path.safe_push (bb);
 
+  // Try to resolve the path without looking back.
   if (m_path.length () > 1
-  && !m_profit.profitable_path_p (m_path, m_name, NULL))
+  && (!m_profit.profitable_path_p (m_path, m_name, NULL)
+ || maybe_register_path ()))
 {
   m_path.pop ();
   m_visited_bbs.remove (bb);
   return false;
 }
 
-  // Try to resolve the path without looking back.
-  if (m_path.length () > 1 && maybe_register_path ())
-{
-  m_path.pop ();
-  m_visited_bbs.remove (bb);
-  return true;
-}
-
   auto_bitmap processed;
-  unsigned i;
   bool done = false;
-
   // We use a worklist instead of iterating through the bitmap,
   // because we may add new items in-flight.
   auto_vec worklist (bitmap_count_bits (interesting));
@@ -433,34 +425,30 @@ back_threader::find_paths_to_names (basic_block bb, 
bitmap interesting)
   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
 
   // Process any names defined in this block.
-  if (def_bb == bb)
+  if (def_bb == bb
+ && bitmap_set_bit (processed, i)
+ && resolve_def (name, interesting, worklist))
{
- bitmap_set_bit (processed, i);
-
- if (resolve_def (name, interesting, worklist))
-   {
- done = true;
- goto leave_bb;
-   }
+ done = true;
+ break;
}
 }
-
   // If there are interesting names not yet processed, keep looking.
-  bitmap_and_compl_into (interesting, processed);
-  if (!bitmap_empty_p (interesting))
+  if (!done)
 {
-  edge_iterator iter;
-  edge e;
-  FOR_EACH_EDGE (e, iter, bb->preds)
-   if ((e->flags & EDGE_ABNORMAL) == 0)
- done |= find_paths_to_names (e->src, interesting);
+  bitmap_and_compl_into (interesting, processed);
+  if (!bitmap_empty_p (interesting))
+   {
+ edge_iterator iter;
+ edge e;
+ FOR_EACH_EDGE (e, iter, bb->preds)
+   if ((e->flags & EDGE_ABNORMAL) == 0)
+ done |= find_paths_to_names (e->src, interesting);
+   }
 }
 
- leave_bb:
-  bitmap_iterator bi;
-  EXECUTE_IF_SET_IN_BITMAP (processed, 0, i, bi)
-bitmap_set_bit (interesting, i);
-
+  // Reset things to their original state.
+  bitmap_ior_into (interesting, processed);
   m_path.pop ();
   m_visited_bbs.remove (bb);
   return done;
-- 
2.31.1



Re: [PATCH] AArch64: Fix PR103085

2021-11-05 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> The stack protector implementation hides symbols in a const unspec, which 
> means
> movdi/movsi patterns must always support const on symbol operands and 
> explicitly
> strip away the unspec. Do this for the recently added GOT alternatives. Add a
> test to ensure stack-protector tests GOT accesses as well.
>
> Passes bootstrap and regress. OK for commit?
>
> 2021-11-05  Wilco Dijkstra  
>
> PR target/103085
> * config/aarch64/aarch64.c (aarch64_mov_operand_p): Strip the salt 
> first.
> * config/aarch64/constraints.md: Support const in Usw.
>
> * gcc.target/aarch64/pr103085.c: New test

OK, thanks.

Richard

> ---
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 430ea036a7be4da842fd08998923a3462457dbfd..39de231d8ac6d10362cdd2b48eb9bd9de60c6703
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -20155,12 +20155,14 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
>return aarch64_simd_valid_immediate (x, NULL);
>  }
>
> +  /* Remove UNSPEC_SALT_ADDR before checking symbol reference.  */
> +  x = strip_salt (x);
> +
>/* GOT accesses are valid moves.  */
>if (SYMBOL_REF_P (x)
>&& aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
>  return true;
>
> -  x = strip_salt (x);
>if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
>  return true;
>
> diff --git a/gcc/config/aarch64/constraints.md 
> b/gcc/config/aarch64/constraints.md
> index 
> 70ca66070217d06f974b18f7690326bc329fed89..da5438e7ce254272eb10e9ac60d5367c46635e84
>  100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -152,10 +152,11 @@ (define_constraint "Usa"
> (match_test "aarch64_symbolic_address_p (op)")
> (match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
>
> +;; const is needed here to support UNSPEC_SALT_ADDR.
>  (define_constraint "Usw"
>"@internal
> A constraint that matches a small GOT access."
> -  (and (match_code "symbol_ref")
> +  (and (match_code "const,symbol_ref")
> (match_test "aarch64_classify_symbolic_expression (op)
>  == SYMBOL_SMALL_GOT_4G")))
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr103085.c 
> b/gcc/testsuite/gcc.target/aarch64/pr103085.c
> new file mode 100644
> index 
> ..dbc9c15b71f224b3c7dec0cca5655a31adc207f6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr103085.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fstack-protector-strong -fPIC" } */
> +
> +void g(int*);
> +void
> +f (int x)
> +{
> +  int arr[10];
> +  g (arr);
> +}
> +


Re: [PATCH] gcc: vx-common.h: fix test for VxWorks7

2021-11-05 Thread Olivier Hainque via Gcc-patches



> On 5 Nov 2021, at 15:12, Rasmus Villemoes  wrote:
> 
>> Yes, I think so. The builds you do used to work before
>> the change that introduced the ifdef, 
> 
> Well, apart from all the other fixups, some of which are not
> upstreamable, that I also need to apply :)

Sure. My comment was only meant as positioning wrt
the branch commit policy.

The issue you address can be seen as triggering a regression on
its own, regardless of other changes possibly causing issues as well,
so a safe fix for it is eligible to the branch.

> IIUC, and the adjustment
>> you propose is close to being in the "obvious" category.
> 
> Indeed. I'll cherry-pick it into releases/gcc-11.
> 
> Have you had a chance to look at the other four patches I've sent
> recently? They are also vxworks-only, and shouldn't be very controversial.

Not yet, but I'm planning to get there RSN.

I have been in delivery preparation mode on another project this week
and we had to finalize a move to gcc-11 for non vxworks ports we have
in-house first.

VxWorks ports are next in line, starting next week. We have quite a
few changes to push (wrt shared libs in particular), and I'll take the
opportunity to incorporate your changes in my local testing cycles
(pretty heavy for vx7, and some for vx6).

We happen to also have a few fixincludes hunks around. Some of
them have been there for years now and I thought it would be nice to
propagate at some point.

Do you use it?





[PATCH] Darwin, Arm64 : Initial support for the self-host driver.

2021-11-05 Thread Iain Sandoe via Gcc-patches
This allows people to host a c-family/fortran GCC cross-compiler on
aarch64-apple-darwin (support for Ada will follow in a separate patch).

At present, there is no special action needed for aarch64-darwin;
this just pulls in generic Darwin code.

Tested on aarch64-darwin20,
OK for master?
thanks,
Iain

---
 gcc/config.host  |  7 -
 gcc/config/aarch64/host-aarch64-darwin.c | 33 
 gcc/config/aarch64/x-darwin  |  3 +++
 3 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/aarch64/host-aarch64-darwin.c
 create mode 100644 gcc/config/aarch64/x-darwin

diff --git a/gcc/config.host b/gcc/config.host
index 0a02c33cc80..37f9c719b68 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -99,7 +99,8 @@ case ${host} in
 esac
 
 case ${host} in
-  aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia*)
+  aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia* |\
+  aarch64*-*-darwin* | arm64*-*-darwin*)
 case ${target} in
   aarch64*-*-*)
host_extra_gcc_objs="driver-aarch64.o"
@@ -251,6 +252,10 @@ case ${host} in
 host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o"
 host_lto_plugin_soname=liblto_plugin.dll
 ;;
+  aarch64*-*-darwin* | arm64*-*-darwin*)
+out_host_hook_obj="${out_host_hook_obj} host-aarch64-darwin.o"
+host_xmake_file="${host_xmake_file} aarch64/x-darwin"
+;;
   i[34567]86-*-darwin* | x86_64-*-darwin*)
 out_host_hook_obj="${out_host_hook_obj} host-i386-darwin.o"
 host_xmake_file="${host_xmake_file} i386/x-darwin"
diff --git a/gcc/config/aarch64/host-aarch64-darwin.c 
b/gcc/config/aarch64/host-aarch64-darwin.c
new file mode 100644
index 000..d70f2df3bf1
--- /dev/null
+++ b/gcc/config/aarch64/host-aarch64-darwin.c
@@ -0,0 +1,33 @@
+/* aarch64/arm64-darwin host-specific hook definitions.
+
+Copyright The GNU Toolchain Authors.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for arm64/aarch64 hosts; this file
+   exists just to include the generic config/host-darwin.h.  */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/aarch64/x-darwin b/gcc/config/aarch64/x-darwin
new file mode 100644
index 000..6d788d5e89c
--- /dev/null
+++ b/gcc/config/aarch64/x-darwin
@@ -0,0 +1,3 @@
+host-aarch64-darwin.o : $(srcdir)/config/aarch64/host-aarch64-darwin.c
+   $(COMPILE) $<
+   $(POSTCOMPILE)
-- 
2.24.3 (Apple Git-128)



[committed] hppa: Move PREFERRED_DEBUGGING_TYPE define in pa64-hpux.h to pa.h

2021-11-05 Thread John David Anglin

The D language build on hppa64 does not include pa64-hpux.h.  It only includes 
pa.h.  As
a result PREFERRED_DEBUGGING_TYPE was not defined.  This caused a build error 
when defaults.h
was included.

The include issue might affect other defines but so far I haven't noticed any 
problems.

Tested on hppa64-hp-hpux11.11.

Committed to trunk.

Dave
---
Move PREFERRED_DEBUGGING_TYPE define in pa64-hpux.h to pa.h

This fixes D language build on hppa64-hpux11.

2021-11-05  John David Anglin  

gcc/ChangeLog:

* config/pa/pa.h (PREFERRED_DEBUGGING_TYPE): Define to DWARF2_DEBUG.
* config/pa/pa64-hpux.h (PREFERRED_DEBUGGING_TYPE): Remove define.

diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 7a313d617b0..96815ec69cb 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -136,6 +136,9 @@ extern unsigned long total_code_bytes;
by default.  */
 #define DEFAULT_GDB_EXTENSIONS 1

+/* Select dwarf2 as the preferred debug format.  */
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
 /* This used to be zero (no max length), but big enums and such can
cause huge strings which killed gas.

diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index c25bc38ba64..3ee97a418a2 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -266,7 +266,6 @@ do {
\
 /* It looks like DWARF2 will be the easiest debug format to handle on this
platform.  */
 #define DWARF2_DEBUGGING_INFO 1
-#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG

 /* This target uses the ELF object file format.  */
 #define OBJECT_FORMAT_ELF


Re: [PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread H.J. Lu via Gcc-patches
On Fri, Nov 5, 2021 at 8:00 AM Martin Liška  wrote:
>
> The code uses intentionally braced-groups within expressions:
>
>  ({\

Should we add __extension__ here?
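
E.g. a quick sketch of what that could look like (untested, just illustrating
the suggestion; uptr stands in for the sanitizer typedef in that header):

  #define GET_CURRENT_PC()                      \
    __extension__ ({                            \
      uptr pc;                                  \
      asm("lea 0(%%rip), %0" : "=r"(pc));       \
      pc;                                       \
    })

__extension__ suppresses exactly this -Wpedantic diagnostic without dropping
the flag for the whole build.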

>uptr pc;\
>asm("lea 0(%%rip), %0" : "=r"(pc)); \
>pc; \
>  })
>
> And we emit gazillion of warnings now:
>
> /home/marxin/Programming/gcc/libsanitizer/tsan/tsan_interceptors_posix.cpp: 
> In function ‘int sigaction_impl(int, const 
> __sanitizer::__sanitizer_sigaction*, __sanitizer::__sanitizer_sigaction*)’:
> /home/marxin/Programming/gcc/libsanitizer/sanitizer_common/sanitizer_stacktrace.h:212:5:
>  warning: ISO C++ forbids braced-groups within expressions [-Wpedantic]
>212 | ({\
>| ^
> /home/marxin/Programming/gcc/libsanitizer/tsan/tsan_interceptors.h:44:26: 
> note: in expansion of macro ‘GET_CURRENT_PC’
> 44 |   UNUSED const uptr pc = GET_CURRENT_PC();
>|  ^~
>
> Ready to be installed?
> Thanks,
> Martin
>
> libsanitizer/ChangeLog:
>
> * asan/Makefile.am: Remove -pedantic option.
> * asan/Makefile.in: Likewise.
> * hwasan/Makefile.am: Likewise.
> * hwasan/Makefile.in: Likewise.
> * interception/Makefile.am: Likewise.
> * interception/Makefile.in: Likewise.
> * lsan/Makefile.am: Likewise.
> * lsan/Makefile.in: Likewise.
> * sanitizer_common/Makefile.am: Likewise.
> * sanitizer_common/Makefile.in: Likewise.
> * tsan/Makefile.am: Likewise.
> * tsan/Makefile.in: Likewise.
> * ubsan/Makefile.am: Likewise.
> * ubsan/Makefile.in: Likewise.
> ---
>   libsanitizer/asan/Makefile.am | 2 +-
>   libsanitizer/asan/Makefile.in | 2 +-
>   libsanitizer/hwasan/Makefile.am   | 2 +-
>   libsanitizer/hwasan/Makefile.in   | 2 +-
>   libsanitizer/interception/Makefile.am | 2 +-
>   libsanitizer/interception/Makefile.in | 2 +-
>   libsanitizer/lsan/Makefile.am | 2 +-
>   libsanitizer/lsan/Makefile.in | 2 +-
>   libsanitizer/sanitizer_common/Makefile.am | 2 +-
>   libsanitizer/sanitizer_common/Makefile.in | 2 +-
>   libsanitizer/tsan/Makefile.am | 2 +-
>   libsanitizer/tsan/Makefile.in | 2 +-
>   libsanitizer/ubsan/Makefile.am| 2 +-
>   libsanitizer/ubsan/Makefile.in| 2 +-
>   14 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/libsanitizer/asan/Makefile.am b/libsanitizer/asan/Makefile.am
> index 4f802f723d6..7270116cf71 100644
> --- a/libsanitizer/asan/Makefile.am
> +++ b/libsanitizer/asan/Makefile.am
> @@ -7,7 +7,7 @@ DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS 
> -D__STDC_FORMAT_MACROS -D
>   if USING_MAC_INTERPOSE
>   DEFS += -DMAC_INTERPOSE_FUNCTIONS -DMISSING_BLOCKS_SUPPORT
>   endif
> -AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic 
> -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti 
> -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros 
> -fno-ipa-icf
> +AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -Wno-long-long  
> -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer 
> -funwind-tables -fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
>   AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
>   AM_CXXFLAGS += -std=gnu++14
>   AM_CXXFLAGS += $(EXTRA_CXXFLAGS)
> diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in
> index 528ab61312c..26971051b82 100644
> --- a/libsanitizer/asan/Makefile.in
> +++ b/libsanitizer/asan/Makefile.in
> @@ -416,7 +416,7 @@ AM_CPPFLAGS = -I $(top_srcdir)/include -I $(top_srcdir)
>
>   # May be used by toolexeclibdir.
>   gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
> -AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic \
> +AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings \
> -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \
> -fomit-frame-pointer -funwind-tables -fvisibility=hidden \
> -Wno-variadic-macros -fno-ipa-icf \
> diff --git a/libsanitizer/hwasan/Makefile.am b/libsanitizer/hwasan/Makefile.am
> index e12c0a0ce71..9fd39953789 100644
> --- a/libsanitizer/hwasan/Makefile.am
> +++ b/libsanitizer/hwasan/Makefile.am
> @@ -4,7 +4,7 @@ AM_CPPFLAGS = -I $(top_srcdir)/include -I $(top_srcdir)
>   gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
>
>   DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS 
> -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DCAN_SANITIZE_UB=0 
> -DHWASAN_WITH_INTERCEPTORS=1
> -AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic 
> -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti -funwind-tables 
> -fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
> +AM_CXXFLAGS = -

Re: [PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 05, 2021 at 11:31:58AM +0100, Richard Biener wrote:
> On Fri, Nov 5, 2021 at 10:54 AM Jakub Jelinek  wrote:
> >
> > On Fri, Nov 05, 2021 at 10:42:05AM +0100, Richard Biener via Gcc-patches 
> > wrote:
> > > I had the impression we have support for PCH file relocation to deal with 
> > > ASLR
> > > at least on some platforms.
> >
> > Unfortunately we do not, e.g. if you build cc1/cc1plus as PIE on
> > x86_64-linux, PCH will stop working unless one always invokes it with
> > disabled ASLR through personality.
> >
> > I think this is related to function pointers and pointers to .rodata/.data
> > etc. variables in GC memory, we currently do not relocate that.
> >
> > What we perhaps could do is (at least assuming all the ELF PT_LOAD segments
> > are adjacent with a single load base for them - I think at least ia64
> > non-PIE binaries were violating this by having .text and .data PT_LOAD
> > segments many terrabytes appart with a whole in between not protected in any
> > way, but dunno if that is for PIEs too), perhaps try in a host
> > specific way remember the address range in which the function pointers and
> > .rodata/.data can exist, remember the extent start and end from PCH 
> > generation
> > and on PCH load query those addresses for the current compiler and relocate
> > everything in that extent by the load bias from the last run.
> > But, the assumption for this is that those function and data/rodata pointers
> > in GC memory are actually marked at least as pointers...
> 
> If any such pointers exist they must be marked GTY((skip)) since they do not
> point to GC memory...  So we'd need to invent special-handling for those.
> 
> > Do we e.g. have objects with virtual classes in GC memory and if so, do we
> > catch their virtual table pointers?
> 
> Who knows, but then I don't remember adding stuff that should end in a PCH.

So, I've investigated a little bit.
Apparently all the relocation we currently do for PCH is done at PCH write
time, we choose some address range in the address space we think will be likely
mmappable each time successfully, relocate all pointers pointing to GC
memory to point in there and then write that to file, together with the
scalar GTY global vars values and GTY pointers in global vars.
On PCH load, we just try to mmap memory in the right range, fail PCH load if
unsuccessful, and read the GC memory into that range and update scalar and
pointer GTY global vars from what we've recorded.
Patch that made PCH load to fail for PIEs etc. was
https://gcc.gnu.org/legacy-ml/gcc-patches/2003-10/msg01994.html
If we wanted to relocate pointers to functions and .data/.rodata etc.,
ideally we'd create a relocation list of addresses that should be
incremented by the bias and quickly relocate those.
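
Something like the following is what I have in mind - just a rough sketch
with made-up names, not the actual GGC interfaces:

#include <stddef.h>

/* Sketch only: at PCH write time record the offsets of GC slots that hold
   pointers into the compiler binary; at PCH load time add the difference
   between the old and the new load base to each recorded slot.  */

struct pch_exec_reloc { size_t offset; };  /* Offset within the GC image.  */

static void
apply_exec_relocs (char *gc_image, const struct pch_exec_reloc *relocs,
		   size_t n, ptrdiff_t load_bias)
{
  for (size_t i = 0; i < n; i++)
    {
      char **slot = (char **) (gc_image + relocs[i].offset);
      *slot += load_bias;
    }
}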

I wrote following ugly hack:

--- ggc-common.c.jj 2021-08-19 11:42:27.365422400 +0200
+++ ggc-common.c2021-11-05 15:37:51.447222544 +0100
@@ -404,6 +404,9 @@ struct mmap_info
 
 /* Write out the state of the compiler to F.  */
 
+char *exestart = (char *) 2;
+char *exeend = (char *) 2;
+
 void
 gt_pch_save (FILE *f)
 {
@@ -458,6 +461,14 @@ gt_pch_save (FILE *f)
 for (rti = *rt; rti->base != NULL; rti++)
   if (fwrite (rti->base, rti->stride, 1, f) != 1)
fatal_error (input_location, "cannot write PCH file: %m");
+  else if ((((uintptr_t) rti->base) & (sizeof (void *) - 1)) == 0)
+{
+  char *const *p = (char *const *) rti->base;
+  char *const *q = (char *const *) ((uintptr_t) rti->base + 
(rti->stride & ~(sizeof (void *) - 1)));
+  for (; p < q; p++)
+   if (*p >= exestart && *p < exeend)
+ fprintf (stderr, "scalar at %p points to executable %p\n", (void 
*) p, (void *) *p);
+}
 
   /* Write out all the global pointers, after translation.  */
   write_pch_globals (gt_ggc_rtab, &state);
@@ -546,6 +557,15 @@ gt_pch_save (FILE *f)
   state.ptrs[i]->note_ptr_fn (state.ptrs[i]->obj,
  state.ptrs[i]->note_ptr_cookie,
  relocate_ptrs, &state);
+  if ((((uintptr_t) state.ptrs[i]->obj) & (sizeof (void *) - 1)) == 0)
+{
+  char *const *p = (char *const *) (state.ptrs[i]->obj);
+  char *const *q = (char *const *) ((uintptr_t) (state.ptrs[i]->obj) + 
(state.ptrs[i]->size & ~(sizeof (void *) - 1)));
+  for (; p < q; p++)
+   if (*p >= exestart && *p < exeend)
+ fprintf (stderr, "object %p at %p points to executable %p\n", 
(void *) (state.ptrs[i]->obj), (void *) p, (void *) *p);
+}
+
   ggc_pch_write_object (state.d, state.f, state.ptrs[i]->obj,
state.ptrs[i]->new_addr, state.ptrs[i]->size,
state.ptrs[i]->note_ptr_fn == gt_pch_p_S);

and under debugger set exestart and exeend from /proc/*/maps of the cc1plus
process being debugged (the extent of cc1plus mappings).
This resulted in something like:
scalar at 0x3d869a8 p

Re: [PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread Martin Liška

On 11/5/21 16:22, H.J. Lu wrote:

Should we add __extension__ here?


I tried doing that but it didn't help me with the warning.
Maybe I did something wrong?

Cheers,
Martin


Re: [PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 05, 2021 at 04:25:53PM +0100, Martin Liška wrote:
> On 11/5/21 16:22, H.J. Lu wrote:
> > Should we add __extension__ here?
> 
> I tried doing that but it didn't help me with the warning.
> Maybe I did something wrong?

Works for me just fine say on:
void foo ()
{
  int a = ({ int d = 1; d; });
  int b = __extension__ ({ int d = 1; d; });
}
-Wpedantic warning on line 3, none on line 4.  Add -D__extension__=
and it warns on both.

Jakub



Re: [PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread H.J. Lu via Gcc-patches
On Fri, Nov 5, 2021 at 8:25 AM Martin Liška  wrote:
>
> On 11/5/21 16:22, H.J. Lu wrote:
> > Should we add __extension__ here?
>
> I tried doing that but it didn't help me with the warning.
> Maybe I did something wrong?

[hjl@gnu-cfl-2 tmp]$ cat y.cc
#include 

#define uptr uintptr_t

#  define GET_CURRENT_PC()\
   (__extension__ ({  \
  uptr pc;\
  asm("lea 0(%%rip), %0" : "=r"(pc)); \
  pc; \
}))

uptr
foo (void)
{
  return GET_CURRENT_PC ();
}
[hjl@gnu-cfl-2 tmp]$ gcc -S -O2 y.cc -pedantic
[hjl@gnu-cfl-2 tmp]$


-- 
H.J.


Re: [PATCH 1/7] ifcvt: Check if cmovs are needed.

2021-11-05 Thread Richard Sandiford via Gcc-patches
Robin Dapp  writes:
> Hi Richard,
>
> after giving it a second thought, and seeing that most of the changes to 
> existing code are not strictly necessary anymore, I figured it could be 
> easier not changing the current control flow too much like in the 
> attached patch.
>
> The changes remaining are to "outsource" the maybe_expand_insn part and 
> making the emit_conditional_move with full comparison and rev_comparison 
> externally available.
>
> I suppose straightening of the arguably somewhat baroque parts, we can 
> defer to a separate patch.
>
> On s390 this works nicely but I haven't yet done a bootstrap on other archs.
>
> Regards
>   Robin
>
> commit eb50384ee0cdeeefa61ae89bdbb2875500b7ce60
> Author: Robin Dapp 
> Date:   Wed Nov 27 13:53:40 2019 +0100
>
> ifcvt/optabs: Allow using a CC comparison for emit_conditional_move.
> 
> Currently we only ever call emit_conditional_move with the comparison
> (as well as its comparands) we got from the jump.  Thus, backends are
> going to emit a CC comparison for every conditional move that is being
> generated instead of re-using the existing CC.
> This, combined with emitting temporaries for each conditional move,
> causes sky-high costs for conditional moves.
> 
> This patch allows to re-use a CC so the costing situation is improved a
> bit.

Sorry for the slow reply.

> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 6ae883cbdd4..f7765e60548 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -772,7 +772,7 @@ static int noce_try_addcc (struct noce_if_info *);
>  static int noce_try_store_flag_constants (struct noce_if_info *);
>  static int noce_try_store_flag_mask (struct noce_if_info *);
>  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
> - rtx, rtx, rtx);
> + rtx, rtx, rtx, rtx = NULL, rtx = NULL);
>  static int noce_try_cmove (struct noce_if_info *);
>  static int noce_try_cmove_arith (struct noce_if_info *);
>  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
> @@ -1711,7 +1711,8 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
>  
>  static rtx
>  noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
> -  rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
> +  rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp,
> +  rtx rev_cc_cmp)
>  {
>rtx target ATTRIBUTE_UNUSED;
>int unsignedp ATTRIBUTE_UNUSED;
> @@ -1743,23 +1744,30 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, 
> enum rtx_code code,
>end_sequence ();
>  }
>  
> -  /* Don't even try if the comparison operands are weird
> - except that the target supports cbranchcc4.  */
> -  if (! general_operand (cmp_a, GET_MODE (cmp_a))
> -  || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> -{
> -  if (!have_cbranchcc4
> -   || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
> -   || cmp_b != const0_rtx)
> - return NULL_RTX;
> -}
> -
>unsignedp = (code == LTU || code == GEU
>  || code == LEU || code == GTU);
>  
> -  target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
> -   vtrue, vfalse, GET_MODE (x),
> -   unsignedp);
> +  if (cc_cmp != NULL_RTX && rev_cc_cmp != NULL_RTX)
> +target = emit_conditional_move (x, cc_cmp, rev_cc_cmp,
> + vtrue, vfalse, GET_MODE (x));
> +  else
> +{
> +  /* Don't even try if the comparison operands are weird
> +  except that the target supports cbranchcc4.  */
> +  if (! general_operand (cmp_a, GET_MODE (cmp_a))
> +   || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> + {
> +   if (!have_cbranchcc4
> +   || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
> +   || cmp_b != const0_rtx)
> + return NULL_RTX;
> + }
> +
> +  target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
> +   vtrue, vfalse, GET_MODE (x),
> +   unsignedp);
> +}
> +

It's hard to judge this in isolation because it's not clear when
and how the new arguments are going to be used, but it seems OK
in principle.  Do you still want:

  /* If earliest == jump, try to build the cmove insn directly.
 This is helpful when combine has created some complex condition
 (like for alpha's cmovlbs) that we can't hope to regenerate
 through the normal interface.  */

  if (if_info->cond_earliest == if_info->jump)
{

to be used when cc_cmp and rev_cc_cmp are nonnull?

>if (target)
>  return target;
>  
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 019bbb62882..25eecf29ed8 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -52,6 +52,9 @@ static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, 
> rtx *,
>  static rtx expand_unop_direct (machine_mode, optab, rtx, rtx, int);
> 

[PATCH] coroutines: Handle initial awaiters with non-void returns [PR 100127].

2021-11-05 Thread Iain Sandoe via Gcc-patches
The way in which a C++20 coroutine is specified discards any value
that might be returned from the initial or final await expressions.

This PR ICE was caused by an initial await expression with an
await_resume () returning a reference; the function rewrite code
was not set up to expect this.

Fixed by looking through any indirection present and by explicitly
discarding the value, if any, returned by await_resume().

It does not seem useful to make a diagnostic for this, since
the user could define a generic awaiter that usefully returns
values when used in a different position from the initial (or
final) await expressions.

tested on x86_64 darwin, linux,
OK for master and backports?
thanks
Iain

Signed-off-by: Iain Sandoe 

PR c++/100127

gcc/cp/ChangeLog:

* coroutines.cc (coro_rewrite_function_body): Handle initial
await expressions that try to produce a reference value.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr100127.C: New test.
---
 gcc/cp/coroutines.cc   |  9 ++-
 gcc/testsuite/g++.dg/coroutines/pr100127.C | 65 ++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr100127.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 9017902e6fb..6db4b70f028 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4211,9 +4211,16 @@ coro_rewrite_function_body (location_t fn_start, tree 
fnbody, tree orig,
{
  /* Build a compound expression that sets the
 initial-await-resume-called variable true and then calls the
-initial suspend expression await resume.  */
+initial suspend expression await resume.
+In the case that the user decides to make the initial await
+await_resume() return a value, we need to discard it and, if it is
+a reference type, look past the indirection.  */
+ if (INDIRECT_REF_P (initial_await))
+   initial_await = TREE_OPERAND (initial_await, 0);
  tree vec = TREE_OPERAND (initial_await, 3);
  tree aw_r = TREE_VEC_ELT (vec, 2);
+ if (!VOID_TYPE_P (TREE_TYPE (aw_r)))
+   aw_r = build1 (CONVERT_EXPR, void_type_node, aw_r);
  tree update = build2 (MODIFY_EXPR, boolean_type_node, i_a_r_c,
boolean_true_node);
  aw_r = cp_build_compound_expr (update, aw_r, tf_warning_or_error);
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100127.C 
b/gcc/testsuite/g++.dg/coroutines/pr100127.C
new file mode 100644
index 000..374cd710077
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr100127.C
@@ -0,0 +1,65 @@
+#ifdef __clang__
+#include <experimental/coroutine>
+namespace std {
+  using namespace std::experimental;
+}
+#else
+#include <coroutine>
+#endif
+#include <optional>
+
+struct future
+{
+using value_type = int;
+struct promise_type;
+using handle_type = std::coroutine_handle<promise_type>;
+
+handle_type _coroutine;
+
+future(handle_type h) : _coroutine{h} {}
+
+~future() noexcept{
+if (_coroutine) {
+_coroutine.destroy();
+}
+}
+
+value_type get() {
+auto ptr = _coroutine.promise()._value;
+return *ptr;
+}
+
+struct promise_type {
+std::optional<value_type> _value = std::nullopt;
+
+future get_return_object() {
+return future{handle_type::from_promise(*this)};
+}
+void return_value(value_type val) {
+_value = static_cast<value_type>(val);
+}
+auto initial_suspend() noexcept {
+class awaiter {
+std::optional<value_type> & value;
+public:
+explicit awaiter(std::optional<value_type> & val) noexcept : 
value{val} {}
+bool await_ready() noexcept { return value.has_value(); }
+void await_suspend(handle_type) noexcept { }
+value_type & await_resume() noexcept { return *value; }
+};
+
+return awaiter{_value};
+}
+std::suspend_always final_suspend() noexcept {
+return {};
+}
+//void return_void() {}
+void unhandled_exception() {}
+};
+};
+
+future create_future()
+{ co_return 2021; }
+
+int main()
+{ auto f = create_future(); }
-- 
2.24.3 (Apple Git-128)



[PATCH] coroutines: Pass lvalues to user-defined operator new [PR 100772].

2021-11-05 Thread Iain Sandoe via Gcc-patches
The wording of the standard has been clarified to be explicit that
the parameters to any user-defined operator-new in the promise
class should be lvalues.

tested on x86_64 darwin, linux,
OK for master and backports?
thanks
Iain

Signed-off-by: Iain Sandoe 

PR c++/100772

gcc/cp/ChangeLog:

* coroutines.cc (morph_fn_to_coro): Convert function parms
from reference before constructing any operator-new args
list.

gcc/testsuite/ChangeLog:

* g++.dg/coroutines/pr100772-a.C: New test.
* g++.dg/coroutines/pr100772-b.C: New test.
---
 gcc/cp/coroutines.cc |  6 +-
 gcc/testsuite/g++.dg/coroutines/pr100772-a.C | 77 
 gcc/testsuite/g++.dg/coroutines/pr100772-b.C | 93 
 3 files changed, 174 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr100772-a.C
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr100772-b.C

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index 6db4b70f028..ab211201255 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4602,8 +4602,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
If the lookup finds an allocation function in the scope of the promise
type, overload resolution is performed on a function call created by
assembling an argument list.  The first argument is the amount of space
-   requested, and has type std::size_t.  The succeeding arguments are
-   those of the original function.  */
+   requested, and has type std::size_t.  The lvalues p1...pn are the
+   succeeding arguments.  */
   vec *args = make_tree_vector ();
   vec_safe_push (args, resizeable); /* Space needed.  */
 
@@ -4623,6 +4623,8 @@ morph_fn_to_coro (tree orig, tree *resumer, tree 
*destroyer)
   tf_warning_or_error);
  vec_safe_push (args, this_ref);
}
+ else if (parm_i->rv_ref || parm_i->pt_ref)
+   vec_safe_push (args, convert_from_reference (arg));
  else
vec_safe_push (args, arg);
}
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-a.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
new file mode 100644
index 000..a325d384fc3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-a.C
@@ -0,0 +1,77 @@
+//  { dg-additional-options "-fsyntax-only " }
+#ifdef __clang__
+#include <experimental/coroutine>
+namespace std {
+  using namespace std::experimental;
+}
+#else
+#include <coroutine>
+#endif
+
+struct Task
+{
+struct promise_type
+{
+   void return_void() const noexcept {}
+
+   void* operator new(std::size_t, auto &&...args) noexcept
+   {
+static_assert(sizeof...(args) > 0);
+static_assert(sizeof...(args) == 2);
+
+   return nullptr;
+   }
+
+   void operator delete(void *, std::size_t) noexcept
+   {
+   }
+
+static Task get_return_object_on_allocation_failure() noexcept
+{
+return {};
+}
+
+Task get_return_object() noexcept
+{
+return Task{ *this };
+}
+
+std::suspend_always initial_suspend() noexcept
+{
+return {};
+}
+
+std::suspend_always final_suspend() noexcept
+{
+return {};
+}
+
+void unhandled_exception() noexcept {}
+};
+
+using promise_handle = std::coroutine_handle<promise_type>;
+
+Task() = default;
+Task(promise_type & promise) noexcept
+: m_handle{ promise_handle::from_promise(promise) }
+{}
+
+~Task()
+{
+if (m_handle.address()) { m_handle.destroy(); }
+}
+
+promise_handle m_handle{};
+};
+
+
+Task Foo(auto && ... args) noexcept
+{
+co_return;
+}
+
+int main()
+{
+int v;
+Foo(v, 2134);
+}
diff --git a/gcc/testsuite/g++.dg/coroutines/pr100772-b.C 
b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
new file mode 100644
index 000..6cdf8d1e529
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr100772-b.C
@@ -0,0 +1,93 @@
+#ifdef __clang__
+#include <experimental/coroutine>
+namespace std {
+  using namespace std::experimental;
+}
+#else
+#include <coroutine>
+#endif
+#include 
+#include 
+#include   // needed for abi::__cxa_demangle
+#include 
+
+std::shared_ptr<char> cppDemangle(const char *abiName)
+{
+  int status;
+  char *ret = abi::__cxa_demangle(abiName, 0, 0, &status);  
+
+  /* NOTE: must free() the returned char when done with it! */
+  std::shared_ptr<char> retval;
+  retval.reset( (char *)ret, [](char *mem) { if (mem) free((void*)mem); } );
+  return retval;
+}
+
+template 
+struct Id{};
+struct Task
+{
+  struct promise_type
+  {
+void return_void() const noexcept {}
+
+static void is_int (std::string x) {
+  if (x != "Id")
+   abort() ;
+}
+template <typename... Args>
+void* operator new(std::size_t len, Args ...args) noexcept
+  

[PATCH] coroutines, c++: Find lambda-ness from the ramp function [PR 96517].

2021-11-05 Thread Iain Sandoe via Gcc-patches
When we query is_capture_proxy(), and the scope of the var is one of
the two coroutine helpers, we need to look for the scope information
that pertains to the original function (represented by the ramp now).

We can look up the ramp function from either helper (in practice, the
only caller would be the actor) and if that lookup returns NULL, it
means that the coroutine component is the ramp already and handled by
the usual code path.

tested on x86_64-darwin, linux,
OK for master / backports ?
thanks
Iain

Signed-off-by: Iain Sandoe 

gcc/cp/ChangeLog:

PR c++/96517
* lambda.c (is_capture_proxy): When the scope of the var to
be tested is a coroutine helper, lookup the scope information
from the parent (ramp) function.

gcc/testsuite/ChangeLog:

PR c++/96517
* g++.dg/coroutines/pr96517.C: New test.
---
 gcc/cp/lambda.c   |  6 -
 gcc/testsuite/g++.dg/coroutines/pr96517.C | 29 +++
 3 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/coroutines/pr96517.C


diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 2e9d38bbe83..c1556480e22 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -244,7 +244,11 @@ is_capture_proxy (tree decl)
  && !(DECL_ARTIFICIAL (decl)
   && DECL_LANG_SPECIFIC (decl)
   && DECL_OMP_PRIVATIZED_MEMBER (decl))
- && LAMBDA_FUNCTION_P (DECL_CONTEXT (decl)));
+ && (LAMBDA_FUNCTION_P (DECL_CONTEXT (decl))
+ || (DECL_DECLARES_FUNCTION_P (DECL_CONTEXT (decl))
+ && DECL_COROUTINE_P (DECL_CONTEXT (decl))
+ && DECL_RAMP_FN (DECL_CONTEXT (decl))
+ && LAMBDA_FUNCTION_P (DECL_RAMP_FN (DECL_CONTEXT (decl));
 }
 
 /* Returns true iff DECL is a capture proxy for a normal capture
diff --git a/gcc/testsuite/g++.dg/coroutines/pr96517.C 
b/gcc/testsuite/g++.dg/coroutines/pr96517.C
new file mode 100644
index 000..9cbac3ebc0d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/coroutines/pr96517.C
@@ -0,0 +1,29 @@
+// { dg-additional-options " -O1 " }
+#include <coroutine>
+
+struct coroutine {
+struct promise_type {
+coroutine get_return_object() { return {}; }
+void return_void() {}
+void unhandled_exception() {}
+auto initial_suspend() noexcept { return std::suspend_never{}; }
+auto final_suspend() noexcept { return std::suspend_never{}; }
+};
+};
+
+struct data {
+constexpr int get() { return 5; }
+};
+
+struct test {
+data _data;
+
+void foo() {
+[this]() -> coroutine {
+_data.get();
+co_return;
+};
+}
+};
+
+int main() {}
-- 
2.24.3 (Apple Git-128)



Re: [PATCH] libsanitizer: remove -pedantic option

2021-11-05 Thread Martin Liška

On 11/5/21 16:29, Jakub Jelinek wrote:

On Fri, Nov 05, 2021 at 04:25:53PM +0100, Martin Liška wrote:

On 11/5/21 16:22, H.J. Lu wrote:

Should we add __extension__ here?


I tried doing that but it didn't help me with the warning.
Maybe I did something wrong?


Works for me just fine say on:
void foo ()
{
   int a = ({ int d = 1; d; });
   int b = __extension__ ({ int d = 1; d; });
}
-Wpedantic warning on line 3, none on line 4.  Add -D__extension__=
and it warns on both.

Jakub



Thank you both, it really works. I wrongly put the keyword on the first
statement inside the curly braces.
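
To illustrate what I mean (roughly what I had first vs. what works):

void foo ()
{
  /* Still warns with -Wpedantic: __extension__ only covers the declaration
     inside the braces, not the braced group itself.  */
  int a = ({ __extension__ int d = 1; d; });
  /* No -Wpedantic warning: __extension__ covers the whole statement
     expression.  */
  int b = __extension__ ({ int d = 1; d; });
}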

I'm going to suggest that to the upstream.

Cheers,
Martin


Re: GCC 11 backports

2021-11-05 Thread Martin Liška

On 8/23/21 10:54, Martin Liška wrote:

On 8/16/21 13:13, Martin Liška wrote:

I'm going to apply the following 3 tested patches.

Martin


One more patch I've just tested.

Martin


And one more backport.

Martin

From 64fbc25cb6983725fefe313bfedd3657df795d54 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Fri, 13 Aug 2021 17:22:35 +0200
Subject: [PATCH] Speed up jump table switch detection.

	PR tree-optimization/100393

gcc/ChangeLog:

	* tree-switch-conversion.c (group_cluster::dump): Use
	  get_comparison_count.
	(jump_table_cluster::find_jump_tables): Pre-compute number of
	comparisons and then decrement it. Cache also max_ratio.
	(jump_table_cluster::can_be_handled): Change signature.
	* tree-switch-conversion.h (get_comparison_count): New.

(cherry picked from commit c517cf2e685e2903b591d63c1034ff9726cb3822)
---
 gcc/tree-switch-conversion.c | 42 
 gcc/tree-switch-conversion.h | 14 ++--
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 7f65c4ce839..8fc5eaa3033 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -1090,7 +1090,7 @@ group_cluster::dump (FILE *f, bool details)
   for (unsigned i = 0; i < m_cases.length (); i++)
 {
   simple_cluster *sc = static_cast<simple_cluster *> (m_cases[i]);
-  comparison_count += sc->m_range_p ? 2 : 1;
+  comparison_count += sc->get_comparison_count ();
 }
 
   unsigned HOST_WIDE_INT range = get_range (get_low (), get_high ());
@@ -1185,11 +1185,24 @@ jump_table_cluster::find_jump_tables (vec &clusters)
 
   min.quick_push (min_cluster_item (0, 0, 0));
 
+  unsigned HOST_WIDE_INT max_ratio
+= (optimize_insn_for_size_p ()
+   ? param_jump_table_max_growth_ratio_for_size
+   : param_jump_table_max_growth_ratio_for_speed);
+
   for (unsigned i = 1; i <= l; i++)
 {
   /* Set minimal # of clusters with i-th item to infinite.  */
   min.quick_push (min_cluster_item (INT_MAX, INT_MAX, INT_MAX));
 
+  /* Pre-calculate number of comparisons for the clusters.  */
+  HOST_WIDE_INT comparison_count = 0;
+  for (unsigned k = 0; k <= i - 1; k++)
+	{
+	  simple_cluster *sc = static_cast<simple_cluster *> (clusters[k]);
+	  comparison_count += sc->get_comparison_count ();
+	}
+
   for (unsigned j = 0; j < i; j++)
 	{
 	  unsigned HOST_WIDE_INT s = min[j].m_non_jt_cases;
@@ -1200,10 +1213,15 @@ jump_table_cluster::find_jump_tables (vec &clusters)
 	  if ((min[j].m_count + 1 < min[i].m_count
 	   || (min[j].m_count + 1 == min[i].m_count
 		   && s < min[i].m_non_jt_cases))
-	  && can_be_handled (clusters, j, i - 1))
+	  && can_be_handled (clusters, j, i - 1, max_ratio,
+ comparison_count))
 	min[i] = min_cluster_item (min[j].m_count + 1, j, s);
+
+	  simple_cluster *sc = static_cast<simple_cluster *> (clusters[j]);
+	  comparison_count -= sc->get_comparison_count ();
 	}
 
+  gcc_checking_assert (comparison_count == 0);
   gcc_checking_assert (min[i].m_count != INT_MAX);
 }
 
@@ -1241,7 +1259,9 @@ jump_table_cluster::find_jump_tables (vec &clusters)
 
 bool
 jump_table_cluster::can_be_handled (const vec &clusters,
-unsigned start, unsigned end)
+unsigned start, unsigned end,
+unsigned HOST_WIDE_INT max_ratio,
+unsigned HOST_WIDE_INT comparison_count)
 {
   /* If the switch is relatively small such that the cost of one
  indirect jump on the target are higher than the cost of a
@@ -1260,10 +1280,6 @@ jump_table_cluster::can_be_handled (const vec &clusters,
   if (start == end)
 return true;
 
-  unsigned HOST_WIDE_INT max_ratio
-= (optimize_insn_for_size_p ()
-   ? param_jump_table_max_growth_ratio_for_size
-   : param_jump_table_max_growth_ratio_for_speed);
   unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (),
 	clusters[end]->get_high ());
   /* Check overflow.  */
@@ -1277,18 +1293,6 @@ jump_table_cluster::can_be_handled (const vec &clusters,
   if (lhs < range)
 return false;
 
-  /* First make quick guess as each cluster
- can add at maximum 2 to the comparison_count.  */
-  if (lhs > 2 * max_ratio * (end - start + 1))
-return false;
-
-  unsigned HOST_WIDE_INT comparison_count = 0;
-  for (unsigned i = start; i <= end; i++)
-{
-  simple_cluster *sc = static_cast<simple_cluster *> (clusters[i]);
-  comparison_count += sc->m_range_p ? 2 : 1;
-}
-
   return lhs <= max_ratio * comparison_count;
 }
 
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index d76f19b57f6..a375e52636e 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -180,6 +180,13 @@ public:
 return tree_int_cst_equal (get_low (), get_high ());
   }
 
+  /* Return number of comparisons needed for the case.  */
+  unsigned
+  get_comparison_count ()
+  {
+return m_range_p ? 2 : 1;
+  }
+
   /* Low value of the case.  */
   tree m_low;
 
@@ -267,9 +274,12 @@ public:
   static vec f

Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-05 Thread Qing Zhao via Gcc-patches
Thanks all for the information.
Based on the information so far, my understanding is that we cannot revert 
r12-979-g782e57f2c09, 
since it’s for enabling YMM and ZMM registers to be used for by_pieces 
operations on X86.
Let me know if I am missing anything here.

FYI. 

This issue was found during my work to back port all the patches of 
-ftrivial-auto-var-init so far from 
GCC12 to GCC11. 

The following small testing case (_Complex long double)

_Complex long double result;

_Complex long double foo()
{
   _Complex long double temp3;

  result = temp3;
  return result;
}

Failed with -ftrivial-auto-var-init=pattern on GCC11 at the following line 
3087, at the call to “build_nonstandard_integer_type” 
(in expand_DEFERRED_INIT):

3076   if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
3077   && tree_fits_uhwi_p (var_size)
3078   && (init_type == AUTO_INIT_PATTERN
3079   || !is_gimple_reg_type (var_type))
3080   && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
3081 0).exists ())
3082 {
3083   unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size);
3084   unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
3085   memset (buf, (init_type == AUTO_INIT_PATTERN
3086 ? INIT_PATTERN_VALUE : 0), total_bytes);
3087   tree itype = build_nonstandard_integer_type
3088  (total_bytes * BITS_PER_UNIT, 1);

The exact failing point is at function 
“set_min_and_max_values_for_integral_type”:

2851   gcc_assert (precision <= WIDE_INT_MAX_PRECISION);

For _Complex long double,  “precision” is 256.  
In GCC11, “WIDE_INT_MAX_PRECISION” is 192,  in GCC12, it’s 512. 
As a result, the above assertion failed on GCC11. 

I am wondering what’s the best fix for this issue in gcc11? 

Qing


> On Nov 5, 2021, at 5:01 AM, Richard Biener via Gcc-patches 
>  wrote:
> 
> On Fri, Nov 5, 2021 at 7:54 AM Jakub Jelinek via Gcc-patches
>  wrote:
>> 
>> On Thu, Nov 04, 2021 at 11:05:35PM -0700, Andrew Pinski via Gcc-patches 
>> wrote:
 I noticed that the macro “WIDE_INT_MAX_ELTS” has different values in GCC11 
 and GCC12 (on the same X86 machine)
 
 For gcc11:
 
 wide int max elts =3
 
 For gcc12:
 
 wide int max elts =9
 
 Does anyone know what’s the reason for this difference?
 
 Thanks a lot for any help.
>>> 
>>> Yes originally, the x86 backend only used OI and XI modes for vectors
>>> during data movement.
>>> This changed with r10-5741-gc57b4c22089 which added the use of OI mode
>>> for TImode adding with overflow and then MAX_BITSIZE_MODE_ANY_INT
>>> changed from 128 to 160 (in r10-6178-gc124b345e46078) to fix the ICE
>>> introduced by that change .
>>> And then with r12-979-g782e57f2c09 removed the define of
>>> MAX_BITSIZE_MODE_ANY_INT.
>>> Now what was not mentioned in r12-979-g782e57f2c09 (or before) of why
>>> MAX_BITSIZE_MODE_ANY_INT was defined in the first place for x86. HJL
>>> assumed there was some problem of why it was defined that way but not
>>> realizing memory usage was the reason.
>>> It was defined to keep the memory usage down as you see that it is now
>>> almost a 3x memory increase for all wi::wide_int.
>>> I do think r12-979-g782e57f2c09 should be reverted with an added
>>> comment on saying defining MAX_BITSIZE_MODE_ANY_INT here is to
>>> decrease the memory footprint.
>> 
>> I completely agree.
> 
> Do we have permanent objects embedding wide[st]_int?  I know of
> class loop and loop_bound.  Btw, there are other targets with large
> integer modes (aarch64 with XImode) and not defining
> MAX_BITSIZE_MODE_ANY_INT
> 
> Richard.
> 
>>Jakub



[PATCH] Darwin, Arm64 : Ada fixes for hosted tools.

2021-11-05 Thread Iain Sandoe via Gcc-patches
This is host-only support (target support will come later).

This will allow someone (with an existing Ada compiler on the
platform - which can be provided by the experimental aarch64-darwin
branch) - to build the host tools (gnatmake and friends) for a
non-native cross.

The existing provisions for iOS are OK for cross-compilation from
an x86-64-darwin platform, but we need some adjustments so that these
host tools can be built to run on aarch64-darwin.

tested on aarch64-darwin20.
OK for master?
thanks
Iain

---
 gcc/ada/gcc-interface/Make-lang.in | 18 +-
 gcc/ada/sigtramp-ios.c |  8 
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/gcc-interface/Make-lang.in 
b/gcc/ada/gcc-interface/Make-lang.in
index ba194d1369d..b2feaa7d808 100644
--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -725,6 +725,14 @@ endif
 # For unwind-pe.h
 CFLAGS-ada/raise-gcc.o += -I$(srcdir)/../libgcc -DEH_MECHANISM_$(EH_MECHANISM)
 
+# Under aarch64 darwin, we need to include the rtl signal trampoline.
+
+ifeq ($(strip $(filter-out aarch64 arm64 darwin%,$(host_cpu) $(host_os))),)
+  EXTRA_HOST_OBJS=ada/sigtramp-ios.o
+else
+  EXTRA_HOST_OBJS =
+endif
+
 ada/libgnat/s-excmac.o: ada/libgnat/s-excmac.ads ada/libgnat/s-excmac.adb
 
 ada/libgnat/s-excmac.ads: $(srcdir)/ada/libgnat/s-excmac__$(EH_MECHANISM).ads
@@ -738,16 +746,16 @@ ada/libgnat/s-excmac.adb: 
$(srcdir)/ada/libgnat/s-excmac__$(EH_MECHANISM).adb
 # Needs to be built with CC=gcc
 # Since the RTL should be built with the latest compiler, remove the
 #  stamp target in the parent directory whenever gnat1 is rebuilt
-gnat1$(exeext): $(TARGET_ADA_SRCS) $(GNAT1_OBJS) $(ADA_BACKEND) 
libcommon-target.a \
-   $(LIBDEPS) $(ada.prev)
+gnat1$(exeext): $(TARGET_ADA_SRCS) $(GNAT1_OBJS) $(ADA_BACKEND) 
$(EXTRA_HOST_OBJS) libcommon-target.a \
+   $(EXTRA_HOST_OBJS) $(LIBDEPS) $(ada.prev)
@$(call LINK_PROGRESS,$(INDEX.ada),start)
-   +$(GCC_LLINK) -o $@ $(GNAT1_OBJS) $(ADA_BACKEND) $(CFLAGS) \
+   +$(GCC_LLINK) -o $@ $(GNAT1_OBJS) $(ADA_BACKEND) $(EXTRA_HOST_OBJS) 
$(CFLAGS) \
  libcommon-target.a $(LIBS) $(SYSLIBS) $(BACKENDLIBS) $(GNATLIB)
$(RM) stamp-gnatlib2-rts stamp-tools
@$(call LINK_PROGRESS,$(INDEX.ada),end)
 
-gnatbind$(exeext): ada/b_gnatb.o $(CONFIG_H) $(GNATBIND_OBJS) ggc-none.o 
libcommon-target.a $(LIBDEPS)
-   +$(GCC_LINK) -o $@ $(CFLAGS) ada/b_gnatb.o $(GNATBIND_OBJS) ggc-none.o 
libcommon-target.a $(LIBS) $(SYSLIBS) $(GNATLIB)
+gnatbind$(exeext): ada/b_gnatb.o $(CONFIG_H) $(GNATBIND_OBJS) 
$(EXTRA_HOST_OBJS) ggc-none.o libcommon-target.a $(LIBDEPS)
+   +$(GCC_LINK) -o $@ $(CFLAGS) ada/b_gnatb.o $(GNATBIND_OBJS) 
$(EXTRA_HOST_OBJS) ggc-none.o libcommon-target.a $(LIBS) $(SYSLIBS) $(GNATLIB)
 
 # use target-gcc target-gnatmake target-gnatbind target-gnatlink
 gnattools: $(GCC_PARTS) $(CONFIG_H) prefix.o force
diff --git a/gcc/ada/sigtramp-ios.c b/gcc/ada/sigtramp-ios.c
index 6e2913d91f2..8403a1865ea 100644
--- a/gcc/ada/sigtramp-ios.c
+++ b/gcc/ada/sigtramp-ios.c
@@ -71,6 +71,10 @@
 
 /* sigtramp stub providing unwind info for common registers.  */
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 extern void __gnat_sigtramp_common
   (int signo, void *siginfo, void *sigcontext,
__sigtramphandler_t * handler);
@@ -87,6 +91,10 @@ void __gnat_sigtramp (int signo, void *si, void *ucontext,
   __gnat_sigtramp_common (signo, si, mcontext, handler);
 }
 
+#if defined(__cplusplus)
+}
+#endif
+
 /* asm string construction helpers.  */
 
 #define STR(TEXT) #TEXT
-- 
2.24.3 (Apple Git-128)



Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-05 Thread Jakub Jelinek via Gcc-patches
On Fri, Nov 05, 2021 at 04:11:36PM +, Qing Zhao wrote:
> 3076   if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
> 3077   && tree_fits_uhwi_p (var_size)
> 3078   && (init_type == AUTO_INIT_PATTERN
> 3079   || !is_gimple_reg_type (var_type))
> 3080   && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
> 3081 0).exists ())
> 3082 {
> 3083   unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size);
> 3084   unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
> 3085   memset (buf, (init_type == AUTO_INIT_PATTERN
> 3086 ? INIT_PATTERN_VALUE : 0), total_bytes);
> 3087   tree itype = build_nonstandard_integer_type
> 3088  (total_bytes * BITS_PER_UNIT, 1);
> 
> The exact failing point is at function 
> “set_min_and_max_values_for_integral_type”:
> 
> 2851   gcc_assert (precision <= WIDE_INT_MAX_PRECISION);
> 
> For _Complex long double,  “precision” is 256.  
> In GCC11, “WIDE_INT_MAX_PRECISION” is 192,  in GCC12, it’s 512. 
> As a result, the above assertion failed on GCC11. 
> 
> I am wondering what’s the best fix for this issue in gcc11? 

Even for gcc 12 the above is wrong, you can't blindly assume that
build_nonstandard_integer_type will work for arbitrary precisions,
and even if such a type can be built, that the result will actually work.
The fact that such a mode exists is one thing, but
targetm.scalar_mode_supported_p should be tested for whether the mode
is actually supported.
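
For illustration, the kind of guard I mean would look something like the
following (just a sketch, not a tested patch):

  scalar_int_mode imode;
  if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
      && tree_fits_uhwi_p (var_size)
      && (init_type == AUTO_INIT_PATTERN
          || !is_gimple_reg_type (var_type))
      && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
                            0).exists (&imode)
      && targetm.scalar_mode_supported_p (imode))
    {
      /* Only then build the integer type and the initializer, as in the
         code quoted above.  */
    }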

Jakub



Re: [PATCH] Darwin, Arm64 : Ada fixes for hosted tools.

2021-11-05 Thread Arnaud Charlet via Gcc-patches
> This is host-only support (target support will come later).
> 
> This will allow someone (with an existing Ada compiler on the
> platform - which can be provided by the experimental aarch64-darwin
> branch) - to build the host tools (gnatmake and friends) for a
> non-native cross.
> 
> The existing provisions for iOS are OK for cross-compilation from
> an x86-64-darwin platform, but we need some adjustments so that these
> host tools can be built to run on aarch64-darwin.
> 
> tested on aarch64-darwin20.
> OK for master?

Did you forget to attach the commit log (git show is your friend)?

The patch itself looks OK on principle, pending the associated log! ;-)

Arno


Re: [PATCH 0/4] config: Allow a host to opt out of PCH.

2021-11-05 Thread Iain Sandoe



> On 5 Nov 2021, at 15:25, Jakub Jelinek  wrote:
> 
> On Fri, Nov 05, 2021 at 11:31:58AM +0100, Richard Biener wrote:
>> On Fri, Nov 5, 2021 at 10:54 AM Jakub Jelinek  wrote:
>>> 
>>> On Fri, Nov 05, 2021 at 10:42:05AM +0100, Richard Biener via Gcc-patches 
>>> wrote:
 I had the impression we have support for PCH file relocation to deal with 
 ASLR
 at least on some platforms.
>>> 
>>> Unfortunately we do not, e.g. if you build cc1/cc1plus as PIE on
>>> x86_64-linux, PCH will stop working unless one always invokes it with
>>> disabled ASLR through personality.
>>> 
>>> I think this is related to function pointers and pointers to .rodata/.data
>>> etc. variables in GC memory, we currently do not relocate that.
>>> 
>>> What we perhaps could do is (at least assuming all the ELF PT_LOAD segments
>>> are adjacent with a single load base for them - I think at least ia64
>>> non-PIE binaries were violating this by having .text and .data PT_LOAD
>>> segments many terrabytes appart with a whole in between not protected in any
>>> way, but dunno if that is for PIEs too), perhaps try in a host
>>> specific way remember the address range in which the function pointers and
>>> .rodata/.data can exist, remember the extent start and end from PCH 
>>> generation
>>> and on PCH load query those addresses for the current compiler and relocate
>>> everything in that extent by the load bias from the last run.
>>> But, the assumption for this is that those function and data/rodata pointers
>>> in GC memory are actually marked at least as pointers...
>> 
>> If any such pointers exist they must be marked GTY((skip)) since they do not
>> point to GC memory...  So we'd need to invent special-handling for those.
>> 
>>> Do we e.g. have objects with virtual classes in GC memory and if so, do we
>>> catch their virtual table pointers?
>> 
>> Who knows, but then I don't remember adding stuff that should end in a PCH.
> 
> So, I've investigated a little bit.
> Apparently all the relocation we currently do for PCH is done at PCH write
> time, we choose some address range in the address space we think will be 
> likely
> mmappable each time successfully, relocate all pointers pointing to GC
> memory to point in there and then write that to file, together with the
> scalar GTY global vars values and GTY pointers in global vars.
> On PCH load, we just try to mmap memory in the right range, fail PCH load if
> unsuccessful, and read the GC memory into that range and update scalar and
> pointer GTY global vars from what we've recorded.
> Patch that made PCH load to fail for PIEs etc. was
> https://gcc.gnu.org/legacy-ml/gcc-patches/2003-10/msg01994.html
> If we wanted to relocate pointers to functions and .data/.rodata etc.,
> ideally we'd create a relocation list of addresses that should be
> incremented by the bias and quickly relocate those.

It is hard to judge the relative effort in the two immediately visible 
solutions:

1. relocatable PCH
2. taking the tree streamer from the modules implementation, moving its home
to c-family and adding hooks so that each FE can stream its own special 
trees.

ISTM, that part of the reason people dislike PCH is because the implementation 
is
mixed up with the GC solution - the rendering is non-transparent etc.

So, in some ways, (2) above would be a better investment - the process of PCH 
is:
generate:
“get to the end of parsing a TU” .. stream the AST
consume:
.. see a header .. stream the PCH AST in if there is one available for the 
header.

There is no reason for this to be mixed into the GC solution - the read in 
(currently)
happens to an empty TU and there should be nothing in the AST that carries any
reference to the compiler’s executable.

just 0.02 GBP.
Iain


> 
> I wrote following ugly hack:
> 
> --- ggc-common.c.jj   2021-08-19 11:42:27.365422400 +0200
> +++ ggc-common.c  2021-11-05 15:37:51.447222544 +0100
> @@ -404,6 +404,9 @@ struct mmap_info
> 
> /* Write out the state of the compiler to F.  */
> 
> +char *exestart = (char *) 2;
> +char *exeend = (char *) 2;
> +
> void
> gt_pch_save (FILE *f)
> {
> @@ -458,6 +461,14 @@ gt_pch_save (FILE *f)
> for (rti = *rt; rti->base != NULL; rti++)
>   if (fwrite (rti->base, rti->stride, 1, f) != 1)
>   fatal_error (input_location, "cannot write PCH file: %m");
> +  else if ((((uintptr_t) rti->base) & (sizeof (void *) - 1)) == 0)
> +{
> +  char *const *p = (char *const *) rti->base;
> +  char *const *q = (char *const *) ((uintptr_t) rti->base + 
> (rti->stride & ~(sizeof (void *) - 1)));
> +  for (; p < q; p++)
> + if (*p >= exestart && *p < exeend)
> +   fprintf (stderr, "scalar at %p points to executable %p\n", (void 
> *) p, (void *) *p);
> +}
> 
>   /* Write out all the global pointers, after translation.  */
>   write_pch_globals (gt_ggc_rtab, &state);
> @@ -546,6 +557,15 @@ gt_pch_save (FILE *f)
>   state.ptrs[i]->note_ptr_fn (state.ptrs[i]->obj,

Re: [PATCH] Darwin, Arm64 : Ada fixes for hosted tools.

2021-11-05 Thread Iain Sandoe
Hi Arno,

> On 5 Nov 2021, at 16:36, Arnaud Charlet  wrote:
> 
>> This is host-only support (target support will come later).
>> 
>> This will allow someone (with an existing Ada compiler on the
>> platform - which can be provided by the experimental aarch64-darwin
>> branch) - to build the host tools (gnatmake and friends) for a
>> non-native cross.
>> 
>> The existing provisions for iOS are OK for cross-compilation from
>> an x86-64-darwin platform, but we need some adjustments so that these
>> host tools can be built to run on aarch64-darwin.
>> 
>> tested on aarch64-darwin20.
>> OK for master?
> 
> Did you forget to attach the commit log (git show is your friend)?

No, I just managed to delete it when adding the post-notes to the email
header ;-) … and then didn’t notice when git send-emailing it …
Iain

—— this is the missing part.

Signed-off-by: Iain Sandoe 

gcc/ada/ChangeLog:

* gcc-interface/Make-lang.in: Use ios signal trampoline code
for hosted Ada tools.
* sigtramp-ios.c: Wrap the declarations in extern "C" when the
code is built by a C++ compiler.


> 
> The patch itself looks OK on principle, pending the associated log! ;-)
> 
> Arno



Re: [PATCH] Darwin, Arm64 : Ada fixes for hosted tools.

2021-11-05 Thread Arnaud Charlet via Gcc-patches
> No, I just managed to delete it when adding the post-notes to the email
> header ;-) … and then didn’t notice when git send-emailing it …

OK!

> Signed-off-by: Iain Sandoe 
> 
> gcc/ada/ChangeLog:

should be gcc/ada/

>   * gcc-interface/Make-lang.in: Use ios signal trampoline code
>   for hosted Ada tools.
>   * sigtramp-ios.c: Wrap the declarations in extern "C" when the
>   code is built by a C++ compiler.

I confirm that the patch is OK, thanks!


[PATCH, v2, OpenMP 5.0] Implement relaxation of implicit map vs. existing device mappings (for mainline trunk)

2021-11-05 Thread Chung-Lin Tang

Hi Jakub,

On 2021/6/24 11:55 PM, Jakub Jelinek wrote:

On Fri, May 14, 2021 at 09:20:25PM +0800, Chung-Lin Tang wrote:

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index e790f08b23f..69c4a8e0a0a 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -10374,6 +10374,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void 
*data)
  gcc_unreachable ();
}
OMP_CLAUSE_SET_MAP_KIND (clause, kind);
+  OMP_CLAUSE_MAP_IMPLICIT_P (clause) = 1;
if (DECL_SIZE (decl)
  && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
{


As Thomas mentioned, there is now also OMP_CLAUSE_MAP_IMPLICIT that means
something different:
/* Nonzero on map clauses added implicitly for reduction clauses on combined
or composite constructs.  They shall be removed if there is an explicit
map clause.  */
Having OMP_CLAUSE_MAP_IMPLICIT and OMP_CLAUSE_MAP_IMPLICIT_P would be too
confusing.  So either we need to use just one flag for both purposes or
have two different flags and find a better name for one of them.
The former would be possible if no OMP_CLAUSE_MAP clauses added by the FEs
are implicit - then you could clear OMP_CLAUSE_MAP_IMPLICIT in
gimplify_scan_omp_clauses.  I wonder if it is the case though, e.g. doesn't
your "Improve OpenMP target support for C++ [PR92120 v4]" patch add a lot of
such implicit map clauses (e.g. the this[:1] and various others)?


I have changed the name to OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P, to signal that
this bit is to be passed to the runtime. Right now it's intended to be used by
clauses created by the middle-end, but front-end uses like that for C++ could
be clarified later.


Also, gimplify_adjust_omp_clauses_1 sometimes doesn't add just one map
clause, but several, shouldn't those be marked implicit too?  And similarly
it calls lang_hooks.decls.omp_finish_clause which can add even further map
clauses implicitly, shouldn't those be implicit too (in that case copy
the flag from the clause it is called on to the extra clauses it adds)?

Also as Thomas mentioned, it should be restricted to non-OpenACC,
it can check gimplify_omp_ctxp->region_type if it is OpenMP or OpenACC.


Agreed, I've adjusted the patch to only do this implicit setting for OpenMP.
This reduces a lot of the originally needed scan test adjustment for existing 
OpenACC testcases.


@@ -10971,9 +10972,15 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, 
gimple_seq body, tree *list_p,
list_p = &OMP_CLAUSE_CHAIN (c);
  }
  
-  /* Add in any implicit data sharing.  */

+  /* Add in any implicit data sharing. Implicit clauses are added at the start


Two spaces after dot in comments.


Done.


+ of the clause list, but after any non-map clauses.  */
struct gimplify_adjust_omp_clauses_data data;
-  data.list_p = list_p;
+  tree *implicit_add_list_p = orig_list_p;
+  while (*implicit_add_list_p
+&& OMP_CLAUSE_CODE (*implicit_add_list_p) != OMP_CLAUSE_MAP)
+implicit_add_list_p = &OMP_CLAUSE_CHAIN (*implicit_add_list_p);


Why are the implicit map clauses added first and not last?


As I also explained in the first submission email, due to the processing order,
if implicit clauses are added last (and processed last), for example:

  #pragma omp target map(tofrom: var.ptr[:N]) map(tofrom: var[implicit])
  {
 // access of var.ptr[]
  }

The explicit var.ptr[:N] will not find anything to map, because the (implicit) 
map(var) has not been seen yet,
and the assumed array section attachment behavior will fail.

Only with an order like map(tofrom: var[implicit]) map(tofrom: var.ptr[:N]) will 
the usual assumed behavior appear.

And yes, this depends on the new behavior implemented by patch [1], which I 
still need you to review.
e.g. for map(var.ptr[:N]), the proper behavior should *only* map the array 
section but NOT the base-pointer.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571195.html


There is also the OpenMP 5.1 [352:17-22] case which basically says that the
implicit mappings should be ignored if there are explicit ones on the same
construct (though, do we really create implicit clauses in that case?).


Implicit clauses do not appear to be created if there's an explicit clause 
already existing.


+#define GOMP_MAP_IMPLICIT  (GOMP_MAP_FLAG_SPECIAL_3 \
+| GOMP_MAP_FLAG_SPECIAL_4)
+/* Mask for entire set of special map kind bits.  */
+#define GOMP_MAP_FLAG_SPECIAL_BITS (GOMP_MAP_FLAG_SPECIAL_0 \
+| GOMP_MAP_FLAG_SPECIAL_1 \
+| GOMP_MAP_FLAG_SPECIAL_2 \
+| GOMP_MAP_FLAG_SPECIAL_3 \
+| GOMP_MAP_FLAG_SPECIAL_4)

...

+#define GOMP_MAP_IMPLICIT_P(X) \
+  (((X) & GOMP_MAP_FLAG_SPECIAL_BITS) == GOMP_MAP_IMPLICIT)


I think here we need to decide with which GOMP_MAP* kinds the implicit
bit will need to be combined with, with looking forward int

Re: [PATCH 1/5] Add XXSPLTI* and LXVKQ instructions (new data structure and function)

2021-11-05 Thread will schmidt via Gcc-patches
On Fri, 2021-11-05 at 00:04 -0400, Michael Meissner wrote:
> Add new constant data structure.
> 
> This patch provides the data structure and function to convert a
> CONST_INT, CONST_DOUBLE, CONST_VECTOR, or VEC_DUPLICATE of a constant to
> an array of bytes, half-words, words, and  double words that can be loaded
> into a 128-bit vector register.
> 
> The next patches will use this data structure to generate code that
> generates loads of the vector/floating point registers using the XXSPLTIDP,
> XXSPLTIW, and LXVKQ instructions that were added in power10.
> 
> 2021-11-05  Michael Meissner  
> 

Email here is different than the from:.  No big deal either way.  

> gcc/
> 
>   * config/rs6000/rs6000-protos.h (VECTOR_128BIT_*): New macros.

I defer to maintainers.  I like to explicitly include the full macro names here 
so a grep later on can easily find it.  


>   (vec_const_128bit_type): New structure type.
>   (vec_const_128bit_to_bytes): New declaration.
>   * config/rs6000/rs6000.c (constant_int_to_128bit_vector): New
>   helper function.
>   (constant_fp_to_128bit_vector): New helper function.
>   (vec_const_128bit_to_bytes): New function.

ok

> ---
>  gcc/config/rs6000/rs6000-protos.h |  28 
>  gcc/config/rs6000/rs6000.c| 253 ++
>  2 files changed, 281 insertions(+)
> 
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 14f6b313105..490d6e33736 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -222,6 +222,34 @@ address_is_prefixed (rtx addr,
>return (iform == INSN_FORM_PREFIXED_NUMERIC
> || iform == INSN_FORM_PCREL_LOCAL);
>  }
> +
> +/* Functions and data structures relating to 128-bit constants that are
> +   converted to byte, half-word, word, and double-word values.  All fields 
> are
> +   kept in big endian order.  We also convert scalar values to 128-bits if 
> they
> +   are going to be loaded into vector registers.  */
> +#define VECTOR_128BIT_BITS   128
> +#define VECTOR_128BIT_BYTES  (128 / 8)
> +#define VECTOR_128BIT_HALF_WORDS (128 / 16)
> +#define VECTOR_128BIT_WORDS  (128 / 32)
> +#define VECTOR_128BIT_DOUBLE_WORDS   (128 / 64)

ok

> +
> +typedef struct {
> +  /* Constant as various sized items.  */
> +  unsigned HOST_WIDE_INT double_words[VECTOR_128BIT_DOUBLE_WORDS];
> +  unsigned int words[VECTOR_128BIT_WORDS];
> +  unsigned short half_words[VECTOR_128BIT_HALF_WORDS];
> +  unsigned char bytes[VECTOR_128BIT_BYTES];
> +
> +  unsigned original_size;/* Constant size before splat.  */
> +  bool fp_constant_p;/* Is the constant floating 
> point?  */
> +  bool all_double_words_same;/* Are the double words all 
> equal?  */
> +  bool all_words_same;   /* Are the words all equal?  */
> +  bool all_half_words_same;  /* Are the halft words all equal?  */

half

> +  bool all_bytes_same;   /* Are the bytes all equal?  */




> +} vec_const_128bit_type;
> +

ok.  


> +extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
> +vec_const_128bit_type *);
>  #endif /* RTX_CODE */
> 
>  #ifdef TREE_CODE
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 01affc7a47c..f285022294a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -28619,6 +28619,259 @@ rs6000_output_addr_vec_elt (FILE *file, int value)
>fprintf (file, "\n");
>  }
> 
> +
> +/* Copy an integer constant to the vector constant structure.  */
> +

Here and in subsequent comments, I'd debate whether to enhance the
comment to be explicit about the structure name being copied to/from
(vec_const_128bit_type is easy to search for; vector, constant, or
structure are not as unique).

> +static void
> +constant_int_to_128bit_vector (rtx op,
> +machine_mode mode,
> +size_t byte_num,
> +vec_const_128bit_type *info)
> +{
> +  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
> +  unsigned bitsize = GET_MODE_BITSIZE (mode);
> +
> +  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
> +info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
> +}

I didn't confirm the maths, but looks OK at a glance.


> +
> +/* Copy an floating point constant to the vector constant structure.  */
> +

s/an/a/

> +static void
> +constant_fp_to_128bit_vector (rtx op,
> +   machine_mode mode,
> +   size_t byte_num,
> +   vec_const_128bit_type *info)
> +{
> +  unsigned bitsize = GET_MODE_BITSIZE (mode);
> +  unsigned num_words = bitsize / 32;
> +  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
> +  long real_words[VECTOR_128BIT_WORDS];
> +
> +  /* Make sure we don't overflow the real_words array and that it is
> +

Re: [PATCH] c++, dyninit: Optimize C++ dynamic initialization by constants into DECL_INITIAL adjustment [PR102876]

2021-11-05 Thread Martin Sebor via Gcc-patches

On 11/4/21 3:42 AM, Jakub Jelinek via Gcc-patches wrote:

Hi!

When users don't use constexpr everywhere in initialization of namespace
scope non-comdat vars and the initializers aren't constant when FE is
looking at them, the FE performs dynamic initialization of those variables.
But after inlining and some constant propagation, we often end up with
just storing constants into those variables in the _GLOBAL__sub_I_*
constructor.
C++ gives us permission to change some of that dynamic initialization
back into static initialization - https://eel.is/c++draft/basic.start.static#3
For classes that need (dynamic) construction, I believe access to some var
from other dynamic construction before that var is constructed is UB, but
as the example in the above mentioned spot of C++:
inline double fd() { return 1.0; }
extern double d1;
double d2 = d1; // unspecified:
 // either statically initialized to 0.0 or
 // dynamically initialized to 0.0 if d1 is
 // dynamically initialized, or 1.0 otherwise
double d1 = fd();   // either initialized statically or dynamically to 1.0
some vars can be used before they are dynamically initialized and the
implementation can still optimize those into static initialization.

The following patch attempts to optimize some such cases back into
DECL_INITIAL initializers and where possible (originally const vars without
mutable members) put those vars back to .rodata etc.

Because we put all dynamic initialization from a single TU into one single
function (well, originally one function per priority but typically inline
those back into one function), we can either have a simpler approach
(from the PR it seems that is what LLVM uses) where either we manage to
optimize all dynamic initializers into constant in the TU, or nothing,
or by adding some markup - in the form of a pair of internal functions in
this patch - around each dynamic initialization that can be optimized,
we can optimize each dynamic initialization separately.

The patch adds a new pass that is invoked (through gate check) only on
DECL_ARTIFICIAL DECL_STATIC_CONSTRUCTOR functions, and looks there for
sequences like:
   .DYNAMIC_INIT_START (&b, 0);
   b = 1;
   .DYNAMIC_INIT_END (&b);
or
   .DYNAMIC_INIT_START (&e, 1);
   # DEBUG this => &e.f
   MEM[(struct S *)&e + 4B] ={v} {CLOBBER};
   MEM[(struct S *)&e + 4B].a = 1;
   MEM[(struct S *)&e + 4B].b = 2;
   MEM[(struct S *)&e + 4B].c = 3;
   # DEBUG BEGIN_STMT
   MEM[(struct S *)&e + 4B].d = 6;
   # DEBUG this => NULL
   .DYNAMIC_INIT_END (&e);
(where between the pair of markers everything is either debug stmts or
stores of constants into the variables or their parts).
The pass needs to be done late enough so that after IPA all the needed
constant propagation and perhaps loop unrolling is done; on the other
hand, it should be early enough so that if we can't optimize it, we can
remove those .DYNAMIC_INIT* internal calls that could prevent some
further optimizations (they have fnspec such that they pretend to read
the corresponding variable).


In my work-in-progress patch to diagnose stores into constant
objects (and subobjects) I deal with the same problem.  I had
considered a pair of markers like those above (David Malcolm
suggested a similar approach as well), but decided to go
a different route, not trusting they could be kept together,
or that they wouldn't be viewed as overly intrusive.  With
it, I have been able to distinguish dynamic initialization
from overwriting stores even at the end of compilation, but
I'd be fine changing that and running the detection earlier.

So if the markers are added for the purpose of optimizing
the dynamic initialization at file scope, could they be added
for those of locals as well?  That way I wouldn't need to add
a separate solution.



Currently the optimization is only able to optimize cases where the whole
variable is stored in a single store (typically scalar variables), or
uses the native_{encode,interpret}* infrastructure to create or update
the CONSTRUCTOR.  This means that except for the first category, we can't
right now handle unions or anything that needs relocations (vars containing
pointers to other vars or references).
I think it would be nice to incrementally add before the native_* fallback
some attempt to just create or update a CONSTRUCTOR if possible.  If we only
see var.a.b.c.d[10].e = const; style of stores, this shouldn't be that hard
as the whole access path is recorded there and we'd just need to decide what
to do with unions if two or more union members are accessed.  And do a deep
copy of the CONSTRUCTOR and try to efficiently update the copy afterwards
(the CONSTRUCTORs should be sorted on increasing offsets of the
members/elements, so doing an ordered vec insertion might not be the best
idea).  But MEM_REFs complicate this, parts or all of the access path
is lost.  For non-unions in most cases we could try to guess which field
it is (do we have some

[committed] hppa: Support TI mode and soft float on PA64

2021-11-05 Thread John David Anglin

Without TImode support on hppa64, it is necessary to disable building libgomp 
with fortran.

Previously, we didn't support TImode because we need both DImode and TImode 
divmod routines
from libgcc.  The standard build only builds one of the two.  This is nominally 
determined
by MIN_UNITS_PER_WORD.  I created a makefile fragment to build the needed 
DImode routines.

Since the alignment requirements for TImode are not defined, I just assumed the 
standard
alignment and calling convention for a structure containing a pair of 64-bit 
words.

I also added softfp support based on early mips float format and hppa glibc 
exception support.

Tested on hppa64-hp-hpux11.11 and hppa64-unknown-linux-gnu.

Committed on trunk and gcc-11.

Dave
---
Support TI mode and soft float on PA64

This change implements TI mode on PA64.  Various new patterns are
added to pa.md.  The libgcc build needed modification to build both
DI and TI routines.  We also need various softfp routines to
convert to and from TImode.

I added full softfp for the -msoft-float option.  At the moment,
this doesn't completely eliminate all use of the floating-point
co-processor.  For this, libgcc needs to be built with -msoft-mult.
The floating-point exception support also needs a soft option.

2021-11-05  John David Anglin  

PR libgomp/96661

gcc/ChangeLog:

* config/pa/pa-modes.def: Add OImode integer type.
* config/pa/pa.c (pa_scalar_mode_supported_p): Allow TImode
for TARGET_64BIT.
* config/pa/pa.h (MIN_UNITS_PER_WORD) Define to MIN_UNITS_PER_WORD
to UNITS_PER_WORD if IN_LIBGCC2.
* config/pa/pa.md (addti3, addvti3, subti3, subvti3, negti2,
negvti2, ashlti3, shrpd_internal): New patterns.
Change some multi instruction types to multi.

libgcc/ChangeLog:

* config.host (hppa*64*-*-linux*): Revise tmake_file.
(hppa*64*-*-hpux11*): Likewise.
* config/pa/sfp-exceptions.c: New.
* config/pa/sfp-machine.h: New.
* config/pa/t-dimode: New.
* config/pa/t-softfp-sfdftf: New.

diff --git a/gcc/config/pa/pa-modes.def b/gcc/config/pa/pa-modes.def
index 769de66f6b6..6020233c171 100644
--- a/gcc/config/pa/pa-modes.def
+++ b/gcc/config/pa/pa-modes.def
@@ -30,3 +30,6 @@ FLOAT_MODE (TF, 16, mips_quad_format);

 /* HPPA floating comparisons produce distinct condition codes.  */
 CC_MODE (CCFP);
+
+/* Mode used for signed overflow checking of TImode.  */
+INT_MODE (OI, 32);
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 21b812e9be7..f22d25a4066 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -6550,18 +6550,16 @@ hppa_gimplify_va_arg_expr (tree valist, tree type, 
gimple_seq *pre_p,

 /* True if MODE is valid for the target.  By "valid", we mean able to
be manipulated in non-trivial ways.  In particular, this means all
-   the arithmetic is supported.
-
-   Currently, TImode is not valid as the HP 64-bit runtime documentation
-   doesn't document the alignment and calling conventions for this type.
-   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
-   2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE.  */
+   the arithmetic is supported.  */

 static bool
 pa_scalar_mode_supported_p (scalar_mode mode)
 {
   int precision = GET_MODE_PRECISION (mode);

+  if (TARGET_64BIT && mode == TImode)
+return true;
+
   switch (GET_MODE_CLASS (mode))
 {
 case MODE_PARTIAL_INT:
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 7a313d617b0..96815ec69cb 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -255,11 +258,17 @@ typedef struct GTY(()) machine_function
is UNITS_PER_WORD.  Otherwise, it is the constant value that is the
smallest value that UNITS_PER_WORD can have at run-time.

-   FIXME: This needs to be 4 when TARGET_64BIT is true to suppress the
-   building of various TImode routines in libgcc.  The HP runtime
-   specification doesn't provide the alignment requirements and calling
-   conventions for TImode variables.  */
-#define MIN_UNITS_PER_WORD 4
+   This needs to be 8 when TARGET_64BIT is true to allow building various
+   TImode routines in libgcc.  However, we also need the DImode DIVMOD
+   routines because they are not currently implemented in pa.md.
+
+   The HP runtime specification doesn't provide the alignment requirements
+   and calling conventions for TImode variables.  */
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD  UNITS_PER_WORD
+#else
+#define MIN_UNITS_PER_WORD  4
+#endif

 /* The widest floating point format supported by the hardware.  Note that
setting this influences some Ada floating point type sizes, currently
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index ea6da457fcb..f124c301b7a 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -5357,6 +5357,88 @@
   [(set_attr "type" "binary,binary")
(set_attr "length" "4,4")])

+(define_insn "addti3"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+   (pl

[COMMITTED] PR tree-optimization/102943 - Abstract ranger cache update list.

2021-11-05 Thread Andrew MacLeod via Gcc-patches

OK, removing the call to vec::contains() is clearly the right move.

Rather than go with the extra bitmap to track what's in the vector, I
have done a couple of things.
  1 - Abstracted the update list into its own class, making it easier
to change the underlying mechanism from a stack to a breadth-first queue
or whatever else.
  2 - Changed the implementation to not use vec::contains, and moved 
away from using the vector push/pop API.


It is now implemented as a single vector over the basic blocks, in which 
a value of 0 indicates the block is not in the list, and otherwise it 
points to the next index in the list.  The list is terminated with a -1.


m_head points to the head of the list, or -1 if the list is empty.

empty_p() is O(1): check if m_head == -1
in_list_p(BB) is O(1): check if vec[bb] != 0
pop() is O(1): return the block for m_head, set vec[m_head] = 0, and
advance m_head to the next element
push() is O(1): if (vec[bb] == 0)  { vec[bb] = m_head; m_head = bb; }

so at the expense of a single vector over BBs, we have an O(1) solution 
to everything.
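
As a minimal standalone sketch of the same idea (plain C++ with made-up
names, not the actual GCC vec/basic_block types used in the patch below):

#include <cassert>
#include <vector>

// Worklist of ids in [1, n) with O(1) push, pop and membership test.
// next[i] == 0 means i is not queued; otherwise next[i] is the id that
// follows i in the list, with -1 terminating it.  Id 0 is reserved so
// that the value 0 can mean "not present".
class worklist
{
  std::vector<int> next;
  int head = -1;                    // -1 means the list is empty
public:
  explicit worklist (int n) : next (n, 0) {}
  bool empty_p () const { return head == -1; }
  bool in_list_p (int i) const { return next[i] != 0; }
  void push (int i)
  {
    assert (i > 0);
    if (next[i] != 0)               // already queued, nothing to do
      return;
    next[i] = empty_p () ? -1 : head;
    head = i;
  }
  int pop ()
  {
    assert (!empty_p ());
    int i = head;
    head = next[i];                 // becomes -1 when i was the last element
    next[i] = 0;
    return i;
  }
};

Reserving index 0 to mean "not in the list" mirrors the assumption the
patch makes for block 0 (see the checking assert on m_update_head in
update_list::add below).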


This provides some nominal improvements over all compilations, and has 
similar performance characteristics to the bitmap solution.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew


>From 98244c68e77cf75f93b66ee02df059f718c3fbc0 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Thu, 4 Nov 2021 15:08:06 -0400
Subject: [PATCH 1/2] Abstract ranger cache update list.

Make it more efficient by removing the call to vec::contains.

	PR tree-optimization/102943
	* gimple-range-cache.cc (class update_list): New.
	(update_list::add): Replace add_to_update.
	(update_list::pop): New.
	(ranger_cache::ranger_cache): Adjust.
	(ranger_cache::~ranger_cache): Adjust.
	(ranger_cache::add_to_update): Delete.
	(ranger_cache::propagate_cache): Adjust to new class.
	(ranger_cache::propagate_updated_value): Ditto.
	(ranger_cache::fill_block_cache): Ditto.
	* gimple-range-cache.h (class ranger_cache): Adjust to update class.
---
 gcc/gimple-range-cache.cc | 129 +-
 gcc/gimple-range-cache.h  |   4 +-
 2 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index 05010cf15bc..e5591bab0ef 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -754,14 +754,96 @@ temporal_cache::set_always_current (tree name)
 
 // --
 
+// This class provides an abstraction of a list of blocks to be updated
+// by the cache.  It is currently a stack but could be changed.  It also
+// maintains a list of blocks which have failed propagation, and does not
+// enter any of those blocks into the list.
+
+// A vector over the BBs is maintained, and an entry of 0 means it is not in
+// a list.  Otherwise, the entry is the next block in the list. -1 terminates
+// the list.  m_head points to the top of the list, -1 if the list is empty.
+
+class update_list
+{
+public:
+  update_list ();
+  ~update_list ();
+  void add (basic_block bb);
+  basic_block pop ();
+  inline bool empty_p () { return m_update_head == -1; }
+  inline void clear_failures () { bitmap_clear (m_propfail); }
+  inline void propagation_failed (basic_block bb)
+  { bitmap_set_bit (m_propfail, bb->index); }
+private:
+  vec m_update_list;
+  int m_update_head;
+  bitmap m_propfail;
+};
+
+// Create an update list.
+
+update_list::update_list ()
+{
+  m_update_list.create (0);
+  m_update_list.safe_grow_cleared (last_basic_block_for_fn (cfun) + 64);
+  m_update_head = -1;
+  m_propfail = BITMAP_ALLOC (NULL);
+}
+
+// Destroy an update list.
+
+update_list::~update_list ()
+{
+  m_update_list.release ();
+  BITMAP_FREE (m_propfail);
+}
+
+// Add BB to the list of blocks to update, unless it's already in the list.
+
+void
+update_list::add (basic_block bb)
+{
+  int i = bb->index;
+  // If propagation has failed for BB, or its already in the list, don't
+  // add it again.
+  if ((unsigned)i >= m_update_list.length ())
+m_update_list.safe_grow_cleared (i + 64);
+  if (!m_update_list[i] && !bitmap_bit_p (m_propfail, i))
+{
+  if (empty_p ())
+	{
+	  m_update_head = i;
+	  m_update_list[i] = -1;
+	}
+  else
+	{
+	  gcc_checking_assert (m_update_head > 0);
+	  m_update_list[i] = m_update_head;
+	  m_update_head = i;
+	}
+}
+}
+
+// Remove a block from the list.
+
+basic_block
+update_list::pop ()
+{
+  gcc_checking_assert (!empty_p ());
+  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, m_update_head);
+  int pop = m_update_head;
+  m_update_head = m_update_list[pop];
+  m_update_list[pop] = 0;
+  return bb;
+}
+
+// --
+
 ranger_cache::ranger_cache (int not_executable_flag)
 		: m_gori (not_executable_flag)
 {
   m_workback.create (0);
   m_workback.safe_grow_cleared (last_basic_block_for_fn (cfun));
-  m_update_list.create (0);
-  m_update_list.s

[COMMITTED] PR-tree-optimization/103093 - Remove def chain import assert from GORI.

2021-11-05 Thread Andrew MacLeod via Gcc-patches
As detailed in the PR, when the IL is changing between queries, the
imports and def chains of an ssa-name may change, and when comparing with
a newly created ssa-name, certain assumptions about presence or absence
in those lists may no longer hold.


This patch simply removes the offending assert.

Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew


>From 1f6dd5de33912c261a5003150212c290165ac1b6 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Fri, 5 Nov 2021 11:25:09 -0400
Subject: [PATCH 2/2] Remove def chain import assert from GORI.

When the IL has changed, any new ssa-names import calculations may not jive
with existing ssa-names, so just remove the assert.

	gcc/
	PR tree-optimization/103093
	* gimple-range-gori.cc (range_def_chain::get_imports): Remove assert.

	gcc/testsuite/
	* gcc.dg/pr103093.c: New.
---
 gcc/gimple-range-gori.cc|  3 ---
 gcc/testsuite/gcc.dg/pr103093.c | 20 
 2 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103093.c

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 2e58c23216b..fb2d571ef44 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -226,9 +226,6 @@ range_def_chain::get_imports (tree name)
   if (!has_def_chain (name))
 get_def_chain (name);
   bitmap i = m_def_chain[SSA_NAME_VERSION (name)].m_import;
-  // Either this is a default def,  OR imports must be a subset of exports.
-  gcc_checking_assert (!get_def_chain (name) || !i
-		   || !bitmap_intersect_compl_p (i, get_def_chain (name)));
   return i;
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr103093.c b/gcc/testsuite/gcc.dg/pr103093.c
new file mode 100644
index 000..f42572147ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103093.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int i_0, c_4, uc_7, func_2_c_11;
+
+short *func_2_ptr_10;
+
+void func_2() {
+  uc_7 = 7;
+  for (; uc_7 <= 60; uc_7 += 1) {
+c_4 = 5;
+for (; c_4 <= 76; c_4 += 1) {
+  func_2_ptr_10 = &i_0;	/* { dg-warning "assignment to .*" } */
+  if ((i_0 |= 5) > 0 ?: (60 && uc_7) | *func_2_ptr_10)
+if (func_2_c_11)
+  for (;;)
+;
+}
+  }
+}
-- 
2.17.2



[PATCH] gcov-profile: Fix -fcompare-debug with -fprofile-generate [PR100520]

2021-11-05 Thread Martin Liška

Hello.

This strips .gk from aux_base_name in coverage.c.
Do you like the implementation of endswith, or do we have the functionality 
somewhere?

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

PR gcov-profile/100520

gcc/ChangeLog:

* coverage.c (coverage_compute_profile_id): Strip .gk when
compare debug is used.
* system.h (endswith): New function.

gcc/testsuite/ChangeLog:

* gcc.dg/pr100520.c: New test.
---
 gcc/coverage.c  |  7 +--
 gcc/system.h| 13 +
 gcc/testsuite/gcc.dg/pr100520.c |  5 +
 3 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr100520.c

diff --git a/gcc/coverage.c b/gcc/coverage.c
index 4467f1eaa5c..4daa3f9fc30 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -571,8 +571,11 @@ coverage_compute_profile_id (struct cgraph_node *n)
   if (!use_name_only && first_global_object_name)
chksum = coverage_checksum_string
  (chksum, first_global_object_name);
-  chksum = coverage_checksum_string
-   (chksum, aux_base_name);
+  char *base_name = xstrdup (aux_base_name);
+  if (endswith (base_name, ".gk"))
+   base_name[strlen (base_name) - 3] = '\0';
+  chksum = coverage_checksum_string (chksum, base_name);
+  free (base_name);
 }
 
   /* Non-negative integers are hopefully small enough to fit in all targets.

diff --git a/gcc/system.h b/gcc/system.h
index adde3e264b6..4ac656c9c3c 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1305,4 +1305,17 @@ startswith (const char *str, const char *prefix)
   return strncmp (str, prefix, strlen (prefix)) == 0;
 }
 
+/* Return true if STR string ends with SUFFIX.  */

+
+static inline bool
+endswith (const char *str, const char *suffix)
+{
+  size_t str_len = strlen (str);
+  size_t suffix_len = strlen (suffix);
+  if (str_len < suffix_len)
+return false;
+
+  return memcmp (str + str_len - suffix_len, suffix, suffix_len) == 0;
+}
+
 #endif /* ! GCC_SYSTEM_H */
diff --git a/gcc/testsuite/gcc.dg/pr100520.c b/gcc/testsuite/gcc.dg/pr100520.c
new file mode 100644
index 000..60f79c2b888
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr100520.c
@@ -0,0 +1,5 @@
+/* PR gcov-profile/100520 */
+/* { dg-do compile } */
+/* { dg-options "-fcompare-debug -fprofile-generate" } */
+
+static int f() {}
--
2.33.1



Re: [PATCH] gcov-profile: Fix -fcompare-debug with -fprofile-generate [PR100520]

2021-11-05 Thread Jan Hubicka via Gcc-patches
> Hello.
> 
> This strips .gk from aux_base_name in coverage.c.
> Do you like the implementation of endswith, or do we have the functionality 
> somewhere?
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin
> 
>   PR gcov-profile/100520
> 
> gcc/ChangeLog:
> 
>   * coverage.c (coverage_compute_profile_id): Strip .gk when
>   compare debug is used.
>   * system.h (endswith): New function.

Dropping .gk in coverage.c seems OK, but having endswith included in
every gcc source looks like a bit of overkill given that it can be open
coded in 3 statements?
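
For reference, one possible open-coded form in coverage.c (just a sketch
to illustrate the suggestion, assuming only the -fcompare-debug ".gk"
suffix needs stripping; not what was committed):

      char *base_name = xstrdup (aux_base_name);
      size_t len = strlen (base_name);
      /* Strip the ".gk" suffix added by -fcompare-debug.  */
      if (len > 3 && strcmp (base_name + len - 3, ".gk") == 0)
        base_name[len - 3] = '\0';
      chksum = coverage_checksum_string (chksum, base_name);
      free (base_name);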

Honza
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/pr100520.c: New test.
> ---
>  gcc/coverage.c  |  7 +--
>  gcc/system.h| 13 +
>  gcc/testsuite/gcc.dg/pr100520.c |  5 +
>  3 files changed, 23 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr100520.c
> 
> diff --git a/gcc/coverage.c b/gcc/coverage.c
> index 4467f1eaa5c..4daa3f9fc30 100644
> --- a/gcc/coverage.c
> +++ b/gcc/coverage.c
> @@ -571,8 +571,11 @@ coverage_compute_profile_id (struct cgraph_node *n)
>if (!use_name_only && first_global_object_name)
>   chksum = coverage_checksum_string
> (chksum, first_global_object_name);
> -  chksum = coverage_checksum_string
> - (chksum, aux_base_name);
> +  char *base_name = xstrdup (aux_base_name);
> +  if (endswith (base_name, ".gk"))
> + base_name[strlen (base_name) - 3] = '\0';
> +  chksum = coverage_checksum_string (chksum, base_name);
> +  free (base_name);
>  }
>/* Non-negative integers are hopefully small enough to fit in all targets.
> diff --git a/gcc/system.h b/gcc/system.h
> index adde3e264b6..4ac656c9c3c 100644
> --- a/gcc/system.h
> +++ b/gcc/system.h
> @@ -1305,4 +1305,17 @@ startswith (const char *str, const char *prefix)
>return strncmp (str, prefix, strlen (prefix)) == 0;
>  }
> +/* Return true if STR string ends with SUFFIX.  */
> +
> +static inline bool
> +endswith (const char *str, const char *suffix)
> +{
> +  size_t str_len = strlen (str);
> +  size_t suffix_len = strlen (suffix);
> +  if (str_len < suffix_len)
> +return false;
> +
> +  return memcmp (str + str_len - suffix_len, suffix, suffix_len) == 0;
> +}
> +
>  #endif /* ! GCC_SYSTEM_H */
> diff --git a/gcc/testsuite/gcc.dg/pr100520.c b/gcc/testsuite/gcc.dg/pr100520.c
> new file mode 100644
> index 000..60f79c2b888
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr100520.c
> @@ -0,0 +1,5 @@
> +/* PR gcov-profile/100520 */
> +/* { dg-do compile } */
> +/* { dg-options "-fcompare-debug -fprofile-generate" } */
> +
> +static int f() {}
> -- 
> 2.33.1
> 


Re: Values of WIDE_INT_MAX_ELTS in gcc11 and gcc12 are different

2021-11-05 Thread Qing Zhao via Gcc-patches


> On Nov 5, 2021, at 11:17 AM, Jakub Jelinek  wrote:
> 
> On Fri, Nov 05, 2021 at 04:11:36PM +, Qing Zhao wrote:
>> 3076   if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
>> 3077   && tree_fits_uhwi_p (var_size)
>> 3078   && (init_type == AUTO_INIT_PATTERN
>> 3079   || !is_gimple_reg_type (var_type))
>> 3080   && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
>> 3081 0).exists ())
>> 3082 {
>> 3083   unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size);
>> 3084   unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
>> 3085   memset (buf, (init_type == AUTO_INIT_PATTERN
>> 3086 ? INIT_PATTERN_VALUE : 0), total_bytes);
>> 3087   tree itype = build_nonstandard_integer_type
>> 3088  (total_bytes * BITS_PER_UNIT, 1);
>> 
>> The exact failing point is at function 
>> “set_min_and_max_values_for_integral_type”:
>> 
>> 2851   gcc_assert (precision <= WIDE_INT_MAX_PRECISION);
>> 
>> For _Complex long double,  “precision” is 256.  
>> In GCC11, “WIDE_INT_MAX_PRECISION” is 192,  in GCC12, it’s 512. 
>> As a result, the above assertion failed on GCC11. 
>> 
>> I am wondering what’s the best fix for this issue in gcc11? 
> 
> Even for gcc 12 the above is wrong, you can't blindly assume that
> build_nonstandard_integer_type will work for arbitrary precisions,
> and even if it works that it will actually work.
> The fact that such a mode exist is one thing, but
> targetm.scalar_mode_supported_p should be tested for whether the mode
> is actually supported.

You mean “int_mode_for_size().exists()” is not enough to make sure
“build_nonstandard_integer_type” is valid?
We should add “targetm.scalar_mode_supported_p” too?
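
If so, the guard quoted above might be extended along these lines (an
illustrative sketch only; var_mode is a new local introduced here for
the example, and the exact form would be up to the patch author):

  scalar_int_mode var_mode;
  if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE
      && tree_fits_uhwi_p (var_size)
      && (init_type == AUTO_INIT_PATTERN
          || !is_gimple_reg_type (var_type))
      && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
                            0).exists (&var_mode)
      && targetm.scalar_mode_supported_p (var_mode))
    {
      /* ... build the nonstandard integer type and the constant as in
         the existing code ... */
    }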

Qing
> 
>   Jakub
> 



Re: [PATCH] Darwin, Arm64 : Initial support for the self-host driver.

2021-11-05 Thread Richard Earnshaw via Gcc-patches




On 05/11/2021 15:14, Iain Sandoe via Gcc-patches wrote:

This allows people to host a c-family/fortran GCC cross-compiler on
aarch64-apple-darwin (support for Ada will follow in a separate patch).

At present, there is no special action needed for aarch64-darwin;
this just pulls in generic Darwin code.

Tested on aarch64-darwin20,
OK for master?
thanks,
Iain

---
  gcc/config.host  |  7 -
  gcc/config/aarch64/host-aarch64-darwin.c | 33 
  gcc/config/aarch64/x-darwin  |  3 +++
  3 files changed, 42 insertions(+), 1 deletion(-)
  create mode 100644 gcc/config/aarch64/host-aarch64-darwin.c
  create mode 100644 gcc/config/aarch64/x-darwin

diff --git a/gcc/config.host b/gcc/config.host
index 0a02c33cc80..37f9c719b68 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -99,7 +99,8 @@ case ${host} in
  esac
  
  case ${host} in

-  aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia*)
+  aarch64*-*-freebsd* | aarch64*-*-linux* | aarch64*-*-fuchsia* |\
+  aarch64*-*-darwin* | arm64*-*-darwin*)
  case ${target} in


I think we should avoid the term arm64.  Historically we had patterns 
that matched arm* or arm6* for the 32-bit tools.




aarch64*-*-*)
host_extra_gcc_objs="driver-aarch64.o"
@@ -251,6 +252,10 @@ case ${host} in
  host_extra_gcc_objs="${host_extra_gcc_objs} driver-mingw32.o"
  host_lto_plugin_soname=liblto_plugin.dll
  ;;
+  aarch64*-*-darwin* | arm64*-*-darwin*)
+out_host_hook_obj="${out_host_hook_obj} host-aarch64-darwin.o"
+host_xmake_file="${host_xmake_file} aarch64/x-darwin"
+;;
i[34567]86-*-darwin* | x86_64-*-darwin*)
  out_host_hook_obj="${out_host_hook_obj} host-i386-darwin.o"
  host_xmake_file="${host_xmake_file} i386/x-darwin"
diff --git a/gcc/config/aarch64/host-aarch64-darwin.c 
b/gcc/config/aarch64/host-aarch64-darwin.c
new file mode 100644
index 000..d70f2df3bf1
--- /dev/null
+++ b/gcc/config/aarch64/host-aarch64-darwin.c
@@ -0,0 +1,33 @@
+/* aarch64/arm64-darwin host-specific hook definitions.
+
+Copyright The GNU Toolchain Authors.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for arm64/aarch64 hosts; this file
+   exists just to include the generic config/host-darwin.h.  */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/aarch64/x-darwin b/gcc/config/aarch64/x-darwin
new file mode 100644
index 000..6d788d5e89c
--- /dev/null
+++ b/gcc/config/aarch64/x-darwin
@@ -0,0 +1,3 @@
+host-aarch64-darwin.o : $(srcdir)/config/aarch64/host-aarch64-darwin.c
+   $(COMPILE) $<
+   $(POSTCOMPILE)



Otherwise, OK.

R.


Re: [PATCH 2/5] Add Power10 XXSPLTI* and LXVKQ instructions (LXVKQ)

2021-11-05 Thread will schmidt via Gcc-patches
On Fri, 2021-11-05 at 00:07 -0400, Michael Meissner wrote:
> Add LXVKQ support.
> 
> This patch adds support to generate the LXVKQ instruction to load specific
> IEEE-128 floating point constants.
> 
> Compared to the last time I submitted this patch, I modified it so that it
> uses the bit pattern of the vector to see if it can generate the LXVKQ
> instruction.  This means on a little endian Power system, the
> following code will generate a LXVKQ 34,16 instruction:
> 
> vector long long foo (void)
> {
> #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
>   return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
> #else
>   return (vector long long) { 0x8000000000000000, 0x0000000000000000 };
> #endif
> }
> 
> because that vector pattern is the same bit pattern as -0.0F128.
> 
> 2021-11-05  Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/constraints.md (eQ): New constraint.
>   * config/rs6000/predicates.md (easy_fp_constant): Add support for
>   generating the LXVKQ instruction.
>   (easy_vector_constant_ieee128): New predicate.
>   (easy_vector_constant): Add support for generating the LXVKQ
>   instruction.
>   * config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
>   declaration.
>   * config/rs6000/rs6000.c (output_vec_const_move): Add support for
>   generating LXVKQ.
>   (constant_generates_lxvkq): New function.
>   * config/rs6000/rs6000.opt (-mieee128-constant): New debug
>   option.
>   * config/rs6000/vsx.md (vsx_mov_64bit): Add support for
>   generating LXVKQ.
>   (vsx_mov_32bit): Likewise.
>   * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
>   eQ constraint.
> 
> gcc/testsuite/
> 
>   * gcc.target/powerpc/float128-constant.c: New test.
> ---
>  gcc/config/rs6000/constraints.md  |   6 +
>  gcc/config/rs6000/predicates.md   |  34 
>  gcc/config/rs6000/rs6000-protos.h |   1 +
>  gcc/config/rs6000/rs6000.c|  62 +++
>  gcc/config/rs6000/rs6000.opt  |   4 +
>  gcc/config/rs6000/vsx.md  |  14 ++
>  gcc/doc/md.texi   |   4 +
>  .../gcc.target/powerpc/float128-constant.c| 160 ++
>  8 files changed, 285 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-constant.c
> 
> diff --git a/gcc/config/rs6000/constraints.md 
> b/gcc/config/rs6000/constraints.md
> index c8cff1a3038..e72132b4c28 100644
> --- a/gcc/config/rs6000/constraints.md
> +++ b/gcc/config/rs6000/constraints.md
> @@ -213,6 +213,12 @@ (define_constraint "eI"
>"A signed 34-bit integer constant if prefixed instructions are supported."
>(match_operand 0 "cint34_operand"))
> 
> +;; A TF/KF scalar constant or a vector constant that can load certain IEEE
> +;; 128-bit constants into vector registers using LXVKQ.
> +(define_constraint "eQ"
> +  "An IEEE 128-bit constant that can be loaded into VSX registers."
> +  (match_operand 0 "easy_vector_constant_ieee128"))
> +
>  ;; Floating-point constraints.  These two are defined so that insn
>  ;; length attributes can be calculated exactly.
> 

ok


> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index 956e42bc514..e0d1c718e9f 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -601,6 +601,14 @@ (define_predicate "easy_fp_constant"
>if (TARGET_VSX && op == CONST0_RTX (mode))
>  return 1;
> 
> +  /* Constants that can be generated with ISA 3.1 instructions are easy.  */

Easy is relative, but OK.

> +  vec_const_128bit_type vsx_const;
> +  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
> +{
> +  if (constant_generates_lxvkq (&vsx_const) != 0)
> + return true;
> +}
> +
>/* Otherwise consider floating point constants hard, so that the
>   constant gets pushed to memory during the early RTL phases.  This
>   has the advantage that double precision constants that can be
> @@ -609,6 +617,23 @@ (define_predicate "easy_fp_constant"
> return 0;
>  })
> 
> +;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
> +;; via the LXVKQ instruction.
> +
> +(define_predicate "easy_vector_constant_ieee128"
> +  (match_code "const_vector,const_double")
> +{
> +  vec_const_128bit_type vsx_const;
> +
> +  /* Can we generate the LXVKQ instruction?  */
> +  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
> +  || !TARGET_VSX)
> +return false;

Presumably all of the checks there are valid.  (Can we have power10
without float128_hw or ieee128_constant flags set?)  I do notice the
addition of an ieee128_constant flag below.
> +
> +  return (vec_const_128bit_to_bytes (op, mode, &vsx_const)
> +   && constant_generates_lxvkq (&vsx_const) != 0);
> +})
> +

ok


>  ;; Return 1 if the operand is a constant that ca

*PING* [PATCH] PR fortran/102715 - [12 Regression] ICE in gfc_simplify_transpose, at fortran/simplify.c:8184

2021-11-05 Thread Harald Anlauf via Gcc-patches

Early ping.

On 31.10.21 at 22:35, Harald Anlauf via Fortran wrote:

Dear Fortranners,

the fix for initialization of DT arrays caused an apparent regression for
cases where inconsistent ranks were used in such an initialization.
This caused either an ICE in subsequent uses of these arrays, or showed
up in valgrind as invalid reads, all of which seemed to be related to this
rank mismatch.

The cleanest solution seems to be to strictly reject rank mismatch earlier
than we used to, which helps error recovery.  I had to adjust one testcase
accordingly.

The place I inserted the check does not distinguish between explicit shape
and implied shape.  The Intel compiler does give a slightly different
error message for the implied shape case.  If anyone feels strongly about
this, I'm open to suggestions for better choices of handling this.

Regtested on x86_64-pc-linux-gnu.  OK for mainline / affected branches?

Thanks,
Harald





Re: [PATCH 2/5] Add Power10 XXSPLTI* and LXVKQ instructions (LXVKQ)

2021-11-05 Thread Michael Meissner via Gcc-patches
On Fri, Nov 05, 2021 at 12:52:51PM -0500, will schmidt wrote:
> > diff --git a/gcc/config/rs6000/predicates.md 
> > b/gcc/config/rs6000/predicates.md
> > index 956e42bc514..e0d1c718e9f 100644
> > --- a/gcc/config/rs6000/predicates.md
> > +++ b/gcc/config/rs6000/predicates.md
> > @@ -601,6 +601,14 @@ (define_predicate "easy_fp_constant"
> >if (TARGET_VSX && op == CONST0_RTX (mode))
> >  return 1;
> > 
> > +  /* Constants that can be generated with ISA 3.1 instructions are easy.  
> > */
> 
> Easy is relative, but OK.

The name of the function is easy_fp_constant.

> > +  vec_const_128bit_type vsx_const;
> > +  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
> > +{
> > +  if (constant_generates_lxvkq (&vsx_const) != 0)
> > +   return true;
> > +}
> > +
> >/* Otherwise consider floating point constants hard, so that the
> >   constant gets pushed to memory during the early RTL phases.  This
> >   has the advantage that double precision constants that can be
> > @@ -609,6 +617,23 @@ (define_predicate "easy_fp_constant"
> > return 0;
> >  })
> > 
> > +;; Return 1 if the operand is a special IEEE 128-bit value that can be 
> > loaded
> > +;; via the LXVKQ instruction.
> > +
> > +(define_predicate "easy_vector_constant_ieee128"
> > +  (match_code "const_vector,const_double")
> > +{
> > +  vec_const_128bit_type vsx_const;
> > +
> > +  /* Can we generate the LXVKQ instruction?  */
> > +  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
> > +  || !TARGET_VSX)
> > +return false;
> 
> Presumably all of the checks there are valid.  (Can we have power10
> without float128_hw or ieee128_constant flags set?)I do notice the
> addition of an ieee128_constant flag below.

Yes, we can have power10 without float128_hw.  At the moment, 32-bit big endian
does not enable the 128-bit IEEE instructions.  Also, when we are building the
bits in libgcc that can switch between compiling the software routines and the
routines used for IEEE hardware, and when we are building the IEEE 128-bit
software emulation functions, we need to explicitly turn off IEEE 128-bit
hardware support.

Similarly for VSX, if the user explicitly says -mno-vsx, then we can't enable
this instruction.

> Ok.  I did look at this a bit before it clicked, so would suggest a
> comment stl "All of the constants that can be loaded by lxvkq will have
> zero in the bottom 3 words, so ensure those are zero before we use a
> switch based on the nonzero portion of the constant."
> 
> It would be fine as-is too.  :-)

Ok.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[r12-4931 Regression] FAIL: libgomp.fortran/examples-4/simd-6.f90 -Os (test for excess errors) on Linux/x86_64

2021-11-05 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

33f1d038708a793a498076c8647165613ec90661 is the first bad commit
commit 33f1d038708a793a498076c8647165613ec90661
Author: Richard Biener 
Date:   Wed Oct 27 13:14:41 2021 +0200

First refactor of vect_analyze_loop

caused

FAIL: gfortran.dg/alloc_comp_assign_2.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler 
error)
FAIL: gfortran.dg/alloc_comp_assign_2.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
errors)
FAIL: gfortran.dg/alloc_comp_assign_2.f90   -O3 -g  (internal compiler error)
FAIL: gfortran.dg/alloc_comp_assign_2.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/pr101267.f90   -O  (internal compiler error)
FAIL: gfortran.dg/pr101267.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr79315.f90   -O  (internal compiler error)
FAIL: gfortran.dg/pr79315.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr81175.f   -O  (internal compiler error)
FAIL: gfortran.dg/pr81175.f   -O  (test for excess errors)
FAIL: gfortran.dg/pr98974.F90   -O  (internal compiler error)
FAIL: gfortran.dg/pr98974.F90   -O  (test for excess errors)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O1  (internal compiler error)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O1  (test for excess errors)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O2  (internal compiler error)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O2  (test for excess errors)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler 
error)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
errors)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O3 -g  (internal compiler error)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -O3 -g  (test for excess errors)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -Os  (internal compiler error)
FAIL: libgomp.fortran/examples-4/simd-6.f90   -Os  (test for excess errors)

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-4931/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/alloc_comp_assign_2.f90 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/alloc_comp_assign_2.f90 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/pr101267.f90 --target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/pr101267.f90 --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gfortran.dg/pr79315.f90 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gfortran.dg/pr81175.f 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gfortran.dg/pr81175.f 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gfortran.dg/pr98974.F90 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gfortran.dg/pr98974.F90 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/x86_64-linux/libgomp/testsuite && make check 
RUNTESTFLAGS="fortran.exp=libgomp.fortran/examples-4/simd-6.f90 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/x86_64-linux/libgomp/testsuite && make check 
RUNTESTFLAGS="fortran.exp=libgomp.fortran/examples-4/simd-6.f90 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


Re: [PATCH 1/5] Add XXSPLTI* and LXVKQ instructions (new data structure and function)

2021-11-05 Thread Michael Meissner via Gcc-patches
On Fri, Nov 05, 2021 at 12:01:43PM -0500, will schmidt wrote:
> On Fri, 2021-11-05 at 00:04 -0400, Michael Meissner wrote:
> > Add new constant data structure.
> > 
> > This patch provides the data structure and function to convert a
> > CONST_INT, CONST_DOUBLE, CONST_VECTOR, or VEC_DUPLICATE of a constant) to
> > an array of bytes, half-words, words, and  double words that can be loaded
> > into a 128-bit vector register.
> > 
> > The next patches will use this data structure to generate code that
> > generates load of the vector/floating point registers using the XXSPLTIDP,
> > XXSPLTIW, and LXVKQ instructions that were added in power10.
> > 
> > 2021-11-05  Michael Meissner  
> > 

Whoops, it should be meiss...@linux.ibm.com.

> comment to be explicit on the structure name being copied to/from.
> (vec_const_128bit_type is easy to search for, vector or constant or
> structure are not as unique)

Yes, the original name was more generic (rs6000_const).  Originally it could
potentially handle vector constants that were greater than 128-bits if we ever
have support for larger vectors.  But I thought that extra generality hindered
the code (since you had to check whether the size was exactly 128-bits, etc.).
So I made the data structure tailored to the problem at hand.

> > +
> > +/* Copy an floating point constant to the vector constant structure.  */
> > +
> 
> s/an/a/

Ok.

> > +static void
> > +constant_fp_to_128bit_vector (rtx op,
> > + machine_mode mode,
> > + size_t byte_num,
> > + vec_const_128bit_type *info)
> > +{
> > +  unsigned bitsize = GET_MODE_BITSIZE (mode);
> > +  unsigned num_words = bitsize / 32;
> > +  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
> > +  long real_words[VECTOR_128BIT_WORDS];
> > +
> > +  /* Make sure we don't overflow the real_words array and that it is
> > + filled completely.  */
> > +  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
> 
> Not clear to me on the potential to partially fill the real_words
> array. 

At the moment we don't support a 16-bit floating point type in the compiler
(the Power10 has limited 16-bit floating point support, but we don't make a
special type for it).  If/when we add the 16-bit floating point, we will
possibly need to revisit this.

> > +
> > +  real_to_target (real_words, rtype, mode);
> > +
> > +  /* Iterate over each 32-bit word in the floating point constant.  The
> > + real_to_target function puts out words in endian fashion.  We need
> 
> Meaning host-endian fashion, or is that meant to be big-endian ? 

Real_to_target puts out the 32-bit values in endian fashion.  This data
structure wants to hold everything in big endian fashion to make checking
things simpler.

> Perhaps also rephrase or move the comment up to indicate that
> real_to_target will have placed or has already placed the words in
>  endian fashion.
> As stated I was expecting to see a call to real_to_target() below the
> comment. 

Yes, I probably should move the real_to_target call after the comment.

> > +
> > +  /* Possibly splat the constant to fill a vector size.  */
> 
> 
> Suggest "Splat the constant to fill a vector size if ..."

Ok.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH v2] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-11-05 Thread Jonathan Wakely via Gcc-patches
On Fri, 5 Nov 2021 at 13:20, Bill Schmidt wrote:

>
> On 11/5/21 7:44 AM, Jonathan Wakely wrote:
> > On Thu, 4 Nov 2021 at 20:44, Bill Schmidt wrote:
> >
> > For posterity:  This was discussed briefly on IRC, and Segher
> approved with some
> > simplifications and a request to implement a fail/retry check.
> >
> >
> > Here's what I have now. No more assembler check in configure, and it
> uses the 64-bit __builtin_darn() and truncates it to 32-bit, or retries (up
> to 100 times) if it fails.
> >
> > I'm doing some more testing now.
> >
> Those changes look good from my perspective.  Thanks again for this work!
> (Not a maintainer, blah blah...)
>

I changed the number of retries to 10 and pushed it to trunk.
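
For reference, the darn-based source ends up looking roughly like the
__x86_rdrand helper shown in a later message; the sketch below is from
memory rather than the committed code, the helper name is made up, and
it assumes the DARN instruction's all-ones failure convention:

  unsigned int
  __ppc_darn ()   // illustrative name
  {
    unsigned int retries = 10;
    unsigned long long val;

    // DARN signals failure by returning all ones, so retry a few times.
    while ((val = __builtin_darn ()) == ~0ULL)
      if (--retries == 0)
        std::__throw_runtime_error(__N("random_device: darn failed"));

    return (unsigned int) val;   // truncate the 64-bit result to 32 bits
  }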


Re: [PATCH,Fortran] Fortran: Delete unused decl in gfortran.h

2021-11-05 Thread Mikael Morin

On 27/10/2021 at 23:11, Bernhard Reutner-Fischer via Fortran wrote:

Delete some more declarations without definitions and make some
functions static.
Bootstrapped and regtested on x86_64-unknown-linux without regressions.
Ok for trunk?


Ok.
Thanks

Mikael


[committed] libstdc++: Add [[unlikely]] attributes to std::random_device routines

2021-11-05 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.


libstdc++-v3/ChangeLog:

* src/c++11/random.cc (__x86_rdrand, __x86_rdseed): Add
[[unlikely]] attribute.
---
 libstdc++-v3/src/c++11/random.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/src/c++11/random.cc b/libstdc++-v3/src/c++11/random.cc
index 55a73c51fca..4b88818646f 100644
--- a/libstdc++-v3/src/c++11/random.cc
+++ b/libstdc++-v3/src/c++11/random.cc
@@ -97,7 +97,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
   unsigned int retries = 100;
   unsigned int val;
 
-  while (__builtin_ia32_rdrand32_step(&val) == 0)
+  while (__builtin_ia32_rdrand32_step(&val) == 0) [[__unlikely__]]
if (--retries == 0)
  std::__throw_runtime_error(__N("random_device: rdrand failed"));
 
@@ -113,7 +113,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
   unsigned int retries = 100;
   unsigned int val;
 
-  while (__builtin_ia32_rdseed_si_step(&val) == 0)
+  while (__builtin_ia32_rdseed_si_step(&val) == 0) [[__unlikely__]]
{
  if (--retries == 0)
{
-- 
2.31.1



[committed] libstdc++: Support getentropy and arc4random in std::random_device

2021-11-05 Thread Jonathan Wakely via Gcc-patches
This adds additional "getentropy" and "arc4random" tokens to
std::random_device. The former is supported on Glibc and OpenBSD (and
apparently wasm), and the latter is supported on various BSDs.

I'm trying to test this on OpenBSD but I can't bootstrap GCC using the
system clang.
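
The random.cc hunk itself is not fully visible below (most of what
follows is the configure machinery), so for orientation the two new
sources boil down to something like this sketch; the helper names are
made up and this is not the committed code:

#ifdef HAVE_GETENTROPY
  unsigned int
  __libc_getentropy ()   // illustrative name
  {
    unsigned int val;
    if (::getentropy (&val, sizeof (val)) != 0)
      std::__throw_runtime_error(__N("random_device: getentropy failed"));
    return val;
  }
#endif

#ifdef HAVE_ARC4RANDOM
  unsigned int
  __libc_arc4random ()   // illustrative name
  {
    return ::arc4random ();   // documented never to fail, so no retry loop
  }
#endif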


libstdc++-v3/ChangeLog:

* acinclude.m4 (GLIBCXX_CHECK_GETENTROPY, GLIBCXX_CHECK_ARC4RANDOM):
Define.
* configure.ac (GLIBCXX_CHECK_GETENTROPY, GLIBCXX_CHECK_ARC4RANDOM):
Use them.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/c++11/random.cc (random_device): Add getentropy and
arc4random as sources.
* testsuite/26_numerics/random/random_device/cons/token.cc:
Check new tokens.
* testsuite/26_numerics/random/random_device/entropy.cc:
Likewise.
---
 libstdc++-v3/acinclude.m4 |  46 
 libstdc++-v3/config.h.in  |   6 +
 libstdc++-v3/configure| 103 ++
 libstdc++-v3/configure.ac |   4 +
 libstdc++-v3/src/c++11/random.cc  |  69 +++-
 .../random/random_device/cons/token.cc|   1 +
 .../random/random_device/entropy.cc   |   7 ++
 7 files changed, 234 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 90ecc4a87a2..497af5723e1 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4830,6 +4830,52 @@ AC_DEFUN([GLIBCXX_CHECK_EXCEPTION_PTR_SYMVER], [
   fi
 ])
 
+dnl
+dnl Check whether getentropy is present in .
+dnl
+AC_DEFUN([GLIBCXX_CHECK_GETENTROPY], [
+
+  AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  AC_MSG_CHECKING([for getentropy])
+  AC_CACHE_VAL(glibcxx_cv_getentropy, [
+  AC_TRY_COMPILE(
+   [#include ],
+   [unsigned i;
+::getentropy(&i, sizeof(i));],
+   [glibcxx_cv_getentropy=yes], [glibcxx_cv_getentropy=no])
+])
+
+  if test $glibcxx_cv_getentropy = yes; then
+AC_DEFINE(HAVE_GETENTROPY, 1, [Define if getentropy is available in 
.])
+  fi
+  AC_MSG_RESULT($glibcxx_cv_getentropy)
+  AC_LANG_RESTORE
+])
+
+dnl
+dnl Check whether arc4random is present in .
+dnl
+AC_DEFUN([GLIBCXX_CHECK_ARC4RANDOM], [
+
+  AC_LANG_SAVE
+  AC_LANG_CPLUSPLUS
+  AC_MSG_CHECKING([for arc4random])
+  AC_CACHE_VAL(glibcxx_cv_arc4random, [
+  AC_TRY_COMPILE(
+   [#include ],
+   [unsigned i = ::arc4random();],
+   [glibcxx_cv_arc4random=yes], [glibcxx_cv_arc4random=no])
+])
+
+  if test $glibcxx_cv_arc4random = yes; then
+AC_DEFINE(HAVE_ARC4RANDOM, 1, [Define if arc4random is available in 
.])
+  fi
+  AC_MSG_RESULT($glibcxx_cv_arc4random)
+  AC_LANG_RESTORE
+])
+
+
 # Macros from the top-level gcc directory.
 m4_include([../config/gc++filt.m4])
 m4_include([../config/tls.m4])
diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in
index 228a758325e..420021fcb1a 100644
--- a/libstdc++-v3/config.h.in
+++ b/libstdc++-v3/config.h.in
@@ -9,6 +9,9 @@
 /* Define to 1 if you have the `aligned_alloc' function. */
 #undef HAVE_ALIGNED_ALLOC
 
+/* Define if arc4random is available in . */
+#undef HAVE_ARC4RANDOM
+
 /* Define to 1 if you have the  header file. */
 #undef HAVE_ARPA_INET_H
 
@@ -132,6 +135,9 @@
 /* Define to 1 if you have the `frexpl' function. */
 #undef HAVE_FREXPL
 
+/* Define if getentropy is available in . */
+#undef HAVE_GETENTROPY
+
 /* Define if _Unwind_GetIPInfo is available. */
 #undef HAVE_GETIPINFO
 
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index c1aea827070..21371031b66 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -75429,6 +75429,109 @@ $as_echo "#define _GLIBCXX_X86_RDSEED 1" >>confdefs.h
 $as_echo "$ac_cv_x86_rdseed" >&6; }
 
 
+# Check for other random number APIs
+
+
+
+  ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS 
conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for getentropy" >&5
+$as_echo_n "checking for getentropy... " >&6; }
+  if ${glibcxx_cv_getentropy+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include 
+int
+main ()
+{
+unsigned i;
+::getentropy(&i, sizeof(i));
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  glibcxx_cv_getentropy=yes
+else
+  glibcxx_cv_getentropy=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+
+
+  if test $glibcxx_cv_getentropy = yes; then
+
+$as_echo "#define HAVE_GETENTROPY 1" >>confdefs.h
+
+  fi
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_getentropy" >&5
+$as_echo "$glibcxx_cv_getentropy" >&6; }
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeex

Re: [committed] libstdc++: Support getentropy and arc4random in std::random_device

2021-11-05 Thread Jonathan Wakely via Gcc-patches
Oops sorry - this is NOT committed yet. I won't push it until I've tested
it on at least one BSD, preferably OpenBSD so I can test parts of the new
code.


On Fri, 5 Nov 2021 at 18:21, Jonathan Wakely via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> This adds additional "getentropy" and "arc4random" tokens to
> std::random_device. The former is supported on Glibc and OpenBSD (and
> apparently wasm), and the latter is supported on various BSDs.
>
> I'm trying to test this on OpenBSD but I can't bootstrap GCC using the
> system clang.
>
>
> libstdc++-v3/ChangeLog:
>
> * acinclude.m4 (GLIBCXX_CHECK_GETENTROPY,
> GLIBCXX_CHECK_ARC4RANDOM):
> Define.
> * configure.ac (GLIBCXX_CHECK_GETENTROPY,
> GLIBCXX_CHECK_ARC4RANDOM):
> Use them.
> * config.h.in: Regenerate.
> * configure: Regenerate.
> * src/c++11/random.cc (random_device): Add getentropy and
> arc4random as sources.
> * testsuite/26_numerics/random/random_device/cons/token.cc:
> Check new tokens.
> * testsuite/26_numerics/random/random_device/entropy.cc:
> Likewise.
> ---
>  libstdc++-v3/acinclude.m4 |  46 
>  libstdc++-v3/config.h.in  |   6 +
>  libstdc++-v3/configure| 103 ++
>  libstdc++-v3/configure.ac |   4 +
>  libstdc++-v3/src/c++11/random.cc  |  69 +++-
>  .../random/random_device/cons/token.cc|   1 +
>  .../random/random_device/entropy.cc   |   7 ++
>  7 files changed, 234 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
> index 90ecc4a87a2..497af5723e1 100644
> --- a/libstdc++-v3/acinclude.m4
> +++ b/libstdc++-v3/acinclude.m4
> @@ -4830,6 +4830,52 @@ AC_DEFUN([GLIBCXX_CHECK_EXCEPTION_PTR_SYMVER], [
>fi
>  ])
>
> +dnl
> +dnl Check whether getentropy is present in .
> +dnl
> +AC_DEFUN([GLIBCXX_CHECK_GETENTROPY], [
> +
> +  AC_LANG_SAVE
> +  AC_LANG_CPLUSPLUS
> +  AC_MSG_CHECKING([for getentropy])
> +  AC_CACHE_VAL(glibcxx_cv_getentropy, [
> +  AC_TRY_COMPILE(
> +   [#include ],
> +   [unsigned i;
> +::getentropy(&i, sizeof(i));],
> +   [glibcxx_cv_getentropy=yes], [glibcxx_cv_getentropy=no])
> +])
> +
> +  if test $glibcxx_cv_getentropy = yes; then
> +AC_DEFINE(HAVE_GETENTROPY, 1, [Define if getentropy is available in
> .])
> +  fi
> +  AC_MSG_RESULT($glibcxx_cv_getentropy)
> +  AC_LANG_RESTORE
> +])
> +
> +dnl
> +dnl Check whether arc4random is present in .
> +dnl
> +AC_DEFUN([GLIBCXX_CHECK_ARC4RANDOM], [
> +
> +  AC_LANG_SAVE
> +  AC_LANG_CPLUSPLUS
> +  AC_MSG_CHECKING([for arc4random])
> +  AC_CACHE_VAL(glibcxx_cv_arc4random, [
> +  AC_TRY_COMPILE(
> +   [#include ],
> +   [unsigned i = ::arc4random();],
> +   [glibcxx_cv_arc4random=yes], [glibcxx_cv_arc4random=no])
> +])
> +
> +  if test $glibcxx_cv_arc4random = yes; then
> +AC_DEFINE(HAVE_ARC4RANDOM, 1, [Define if arc4random is available in
> .])
> +  fi
> +  AC_MSG_RESULT($glibcxx_cv_arc4random)
> +  AC_LANG_RESTORE
> +])
> +
> +
>  # Macros from the top-level gcc directory.
>  m4_include([../config/gc++filt.m4])
>  m4_include([../config/tls.m4])
> diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in
> index 228a758325e..420021fcb1a 100644
> --- a/libstdc++-v3/config.h.in
> +++ b/libstdc++-v3/config.h.in
> @@ -9,6 +9,9 @@
>  /* Define to 1 if you have the `aligned_alloc' function. */
>  #undef HAVE_ALIGNED_ALLOC
>
> +/* Define if arc4random is available in . */
> +#undef HAVE_ARC4RANDOM
> +
>  /* Define to 1 if you have the  header file. */
>  #undef HAVE_ARPA_INET_H
>
> @@ -132,6 +135,9 @@
>  /* Define to 1 if you have the `frexpl' function. */
>  #undef HAVE_FREXPL
>
> +/* Define if getentropy is available in . */
> +#undef HAVE_GETENTROPY
> +
>  /* Define if _Unwind_GetIPInfo is available. */
>  #undef HAVE_GETIPINFO
>
> diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
> index c1aea827070..21371031b66 100755
> --- a/libstdc++-v3/configure
> +++ b/libstdc++-v3/configure
> @@ -75429,6 +75429,109 @@ $as_echo "#define _GLIBCXX_X86_RDSEED 1"
> >>confdefs.h
>  $as_echo "$ac_cv_x86_rdseed" >&6; }
>
>
> +# Check for other random number APIs
> +
> +
> +
> +  ac_ext=cpp
> +ac_cpp='$CXXCPP $CPPFLAGS'
> +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
> +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS
> conftest.$ac_ext $LIBS >&5'
> +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
> +
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for getentropy" >&5
> +$as_echo_n "checking for getentropy... " >&6; }
> +  if ${glibcxx_cv_getentropy+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +
> +  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +#include 
> +int
> +main ()
> +{
> +unsigned i;
> +::getentropy(&i, sizeof(i));
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if a

Re: [PATCH,FORTRAN] Fix memory leak in finalization wrappers

2021-11-05 Thread Mikael Morin

On 29/10/2021 at 01:58, Bernhard Reutner-Fischer via Fortran wrote:

On Wed, 27 Oct 2021 23:39:43 +0200
Bernhard Reutner-Fischer  wrote:


Ping
[hmz. it's been a while, I'll rebase and retest this one.
Ok if it passes?]

Testing passed without any new regressions.
Ok for trunk?
thanks,


On Mon, 15 Oct 2018 10:23:06 +0200
Bernhard Reutner-Fischer  wrote:


If a finalization is not required we created a namespace containing
formal arguments for an internal interface definition but never used
any of these. So the whole sub_ns namespace was not wired up to the
program and consequently was never freed. The fix is to simply not
generate any finalization wrappers if we know that it will be unused.
Note that this reverts back to the original r190869
(8a96d64282ac534cb597f446f02ac5d0b13249cc) handling for this case
by reverting this specific part of r194075
(f1ee56b4be7cc3892e6ccc75d73033c129098e87) for PR fortran/37336.


I’m a bit concerned by the loss of the null_expr’s type interface.
I can’t convince myself that it’s either absolutely necessary or 
completely useless.
Tobias didn’t include a test in his commit unfortunately, but I bet he 
did the change on purpose.
Don’t you get the same effect on the memory leaks if you keep just the 
following hunk?


>>> @@ -1605,8 +1608,7 @@ generate_finalization_wrapper (gfc_symbol 
*derived, gfc_namespace *ns,

>>> /* Set up the namespace.  */
>>> sub_ns = gfc_get_namespace (ns, 0);
>>> sub_ns->sibling = ns->contained;
>>> -  if (!expr_null_wrapper)
>>> -ns->contained = sub_ns;
>>> +  ns->contained = sub_ns;
>>> sub_ns->resolved = 1;
>>>
>>> /* Set up the procedure symbol.  */


The rest of the changes (apart from class.c) are mostly OK with the nit
below and should be put in their own commit.


>>> @@ -3826,10 +3828,8 @@ free_tb_tree (gfc_symtree *t)
>>>
>>> free_tb_tree (t->left);
>>> free_tb_tree (t->right);
>>> -
>>> -  /* TODO: Free type-bound procedure structs themselves; probably 
needs some

>>> - sort of ref-counting mechanism.  */
>>> free (t->n.tb);

Please keep a comment; it remains somewhat valid but could perhaps be
updated: gfc_typebound_proc’s u.generic field, for example, is nowhere
freed as far as I know.


Thanks.

Mikael


Re: [PATCH v4] attribs: Implement -Wno-attributes=vendor::attr [PR101940]

2021-11-05 Thread Jason Merrill via Gcc-patches

On 9/28/21 16:20, Marek Polacek wrote:

On Thu, Sep 23, 2021 at 02:25:16PM -0400, Jason Merrill wrote:

On 9/20/21 18:59, Marek Polacek via Gcc-patches wrote:

+  handle_ignored_attributes_option (&v);
+  /* ??? We can't free (args); here.  */


Perhaps we want to copy strings in handle_ignored_attributes_option rather
than here?


Well, the other use doesn't need copying, so I left it be.


But the other use is modifying the strings passed on the command line, 
which also seems questionable.  I think it would be better for 
handle_ignored_attributes_option to copy the relevant pieces out.


The patch looks good outside of this issue.

Jason



Re: [PATCH 3/5] Add Power10 XXSPLTIW

2021-11-05 Thread will schmidt via Gcc-patches
On Fri, 2021-11-05 at 00:09 -0400, Michael Meissner wrote:
> Generate XXSPLTIW on power10.
> 

Hi,


> This patch adds support to automatically generate the ISA 3.1 XXSPLTIW
> instruction for V8HImode, V4SImode, and V4SFmode vectors.  It does this by
> adding support for vector constants that can be used, and adding a
> VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction.
> 
> The eP constraint was added to recognize constants that can be loaded into
> vector registers with a single prefixed instruction.

Perhaps swap "... the eP constraint was added ..." for "Add the eP
constraint to ...".


> 
> I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector
> constants.
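
For reference, a minimal sketch of the kind of V4SI case such a test
presumably covers (the actual test file isn't shown in this message, so
the contents and function name below are only an assumption): with
-O2 -mcpu=power10 (-mdejagnu-cpu=power10 in the testsuite), the splat
constant below should be loadable with a single xxspltiw.

#include <altivec.h>

/* Every element is the same 32-bit value, so the whole vector constant
   can come from one xxspltiw immediate.  */
vector int
splat_four (void)
{
  return (vector int) { 4, 4, 4, 4 };
}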


> 
> 2021-11-05  Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/constraints.md (eP): Update comment.
>   * config/rs6000/predicates.md (easy_fp_constant): Add support for
>   generating XXSPLTIW.
>   (vsx_prefixed_constant): New predicate.
>   (easy_vector_constant): Add support for
>   generating XXSPLTIW.
>   * config/rs6000/rs6000-protos.h (prefixed_xxsplti_p): New
>   declaration.
>   (constant_generates_xxspltiw): Likewise.
>   * config/rs6000/rs6000.c (xxspltib_constant_p): If we can generate
>   XXSPLTIW, don't do XXSPLTIB and sign extend.

Perhaps just 'generate XXSPLTIW if possible'.  

>   (output_vec_const_move): Add support for XXSPLTIW.
>   (prefixed_xxsplti_p): New function.
>   (constant_generates_xxspltiw): New function.
>   * config/rs6000/rs6000.md (prefixed attribute): Add support to
>   mark XXSPLTI* instructions as being prefixed.
>   * config/rs6000/rs6000.opt (-msplat-word-constant): New debug
>   switch.
>   * config/rs6000/vsx.md (vsx_mov_64bit): Add support for
>   generating XXSPLTIW or XXSPLTIDP.
>   (vsx_mov_32bit): Likewise.
>   * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
>   eP constraint.
> 
> gcc/testsuite/
> 
>   * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test.
>   * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test.
>   * gcc.target/powerpc/vec-splat-constant-v4si.c: New test.
>   * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test.
>   * gcc.target/powerpc/vec-splati-runnable.c: Update insn count.
> ---
>  gcc/config/rs6000/constraints.md  |  6 ++
>  gcc/config/rs6000/predicates.md   | 46 ++-
>  gcc/config/rs6000/rs6000-protos.h |  2 +
>  gcc/config/rs6000/rs6000.c| 81 +++
>  gcc/config/rs6000/rs6000.md   |  5 ++
>  gcc/config/rs6000/rs6000.opt  |  4 +
>  gcc/config/rs6000/vsx.md  | 28 +++
>  gcc/doc/md.texi   |  4 +
>  .../powerpc/vec-splat-constant-v16qi.c| 27 +++
>  .../powerpc/vec-splat-constant-v4sf.c | 67 +++
>  .../powerpc/vec-splat-constant-v4si.c | 51 
>  .../powerpc/vec-splat-constant-v8hi.c | 62 ++
>  .../gcc.target/powerpc/vec-splati-runnable.c  |  4 +-
>  13 files changed, 369 insertions(+), 18 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c
> 
> diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
> index e72132b4c28..a4b05837fa6 100644
> --- a/gcc/config/rs6000/constraints.md
> +++ b/gcc/config/rs6000/constraints.md
> @@ -213,6 +213,12 @@ (define_constraint "eI"
>"A signed 34-bit integer constant if prefixed instructions are supported."
>(match_operand 0 "cint34_operand"))
> 
> +;; A SF/DF scalar constant or a vector constant that can be loaded into vector
> +;; registers with one prefixed instruction such as XXSPLTIDP or XXSPLTIW.
> +(define_constraint "eP"
> +  "A constant that can be loaded into a VSX register with one prefixed insn."
> +  (match_operand 0 "vsx_prefixed_constant"))
> +
>  ;; A TF/KF scalar constant or a vector constant that can load certain IEEE
>  ;; 128-bit constants into vector registers using LXVKQ.
>  (define_constraint "eQ"
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index e0d1c718e9f..ed6252bd0c4 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -605,7 +605,10 @@ (define_predicate "easy_fp_constant"
>vec_const_128bit_type vsx_const;
>if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
>  {
> -  if (constant_generates_lxvkq (&vsx_const) != 0)
> +  if (constant_generates_lxvkq (&vsx_const))
> + return true;
> +
> +  if (constant_generates_xxspltiw (&vsx_const))
>   return true;
>  }
> 


Re: [PATCH 4/5] Add Power10 XXSPLTIDP for vector constants

2021-11-05 Thread will schmidt via Gcc-patches
On Fri, 2021-11-05 at 00:10 -0400, Michael Meissner wrote:
> Generate XXSPLTIDP for vectors on power10.
> 
> This patch implements XXSPLTIDP support for all vector constants.  The
> XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector
> of two DFmode constants.  The immediate is in SFmode format, so only constants
> that fit as SFmode values can be loaded with XXSPLTIDP.
> 
> The constraint (eP) added in the previous patch for XXSPLTIW is also used
> for XXSPLTIDP.
> 

ok


> DImode scalar constants are not handled.  This is because the majority of
> DImode constants will be in the GPR registers.  With vector registers, the
> problem is that XXSPLTIDP splats the doubleword into both elements of the
> vector, whereas a TImode integer constant wants a full 128-bit value.

This may be worth adding as a TODO somewhere in the code.
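
As a concrete illustration of the splat problem (the numbers here are
just an example, not taken from the patch): xxspltidp given the SFmode
bit pattern of 1.0 (0x3f800000) produces the V2DF vector { 1.0, 1.0 },
i.e. the 128-bit pattern 0x3ff0000000000000_3ff0000000000000, so both
doublewords are necessarily identical, whereas a general TImode constant
needs two independent doublewords.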

> 
> SFmode and DFmode scalar constants are not handled in this patch.  The
> support for those constants will be in the next patch.

ok

> 
> I have added a temporary switch (-msplat-float-constant) to control whether or
> not the XXSPLTIDP instruction is generated.
> 
> I added 2 new tests to test loading up V2DI and V2DF vector constants.
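
For reference, a sketch of the kind of V2DF case such a test presumably
covers (the actual test contents and the function names below are only an
assumption): a constant whose elements are exactly representable in
SFmode can use xxspltidp, while one whose elements are not cannot.

#include <altivec.h>

/* 1.5 is exact in SFmode, so at -O2 -mcpu=power10 this splat should be
   loadable with a single xxspltidp.  */
vector double
splat_one_and_a_half (void)
{
  return (vector double) { 1.5, 1.5 };
}

/* 0.1 is not exactly representable in SFmode, so this constant cannot
   use xxspltidp and has to be loaded some other way.  */
vector double
splat_tenth (void)
{
  return (vector double) { 0.1, 0.1 };
}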




> 
> 2021-11-05  Michael Meissner  
> 
> gcc/
> 
>   * config/rs6000/predicates.md (easy_fp_constant): Add support for
>   generating XXSPLTIDP.
>   (vsx_prefixed_constant): Likewise.
>   (easy_vector_constant): Likewise.
>   * config/rs6000/rs6000-protos.h (constant_generates_xxspltidp):
>   New declaration.
>   * config/rs6000/rs6000.c (output_vec_const_move): Add support for
>   generating XXSPLTIDP.
>   (prefixed_xxsplti_p): Likewise.
>   (constant_generates_xxspltidp): New function.
>   * config/rs6000/rs6000.opt (-msplat-float-constant): New debug option.
> 
> gcc/testsuite/
> 
>   * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
>   regex for power10.
>   * gcc.target/powerpc/vec-splat-constant-v2df.c: New test.
>   * gcc.target/powerpc/vec-splat-constant-v2di.c: New test.
> ---


ok

>  gcc/config/rs6000/predicates.md   |   9 ++
>  gcc/config/rs6000/rs6000-protos.h |   1 +
>  gcc/config/rs6000/rs6000.c| 108 ++
>  gcc/config/rs6000/rs6000.opt  |   4 +
>  .../powerpc/pr86731-fwrapv-longlong.c |   9 +-
>  .../powerpc/vec-splat-constant-v2df.c |  64 +++
>  .../powerpc/vec-splat-constant-v2di.c |  50 
>  7 files changed, 241 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c
> 
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index ed6252bd0c4..d748b11857c 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant"
> 
>if (constant_generates_xxspltiw (&vsx_const))
>   return true;
> +
> +  if (constant_generates_xxspltidp (&vsx_const))
> + return true;
>  }
> 
>/* Otherwise consider floating point constants hard, so that the
> @@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant"
>if (constant_generates_xxspltiw (&vsx_const))
>  return true;
> 
> +  if (constant_generates_xxspltidp (&vsx_const))
> +return true;
> +
>return false;
>  })
> 
> @@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant"
> 
> if (constant_generates_xxspltiw (&vsx_const))
>   return true;
> +
> +   if (constant_generates_xxspltidp (&vsx_const))
> + return true;
>   }


ok

> 
>if (TARGET_P9_VECTOR
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 99c6a671289..2d28df7442d 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
>  vec_const_128bit_type *);
>  extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
>  extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
> +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
>  #endif /* RTX_CODE */
> 
>  #ifdef TREE_CODE
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index be24f56eb31..8fde48cf2b3 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands)
> operands[2] = GEN_INT (imm);
> return "xxspltiw %x0,%2";
>   }
> +
> +   imm = constant_generates_xxspltidp (&vsx_const);
> +   if (imm)


Just a nit that the two lines could be combined into a similar form
as used elsewhere as ...
if (constant_generates_xxsp
