Re: [PATCH 7/8] vect: Support multiple lane-reducing operations for loop reduction [PR114440]

2024-07-19 Thread Rainer Orth
Richard Biener  writes:

>> gcc/testsuite/
>> PR tree-optimization/114440
>> * gcc.dg/vect/vect-reduc-chain-1.c
>> * gcc.dg/vect/vect-reduc-chain-2.c
>> * gcc.dg/vect/vect-reduc-chain-3.c
>> * gcc.dg/vect/vect-reduc-chain-dot-slp-1.c
>> * gcc.dg/vect/vect-reduc-chain-dot-slp-2.c
>> * gcc.dg/vect/vect-reduc-chain-dot-slp-3.c
>> * gcc.dg/vect/vect-reduc-chain-dot-slp-4.c
>> * gcc.dg/vect/vect-reduc-dot-slp-1.c

This part of the ChangeLog is incomplete.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH v1] Internal-fn: Only allow modes describe types for internal fn[PR115961]

2024-07-19 Thread Richard Sandiford
pan2...@intel.com writes:
> From: Pan Li 
>
> The direct_internal_fn_supported_p has no restrictions for the type
> modes.  For example the bitfield like below will be recog as .SAT_TRUNC.
>
> struct e
> {
>   unsigned pre : 12;
>   unsigned a : 4;
> };
>
> __attribute__((noipa))
> void bug (e * v, unsigned def, unsigned use) {
>   e & defE = *v;
>   defE.a = min_u (use + 1, 0xf);
> }
>
> This patch would like to add checks for the direct_internal_fn_supported_p,
> and only allows the tree types describled by modes.
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
>   PR target/115961
>
> gcc/ChangeLog:
>
>   * internal-fn.cc (mode_describle_type_precision_p): Add new func
>   impl to check if mode describle the tree type.
>   (direct_internal_fn_supported_p): Add above check for the first
>   and second tree type of tree pair.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/i386/pr115961-run-1.C: New test.
>   * g++.target/riscv/rvv/base/pr115961-run-1.C: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc| 21 
>  .../g++.target/i386/pr115961-run-1.C  | 34 +++
>  .../riscv/rvv/base/pr115961-run-1.C   | 34 +++
>  3 files changed, 89 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 95946bfd683..4dc69264a24 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4164,6 +4164,23 @@ direct_internal_fn_optab (internal_fn fn)
>gcc_unreachable ();
>  }
>  
> +/* Return true if the mode describes the precision of tree type,  or false.  
> */
> +
> +static bool
> +mode_describle_type_precision_p (const_tree type)

Bit pedantic, but it's not really just about precision.  For floats
and vectors it's also about format.  Maybe:

/* Return true if TYPE's mode has the same format as TYPE, and if there is
   a 1:1 correspondence between the values that the mode can store and the
   values that the type can store.  */

And maybe my mode_describes_type_p suggestion wasn't the best,
but given that it's not just about precision, I'm not sure about
mode_describle_type_precision_p either.  How about:

  type_strictly_matches_mode_p

?  I'm open to other suggestions.

> +{
> +  if (VECTOR_TYPE_P (type))
> +return VECTOR_MODE_P (TYPE_MODE (type));
> +
> +  if (INTEGRAL_TYPE_P (type))
> +return type_has_mode_precision_p (type);
> +
> +  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
> +return true;
> +
> +  return false;
> +}
> +
>  /* Return true if FN is supported for the types in TYPES when the
> optimization type is OPT_TYPE.  The types are those associated with
> the "type0" and "type1" fields of FN's direct_internal_fn_info
> @@ -4173,6 +4190,10 @@ bool
>  direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
>   optimization_type opt_type)
>  {
> +  if (!mode_describle_type_precision_p (types.first)
> +|| !mode_describle_type_precision_p (types.second))

Formatting nit: the "||" should line up with the "!".

LGTM otherwise.

Thanks,
Richard

> +return false;
> +
>switch (fn)
>  {
>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
> diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C 
> b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
> new file mode 100644
> index 000..b8c8aef3b17
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
> @@ -0,0 +1,34 @@
> +/* PR target/115961 */
> +/* { dg-do run } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +struct e
> +{
> +  unsigned pre : 12;
> +  unsigned a : 4;
> +};
> +
> +static unsigned min_u (unsigned a, unsigned b)
> +{
> +  return (b < a) ? b : a;
> +}
> +
> +__attribute__((noipa))
> +void bug (e * v, unsigned def, unsigned use) {
> +  e & defE = *v;
> +  defE.a = min_u (use + 1, 0xf);
> +}
> +
> +__attribute__((noipa, optimize(0)))
> +int main(void)
> +{
> +  e v = { 0xded, 3 };
> +
> +  bug(&v, 32, 33);
> +
> +  if (v.a != 0xf)
> +__builtin_abort ();
> +
> +  return 0;
> +}
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C 
> b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
> new file mode 100644
> index 000..b8c8aef3b17
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
> @@ -0,0 +1,34 @@
> +/* PR target/115961 */
> +/* { dg-do run } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +struct e
> +{
> +  unsigned pre : 12;
> +  unsigned a : 4;
> +};
> +
> +static unsigned min_u (unsigned a, unsigned b)
> +{
> +  return (b < a) ? b : a;
> +}
> +
> +__attribu

Re: [r15-2135 Regression] FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings, line 31) on Linux/x86_64

2024-07-19 Thread Thomas Schwinge
Hi!

First, note this is now GCC PR115989
"[15 regression] libgomp.oacc-fortran/privatized-ref-2.f90 fails after 
r15-2135-gc3aa339ea50f05".

Otherwise:

On 2024-07-19T06:54:46+0100, Paul Richard Thomas 
 wrote:
> Thanks for doing that test. Here is what the error looks like on 14-branch:
> libgomp.oacc-fortran/privatized-ref-2.f90:36:22:
>36 |   A = [(3*j, j=1, 10)]
>   |  ^
> Warning: ‘a.offset’ is used uninitialized [-Wuninitialized]
> libgomp.oacc-fortran/privatized-ref-2.f90:31:30:
>31 |   integer, allocatable :: A(:)
>   |  ^
> note: ‘a’ declared here
> libgomp.oacc-fortran/privatized-ref-2.f90:36:22:
> repeats for the descriptor bounds.
>
> The scalarizer, which sets up the loops for the assignment of 'A' assigns
> the bounds and offset to variables. These are then manipulated further and
> used for the loop bounds and allocation. The patch does a once off setting
> of the bounds, to eliminate the bogus warnings. The allocate statement
> already does this.

Maybe you're already aware, but if not, please have a look how PR108889
(Paul's commit r15-2135-gc3aa339ea50f050caf7ed2e497f5499ec2d7b9cc
"Fortran: Suppress bogus used uninitialized warnings [PR108889]") relates
to "PR77504 etc." as mentioned in
'libgomp.oacc-fortran/privatized-ref-2.f90'?

> I will patch appropriately just as soon as I am able.

Next, the proposed patch:

> On Fri, 19 Jul 2024 at 02:59, Jiang, Haochen 
> wrote:
>> Just did a quick test. Correct myself previously. Those lines also
>> needs to be removed since they are XPASS now.
>>
>> However the real issue is the dg-note at Line 32, that is the warning
>> disappeared.
>>
>> diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90 
>> b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>> index 498ef70b63a..8cf79a10e8d 100644
>> --- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>> +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>> @@ -29,16 +29,10 @@ program main
>>implicit none (type, external)
>>integer :: j
>>integer, allocatable :: A(:)
>> -  ! { dg-note {'a' declared here} {} { target *-*-* } .-1 }
>>character(len=:), allocatable :: my_str
>>character(len=15), allocatable :: my_str15
>>
>>A = [(3*j, j=1, 10)]
>> -  ! { dg-bogus {'a\.offset' is used uninitialized} {PR77504 etc.} { xfail 
>> *-*-* } .-1 }
>> -  ! { dg-bogus {'a\.dim\[0\]\.lbound' is used uninitialized} {PR77504 etc.} 
>> { xfail *-*-* } .-2 }
>> -  ! { dg-bogus {'a\.dim\[0\]\.ubound' is used uninitialized} {PR77504 etc.} 
>> { xfail *-*-* } .-3 }
>> -  ! { dg-bogus {'a\.dim\[0\]\.lbound' may be used uninitialized} {PR77504 
>> etc.} { xfail { ! __OPTIMIZE__ } } .-4 }
>> -  ! { dg-bogus {'a\.dim\[0\]\.ubound' may be used uninitialized} {PR77504 
>> etc.} { xfail { ! __OPTIMIZE__ } } .-5 }
>>call foo (A, size(A))
>>call bar (A)
>>my_str = "1234567890"
>>
>> After the change, all the tests are passed. However, is that right?

... looks exactly right to me.  Please push.


Grüße
 Thomas


>> I am not familiar with either Fortran or libgomp, but the warning
>> like something declared here which might report variable declaration
>> conflict seems needed.
>>
>> Thx,
>> Haochen
>>
>> *From:* Jiang, Haochen
>> *Sent:* Friday, July 19, 2024 9:49 AM
>> *To:* Paul Richard Thomas 
>> *Cc:* pa...@gcc.gnu.org; gcc-regress...@gcc.gnu.org;
>> gcc-patches@gcc.gnu.org
>> *Subject:* RE: [r15-2135 Regression] FAIL:
>> libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
>> -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings,
>> line 31) on Linux/x86_64
>>
>>
>>
>> Hi Paul,
>>
>>
>>
>> I suspect it is not the correct way to do that, those lines are ok since
>> they are XFAIL. The problem is that specific warning test.
>>
>>
>>
>> Thx,
>>
>> Haochen
>>
>>
>>
>> *From:* Paul Richard Thomas 
>> *Sent:* Friday, July 19, 2024 12:28 AM
>> *To:* haochen.jiang 
>> *Cc:* pa...@gcc.gnu.org; gcc-regress...@gcc.gnu.org;
>> gcc-patches@gcc.gnu.org; Jiang, Haochen 
>> *Subject:* Re: [r15-2135 Regression] FAIL:
>> libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
>> -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings,
>> line 31) on Linux/x86_64
>>
>>
>>
>> Hi Haochen,
>>
>>
>>
>> Try removing lines 37-41 since these are precisely the bogus warnings that
>> the patch is meant to eliminate.
>>
>>
>>
>> Regards
>>
>>
>>
>> Paul
>>
>>
>>
>> On Thu, 18 Jul 2024 at 14:38, haochen.jiang 
>> wrote:
>>
>> On Linux/x86_64,
>>
>> c3aa339ea50f050caf7ed2e497f5499ec2d7b9cc is the first bad commit
>> commit c3aa339ea50f050caf7ed2e497f5499ec2d7b9cc
>> Author: Paul Thomas 
>> Date:   Thu Jul 18 08:51:35 2024 +0100
>>
>> Fortran: Suppress bogus used uninitialized warnings [PR108889].
>>
>> caused
>>
>> FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
>> -DACC_MEM_SHARED=1 -foffload=disable  -O0   at line 32

[PATCH] s390: testsuite: Fix vcond-shift.c

2024-07-19 Thread Stefan Schulze Frielinghaus
Previously we optimized expressions of the form a < 0 ? -1 : 0 to
(signed)a >> 31 during vcond expanding.  Since r15-1741-g2ccdd0f22312a1
this is done in match.pd.  The implementation in the back end as well as
in match.pd are basically the same but still distinct.  For the tests in
vcond-shift.c the back end emitted

  (xx - (xx >> 31)) >> 1

whereas now via match.pd

  ((int) ((unsigned int) xx >> 31) + xx) >> 1

which is basically the same.  We just have to adapt the scan-assembler
directives w.r.t. signed/unsigned shifts which is done by this patch.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/vcond-shift.c: Adapt to new match.pd
rule and change scan-assembler-times for shifts.
---
 Regtested on s390.  Ok for mainline?

 gcc/testsuite/gcc.target/s390/vector/vcond-shift.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
index a6b4e97aa50..b942f44039d 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
@@ -3,13 +3,13 @@
 /* { dg-do compile { target { s390*-*-* } } } */
 /* { dg-options "-O3 -march=z13 -mzarch" } */
 
-/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
-/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
-/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
+/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 4 } } */
+/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 4 } } */
+/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 4 } } */
 /* { dg-final { scan-assembler-not "vzero\t*" } } */
-/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
-/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
-/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
+/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 6 } } */
+/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 6 } } */
+/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 6 } } */
 
 /* Make it expand to two vector operations.  */
 #define ITER(X) (2 * (16 / sizeof (X[1])))
-- 
2.45.2



Re: [r15-2135 Regression] FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings, line 31) on Linux/x86_64

2024-07-19 Thread Paul Richard Thomas
Hi Haochen,

Thanks for doing that test. Here is what the error looks like on 14-branch:
libgomp.oacc-fortran/privatized-ref-2.f90:36:22:
   36 |   A = [(3*j, j=1, 10)]
  |  ^
Warning: ‘a.offset’ is used uninitialized [-Wuninitialized]
libgomp.oacc-fortran/privatized-ref-2.f90:31:30:
   31 |   integer, allocatable :: A(:)
  |  ^
note: ‘a’ declared here
libgomp.oacc-fortran/privatized-ref-2.f90:36:22:
repeats for the descriptor bounds.

The scalarizer, which sets up the loops for the assignment of 'A' assigns
the bounds and offset to variables. These are then manipulated further and
used for the loop bounds and allocation. The patch does a once off setting
of the bounds, to eliminate the bogus warnings. The allocate statement
already does this.

I will patch appropriately just as soon as I am able.

Thanks again

Paul


On Fri, 19 Jul 2024 at 02:59, Jiang, Haochen 
wrote:

> Just did a quick test. Correct myself previously. Those lines also
>
> needs to be removed since they are XPASS now.
>
>
>
> However the real issue is the dg-note at Line 32, that is the warning
>
> disappeared.
>
>
>
> diff --git a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
> b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>
> index 498ef70b63a..8cf79a10e8d 100644
>
> --- a/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/privatized-ref-2.f90
>
> @@ -29,16 +29,10 @@ program main
>
>implicit none (type, external)
>
>integer :: j
>
>integer, allocatable :: A(:)
>
> -  ! { dg-note {'a' declared here} {} { target *-*-* } .-1 }
>
>character(len=:), allocatable :: my_str
>
>character(len=15), allocatable :: my_str15
>
>
>
>A = [(3*j, j=1, 10)]
>
> -  ! { dg-bogus {'a\.offset' is used uninitialized} {PR77504 etc.} { xfail
> *-*-* } .-1 }
>
> -  ! { dg-bogus {'a\.dim\[0\]\.lbound' is used uninitialized} {PR77504
> etc.} { xfail *-*-* } .-2 }
>
> -  ! { dg-bogus {'a\.dim\[0\]\.ubound' is used uninitialized} {PR77504
> etc.} { xfail *-*-* } .-3 }
>
> -  ! { dg-bogus {'a\.dim\[0\]\.lbound' may be used uninitialized} {PR77504
> etc.} { xfail { ! __OPTIMIZE__ } } .-4 }
>
> -  ! { dg-bogus {'a\.dim\[0\]\.ubound' may be used uninitialized} {PR77504
> etc.} { xfail { ! __OPTIMIZE__ } } .-5 }
>
>call foo (A, size(A))
>
>call bar (A)
>
>my_str = "1234567890"
>
>
>
> After the change, all the tests are passed. However, is that right?
>
>
>
> I am not familiar with either Fortran or libgomp, but the warning
>
> like something declared here which might report variable declaration
>
> conflict seems needed.
>
>
>
> Thx,
>
> Haochen
>
>
>
> *From:* Jiang, Haochen
> *Sent:* Friday, July 19, 2024 9:49 AM
> *To:* Paul Richard Thomas 
> *Cc:* pa...@gcc.gnu.org; gcc-regress...@gcc.gnu.org;
> gcc-patches@gcc.gnu.org
> *Subject:* RE: [r15-2135 Regression] FAIL:
> libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings,
> line 31) on Linux/x86_64
>
>
>
> Hi Paul,
>
>
>
> I suspect it is not the correct way to do that, those lines are ok since
> they are XFAIL. The problem is that specific warning test.
>
>
>
> Thx,
>
> Haochen
>
>
>
> *From:* Paul Richard Thomas 
> *Sent:* Friday, July 19, 2024 12:28 AM
> *To:* haochen.jiang 
> *Cc:* pa...@gcc.gnu.org; gcc-regress...@gcc.gnu.org;
> gcc-patches@gcc.gnu.org; Jiang, Haochen 
> *Subject:* Re: [r15-2135 Regression] FAIL:
> libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable -Os at line 32 (test for warnings,
> line 31) on Linux/x86_64
>
>
>
> Hi Haochen,
>
>
>
> Try removing lines 37-41 since these are precisely the bogus warnings that
> the patch is meant to eliminate.
>
>
>
> Regards
>
>
>
> Paul
>
>
>
> On Thu, 18 Jul 2024 at 14:38, haochen.jiang 
> wrote:
>
> On Linux/x86_64,
>
> c3aa339ea50f050caf7ed2e497f5499ec2d7b9cc is the first bad commit
> commit c3aa339ea50f050caf7ed2e497f5499ec2d7b9cc
> Author: Paul Thomas 
> Date:   Thu Jul 18 08:51:35 2024 +0100
>
> Fortran: Suppress bogus used uninitialized warnings [PR108889].
>
> caused
>
> FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable  -O0   at line 32 (test for warnings,
> line 31)
> FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable  -O1   at line 32 (test for warnings,
> line 31)
> FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable  -O2   at line 32 (test for warnings,
> line 31)
> FAIL: libgomp.oacc-fortran/privatized-ref-2.f90 -DACC_DEVICE_TYPE_host=1
> -DACC_MEM_SHARED=1 -foffload=disable  -O3 -fomit-frame-pointer
> -funroll-loops -fpeel-loops -ftracer -finline-functions   at line 32 (test
> for warnings, line 31)
> FAIL: libgomp.oacc-fortran/privatized-ref

Re: [PATCH] c-family: Introduce the -Winvalid-noreturn flag from clang with extra tuneability

2024-07-19 Thread Julian Waters
Attempting to resend with a different client, plain text enabled...

Hi Jason,

I’ve managed to address your review comments, and rewrite the patch to
include both -Winvalid-noreturn and -Winvalid-noreturn= but have trouble
figuring out how to format invoke.texi and where to add the documentation
for the new warning options. I’ve also made this a Common option, since
it’s come to my attention that the warning is not specific to c-family, but
is also used by other languages too (See tree-cfg.cc). Here’s the current
version of the patch, hope it’s good to go this time

best regards,
Julian

>From fe1b4d5747e05101410b6bb6db9430362e3977d9 Mon Sep 17 00:00:00 2001
From: Julian Waters 
Date: Fri, 19 Jul 2024 11:22:38 +0800
Subject: [PATCH] Introduce the -Winvalid-noreturn flag from clang with extra
 tuneability

Currently, gcc warns about noreturn marked functions that return both
explicitly and implicitly, with no way to turn this warning off. clang does
have an option for these classes of warnings, -Winvalid-noreturn. However,
we can do better. Instead of just having 1 option that switches the
warnings for both on and off, we can define an extra layer of granularity,
and have a separate options for implicit returns and explicit returns, as
in -Winvalid-return=explicit and -Winvalid-noreturn=implicit, on top of the
regular -Winvalid-noreturn. This patch adds both to gcc, for compatibility
with clang.

gcc/ChangeLog:

* common.opt: Introduce -Winvalid-noreturn and -Winvalid-noreturn=
* opts.cc (common_handle_option): Handle new option
* flag-types.h: New flags for -Winvalid-noreturn=
* tree-cfg.cc (pass_warn_function_return::execute): Use it

gcc/c/ChangeLog:

* c-typeck.cc (c_finish_return): Use it
* gimple-parser.cc (c_finish_gimple_return): Use it

gcc/config/mingw/ChangeLog:

* mingw32.h (EXTRA_OS_CPP_BUILTINS): Fix semicolons

gcc/cp/ChangeLog:

* coroutines.cc (finish_co_return_stmt): Use it
* typeck.cc (check_return_expr): Use it

Signed-off-by: Julian Waters 
---
 gcc/c/c-typeck.cc  |  4 ++--
 gcc/c/gimple-parser.cc |  4 ++--
 gcc/common.opt | 13 +
 gcc/config/mingw/mingw32.h |  6 +++---
 gcc/cp/coroutines.cc   |  4 ++--
 gcc/cp/typeck.cc   |  4 ++--
 gcc/flag-types.h   |  7 +++
 gcc/opts.cc| 19 +++
 gcc/tree-cfg.cc|  3 ++-
 9 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 7e0f01ed22b..0f8c8cfff2e 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -11738,8 +11738,8 @@ c_finish_return (location_t loc, tree retval, tree
origtype)
  in a function returning void.  */
   location_t xloc = expansion_point_location_if_in_system_header (loc);

-  if (TREE_THIS_VOLATILE (current_function_decl))
-warning_at (xloc, 0,
+  if (TREE_THIS_VOLATILE (current_function_decl) && flag_invalid_noreturn
!= invalid_noreturn_kind::EXPLICIT)
+warning_at (xloc, OPT_Winvalid_noreturn,
  "function declared % has a % statement");

   if (retval)
diff --git a/gcc/c/gimple-parser.cc b/gcc/c/gimple-parser.cc
index d156d83cd37..8a684fe334a 100644
--- a/gcc/c/gimple-parser.cc
+++ b/gcc/c/gimple-parser.cc
@@ -2592,8 +2592,8 @@ c_finish_gimple_return (location_t loc, tree retval)
  in a function returning void.  */
   location_t xloc = expansion_point_location_if_in_system_header (loc);

-  if (TREE_THIS_VOLATILE (current_function_decl))
-warning_at (xloc, 0,
+  if (TREE_THIS_VOLATILE (current_function_decl) && flag_invalid_noreturn
!= invalid_noreturn_kind::EXPLICIT)
+warning_at (xloc, OPT_Winvalid_noreturn,
  "function declared % has a % statement");

   if (! retval)
diff --git a/gcc/common.opt b/gcc/common.opt
index ea39f87ae71..d44ff0231c3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -94,6 +94,11 @@ void *flag_instrument_functions_exclude_files
 Variable
 void *flag_ignored_attributes

+; The noreturn kind to ignore when warning for noreturn code that
+; does return.
+Variable
+invalid_noreturn_kind flag_invalid_noreturn = invalid_noreturn_kind::NONE
+
 ; Generic structs (e.g. templates not explicitly specialized)
 ; may not have a compilation unit associated with them, and so
 ; may need to be treated differently from ordinary structs.
@@ -667,6 +672,14 @@ Winvalid-memory-model
 Common Var(warn_invalid_memory_model) Init(1) Warning
 Warn when an atomic memory model parameter is known to be outside the
valid range.

+Winvalid-noreturn
+Common Warning
+Warn when code marked noreturn returns implicitly and/or explicitly.
+
+Winvalid-noreturn=
+Common Joined Warning
+Warn when code marked noreturn returns in the manner specified.
+
 Wlarger-than-
 Common RejectNegative Joined Warning Undocumented Alias(Wlarger-than=)

diff --git a/gcc/config/mingw/mingw32.h b/gcc/config/mingw/mingw32.h
index 0dfe8e995b6..48eadfa2c2c 100644
--- a/gcc/config/mingw/mingw32.h
+++ b/gcc/conf

[PATCH v2] RISC-V: More support of vx and vf for autovec comparison

2024-07-19 Thread demin.han
There are still some cases which can't utilize vx or vf after
last_combine pass.

1. integer comparison when imm isn't in range of [-16, 15]
2. float imm is 0.0
3. DI or DF mode under RV32

This patch fix above mentioned issues.

Tested on RV32 and RV64.

Signed-off-by: demin.han 
gcc/ChangeLog:

* config/riscv/autovec.md: register_operand to nonmemory_operand
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select code according
* to scalar_p
(expand_vec_cmp): Generate scalar_p and transform op1
* config/riscv/riscv.cc (riscv_const_insns): Add !FLOAT_MODE_P
* constrain

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Fix and add test

Signed-off-by: demin.han 
---
V2 changes:
  1. remove unnecessary add_integer_operand and related code
  2. fix one format issue
  3. split patch and make it only related to vec cmp

 gcc/config/riscv/autovec.md   |  2 +-
 gcc/config/riscv/riscv-v.cc   | 57 +++
 gcc/config/riscv/riscv.cc |  2 +-
 .../riscv/rvv/autovec/cmp/vcond-1.c   | 48 +++-
 4 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index d5793acc999..a772153 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp"
   [(set (match_operand: 0 "register_operand")
(match_operator: 1 "comparison_operator"
  [(match_operand:V_VLSF 2 "register_operand")
-  (match_operand:V_VLSF 3 "register_operand")]))]
+  (match_operand:V_VLSF 3 "nonmemory_operand")]))]
   "TARGET_VECTOR"
   {
 riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e290675bbf0..56328075aeb 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2624,32 +2624,27 @@ expand_vec_init (rtx target, rtx vals)
 /* Get insn code for corresponding comparison.  */
 
 static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 {
   insn_code icode;
-  switch (code)
+  if (FLOAT_MODE_P (mode))
 {
-case EQ:
-case NE:
-case LE:
-case LEU:
-case GT:
-case GTU:
-case LTGT:
-  icode = code_for_pred_cmp (mode);
-  break;
-case LT:
-case LTU:
-case GE:
-case GEU:
-  if (FLOAT_MODE_P (mode))
-   icode = code_for_pred_cmp (mode);
+  icode = !scalar_p ? code_for_pred_cmp (mode)
+   : code_for_pred_cmp_scalar (mode);
+  return icode;
+}
+  if (scalar_p)
+{
+  if (code == GE || code == GEU)
+   icode = code_for_pred_ge_scalar (mode);
   else
-   icode = code_for_pred_ltge (mode);
-  break;
-default:
-  gcc_unreachable ();
+   icode = code_for_pred_cmp_scalar (mode);
+  return icode;
 }
+  if (code == LT || code == LTU || code == GE || code == GEU)
+icode = code_for_pred_ltge (mode);
+  else
+icode = code_for_pred_cmp (mode);
   return icode;
 }
 
@@ -2771,7 +2766,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1, rtx mask,
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);
 
   if (code == LTGT)
 {
@@ -2779,12 +2773,29 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1, rtx mask,
   rtx gt = gen_reg_rtx (mask_mode);
   expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
   expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
-  icode = code_for_pred (IOR, mask_mode);
+  insn_code icode = code_for_pred (IOR, mask_mode);
   rtx ops[] = {target, lt, gt};
   emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
   return;
 }
 
+  rtx elt;
+  bool scalar_p = false;
+  if (const_vec_duplicate_p (op1, &elt))
+{
+  if (FLOAT_MODE_P (data_mode))
+   {
+ scalar_p = true;
+ op1 = force_reg (GET_MODE_INNER (GET_MODE (op1)), elt);
+   }
+  else if (!has_vi_variant_p (code, elt))
+   {
+ scalar_p = true;
+ op1 = elt;
+   }
+}
+  insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
   rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
   if (!mask && !maskoff)
 {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 19b9b2daa95..ad5668b2c5a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2140,7 +2140,7 @@ riscv_const_insns (rtx x)
   register vec_duplicate into vmv.v.x.  */
scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
-   && !immediate_operand (elt, Pmode))
+   && !FLOAT_MODE_P (smode) && !immediate_opera

Re: [PATCH] s390: testsuite: Fix vcond-shift.c

2024-07-19 Thread Andrew Pinski
On Thu, Jul 18, 2024 at 10:31 PM Stefan Schulze Frielinghaus
 wrote:
>
> Previously we optimized expressions of the form a < 0 ? -1 : 0 to
> (signed)a >> 31 during vcond expanding.  Since r15-1741-g2ccdd0f22312a1
> this is done in match.pd.  The implementation in the back end as well as
> in match.pd are basically the same but still distinct.  For the tests in
> vcond-shift.c the back end emitted
>
>   (xx - (xx >> 31)) >> 1
>
> whereas now via match.pd
>
>   ((int) ((unsigned int) xx >> 31) + xx) >> 1
>
> which is basically the same.  We just have to adapt the scan-assembler
> directives w.r.t. signed/unsigned shifts which is done by this patch.

Note I filed https://gcc.gnu.org/PR115999 because I noticed those 2
form produce slightly different code generation for scalars (I assume
it will produce similar issues for vectors too).

Thanks,
Andrew Pinski

>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/s390/vector/vcond-shift.c: Adapt to new match.pd
> rule and change scan-assembler-times for shifts.
> ---
>  Regtested on s390.  Ok for mainline?
>
>  gcc/testsuite/gcc.target/s390/vector/vcond-shift.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
> b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> index a6b4e97aa50..b942f44039d 100644
> --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> @@ -3,13 +3,13 @@
>  /* { dg-do compile { target { s390*-*-* } } } */
>  /* { dg-options "-O3 -march=z13 -mzarch" } */
>
> -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 4 } } */
> +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 4 } } */
> +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 4 } } */
>  /* { dg-final { scan-assembler-not "vzero\t*" } } */
> -/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
> -/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
> -/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> +/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 6 } } */
> +/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 6 } } */
>
>  /* Make it expand to two vector operations.  */
>  #define ITER(X) (2 * (16 / sizeof (X[1])))
> --
> 2.45.2
>


Re: [PATCH] s390: testsuite: Fix vcond-shift.c

2024-07-19 Thread Stefan Schulze Frielinghaus
On Thu, Jul 18, 2024 at 11:58:10PM -0700, Andrew Pinski wrote:
> On Thu, Jul 18, 2024 at 10:31 PM Stefan Schulze Frielinghaus
>  wrote:
> >
> > Previously we optimized expressions of the form a < 0 ? -1 : 0 to
> > (signed)a >> 31 during vcond expanding.  Since r15-1741-g2ccdd0f22312a1
> > this is done in match.pd.  The implementation in the back end as well as
> > in match.pd are basically the same but still distinct.  For the tests in
> > vcond-shift.c the back end emitted
> >
> >   (xx - (xx >> 31)) >> 1
> >
> > whereas now via match.pd
> >
> >   ((int) ((unsigned int) xx >> 31) + xx) >> 1
> >
> > which is basically the same.  We just have to adapt the scan-assembler
> > directives w.r.t. signed/unsigned shifts which is done by this patch.
> 
> Note I filed https://gcc.gnu.org/PR115999 because I noticed those 2
> form produce slightly different code generation for scalars (I assume
> it will produce similar issues for vectors too).

Thanks for the heads up.  In that case we should probably wait a bit
once a normal form or whatever has settled.

Cheers,
Stefan

> 
> Thanks,
> Andrew Pinski
> 
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/s390/vector/vcond-shift.c: Adapt to new match.pd
> > rule and change scan-assembler-times for shifts.
> > ---
> >  Regtested on s390.  Ok for mainline?
> >
> >  gcc/testsuite/gcc.target/s390/vector/vcond-shift.c | 12 ++--
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
> > b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > index a6b4e97aa50..b942f44039d 100644
> > --- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > +++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
> > @@ -3,13 +3,13 @@
> >  /* { dg-do compile { target { s390*-*-* } } } */
> >  /* { dg-options "-O3 -march=z13 -mzarch" } */
> >
> > -/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
> > -/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 4 } } */
> > +/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 4 } } */
> > +/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 4 } } */
> >  /* { dg-final { scan-assembler-not "vzero\t*" } } */
> > -/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
> > -/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
> > -/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
> > +/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 6 } } */
> > +/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 6 } } */
> >
> >  /* Make it expand to two vector operations.  */
> >  #define ITER(X) (2 * (16 / sizeof (X[1])))
> > --
> > 2.45.2
> >


[PATCH] RISC-V: Fix double mode under RV32 not utilize vf

2024-07-19 Thread demin.han
Currently, some binops of vector vs double scalar under RV32 can't
translated to vf but vfmv+vxx.vv.

The cause is that vec_duplicate is also expanded to broadcast for double mode
under RV32. last-combine can't process expanded broadcast.

gcc/ChangeLog:

* config/riscv/vector.md: Add !FLOAT_MODE_P constrain

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c: Fix test
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/binop/vsub-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-5.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-6.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-5.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-6.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-5.c: Ditto

Signed-off-by: demin.han 
---
 gcc/config/riscv/vector.md| 3 ++-
 .../riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c   | 4 ++--
 .../riscv/rvv/autovec/binop/vdiv-rv32gcv-nofm.c   | 4 ++--
 .../riscv/rvv/autovec/binop/vmul-rv32gcv-nofm.c   | 4 ++--
 .../riscv/rvv/autovec/binop/vsub-rv32gcv-nofm.c   | 4 ++--
 .../riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c| 8 
 .../gcc.target/riscv/rvv/autovec/cond/cond_fadd-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fadd-2.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fadd-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fadd-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-5.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-6.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmax-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmax-2.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmax-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmax-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmin-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmin-2.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmin-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmin-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-5.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-6.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmul-1.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmul-2.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmul-3.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmul-4.c   | 4 ++--
 .../gcc.target/riscv/rvv/autovec/cond/cond_fmul-5.c   | 4 ++--
 33 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index bcedf3d79e2..d1518f3e623 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/risc

Re: [PATCH 3/3] Add power11 tests

2024-07-19 Thread Segher Boessenkool
On Thu, Jul 18, 2024 at 09:53:05AM -0500, Peter Bergner wrote:
> On 7/18/24 8:23 AM, Segher Boessenkool wrote:
> > Everything in gcc.target/powerpc is only run for target powerpc*-*-*
> > anyway, so make this just
> > 
> > /* { dg-do compile } */
> > 
> > please.  (Or nothing, it is the default after all, but you may want to
> > have it explicit).
> 
> Personally, I do like seeing the /* { dg-do compile } */ even though
> that is the default in this testsuite directory.

It depends :-)  For testsuites that often do something else as well
(like in gcc.target), it can be nice to be verbose, yes.

> >> +/* { dg-require-effective-target powerpc_vsx_ok } */
> > 
> > Why is this needed?  It needs a comment saying that.
> 
> After Kewen's testsuite patch, powerpc_vsx_ok is pretty useless as it only
> checks whether the assembler knows about vsx, and what people normally want
> is powerpc_vsx which verifies that the compiler options used to compile the
> test support generating VSX insns.

That, sure, but even then it does not make any sense.  Each of the
funcs uses its own -mcpu= so either will have VSX or not, and it doesn't
matter at all what the default compiler target does.

And, absolutely nothing in this test case uses VSX at all.

> > Saying that this is "to test if can use the target attr" is nonsense.
> > That does not need Linux either btw.  Target selection might not work
> > correctly currently on some other systems, but this is not a run test!
> 
> Agreed.  Also, if the clones stuff really doesn't work for those
> targets even in a compile test, then rather than diabling this way,
> I think I'd like to see a target-supports clones_ok test or similar.

Yup.


Segher


[Patch, rs6000, middle-end] v7: Add implementation for different targets for pair mem fusion

2024-07-19 Thread Ajit Agarwal
Hello Richard:

All comments are addressed.

Common infrastructure using generic code for pair mem fusion of different
targets.

rs6000 target specific code implement virtual functions defined by generic code.

Target specific code are added in rs6000-mem-fusion.cc.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


rs6000, middle-end: Add implementation for different targets for pair mem fusion

Common infrastructure using generic code for pair mem fusion of different
targets.

rs6000 target specific code implement virtual functions defined by generic code.

Target specific code are added in rs6000-mem-fusion.cc.

2024-07-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000-passes.def: New mem fusion pass
before pass_early_remat.
* pair-fusion.h: Add additional pure virtual function
required for rs6000 target implementation.
* pair-fusion.cc: Use of virtual functions for additional
virtual function addded for rs6000 target.
* config/rs6000/rs6000-mem-fusion.cc: Add new pass.
Add target specific implementation for generic pure virtual
functions.
* config/rs6000/mma.md: Modify movoo machine description.
Add new machine description movoo1.
* config/rs6000/rs6000.cc: Modify rs6000_split_multireg_move
to expand movoo machine description for all constraints.
* config.gcc: Add new object file.
* config/rs6000/rs6000-protos.h: Add new prototype for mem
fusion pass.
* config/rs6000/t-rs6000: Add new rule.
* rtl-ssa/functions.h: Move out allocate function from private
to public and add get_m_temp_defs function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/mem-fusion.C: New test.
* g++.target/powerpc/mem-fusion-1.C: New test.
* gcc.target/powerpc/mma-builtin-1.c: Modify test.
---
 gcc/config.gcc|   2 +
 gcc/config/rs6000/mma.md  |  26 +-
 gcc/config/rs6000/rs6000-mem-fusion.cc| 746 ++
 gcc/config/rs6000/rs6000-passes.def   |   4 +-
 gcc/config/rs6000/rs6000-protos.h |   1 +
 gcc/config/rs6000/rs6000.cc   |  58 +-
 gcc/config/rs6000/rs6000.md   |   1 +
 gcc/config/rs6000/t-rs6000|   5 +
 gcc/pair-fusion.cc|  32 +-
 gcc/pair-fusion.h |  48 ++
 gcc/rtl-ssa/functions.h   |  11 +-
 .../g++.target/powerpc/mem-fusion-1.C |  22 +
 gcc/testsuite/g++.target/powerpc/mem-fusion.C |  15 +
 .../gcc.target/powerpc/mma-builtin-1.c|   4 +-
 14 files changed, 946 insertions(+), 29 deletions(-)
 create mode 100644 gcc/config/rs6000/rs6000-mem-fusion.cc
 create mode 100644 gcc/testsuite/g++.target/powerpc/mem-fusion-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/mem-fusion.C

diff --git a/gcc/config.gcc b/gcc/config.gcc
index bc45615741b..12f79a78177 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -524,6 +524,7 @@ powerpc*-*-*)
extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
+   extra_objs="${extra_objs} rs6000-mem-fusion.o"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h"
extra_headers="${extra_headers} xmmintrin.h mm_malloc.h emmintrin.h"
@@ -560,6 +561,7 @@ rs6000*-*-*)
extra_options="${extra_options} g.opt fused-madd.opt 
rs6000/rs6000-tables.opt"
extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
+   extra_objs="${extra_objs} rs6000-mem-fusion.o"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/rs6000/rs6000-logue.cc 
\$(srcdir)/config/rs6000/rs6000-call.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
;;
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 04e2d0066df..88413926a02 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -294,7 +294,31 @@
 
 (define_insn_and_split "*movoo"
   [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
-   (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
+(match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
+  "TARGET_MMA
+   && (gpc_reg_operand (operands[0], OOmode)
+   || gpc_reg_operand (operands[1], OOmode))"
+;;""
+  "@
+   #
+   #
+   #"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rs6000_split_multireg_move (operands[0], operands[1]);
+  DONE;
+}
+  [(set_attr "type" "vecload,vecstore,veclogical")
+   (set_attr "length" "*,*,8")])
+;;   (set_attr "max_prefixed_insns" "2,2,*")])
+
+
+(define_insn_and_split "*movoo1"
+  [(set (match_operand:OO 0 "no

Re: [Patch, rs6000, middle-end] v6: Add implementation for different targets for pair mem fusion

2024-07-19 Thread Ajit Agarwal
Hello Richard:

On 18/07/24 4:44 pm, Richard Sandiford wrote:
> Ajit Agarwal  writes:
>> [...]
 +// Set subreg for OO mode pair to generate sequential registers given
 +// insn_info pairs I1, I2 and LOAD_P is true iff load insn and false
 +// if store insn.
 +void
 +rs6000_pair_fusion::set_multiword_subreg (insn_info *i1, insn_info *i2,
 +bool load_p)
 +{
 +  if (load_p)
 +{
 +  bool i1_subreg_p = use_has_subreg_p (i1);
 +  bool i2_subreg_p = use_has_subreg_p (i2);
 +
 +  if (i1_subreg_p || i2_subreg_p)
 +  set_multiword_existing_subreg (i1, i2);
 +  else
 +  set_multiword_subreg_load (i1, i2);
>>>
>>> I don't understand this.  Why do we have both set_multiword_existing_subreg
>>> and set_multiword_subreg_load?  i1_subreg_p and i2_subreg_p are logically
>>> independent of one another (since i1 and i2 were separate instructions
>>> until now).  So "i1_subreg_p || i2_subreg_p" implies that
>>> set_multiword_existing_subreg can handle i1s that have no existing
>>> subreg (used when i2_subreg_p) and that it can handle i2s that have no
>>> existing subreg (used when i1_subreg_p).  So doesn't this mean that
>>> set_multiword_existing_subreg can handle everything?
>>>
>>
>> I will make the following change.
>>  if (load_p)
>> {
>>   bool i1_subreg_p = use_has_subreg_p (i1);
>>   bool i2_subreg_p = use_has_subreg_p (i2);
>>
>>   if (!i1_subreg_p && !i2_subreg_p) 
>> set_multiword_subreg_load (i1, i2);
>>   else
>> set_multiword_existing_subreg (i1, i2);
>> }
>>
>> Is this okay.
> 
> That's the same thing though: it's just replacing a ? A : B with !a ? B : A.
> 

Addressed in v7 of the patch.

>>> IMO, the way the update should work is that:
>>>
>>> (a) all references to the old registers should be updated via
>>> insn_propagation (regardless of whether the old references
>>> involved subregs).
>>>
>>> (b) those updates should be part of the same insn_change group as
>>> the change to the load itself.
>>>
>>> For stores, definitions of the stored register can probably be handled
>>> directly using df_refs, but there too, the updates should IMO be part
>>> of the same insn_change group as the change to the store itself.
>>>
>>> In both cases, it's the:
>>>
>>>   crtl->ssa->change_insns (changes);
>>>
>>> in pair_fusion_bb_info::fuse_pair that should be responsible for
>>> updating the rtl-ssa IR.  The changes that the pass wants to make
>>> should be described as insn_changes and passed to change_insns.
>>>
>>> The reason for funneling all changes through change_insns is that
>>> it allows rtl-ssa to maintain more complex datastructures.  Clients
>>> aren't supposed to manually update the datastructures piecemeal.
>>>
>>
>> I am afraid I am not getting this. Would you mind elaborating this.
>> Sorry for that.
> 
> See how fwprop.cc makes changes.  It:
> 
> - creates an insn_change for each change that it wants to make
> 
> - uses insn_propagation to replace the old value with the new value
> 
> - sets the new_uses of the insn_change to reflect the effect
>   of the propagation (in this case, replacing the old 128-bit
>   value with a 256-bit value)
> 
> - uses change_insn to commit the change
> 
> The process would be similar here.
>

Addressed in v7 of the patch.
 
> Thanks,
> Richard

Thanks & Regards
Ajit


Re: [Patch, v2] gcn/mkoffload.cc: Use #embed for including the generated ELF file

2024-07-19 Thread Jakub Jelinek
On Fri, Jun 21, 2024 at 05:30:02PM +0200, Tobias Burnus wrote:
> gcc/ChangeLog:
> 
>   * config/gcn/mkoffload.cc (read_file): Remove.
>   (process_obj): Generate C file that uses #embed.
>   (main): Update call to it; remove no longer needed file I/O.

> +  fprintf (cfile,
> +"static unsigned char gcn_code[] = {\n"
> +"#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") == 
> __STDC_EMBED_FOUND__\n"

If this was an attempt to deal gracefully with no #embed support, then
the above would be wrong and should have been
#if defined(__STDC_EMBED_FOUND__) && defined(__has_embed)
#if __has_embed ("whatever") == __STDC_EMBED_FOUND__
or so, because in a compiler which will not support __has_embed
you'll get error like
error: missing binary operator before token "("
on
#if defined(__STDC_EMBED_FOUND__) && __has_embed ("whatever") == 
__STDC_EMBED_FOUND__
as it is handled like
#if 0 && 0 ("whatever") == 0

Now, if all you want is an error if the file doesn't exist, then
#embed "whatever"
will do that too (though, in the patchset currently posted that is
still a fatal error, rather than just a normal one, perhaps we should
change that).

If you want an error not just when it doesn't exist, but also when it
is empty, then you could do
#embed "whatever" if_empty (%%%)
(or whatever is syntactically not valid there).

Jakub



RE: [PATCH v2] RISC-V: More support of vx and vf for autovec comparison

2024-07-19 Thread Li, Pan2
> +  TEST_COND_IMM_FLOAT (T, >, 0.0, _gt)   
> \
>  +  TEST_COND_IMM_FLOAT (T, <, 0.0, _lt)  
> \
>  +  TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge) 
> \
>  +  TEST_COND_IMM_FLOAT (T, <=, 0.0, _le) 
> \
>  +  TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq) 
> \
>  +  TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne) 
> \

Just curious, does this patch covered float imm is -0.0 (notice only +0.0 is 
mentioned)?
If so we can have similar tests as +0.0 here.

It is totally Ok if -0.0f is not applicable here.

Pan

-Original Message-
From: demin.han  
Sent: Friday, July 19, 2024 4:55 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; Li, Pan2 ; 
jeffreya...@gmail.com; rdapp@gmail.com
Subject: [PATCH v2] RISC-V: More support of vx and vf for autovec comparison

There are still some cases which can't utilize vx or vf after
last_combine pass.

1. integer comparison when imm isn't in range of [-16, 15]
2. float imm is 0.0
3. DI or DF mode under RV32

This patch fix above mentioned issues.

Tested on RV32 and RV64.

Signed-off-by: demin.han 
gcc/ChangeLog:

* config/riscv/autovec.md: register_operand to nonmemory_operand
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select code according
* to scalar_p
(expand_vec_cmp): Generate scalar_p and transform op1
* config/riscv/riscv.cc (riscv_const_insns): Add !FLOAT_MODE_P
* constrain

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Fix and add test

Signed-off-by: demin.han 
---
V2 changes:
  1. remove unnecessary add_integer_operand and related code
  2. fix one format issue
  3. split patch and make it only related to vec cmp

 gcc/config/riscv/autovec.md   |  2 +-
 gcc/config/riscv/riscv-v.cc   | 57 +++
 gcc/config/riscv/riscv.cc |  2 +-
 .../riscv/rvv/autovec/cmp/vcond-1.c   | 48 +++-
 4 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index d5793acc999..a772153 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp"
   [(set (match_operand: 0 "register_operand")
(match_operator: 1 "comparison_operator"
  [(match_operand:V_VLSF 2 "register_operand")
-  (match_operand:V_VLSF 3 "register_operand")]))]
+  (match_operand:V_VLSF 3 "nonmemory_operand")]))]
   "TARGET_VECTOR"
   {
 riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e290675bbf0..56328075aeb 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2624,32 +2624,27 @@ expand_vec_init (rtx target, rtx vals)
 /* Get insn code for corresponding comparison.  */
 
 static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 {
   insn_code icode;
-  switch (code)
+  if (FLOAT_MODE_P (mode))
 {
-case EQ:
-case NE:
-case LE:
-case LEU:
-case GT:
-case GTU:
-case LTGT:
-  icode = code_for_pred_cmp (mode);
-  break;
-case LT:
-case LTU:
-case GE:
-case GEU:
-  if (FLOAT_MODE_P (mode))
-   icode = code_for_pred_cmp (mode);
+  icode = !scalar_p ? code_for_pred_cmp (mode)
+   : code_for_pred_cmp_scalar (mode);
+  return icode;
+}
+  if (scalar_p)
+{
+  if (code == GE || code == GEU)
+   icode = code_for_pred_ge_scalar (mode);
   else
-   icode = code_for_pred_ltge (mode);
-  break;
-default:
-  gcc_unreachable ();
+   icode = code_for_pred_cmp_scalar (mode);
+  return icode;
 }
+  if (code == LT || code == LTU || code == GE || code == GEU)
+icode = code_for_pred_ltge (mode);
+  else
+icode = code_for_pred_cmp (mode);
   return icode;
 }
 
@@ -2771,7 +2766,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1, rtx mask,
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);
 
   if (code == LTGT)
 {
@@ -2779,12 +2773,29 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx 
op1, rtx mask,
   rtx gt = gen_reg_rtx (mask_mode);
   expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
   expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
-  icode = code_for_pred (IOR, mask_mode);
+  insn_code icode = code_for_pred (IOR, mask_mode);
   rtx ops[] = {target, lt, gt};
   emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
   return;
 }
 
+  rtx elt;
+  bool scalar_p = false;
+  if (const_vec_dup

[PATCH v2] Internal-fn: Only allow type matches mode for internal fn[PR115961]

2024-07-19 Thread pan2 . li
From: Pan Li 

The direct_internal_fn_supported_p has no restrictions for the type
modes.  For example the bitfield like below will be recog as .SAT_TRUNC.

struct e
{
  unsigned pre : 12;
  unsigned a : 4;
};

__attribute__((noipa))
void bug (e * v, unsigned def, unsigned use) {
  e & defE = *v;
  defE.a = min_u (use + 1, 0xf);
}

This patch would like to check strictly for the direct_internal_fn_supported_p,
and only allows the type matches mode for ifn type tree pair.

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

PR target/115961

gcc/ChangeLog:

* internal-fn.cc (type_strictly_matches_mode_p): Add new func
impl to check type strictly matches mode or not.
(type_pair_strictly_matches_mode_p): Ditto but for tree type
pair.
(direct_internal_fn_supported_p): Add above check for the tree
type pair.

gcc/testsuite/ChangeLog:

* g++.target/i386/pr115961-run-1.C: New test.
* g++.target/riscv/rvv/base/pr115961-run-1.C: New test.

Signed-off-by: Pan Li 
---
 gcc/internal-fn.cc| 32 +
 .../g++.target/i386/pr115961-run-1.C  | 34 +++
 .../riscv/rvv/base/pr115961-run-1.C   | 34 +++
 3 files changed, 100 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 95946bfd683..5c21249318e 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn)
   gcc_unreachable ();
 }
 
+/* Return true if TYPE's mode has the same format as TYPE, and if there is
+   a 1:1 correspondence between the values that the mode can store and the
+   values that the type can store.  */
+
+static bool
+type_strictly_matches_mode_p (const_tree type)
+{
+  if (VECTOR_TYPE_P (type))
+return VECTOR_MODE_P (TYPE_MODE (type));
+
+  if (INTEGRAL_TYPE_P (type))
+return type_has_mode_precision_p (type);
+
+  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
+return true;
+
+  return false;
+}
+
+/* Return true if both the first and the second type of tree pair are
+   strictly matches their modes,  or return false.  */
+
+static bool
+type_pair_strictly_matches_mode_p (tree_pair type_pair)
+{
+  return type_strictly_matches_mode_p (type_pair.first)
+&& type_strictly_matches_mode_p (type_pair.second);
+}
+
 /* Return true if FN is supported for the types in TYPES when the
optimization type is OPT_TYPE.  The types are those associated with
the "type0" and "type1" fields of FN's direct_internal_fn_info
@@ -4173,6 +4202,9 @@ bool
 direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
optimization_type opt_type)
 {
+  if (!type_pair_strictly_matches_mode_p (types))
+return false;
+
   switch (fn)
 {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C 
b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C 
b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
new file mode 100644
index 000..b8c8aef3b17
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
@@ -0,0 +1,34 @@
+/* PR target/115961 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+struct e
+{
+  unsigned pre : 12;
+  unsigned a : 4;
+};
+
+static unsigned min_u (unsigned a, unsigned b)
+{
+  return (b < a) ? b : a;
+}
+
+__attribute__((noipa))
+void bug (e * v, unsigned def, unsigned use) {
+  e & defE = *v;
+  defE.a = min_u (use + 1, 0xf);
+}
+
+__attribute__((noipa, optimize(0)))
+int main(void)
+{
+  e v = { 0xded, 3 };
+
+  bug(&v, 32, 33);
+
+  if (v.a != 0xf)
+__builtin_abort ();
+
+  return 0;
+}
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
-- 
2.34.1



RE: [PATCH v1] Internal-fn: Only allow modes describe types for internal fn[PR115961]

2024-07-19 Thread Li, Pan2
Thanks Richard S for comments and suggestions, updated in v2.

Pan

-Original Message-
From: Richard Sandiford  
Sent: Friday, July 19, 2024 3:46 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@gmail.com; 
richard.guent...@gmail.com; tamar.christ...@arm.com; jeffreya...@gmail.com; 
rdapp@gmail.com
Subject: Re: [PATCH v1] Internal-fn: Only allow modes describe types for 
internal fn[PR115961]

pan2...@intel.com writes:
> From: Pan Li 
>
> The direct_internal_fn_supported_p has no restrictions for the type
> modes.  For example the bitfield like below will be recog as .SAT_TRUNC.
>
> struct e
> {
>   unsigned pre : 12;
>   unsigned a : 4;
> };
>
> __attribute__((noipa))
> void bug (e * v, unsigned def, unsigned use) {
>   e & defE = *v;
>   defE.a = min_u (use + 1, 0xf);
> }
>
> This patch would like to add checks for the direct_internal_fn_supported_p,
> and only allows the tree types describled by modes.
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
>   PR target/115961
>
> gcc/ChangeLog:
>
>   * internal-fn.cc (mode_describle_type_precision_p): Add new func
>   impl to check if mode describle the tree type.
>   (direct_internal_fn_supported_p): Add above check for the first
>   and second tree type of tree pair.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/i386/pr115961-run-1.C: New test.
>   * g++.target/riscv/rvv/base/pr115961-run-1.C: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/internal-fn.cc| 21 
>  .../g++.target/i386/pr115961-run-1.C  | 34 +++
>  .../riscv/rvv/base/pr115961-run-1.C   | 34 +++
>  3 files changed, 89 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr115961-run-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 95946bfd683..4dc69264a24 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4164,6 +4164,23 @@ direct_internal_fn_optab (internal_fn fn)
>gcc_unreachable ();
>  }
>  
> +/* Return true if the mode describes the precision of tree type,  or false.  
> */
> +
> +static bool
> +mode_describle_type_precision_p (const_tree type)

Bit pedantic, but it's not really just about precision.  For floats
and vectors it's also about format.  Maybe:

/* Return true if TYPE's mode has the same format as TYPE, and if there is
   a 1:1 correspondence between the values that the mode can store and the
   values that the type can store.  */

And maybe my mode_describes_type_p suggestion wasn't the best,
but given that it's not just about precision, I'm not sure about
mode_describle_type_precision_p either.  How about:

  type_strictly_matches_mode_p

?  I'm open to other suggestions.

> +{
> +  if (VECTOR_TYPE_P (type))
> +return VECTOR_MODE_P (TYPE_MODE (type));
> +
> +  if (INTEGRAL_TYPE_P (type))
> +return type_has_mode_precision_p (type);
> +
> +  if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type))
> +return true;
> +
> +  return false;
> +}
> +
>  /* Return true if FN is supported for the types in TYPES when the
> optimization type is OPT_TYPE.  The types are those associated with
> the "type0" and "type1" fields of FN's direct_internal_fn_info
> @@ -4173,6 +4190,10 @@ bool
>  direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
>   optimization_type opt_type)
>  {
> +  if (!mode_describle_type_precision_p (types.first)
> +|| !mode_describle_type_precision_p (types.second))

Formatting nit: the "||" should line up with the "!".

LGTM otherwise.

Thanks,
Richard

> +return false;
> +
>switch (fn)
>  {
>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
> diff --git a/gcc/testsuite/g++.target/i386/pr115961-run-1.C 
> b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
> new file mode 100644
> index 000..b8c8aef3b17
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr115961-run-1.C
> @@ -0,0 +1,34 @@
> +/* PR target/115961 */
> +/* { dg-do run } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +struct e
> +{
> +  unsigned pre : 12;
> +  unsigned a : 4;
> +};
> +
> +static unsigned min_u (unsigned a, unsigned b)
> +{
> +  return (b < a) ? b : a;
> +}
> +
> +__attribute__((noipa))
> +void bug (e * v, unsigned def, unsigned use) {
> +  e & defE = *v;
> +  defE.a = min_u (use + 1, 0xf);
> +}
> +
> +__attribute__((noipa, optimize(0)))
> +int main(void)
> +{
> +  e v = { 0xded, 3 };
> +
> +  bug(&v, 32, 33);
> +
> +  if (v.a != 0xf)
> +__builtin_abort ();
> +
> +  return 0;
> +}
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C 
> b/gcc/testsuite/g++.target/riscv/rvv/base/pr115961-run-1.C
> n

RE: [PATCH v2] RISC-V: More support of vx and vf for autovec comparison

2024-07-19 Thread Demin Han

> -Original Message-
> From: Li, Pan2 
> Sent: 2024年7月19日 18:33
> To: Demin Han ; gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; jeffreya...@gmail.com;
> rdapp@gmail.com
> Subject: RE: [PATCH v2] RISC-V: More support of vx and vf for autovec
> comparison
> 
> > +  TEST_COND_IMM_FLOAT (T, >, 0.0, _gt) 
> > \
> >  +  TEST_COND_IMM_FLOAT (T, <, 0.0, _lt)
> > \
> >  +  TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge)   
> > \
> >  +  TEST_COND_IMM_FLOAT (T, <=, 0.0, _le)   
> > \
> >  +  TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq)   
> > \
> >  +  TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne)   
> > \
> 
> Just curious, does this patch covered float imm is -0.0 (notice only +0.0 is
> mentioned)?
> If so we can have similar tests as +0.0 here.
> 
> It is totally Ok if -0.0f is not applicable here.

I have a test. 
The backend can't see -0.0 and It becomes 0.0 when translate to gimple.

> Pan
> 
Regards,
Demin


Re: [Fortran, Patch, PR77518, (coarray), v2] Fix ICE in sizeof(coarray)

2024-07-19 Thread Andre Vehreschild
Hi Paul,

thanks for the review.

> While I realise that this is not your doing, should we not
> check DECL_LANG_SPECIFIC ()) before touching GFC_DECL_SAVED_DESCRIPTOR?

I like that idea. I have added it. But what should we do when
DECL_LANG_SPECIFIC is not set? I have chosen to add a gcc_unreachable(), but
that will trigger an ICE in the future should the prerequisites not be met.

> Or is access guaranteed by the REF_COMPONENT check?

Well, as we have seen, it is not. At least that is not guaranteed to my
knowledge.

When you don't like this solution solution, then I will dig deeper to figure
what is going on and how to resolve it.

> A micro-nit line 12 s/User/Use/

Ups, thanks, fixed.

Not merging yet, therefore updated patch attached.

- Andre

>
> Apart from this, it looks to be eminently obvious. OK for mainline.
>
> Paul
>
>
> On Thu, 18 Jul 2024 at 14:16, Andre Vehreschild  wrote:
>
> > Hi all,
> >
> > the attached patch fixes an ICE when the object supplied to sizeof() is
> > a coarray of class type. This is fixed by taking the class object from
> > the se's class_container.
> >
> > Regtests ok on x86_64-pc-linux-gnu / Fedora 39. Ok for mainline?
> >
> > Regards,
> > Andre
> > --
> > Andre Vehreschild * Email: vehre ad gcc dot gnu dot org
> >


--
Andre Vehreschild * Email: vehre ad gmx dot de
From 685fd785da00eb1f610e2508f0a62d1fe0ae244d Mon Sep 17 00:00:00 2001
From: Andre Vehreschild 
Date: Thu, 18 Jul 2024 14:53:31 +0200
Subject: [PATCH] Fortran: Fix ICE in sizeof(coarray) [PR77518]

Use se's class_container where present in sizeof().

	PR fortran/77518

gcc/fortran/ChangeLog:

	* trans-intrinsic.cc (gfc_conv_intrinsic_sizeof): Use
	class_container of se when set.

gcc/testsuite/ChangeLog:

	* gfortran.dg/coarray/sizeof_1.f90: New test.
---
 gcc/fortran/trans-intrinsic.cc| 13 ++---
 .../gfortran.dg/coarray/sizeof_1.f90  | 27 +++
 2 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/coarray/sizeof_1.f90

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index d58bea30101..4f63cfd1421 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -8161,10 +8161,17 @@ gfc_conv_intrinsic_sizeof (gfc_se *se, gfc_expr *expr)
   else if (arg->rank > 0
 	   || (arg->rank == 0
 		   && arg->ref && arg->ref->type == REF_COMPONENT))
-	/* The scalarizer added an additional temp.  To get the class' vptr
-	   one has to look at the original backend_decl.  */
-	byte_size = gfc_class_vtab_size_get (
+	{
+	  /* The scalarizer added an additional temp.  To get the class' vptr
+	 one has to look at the original backend_decl.  */
+	  if (argse.class_container)
+	byte_size = gfc_class_vtab_size_get (argse.class_container);
+	  else if (DECL_LANG_SPECIFIC (arg->symtree->n.sym->backend_decl))
+	byte_size = gfc_class_vtab_size_get (
 	  GFC_DECL_SAVED_DESCRIPTOR (arg->symtree->n.sym->backend_decl));
+	  else
+	gcc_unreachable ();
+	}
   else
 	gcc_unreachable ();
 }
diff --git a/gcc/testsuite/gfortran.dg/coarray/sizeof_1.f90 b/gcc/testsuite/gfortran.dg/coarray/sizeof_1.f90
new file mode 100644
index 000..b26f8416406
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/sizeof_1.f90
@@ -0,0 +1,27 @@
+!{ dg-do run }
+
+! Check that pr77518 is fixed.
+! Based on code by Gerhard Steinmetz  
+
+program coarray_sizeof_1
+  type t
+  end type
+  type t2
+integer :: v = 42
+  end type
+  type t3
+type(t2) :: s
+integer :: n = 1
+  end type
+
+  class(t), allocatable :: z[:]
+  class(t2), allocatable :: z2[:]
+  class(t3), allocatable :: z3[:]
+
+  if (sizeof(z) /= 0) stop 1
+  if (sizeof(z2) /= sizeof(integer)) stop 2
+  allocate(z3[*])
+  if (sizeof(z3) /= sizeof(z2) + sizeof(integer)) stop 3
+  if (sizeof(z3%s) /= sizeof(z2)) stop 4
+end
+
--
2.45.2



Re: [PATCH] Fortran: character array constructor with >= 4 constant elements [PR103115]

2024-07-19 Thread Andre Vehreschild
Hi Harald,

you right, that solution looks pretty obvious to me. Ok by me. Thanks for the
patch and the opportunity to give back a review.

Keep up the good work!

Regards,
Andre

On Thu, 18 Jul 2024 21:27:13 +0200
Harald Anlauf  wrote:

> Dear all,
>
> here's a quite obvious fix for an ICE when processing an array constructor
> where the first element is of deferred length, and at least four constant
> elements followed, or an iterator with at least four elements.  There
> is a code path that then tries to combine these constant elements and
> take the element size of the first (variable length) element in the
> constructor.
>
> (For gcc with checking=release, no ICE occured; wrong code was generated
> instead.)
>
> Obvious fix: if we see that the element size is not constant, falls back
> to the case handling the constructor element-wise.
>
> Regtested on x86_64-pc-linux-gnu.  OK for mainline / backports?
>
> Thanks,
> Harald
>


--
Andre Vehreschild * Email: vehre ad gmx dot de


Re: [PATCH] bpf: create modifier for mem operand for xchg and cmpxchg

2024-07-19 Thread Cupertino Miranda


Fixed and pushed!

Thanks,
Cupertino

David Faust writes:

> On 7/15/24 08:33, Cupertino Miranda wrote:
>> Both xchg and cmpxchg instructions, in the pseudo-C dialect, do not
>> expect their memory address operand to be surrounded by parentheses.
>> For example, it should be output as "w0 =cmpxchg32_32(r8+8,w0,w2)"
>> instead of "w0 =cmpxchg32_32((r8+8),w0,w2)".
>
> After some brief searching, looks like these extra parens in [cmp]xchg*
> are an unintended consequence of:
>
>  bb5f619a938 bpf: fix printing of memory operands in pseudoc asm dialect
>
>>
>> This patch implements an operand modifier 'M' which marks the
>> instruction templates that do not expect the parentheses, and adds it do
>> xchg and cmpxchg templates.
>
> One very minor nit below, otherwise LGTM. Please apply.
> Thanks for the fix!
>
>>
>> gcc/ChangeLog:
>>  * config/bpf/atomic.md (atomic_compare_and_swap,
>>  atomic_exchange): Add operand modifier %M to the first
>>  operand.
>>  * config/bpf/bpf.cc (no_parentheses_mem_operand): Create
>>  variable.
>>  (bpf_print_operand): Set no_parentheses_mem_operand variable if
>>  %M operand is used.
>>  (bpf_print_operand_address): Conditionally output parentheses.
>>
>> gcc/testsuite/ChangeLog:
>>  * gcc.target/bpf/pseudoc-atomic-memaddr-op.c: Add test.
>> ---
>>  gcc/config/bpf/atomic.md  |  4 +--
>>  gcc/config/bpf/bpf.cc | 20 ---
>>  .../bpf/pseudoc-atomic-memaddr-op.c   | 36 +++
>>  3 files changed, 54 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/bpf/pseudoc-atomic-memaddr-op.c
>>
>> diff --git a/gcc/config/bpf/atomic.md b/gcc/config/bpf/atomic.md
>> index 65bd5f266a1..be4511bb51b 100644
>> --- a/gcc/config/bpf/atomic.md
>> +++ b/gcc/config/bpf/atomic.md
>> @@ -129,7 +129,7 @@
>> (set (match_dup 1)
>>  (match_operand:AMO 2 "nonmemory_operand" "0"))]
>>"bpf_has_v3_atomics"
>> -  "{axchg\t%1,%0|%w0 = xchg(%1, %w0)}")
>> +  "{axchg\t%1,%0|%w0 = xchg(%M1, %w0)}")
>>
>>  ;; The eBPF atomic-compare-and-exchange instruction has the form
>>  ;;   acmp [%dst+offset], %src
>> @@ -182,4 +182,4 @@
>>(match_operand:AMO 3 "register_operand")]   ;; desired
>>   UNSPEC_ACMP))]
>>"bpf_has_v3_atomics"
>> -  "{acmp\t%1,%3|%w0 = cmpxchg(%1, %w0, %w3)}")
>> +  "{acmp\t%1,%3|%w0 = cmpxchg(%M1, %w0, %w3)}")
>> diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
>> index 687e7ba49c1..a6b6e20731c 100644
>> --- a/gcc/config/bpf/bpf.cc
>> +++ b/gcc/config/bpf/bpf.cc
>> @@ -835,6 +835,11 @@ bpf_print_register (FILE *file, rtx op, int code)
>>  }
>>  }
>>
>> +/* Variable defined to implement 'M' operand modifier for the special cases
>> +   where the parentheses should not be printed surrounding a memory address
>> +   operand. */
>> +static bool no_parentheses_mem_operand;
>> +
>>  /* Print an instruction operand.  This function is called in the macro
>> PRINT_OPERAND defined in bpf.h */
>>
>> @@ -847,6 +852,7 @@ bpf_print_operand (FILE *file, rtx op, int code)
>>bpf_print_register (file, op, code);
>>break;
>>  case MEM:
>> +  no_parentheses_mem_operand = (code == 'M');
>>output_address (GET_MODE (op), XEXP (op, 0));
>>break;
>>  case CONST_DOUBLE:
>> @@ -891,6 +897,9 @@ bpf_print_operand (FILE *file, rtx op, int code)
>>  }
>>  }
>>
>> +#define PAREN_OPEN  (asm_dialect == ASM_NORMAL ? "[" : 
>> no_parentheses_mem_operand ? "" : "(")
>> +#define PAREN_CLOSE (asm_dialect == ASM_NORMAL ? "]" : 
>> no_parentheses_mem_operand ? "" : ")")
>> +
>>  /* Print an operand which is an address.  This function should handle
>> any legit address, as accepted by bpf_legitimate_address_p, and
>> also addresses that are valid in CALL instructions.
>> @@ -904,9 +913,9 @@ bpf_print_operand_address (FILE *file, rtx addr)
>>switch (GET_CODE (addr))
>>  {
>>  case REG:
>> -  fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
>> +  fprintf (file, "%s", PAREN_OPEN);
>>bpf_print_register (file, addr, 0);
>> -  fprintf (file, asm_dialect == ASM_NORMAL ? "+0]" : "+0)");
>> +  fprintf (file, "+0%s", PAREN_CLOSE);
>>break;
>>  case PLUS:
>>{
>> @@ -923,14 +932,14 @@ bpf_print_operand_address (FILE *file, rtx addr)
>>  || (GET_CODE (op1) == UNSPEC
>>  && XINT (op1, 1) == UNSPEC_CORE_RELOC)))
>>{
>> -fprintf (file, asm_dialect == ASM_NORMAL ? "[" : "(");
>> +fprintf (file, "%s", PAREN_OPEN);
>>  bpf_print_register (file, op0, 0);
>>  fprintf (file, "+");
>>  if (GET_CODE (op1) == UNSPEC)
>>output_addr_const (file, XVECEXP (op1, 0, 0));
>>  else
>>output_addr_const (file, op1);
>> -fprintf (file, asm_dialect == ASM_NORMAL ? "]" : ")");
>> +fprintf (file, "%s", PAREN_CLOSE);
>>}
>>  else
>>  

[PATCH] testsuite: Allow vst1 instruction

2024-07-19 Thread Torbjörn SVENSSON
Ok for trunk and releases/gcc-14?

--

On Cortex-A7 with -mfloat-abi=hard and -mfpu=neon, the following
instructions are generated for the test case:

vldrd16, .L3
vst1.32 {d16}, [r0]

For Cortex-A7 with -mfloat-abi=soft, it's instead:

movsr2, #1
movsr3, #0
strdr2, r3, [r0]

Both these are valid and should not cause the test to fail.
For Cortex-M0/3/4/7/33, they all generate the same instructions as
Cortex-A7 with -mfloat-abi=soft.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr40457-2.c: Add vst1 to allowed instructions.

Signed-off-by: Torbjörn SVENSSON 
---
 gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
b/gcc/testsuite/gcc.target/arm/pr40457-2.c
index 31624d35127..6aed42a4fbc 100644
--- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
@@ -7,4 +7,4 @@ void foo(int* p)
   p[1] = 0;
 }
 
-/* { dg-final { scan-assembler "strd|stm" } } */
+/* { dg-final { scan-assembler "strd|stm|vst1" } } */
-- 
2.25.1



Re: [PATCH] arm: Update fp16-aapcs-[24].c after insn_propagation patch

2024-07-19 Thread Richard Earnshaw (lists)
On 11/07/2024 19:31, Richard Sandiford wrote:
> These tests used to generate:
> 
> bl  swap
> ldr r2, [sp, #4]
> mov r0, r2  @ __fp16
> 
> but g:9d20529d94b23275885f380d155fe8671ab5353a means that we can
> load directly into r0:
> 
> bl  swap
> ldrhr0, [sp, #4]@ __fp16
> 
> This patch updates the tests to "defend" this change.
> 
> While there, the scans include:
> 
> mov\tr1, r[03]}
> 
> But if the spill of r2 occurs first, there's no real reason why
> r2 couldn't be used as the temporary, instead r3.
> 
> The patch tries to update the scans while preserving the spirit
> of the originals.
> 
> Spot-checked with armv8l-unknown-linux-gnueabihf.  OK to install?
> 
> Richard

OK.

I'm not sure that these tests are really doing very much; it would probably be 
better if they could be rewritten using the gcc.target/arm/aapcs framework.  
But that's for another day.

R.

> 
> 
> gcc/testsuite/
>   * gcc.target/arm/fp16-aapcs-2.c: Expect the return value to be
>   loaded directly from the stack.  Test that the swap generates
>   two moves out of r0/r1 and two moves in.
>   * gcc.target/arm/fp16-aapcs-4.c: Likewise.
> ---
>  gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c | 8 +---
>  gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c | 8 +---
>  2 files changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c 
> b/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
> index c34387f5782..12d20560f53 100644
> --- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
> +++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c
> @@ -16,6 +16,8 @@ F (__fp16 a, __fp16 b, __fp16 c)
>return c;
>  }
>  
> -/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[0-2]} 3 } }  */
> -/* { dg-final { scan-assembler-times {mov\tr1, r[03]} 1 } }  */
> -/* { dg-final { scan-assembler-times {mov\tr0, r[0-9]+} 2 } }  */
> +/* The swap must include two moves out of r0/r1 and two moves in.  */
> +/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[01]} 2 } }  */
> +/* { dg-final { scan-assembler-times {mov\tr[01], r[0-9]+} 2 } }  */
> +/* c should be spilled around the call.  */
> +/* { dg-final { scan-assembler {str\tr2, ([^\n]*).*ldrh\tr0, \1} { target 
> arm_little_endian } } } */
> diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c 
> b/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
> index daac29137ae..09fa64aa494 100644
> --- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
> +++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c
> @@ -16,6 +16,8 @@ F (__fp16 a, __fp16 b, __fp16 c)
>return c;
>  }
>  
> -/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[0-2]} 3 } }  */
> -/* { dg-final { scan-assembler-times {mov\tr1, r[03]} 1 } }  */
> -/* { dg-final { scan-assembler-times {mov\tr0, r[0-9]+} 2 } }  */
> +/* The swap must include two moves out of r0/r1 and two moves in.  */
> +/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[01]} 2 } }  */
> +/* { dg-final { scan-assembler-times {mov\tr[01], r[0-9]+} 2 } }  */
> +/* c should be spilled around the call.  */
> +/* { dg-final { scan-assembler {str\tr2, ([^\n]*).*ldrh\tr0, \1} { target 
> arm_little_endian } } } */



Re: [PATCH] c++: alias of alias tmpl with dependent attrs [PR115897]

2024-07-19 Thread Patrick Palka
On Thu, 18 Jul 2024, Jason Merrill wrote:

> On 7/18/24 12:45 PM, Patrick Palka wrote:
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does thi look
> > OK for trunk/14?
> > 
> > -- >8 --
> > 
> > As a followup of r15-2047-g7954bb4fcb6fa8, we also need to consider
> > dependent attributes when recursing into a non-template alias that names
> > a dependent alias template specialization (and so STF_STRIP_DEPENDENT
> > is set), otherwise in the first testcase below we undesirably strip B
> > all the way to T instead of to A.
> > 
> > We also need to move the typedef recursion case of strip_typedefs up to
> > get checked before the compound type recursion cases.  Otherwise for C
> > below (which ultimately aliases T*) we end up stripping it to T* instead
> > of to A because the POINTER_TYPE recursion dominates the typedef
> > recursion.  It also means we issue an unexpected extra error in the
> > third testcase below.
> > 
> > Ideally we would also want to consider dependent attributes on
> > non-template aliases, so that we accept the second testcase below, but
> > making that work correctly would require broader changes to e.g.
> > spec_hasher which currently assumes all non-template aliases are
> > stripped and hence it'd conflate the dependent specializations A
> > and A even if we didn't strip B.
> 
> Wouldn't that just be a matter of changing structural_comptypes to consider
> dependent attributes as well as dependent specializations?

Pretty much, it seems.  ISTM we should check dependent attributes even
when !comparing_dependent_aliases since they affect type identity rather
than just SFINAE behavior.

> 
> Or better, adding attributes to dependent_alias_template_spec_p (and changing
> its name)?  It seems like other callers would also benefit from that change.

I ended up adding a new predicate opaque_alias_p separate from
dependent_alias_template_spec_p since ISTM we need to call it from
there and from alias_template_specialization_p to avoid looking through
such aliases.

So opaque_alias_p checks for type identity of an alias, whereas
dependent_alias_template_spec_p more broadly checks for SFINAE identity.

Something like the following (as an incremental patch on top of the
previous one, to consider separately for backportings since it's riskier):

-- >8 --

Subject: [PATCH 2/1] c++: non-template alias with dependent attributes 
[PR115897]

PR c++/115897

gcc/cp/ChangeLog:

* cp-tree.h (opaque_alias_p): Declare.
* decl.cc (start_decl): Use structural equality for an
opaque alias.
* pt.cc (opaque_alias_p): Define.
(alias_template_specialization_p): Don't look through an
opaque alias.
(complex_alias_template_p): Use opaque_alias_p instead of
any_dependent_template_arguments_p directly.
(dependent_alias_template_spec_p): Don't look through an
opaque alias.
(get_underlying_template): Use opaque_alias_p instead of
any_dependent_template_arguments_p.
* tree.cc (strip_typedefs): Don't strip an opaque alias.
* typeck.cc (structural_comptypes): Compare declaration
attributes for an opaque alias.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/alias-decl-79.C: Remove xfails.
---
 gcc/cp/cp-tree.h   |  1 +
 gcc/cp/decl.cc |  3 ++
 gcc/cp/pt.cc   | 23 
 gcc/cp/tree.cc |  7 ++--
 gcc/cp/typeck.cc   | 42 ++
 gcc/testsuite/g++.dg/cpp0x/alias-decl-79.C | 16 -
 6 files changed, 61 insertions(+), 31 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index c6f102564ce..f2a64b63133 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7622,6 +7622,7 @@ extern tree instantiate_non_dependent_or_null   (tree);
 extern bool variable_template_specialization_p  (tree);
 extern bool alias_type_or_template_p(tree);
 enum { nt_opaque = false, nt_transparent = true };
+extern bool opaque_alias_p (const_tree);
 extern tree alias_template_specialization_p (const_tree, bool);
 extern tree dependent_alias_template_spec_p (const_tree, bool);
 extern tree get_template_parm_object   (tree expr, tree mangle);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 66e8a973cce..755701fd8aa 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -6144,6 +6144,9 @@ start_decl (const cp_declarator *declarator,
   if (decl == error_mark_node)
 return error_mark_node;
 
+  if (opaque_alias_p (TREE_TYPE (decl)))
+SET_TYPE_STRUCTURAL_EQUALITY (TREE_TYPE (decl));
+
   if (VAR_P (decl)
   && DECL_NAMESPACE_SCOPE_P (decl) && !TREE_PUBLIC (decl) && !was_public
   && !DECL_THIS_STATIC (decl) && !DECL_ARTIFICIAL (decl)
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 0620c8c023a..4d4a5cef92c 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -6508,6 +6508,19 @@ alias_type_or_template

Re: [PATCH] RISC-V: Support __mulbc3 and __divbc3 in libgcc for __bf16

2024-07-19 Thread Jeff Law




On 7/18/24 9:13 PM, Xiao Zeng wrote:

2024-07-18 01:53  Jeff Law  wrote:




On 7/17/24 2:01 AM, Xiao Zeng wrote:

libgcc/ChangeLog:

* Makefile.in: Support __divbc3 and __mulbc3.
* libgcc2.c (if): Support BC mode for __bf16.
(defined): Ditto.
(MTYPE): Ditto.
(CTYPE): Ditto.
(AMTYPE): Ditto.
(MODE): Ditto.
(CEXT): Ditto.
(NOTRUNC): Ditto.
* libgcc2.h (LIBGCC2_HAS_BF_MODE): Ditto.
(__attribute__): Ditto.
(__divbc3): Add __divbc3 for __bf16.
(__mulbc3): Add __mulbc3 for __bf16.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/bf16-mulbc3-divbc3.c: New test.

It looks like this failed pre-commit testing:


https://patchwork.sourceware.org/project/gcc/patch/20240717080159.34038-1-zengx...@eswincomputing.com/

Yes, I will fix this issue in the V2 patch.

Thanks.




Jeff

I'm sorry for replying to this email so late. I noticed that CI build has 
failed. After
inspection, it was found that several bf16 related __builtin_*bf16 symbols were 
not generated.

After passing the ESWIN internal CI test, I will push the V2 patch.
No need to be sorry.  We're all juggling lots of work and get to things 
as fast as we can.


jeff


Re: [PATCH v10 2/3] C: Implement musttail attribute for returns

2024-07-19 Thread Marek Polacek
On Thu, Jul 18, 2024 at 04:18:56PM -0700, Andi Kleen wrote:
> > > > > +  set_musttail_on_return (retval, xloc, musttail_p);
> > > > > +
> > > > >if (retval)
> > > > >  {
> > > > >tree semantic_type = NULL_TREE;
> > > > 
> > > > Is it deliberate that set_musttail_on_return is called outside the
> > > > if (retval) block?  If it can be moved into it, set_musttail_on_return
> > > > can be simplified to assume that retval is always non-null.
> > > 
> > > Yes it can be removed.
> 
> Actually I was wrong here, after double checking. The !retval case is
> needed to diagnose a [[musttail]] set on a plain return (which is not
> allowed following the clang spec)
> 
> So the call has to be outside the check.
> 
> The C frontend did it correctly, but the C++ part did not (fixed now)

Ah, fair enough.  Just make sure we test it somewhere, thanks.

Marek



[PATCH] rtl-optimization/116002 - cselib hash is bad

2024-07-19 Thread Richard Biener
The following addresses the bad hash function of cselib which uses
integer plus for merging.  This causes a huge number of collisions
for the testcase in the PR and thus very large compile-time.

The following rewrites it to use inchash, eliding duplicate mixing
of RTX code and mode in some cases and more consistently avoiding
a return value of zero as well as treating zero as fatal.

For cselib_hash_plus_const_int this removes the apparent attempt
of making sure to hash the same as a PLUS as cselib_hash_rtx makes
sure to dispatch to cselib_hash_plus_const_int consistently.

This reduces compile-time for the testcase in the PR from unknown
to 22s and for a reduced testcase from 73s to 9s.  There's another
pending patchset to improve the speed of inchash mixing, but it's
not in the profile for this testcase (PTA pops up now).

The generated code is equal.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Any objections?

PR rtl-optimization/116002
* cselib.cc (cselib_hash_rtx): Use inchash to get proper mixing.
Consistently avoid a zero return value when hashing successfully.
Consistently treat a zero hash value from recursing as fatal.
(cselib_hash_plus_const_int): Likewise.
---
 gcc/cselib.cc | 195 ++
 1 file changed, 101 insertions(+), 94 deletions(-)

diff --git a/gcc/cselib.cc b/gcc/cselib.cc
index cbaab7d515c..6b4cdf266f8 100644
--- a/gcc/cselib.cc
+++ b/gcc/cselib.cc
@@ -1266,14 +1266,13 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int 
create,
   if (c == 0)
 return e->hash;
 
-  unsigned hash = (unsigned) PLUS + (unsigned) GET_MODE (x);
-  hash += e->hash;
-  unsigned int tem_hash = (unsigned) CONST_INT + (unsigned) VOIDmode;
-  tem_hash += ((unsigned) CONST_INT << 7) + (unsigned HOST_WIDE_INT) c;
-  if (tem_hash == 0)
-tem_hash = (unsigned int) CONST_INT;
-  hash += tem_hash;
-  return hash ? hash : 1 + (unsigned int) PLUS;
+  inchash::hash hash;
+  hash.add_int (PLUS);
+  hash.add_int (GET_MODE (x));
+  hash.merge_hash (e->hash);
+  hash.add_hwi (c);
+
+  return hash.end () ? hash.end () : 1 + (unsigned int) PLUS;
 }
 
 /* Hash an rtx.  Return 0 if we couldn't hash the rtx.
@@ -1306,10 +1305,11 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
   int i, j;
   enum rtx_code code;
   const char *fmt;
-  unsigned int hash = 0;
+  inchash::hash hash;
 
   code = GET_CODE (x);
-  hash += (unsigned) code + (unsigned) GET_MODE (x);
+  hash.add_int (code);
+  hash.add_int (GET_MODE (x));
 
   switch (code)
 {
@@ -1326,19 +1326,16 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
   return e->hash;
 
 case DEBUG_EXPR:
-  hash += ((unsigned) DEBUG_EXPR << 7)
- + DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x));
-  return hash ? hash : (unsigned int) DEBUG_EXPR;
+  hash.add_int (DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x)));
+  return hash.end () ? hash.end() : (unsigned int) DEBUG_EXPR;
 
 case DEBUG_IMPLICIT_PTR:
-  hash += ((unsigned) DEBUG_IMPLICIT_PTR << 7)
- + DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x));
-  return hash ? hash : (unsigned int) DEBUG_IMPLICIT_PTR;
+  hash.add_int (DECL_UID (DEBUG_IMPLICIT_PTR_DECL (x)));
+  return hash.end () ? hash.end () : (unsigned int) DEBUG_IMPLICIT_PTR;
 
 case DEBUG_PARAMETER_REF:
-  hash += ((unsigned) DEBUG_PARAMETER_REF << 7)
- + DECL_UID (DEBUG_PARAMETER_REF_DECL (x));
-  return hash ? hash : (unsigned int) DEBUG_PARAMETER_REF;
+  hash.add_int (DECL_UID (DEBUG_PARAMETER_REF_DECL (x)));
+  return hash.end () ? hash.end () : (unsigned int) DEBUG_PARAMETER_REF;
 
 case ENTRY_VALUE:
   /* ENTRY_VALUEs are function invariant, thus try to avoid
@@ -1347,51 +1344,49 @@ cselib_hash_rtx (rtx x, int create, machine_mode 
memmode)
 ENTRY_VALUE hash would depend on the current value
 in some register or memory.  */
   if (REG_P (ENTRY_VALUE_EXP (x)))
-   hash += (unsigned int) REG
-   + (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
-   + (unsigned int) REGNO (ENTRY_VALUE_EXP (x));
+   hash.add_int ((unsigned int) REG
+ + (unsigned int) GET_MODE (ENTRY_VALUE_EXP (x))
+ + (unsigned int) REGNO (ENTRY_VALUE_EXP (x)));
   else if (MEM_P (ENTRY_VALUE_EXP (x))
   && REG_P (XEXP (ENTRY_VALUE_EXP (x), 0)))
-   hash += (unsigned int) MEM
-   + (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
-   + (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0));
+   hash.add_int ((unsigned int) MEM
+ + (unsigned int) GET_MODE (XEXP (ENTRY_VALUE_EXP (x), 0))
+ + (unsigned int) REGNO (XEXP (ENTRY_VALUE_EXP (x), 0)));
   else
-   hash += cselib_hash_rtx (ENTRY_VALUE_EXP (x), create, memmode);
-  return hash ? hash : (unsigned int) ENTRY_VALUE;
+   hash.add_int (c

Re: [PATCH] testsuite: Allow vst1 instruction

2024-07-19 Thread Richard Earnshaw (lists)
On 19/07/2024 14:07, Torbjörn SVENSSON wrote:
> Ok for trunk and releases/gcc-14?
> 
> --
> 
> On Cortex-A7 with -mfloat-abi=hard and -mfpu=neon, the following
> instructions are generated for the test case:
> 
> vldrd16, .L3
> vst1.32 {d16}, [r0]
> 
> For Cortex-A7 with -mfloat-abi=soft, it's instead:
> 
> movsr2, #1
> movsr3, #0
> strdr2, r3, [r0]
> 
> Both these are valid and should not cause the test to fail.
> For Cortex-M0/3/4/7/33, they all generate the same instructions as
> Cortex-A7 with -mfloat-abi=soft.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/pr40457-2.c: Add vst1 to allowed instructions.
> 
> Signed-off-by: Torbjörn SVENSSON 
> ---
>  gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
> b/gcc/testsuite/gcc.target/arm/pr40457-2.c
> index 31624d35127..6aed42a4fbc 100644
> --- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
> +++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
> @@ -7,4 +7,4 @@ void foo(int* p)
>p[1] = 0;
>  }
>  
> -/* { dg-final { scan-assembler "strd|stm" } } */
> +/* { dg-final { scan-assembler "strd|stm|vst1" } } */

Whilst the code generated is functionally correct, I'm not sure the compiler 
should be generating that.  This new code needs more code space (the constant 
isn't shown, but it needs 8 bytes in the literal pool).  I suspect this means 
the SLP cost model for arm is in need of attention.

R.


Re: [PATCH] testsuite: Avoid running incompatible Arm tests

2024-07-19 Thread Richard Earnshaw (lists)
On 12/07/2024 09:02, Torbjörn SVENSSON wrote:
> Ok for trunk and releases/gcc-14?
> 
> --
> 
> Overriding the -mpfu and -mfloat-abi might be incompatible with selected
> multilib. As a result, verify that the current multilib is compatible
> with the effective target without changing the -mfpu or -mfloat-abi
> options.
> 
> gcc/testsuite/ChangeLog:
> 
>   * lib/target-supports.exp
>   (check_effective_target_arm_hard_vfp_ok): Check -mfpu value.
>   (check_effective_target_arm_fp16_alternative_ok_nocache):
>   Reuse check_effective_target_arm_fp16_ok.
>   (check_effective_target_arm_fp16_none_ok_nocache): Likewise.
>   (check_effective_target_arm_v8_neon_ok_nocache): Align checks
>   with skeleton from check_effective_target_arm_fp16_ok_nocache.
>   (check_effective_target_arm_neonv2_ok_nocache): Likewise.> 

Hmm, this is a tricky one.  I really want to be moving away from any value 
being passed to -mfpu other than "auto" and this looks like it's moving in the 
wrong direction.

Which tests does this affect?

R.

> Signed-off-by: Torbjörn SVENSSON 
> Co-authored-by: Yvan ROUX 
> ---
>  gcc/testsuite/lib/target-supports.exp | 119 ++
>  1 file changed, 83 insertions(+), 36 deletions(-)
> 
> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index f001c28072f..f90b80bb495 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4829,6 +4829,7 @@ proc check_effective_target_arm_v8_vfp_ok {} {
>  
>  proc check_effective_target_arm_hard_vfp_ok { } {
>  if { [check_effective_target_arm32]
> +  && ! [check-flags [list "" { *-*-* } { "-mfpu=*" } { "-mfpu=vfp" }]]
>&& ! [check-flags [list "" { *-*-* } { "-mfloat-abi=*" } { 
> "-mfloat-abi=hard" }]] } {
>   return [check_no_compiler_messages arm_hard_vfp_ok executable {
>   int main() { return 0;}
> @@ -5405,11 +5406,12 @@ proc 
> check_effective_target_arm_fp16_alternative_ok_nocache { } {
>   # Not supported by the target system.
>   return 0
>  }
> +global et_arm_fp16_flags
>  global et_arm_fp16_alternative_flags
>  set et_arm_fp16_alternative_flags ""
> -if { [check_effective_target_arm32] } {
> - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
> -"-mfpu=neon-fp16 -mfloat-abi=softfp"} {
> +
> +if { [check_effective_target_arm32] && 
> [check_effective_target_arm_fp16_ok] } {
> + foreach flags [list "" $et_arm_fp16_flags] {
>   if { [check_no_compiler_messages_nocache \
> arm_fp16_alternative_ok object {
>   #if !defined (__ARM_FP16_FORMAT_ALTERNATIVE) || ! (__ARM_FP & 2)
> @@ -5434,9 +5436,9 @@ proc check_effective_target_arm_fp16_alternative_ok { } 
> {
>  # format.  Some multilibs may be incompatible with the options needed.
>  
>  proc check_effective_target_arm_fp16_none_ok_nocache { } {
> -if { [check_effective_target_arm32] } {
> - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
> -"-mfpu=neon-fp16 -mfloat-abi=softfp"} {
> +global et_arm_fp16_flags
> +if { [check_effective_target_arm32] && 
> [check_effective_target_arm_fp16_ok] } {
> + foreach flags [list "" $et_arm_fp16_flags] {
>   if { [check_no_compiler_messages_nocache \
> arm_fp16_none_ok object {
>   #if defined (__ARM_FP16_FORMAT_ALTERNATIVE)
> @@ -5467,23 +5469,46 @@ proc check_effective_target_arm_fp16_none_ok { } {
>  proc check_effective_target_arm_v8_neon_ok_nocache { } {
>  global et_arm_v8_neon_flags
>  set et_arm_v8_neon_flags ""
> -if { [check_effective_target_arm32] } {
> - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" 
> "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
> - if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
> - #if __ARM_ARCH < 8
> - #error not armv8 or later
> - #endif
> - #include "arm_neon.h"
> - void
> - foo ()
> - {
> -   __asm__ volatile ("vrintn.f32 q0, q0");
> - }
> - } "$flags -march=armv8-a"] } {
> - set et_arm_v8_neon_flags $flags
> - return 1
> - }
> +if { ! [check_effective_target_arm32] } {
> + return 0;
> +}
> +if [check-flags \
> + [list "" { *-*-* } { "-mfpu=*" } \
> +  { "-mfpu=*fp-armv8*" } ]] {
> + # Multilib flags would override -mfpu.
> + return 0
> +}
> +if [check-flags [list "" { *-*-* } { "-mfloat-abi=soft" } { "" } ]] {
> + # Must generate floating-point instructions.
> + return 0
> +}
> +if [check_effective_target_arm_hf_eabi] {
> + # Use existing float-abi and force an fpu which supports fp16
> + set et_arm_v8_neon_flags "-mfpu=neon-fp-armv8"
> + return 1;
> +}
> +if [check-flags [list "" { *-*-* 

Re: [PATCH] c++: normalizing ttp parm constraints [PR115656]

2024-07-19 Thread Patrick Palka
On Fri, Jul 5, 2024 at 12:18 PM Patrick Palka  wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
> for trunk/14 and perhaps 13?
>
> Alternatively we can set current_template_parms from weakly_subsumes
> instead, who has only one caller anyway.

Ping.

>
> -- >8 --
>
> Here we normalize the constraint same_as for the first
> time during constraint subsumption checking of B / TT as part of ttp
> coercion.  During this normalization the set of in-scope template
> parameters i.e. current_template_parms is empty, which tricks the
> satisfaction cache into thinking that the satisfaction value of the
> constraint is independent of its template parameters, and we incorrectly
> conflate the satisfaction value with auto = bool vs auto = long and
> accept the specialization A.
>
> This patch fixes this by setting current_template_parms appropirately
> during subsumption checking.
>
> PR c++/115656
>
> gcc/cp/ChangeLog:
>
> * pt.cc (is_compatible_template_arg): Set current_template_parms
> around the call to weakly_subsumes.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp2a/concepts-ttp7.C: New test.
> ---
>  gcc/cp/pt.cc   |  4 
>  gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C | 12 
>  2 files changed, 16 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C
>
> diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> index 017cc7fd0ab..1f6553790a5 100644
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -8493,6 +8493,10 @@ is_compatible_template_arg (tree parm, tree arg, tree 
> args)
>  return false;
>  }
>
> +  /* Normalization needs to know the effective set of in-scope
> + template parameters.  */
> +  auto ctp = make_temp_override (current_template_parms,
> +DECL_TEMPLATE_PARMS (arg));
>return weakly_subsumes (parm_cons, arg);
>  }
>
> diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C 
> b/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C
> new file mode 100644
> index 000..bc123ecf75e
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C
> @@ -0,0 +1,12 @@
> +// PR c++/115656
> +// { dg-do compile { target c++20 } }
> +
> +template concept same_as = __is_same(T, U);
> +
> +template U, template> class TT>
> +struct A { };
> +
> +template> class B;
> +
> +A a1;
> +A a2; // { dg-error "constraint failure" }
> --
> 2.45.2.746.g06e570c0df
>



[PATCH v1 0/3][libgcc] store signing key and signing method in DWARF _Unwind_FrameState

2024-07-19 Thread Matthieu Longo
This patch series is only a refactoring of the existing implementation of PAuth 
and returned-address signing. The existing behavior is preserved.

1. aarch64: store signing key and signing method in DWARF _Unwind_FrameState

_Unwind_FrameState already contains several CIE and FDE information (see the 
attributes below the comment "The information we care about from the CIE/FDE" 
in libgcc/unwind-dw2.h).
The patch aims at moving the information from DWARF CIE (signing key stored in 
the augmentation string) and FDE (the used signing method) into 
_Unwind_FrameState along the already-stored CIE and FDE information.
Note: those information have to be saved in frame_state_reg_info instead of 
_Unwind_FrameState as they need to be savable by DW_CFA_remember_state and 
restorable by DW_CFA_restore_state, that both rely on the attribute "prev".
Those new information in _Unwind_FrameState simplifies the look-up of the 
signing key when the return address is demangled. It also allows future signing 
methods to be easily added.
_Unwind_FrameState is not a part of the public API of libunwind, so the change 
is backward compatible.

A new architecture-specific handler MD_ARCH_EXTENSION_FRAME_INIT allows to 
reset values in the frame state and unwind context if needed by the 
architecture extension before changing the frame state to the caller context.
A new architecture-specific handler MD_ARCH_EXTENSION_CIE_AUG_HANDLER isolates 
the architecture-specific augmentation strings in AArch64 backend, and allows 
others architectures to reuse augmentation strings that would have clashed with 
AArch64 DWARF extensions.
aarch64_demangle_return_addr, DW_CFA_AARCH64_negate_ra_state and 
DW_CFA_val_expression cases in libgcc/unwind-dw2-execute_cfa.h were documented 
to clarify where the value of the RA state register is stored (FS and CONTEXT 
respectively).

2. libgcc: hide CIE and FDE data for DWARF architecture extensions behind a 
handler.

This patch provides a new handler MD_ARCH_FRAME_STATE_T to hide an 
architecture-specific structure containing CIE and FDE data related to DWARF 
architecture extensions.
Hiding the architecture-specific attributes behind a handler has the following 
benefits:
1. isolating those data from the generic ones in _Unwind_FrameState
2. avoiding casts to custom types.
3. preserving typing information when debugging with GDB, and so 
facilitating their printing.

This approach required to add a new header md-unwind-def.h included at the top 
of libgcc/unwind-dw2.h, and redirecting to the corresponding architecture 
header via a symbolic link.
An obvious drawback is the increase in complexity with macros, and headers. It 
also caused a split of architecture definitions between md-unwind-def.h (types 
definitions used in unwind-dw2.h) and md-unwind.h (local types definitions and 
handlers implementations).
The naming of md-unwind.h with .h extension is a bit misleading as the file is 
only included in the middle of unwind-dw2.c. Changing this naming would require 
modification of others backends, which I prefered to abstain from.
Overall the benefits are worth the added complexity from my perspective.

3. libgcc: update configure (regenerated by autoreconf)

Regenerate the build files.


## Testing

Those changes were testing by covering the 3 following cases:
- backtracing.
- exception handling in a C++ program.
- gcc/testsuite/gcc.target/aarch64/pr104689.c: pac-ret with unusual DWARF [1]

Regression tested on aarch64-unknown-linux-gnu, and no regression found.

[1]: https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594414.html


Ok for master? I don't have commit access so I need someone to commit on my 
behalf.

Regards,
Matthieu.


Matthieu Longo (3):
  aarch64: store signing key and signing method in DWARF
_Unwind_FrameState
  libgcc: hide CIE and FDE data for DWARF architecture extensions behind
a handler.
  libgcc: update configure (regenerated by autoreconf)

 libgcc/Makefile.in |   6 +-
 libgcc/config.host |  13 +-
 libgcc/config/aarch64/aarch64-unwind-def.h |  41 ++
 libgcc/config/aarch64/aarch64-unwind.h | 150 +
 libgcc/configure   |   2 +
 libgcc/configure.ac|   1 +
 libgcc/unwind-dw2-execute_cfa.h|  34 +++--
 libgcc/unwind-dw2.c|  19 ++-
 libgcc/unwind-dw2.h|  19 ++-
 9 files changed, 233 insertions(+), 52 deletions(-)
 create mode 100644 libgcc/config/aarch64/aarch64-unwind-def.h

-- 
2.45.2



[PATCH v1 1/3] aarch64: store signing key and signing method in DWARF _Unwind_FrameState

2024-07-19 Thread Matthieu Longo
This patch is only a refactoring of the existing implementation
of PAuth and returned-address signing. The existing behavior is
preserved.

_Unwind_FrameState already contains several CIE and FDE information
(see the attributes below the comment "The information we care
about from the CIE/FDE" in libgcc/unwind-dw2.h).
The patch aims at moving the information from DWARF CIE (signing
key stored in the augmentation string) and FDE (the used signing
method) into _Unwind_FrameState along the already-stored CIE and
FDE information.
Note: those information have to be saved in frame_state_reg_info
instead of _Unwind_FrameState as they need to be savable by
DW_CFA_remember_state and restorable by DW_CFA_restore_state, that
both rely on the attribute "prev".

Those new information in _Unwind_FrameState simplifies the look-up
of the signing key when the return address is demangled. It also
allows future signing methods to be easily added.

_Unwind_FrameState is not a part of the public API of libunwind,
so the change is backward compatible.

A new architecture-specific handler MD_ARCH_EXTENSION_FRAME_INIT
allows to reset values (if needed) in the frame state and unwind
context before changing the frame state to the caller context.

A new architecture-specific handler MD_ARCH_EXTENSION_CIE_AUG_HANDLER
isolates the architecture-specific augmentation strings in AArch64
backend, and allows others architectures to reuse augmentation
strings that would have clashed with AArch64 DWARF extensions.

aarch64_demangle_return_addr, DW_CFA_AARCH64_negate_ra_state and
DW_CFA_val_expression cases in libgcc/unwind-dw2-execute_cfa.h
were documented to clarify where the value of the RA state register
is stored (FS and CONTEXT respectively).

libgcc/ChangeLog:

  * config/aarch64/aarch64-unwind.h
  (AARCH64_DWARF_RA_STATE_MASK): The mask for RA state register.
  (aarch64_RA_signing_method_t): The diversifiers used to sign a
  function's return address.
  (aarch64_pointer_auth_key): The key used to sign a function's
  return address.
  (aarch64_cie_signed_with_b_key): Deleted as the signing key is
  available now in _Unwind_FrameState.
  (MD_ARCH_EXTENSION_CIE_AUG_HANDLER): New CIE augmentation string
  handler for architecture extensions.
  (MD_ARCH_EXTENSION_FRAME_INIT): New architecture-extension
  initialization routine for DWARF frame state and context before
  execution of DWARF instructions.
  (aarch64_context_RA_state_get): Read RA state register from CONTEXT.
  (aarch64_RA_state_get): Read RA state register from FS.
  (aarch64_RA_state_set): Write RA state register into FS.
  (aarch64_RA_state_toggle): Toggle RA state register in FS.
  (aarch64_cie_aug_handler): Handler AArch64 augmentation strings.
  (aarch64_arch_extension_frame_init): Initialize defaults for the
  signing key (PAUTH_KEY_A), and RA state register (RA_no_signing).
  (aarch64_demangle_return_addr): Rely on the frame registers and
  the signing_key attribute in _Unwind_FrameState.
  * unwind-dw2-execute_cfa.h:
  Use the right alias DW_CFA_AARCH64_negate_ra_state for __aarch64__
  instead of DW_CFA_GNU_window_save.
  (DW_CFA_AARCH64_negate_ra_state): Save the signing method in RA
  state register. Toggle RA state register without resetting 'how'
  to REG_UNSAVED.
  * unwind-dw2.c:
  (extract_cie_info): Save the signing key in the current
  _Unwind_FrameState while parsing the augmentation data.
  (uw_frame_state_for): Reset some attributes related to architecture
  extensions in _Unwind_FrameState.
  (uw_update_context): Move authentication code to AArch64 unwinding.
  * unwind-dw2.h (enum register_rule): Give a name to the existing
  enum for the register rules, and replace 'unsigned char' by 'enum
  register_rule' to facilitate debugging in GDB.
  (_Unwind_FrameState): Add a new architecture-extension attribute
  to store the signing key.
---
 libgcc/config/aarch64/aarch64-unwind.h | 154 -
 libgcc/unwind-dw2-execute_cfa.h|  34 --
 libgcc/unwind-dw2.c|  19 ++-
 libgcc/unwind-dw2.h|  17 ++-
 4 files changed, 175 insertions(+), 49 deletions(-)

diff --git a/libgcc/config/aarch64/aarch64-unwind.h 
b/libgcc/config/aarch64/aarch64-unwind.h
index daf96624b5e..cc225a7e207 100644
--- a/libgcc/config/aarch64/aarch64-unwind.h
+++ b/libgcc/config/aarch64/aarch64-unwind.h
@@ -25,55 +25,155 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #if !defined (AARCH64_UNWIND_H) && !defined (__ILP32__)
 #define AARCH64_UNWIND_H
 
-#define DWARF_REGNUM_AARCH64_RA_STATE 34
+#include "ansidecl.h"
+#include 
+
+#define AARCH64_DWARF_REGNUM_RA_STATE 34
+#define AARCH64_DWARF_RA_STATE_MASK   0x1
+
+/* The diversifiers used to sign a function's return address. */
+typedef enum
+{
+  AARCH64_RA_no_signing = 0x0,
+  AARCH64_RA_signing_SP = 0x1,
+} __attribute__((packed)) aarch64_RA_signing_method_t;
+
+/* The key used to sign a function's return address. */
+typedef enum {
+  AARCH64_PAUTH_KE

[PATCH v1 2/3] libgcc: hide CIE and FDE data for DWARF architecture extensions behind a handler.

2024-07-19 Thread Matthieu Longo
This patch provides a new handler MD_ARCH_FRAME_STATE_T to hide an
architecture-specific structure containing CIE and FDE data related
to DWARF architecture extensions.

Hiding the architecture-specific attributes behind a handler has the
following benefits:
1. isolating those data from the generic ones in _Unwind_FrameState
2. avoiding casts to custom types.
3. preserving typing information when debugging with GDB, and so
   facilitating their printing.

This approach required to add a new header md-unwind-def.h included at
the top of libgcc/unwind-dw2.h, and redirecting to the corresponding
architecture header via a symbolic link.

An obvious drawback is the increase in complexity with macros, and
headers. It also caused a split of architecture definitions between
md-unwind-def.h (types definitions used in unwind-dw2.h) and
md-unwind.h (local types definitions and handlers implementations).
The naming of md-unwind.h with .h extension is a bit misleading as
the file is only included in the middle of unwind-dw2.c. Changing
this naming would require modification of others backends, which I
prefered to abstain from. Overall the benefits are worth the added
complexity from my perspective.

libgcc/ChangeLog:

* Makefile.in: New target for symbolic link to md-unwind-def.h
* config.host: New parameter md_unwind_def_header. Set it to
aarch64/aarch64-unwind-def.h for AArch64 targets, or no-unwind.h
by default.
* config/aarch64/aarch64-unwind.h
(aarch64_pointer_auth_key): Move to aarch64-unwind-def.h
(aarch64_cie_aug_handler): Update.
(aarch64_arch_extension_frame_init): Update.
(aarch64_demangle_return_addr): Update.
* configure.ac: New substitute variable md_unwind_def_header.
* unwind-dw2.h (defined): MD_ARCH_FRAME_STATE_T.
* config/aarch64/aarch64-unwind-def.h: New file.
---
 libgcc/Makefile.in |  6 +++-
 libgcc/config.host | 13 +--
 libgcc/config/aarch64/aarch64-unwind-def.h | 41 ++
 libgcc/config/aarch64/aarch64-unwind.h | 14 +++-
 libgcc/configure.ac|  1 +
 libgcc/unwind-dw2.h|  6 ++--
 6 files changed, 67 insertions(+), 14 deletions(-)
 create mode 100644 libgcc/config/aarch64/aarch64-unwind-def.h

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 0e46e9ef768..ffc45f21267 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -47,6 +47,7 @@ with_aix_soname = @with_aix_soname@
 solaris_ld_v2_maps = @solaris_ld_v2_maps@
 enable_execute_stack = @enable_execute_stack@
 unwind_header = @unwind_header@
+md_unwind_def_header = @md_unwind_def_header@
 md_unwind_header = @md_unwind_header@
 sfp_machine_header = @sfp_machine_header@
 thread_header = @thread_header@
@@ -358,13 +359,16 @@ SHLIBUNWIND_INSTALL =
 
 
 # Create links to files specified in config.host.
-LIBGCC_LINKS = enable-execute-stack.c unwind.h md-unwind-support.h \
+LIBGCC_LINKS = enable-execute-stack.c \
+   unwind.h md-unwind-def.h md-unwind-support.h \
sfp-machine.h gthr-default.h
 
 enable-execute-stack.c: $(srcdir)/$(enable_execute_stack)
-$(LN_S) $< $@
 unwind.h: $(srcdir)/$(unwind_header)
-$(LN_S) $< $@
+md-unwind-def.h: $(srcdir)/config/$(md_unwind_def_header)
+   -$(LN_S) $< $@
 md-unwind-support.h: $(srcdir)/config/$(md_unwind_header)
-$(LN_S) $< $@
 sfp-machine.h: $(srcdir)/config/$(sfp_machine_header)
diff --git a/libgcc/config.host b/libgcc/config.host
index 9fae51d4ce7..61825e72fe4 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -51,8 +51,10 @@
 #  If either is set, EXTRA_PARTS and
 #  EXTRA_MULTILIB_PARTS inherited from the GCC
 #  subdirectory will be ignored.
-#  md_unwind_headerThe name of a header file defining
-#  MD_FALLBACK_FRAME_STATE_FOR.
+#  md_unwind_def_header The name of a header file defining 
architecture-specific
+#  frame information types for unwinding.
+#  md_unwind_headerThe name of a header file defining architecture-specific
+#  handlers used in the unwinder.
 #  sfp_machine_header  The name of a sfp-machine.h header file for soft-fp.
 #  Defaults to "$cpu_type/sfp-machine.h" if it exists,
 #  no-sfp-machine.h otherwise.
@@ -72,6 +74,7 @@ extra_parts=
 tmake_file=
 tm_file=
 tm_define=
+md_unwind_def_header=no-unwind.h
 md_unwind_header=no-unwind.h
 unwind_header=unwind-generic.h
 
@@ -403,6 +406,7 @@ aarch64*-*-elf | aarch64*-*-rtems*)
tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
tmake_file="${tmake_file} t-dfprules"
+   md_unwind_def_header=aarch64/aarch64-unwind-def.h
md_unwind_header=aarch64/aarch64-unwind.h
;;
 aarch64*-*-freebsd*)
@@ -411,6 +415,7 @@ aarch64*-*-freebs

Re: [PATCH] c++: missing SFINAE during alias CTAD [PR115296]

2024-07-19 Thread Patrick Palka
On Fri, Jul 5, 2024 at 1:50 PM Patrick Palka  wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> OK for trunk/14?
>
> -- >8 --
>
> During the alias CTAD transformation, if substitution failed for some
> guide we should just discard the guide silently.  We currently do
> discard the guide, but not silently, which causes us to reject the
> below testcase due to forming a too-large array type when transforming
> the user-defined deduction guides.
>
> This patch fixes this by passing complain=tf_none instead of
> =tf_warning_or_error in the couple of spots where we expect subsitution
> to possibly fail and so subsequently check for error_mark_node.

Ping.  Alternatively we could set complain=tf_none everywhere.

>
> PR c++/115296
>
> gcc/cp/ChangeLog:
>
> * pt.cc (alias_ctad_tweaks): Pass complain=tf_none to
> tsubst_decl and tsubst_constraint_info.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp2a/class-deduction-alias23.C: New test.
> ---
>  gcc/cp/pt.cc  |  4 ++--
>  .../g++.dg/cpp2a/class-deduction-alias23.C| 20 +++
>  2 files changed, 22 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C
>
> diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> index d1316483e24..a382dce8788 100644
> --- a/gcc/cp/pt.cc
> +++ b/gcc/cp/pt.cc
> @@ -30451,7 +30451,7 @@ alias_ctad_tweaks (tree tmpl, tree uguides)
>   /* Parms are to have DECL_CHAIN tsubsted, which would be skipped
>  if cp_unevaluated_operand.  */
>   cp_evaluated ev;
> - g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, complain,
> + g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, tf_none,
>/*use_spec_table=*/false);
> }
>   if (g == error_mark_node)
> @@ -30478,7 +30478,7 @@ alias_ctad_tweaks (tree tmpl, tree uguides)
> {
>   if (tree outer_targs = outer_template_args (f))
> ci = tsubst_constraint_info (ci, outer_targs, complain, 
> in_decl);
> - ci = tsubst_constraint_info (ci, targs, complain, in_decl);
> + ci = tsubst_constraint_info (ci, targs, tf_none, in_decl);
> }
>   if (ci == error_mark_node)
> continue;
> diff --git a/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C 
> b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C
> new file mode 100644
> index 000..e5766586761
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C
> @@ -0,0 +1,20 @@
> +// PR c++/115296
> +// { dg-do compile { target c++20 } }
> +
> +using size_t = decltype(sizeof(0));
> +
> +template
> +struct span { span(T); };
> +
> +template
> +span(T(&)[N]) -> span; // { dg-bogus "array exceeds maximum" }
> +
> +template
> +requires (sizeof(T(&)[N]) != 42) // { dg-bogus "array exceeds maximum" }
> +span(T*) -> span;
> +
> +template
> +using array_view = span;
> +
> +span x = 0;
> +array_view y = 0;
> --
> 2.45.2.746.g06e570c0df
>



[PATCH v1 3/3] libgcc: update configure (regenerated by autoreconf)

2024-07-19 Thread Matthieu Longo
libgcc/ChangeLog:

* configure: Regenerate.
---
 libgcc/configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/configure b/libgcc/configure
index a69d314374a..15a0be23644 100755
--- a/libgcc/configure
+++ b/libgcc/configure
@@ -587,6 +587,7 @@ ac_includes_default='/* none */'
 ac_subst_vars='LTLIBOBJS
 LIBOBJS
 md_unwind_header
+md_unwind_def_header
 unwind_header
 enable_execute_stack
 asm_hidden_op
@@ -5786,6 +5787,7 @@ fi
 
 
 
+
 # We need multilib support.
 ac_config_files="$ac_config_files Makefile"
 
-- 
2.45.2



Re: [patch,avr] Support new built-in for faster mask computation

2024-07-19 Thread Jeff Law




On 7/18/24 3:12 PM, Georg-Johann Lay wrote:

This new builtin provides a faster way to compute
expressions like  1 << x  or ~(1 << x) that are sometimes
used as masks for setting bits in the I/O region, and when
x is not known at compile-time.

The open coded C expression takes 5 + 4 * x cycles to compute an
8-bit result, whereas the builtin takes only 7 cycles independent
of x.

The implementation is straight forward and uses 3 new insns.

Ok for trunk?

Johann

--

AVR: Support new built-in function __builtin_avr_mask1.

gcc/
 * config/avr/builtins.def (MASK1): New DEF_BUILTIN.
 * config/avr/avr.cc (avr_rtx_costs_1): Handle rtx costs for
 expressions like __builtin_avr_mask1.
 (avr_init_builtins) : New tree type.
 (avr_expand_builtin) [AVR_BUILTIN_MASK1]: Diagnose unexpected forms.
 (avr_fold_builtin) [AVR_BUILTIN_MASK1]: Handle case.
 * config/avr/avr.md (gen_mask1): New expand helper.
 (mask1_0x01_split, mask1_0x80_split, mask1_0xfe_split): New
 insn-and-split.
 (*mask1_0x01, *mask1_0x80, *mask1_0xfe): New insns.
 * doc/extend.texi (AVR Built-in Functions) <__builtin_avr_mask1>:
 Document new built-in function.
gcc/testsuite/
 * gcc.target/avr/torture/builtin-mask1.c: New test.

OK from a technical standpoint.  Just a few nits.




builtin-mask1.diff

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b9064424ffe..d8417c4ba3e 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -13135,6 +13135,16 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,
switch (mode)
{
case E_QImode:
+ if (speed
+ && XEXP (x, 0) == const1_rtx

CONST1_RTX (mode) is probably more appropriate.



+ && AND == GET_CODE (XEXP (x, 1)))

We generally write conditions with the constant as the 2nd operand.  So
GET_CODE (XEXP (x, 1) == AND




@@ -13308,6 +13318,15 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,
  break;
  
  	case E_HImode:

+ if (CONST_INT_P (XEXP (x, 0))
+ && INTVAL (XEXP (x, 0)) == 128
+ && AND == GET_CODE (XEXP (x, 1)))

Similarly for this condition.




+switch (0xff & INTVAL (operands[1]))

Similarly here.



+{
+  case 0x01: emit (gen_mask1_0x01_split (operands[0], operands[2])); break;
+  case 0x80: emit (gen_mask1_0x80_split (operands[0], operands[2])); break;
+  case 0xfe: emit (gen_mask1_0xfe_split (operands[0], operands[2])); break;
Don't put code on the same line as the case.  Bring it down to the next 
line and indent 2 positions further.


Those are all NFC, so approved with those changes.

I'm not sure the builtin is strictly needed since this could likely be 
handled in the shift expanders and/or combiner patterns.  But I've got 
no objection to adding the builtin.


jeff



Re: [PATCH] c++: xobj fn call without obj [PR115783]

2024-07-19 Thread Patrick Palka
On Sat, Jul 6, 2024 at 8:22 PM Patrick Palka  wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this
> look OK for trunk/14?

Ping.

>
> -- >8 --
>
> The code path for rejecting an object-less call to a non-static
> member function should also consider xobj member functions so
> that we properly reject the below calls with a "cannot call
> member function without object" diagnostic.
>
> PR c++/115783
>
> gcc/cp/ChangeLog:
>
> * call.cc (build_new_method_call): Generalize METHOD_TYPE
> check to DECL_OBJECT_MEMBER_FUNCTION_P.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/cpp23/explicit-obj-diagnostics11.C: New test.
> ---
>  gcc/cp/call.cc|  2 +-
>  .../g++.dg/cpp23/explicit-obj-diagnostics11.C | 48 +++
>  2 files changed, 49 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C
>
> diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
> index 83070b2f633..41ef7562f27 100644
> --- a/gcc/cp/call.cc
> +++ b/gcc/cp/call.cc
> @@ -11822,7 +11822,7 @@ build_new_method_call (tree instance, tree fns, 
> vec **args,
>  fn);
> }
>
> - if (TREE_CODE (TREE_TYPE (fn)) == METHOD_TYPE
> + if (DECL_OBJECT_MEMBER_FUNCTION_P (fn)
>   && !DECL_CONSTRUCTOR_P (fn)
>   && is_dummy_object (instance))
> {
> diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C 
> b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C
> new file mode 100644
> index 000..cc2571f62a2
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C
> @@ -0,0 +1,48 @@
> +// PR c++/115783
> +// { dg-do compile { target c++23 } }
> +
> +struct A {
> +  int f(this auto);
> +
> +  static void s() {
> +f(); // { dg-error "without object" }
> +  }
> +};
> +
> +int n = A::f(); // { dg-error "without object" }
> +
> +struct B {
> +  void ns() {
> +A::f(); // { dg-error "without object" }
> +  }
> +
> +  static void s() {
> +A::f(); // { dg-error "without object" }
> +  }
> +};
> +
> +template
> +struct C {
> +  void ns() {
> +A::f(); // { dg-error "without object" }
> +T::f(); // { dg-error "without object" }
> +  }
> +
> +  static void s() {
> +A::f(); // { dg-error "without object" }
> +T::f(); // { dg-error "without object" }
> +  };
> +};
> +
> +template struct C;
> +
> +template
> +struct D : T {
> +  void ns() {
> +A::f(); // { dg-error "without object" }
> +T::f(); // { dg-error "not a member of 'B'" }
> +  }
> +};
> +
> +template struct D; // { dg-message "required from here" }
> +template struct D; // { dg-bogus "required from here" }
> --
> 2.45.2.746.g06e570c0df
>



Re: [PATCH v2 1/2] arm: [MVE intrinsics] fix vdup iterator

2024-07-19 Thread Richard Earnshaw (lists)
On 11/07/2024 10:36, Christophe Lyon wrote:
> This patch fixes a bug where the mode iterator for mve_vdup
> should be MVE_VLD_ST instead of MVE_vecs: V2DI and V2DF (thus vdup.64)
> are not supported by MVE.
> 
> 2024-07-02  Jolen Li  
>   Christophe Lyon  
> 
>   gcc/
>   * config/arm/mve.md (mve_vdup): Fix mode iterator.

OK.

R.

> ---
>  gcc/config/arm/mve.md | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 4b4d6298ffb..afe5fba698c 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -95,8 +95,8 @@ (define_insn "mve_mov"
> (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
>  
>  (define_insn "mve_vdup"
> -  [(set (match_operand:MVE_vecs 0 "s_register_operand" "=w")
> - (vec_duplicate:MVE_vecs
> +  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
> + (vec_duplicate:MVE_VLD_ST
> (match_operand: 1 "s_register_operand" "r")))]
>"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
>"vdup.\t%q0, %1"



[Patch, v3] gcn/mkoffload.cc: Use #embed for including the generated ELF file

2024-07-19 Thread Tobias Burnus

Hi,

Jakub Jelinek wrote:

+  "#if defined(__STDC_EMBED_FOUND__) && __has_embed (\"%s\") == 
__STDC_EMBED_FOUND__\n"

If this was an attempt to deal gracefully with no #embed support, then
the above would be wrong and should have been
#if defined(__STDC_EMBED_FOUND__) && defined(__has_embed)
#if __has_embed ("whatever") == __STDC_EMBED_FOUND__


I was kind of both – assuming that #embed is available (as it should be 
compiled by the accompanied compiler) but handle the case that it is not.


However, as '#embed' is well diagnosed if unsupported, that part is not 
really needed.



Now, if all you want is an error if the file doesn't exist, then
#embed "whatever"
will do that too […]

If you want an error not just when it doesn't exist, but also when it
is empty, then you could do
#embed "whatever" if_empty (%%%)


The idea was to also error out if the file is empty – as that shouldn't 
happen here: if offloading code was found, the code gen should be done. 
However, using an invalid expression seems to be a good idea as that's 
really a special case that shouldn't happen.


* * *

I have additionally replaced the #include by __UINTPTR_TYPE__ and 
__SIZE_TYPE__ to avoid including 3 header files; this doesn't have a 
large effect, but still.


Updated patch attached.

OK for mainline, once Jakub's #embed is committed?

* * *

BTW: Testing shows for a hello world program (w/o #embed patch)

For -foffload=...: 'disable' 0.04s, 'nvptx-none' 0.15s, 'amdgcn-amdhsa' 
1.2s.


With a simple #embed (this patch plus Jakub's first patch), the 
performance is unchanged. I then applied Jakub's follow up patches, but 
I then get an ICE (Jakub will have a look).


But compiling it with 'g++' (→ COLLECT_GCC is g++) works; result: takes 
0.2s (~6× faster) and compiling for both nvptx and gcn takes 0.3s, 
nearly 5× faster.


Tobias
 gcn/mkoffload.cc: Use #embed for including the generated ELF file

gcc/ChangeLog:

	* config/gcn/mkoffload.cc (read_file): Remove.
	(process_asm): Do not add '#include' to generated C file.
	(process_obj): Generate C file that uses #embed and use
	__SIZE_TYPE__ and __UINTPTR_TYPE__ instead the #include-defined
	size_t and uintptr.
	(main): Update call to it; remove no longer needed file I/O.

 gcc/config/gcn/mkoffload.cc | 79 +++--
 1 file changed, 12 insertions(+), 67 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a799b..c3c998639ff 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@ xputenv (const char *string)
   putenv (CONST_CAST (char *, string));
 }
 
-/* Read the whole input file.  It will be NUL terminated (but
-   remember, there could be a NUL in the file itself.  */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
-  size_t alloc = 16384;
-  size_t base = 0;
-  char *buffer;
-
-  if (!fseek (stream, 0, SEEK_END))
-{
-  /* Get the file size.  */
-  long s = ftell (stream);
-  if (s >= 0)
-	alloc = s + 100;
-  fseek (stream, 0, SEEK_SET);
-}
-  buffer = XNEWVEC (char, alloc);
-
-  for (;;)
-{
-  size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
-  if (!n)
-	break;
-  base += n;
-  if (base + 1 == alloc)
-	{
-	  alloc *= 2;
-	  buffer = XRESIZEVEC (char, buffer, alloc);
-	}
-}
-  buffer[base] = 0;
-  *plen = base;
-  return buffer;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'.  */
 
@@ -657,10 +619,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
   struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
   struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *);
 
-  fprintf (cfile, "#include \n");
-  fprintf (cfile, "#include \n");
-  fprintf (cfile, "#include \n\n");
-
   fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
   fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
 
@@ -725,35 +683,28 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
 /* Embed an object file into a C source file.  */
 
 static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
 {
-  size_t len = 0;
-  const char *input = read_file (in, &len);
-
   /* Dump out an array containing the binary.
- FIXME: do this with objcopy.  */
-  fprintf (cfile, "static unsigned char gcn_code[] = {");
-  for (size_t i = 0; i < len; i += 17)
-{
-  fprintf (cfile, "\n\t");
-  for (size_t j = i; j < i + 17 && j < len; j++)
-	fprintf (cfile, "%3u,", (unsigned char) input[j]);
-}
-  fprintf (cfile, "\n};\n\n");
+ If the file is empty, a parse error is shown as the argument to is_empty
+ is an undeclared identifier.  */
+  fprintf (cfile,
+	   "static unsigned char gcn_code[] = {\n"
+	   "#embed \"%s\" if_empty (error_file_is_empty)\n"
+	   "};\n\n", 

[pushed] c++: add fixed testcase [PR109464]

2024-07-19 Thread Patrick Palka
Seems to be fixed by r15-521-g6ad7ca1bb90573.

PR c++/109464

gcc/testsuite/ChangeLog:

* g++.dg/template/explicit-instantiation8.C: New test.
---
 .../g++.dg/template/explicit-instantiation8.C | 24 +++
 1 file changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/template/explicit-instantiation8.C

diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation8.C 
b/gcc/testsuite/g++.dg/template/explicit-instantiation8.C
new file mode 100644
index 000..92152a2992e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/explicit-instantiation8.C
@@ -0,0 +1,24 @@
+// PR c++/109464
+// { dg-do compile { target c++11 } }
+
+template
+struct shallow
+ {
+   int len;
+   constexpr shallow() : len(0) { }
+  };
+
+template
+struct use_shallow
+  {
+   static constexpr shallow s_zstr = { };
+   static_assert(s_zstr.len == 0, "");
+  };
+
+extern template struct shallow;
+extern template struct use_shallow;
+
+template struct shallow;
+template struct use_shallow;
+
+// { dg-final { scan-assembler "_ZN7shallowIcEC2Ev" } }
-- 
2.46.0.rc0.106.g1c4a234a1c



Re: [PATCH] testsuite: Allow vst1 instruction

2024-07-19 Thread Torbjorn SVENSSON




On 2024-07-19 16:36, Richard Earnshaw (lists) wrote:

On 19/07/2024 14:07, Torbjörn SVENSSON wrote:

Ok for trunk and releases/gcc-14?

--

On Cortex-A7 with -mfloat-abi=hard and -mfpu=neon, the following
instructions are generated for the test case:

 vldrd16, .L3
 vst1.32 {d16}, [r0]

For Cortex-A7 with -mfloat-abi=soft, it's instead:

 movsr2, #1
 movsr3, #0
 strdr2, r3, [r0]

Both these are valid and should not cause the test to fail.
For Cortex-M0/3/4/7/33, they all generate the same instructions as
Cortex-A7 with -mfloat-abi=soft.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr40457-2.c: Add vst1 to allowed instructions.

Signed-off-by: Torbjörn SVENSSON 
---
  gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
b/gcc/testsuite/gcc.target/arm/pr40457-2.c
index 31624d35127..6aed42a4fbc 100644
--- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
@@ -7,4 +7,4 @@ void foo(int* p)
p[1] = 0;
  }
  
-/* { dg-final { scan-assembler "strd|stm" } } */

+/* { dg-final { scan-assembler "strd|stm|vst1" } } */


Whilst the code generated is functionally correct, I'm not sure the compiler 
should be generating that.  This new code needs more code space (the constant 
isn't shown, but it needs 8 bytes in the literal pool).  I suspect this means 
the SLP cost model for arm is in need of attention.


Do you want me to create a ticket for the SLP cost model review or will 
you handle it?


It looks like arm-gnu-toolchain-11.3.rel1-x86_64-arm-none-eabi does 
produce the old output, while 
arm-gnu-toolchain-12.2.rel1-x86_64-arm-none-eabi changed it.


Kind regards,
Torbjörn



Re: [PATCH v2 2/2] arm: [MVE intrinsics] Improve vdupq_n implementation

2024-07-19 Thread Richard Earnshaw (lists)
On 11/07/2024 10:36, Christophe Lyon wrote:
> This patch makes the non-predicated vdupq_n MVE intrinsics use
> vec_duplicate rather than an unspec.  This enables the compiler to
> generate better code sequences (for instance using vmov when
> possible).
> 
> The patch renames the existing mve_vdup pattern into
> @mve_vdupq_n, and removes the now useless
> @mve_q_n_f and @mve_q_n_ ones.
> 
> As a side-effect, it needs to update the mve_unpredicated_insn
> predicates in @mve_q_m_n_ and
> @mve_q_m_n_f.
> 
> Using vec_duplicates means the compiler is now able to use vmov in the
> tests with an immediate argument in vdupq_n_[su]{8,16,32}.c:
>   vmov.i8 q0,#0x1
> 
> However, this is only possible when the immediate has a suitable value
> (MVE encoding constraints, see imm_for_neon_mov_operand predicate).
> 
> Provided we adjust the cost computations in arm_rtx_costs_internal(),
> when the immediate does not meet the vmov constraints, we now generate:
>   mov r0, #imm
>   vdup.xx q0,r0
> 
> or
>   ldr r0, .L4
>   vdup.32 q0,r0
> in the f32 case (with 1.1 as immediate).
> 
> Without the cost adjustment, we would generate:
>   vldr.64 d0, .L4
>   vldr.64 d1, .L4+8
> and an associated literal pool entry.
> 
> Regarding the testsuite updates:
> 
> * The signed versions of vdupq_* tests lack a version with an
> immediate argument.  This patch adds them, similar to what we already
> have for vdupq_n_u*.c tests.
> 
> * Code generation for different immediate values is checked with the
> new tests this patch introduces.  Note there's no need for s8/u8 tests
> because 8-bit immediates always comply wth imm_for_neon_mov_operand.
> 
> * We can remove xfail from vcmp*f tests since we now generate:
>   movw r3, #15462
>   vcmp.f16 eq, q0, r3
> instead of the previous:
>   vldr.64 d6, .L5
>   vldr.64 d7, .L5+8
>   vcmp.f16 eq, q0, q3
> 
> Changes v1->v2:
> * Dropped change to cost computation for Neon, and associated
>   testcases updates (crypto-vsha1*)
> * Updated expected regexp in vdupq_n_[su]16-2.c to account for
>   different assembly comments (none for arm-none-eabi, '@ movhi' for
>   arm-linux-gnueabihf)
> 
> Tested on arm-linux-gnueabihf and arm-none-eabi with no regression.
> 
> 2024-07-02  Jolen Li  
>   Christophe Lyon  
> 
>   gcc/
>   * config/arm/arm-mve-builtins-base.cc (vdupq_impl): New class.
>   (vdupq): Use new implementation.
>   * config/arm/arm.cc (arm_rtx_costs_internal): Handle HFmode
>   for COST_DOUBLE. Update consting for CONST_VECTOR.
>   * config/arm/arm_mve_builtins.def: Merge vdupq_n_f, vdupq_n_s
>   and vdupq_n_u into vdupq_n.
>   * config/arm/mve.md (mve_vdup): Rename into ...
>   (@mve_vdup_n): ... this.
>   (@mve_q_n_f): Delete.
>   (@mve_q_n_): Delete..
>   (@mve_q_m_n_): Update mve_unpredicated_insn
>   attribute.
>   (@mve_q_m_n_f): Likewise.
> 
>   gcc/testsuite/
>   * gcc.target/arm/mve/intrinsics/vdupq_n_u8.c (foo1): Update
>   expected code.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_u16.c (foo1): Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_u32.c (foo1): Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_s8.c: Add test with
>   immediate argument.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_s16.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_s32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_f16.c (foo1): Update
>   expected code.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_f32.c (foo1): Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_m_n_s16.c: Add test with
>   immediate argument.
>   * gcc.target/arm/mve/intrinsics/vdupq_m_n_s32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_m_n_s8.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_x_n_s16.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_x_n_s32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_x_n_s8.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_f32-2.c: New test.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_s16-2.c: New test.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_s32-2.c: New test.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_u16-2.c: New test.
>   * gcc.target/arm/mve/intrinsics/vdupq_n_u32-2.c: New test.
>   * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16.c: Remove xfail.
>   * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpleq_n_f16.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpleq_n_f32.c: Likewise.
>   * gcc.target/arm/mve/intrinsics/vcmpltq_n_f16.c: Likewise.
>   * gcc.targe

Re: [PATCH] testsuite: Allow vst1 instruction

2024-07-19 Thread Richard Earnshaw (lists)
On 19/07/2024 16:10, Torbjorn SVENSSON wrote:
> 
> 
> On 2024-07-19 16:36, Richard Earnshaw (lists) wrote:
>> On 19/07/2024 14:07, Torbjörn SVENSSON wrote:
>>> Ok for trunk and releases/gcc-14?
>>>
>>> -- 
>>>
>>> On Cortex-A7 with -mfloat-abi=hard and -mfpu=neon, the following
>>> instructions are generated for the test case:
>>>
>>>  vldr    d16, .L3
>>>  vst1.32 {d16}, [r0]
>>>
>>> For Cortex-A7 with -mfloat-abi=soft, it's instead:
>>>
>>>  movs    r2, #1
>>>  movs    r3, #0
>>>  strd    r2, r3, [r0]
>>>
>>> Both these are valid and should not cause the test to fail.
>>> For Cortex-M0/3/4/7/33, they all generate the same instructions as
>>> Cortex-A7 with -mfloat-abi=soft.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/arm/pr40457-2.c: Add vst1 to allowed instructions.
>>>
>>> Signed-off-by: Torbjörn SVENSSON 
>>> ---
>>>   gcc/testsuite/gcc.target/arm/pr40457-2.c | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/gcc/testsuite/gcc.target/arm/pr40457-2.c 
>>> b/gcc/testsuite/gcc.target/arm/pr40457-2.c
>>> index 31624d35127..6aed42a4fbc 100644
>>> --- a/gcc/testsuite/gcc.target/arm/pr40457-2.c
>>> +++ b/gcc/testsuite/gcc.target/arm/pr40457-2.c
>>> @@ -7,4 +7,4 @@ void foo(int* p)
>>>     p[1] = 0;
>>>   }
>>>   -/* { dg-final { scan-assembler "strd|stm" } } */
>>> +/* { dg-final { scan-assembler "strd|stm|vst1" } } */
>>
>> Whilst the code generated is functionally correct, I'm not sure the compiler 
>> should be generating that.  This new code needs more code space (the 
>> constant isn't shown, but it needs 8 bytes in the literal pool).  I suspect 
>> this means the SLP cost model for arm is in need of attention.
> 
> Do you want me to create a ticket for the SLP cost model review or will you 
> handle it?
> 
> It looks like arm-gnu-toolchain-11.3.rel1-x86_64-arm-none-eabi does produce 
> the old output, while arm-gnu-toolchain-12.2.rel1-x86_64-arm-none-eabi 
> changed it.
> 
> Kind regards,
> Torbjörn
> 


A ticket would be good please.  I don't think I'm going to have time to look at 
this soon, so it will likely get forgotten without one.

It would be interesting to know which change led to this regression as well.

R.


Re: [PATCH] c-family: Introduce the -Winvalid-noreturn flag from clang with extra tuneability

2024-07-19 Thread Jason Merrill

On 7/19/24 2:29 AM, Julian Waters wrote:

Attempting to resend with a different client, plain text enabled...


Still corrupted by word wrap and quoted-printable encoding, 
unfortunately.  Probably the easiest solution is to send the patch as an 
attachment rather than pasted into the message.


I’ve managed to address your review comments, and rewrite the patch to 
include both -Winvalid-noreturn and -Winvalid-noreturn=


Hmm, I keep suggesting that you follow the pattern of 
-fstrong-eval-order, and this patch moves away from that pattern without 
any discussion.  Could you say something about why you decided not to?


but have trouble 
figuring out how to format invoke.texi and where to add the 
documentation for the new warning options.


For documentation, I don't see any proposed change, so I'm not sure 
where to start on formatting advice.  I would expect the documentation 
to go in alphabetical order under @node Warning Options.


I’ve also made this a Common 
option, since it’s come to my attention that the warning is not specific 
to c-family, but is also used by other languages too (See tree-cfg.cc). 


Sounds good.


gcc/ChangeLog:

         * common.opt: Introduce -Winvalid-noreturn and -Winvalid-noreturn=
         * opts.cc (common_handle_option): Handle new option
         * flag-types.h: New flags for -Winvalid-noreturn=
         * tree-cfg.cc (pass_warn_function_return::execute): Use it


Conventionally, change descriptions should end with a period.


gcc/config/mingw/ChangeLog:

         * mingw32.h (EXTRA_OS_CPP_BUILTINS): Fix semicolons


This change should not be part of this patch.  It also seems unnecessary 
to me, but you can propose it separately if you want.



@@ -667,6 +672,14 @@ Winvalid-memory-model
  Common Var(warn_invalid_memory_model) Init(1) Warning
  Warn when an atomic memory model parameter is known to be outside the 
valid range.

+Winvalid-noreturn
+Common Warning
+Warn when code marked noreturn returns implicitly and/or explicitly.
+
+Winvalid-noreturn=
+Common Joined Warning


This should have RejectNegative.


diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 1e497f0bb91..591acd2a7d2 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -375,6 +375,13 @@ enum incremental_link {
    INCREMENTAL_LINK_LTO
  };
+/* Kind of noreturn to ignore when reporting invalid noreturns  */


Missing period.


+enum class invalid_noreturn_kind {
+  NONE,
+  IMPLICIT,
+  EXPLICIT
+};


The inverse sense of this enum is confusing, since it isn't reflected in 
the name.  When I see "invalid_noreturn_kind" I think it expresses which 
category to warn about.  Can we reverse it?



  /* Different trace modes.  */
  enum sanitize_coverage_code {
    /* Trace PC.  */
diff --git a/gcc/opts.cc b/gcc/opts.cc
index be90a632338..19d31c262d7 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -2860,6 +2860,25 @@ common_handle_option (struct gcc_options *opts,
        dc->m_fatal_errors = value;
        break;
+    case OPT_Winvalid_noreturn_:
+      if (lang_mask == CL_DRIVER)
+break;
+
+      if (value)
+warning (0, "%<-Winvalid-noreturn=%> is enabled by default, ignoring");


We don't in general warn about redundant flags in general.  They might 
be intended to override e.g. a previous -Wno-invalid-noreturn.



+      else if (strcmp (arg, "explicit"))
+{
+  opts->x_flag_invalid_noreturn = invalid_noreturn_kind::EXPLICIT;


If I write -Winvalid-noreturn=explicit, I would expect to get warnings 
about explicit returns, but as discussed above this seems to mean 
ignoring explicit returns.



+}
+      else if (strcmp (arg, "implicit"))
+{
+  opts->x_flag_invalid_noreturn = invalid_noreturn_kind::IMPLICIT;
+}
+      else
+error_at (loc, "Unknown option for %<-Winvalid-noreturn=%>");


Seems like we also need options for warning about all or none.

I see no handling for -W{no-,}invalid-noreturn; the if (value) above 
won't handle it because those options don't hit this case; you would 
need a separate case OPT_Winvalid_noreturn: or use the Alias property in 
the common.opt entry.


You also need testcases for all the different option forms and their 
effects.


Jason



Re: [PATCH] c++: xobj fn call without obj [PR115783]

2024-07-19 Thread Jason Merrill

On 7/6/24 8:22 PM, Patrick Palka wrote:

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this
look OK for trunk/14?


OK.


-- >8 --

The code path for rejecting an object-less call to a non-static
member function should also consider xobj member functions so
that we properly reject the below calls with a "cannot call
member function without object" diagnostic.

PR c++/115783

gcc/cp/ChangeLog:

* call.cc (build_new_method_call): Generalize METHOD_TYPE
check to DECL_OBJECT_MEMBER_FUNCTION_P.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/explicit-obj-diagnostics11.C: New test.
---
  gcc/cp/call.cc|  2 +-
  .../g++.dg/cpp23/explicit-obj-diagnostics11.C | 48 +++
  2 files changed, 49 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 83070b2f633..41ef7562f27 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -11822,7 +11822,7 @@ build_new_method_call (tree instance, tree fns, vec **args,
 fn);
}
  
-	  if (TREE_CODE (TREE_TYPE (fn)) == METHOD_TYPE

+ if (DECL_OBJECT_MEMBER_FUNCTION_P (fn)
  && !DECL_CONSTRUCTOR_P (fn)
  && is_dummy_object (instance))
{
diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C 
b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C
new file mode 100644
index 000..cc2571f62a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-diagnostics11.C
@@ -0,0 +1,48 @@
+// PR c++/115783
+// { dg-do compile { target c++23 } }
+
+struct A {
+  int f(this auto);
+
+  static void s() {
+f(); // { dg-error "without object" }
+  }
+};
+
+int n = A::f(); // { dg-error "without object" }
+
+struct B {
+  void ns() {
+A::f(); // { dg-error "without object" }
+  }
+
+  static void s() {
+A::f(); // { dg-error "without object" }
+  }
+};
+
+template
+struct C {
+  void ns() {
+A::f(); // { dg-error "without object" }
+T::f(); // { dg-error "without object" }
+  }
+
+  static void s() {
+A::f(); // { dg-error "without object" }
+T::f(); // { dg-error "without object" }
+  };
+};
+
+template struct C;
+
+template
+struct D : T {
+  void ns() {
+A::f(); // { dg-error "without object" }
+T::f(); // { dg-error "not a member of 'B'" }
+  }
+};
+
+template struct D; // { dg-message "required from here" }
+template struct D; // { dg-bogus "required from here" }




Re: [PATCH] c++: missing SFINAE during alias CTAD [PR115296]

2024-07-19 Thread Jason Merrill

On 7/19/24 10:55 AM, Patrick Palka wrote:

On Fri, Jul 5, 2024 at 1:50 PM Patrick Palka  wrote:


Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk/14?

-- >8 --

During the alias CTAD transformation, if substitution failed for some
guide we should just discard the guide silently.  We currently do
discard the guide, but not silently, which causes us to reject the
below testcase due to forming a too-large array type when transforming
the user-defined deduction guides.

This patch fixes this by passing complain=tf_none instead of
=tf_warning_or_error in the couple of spots where we expect subsitution
to possibly fail and so subsequently check for error_mark_node.


Ping.  Alternatively we could set complain=tf_none everywhere.


That sounds better, unless you think there's a reason to have different 
complain args for different calls.


Jason



Re: [Ping, Patch, Fortran, PR88624, v1] Fix Rejects allocatable coarray passed as a dummy argument

2024-07-19 Thread Paul Richard Thomas
Hi Andre,

The patch looks fine to me. Please add the original testcase as
pr88624.f90, since it can be a compile only. The addition to
coarray/dummy_1.f90 is fine as well but I think that it is good to address
the reporter's problem directly.

Thanks

Paul


On Wed, 17 Jul 2024 at 14:10, Andre Vehreschild  wrote:

> Hi all,
>
> and another ping...
>
> Regtests ok on x86_64-pc-linux-gnu / Fedora 39. Ok for mainline?
>
> - Andre
>
> On Thu, 11 Jul 2024 15:42:54 +0200
> Andre Vehreschild  wrote:
>
> > Hi all,
> >
> > attached patch fixes using of coarrays as dummy arguments. The coarray
> > dummy argument was not dereferenced correctly, which is fixed now.
> >
> > Regtests ok on x86_64-pc-linux-gnu / Fedora 39. Ok for mainline.
> >
> > Regards,
> >   Andre
> > --
> > Andre Vehreschild * Email: vehre ad gcc dot gnu dot org
>
>
> --
> Andre Vehreschild * Email: vehre ad gmx dot de
>


Re: [PATCH] c++: missing SFINAE during alias CTAD [PR115296]

2024-07-19 Thread Patrick Palka
On Fri, 19 Jul 2024, Jason Merrill wrote:

> On 7/19/24 10:55 AM, Patrick Palka wrote:
> > On Fri, Jul 5, 2024 at 1:50 PM Patrick Palka  wrote:
> > > 
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
> > > OK for trunk/14?
> > > 
> > > -- >8 --
> > > 
> > > During the alias CTAD transformation, if substitution failed for some
> > > guide we should just discard the guide silently.  We currently do
> > > discard the guide, but not silently, which causes us to reject the
> > > below testcase due to forming a too-large array type when transforming
> > > the user-defined deduction guides.
> > > 
> > > This patch fixes this by passing complain=tf_none instead of
> > > =tf_warning_or_error in the couple of spots where we expect subsitution
> > > to possibly fail and so subsequently check for error_mark_node.
> > 
> > Ping.  Alternatively we could set complain=tf_none everywhere.
> 
> That sounds better, unless you think there's a reason to have different
> complain args for different calls.

I was initially worried about a stray error_mark_node silently leaking
into the rewritten guide signature (since we don't check for
error_mark_node after each substitution) but on second thought that
seems unlikely.  The substitution steps in alias_ctad_tweaks  that
aren't checked should probably never fail, since they're just reindexing
template parameters etc.

So like so?

-- >8 --

Subject: [PATCH] c++: missing SFINAE during alias CTAD [PR115296]

PR c++/115296

gcc/cp/ChangeLog:

* pt.cc (alias_ctad_tweaks): Use complain=tf_none
instead of tf_warning_or_error.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/class-deduction-alias23.C: New test.
---
 gcc/cp/pt.cc  |  2 +-
 .../g++.dg/cpp2a/class-deduction-alias23.C| 19 +++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 45453c0d45a..8e9951a9066 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -30298,7 +30298,7 @@ alias_ctad_tweaks (tree tmpl, tree uguides)
  (INNERMOST_TEMPLATE_PARMS (fullatparms)));
 }
 
-  tsubst_flags_t complain = tf_warning_or_error;
+  tsubst_flags_t complain = tf_none;
   tree aguides = NULL_TREE;
   tree atparms = INNERMOST_TEMPLATE_PARMS (fullatparms);
   unsigned natparms = TREE_VEC_LENGTH (atparms);
diff --git a/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C 
b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C
new file mode 100644
index 000..117212c67de
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C
@@ -0,0 +1,19 @@
+// PR c++/115296
+// { dg-do compile { target c++20 } }
+
+using size_t = decltype(sizeof(0));
+
+template
+struct span { span(T); };
+
+template
+span(T(&)[N]) -> span; // { dg-bogus "array exceeds maximum" }
+
+template
+requires (sizeof(T[N]) != 42) // { dg-bogus "array exceeds maximum" }
+span(T*) -> span;
+
+template
+using array_view = span;
+
+array_view x = 0;
-- 
2.46.0.rc0.106.g1c4a234a1c


Ping: [Patch, fortran] PR115070 (and PR115348) - [13/14/15 Regression] ICE using IEEE_ARITHMETIC in a derived type method with class, intent(out)

2024-07-19 Thread Paul Richard Thomas
Hi All,

Ping!

I understand now why this works. The scope of the block is merged and so
all the previous declarations that would otherwise disappear are added,
even by the empty statement.

Regards

Paul


On Mon, 15 Jul 2024 at 17:10, Paul Richard Thomas <
paul.richard.tho...@gmail.com> wrote:

> Hi All,
>
> I am not sure that I understand why this bug occurs. The regression was
> introduced by my patch that had gfc_trans_class_init_assign return
> NULL_TREE, when all the components of the default initializer are NULL.
> Note that this only afflicts scalar dummy arguments.
>
> With pr115070:
> void my_sub (struct __class_my_mod_My_type_t & restrict obs)
>   c_char fpstate.5[33];// This disappears, when NULL is returned.
>   try
> {
>   _gfortran_ieee_procedure_entry ((void *) &fpstate.5);
>
> With pr115348:
> void myroutine (struct __class_mymodule_Mytype_t & restrict self)
> {
>   static logical(kind=4) is_recursive.0 = 0;  // This disappears when NULL
> is returned
>   try
> {
>   if (is_recursive.0)
>
> The fix is equally magical in that finishing build_empty_stmt seems to
> provide the backend with everything that it needs to retain these
> declarations. See the attached patch. If somebody can explain what causes
> the problem and why the patch fixes it, I would be very pleased. As far as
> I can tell, the tail end of trans_code should have been sufficient to
> handle the return of NULL_TREE.
>
> Anyway, work it does and regtests OK. OK for mainline and backporting?
>
> Regards
>
> Paul
>
>


Re: Ping: [Patch, fortran] PR115070 (and PR115348) - [13/14/15 Regression] ICE using IEEE_ARITHMETIC in a derived type method with class, intent(out)

2024-07-19 Thread Steve Kargl
Thanks for the patch and chasing down the magic.
Path is ok to commit.

-- 
steve


On Fri, Jul 19, 2024 at 05:32:26PM +0100, Paul Richard Thomas wrote:
> Hi All,
> 
> Ping!
> 
> I understand now why this works. The scope of the block is merged and so
> all the previous declarations that would otherwise disappear are added,
> even by the empty statement.
> 
> Regards
> 
> Paul
> 
> 
> On Mon, 15 Jul 2024 at 17:10, Paul Richard Thomas <
> paul.richard.tho...@gmail.com> wrote:
> 
> > Hi All,
> >
> > I am not sure that I understand why this bug occurs. The regression was
> > introduced by my patch that had gfc_trans_class_init_assign return
> > NULL_TREE, when all the components of the default initializer are NULL.
> > Note that this only afflicts scalar dummy arguments.
> >
> > With pr115070:
> > void my_sub (struct __class_my_mod_My_type_t & restrict obs)
> >   c_char fpstate.5[33];// This disappears, when NULL is returned.
> >   try
> > {
> >   _gfortran_ieee_procedure_entry ((void *) &fpstate.5);
> >
> > With pr115348:
> > void myroutine (struct __class_mymodule_Mytype_t & restrict self)
> > {
> >   static logical(kind=4) is_recursive.0 = 0;  // This disappears when NULL
> > is returned
> >   try
> > {
> >   if (is_recursive.0)
> >
> > The fix is equally magical in that finishing build_empty_stmt seems to
> > provide the backend with everything that it needs to retain these
> > declarations. See the attached patch. If somebody can explain what causes
> > the problem and why the patch fixes it, I would be very pleased. As far as
> > I can tell, the tail end of trans_code should have been sufficient to
> > handle the return of NULL_TREE.
> >
> > Anyway, work it does and regtests OK. OK for mainline and backporting?
> >
> > Regards
> >
> > Paul
> >
> >

-- 
Steve


[PATCH v2] MATCH: Add simplification for MAX and MIN to match.pd [PR109878]

2024-07-19 Thread Eikansh Gupta
Min and max could be optimized if both operands are defined by
(same) variable restricted by an and(&). For signed types,
optimization can be done when both constant have same sign bit.
The patch also adds optimization for specific case of min/max(a, a&CST).

This patch adds match pattern for:

max (a & CST0, a & CST1) -> a & CST0 IFF CST0 & CST1 == CST1
min (a & CST0, a & CST1) -> a & CST0 IFF CST0 & CST1 == CST0
min (a, a & CST) --> a & CST
max (a, a & CST) --> a

PR tree-optimization/109878

gcc/ChangeLog:

* match.pd min/max (a & CST0, a & CST1): New pattern.
min/max (a, a & CST): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr109878-1.c: New test.
* gcc.dg/tree-ssa/pr109878-2.c: New test.
* gcc.dg/tree-ssa/pr109878-3.c: New test.
* gcc.dg/tree-ssa/pr109878.c: New test.

Signed-off-by: Eikansh Gupta 
---
 gcc/match.pd   | 26 +
 gcc/testsuite/gcc.dg/tree-ssa/pr109878-1.c | 64 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr109878-2.c | 31 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr109878-3.c | 42 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr109878.c   | 64 ++
 5 files changed, 227 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109878-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109878-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109878-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109878.c

diff --git a/gcc/match.pd b/gcc/match.pd
index cf359b0ec0f..dbaff6ab3da 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4321,6 +4321,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 @0
 @2)))
 
+/* min (a & CST0, a & CST1) -> a & CST0 IFF CST0 & CST1 == CST0 */
+/* max (a & CST0, a & CST1) -> a & CST0 IFF CST0 & CST1 == CST1 */
+/* If signed a, then both the constants should have same sign. */
+(for minmax (min max)
+ (simplify
+  (minmax (bit_and@3 @0 INTEGER_CST@1) (bit_and@4 @0 INTEGER_CST@2))
+   (if (TYPE_UNSIGNED (type)
+|| (tree_int_cst_sgn (@1) == tree_int_cst_sgn (@2)))
+(with { auto andvalue = wi::to_wide (@1) & wi::to_wide (@2); }
+ (if (andvalue == ((minmax == MIN_EXPR)
+   ? wi::to_wide (@1) : wi::to_wide (@2)))
+  @3
+  (if (andvalue == ((minmax != MIN_EXPR)
+? wi::to_wide (@1) : wi::to_wide (@2)))
+   @4))
+
+/* min (a, a & CST) --> a & CST */
+/* max (a, a & CST) --> a */
+(for minmax (min max)
+ (simplify
+  (minmax @0 (bit_and@1 @0 INTEGER_CST@2))
+   (if (TYPE_UNSIGNED(type))
+(if (minmax == MIN_EXPR)
+ @1
+ @0
+
 /* Simplify min (&var[off0], &var[off1]) etc. depending on whether
the addresses are known to be less, equal or greater.  */
 (for minmax (min max)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109878-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr109878-1.c
new file mode 100644
index 000..509e59adea1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109878-1.c
@@ -0,0 +1,64 @@
+/* PR tree-optimization/109878 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+
+/* All the constant pair  used here satisfy the condition:
+   (cst0 & cst1 == cst0) || (cst0 & cst1 == cst1).
+   If the above condition is true, then MIN_EXPR is not needed. */
+int min_and(int a, int b) {
+  b = a & 3;
+  a = a & 1;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+int min_and1(int a, int b) {
+  b = a & 3;
+  a = a & 15;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+int min_and2(int a, int b) {
+  b = a & -7;
+  a = a & -3;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+int min_and3(int a, int b) {
+  b = a & -5;
+  a = a & -13;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+/* When constants are of opposite signs, the simplification will only
+   work for unsigned types. */
+unsigned int min_and4(unsigned int a, unsigned int b) {
+  b = a & 3;
+  a = a & -5;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+unsigned int min_and5(unsigned int a, unsigned int b) {
+  b = a & -3;
+  a = a & 5;
+  if (b < a)
+return b;
+  else
+return a;
+}
+
+/* { dg-final { scan-tree-dump-not " MIN_EXPR " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109878-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr109878-2.c
new file mode 100644
index 000..1503dcde1cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109878-2.c
@@ -0,0 +1,31 @@
+/* PR tree-optimization/109878 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+
+/* The testcases here should not get optimized with the patch.
+   For constant pair , the condition:
+   (cst0 & cst1 == cst0) || (cst0 & cst1 == cst1)
+   is false for the constants used here. */
+int max_and(int a, int b) {
+
+  b = a & 3;
+  a = a & 5;
+  if (b > a)
+return b;
+  else
+return a;
+}
+
+/* The constants in this function satisfy the condition but a is signed.
+   For signed 

Re: [REPOST 1/3] Add support for -mcpu=power11

2024-07-19 Thread Michael Meissner
On Thu, Jul 18, 2024 at 08:08:44AM -0500, Segher Boessenkool wrote:
> Hi!
> 
> [ I reviewed this together with Ke Wen.  All blame should go to me, all
>   praise to him. ]
> 
> On Wed, Jul 10, 2024 at 01:34:26PM -0400, Michael Meissner wrote:
> > [This is a repost of the June 4th patch]
> 
> It is not a repost.  It is v2.  It has important changes.

The first 2 patches for the compiler proper are exactly the same as the June
4th post.  Yes, the June 4th post had changes from the previous post.

The only change was to the testsuite.

> > * config.gcc (rs6000*-*-*, powerpc*-*-*): Add support for power11.
> 
> Please do not add new rs6000-* stuff.  There haven't been systems like
> that for twenty years or so now.  It is mostly harmless to add this
> stuff, but it just makes more work to delete later.  And more
> importantly, this was not tested; this *cannot* be tested.

The code I added was just extra code in the powerpc section.  I did not change
the case statement.  That code had the rs6000* line, so I was just quoting it
in the ChangeLog.

> 
> > * config/rs6000/rs6000-cpus.def (ISA_POWER11_MASKS_SERVER): New define.
> > (POWERPC_MASKS): Add power11 isa bit.
> 
> There is no Power11 ISA bit.  There cannnot be a Power11 ISA bit.  There
> is no new ISA for Power11.
> 
> "Add power11 bit"?  It is a hack to easily distinguish between power10
> and power11, nothing more.  It abuses existing mechanisms a bit.  Not
> the nicest thing to do, but it works :-)
> 
> But please do not call it what it is not.  And mark it up as a hack in
> some nice place.

The issue is right now the mechanism to distinguish between the cpus is the
bits in rs6000_isa_flags.  Sure other mechanisms can be done, and perhaps we
should create them, but if you want the correct defines and .machines to be
done for things like #pragma GCC tar and the target attribute, the simplest way
is to create a new bit in rs6000_isa_flags, and just mark the option as giving
an error if the user explicity uses it.

> > * config/rs6000/rs6000.opt (-mpower11): Add internal power11 ISA flag.
> 
> s//ISA //
> 
> It is not nice to have a user-selectable option for this at all :-(
> 
> > --- a/gcc/config.gcc
> > +++ b/gcc/config.gcc
> > @@ -533,7 +533,9 @@ powerpc*-*-*)
> > extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
> > si2vmx.h"
> > extra_headers="${extra_headers} amo.h"
> > case x$with_cpu in
> > -   
> > xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower10|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
> > +   xpowerpc64 | xdefault64 | x6[23]0 | x970 | xG5 | xpower[3456789] \
> > +   | xpower1[01] | xpower6x | xrs64a | xcell | xa2 | xe500mc64 \
> > +   | xe5500 | xe6500)
> > cpu_is_64bit=yes
> > ;;
> > esac
> 
> Please do not change form and function at the same time.  It is much
> easier to see what is going on, to verify the change is correct, if you
> do it as two patches (either the cleanup first, which is nicer, or as a
> later step, which is often easier).

I can just make the line even longer if you prefer.

> > --- a/gcc/config/rs6000/ppc-auxv.h
> > +++ b/gcc/config/rs6000/ppc-auxv.h
> > @@ -47,9 +47,8 @@
> >  #define PPC_PLATFORM_PPC47612
> >  #define PPC_PLATFORM_POWER813
> >  #define PPC_PLATFORM_POWER914
> > -
> > -/* This is not yet official.  */
> >  #define PPC_PLATFORM_POWER10   15
> > +#define PPC_PLATFORM_POWER11   16
> 
> Please add a comment where the official thing is?
> 
> It is in glibc dl-procinfo.h, and there *cannot* be more than 16
> platforms currently, so how can this work?  Or do we get data
> directly from the kernel, or what?
>
> But you tested it, so you do obviously get PPC_PLATFORM_POWER11 from
> somewhere.  I cannot see from where though?

In other discussions, I was told that 16 with be the platform number for the
kernel in the future.

> > --- a/gcc/config/rs6000/rs6000-opts.h
> > +++ b/gcc/config/rs6000/rs6000-opts.h
> > @@ -67,7 +67,8 @@ enum processor_type
> > PROCESSOR_MPCCORE,
> > PROCESSOR_CELL,
> > PROCESSOR_PPCA2,
> > -   PROCESSOR_TITAN
> > +   PROCESSOR_TITAN,
> > +   PROCESSOR_POWER11
> 
> Please insert this after the p10 entry.  There is a (very vague)
> ordering here, we do not put the newest at the end.

Ok.

> > -/* Instruction costs on POWER10 processors.  */
> > +/* Instruction costs on POWER10/POWER11 processors.  */
> 
> The official names are Power10 and Power11.  POWER9 is still in SHOUTING
> CAPS, but later CPUs have the more relaxed spelling as official names
> (just like the Power ISA has had for longer already).

Ok, but I was just using the current code's format.

> > -  /* Do Power10 dependent reordering.  */
> > -  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
> > +  /* Do Power10/power11 dependent reordering.  */
> 
> Power11.

Ok.

> > +  /* Do Power10/power11 dependent reordering.  */
> 
> Again.
>

[PATCH] recog: Disallow subregs in mode-punned value [PR115881]

2024-07-19 Thread Richard Sandiford
In g:9d20529d94b23275885f380d155fe8671ab5353a, I'd extended
insn_propagation to handle simple cases of hard-reg mode punning.
The punned "to" value was created using simplify_subreg rather
than simplify_gen_subreg, on the basis that hard-coded subregs
aren't generally useful after RA (where hard-reg propagation is
expected to happen).

This PR is about a case where the subreg gets pushed into the
operands of a plus, but the subreg on one of the operands
cannot be simplified.  Specifically, we have to generate
(subreg:SI (reg:DI sp) 0) rather than (reg:SI sp), since all
references to the stack pointer must be via stack_pointer_rtx.

However, code in x86 (reasonably) expects no subregs of registers
to appear after RA, except for special cases like strict_low_part.
This leads to an awkward situation where we can't ban subregs of sp
(because of the strict_low_part use), can't allow direct references
to sp in other modes (because of the stack_pointer_rtx requirement),
and can't allow rvalue uses of the subreg (because of the "no subregs
after RA" assumption).  It all seems a bit of a mess...

I sat on this for a while in the hope that a clean solution might
become apparent, but in the end, I think we'll just have to check
manually for nested subregs and punt on them.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR rtl-optimization/115881
* recog.cc: Include rtl-iter.h.
(insn_propagation::apply_to_rvalue_1): Check that the result
of simplify_subreg does not include nested subregs.

gcc/tetsuite/
PR rtl-optimization/115881
* cc.c-torture/compile/pr115881.c: New test.
---
 gcc/recog.cc  | 21 +++
 .../gcc.c-torture/compile/pr115881.c  | 16 ++
 2 files changed, 37 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr115881.c

diff --git a/gcc/recog.cc b/gcc/recog.cc
index 54b317126c2..23e4820180f 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "reload.h"
 #include "tree-pass.h"
 #include "function-abi.h"
+#include "rtl-iter.h"
 
 #ifndef STACK_POP_CODE
 #if STACK_GROWS_DOWNWARD
@@ -1082,6 +1083,7 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
  || !REG_CAN_CHANGE_MODE_P (REGNO (x), GET_MODE (from),
 GET_MODE (x)))
return false;
+
  /* If the reference is paradoxical and the replacement
 value contains registers, we would need to check that the
 simplification below does not increase REG_NREGS for those
@@ -1090,11 +1092,30 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
  if (paradoxical_subreg_p (GET_MODE (x), GET_MODE (from))
  && !CONSTANT_P (to))
return false;
+
  newval = simplify_subreg (GET_MODE (x), to, GET_MODE (from),
subreg_lowpart_offset (GET_MODE (x),
   GET_MODE (from)));
  if (!newval)
return false;
+
+ /* Check that the simplification didn't just push an explicit
+subreg down into subexpressions.  In particular, for a register
+R that has a fixed mode, such as the stack pointer, a subreg of:
+
+  (plus:M (reg:M R) (const_int C))
+
+would be:
+
+  (plus:N (subreg:N (reg:M R) ...) (const_int C'))
+
+But targets can legitimately assume that subregs of hard registers
+will not be created after RA (except in special circumstances,
+such as strict_low_part).  */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, newval, NONCONST)
+   if (GET_CODE (*iter) == SUBREG)
+ return false;
}
 
   if (should_unshare)
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr115881.c 
b/gcc/testsuite/gcc.c-torture/compile/pr115881.c
new file mode 100644
index 000..8379704c4c8
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr115881.c
@@ -0,0 +1,16 @@
+typedef unsigned u32;
+int list_is_head();
+void tu102_acr_wpr_build_acr_0_0_0(int, long, u32);
+void tu102_acr_wpr_build() {
+  u32 offset = 0;
+  for (; list_is_head();) {
+int hdr;
+u32 _addr = offset, _size = sizeof(hdr), *_data = &hdr;
+while (_size--) {
+  tu102_acr_wpr_build_acr_0_0_0(0, _addr, *_data++);
+  _addr += 4;
+}
+offset += sizeof(hdr);
+  }
+  tu102_acr_wpr_build_acr_0_0_0(0, offset, 0);
+}
-- 
2.25.1



[PATCH] Treat boolean vector elements as 0/-1 [PR115406]

2024-07-19 Thread Richard Sandiford
Previously we built vector boolean constants using 1 for true
elements and 0 for false elements.  This matches the predicates
produced by SVE's PTRUE instruction, but leads to a miscompilation
on AVX512, where all bits of a boolean element should be set.

One option for RTL would be to make this target-configurable.
But that isn't really possible at the tree level, where vectors
should work in a more target-independent way.  (There is currently
no way to create a "generic" packed boolean vector, but never say
never :))  And, if we were going to pick a generic behaviour,
it would make sense to use 0/-1 rather than 0/1, for consistency
with integer vectors.

Both behaviours should work with SVE on read, since SVE ignores
the upper bits in each predicate element.  And the choice shouldn't
make much difference for RTL, since all SVE predicate modes are
expressed as vectors of BI, rather than of multi-bit booleans.

I suspect there might be some fallout from this change on SVE.
But I think we should at least give it a go, and see whether any
fallout provides a strong counterargument against the approach.

Tested on aarch64-linux-gnu (configured with --with-cpu=neoverse-v1
for extra coverage) and x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR middle-end/115406
* fold-const.cc (native_encode_vector_part): For vector booleans,
check whether an element is nonzero and, if so, set all of the
correspending bits in the target image.
* simplify-rtx.cc (native_encode_rtx): Likewise.

gcc/testsuite/
PR middle-end/115406
* gcc.dg/torture/pr115406.c: New test.
---
 gcc/fold-const.cc   |  5 +++--
 gcc/simplify-rtx.cc |  3 ++-
 gcc/testsuite/gcc.dg/torture/pr115406.c | 18 ++
 3 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr115406.c

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 710d697c021..a509b46b904 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8097,16 +8097,17 @@ native_encode_vector_part (const_tree expr, unsigned 
char *ptr, int len,
   unsigned int elts_per_byte = BITS_PER_UNIT / elt_bits;
   unsigned int first_elt = off * elts_per_byte;
   unsigned int extract_elts = extract_bytes * elts_per_byte;
+  unsigned int elt_mask = (1 << elt_bits) - 1;
   for (unsigned int i = 0; i < extract_elts; ++i)
{
  tree elt = VECTOR_CST_ELT (expr, first_elt + i);
  if (TREE_CODE (elt) != INTEGER_CST)
return 0;
 
- if (ptr && wi::extract_uhwi (wi::to_wide (elt), 0, 1))
+ if (ptr && integer_nonzerop (elt))
{
  unsigned int bit = i * elt_bits;
- ptr[bit / BITS_PER_UNIT] |= 1 << (bit % BITS_PER_UNIT);
+ ptr[bit / BITS_PER_UNIT] |= elt_mask << (bit % BITS_PER_UNIT);
}
}
   return extract_bytes;
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 35ba54c6292..a49eefb34d4 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7232,7 +7232,8 @@ native_encode_rtx (machine_mode mode, rtx x, 
vec &bytes,
  target_unit value = 0;
  for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
{
- value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
+ if (INTVAL (CONST_VECTOR_ELT (x, elt)))
+   value |= mask << j;
  elt += 1;
}
  bytes.quick_push (value);
diff --git a/gcc/testsuite/gcc.dg/torture/pr115406.c 
b/gcc/testsuite/gcc.dg/torture/pr115406.c
new file mode 100644
index 000..800ef2f8317
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115406.c
@@ -0,0 +1,18 @@
+// { dg-do run }
+// { dg-additional-options "-mavx512f" { target avx512f_runtime } }
+
+typedef __attribute__((__vector_size__ (1))) signed char V;
+
+signed char
+foo (V v)
+{
+  return ((V) v == v)[0];
+}
+
+int
+main ()
+{
+  signed char x = foo ((V) { });
+  if (x != -1)
+__builtin_abort ();
+}
-- 
2.25.1



Re: [PATCH] Treat boolean vector elements as 0/-1 [PR115406]

2024-07-19 Thread Richard Biener



> Am 19.07.2024 um 19:44 schrieb Richard Sandiford :
> 
> Previously we built vector boolean constants using 1 for true
> elements and 0 for false elements.  This matches the predicates
> produced by SVE's PTRUE instruction, but leads to a miscompilation
> on AVX512, where all bits of a boolean element should be set.

Note it’s for AVX with the issue that QImode masks are handled differently than 
larger mode masks.  AVX512 has only single-bit element masks where this does 
not make a difference.

> One option for RTL would be to make this target-configurable.
> But that isn't really possible at the tree level, where vectors
> should work in a more target-independent way.  (There is currently
> no way to create a "generic" packed boolean vector, but never say
> never :))  And, if we were going to pick a generic behaviour,
> it would make sense to use 0/-1 rather than 0/1, for consistency
> with integer vectors.
> 
> Both behaviours should work with SVE on read, since SVE ignores
> the upper bits in each predicate element.  And the choice shouldn't
> make much difference for RTL, since all SVE predicate modes are
> expressed as vectors of BI, rather than of multi-bit booleans.
> 
> I suspect there might be some fallout from this change on SVE.
> But I think we should at least give it a go, and see whether any
> fallout provides a strong counterargument against the approach.
> 
> Tested on aarch64-linux-gnu (configured with --with-cpu=neoverse-v1
> for extra coverage) and x86_64-linux-gnu.  OK to install?

OK,

Richard 

> Richard
> 
> 
> gcc/
>PR middle-end/115406
>* fold-const.cc (native_encode_vector_part): For vector booleans,
>check whether an element is nonzero and, if so, set all of the
>correspending bits in the target image.
>* simplify-rtx.cc (native_encode_rtx): Likewise.
> 
> gcc/testsuite/
>PR middle-end/115406
>* gcc.dg/torture/pr115406.c: New test.
> ---
> gcc/fold-const.cc   |  5 +++--
> gcc/simplify-rtx.cc |  3 ++-
> gcc/testsuite/gcc.dg/torture/pr115406.c | 18 ++
> 3 files changed, 23 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/torture/pr115406.c
> 
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 710d697c021..a509b46b904 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -8097,16 +8097,17 @@ native_encode_vector_part (const_tree expr, unsigned 
> char *ptr, int len,
>   unsigned int elts_per_byte = BITS_PER_UNIT / elt_bits;
>   unsigned int first_elt = off * elts_per_byte;
>   unsigned int extract_elts = extract_bytes * elts_per_byte;
> +  unsigned int elt_mask = (1 << elt_bits) - 1;
>   for (unsigned int i = 0; i < extract_elts; ++i)
>{
>  tree elt = VECTOR_CST_ELT (expr, first_elt + i);
>  if (TREE_CODE (elt) != INTEGER_CST)
>return 0;
> 
> -  if (ptr && wi::extract_uhwi (wi::to_wide (elt), 0, 1))
> +  if (ptr && integer_nonzerop (elt))
>{
>  unsigned int bit = i * elt_bits;
> -  ptr[bit / BITS_PER_UNIT] |= 1 << (bit % BITS_PER_UNIT);
> +  ptr[bit / BITS_PER_UNIT] |= elt_mask << (bit % BITS_PER_UNIT);
>}
>}
>   return extract_bytes;
> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
> index 35ba54c6292..a49eefb34d4 100644
> --- a/gcc/simplify-rtx.cc
> +++ b/gcc/simplify-rtx.cc
> @@ -7232,7 +7232,8 @@ native_encode_rtx (machine_mode mode, rtx x, 
> vec &bytes,
>  target_unit value = 0;
>  for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
>{
> -  value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
> +  if (INTVAL (CONST_VECTOR_ELT (x, elt)))
> +value |= mask << j;
>  elt += 1;
>}
>  bytes.quick_push (value);
> diff --git a/gcc/testsuite/gcc.dg/torture/pr115406.c 
> b/gcc/testsuite/gcc.dg/torture/pr115406.c
> new file mode 100644
> index 000..800ef2f8317
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr115406.c
> @@ -0,0 +1,18 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx512f" { target avx512f_runtime } }
> +
> +typedef __attribute__((__vector_size__ (1))) signed char V;
> +
> +signed char
> +foo (V v)
> +{
> +  return ((V) v == v)[0];
> +}
> +
> +int
> +main ()
> +{
> +  signed char x = foo ((V) { });
> +  if (x != -1)
> +__builtin_abort ();
> +}
> --
> 2.25.1
> 


Re: [REPOST 1/3] Add support for -mcpu=power11

2024-07-19 Thread Peter Bergner
On 7/19/24 12:34 PM, Michael Meissner wrote:
> On Thu, Jul 18, 2024 at 08:08:44AM -0500, Segher Boessenkool wrote:
>>> --- a/gcc/config/rs6000/ppc-auxv.h
>>> +++ b/gcc/config/rs6000/ppc-auxv.h
>>> @@ -47,9 +47,8 @@
>>>  #define PPC_PLATFORM_PPC47612
>>>  #define PPC_PLATFORM_POWER813
>>>  #define PPC_PLATFORM_POWER914
>>> -
>>> -/* This is not yet official.  */
>>>  #define PPC_PLATFORM_POWER10   15
>>> +#define PPC_PLATFORM_POWER11   16
>>
>> Please add a comment where the official thing is?
>>
>> It is in glibc dl-procinfo.h, and there *cannot* be more than 16
>> platforms currently, so how can this work?  Or do we get data
>> directly from the kernel, or what?
>>
>> But you tested it, so you do obviously get PPC_PLATFORM_POWER11 from
>> somewhere.  I cannot see from where though?
> 
> In other discussions, I was told that 16 with be the platform number for the
> kernel in the future.

The AT_PLATFORM from the kernel is a string, not an integer.
To make __builtin_cpu_is ("power11") work efficiently, GLIBC stores
an integer representing each cpu AT_PLATFORM string in the TCB.
It is therefore GLIBC which "owns" the integer versions of the
platform values and yes, 16 is the correct value for Power11 and
that value exists in upstream GLIBC.

Peter




Re: [PATCH] RISC-V: Fix double mode under RV32 not utilize vf

2024-07-19 Thread Jeff Law




On 7/19/24 2:55 AM, demin.han wrote:

Currently, some binops of vector vs double scalar under RV32 can't
translated to vf but vfmv+vxx.vv.

The cause is that vec_duplicate is also expanded to broadcast for double mode
under RV32. last-combine can't process expanded broadcast.

gcc/ChangeLog:

* config/riscv/vector.md: Add !FLOAT_MODE_P constrain

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vadd-rv32gcv-nofm.c: Fix test
* gcc.target/riscv/rvv/autovec/binop/vdiv-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/binop/vmul-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/binop/vsub-rv32gcv-nofm.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fadd-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-5.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fma_fnma-6.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmax-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmin-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-5.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fms_fnms-6.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-1.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-2.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-3.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-4.c: Ditto
* gcc.target/riscv/rvv/autovec/cond/cond_fmul-5.c: Ditto
It looks like vadd-rv32gcv-nofm still isn't quite right according to the 
pre-commit testing:


 > 
https://github.com/ewlu/gcc-precommit-ci/issues/1931#issuecomment-2238752679



OK once that's fixed.  No need to wait for another review cycle.

And a note.  We need to be careful as some uarchs may pay a penalty when 
the vector unit needs to get an operand from the GP or FP register 
files.  So there could well be cases where using .vf or .vx forms is 
slower.  Consider these two scenarios.


First, we broadcast from the GP/FP across a vector regsiter outside a 
loop, the use a .vv form in the loop.


Second we use a .vf or .vx form in the loop instead without any broadcast.

In the former case we only pay the penalty for crossing register files 
once.  In the second case we'd pay it for every iteration of the loop.


Given this is going to be uarch sensitive, I don't mind biasing towards 
the .vx/.vf forms right now, but we may need to add some costing models 
to this in the future as we can test on a wider variety of uarchs.


jeff



[PATCH] Avoid undefined behaviour in build_option_suggestions

2024-07-19 Thread Siddhesh Poyarekar
The inner loop in build_option_suggestions uses OPTION to take the
address of OPTB and use it across iterations, which is undefined
behaviour since OPTB is defined within the loop.  Pull it outside the
loop to make this defined.

It's a trivial fix, so posting early.  OK to commit once build and test
succeeds?

Thanks,
Sid

gcc/ChangeLog:

* opt-suggestions.cc
(option_proposer::build_option_suggestions): Pull OPTB
definition out of the innermost loop.
---
 gcc/opt-suggestions.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/opt-suggestions.cc b/gcc/opt-suggestions.cc
index 849e4f5c439..71e5cb561a6 100644
--- a/gcc/opt-suggestions.cc
+++ b/gcc/opt-suggestions.cc
@@ -167,9 +167,9 @@ option_proposer::build_option_suggestions (const char 
*prefix)
add_misspelling_candidates (m_option_suggestions, option,
opt_text);
 
+   struct cl_option optb;
for (int j = 0; sanitizer_opts[j].name != NULL; ++j)
  {
-   struct cl_option optb;
/* -fsanitize=all is not valid, only -fno-sanitize=all.
   So don't register the positive misspelling candidates
   for it.  */
-- 
2.45.1



Re: [PATCH] Avoid undefined behaviour in build_option_suggestions

2024-07-19 Thread Jakub Jelinek
On Fri, Jul 19, 2024 at 12:52:06PM -0400, Siddhesh Poyarekar wrote:
> The inner loop in build_option_suggestions uses OPTION to take the
> address of OPTB and use it across iterations, which is undefined
> behaviour since OPTB is defined within the loop.  Pull it outside the
> loop to make this defined.
> 
> It's a trivial fix, so posting early.  OK to commit once build and test
> succeeds?
> 
> Thanks,
> Sid
> 
> gcc/ChangeLog:
> 
>   * opt-suggestions.cc
>   (option_proposer::build_option_suggestions): Pull OPTB
>   definition out of the innermost loop.

Ok for trunk, 14, 13 & 12 branches.

> ---
>  gcc/opt-suggestions.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/opt-suggestions.cc b/gcc/opt-suggestions.cc
> index 849e4f5c439..71e5cb561a6 100644
> --- a/gcc/opt-suggestions.cc
> +++ b/gcc/opt-suggestions.cc
> @@ -167,9 +167,9 @@ option_proposer::build_option_suggestions (const char 
> *prefix)
>   add_misspelling_candidates (m_option_suggestions, option,
>   opt_text);
>  
> + struct cl_option optb;
>   for (int j = 0; sanitizer_opts[j].name != NULL; ++j)
> {
> - struct cl_option optb;
>   /* -fsanitize=all is not valid, only -fno-sanitize=all.
>  So don't register the positive misspelling candidates
>  for it.  */
> -- 
> 2.45.1

Jakub



[PATCH] rs6000, Add new overloaded vector shift builtin int128, varients

2024-07-19 Thread Carl Love

GCC developers:

The following patch adds the int128 varients to the existing overloaded 
built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo, vec_srdb, 
vec_srl, vec_sro.  These varients were requested by Steve Munroe.


The patch has been tested on a Power 10 system with no regressions.

Please let me know if the patch is acceptable for mainline.

   Carl


---
 rs6000, Add new overloaded vector shift builtin int128 varients

Add the signed __int128 and unsigned __int128 argument types for the
overloaded built-ins vec_sld, vec_sldb, vec_sldw, vec_sll, vec_slo,
vec_srdb, vec_srl, vec_sro.  For each of the new argument types add a
testcase and update the documentation for the built-in.

Add the missing internal names for the float and double types for
overloaded builtin vec_sld for the float and double types.

gcc/ChangeLog:
    * config/rs6000/altivec.md (vsdb_): Change
    define_insn iterator to VEC_IC.
    * config/rs6000/rs6000-builtins.def (__builtin_altivec_vsldoi_v1ti,
    __builtin_vsx_xxsldwi_v1ti, __builtin_altivec_vsldb_v1ti,
    __builtin_altivec_vsrdb_v1ti): New builtin definitions.
    * config/rs6000/rs6000-overload.def (vec_sld, vec_sldb, vec_sldw,
    vec_sll, vec_slo, vec_srdb, vec_srl, vec_sro): New overloaded
    definitions.
    (vec_sld): Add missing internal names.
    * doc/extend.texi (vec_sld, vec_sldb, vec_sldw,    vec_sll, vec_slo,
    vec_srdb, vec_srl, vec_sro): Add documentation for new overloaded
    built-ins.

gcc/testsuite/ChangeLog:
    * gcc.target/powerpc/vec-shift-double-runnable-int128.c: New test
    file.
---
 gcc/config/rs6000/altivec.md  |   6 +-
 gcc/config/rs6000/rs6000-builtins.def |  12 +
 gcc/config/rs6000/rs6000-overload.def |  44 ++-
 gcc/doc/extend.texi   |  42 +++
 .../vec-shift-double-runnable-int128.c    | 349 ++
 5 files changed, 448 insertions(+), 5 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/powerpc/vec-shift-double-runnable-int128.c


diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5af9bf920a2..2a18ee44526 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -878,9 +878,9 @@ (define_int_attr SLDB_lr [(UNSPEC_SLDB "l")
 (define_int_iterator VSHIFT_DBL_LR [UNSPEC_SLDB UNSPEC_SRDB])

 (define_insn "vsdb_"
- [(set (match_operand:VI2 0 "register_operand" "=v")
-  (unspec:VI2 [(match_operand:VI2 1 "register_operand" "v")
-       (match_operand:VI2 2 "register_operand" "v")
+ [(set (match_operand:VEC_IC 0 "register_operand" "=v")
+  (unspec:VEC_IC [(match_operand:VEC_IC 1 "register_operand" "v")
+       (match_operand:VEC_IC 2 "register_operand" "v")
    (match_operand:QI 3 "const_0_to_12_operand" "n")]
   VSHIFT_DBL_LR))]
   "TARGET_POWER10"
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def

index 77eb0f7e406..fbb6e1ddf85 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -964,6 +964,9 @@
   const vss __builtin_altivec_vsldoi_8hi (vss, vss, const int<4>);
 VSLDOI_8HI altivec_vsldoi_v8hi {}

+  const vsq __builtin_altivec_vsldoi_v1ti (vsq, vsq, const int<4>);
+    VSLDOI_V1TI altivec_vsldoi_v1ti {}
+
   const vss __builtin_altivec_vslh (vss, vus);
 VSLH vashlv8hi3 {}

@@ -1831,6 +1834,9 @@
   const vsll __builtin_vsx_xxsldwi_2di (vsll, vsll, const int<2>);
 XXSLDWI_2DI vsx_xxsldwi_v2di {}

+  const vsq __builtin_vsx_xxsldwi_v1ti (vsq, vsq, const int<2>);
+    XXSLDWI_Q vsx_xxsldwi_v1ti {}
+
   const vf __builtin_vsx_xxsldwi_4sf (vf, vf, const int<2>);
 XXSLDWI_4SF vsx_xxsldwi_v4sf {}

@@ -3299,6 +3305,9 @@
   const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
 VSLDB_V8HI vsldb_v8hi {}

+  const vsq __builtin_altivec_vsldb_v1ti (vsq, vsq, const int<3>);
+    VSLDB_V1TI vsldb_v1ti {}
+
   const vsq __builtin_altivec_vslq (vsq, vuq);
 VSLQ vashlv1ti3 {}

@@ -3317,6 +3326,9 @@
   const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
 VSRDB_V8HI vsrdb_v8hi {}

+  const vsq __builtin_altivec_vsrdb_v1ti (vsq, vsq, const int<3>);
+    VSRDB_V1TI vsrdb_v1ti {}
+
   const vsq __builtin_altivec_vsrq (vsq, vuq);
 VSRQ vlshrv1ti3 {}

diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def

index c4ecafc6f7e..302e0232533 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -3396,9 +3396,13 @@
   vull __builtin_vec_sld (vull, vull, const int);
 VSLDOI_2DI  VSLDOI_VULL
   vf __builtin_vec_sld (vf, vf, const int);
-    VSLDOI_4SF
+    VSLDOI_4SF VSLDOI_VF
   vd __builtin_vec_sld (vd, vd, const int);
-    VSLDOI_2DF
+    VSLDOI_2DF VSLDOI_VD
+  vsq __builtin_vec_sld (vsq, vsq, const int);
+    VSLDOI_V1TI  VSLDOI_VSQ
+  vuq __builtin_vec_sld

[PATCH] libsanitizer: also undef _TIME_BITS in sanitizer_procmaps_solaris.cpp

2024-07-19 Thread Thomas Petazzoni
Upstream commit
https://github.com/llvm/llvm-project/commit/26800a2c7e7996dc773b4e990dd5cca41c45e1a9
of LLVM added a #undef _TIME_BITS in
libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp to
fix the build on 32-bit Linux platforms that have enabled 64-bit
time_t using _TIME_BITS=64.

Indeed, _TIME_BITS=64 can only be used when _FILE_OFFSET_BITS=64, but
sanitizer_platform_limits_posix.cpp undefines _FILE_OFFSET_BITS before
including any header file. To fix this, the upstream fix was to also
undef _TIME_BITS.

This commit simply does the same in sanitizer_procmaps_solaris.cpp,
which also gets compiled under Linux (despite what the file name
says). In practice on Linux hosts (where _TIME_BITS=64 matters),
sanitizer_procmaps_solaris.cpp will expand to nothing, as pretty much
the rest of the file is inside a #ifdef SANITIZER_SOLARIS...#endif. So
the #undef _FILE_OFFSET_BITS and #undef _TIME_BITS are only here
before including sanitizer_platform.h, which will set the
SANITIZER_LINUX/SANITIZER_SOLARIS define depending on the platform.

Fixes:

armeb-buildroot-linux-gnueabi/sysroot/usr/include/features-time64.h:26:5: 
error: #error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64"
   26 | #   error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64"

This change has also been proposed to upstream LLVM:

  https://github.com/llvm/llvm-project/pull/99699

Signed-off-by: Thomas Petazzoni 
---
 libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp 
b/libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp
index eeb49e2afe3..1b23fd4d512 100644
--- a/libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp
+++ b/libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp
@@ -11,6 +11,7 @@
 
 // Before Solaris 11.4,  doesn't work in a largefile environment.
 #undef _FILE_OFFSET_BITS
+#undef _TIME_BITS
 #include "sanitizer_platform.h"
 #if SANITIZER_SOLARIS
 #  include 
-- 
2.45.2



Re: [PATCH v2] RISC-V: More support of vx and vf for autovec comparison

2024-07-19 Thread Robin Dapp
> -(match_operand:V_VLSF 3 "register_operand")]))]
> +(match_operand:V_VLSF 3 "nonmemory_operand")]))]

Even though the integer compares have nonmemory operand here their respective
insn patterns don't (but constrain properly).

I guess what's happening with register operand and a constant 0.0 is that we'd
broadcast (vec_duplicate) instead?  Can't we special-case the vector move
pattern to allow constant zero in two ways?  That would also allow to have a
uarch-specific choice regarding .vv vs .vf at the proper spot.

-- 
Regards
 Robin



Re: [PATCH v2] RISC-V: More support of vx and vf for autovec comparison

2024-07-19 Thread Robin Dapp
> I have a test. 
> The backend can't see -0.0 and It becomes 0.0 when translate to gimple.

I don't think it should except when specifying -ffast-math or similar.
But we don't have a shortcut to load a negative zero, just the positive
one.

-- 
Regards
 Robin



[COMMITTED] PR tree-optimzation/116003 - Check for SSA_NAME not in the IL yet.

2024-07-19 Thread Andrew MacLeod
When registering an equivalence, the oracle creates an initial 
equivalence for an SSA_NAME with itself in the def block.   This 
primarily prevents dominator queries from searching past the definition 
block.


If the definition stmt for the SSA_NAME has not been placed in the CFG 
yet, we should defer creating this record until it has.    The next time 
an equivalence is registered, if the SSA_NAME is in the IL then this 
record will be created.


Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew



From 01c095ab77f8f43bf77e4c0be6c4f4c0d15a4c29 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Fri, 19 Jul 2024 17:39:40 -0400
Subject: [PATCH] Check for SSA_NAME not in the IL yet.

Check for an SSA_NAME not in the CFG before trying to create an
equivalence record in the defintion block.

	PR tree-optimization/116003
	gcc/
	* value-relation.cc (equiv_oracle::register_initial_def): Check
	if SSA_NAME is in the IL before registering.

	gcc/testsuite/
	* gcc.dg/pr116003.c: New.
---
 gcc/testsuite/gcc.dg/pr116003.c | 21 +
 gcc/value-relation.cc   |  6 +-
 2 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr116003.c

diff --git a/gcc/testsuite/gcc.dg/pr116003.c b/gcc/testsuite/gcc.dg/pr116003.c
new file mode 100644
index 000..6021058a14e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116003.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fnon-call-exceptions -fprofile-arcs -finstrument-functions -fno-tree-copy-prop" } */
+
+_BitInt(5) b5;
+
+char c;
+int i;
+_BitInt(129) b129;
+
+void
+foo(_BitInt(128) b128)
+{
+l50:
+  b128 %= b128 - b129;
+l64:
+  b128 %= c;
+  if (__builtin_add_overflow(i, 0, &c))
+goto l50;
+  if (__builtin_sub_overflow(c, 0, &b5))
+goto l64;
+}
diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc
index 9293d9ed65b..45722fcd13a 100644
--- a/gcc/value-relation.cc
+++ b/gcc/value-relation.cc
@@ -607,7 +607,11 @@ equiv_oracle::register_initial_def (tree ssa)
   if (SSA_NAME_IS_DEFAULT_DEF (ssa))
 return;
   basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (ssa));
-  gcc_checking_assert (bb && !find_equiv_dom (ssa, bb));
+
+  // If defining stmt is not in the IL, simply return.
+  if (!bb)
+return;
+  gcc_checking_assert (!find_equiv_dom (ssa, bb));
 
   unsigned v = SSA_NAME_VERSION (ssa);
   bitmap_set_bit (m_equiv_set, v);
-- 
2.45.0



[OG14] Revert "[og10] vect: Add target hook to prefer gather/scatter instructions" (was: [PATCH] [og10] vect: Add target hook to prefer gather/scatter instructions)

2024-07-19 Thread Thomas Schwinge
Hi!

On 2021-01-13T15:48:42-0800, Julian Brown  wrote:
> For AMD GCN, the instructions available for loading/storing vectors are
> always scatter/gather operations (i.e. there are separate addresses for
> each vector lane), so the current heuristic to avoid gather/scatter
> operations with too many elements in get_group_load_store_type is
> counterproductive. Avoiding such operations in that function can
> subsequently lead to a missed vectorization opportunity whereby later
> analyses in the vectorizer try to use a very wide array type which is
> not available on this target, and thus it bails out.
>
> The attached patch adds a target hook to override the "single_element_p"
> heuristic in the function as a target hook, and activates it for GCN. This
> allows much better code to be generated for affected loops.
>
> Tested with offloading to AMD GCN. I will apply to the og10 branch
> shortly.

Testing current OG14 commit 735bbbfc6eaf58522c3ebb0946b66f33958ea134 for
'--target=amdgcn-amdhsa' (I've tested '-march=gfx908', '-march=gfx1100'),
this change has been identified to be causing ~100 instances of execution
test PASS -> FAIL, thus wrong-code generation.  It's possible that we've
had the same misbehavior also on OG13 and earlier, but just nobody ever
tested that.  And/or, that at some point in time, the original patch fell
out of sync, wasn't updated for relevant upstream vectorizer changes.
Until someone gets to analyze that (and upstream these changes here), we
shall revert this commit on OG14.  Pushed to devel/omp/gcc-14 branch
commit 8678fc697046fba1014f1db6321ee670538b0881
'Revert "[og10] vect: Add target hook to prefer gather/scatter instructions"',
see attached.


List of GCC 14.1 vs OG14 regressions (... avoided by this revert commit):

'-march=gfx1100' only:

PASS: g++.dg/vect/pr97255.cc  -std=c++14 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.dg/vect/pr97255.cc  -std=c++14 execution test
PASS: g++.dg/vect/pr97255.cc  -std=c++17 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.dg/vect/pr97255.cc  -std=c++17 execution test
PASS: g++.dg/vect/pr97255.cc  -std=c++20 (test for excess errors)
[-PASS:-]{+FAIL:+} g++.dg/vect/pr97255.cc  -std=c++20 execution test
UNSUPPORTED: g++.dg/vect/pr97255.cc  -std=c++98

GCN Kernel Aborted

@@ -101950,11 +101950,11 @@ PASS: gcc.dg/torture/pr52028.c   -O0  execution 
test
PASS: gcc.dg/torture/pr52028.c   -O1  (test for excess errors)
PASS: gcc.dg/torture/pr52028.c   -O1  execution test
PASS: gcc.dg/torture/pr52028.c   -O2  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr52028.c   -O2  execution test
PASS: gcc.dg/torture/pr52028.c   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr52028.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
PASS: gcc.dg/torture/pr52028.c   -O3 -g  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr52028.c   -O3 -g  execution test
PASS: gcc.dg/torture/pr52028.c   -Os  (test for excess errors)
PASS: gcc.dg/torture/pr52028.c   -Os  execution test

GCN Kernel Aborted

@@ -102160,11 +102160,11 @@ PASS: gcc.dg/torture/pr53366-1.c   -O0  
execution test
PASS: gcc.dg/torture/pr53366-1.c   -O1  (test for excess errors)
PASS: gcc.dg/torture/pr53366-1.c   -O1  execution test
PASS: gcc.dg/torture/pr53366-1.c   -O2  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr53366-1.c   -O2  execution test
PASS: gcc.dg/torture/pr53366-1.c   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr53366-1.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
PASS: gcc.dg/torture/pr53366-1.c   -O3 -g  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr53366-1.c   -O3 -g  execution test
PASS: gcc.dg/torture/pr53366-1.c   -Os  (test for excess errors)
PASS: gcc.dg/torture/pr53366-1.c   -Os  execution test

GCN Kernel Aborted

PASS: gcc.dg/torture/pr93868.c   -O0  (test for excess errors)
PASS: gcc.dg/torture/pr93868.c   -O0  execution test
PASS: gcc.dg/torture/pr93868.c   -O1  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr93868.c   -O1  execution test
PASS: gcc.dg/torture/pr93868.c   -O2  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr93868.c   -O2  execution test
PASS: gcc.dg/torture/pr93868.c   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr93868.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
PASS: gcc.dg/torture/pr93868.c   -O3 -g  (test for excess errors)
[-PASS:-]{+FAIL:+} gcc.dg/torture/pr93868.

Re: [PATCH] testsuite: powerpc: fix dg-do run typo

2024-07-19 Thread Sam James
"Kewen.Lin"  writes:

> Hi Sam,

Hi Kewen,

>
> on 2024/7/19 11:28, Sam James wrote:
>> 'dg-run' is not a valid dejagnu directive, 'dg-do run' is needed here
>> for the test to be executed.
>> 
>> 2024-07-18  Sam James  
>> 
>>  PR target/108699
>>  * gcc.target/powerpc/pr108699.c: Fix 'dg-run' typo.
>> ---
>> Kewen, could you check this on powerpc to ensure it doesn't execute 
>> beforehand
>> and now it does? I could do it on powerpc but I don't have anything setup
>> right now.
>
> Oops, thanks for catching and fixing this stupid typo!  Yes, I just confirmed 
> that,
> w/ this fix pr108699.exe gets generated and executed (# of expected passes is 
> changed
> from 1 to 2).

Many thanks! Could you push for me please?

>
> BR,
> Kewen

best,
sam

>
>> 
>>  gcc/testsuite/gcc.target/powerpc/pr108699.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108699.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr108699.c
>> index f02bac130cc7..beb8b601fd51 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr108699.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108699.c
>> @@ -1,4 +1,4 @@
>> -/* { dg-run } */
>> +/* { dg-do run } */
>>  /* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
>>  
>>  #define N 16
>> 


[PATCH] gcc: stop adding -fno-common for checking builds

2024-07-19 Thread Sam James
Originally added in r0-44646-g204250d2fcd084 and r0-44627-gfd350d241fecf6 whic
moved -fno-common from all builds to just checking builds.

Since r10-4867-g6271dd984d7f92, GCC defaults to -fno-common. There's no need
to pass it specially for checking builds.

We could keep it for older bootstrap compilers with checking but I don't see
much value in that, it was already just a bonus before.

gcc/ChangeLog:
* Makefile.in (NOCOMMON_FLAG): Delete.
(GCC_WARN_CFLAGS): Drop NOCOMMON_FLAG.
(GCC_WARN_CXXFLAGS): Drop NOCOMMON_FLAG.
* configure.ac: Ditto.
* configure: Regenerate.

gcc/d/ChangeLog:
* Make-lang.in (WARN_DFLAGS): Drop NOCOMMON_FLAG.
---
This came out of a discussion with pinskia last year but I punted it
until stage1. Been running with it since then.

 gcc/Makefile.in| 8 ++--
 gcc/configure  | 8 ++--
 gcc/configure.ac   | 3 ---
 gcc/d/Make-lang.in | 2 +-
 4 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f4bb4a88cf31..4fc86ed7938b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -185,10 +185,6 @@ C_LOOSE_WARN = @c_loose_warn@
 STRICT_WARN = @strict_warn@
 C_STRICT_WARN = @c_strict_warn@
 
-# This is set by --enable-checking.  The idea is to catch forgotten
-# "extern" tags in header files.
-NOCOMMON_FLAG = @nocommon_flag@
-
 NOEXCEPTION_FLAGS = @noexception_flags@
 
 ALIASING_FLAGS = @aliasing_flags@
@@ -215,8 +211,8 @@ VALGRIND_DRIVER_DEFINES = @valgrind_path_defines@
 .-warn = $(STRICT_WARN)
 build-warn = $(STRICT_WARN)
 rtl-ssa-warn = $(STRICT_WARN)
-GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
$(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) $(NOCOMMON_FLAG) 
$($@-warn)
-GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $(NOCOMMON_FLAG) $($@-warn)
+GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
$(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) $($@-warn)
+GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $($@-warn)
 
 # 1 2 3 ... 
 one_to__0:=1 2 3 4 5 6 7 8 9
diff --git a/gcc/configure b/gcc/configure
index 4faae0fa5fb8..01acca7fb5cc 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -862,7 +862,6 @@ valgrind_command
 valgrind_path_defines
 valgrind_path
 TREECHECKING
-nocommon_flag
 noexception_flags
 warn_cxxflags
 warn_cflags
@@ -7605,17 +7604,14 @@ do
 done
 IFS="$ac_save_IFS"
 
-nocommon_flag=""
 if test x$ac_checking != x ; then
 
 $as_echo "#define CHECKING_P 1" >>confdefs.h
 
-  nocommon_flag=-fno-common
 else
   $as_echo "#define CHECKING_P 0" >>confdefs.h
 
 fi
-
 if test x$ac_extra_checking != x ; then
 
 $as_echo "#define ENABLE_EXTRA_CHECKING 1" >>confdefs.h
@@ -21410,7 +21406,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 21413 "configure"
+#line 21409 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -21516,7 +21512,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 21519 "configure"
+#line 21515 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 3da1eaa70646..3f20c107b6aa 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -697,16 +697,13 @@ do
 done
 IFS="$ac_save_IFS"
 
-nocommon_flag=""
 if test x$ac_checking != x ; then
   AC_DEFINE(CHECKING_P, 1,
 [Define to 0/1 if you want more run-time sanity checks.  This one gets a grab
 bag of miscellaneous but relatively cheap checks.])
-  nocommon_flag=-fno-common
 else
   AC_DEFINE(CHECKING_P, 0)
 fi
-AC_SUBST(nocommon_flag)
 if test x$ac_extra_checking != x ; then
   AC_DEFINE(ENABLE_EXTRA_CHECKING, 1,
 [Define to 0/1 if you want extra run-time checking that might affect code
diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in
index eaea6e039cf7..077668faae64 100644
--- a/gcc/d/Make-lang.in
+++ b/gcc/d/Make-lang.in
@@ -55,7 +55,7 @@ CHECKING_DFLAGS = -frelease
 else
 CHECKING_DFLAGS =
 endif
-WARN_DFLAGS = -Wall -Wdeprecated $(NOCOMMON_FLAG)
+WARN_DFLAGS = -Wall -Wdeprecated
 
 # D front-end doesn't use exceptions, but it does require RTTI.
 NOEXCEPTION_DFLAGS = $(filter-out -fno-rtti, $(NOEXCEPTION_FLAGS))

-- 
2.45.2



Re: [PATCH] gcc: stop adding -fno-common for checking builds

2024-07-19 Thread Andrew Pinski
On Fri, Jul 19, 2024 at 5:23 PM Sam James  wrote:
>
> Originally added in r0-44646-g204250d2fcd084 and r0-44627-gfd350d241fecf6 whic
> moved -fno-common from all builds to just checking builds.
>
> Since r10-4867-g6271dd984d7f92, GCC defaults to -fno-common. There's no need
> to pass it specially for checking builds.
>
> We could keep it for older bootstrap compilers with checking but I don't see
> much value in that, it was already just a bonus before.

Considering -fno-common has almost no effect on C++ code, removing it
fully is a decent thing to do.
It was added back when GCC was written in C and then never removed
when GCC started to build as C++.

Thanks,
Andrew Pinski

>
> gcc/ChangeLog:
> * Makefile.in (NOCOMMON_FLAG): Delete.
> (GCC_WARN_CFLAGS): Drop NOCOMMON_FLAG.
> (GCC_WARN_CXXFLAGS): Drop NOCOMMON_FLAG.
> * configure.ac: Ditto.
> * configure: Regenerate.
>
> gcc/d/ChangeLog:
> * Make-lang.in (WARN_DFLAGS): Drop NOCOMMON_FLAG.
> ---
> This came out of a discussion with pinskia last year but I punted it
> until stage1. Been running with it since then.
>
>  gcc/Makefile.in| 8 ++--
>  gcc/configure  | 8 ++--
>  gcc/configure.ac   | 3 ---
>  gcc/d/Make-lang.in | 2 +-
>  4 files changed, 5 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index f4bb4a88cf31..4fc86ed7938b 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -185,10 +185,6 @@ C_LOOSE_WARN = @c_loose_warn@
>  STRICT_WARN = @strict_warn@
>  C_STRICT_WARN = @c_strict_warn@
>
> -# This is set by --enable-checking.  The idea is to catch forgotten
> -# "extern" tags in header files.
> -NOCOMMON_FLAG = @nocommon_flag@
> -
>  NOEXCEPTION_FLAGS = @noexception_flags@
>
>  ALIASING_FLAGS = @aliasing_flags@
> @@ -215,8 +211,8 @@ VALGRIND_DRIVER_DEFINES = @valgrind_path_defines@
>  .-warn = $(STRICT_WARN)
>  build-warn = $(STRICT_WARN)
>  rtl-ssa-warn = $(STRICT_WARN)
> -GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
> $(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) 
> $(NOCOMMON_FLAG) $($@-warn)
> -GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $(NOCOMMON_FLAG) $($@-warn)
> +GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
> $(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) $($@-warn)
> +GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $($@-warn)
>
>  # 1 2 3 ... 
>  one_to__0:=1 2 3 4 5 6 7 8 9
> diff --git a/gcc/configure b/gcc/configure
> index 4faae0fa5fb8..01acca7fb5cc 100755
> --- a/gcc/configure
> +++ b/gcc/configure
> @@ -862,7 +862,6 @@ valgrind_command
>  valgrind_path_defines
>  valgrind_path
>  TREECHECKING
> -nocommon_flag
>  noexception_flags
>  warn_cxxflags
>  warn_cflags
> @@ -7605,17 +7604,14 @@ do
>  done
>  IFS="$ac_save_IFS"
>
> -nocommon_flag=""
>  if test x$ac_checking != x ; then
>
>  $as_echo "#define CHECKING_P 1" >>confdefs.h
>
> -  nocommon_flag=-fno-common
>  else
>$as_echo "#define CHECKING_P 0" >>confdefs.h
>
>  fi
> -
>  if test x$ac_extra_checking != x ; then
>
>  $as_echo "#define ENABLE_EXTRA_CHECKING 1" >>confdefs.h
> @@ -21410,7 +21406,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 21413 "configure"
> +#line 21409 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
> @@ -21516,7 +21512,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 21519 "configure"
> +#line 21515 "configure"
>  #include "confdefs.h"
>
>  #if HAVE_DLFCN_H
> diff --git a/gcc/configure.ac b/gcc/configure.ac
> index 3da1eaa70646..3f20c107b6aa 100644
> --- a/gcc/configure.ac
> +++ b/gcc/configure.ac
> @@ -697,16 +697,13 @@ do
>  done
>  IFS="$ac_save_IFS"
>
> -nocommon_flag=""
>  if test x$ac_checking != x ; then
>AC_DEFINE(CHECKING_P, 1,
>  [Define to 0/1 if you want more run-time sanity checks.  This one gets a grab
>  bag of miscellaneous but relatively cheap checks.])
> -  nocommon_flag=-fno-common
>  else
>AC_DEFINE(CHECKING_P, 0)
>  fi
> -AC_SUBST(nocommon_flag)
>  if test x$ac_extra_checking != x ; then
>AC_DEFINE(ENABLE_EXTRA_CHECKING, 1,
>  [Define to 0/1 if you want extra run-time checking that might affect code
> diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in
> index eaea6e039cf7..077668faae64 100644
> --- a/gcc/d/Make-lang.in
> +++ b/gcc/d/Make-lang.in
> @@ -55,7 +55,7 @@ CHECKING_DFLAGS = -frelease
>  else
>  CHECKING_DFLAGS =
>  endif
> -WARN_DFLAGS = -Wall -Wdeprecated $(NOCOMMON_FLAG)
> +WARN_DFLAGS = -Wall -Wdeprecated
>
>  # D front-end doesn't use exceptions, but it does require RTTI.
>  NOEXCEPTION_DFLAGS = $(filter-out -fno-rtti, $(NOEXCEPTION_FLAGS))
>
> --
> 2.45.2
>


Re:[pushed] [PATCH] LoongArch: Organize the code related to split move and merge the same functions.

2024-07-19 Thread Lulu Cheng

Pushed to r15-2167.

在 2024/7/13 下午5:04, Lulu Cheng 写道:

gcc/ChangeLog:

* config/loongarch/loongarch-protos.h
(loongarch_split_128bit_move): Delete.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_vector_move): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_vector_costs::finish_cost): Adjust the code
formatting.
(loongarch_split_vector_move_p): Merge
loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
(loongarch_split_move_p): Merge code.
(loongarch_split_move): Likewise.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_128bit_move): Delete.
(loongarch_split_vector_move): Merge loongarch_split_128bit_move
and loongarch_split_256bit_move.
(loongarch_split_256bit_move): Delete.
(loongarch_global_init): Remove the extra semicolon at the
end of the function.
* config/loongarch/loongarch.md (*movdf_softfloat):  Added a new
condition TARGET_64BIT.
---
  gcc/config/loongarch/loongarch-protos.h |   5 +-
  gcc/config/loongarch/loongarch.cc   | 221 ++--
  gcc/config/loongarch/loongarch.md   |   1 +
  3 files changed, 58 insertions(+), 169 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index e238d795a73..85f6e894399 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
  extern void loongarch_split_move (rtx, rtx);
  extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
  extern void loongarch_split_plus_constant (rtx *, machine_mode);
-extern void loongarch_split_128bit_move (rtx, rtx);
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
-extern void loongarch_split_256bit_move (rtx, rtx);
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
+extern void loongarch_split_vector_move (rtx, rtx);
  extern const char *loongarch_output_move (rtx, rtx);
  #ifdef RTX_CODE
  extern void loongarch_expand_scc (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 8eb47ff95c3..c7a02103ef5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4354,10 +4354,10 @@ void
  loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
  {
loop_vec_info loop_vinfo = dyn_cast (m_vinfo);
+
if (loop_vinfo)
-{
-  m_suggested_unroll_factor = determine_suggested_unroll_factor 
(loop_vinfo);
-}
+m_suggested_unroll_factor
+  = determine_suggested_unroll_factor (loop_vinfo);
  
vector_costs::finish_cost (scalar_costs);

  }
@@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
return simplify_gen_subreg (word_mode, op, mode, byte);
  }
  
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);

  /* Return true if a move from SRC to DEST should be split into two.
 SPLIT_TYPE describes the split condition.  */
  
@@ -,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)

return false;
  }
  
-  /* Check if LSX moves need splitting.  */

-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_128bit_move_p (dest, src);
  
-  /* Check if LASX moves need splitting.  */

-  if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_256bit_move_p (dest, src);
+  /* Check if vector moves need splitting.  */
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+return loongarch_split_vector_move_p (dest, src);
  
/* Otherwise split all multiword moves.  */

return size > UNITS_PER_WORD;
@@ -4463,10 +4462,9 @@ void
  loongarch_split_move (rtx dest, rtx src)
  {
gcc_checking_assert (loongarch_split_move_p (dest, src));
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_128bit_move (dest, src);
-  else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_256bit_move (dest, src);
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+loongarch_split_vector_move (dest, src);
else
  gcc_unreachable ();
  }
@@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, 
machine_mode mode, bool ldr)
  
return insn[ldr][index-2];

  }
-/* Return true if a 128-bit move from SRC to DEST should be split.  */
-
-bool
-loongarch_split_128bit_move_p (rtx dest, rtx src)
-{
-  /* LSX-to-LSX moves can be done in a single instruction.  */
-  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
-return false;
-
-  /* Check for LSX loads and stores.  */
-  if (FP_REG_RTX_P (dest) && MEM_P (src))
-return false;
-  if (FP_REG_RTX_P (src) && M

[PATCH] testsuite: fix pr115929-1.c with -Wformat-security

2024-07-19 Thread Sam James
Some distributions like Gentoo make -Wformat and -Wformat-security
enabled by default. Pass -Wno-format to the test to avoid a spurious
fail in such environments.

gcc/testsuite/
PR rtl-optimization/115929
* gcc.dg/torture/pr115929-1.c: Pass -Wno-format.
---
Richard, is this OK? I could adjust the testcase if you prefer.

Please commit on my behalf if it's fine. Thanks.

 gcc/testsuite/gcc.dg/torture/pr115929-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr115929-1.c 
b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
index 19b831ab99ef..d81081e2f2e9 100644
--- a/gcc/testsuite/gcc.dg/torture/pr115929-1.c
+++ b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target lp64 } */
-/* { dg-options "-fno-gcse -fschedule-insns -fno-guess-branch-probability 
-fno-tree-fre -fno-tree-ch" } */
+/* { dg-options "-fno-gcse -fschedule-insns -fno-guess-branch-probability 
-fno-tree-fre -fno-tree-ch -Wno-format" } */
 
 int printf(const char *, ...);
 int a[6], b, c;

-- 
2.45.2



Re: [PATCH] testsuite: fix pr115929-1.c with -Wformat-security

2024-07-19 Thread Xi Ruoyao
On Sat, 2024-07-20 at 06:52 +0100, Sam James wrote:
> Some distributions like Gentoo make -Wformat and -Wformat-security
> enabled by default. Pass -Wno-format to the test to avoid a spurious
> fail in such environments.
> 
> gcc/testsuite/
>   PR rtl-optimization/115929
>   * gcc.dg/torture/pr115929-1.c: Pass -Wno-format.
> ---

IMO if you are patching GCC downstream to enable some options, you can
patch the test case in the same .patch file anyway instead of pushing it
upstream.

If we take the responsibility to make the test suite anticipate random
downstream changes, the test suite will ended up filled with different
workarounds for 42 distros.

If we have to anticipate downstream changes we should make a policy
about which changes we must anticipate (hmm and if we'll anticipate -
Wformat by default why not add a configuration option for it by the
way?), or do it in a more generic way (using a .spec file to explicitly
give the "baseline" options for testing?)

> Richard, is this OK? I could adjust the testcase if you prefer.
> 
> Please commit on my behalf if it's fine. Thanks.
> 
>  gcc/testsuite/gcc.dg/torture/pr115929-1.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/torture/pr115929-1.c 
> b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
> index 19b831ab99ef..d81081e2f2e9 100644
> --- a/gcc/testsuite/gcc.dg/torture/pr115929-1.c
> +++ b/gcc/testsuite/gcc.dg/torture/pr115929-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-require-effective-target lp64 } */
> -/* { dg-options "-fno-gcse -fschedule-insns -fno-guess-branch-probability 
> -fno-tree-fre -fno-tree-ch" } */
> +/* { dg-options "-fno-gcse -fschedule-insns -fno-guess-branch-probability 
> -fno-tree-fre -fno-tree-ch -Wno-format" } */
>  
>  int printf(const char *, ...);
>  int a[6], b, c;
> 

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH] gcc: stop adding -fno-common for checking builds

2024-07-19 Thread Richard Biener



> Am 20.07.2024 um 02:31 schrieb Andrew Pinski :
> 
> On Fri, Jul 19, 2024 at 5:23 PM Sam James  wrote:
>> 
>> Originally added in r0-44646-g204250d2fcd084 and r0-44627-gfd350d241fecf6 
>> whic
>> moved -fno-common from all builds to just checking builds.
>> 
>> Since r10-4867-g6271dd984d7f92, GCC defaults to -fno-common. There's no need
>> to pass it specially for checking builds.
>> 
>> We could keep it for older bootstrap compilers with checking but I don't see
>> much value in that, it was already just a bonus before.
> 
> Considering -fno-common has almost no effect on C++ code, removing it
> fully is a decent thing to do.
> It was added back when GCC was written in C and then never removed
> when GCC started to build as C++.

Ok

Richard 

> Thanks,
> Andrew Pinski
> 
>> 
>> gcc/ChangeLog:
>>* Makefile.in (NOCOMMON_FLAG): Delete.
>>(GCC_WARN_CFLAGS): Drop NOCOMMON_FLAG.
>>(GCC_WARN_CXXFLAGS): Drop NOCOMMON_FLAG.
>>* configure.ac: Ditto.
>>* configure: Regenerate.
>> 
>> gcc/d/ChangeLog:
>>* Make-lang.in (WARN_DFLAGS): Drop NOCOMMON_FLAG.
>> ---
>> This came out of a discussion with pinskia last year but I punted it
>> until stage1. Been running with it since then.
>> 
>> gcc/Makefile.in| 8 ++--
>> gcc/configure  | 8 ++--
>> gcc/configure.ac   | 3 ---
>> gcc/d/Make-lang.in | 2 +-
>> 4 files changed, 5 insertions(+), 16 deletions(-)
>> 
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index f4bb4a88cf31..4fc86ed7938b 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -185,10 +185,6 @@ C_LOOSE_WARN = @c_loose_warn@
>> STRICT_WARN = @strict_warn@
>> C_STRICT_WARN = @c_strict_warn@
>> 
>> -# This is set by --enable-checking.  The idea is to catch forgotten
>> -# "extern" tags in header files.
>> -NOCOMMON_FLAG = @nocommon_flag@
>> -
>> NOEXCEPTION_FLAGS = @noexception_flags@
>> 
>> ALIASING_FLAGS = @aliasing_flags@
>> @@ -215,8 +211,8 @@ VALGRIND_DRIVER_DEFINES = @valgrind_path_defines@
>> .-warn = $(STRICT_WARN)
>> build-warn = $(STRICT_WARN)
>> rtl-ssa-warn = $(STRICT_WARN)
>> -GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
>> $(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) 
>> $(NOCOMMON_FLAG) $($@-warn)
>> -GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $(NOCOMMON_FLAG) $($@-warn)
>> +GCC_WARN_CFLAGS = $(LOOSE_WARN) $(C_LOOSE_WARN) $($(@D)-warn) $(if 
>> $(filter-out $(STRICT_WARN),$($(@D)-warn)),,$(C_STRICT_WARN)) $($@-warn)
>> +GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $($@-warn)
>> 
>> # 1 2 3 ... 
>> one_to__0:=1 2 3 4 5 6 7 8 9
>> diff --git a/gcc/configure b/gcc/configure
>> index 4faae0fa5fb8..01acca7fb5cc 100755
>> --- a/gcc/configure
>> +++ b/gcc/configure
>> @@ -862,7 +862,6 @@ valgrind_command
>> valgrind_path_defines
>> valgrind_path
>> TREECHECKING
>> -nocommon_flag
>> noexception_flags
>> warn_cxxflags
>> warn_cflags
>> @@ -7605,17 +7604,14 @@ do
>> done
>> IFS="$ac_save_IFS"
>> 
>> -nocommon_flag=""
>> if test x$ac_checking != x ; then
>> 
>> $as_echo "#define CHECKING_P 1" >>confdefs.h
>> 
>> -  nocommon_flag=-fno-common
>> else
>>   $as_echo "#define CHECKING_P 0" >>confdefs.h
>> 
>> fi
>> -
>> if test x$ac_extra_checking != x ; then
>> 
>> $as_echo "#define ENABLE_EXTRA_CHECKING 1" >>confdefs.h
>> @@ -21410,7 +21406,7 @@ else
>>   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>>   lt_status=$lt_dlunknown
>>   cat > conftest.$ac_ext <<_LT_EOF
>> -#line 21413 "configure"
>> +#line 21409 "configure"
>> #include "confdefs.h"
>> 
>> #if HAVE_DLFCN_H
>> @@ -21516,7 +21512,7 @@ else
>>   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>>   lt_status=$lt_dlunknown
>>   cat > conftest.$ac_ext <<_LT_EOF
>> -#line 21519 "configure"
>> +#line 21515 "configure"
>> #include "confdefs.h"
>> 
>> #if HAVE_DLFCN_H
>> diff --git a/gcc/configure.ac b/gcc/configure.ac
>> index 3da1eaa70646..3f20c107b6aa 100644
>> --- a/gcc/configure.ac
>> +++ b/gcc/configure.ac
>> @@ -697,16 +697,13 @@ do
>> done
>> IFS="$ac_save_IFS"
>> 
>> -nocommon_flag=""
>> if test x$ac_checking != x ; then
>>   AC_DEFINE(CHECKING_P, 1,
>> [Define to 0/1 if you want more run-time sanity checks.  This one gets a grab
>> bag of miscellaneous but relatively cheap checks.])
>> -  nocommon_flag=-fno-common
>> else
>>   AC_DEFINE(CHECKING_P, 0)
>> fi
>> -AC_SUBST(nocommon_flag)
>> if test x$ac_extra_checking != x ; then
>>   AC_DEFINE(ENABLE_EXTRA_CHECKING, 1,
>> [Define to 0/1 if you want extra run-time checking that might affect code
>> diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in
>> index eaea6e039cf7..077668faae64 100644
>> --- a/gcc/d/Make-lang.in
>> +++ b/gcc/d/Make-lang.in
>> @@ -55,7 +55,7 @@ CHECKING_DFLAGS = -frelease
>> else
>> CHECKING_DFLAGS =
>> endif
>> -WARN_DFLAGS = -Wall -Wdeprecated $(NOCOMMON_FLAG)
>> +WARN_DFLAGS = -Wall -Wdeprecated
>> 
>> # D front-end doesn't use exceptions, but it does require RTTI.
>> NOEXCEPTION_DFLAGS = $(filter-out -fno-rtti, $(NOEXCEPTION_FL

Re: [PATCH] testsuite: fix pr115929-1.c with -Wformat-security

2024-07-19 Thread Sam James
Xi Ruoyao  writes:

> On Sat, 2024-07-20 at 06:52 +0100, Sam James wrote:
>> Some distributions like Gentoo make -Wformat and -Wformat-security
>> enabled by default. Pass -Wno-format to the test to avoid a spurious
>> fail in such environments.
>> 
>> gcc/testsuite/
>>  PR rtl-optimization/115929
>>  * gcc.dg/torture/pr115929-1.c: Pass -Wno-format.
>> ---
>
> IMO if you are patching GCC downstream to enable some options, you can
> patch the test case in the same .patch file anyway instead of pushing it
> upstream.

It's a fair argument.

That said, you did say the same thing reflexively about -fhardened, even
if this is a different situation ;)

One might argue that especially for torture/, supporting "not
unreasonable" random flags is not a bad thing.

>
> If we take the responsibility to make the test suite anticipate random
> downstream changes, the test suite will ended up filled with different
> workarounds for 42 distros.
>

My counterpoint would be that there are certain warnings we know various
distros enable by default, and various warnings where it's not
inconceivable we might do them upstream at some point.

Being loose about conformance in test cases is part of why the C99
enforcement took a while.

> If we have to anticipate downstream changes we should make a policy
> about which changes we must anticipate (hmm and if we'll anticipate -
> Wformat by default why not add a configuration option for it by the
> way?), or do it in a more generic way (using a .spec file to explicitly
> give the "baseline" options for testing?)
>

Anyway, I take the point, but it was cheaper to ask with a patch
attached than have it in my head and not know what the position was.

I like the .spec idea. In the past, we used custom .spec extensively
downstream, and we stopped before my tenure. My intention long-term with
Arsen is to bring it back as it feels like a better fit.

I'm also happy to adjust the testcase given I reproduced the original
issue fine. Or to leave it if the consensus is to. Whatever works for me.

I dare say we're spending time talking about it than the occasional
-Wno- costs though.

>> Richard, is this OK? I could adjust the testcase if you prefer.
>> 
>> [...]

thanks,
sam


signature.asc
Description: PGP signature


Re: [PATCH] s390: Fix unresolved iterators bhfgq and xdee

2024-07-19 Thread Stefan Schulze Frielinghaus
I'm pinging this early since I would like to make sure that it gets into
14.2 RC which is about to be done on Tuesday 23rd July.

On Tue, Jul 16, 2024 at 04:50:29PM +0200, Stefan Schulze Frielinghaus wrote:
> Code attribute bhfgq is missing a mapping for TF.  This results in
> unresolved iterators in assembler templates for *bswaptf.
> 
> With the TF mapping added the base mnemonics vlbr and vstbr are not
> "used" anymore but only the extended mnemonics (vlbr was
> interpreted as vlbr; likewise for vstbr).  Therefore, remove the base
> mnemonics from the scheduling description, otherwise, genattrtab would
> error about unknown mnemonics.
> 
> Likewise, for movtf_vr only the extended mnemonics for vrepi are used,
> now, which means the base mnemonic is "unused" and has to be removed
> from the scheduling description.
> 
> Similarly, we end up with unresolved iterators in assembler templates
> for mulfprx23 since code attribute xdee is missing a mapping for FPRX2.
> 
> Note, this is basically a cherry pick of commit r15-2060-ga4abda934aa426
> with the addition that vrepi is removed from the scheduling description,
> too.
> 
> Bootstrapped on s390.  Ok for release branches 12, 13, and 14?
> 
> gcc/ChangeLog:
> 
>   * config/s390/3931.md (vlbr, vstbr, vrepi): Remove.
>   * config/s390/s390.md (xdee): Add FPRX2 mapping.
>   * config/s390/vector.md (bhfgq): Add TF mapping.
> ---
>  gcc/config/s390/3931.md   | 7 ---
>  gcc/config/s390/s390.md   | 2 +-
>  gcc/config/s390/vector.md | 2 +-
>  3 files changed, 2 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/config/s390/3931.md b/gcc/config/s390/3931.md
> index bed1f6c21f1..9cb11b72bba 100644
> --- a/gcc/config/s390/3931.md
> +++ b/gcc/config/s390/3931.md
> @@ -404,7 +404,6 @@ vlvgg,
>  vlvgh,
>  vlvgp,
>  vst,
> -vstbr,
>  vstbrf,
>  vstbrg,
>  vstbrh,
> @@ -627,7 +626,6 @@ tm,
>  tmy,
>  vl,
>  vlbb,
> -vlbr,
>  vlbrf,
>  vlbrg,
>  vlbrh,
> @@ -661,7 +659,6 @@ vlreph,
>  vlrl,
>  vlrlr,
>  vst,
> -vstbr,
>  vstbrf,
>  vstbrg,
>  vstbrh,
> @@ -1077,7 +1074,6 @@ vrepb,
>  vrepf,
>  vrepg,
>  vreph,
> -vrepi,
>  vrepib,
>  vrepif,
>  vrepig,
> @@ -1930,7 +1926,6 @@ vrepb,
>  vrepf,
>  vrepg,
>  vreph,
> -vrepi,
>  vrepib,
>  vrepif,
>  vrepig,
> @@ -2156,7 +2151,6 @@ vistrfs,
>  vistrhs,
>  vl,
>  vlbb,
> -vlbr,
>  vlbrf,
>  vlbrg,
>  vlbrh,
> @@ -2248,7 +2242,6 @@ tbegin,
>  tbeginc,
>  tend,
>  vst,
> -vstbr,
>  vstbrf,
>  vstbrg,
>  vstbrh,
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 50a828f2bbb..8edc1261c38 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -744,7 +744,7 @@
>  ;; In FP templates, a  in "mr" will expand to "mxr" in
>  ;; TF/TDmode, "mdr" in DF/DDmode, "meer" in SFmode and "mer in
>  ;; SDmode.
> -(define_mode_attr xdee [(TF "x") (DF "d") (SF "ee") (TD "x") (DD "d") (SD 
> "e")])
> +(define_mode_attr xdee [(TF "x") (FPRX2 "x") (DF "d") (SF "ee") (TD "x") (DD 
> "d") (SD "e")])
>  
>  ;; The decimal floating point variants of add, sub, div and mul support 3
>  ;; fp register operands.  The following attributes allow to merge the bfp and
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 1bae1056951..f88e8b655fa 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -134,7 +134,7 @@
>   (V1TI "q") (TI "q")
>   (V1SF "f") (V2SF "f") (V4SF "f")
>   (V1DF "g") (V2DF "g")
> - (V1TF "q")])
> + (V1TF "q") (TF "q")])
>  
>  ; This is for vmalhw. It gets an 'w' attached to avoid confusion with
>  ; multiply and add logical high vmalh.
> -- 
> 2.45.0
>