Re: [PATCH v2] RISC-V: Add minimal support of double trap extension 1.0

2025-05-27 Thread Jerry Zhang Jian
I think I found the bug that caused the test failure, and will send v3
later.

BRs
Jerry

Jerry Zhang Jian  於 2025年5月25日 週日 下午11:05寫道:

> Add support of double trap extension [1], enabling GCC
> to recognize the following extensions at compile time.
>
> New extensions:
> - ssdbltrp
> - smdbltrp
>
> [1]
> https://github.com/riscv/riscv-double-trap/releases/download/v1.0/riscv-double-trap.pdf
>
> gcc/ChangeLog:
> * config/riscv/riscv-ext.def: New extensions
> * config/riscv/riscv-ext.opt: Auto re-generated
>
> gcc/testsuite/ChangeLog:
> * gcc/testsuite/gcc.target/riscv/arch-57.c: New test
> * gcc/testsuite/gcc.target/riscv/arch-58.c: New test
>
> Signed-off-by: Jerry Zhang Jian 
> ---
>  gcc/config/riscv/riscv-ext.def   | 26 
>  gcc/config/riscv/riscv-ext.opt   |  4 
>  gcc/doc/riscv-ext.texi   |  8 
>  gcc/testsuite/gcc.target/riscv/arch-57.c |  6 ++
>  gcc/testsuite/gcc.target/riscv/arch-58.c |  6 ++
>  5 files changed, 50 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-57.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-58.c
>
> diff --git a/gcc/config/riscv/riscv-ext.def
> b/gcc/config/riscv/riscv-ext.def
> index 97b576617ad..dbda8ded397 100644
> --- a/gcc/config/riscv/riscv-ext.def
> +++ b/gcc/config/riscv/riscv-ext.def
> @@ -1727,6 +1727,19 @@ DEFINE_RISCV_EXT(
>/* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
>/* EXTRA_EXTENSION_FLAGS */ 0)
>
> +DEFINE_RISCV_EXT(
> +  /* NAME */ smdbltrp,
> +  /* UPPERCAE_NAME */ SMDBLTRP,
> +  /* FULL_NAME */ "Double Trap Extensions",
> +  /* DESC */ "",
> +  /* URL */ ,
> +  /* DEP_EXTS */ ({"zicsr"}),
> +  /* SUPPORTED_VERSIONS */ ({{1, 0}}),
> +  /* FLAG_GROUP */ sm,
> +  /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
> +  /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
> +  /* EXTRA_EXTENSION_FLAGS */ 0)
> +
>  DEFINE_RISCV_EXT(
>/* NAME */ ssaia,
>/* UPPERCAE_NAME */ SSAIA,
> @@ -1818,6 +1831,19 @@ DEFINE_RISCV_EXT(
>/* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
>/* EXTRA_EXTENSION_FLAGS */ 0)
>
> +DEFINE_RISCV_EXT(
> +  /* NAME */ ssdbltrp,
> +  /* UPPERCAE_NAME */ SSDBLTRP,
> +  /* FULL_NAME */ "Double Trap Extensions",
> +  /* DESC */ "",
> +  /* URL */ ,
> +  /* DEP_EXTS */ ({"zicsr"}),
> +  /* SUPPORTED_VERSIONS */ ({{1, 0}}),
> +  /* FLAG_GROUP */ ss,
> +  /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
> +  /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
> +  /* EXTRA_EXTENSION_FLAGS */ 0)
> +
>  DEFINE_RISCV_EXT(
>/* NAME */ supm,
>/* UPPERCAE_NAME */ SUPM,
> diff --git a/gcc/config/riscv/riscv-ext.opt
> b/gcc/config/riscv/riscv-ext.opt
> index 9199aa31b42..5e9c5f56ad6 100644
> --- a/gcc/config/riscv/riscv-ext.opt
> +++ b/gcc/config/riscv/riscv-ext.opt
> @@ -343,6 +343,8 @@ Mask(SMNPM) Var(riscv_sm_subext)
>
>  Mask(SMSTATEEN) Var(riscv_sm_subext)
>
> +Mask(SMDBLTRP) Var(riscv_sm_subext)
> +
>  Mask(SSAIA) Var(riscv_ss_subext)
>
>  Mask(SSCOFPMF) Var(riscv_ss_subext)
> @@ -357,6 +359,8 @@ Mask(SSTC) Var(riscv_ss_subext)
>
>  Mask(SSSTRICT) Var(riscv_ss_subext)
>
> +Mask(SSDBLTRP) Var(riscv_ss_subext)
> +
>  Mask(SUPM) Var(riscv_su_subext)
>
>  Mask(SVINVAL) Var(riscv_sv_subext)
> diff --git a/gcc/doc/riscv-ext.texi b/gcc/doc/riscv-ext.texi
> index bd3d29c75ab..7a22d841d1b 100644
> --- a/gcc/doc/riscv-ext.texi
> +++ b/gcc/doc/riscv-ext.texi
> @@ -510,6 +510,10 @@
>  @tab 1.0
>  @tab State enable extension
>
> +@item smdbltrp
> +@tab 1.0
> +@tab Double Trap Extensions
> +
>  @item ssaia
>  @tab 1.0
>  @tab Advanced interrupt architecture extension for supervisor-mode
> @@ -538,6 +542,10 @@
>  @tab 1.0
>  @tab ssstrict extension
>
> +@item ssdbltrp
> +@tab 1.0
> +@tab Double Trap Extensions
> +
>  @item supm
>  @tab 1.0
>  @tab supm extension
> diff --git a/gcc/testsuite/gcc.target/riscv/arch-57.c
> b/gcc/testsuite/gcc.target/riscv/arch-57.c
> new file mode 100644
> index 000..42cf30a3171
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/arch-57.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64i_smdbltrp -mabi=lp64d" } */
> +
> +void foo(){}
> +
> +/* { dg-final { scan-assembler ".attribute arch,
> \"rv64i2p1_zicsr2p0_smdbltrp1p0\"" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/arch-58.c
> b/gcc/testsuite/gcc.target/riscv/arch-58.c
> new file mode 100644
> index 000..88b20dfb6c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/arch-58.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64i_ssdbltrp -mabi=lp64d" } */
> +
> +void foo(){}
> +
> +/* { dg-final { scan-assembler ".attribute arch,
> \"rv64i2p1_zicsr2p0_ssdbltrp1p0\"" } } */
> --
> 2.49.0
>
>


[PATCH] RISC-V: Add Shlcofideleg extension.

2025-05-27 Thread Jiawei
This patch adds the RISC-V Shlcofideleg extension. It supports delegating
LCOFI interrupts (the count-overflow interrupts) to VS-mode. [1]

[1] https://riscv.github.io/riscv-isa-manual/snapshot/privileged

gcc/ChangeLog:

* config/riscv/riscv-ext.def: New extension defs.
* config/riscv/riscv-ext.opt: Ditto.
* doc/riscv-ext.texi: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-57.c: New test.

Signed-off-by: Jiawei 
---
 gcc/config/riscv/riscv-ext.def   | 13 +
 gcc/config/riscv/riscv-ext.opt   |  2 ++
 gcc/doc/riscv-ext.texi   |  4 
 gcc/testsuite/gcc.target/riscv/arch-57.c |  5 +
 4 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/arch-57.c

diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def
index 97b576617ad..1b5dce29b15 100644
--- a/gcc/config/riscv/riscv-ext.def
+++ b/gcc/config/riscv/riscv-ext.def
@@ -1610,6 +1610,19 @@ DEFINE_RISCV_EXT(
   /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
   /* EXTRA_EXTENSION_FLAGS */ 0)
 
+DEFINE_RISCV_EXT(
+  /* NAME */ shlcofideleg,
+  /* UPPERCAE_NAME */ SHLCOFIDELEG,
+  /* FULL_NAME */ "Delegating LCOFI interrupts to VS-mode",
+  /* DESC */ "",
+  /* URL */ ,
+  /* DEP_EXTS */ ({"h"}),
+  /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+  /* FLAG_GROUP */ sh,
+  /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+  /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+  /* EXTRA_EXTENSION_FLAGS */ 0)
+
 DEFINE_RISCV_EXT(
   /* NAME */ shtvala,
   /* UPPERCAE_NAME */ SHTVALA,
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
index 9199aa31b42..6868e79987c 100644
--- a/gcc/config/riscv/riscv-ext.opt
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -325,6 +325,8 @@ Mask(SHCOUNTERENW) Var(riscv_sh_subext)
 
 Mask(SHGATPA) Var(riscv_sh_subext)
 
+Mask(SHLCOFIDELEG) Var(riscv_sh_subext)
+
 Mask(SHTVALA) Var(riscv_sh_subext)
 
 Mask(SHVSTVALA) Var(riscv_sh_subext)
diff --git a/gcc/doc/riscv-ext.texi b/gcc/doc/riscv-ext.texi
index bd3d29c75ab..4b92b8d75db 100644
--- a/gcc/doc/riscv-ext.texi
+++ b/gcc/doc/riscv-ext.texi
@@ -474,6 +474,10 @@
 @tab 1.0
 @tab SvNNx4 mode supported for all modes supported by satp
 
+@item shlcofideleg
+@tab 1.0
+@tab Delegating LCOFI interrupts to VS-mode
+
 @item shtvala
 @tab 1.0
 @tab The htval register provides all needed values
diff --git a/gcc/testsuite/gcc.target/riscv/arch-57.c 
b/gcc/testsuite/gcc.target/riscv/arch-57.c
new file mode 100644
index 000..de9f9fc6e53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-57.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i_shlcofideleg -mabi=lp64" } */
+int foo()
+{
+}
-- 
2.43.0



Re: [PATCH V2] For datarefs with big gap, split them into different groups.

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 3:06 AM liuhongt  wrote:
>
> > > It's https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119181
> >
> > Please mention that in the changelog.  Also ...
>
> Changed.
>
> > Please put this condition in the set of conds we test in the else branch of 
> > ...
> >
> > > >   /* Do not place the same access in the interleaving chain 
> > > > twice.  */
> > > >   if (init_b == init_prev)
> > > > {
> >
> > ... this if.  There we have the conditions for splitting groups grouped together.
> >
> Changed.
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready to push to trunk.

OK.

> The patch tries to fix a missed vectorization for the case below.
>
> void
> foo (int* a, int* restrict b)
> {
> b[0] = a[0] * a[64];
> b[1] = a[65] * a[1];
> b[2] = a[2] * a[66];
> b[3] = a[67] * a[3];
> b[4] = a[68] * a[4];
> b[5] = a[69] * a[5];
> b[6] = a[6] * a[70];
> b[7] = a[7] * a[71];
> }
>
> In vect_analyze_data_ref_accesses, a[0], a[1], ..., a[7], a[64], ...,
> a[71] are in the same group with a size of 71. This makes vectorization
> unprofitable.
>
> gcc/ChangeLog:
>
> PR tree-optimization/119181
> * tree-vect-data-refs.cc (vect_analyze_data_ref_accesses):
> Split datarefs when there's a gap bigger than
> MAX_BITSIZE_MODE_ANY_MODE.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/bb-slp-pr119181.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c | 15 +++
>  gcc/tree-vect-data-refs.cc  |  7 +++
>  2 files changed, 22 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c 
> b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c
> new file mode 100644
> index 000..b0d3e5a3cb8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +void
> +foo (int* a, int* restrict b)
> +{
> +b[0] = a[0] * a[64];
> +b[1] = a[65] * a[1];
> +b[2] = a[2] * a[66];
> +b[3] = a[67] * a[3];
> +b[4] = a[68] * a[4];
> +b[5] = a[69] * a[5];
> +b[6] = a[6] * a[70];
> +b[7] = a[7] * a[71];
> +}
> +
> +/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp2" { 
> target vect_int_mult } } } */
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 9fd1ef29650..f2deb751ed9 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -3682,6 +3682,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo,
>   != type_size_a))
> break;
>
> + /* For datarefs with big gap, it's better to split them into 
> different
> +groups.
> +.i.e a[0], a[1], a[2], .. a[7], a[100], a[101],..., a[107]  
> */
> + if ((unsigned HOST_WIDE_INT)(init_b - init_prev) * tree_to_uhwi 
> (szb)
> + > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
> +   break;
> +
>   /* If the step (if not zero or non-constant) is smaller than the
>  difference between data-refs' inits this splits groups into
>  suitable sizes.  */
> --
> 2.34.1
>


Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash structs and members for P1901R2

2025-05-27 Thread Paul Keir
Sounds good. I've added the changes you suggested (diff is linked below). I'll 
send an updated patch after your review.

https://github.com/gcc-mirror/gcc/compare/97e8cd9...pkeir:gcc:4f699d8


From: Jonathan Wakely 
Sent: 23 May 2025 8:11 PM
To: Paul Keir
Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
structs and members for P1901R2



Warning: Do not open attachments or click on links unless you trust the sender



On Fri, 23 May 2025 at 18:56, Paul Keir  wrote:
>
> This patch implements C++26 "Enabling the Use of weak_ptr as Keys in 
> Unordered Associative Containers", as specified in P1901R2.

Splendid, thanks. I'll review this more carefully next week.

Some quick comments:

This should define the __cpp_lib_smart_pointer_owner_equality feature
test macro.

To do that you need to add a new entry in include/bits/version.def
(see the comments at the top and the existing entries which should be
fairly clear - I am in the middle of documenting this process).
Then in the $objdir/x86_64-pc-linux-gnu/libstdc++-v3/include directory
run 'make update-version' which should regenerate the bits/version.h
file to include your new macro.
Then in <memory> define __glibcxx_want_smart_pointer_owner_equality
before including <bits/version.h>.

> diff --git a/libstdc++-v3/testsuite/20_util/owner_equal/cmp.cc 
> b/libstdc++-v3/testsuite/20_util/owner_equal/cmp.cc
> new file mode 100644
> index 000..c958d9c62ea
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/20_util/owner_equal/cmp.cc
> @@ -0,0 +1,122 @@
> +// { dg-do run { target c++26 } }
> +// { dg-require-effective-target hosted }
> +
> +// Copyright (C) 2008-2025 Free Software Foundation, Inc.

I see that all the new tests have copyright dates varying from 2008 to
2017, but they're all new files (I don't think they copy anything from
existing tests, right?). So they should not have copyright dates
claiming to be written in the past. They also shouldn't have copyright
notices claiming to be owned by the FSF if you're contributing them
under the DCO terms, because you retain your own copyright.

My preference for new tests is not to bother with the copyright notice
or license text at all. Nothing in those tests looks very novel or
inventive, it's just repetitive, fairly mechanical testing of the API.
I am sceptical whether such things are even copyrightable. So I don't
bother with the 20 lines of comments in each file, e.g. see
testsuite/20_util/weak_ptr/atomic_weak_ptr.cc from 2022.


> +// This file is part of the GNU ISO C++ Library.  This library is free
> +// software; you can redistribute it and/or modify it under the
> +// terms of the GNU General Public License as published by the
> +// Free Software Foundation; either version 3, or (at your option)
> +// any later version.
> +
> +// This library is distributed in the hope that it will be useful,
> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +// GNU General Public License for more details.
> +
> +// You should have received a copy of the GNU General Public License along
> +// with this library; see the file COPYING3.  If not see
> +// .
> +
> +// 20.3.2.6 Struct owner_equal [util.smartptr.owner.equal]

Please put the C++ standard version, or in this case working draft
number, in these comments, e.g.
// N5008 20.3.2.6 Struct owner_equal [util.smartptr.owner.equal]

We didn't used to do that (as you'll see in the old tests) and it
becomes pretty pointless to just have a subclause number in some
unspecified document (unlike the C standard, the C++ subclause numbers
change dramatically between standards).

I have been trying to add "C++11" or "C++03" to those comments in old
tests when I touch the file for some other reason.



Please consider the environment and think before you print.

The University of the West of Scotland is a registered Scottish charity. 
Charity number SC002520.

This e-mail and any attachment is for authorised use by the intended 
recipient(s) only. It may contain proprietary material, confidential 
information and/or be subject to legal privilege. It should not be copied, 
disclosed to, retained or used by, any other party. If you are not an intended 
recipient then please promptly delete this e-mail and any attachment and all 
copies and inform the sender.

Please note that any views or opinions presented in this email are solely those 
of the author and do not necessarily represent those of the University of the 
West of Scotland.

As a public body, the University of the West of Scotland may be required to 
make available emails as well as other written forms of information as a result 
of a request made under the Freedom of Information (Scotland) Act 2002.


Re: [PATCH v1] libstdc++: Fix bug in default ctor of extents.

2025-05-27 Thread Tomasz Kaminski
On Tue, May 27, 2025 at 10:53 AM Jonathan Wakely 
wrote:

> On Mon, 26 May 2025 at 08:49, Tomasz Kaminski  wrote:
> >
> >
> >
> > On Sat, May 24, 2025 at 1:29 PM Luc Grosheintz 
> wrote:
> >>
> >> The array that stores the dynamic extents used to be default
> >> initialized. The standard requires value initialization. This
> >> commit fixes the bug and adds a test.
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >> * include/std/mdspan: Value initialize the array storing the
> >> dynamic extents.
> >> * testsuite/23_containers/mdspan/extents/ctor_default.cc: New
> >> test.
> >>
> >> Signed-off-by: Luc Grosheintz 
> >> ---
> >
> > LGTM, thanks for noticing and fixing it.
> >  We also need approval from the maintainer.
>
> OK for trunk.
>
Pushed to trunk as r16-891-ge46c5b3219436d.

>
> >>
> >>  libstdc++-v3/include/std/mdspan   |  2 +-
> >>  .../mdspan/extents/ctor_default.cc| 41 +++
> >>  2 files changed, 42 insertions(+), 1 deletion(-)
> >>  create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
> >>
> >> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> >> index 47cfa405e44..bcf2fa60fea 100644
> >> --- a/libstdc++-v3/include/std/mdspan
> >> +++ b/libstdc++-v3/include/std/mdspan
> >> @@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>
> >>private:
> >> using _S_storage = __array_traits<_IndexType,
> _S_rank_dynamic>::_Type;
> >> -   [[no_unique_address]] _S_storage _M_dynamic_extents;
> >> +   [[no_unique_address]] _S_storage _M_dynamic_extents{};
> >
> > We know that these are integral types, so we can use {}.
> >>
> >>};
> >>
> >>  template
> >> diff --git
> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
> >> new file mode 100644
> >> index 000..eec300f6896
> >> --- /dev/null
> >> +++
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
> >> @@ -0,0 +1,41 @@
> >> +// { dg-do run { target c++23 } }
> >> +#include 
> >> +
> >> +#include 
> >> +#include 
> >> +
> >> +constexpr auto dyn = std::dynamic_extent;
> >> +
> >> +template
> >> +  constexpr void
> >> +  test_default_ctor()
> >> +  {
> >> +Extents exts;
> >> +for(size_t i = 0; i < Extents::rank(); ++i)
> >> +  if(exts.static_extent(i) == std::dynamic_extent)
> >> +   VERIFY(exts.extent(i) == 0);
> >> +  else
> >> +   VERIFY(exts.extent(i) == Extents::static_extent(i));
> >> +  }
> >> +
> >> +constexpr bool
> >> +test_default_ctor_all()
> >> +{
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  test_default_ctor>();
> >> +  return true;
> >> +}
> >> +
> >> +int
> >> +main()
> >> +{
> >> +  test_default_ctor_all();
> >> +  static_assert(test_default_ctor_all());
> >> +  return 0;
> >> +}
> >> --
> >> 2.49.0
> >>
>
>


[PATCH] RISC-V:Add the MIPS P8700 conditional move extension instruction support.

2025-05-27 Thread Umesh Kalappa
The P8700 is a high-performance processor from MIPS that extends RISC-V with
MIPS custom instructions. The following changes are added to enable
conditional move support from MIPS.

No regression found for "runtest --tool gcc 
--target_board='riscv-sim/-mabi=lp64d/-mcmodel=medlow/-mtune=mips-p8700/-O2 ' 
riscv.exp"

gcc/ChangeLog:

*common/config/riscv/riscv-common.cc (riscv_ext_version_table) :
 Added MIPS specific insns for P8700.
*config/riscv/riscv-cores.def(RISCV_CORE):Updated the march for 
mips-p8700 tune.
*config/riscv/riscv-ext-mips.def(DEFINE_RISCV_EXT):
 New file added the mips conditional mov extension.
*config/riscv/riscv-ext.def: Likewise.
*config/riscv/t-riscv:Generates riscv-ext.opt
*config/riscv/riscv-ext.opt: Generated file.
*config/riscv/riscv.cc(riscv_expand_conditional_move):Updated for mips 
cmov.
*config/riscv/riscv.md(movcc):updated expand for MIPS CCMOV.
*config/riscv/mips-insn.md:New file for mips-p8700 ccmov insn.
*testsuite/gcc.target/riscv/mipscondmov.c:New file to test the ccmov 
insn.
*gcc/doc/riscv-ext.texi:Updated for mips cmov.
---
 gcc/config/riscv/mips-insn.md| 37 
 gcc/config/riscv/riscv-ext-mips.def  | 35 ++
 gcc/testsuite/gcc.target/riscv/mipscondmov.c | 30 
 3 files changed, 102 insertions(+)
 create mode 100644 gcc/config/riscv/mips-insn.md
 create mode 100644 gcc/config/riscv/riscv-ext-mips.def
 create mode 100644 gcc/testsuite/gcc.target/riscv/mipscondmov.c

diff --git a/gcc/config/riscv/mips-insn.md b/gcc/config/riscv/mips-insn.md
new file mode 100644
index 000..ee106c4221e
--- /dev/null
+++ b/gcc/config/riscv/mips-insn.md
@@ -0,0 +1,37 @@
+;; Machine description for MIPS custom instructioins.
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+(define_insn "*movcc_bitmanip"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (if_then_else:GPR
+(match_operator 5 "equality_operator"
+   [(match_operand:X 1 "register_operand" "r")
+(match_operand:X 2 "const_0_operand" "J")])
+(match_operand:GPR 3 "reg_or_0_operand" "rJ")
+(match_operand:GPR 4 "reg_or_0_operand" "rJ")))]
+  "TARGET_XMIPSCMOV"
+{
+  enum rtx_code code = GET_CODE (operands[5]);
+  if (code == NE)
+return "mips.ccmov\t%0,%1,%z3,%z4";
+  else
+return "mips.ccmov\t%0,%1,%z4,%z3";
+}
+  [(set_attr "type" "condmove")
+   (set_attr "mode" "")])
diff --git a/gcc/config/riscv/riscv-ext-mips.def 
b/gcc/config/riscv/riscv-ext-mips.def
new file mode 100644
index 000..86492223cb3
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-mips.def
@@ -0,0 +1,35 @@
+/* MIPS extension definition file for RISC-V.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.
+
+Please run `make riscv-regen` in build folder to make sure updated anything.
+
+Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
+
+DEFINE_RISCV_EXT(
+  /* NAME */ xmipscmov,
+  /* UPPERCAE_NAME */ XMIPSCMOV,
+  /* FULL_NAME */ "Mips conditional move extension",
+  /* DESC */ "",
+  /* URL */ ,
+  /* DEP_EXTS */ ({}),
+  /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+  /* FLAG_GROUP */ xmips,
+  /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+  /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+  /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/testsuite/gcc.target/riscv/mipscondmov.c 
b/gcc/testsuite/gcc.target/riscv/mipscondmov.c
new file mode 100644
index 000..144a6b718ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/mipscondmov.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { d

[avr,patch,applied] PR120442: Support fdiml in avr/libf7

2025-05-27 Thread Georg-Johann Lay

This patch adds fdiml to libgcc/config/avr/libf7

AVR: target/120442 - Support f7_fdim / fdiml in LibF7.

PR target/120442
Add Support for fdiml.
libgcc/config/avr/libf7/
* libf7-common.mk (LIBF_C_PARTS, m_ddd): Add fdim.
* libf7.h (f7_fdim): New proto.
* libf7.c (f7_fdim): New function.
* f7renames.sh (f7_fdim): Add rename.
* f7-wraps.h: Rebuild
* f7-renames.h: Rebuild

AVR: target/120442 - Support f7_fdim / fdiml in LibF7.

PR target/120442
Add Support for fdiml.
libgcc/config/avr/libf7/
* libf7-common.mk (LIBF_C_PARTS, m_ddd): Add fdim.
* libf7.h (f7_fdim): New proto.
* libf7.c (f7_fdim): New function.
* f7renames.sh (f7_fdim): Add rename.
* f7-wraps.h: Rebuild
* f7-renames.h: Rebuild

diff --git a/libgcc/config/avr/libf7/f7-renames.h b/libgcc/config/avr/libf7/f7-renames.h
index bbe571a7532..bce2dd33e8a 100644
--- a/libgcc/config/avr/libf7/f7-renames.h
+++ b/libgcc/config/avr/libf7/f7-renames.h
@@ -97,6 +97,7 @@
 #define f7_acos __f7_acos
 #define f7_atan __f7_atan
 #define f7_atan2 __f7_atan2
+#define f7_fdim __f7_fdim
 #define f7_mul_noround __f7_mul_noround
 #define f7_sqrt16_round __f7_sqrt16_round
 #define f7_sqrt16_floor __f7_sqrt16_floor
diff --git a/libgcc/config/avr/libf7/f7-wraps.h b/libgcc/config/avr/libf7/f7-wraps.h
index a455b7dbd9e..409492ed1d2 100644
--- a/libgcc/config/avr/libf7/f7-wraps.h
+++ b/libgcc/config/avr/libf7/f7-wraps.h
@@ -239,7 +239,7 @@ _ENDF __extendsfdf2
 
 ;; Functions that usually live in libm:  Depending on [long] double layout,
 ;; define  and l as weak alias(es) of __ for  in:
-;; pow fmin fmax fmod hypot atan2
+;; pow fmin fmax fmod hypot atan2 fdim
 
 ;; double __pow (double, double)
 #ifdef F7MOD_D_pow_
@@ -313,6 +313,18 @@ _DEFUN __atan2
 _ENDF __atan2
 #endif /* F7MOD_D_atan2_ */
 
+;; double __fdim (double, double)
+#ifdef F7MOD_D_fdim_
+_DEFUN __fdim
+DALIAS fdim
+LALIAS fdiml
+.global F7_NAME(fdim)
+ldi ZH, hi8(gs(F7_NAME(fdim)))
+ldi ZL, lo8(gs(F7_NAME(fdim)))
+F7jmp   call_ddd
+_ENDF __fdim
+#endif /* F7MOD_D_fdim_ */
+
 ;; Functions that usually live in libm:  Depending on [long] double layout,
 ;; define  and l as weak alias(es) of __ for  in:
 ;; ldexp frexp
diff --git a/libgcc/config/avr/libf7/f7renames.sh b/libgcc/config/avr/libf7/f7renames.sh
index 7ef251e44c9..4ced42363d1 100755
--- a/libgcc/config/avr/libf7/f7renames.sh
+++ b/libgcc/config/avr/libf7/f7renames.sh
@@ -35,9 +35,9 @@ EOF
 
 c)
 if [ x${PRE} != xf7_ ]; then
-echo " "
+echo ""
 echo "/* Renames for libf7.c, libf7.h.  */"
-echo " "
+echo ""
 for x in $*; do
 echo "#define f7_$x ${PRE}$x"
 done
@@ -46,9 +46,9 @@ EOF
 
 cst)
 if [ x${PRE} != xf7_ ]; then
-echo " "
+echo ""
 echo "/* Renames for libf7.c, libf7.h.  */"
-echo " "
+echo ""
 for x in $*; do
 echo "#define f7_const_${x}   ${PRE}const_${x}"
 echo "#define f7_const_${x}_P ${PRE}const_${x}_P"
@@ -58,9 +58,9 @@ EOF
 
 asm)
 if [ x${PRE} != xf7_ ]; then
-echo " "
+echo ""
 echo "/* Renames for libf7-asm.sx, f7-wraps.h.  */"
-echo " "
+echo ""
 for x in $*; do
 echo "#define f7_${x}_asm ${PRE}${x}_asm"
 done
diff --git a/libgcc/config/avr/libf7/libf7-common.mk b/libgcc/config/avr/libf7/libf7-common.mk
index 5d411071c8e..644be2cf195 100644
--- a/libgcc/config/avr/libf7/libf7-common.mk
+++ b/libgcc/config/avr/libf7/libf7-common.mk
@@ -8,7 +8,7 @@ F7_C_PARTS += set_float get_float get_double set_double set_pdouble
 F7_C_PARTS += fabs neg fmin fmax minmax truncx trunc floor ceil round lround
 F7_C_PARTS += horner logx log log10 log2 exp pow10 pow powi
 F7_C_PARTS += sin cos tan cotan sincos sinh cosh tanh sinhcosh
-F7_C_PARTS += asinacos asin acos atan atan2
+F7_C_PARTS += asinacos asin acos atan atan2 fdim
 F7_C_PARTS += abscmp_msb_ge cmp cmp_abs cmp_unordered
 
 F7_C_PARTS += const_1 const_1_2 const_1_3
@@ -34,7 +34,7 @@ g_xdd_cmp += le lt ge gt ne eq unord
 g_dx += floatunsidf floatsidf extendsfdf2
 g_xd += fixdfsi fixdfdi fixunsdfdi fixunsdfsi truncdfsf2
 
-m_ddd += pow fmin fmax fmod hypot atan2
+m_ddd += pow fmin fmax fmod hypot atan2 fdim
 m_ddx += ldexp frexp
 m_dd += sqrt cbrt exp exp10 pow10 log log10 log2 sin cos tan cotan asin acos atan
 m_dd += ceil floor trunc round sinh cosh tanh
@@ -59,7 +59,7 @@ F7F += lrint ldexp frexp exp logx log log10 log2
 F7F += minmax fmax fmin floor ceil round lround trunc truncx
 F7F += horner pow10 exp10 pow powi
 F7F += sin cos tan cotan sincos sinh cosh tanh sinhcosh
-F7F += asinacos asin acos atan atan2
+F7F += asinacos asin acos atan atan2 fdim
 F7F += m

Re: [PATCH v2] s390: Floating point vector lane handling

2025-05-27 Thread Stefan Schulze Frielinghaus
On Mon, May 26, 2025 at 12:17:44PM +0200, Juergen Christ wrote:
> Since floating point and vector registers overlap on s390, more
> efficient code can be generated to extract FPRs from VRs.
> Additionally, for double vectors, more efficient code can be generated
> to load specific lanes.
> 
> Bootstrapped and regtested on s390x.

Ok for mainline.

Thanks,
Stefan

> 
> gcc/ChangeLog:
> 
>   * config/s390/vector.md (VF): New mode iterator.
>   (VEC_SET_NONFLOAT): New mode iterator.
>   (VEC_SET_SINGLEFLOAT): New mode iterator.
>   (*vec_set): Split pattern in two.
>   (*vec_setv2df): Extract special handling for V2DF mode.
>   (*vec_extract): Split pattern in two.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/s390/vector/vec-extract-1.c: New test.
>   * gcc.target/s390/vector/vec-set-1.c: New test.
> 
> Signed-off-by: Juergen Christ 
> ---
>  gcc/config/s390/vector.md | 137 +++--
>  .../gcc.target/s390/vector/vec-extract-1.c| 190 ++
>  .../gcc.target/s390/vector/vec-set-1.c| 133 
>  3 files changed, 448 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-set-1.c
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index e29255fe1116..340dafd729eb 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -75,6 +75,8 @@
>  V1DF V2DF
>  (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
>  
> +(define_mode_iterator VF [(V2SF "TARGET_VXE") (V4SF "TARGET_VXE") V2DF])
> +
>  ; All modes present in V_HW1 and VFT.
>  (define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF
>  V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
> @@ -506,26 +508,89 @@
>  UNSPEC_VEC_SET))]
>"TARGET_VX")
>  
> +; Iterator for vec_set that does not use special float/vect overlay tricks
> +(define_mode_iterator VEC_SET_NONFLOAT
> +  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI 
> V2SF V4SF])
> +; Iterator for single element float vectors
> +(define_mode_iterator VEC_SET_SINGLEFLOAT [(V1SF "TARGET_VXE") V1DF (V1TF 
> "TARGET_VXE")])
> +
>  ; FIXME: Support also vector mode operands for 1
>  ; FIXME: A target memory operand seems to be useful otherwise we end
>  ; up with vl vlvgg vst.  Shouldn't the middle-end be able to handle
>  ; that itself?
>  ; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih, vleif, 
> vleig
>  (define_insn "*vec_set"
> -  [(set (match_operand:V0 "register_operand"  "=v,v,v")
> - (unspec:V [(match_operand: 1 "general_operand""d,R,K")
> -(match_operand:SI2 "nonmemory_operand" "an,I,I")
> -(match_operand:V 3 "register_operand"   "0,0,0")]
> -   UNSPEC_VEC_SET))]
> +  [(set (match_operand:VEC_SET_NONFLOAT  0 "register_operand"  "=v,v,v")
> + (unspec:VEC_SET_NONFLOAT
> +   [(match_operand:  1 "general_operand""d,R,K")
> +(match_operand:SI 2 "nonmemory_operand" "an,I,I")
> +(match_operand:VEC_SET_NONFLOAT   3 "register_operand"   "0,0,0")]
> +   UNSPEC_VEC_SET))]
>"TARGET_VX
> && (!CONST_INT_P (operands[2])
> -   || UINTVAL (operands[2]) < GET_MODE_NUNITS (mode))"
> +   || UINTVAL (operands[2]) < GET_MODE_NUNITS 
> (mode))"
>"@
> vlvg\t%v0,%1,%Y2
> vle\t%v0,%1,%2
> vlei\t%v0,%1,%2"
>[(set_attr "op_type" "VRS,VRX,VRI")])
>  
> +(define_insn "*vec_set"
> +  [(set (match_operand:VEC_SET_SINGLEFLOAT 0 "register_operand"  "=v,v")
> + (unspec:VEC_SET_SINGLEFLOAT
> +   [(match_operand:1 "general_operand""v,R")
> +(match_operand:SI   2 "nonmemory_operand" "an,I")
> +(match_operand:VEC_SET_SINGLEFLOAT  3 "register_operand"   "0,0")]
> +   UNSPEC_VEC_SET))]
> +  "TARGET_VX"
> +  "@
> +   vlr\t%v0,%v1
> +   vle\t%v0,%1,0"
> + [(set_attr "op_type" "VRR,VRX")])
> +
> +(define_insn "*vec_setv2df"
> +  [(set (match_operand:V2DF0 "register_operand"  
> "=v,v,v,v")
> + (unspec:V2DF [(match_operand:DF1 "general_operand""d,R,K,v")
> +   (match_operand:SI2 "nonmemory_operand" "an,I,I,n")
> +   (match_operand:V2DF  3 "register_operand"   
> "0,0,0,0")]
> +  UNSPEC_VEC_SET))]
> +  "TARGET_VX
> +   && (!CONST_INT_P (operands[2])
> +   || UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))"
> +  "@
> +   vlvgg\t%v0,%1,%Y2
> +   vleg\t%v0,%1,%2
> +   vleig\t%v0,%1,%2
> +   #"
> +  [(set_attr "op_type" "VRS,VRX,VRI,*")])
> +
> +(define_split
> +  [(set (match_operand:V2DF0 "register_operand"  "")
> + (unspec:V2DF [(match_operand:DF1 "register_operand"  "")
> +   (match_operand:SI  

[PATCH] RISC-V:Add the MIPS P8700 conditional move extension instruction support.

2025-05-27 Thread Umesh Kalappa
The P8700 is a high-performance processor from MIPS that extends RISC-V with
MIPS custom instructions. The following changes are added to enable
conditional move support from MIPS.

No regressions are found for "runtest --tool gcc 
--target_board='riscv-sim/-mabi=lp64d/-mcmodel=medlow/-mtune=mips-p8700/-O2 ' 
riscv.exp"

*config/riscv/riscv-cores.def(RISCV_CORE):Updated the march for 
mips-p8700 tune.
*config/riscv/riscv-ext-mips.def(DEFINE_RISCV_EXT):
 New file added for mips conditional mov extension.
*config/riscv/riscv-ext.def: Likewise.
*config/riscv/t-riscv:Generates riscv-ext.opt
*config/riscv/riscv-ext.opt: Generated file.
*config/riscv/riscv.cc(riscv_expand_conditional_move):Updated for mips 
cmov.
*config/riscv/riscv.md(movcc):updated expand for MIPS CCMOV.
*config/riscv/mips-insn.md:New file for mips-p8700 ccmov insn.
*testsuite/gcc.target/riscv/mipscondmov.c:Test file for mips.ccmov insn.
*gcc/doc/riscv-ext.texi:Updated for mips cmov.
---
 gcc/config/riscv/mips-insn.md| 37 
 gcc/config/riscv/riscv-cores.def |  2 +-
 gcc/config/riscv/riscv-ext-mips.def  | 35 ++
 gcc/config/riscv/riscv-ext.def   |  1 +
 gcc/config/riscv/riscv-ext.opt   |  5 +++
 gcc/config/riscv/riscv.cc| 27 --
 gcc/config/riscv/riscv.md| 13 ++-
 gcc/config/riscv/t-riscv |  3 +-
 gcc/doc/riscv-ext.texi   |  4 +++
 gcc/testsuite/gcc.target/riscv/mipscondmov.c | 30 
 10 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 gcc/config/riscv/mips-insn.md
 create mode 100644 gcc/config/riscv/riscv-ext-mips.def
 create mode 100644 gcc/testsuite/gcc.target/riscv/mipscondmov.c

diff --git a/gcc/config/riscv/mips-insn.md b/gcc/config/riscv/mips-insn.md
new file mode 100644
index 000..ee106c4221e
--- /dev/null
+++ b/gcc/config/riscv/mips-insn.md
@@ -0,0 +1,37 @@
+;; Machine description for MIPS custom instructions.
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_insn "*movcc_bitmanip"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (if_then_else:GPR
+(match_operator 5 "equality_operator"
+   [(match_operand:X 1 "register_operand" "r")
+(match_operand:X 2 "const_0_operand" "J")])
+(match_operand:GPR 3 "reg_or_0_operand" "rJ")
+(match_operand:GPR 4 "reg_or_0_operand" "rJ")))]
+  "TARGET_XMIPSCMOV"
+{
+  enum rtx_code code = GET_CODE (operands[5]);
+  if (code == NE)
+return "mips.ccmov\t%0,%1,%z3,%z4";
+  else
+return "mips.ccmov\t%0,%1,%z4,%z3";
+}
+  [(set_attr "type" "condmove")
+   (set_attr "mode" "")])
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 118fef23cad..b8bf81e7883 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -154,7 +154,7 @@ RISCV_CORE("xiangshan-nanhu",  
"rv64imafdc_zba_zbb_zbc_zbs_"
  "svinval_zicbom_zicboz",
  "xiangshan-nanhu")
 
-RISCV_CORE("mips-p8700",   "rv64imafd_zicsr_zmmul_"
+RISCV_CORE("mips-p8700",  "rv64imafd_"
  "zaamo_zalrsc_zba_zbb",
  "mips-p8700")
 #undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-ext-mips.def 
b/gcc/config/riscv/riscv-ext-mips.def
new file mode 100644
index 000..86492223cb3
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-mips.def
@@ -0,0 +1,35 @@
+/* MIPS extension definition file for RISC-V.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; s

Re: [PATCH] Match: Handle commonly used unsigned modulo counters

2025-05-27 Thread Richard Biener
On Wed, May 21, 2025 at 6:05 PM MCC CS  wrote:
>
> Dear Richard,
>
> Thank you so much for your reply. I submitted the patch for the third case to
> LLVM before I've received your reply, and they said the same thing,
> that it would probably be used outside of loops as well and it would inflict
> a branch misprediction, so it should be implemented at the level
> of loop code generation only (because branch predictor could handle it
> inside loops).
>
> I didn't know that the second pattern would cause disassociation
> from division. Unexpectedly LLVM has that pattern in their match.pd
> equivalent but what you've said makes more sense.
>
> For the first pattern, I verified that trunk GCC, for:
>
> void d(unsigned x)
> {
>  if (x >= 5) __builtin_unreachable();
>  x %= 5;
>  g(x);
> }
>
> optimizes away the "%=", as you've said. However,
> for the code:
>
> void a(void)
> {
>  unsigned m = 0;
>  for(int i = 0; i < 300; i++)
>  {
>  m++;
>  m %= 600;
>  g(m);
>  }
> }
>
> it gets optimized only with the patch, which is surprising.

Indeed.  So the code that should handle this is in vr-values.cc
in the simplify_using_ranges::simplify_div_or_mod_using_ranges
function.  I don't know where this is wired in, specifically whether
or why we do not take into account loop niter information when
folding but we do seem to have global range information at
some point.  I'll note that SCEV will of course fail to analyze the
'm' IV because of the modulo expression which is unhandled.

That said, I don't understand why it doesn't work currently or
why it would work with your patch for the loop case.

Richard.

>
> Thanks
> MCCCS


Re: [PATCH] libgcc: Add DPD support + fix big-endian support of _BitInt <-> dfp conversions

2025-05-27 Thread Richard Biener
On Tue, 20 May 2025, Jakub Jelinek wrote:

> Hi!
> 
> The following patch fixes
> FAIL: gcc.dg/dfp/bitint-1.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-2.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-3.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-4.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-5.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-6.c (test for excess errors)
> FAIL: gcc.dg/dfp/bitint-8.c (test for excess errors)
> FAIL: gcc.dg/dfp/int128-1.c (test for excess errors)
> FAIL: gcc.dg/dfp/int128-2.c (test for excess errors)
> FAIL: gcc.dg/dfp/int128-4.c (test for excess errors)
> on s390x-linux (with the 3 not yet posted patches).
> 
> The patch does multiple things:
> 1) the routines were written for the DFP BID (binary integer decimal)
>format which is used on all arches but powerpc*/s390* (those use
>DPD - densely packed decimal format); as most of the code is actually
>the same for both BID and DPD formats, I haven't copied the sources
>+ slightly modified them, but added the DPD support directly, + renaming
>of the exported symbols from __bid_* prefixed to __dpd_* prefixed that
>GCC expects on the DPD targets
> 2) while testing that I've found some big-endian issues in the existing
>support
> 3) testing also revealed that in some cases __builtin_clzll (~msb) was
>called with msb set to all ones, so invoking UB; apparently on aarch64
>and x86 we were lucky and got some value that happened to work well,
>but that wasn't the case on s390x
> 
> For 1), the patch uses two ~ 2KB tables to speed up the decoding/encoding.
> I haven't found such tables in what is added into libgcc.a, though they
> are in libdecnumber/bid/bid2dpd_dpd2bid.h, but there they are just huge
> and next to other huge tables - there is d2b which is like __dpd_d2bbitint
> in the patch but it uses 64-bit entries rather than 16-bit, then there is
> d2b2 with 64-bit entries like in d2b all multiplied by 1000, then d2b3
> similarly multiplied by 1000000, then d2b4 similarly multiplied by
> 1000000000, then d2b5 similarly multiplied by 1000000000000ULL and
> d2b6 similarly multiplied by 1000000000000000ULL.  Arguably it can
> save some of the multiplications, but on the other side accesses memory
> which is unlikely in the caches, and the 2048 bytes in the patch vs.
> 24 times more for d2b is IMHO significant.
> For b2d, libdecnumber/bid/bid2dpd_dpd2bid.h has again b2d table like
> __dpd_b2dbitint in the patch, except that it has 64-bit entries rather
> than 16-bit (this time 1000 entries), but then has b2d2 which has the
> same entries shifted left by 10, then b2d3 shifted left by 20, b2d4 shifted
> left by 30 and b2d5 shifted left by 40.  I can understand for d2b paying
> memory cost to speed up multiplications, but don't understand paying
> extra 4 * 8 * 1000 bytes (+ 6 * 1000 bytes for b2d not using ushort)
> just to avoid shifts.
> 
> Tested on x86_64-linux, i686-linux and s390x-linux with
> make check-gcc dfp.exp
> ok for trunk?

Isn't soft-fp imported from glibc?  I was hoping Joseph would review
this one.

Thanks,
Richard.

> 
> 2025-05-20  Jakub Jelinek  
> 
>   * config/t-softfp (softfp_bid_list): Don't guard with
>   $(enable_decimal_float) == bid.
>   * soft-fp/bitint.h (__bid_pow10bitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_pow10bitint.
>   (__dpd_d2bbitint, __dpd_b2dbitint): Declare.
>   * soft-fp/bitintpow10.c (__dpd_d2bbitint, __dpd_b2dbitint): New
>   variables.
>   * soft-fp/fixsdbitint.c (__bid_fixsdbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
>   Add DPD support.  Fix big-endian support.
>   * soft-fp/fixddbitint.c (__bid_fixddbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
>   Add DPD support.  Fix big-endian support.
>   * soft-fp/fixtdbitint.c (__bid_fixtdbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
>   Add DPD support.  Fix big-endian support.
>   * soft-fp/fixsdti.c (__bid_fixsdbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
>   (__bid_fixsdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
>   __dpd_fixsdti.
>   * soft-fp/fixddti.c (__bid_fixddbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixddbitint.
>   (__bid_fixddti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
>   __dpd_fixddti.
>   * soft-fp/fixtdti.c (__bid_fixtdbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixtdbitint.
>   (__bid_fixtdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to
>   __dpd_fixtdti.
>   * soft-fp/fixunssdti.c (__bid_fixsdbitint): For
>   !defined(ENABLE_DECIMAL_BID_FORMAT) redefine to __dpd_fixsdbitint.
>   (__bid_fixunssdti): For !defined(ENABLE_DECIMAL_BID_FORMAT) redefine
>   to __dpd

Re: [PATCH v1 0/1] Add error message to cmp_* and in_range.

2025-05-27 Thread Jonathan Wakely
On Tue, 27 May 2025 at 07:51, Luc Grosheintz  wrote:
>
> While reading the compiler output of
>
> make check-target-libstdc++-v3
>
> for buggy code, e.g. cmp_equal(1.0, 1.0), the error message
> was very short, and I saw no hint that neither of the two
> template arguments was an integer. Essentially, the trace
> was:
>
>   1. my faulty line
>   2. required from here
>   3. static_assert(false)
>
> On regular builds with g++ the error message mentions the
> static_assert(__is_standard_integer), and is much less
> cryptic. Please ignore if this is intended behaviour.

That's because the testsuite runs with -fdiagnostics-plain-output
which is certainly the intended behaviour. That's useful for
machine-readable output (as needed by the testsuite so that additional
diagnostic notes don't confuse the testsuite) but isn't intended to be
used for human-readable output.

As long as the diagnostic is fine for normal use I don't think we need to
optimize the code for the purposes of the testsuite. And since GCC
doesn't support any extended integer types, I'm not really concerned
about that misinterpretation of the __is_standard_integer trait. Maybe
it would make sense to rename that to __is_signed_or_unsigned_integer
though. We do already have __cv_unqual_signed_or_unsigned_integer_type
in .




>
> Tested on x86_64 with:
>
> make check-target-libstdc++-v3
>
> (in a no PCH build).
>
> Luc Grosheintz (1):
>   libstdc++: Improve diagnostic message for `cmp_*` and `in_range`.
>
>  libstdc++-v3/include/std/utility | 18 --
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> --
> 2.49.0
>



[PATCH] libstdc++: Implement C++26 std::polymorphic [PR119152]

2025-05-27 Thread Tomasz Kamiński
From: Jonathan Wakely 

This patch implements C++26 std::polymorphic as specified in P3019 with
amendment to move assignment from LWG 4251.

The implementation always allocates the stored object on the heap. The manager
function (_M_manager) is similarly kept with the object (polymorphic::_Obj),
which reduces the size of the polymorphic to the size of a single pointer plus
the allocator (which is declared with [[no_unique_address]]).

The implementation does not use small-object optimization (SSO). We may
consider adding this in the future, as SSO is allowed by the standard. However,
storing any polymorphic object will require providing space for two pointers
(manager function and vtable pointer) and user-declared data members.

PR libstdc++/119152

libstdc++-v3/ChangeLog:

* include/bits/indirect.h (std::polymorphic, pmr::polymorphic)
[__glibcxx_polymorphic]: Define.
* include/bits/version.def (polymorphic): Define.
* include/bits/version.h: Regenerate.
* include/std/memory: Define __cpp_lib_polymorphic.
* testsuite/std/memory/polymorphic/copy.cc: New test.
* testsuite/std/memory/polymorphic/copy_alloc.cc: New test.
* testsuite/std/memory/polymorphic/ctor.cc: New test.
* testsuite/std/memory/polymorphic/ctor_poly.cc: New test.
* testsuite/std/memory/polymorphic/incomplete.cc: New test.
* testsuite/std/memory/polymorphic/invalid_neg.cc: New test.
* testsuite/std/memory/polymorphic/move.cc: New test.
* testsuite/std/memory/polymorphic/move_alloc.cc: New test.

Co-authored-by: Tomasz Kamiński 
Signed-off-by: Tomasz Kamiński 
---
Again, as in the case of indirect, the majority of the implementation was
provided by Jonathan Wakely. I (Tomasz Kamiński) have reviewed the
implementation, added the tests, and fixed two issues:
 * 
https://forge.sourceware.org/tkaminsk/gcc/commit/29ef286d1e08f43d212e5f60ca4ea161e2245705
 * 
https://forge.sourceware.org/tkaminsk/gcc/commit/872cc86c66583a1c8b6c9746cffe4342cd6458fe

Tested on x86_64-linux. OK for trunk?

 libstdc++-v3/include/bits/indirect.h  | 376 +-
 libstdc++-v3/include/bits/version.def |   9 +
 libstdc++-v3/include/bits/version.h   |  10 +
 libstdc++-v3/include/std/memory   |   1 +
 .../testsuite/std/memory/polymorphic/copy.cc  | 157 
 .../std/memory/polymorphic/copy_alloc.cc  | 270 +
 .../testsuite/std/memory/polymorphic/ctor.cc  | 190 +
 .../std/memory/polymorphic/ctor_poly.cc   | 220 ++
 .../std/memory/polymorphic/incomplete.cc  |  13 +
 .../std/memory/polymorphic/invalid_neg.cc |  28 ++
 .../testsuite/std/memory/polymorphic/move.cc  | 177 +
 .../std/memory/polymorphic/move_alloc.cc  | 339 
 12 files changed, 1789 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/copy.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/copy_alloc.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/ctor.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/ctor_poly.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/incomplete.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/invalid_neg.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/move.cc
 create mode 100644 libstdc++-v3/testsuite/std/memory/polymorphic/move_alloc.cc

diff --git a/libstdc++-v3/include/bits/indirect.h 
b/libstdc++-v3/include/bits/indirect.h
index 85908e219b7..e8000d7c024 100644
--- a/libstdc++-v3/include/bits/indirect.h
+++ b/libstdc++-v3/include/bits/indirect.h
@@ -452,7 +452,381 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 #endif // __glibcxx_indirect
 
- _GLIBCXX_END_NAMESPACE_VERSION
+#if __glibcxx_polymorphic // C++26 && HOSTED
+  template>
+class polymorphic;
+
+  namespace pmr
+  {
+template
+  using polymorphic = polymorphic<_Tp, polymorphic_allocator<_Tp>>;
+  }
+
+  // [polymorphic], class template polymorphic
+  template
+class polymorphic
+{
+  static_assert(is_object_v<_Tp>);
+  static_assert(!is_array_v<_Tp>);
+  static_assert(!is_same_v<_Tp, in_place_t>);
+  static_assert(!__is_in_place_type_v<_Tp>);
+  static_assert(!is_const_v<_Tp> && !is_volatile_v<_Tp>);
+
+  using _ATraits = allocator_traits<_Alloc>;
+  static_assert(is_same_v<_Tp, typename _ATraits::value_type>);
+
+  // The owned object is embedded within a control block which knows the
+  // dynamic type and manages cloning and destroying the owned object.
+  struct _Obj
+  {
+   typename _ATraits::pointer _M_objp{}; // pointer to the owned object.
+
+   // A pointer to this type, e.g. _Obj*
+   using pointer
+ = typename _ATraits::template rebind_traits<_Obj>::pointer;
+
+   enum class _Op { _Dispose = 1, _Copy = 2, _Move = 3 };
+
+   constexpr virtual pointer
+   _

Re: [PATCH v3 1/2] tree-simplify: unify simple_comparison ops in vec_cond for bit and/or/xor [PR119196]

2025-05-27 Thread Richard Biener
On Wed, 21 May 2025, Icen Zeyada wrote:

> Merge simple_comparison patterns under a single vec_cond_expr for bit_and,
> bit_ior, and bit_xor in the simplify pass.
> 
> Ensure that when both operands of a bit_and, bit_ior, or bit_xor are 
> simple_comparison
> results, they reside within the same vec_cond_expr rather than separate ones.
> This prepares the AST so that subsequent transformations (e.g., folding the
> comparisons if possible) can take effect.
> 
> PR tree-optimization/119196
> 
> gcc/ChangeLog:
> 
>   * match.pd: Merge multiple vec_cond_expr in a single one for
> bit_and, bit_ior and bit_xor.
> 
> Signed-off-by: Icen Zeyada 
> ---
>  gcc/match.pd | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 27f662f9714b..611f05ef9f9c 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6524,6 +6524,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   { build_int_cst (integer_type_node, prec - 1);}))
>  #endif
>  
> +
> +(for lop (bit_and bit_ior bit_xor)
> +   (simplify
> +   (lop
> +  (vec_cond @0 integer_minus_onep@2 integer_zerop@3)
> +  (vec_cond @1 @2 @3))
> +  (if (expand_vec_cond_expr_p (type, TREE_TYPE (@0)))
> +   (vec_cond (lop @0 @1) @2 @3
> +


We do have some related patterns below

/* Sink binary operation to branches, but only if we can fold it.  */
(for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
 lshift rshift rdiv trunc_div ceil_div floor_div round_div 
exact_div
 trunc_mod ceil_mod floor_mod round_mod min max)
/* (c ? a : b) op (c ? d : e)  -->  c ? (a op d) : (b op e) */
...

so it probably makes sense to put this new pattern after the above
set?  Can you please add a comment as well?  I do wonder whether
there is enough canonicalization done to ensure integer_minus_onep
is always first and integer_zerop second (I doubt it), and similarly
that they line up in both vec_conds.

As written the expand_vec_cond_expr_p condition looks redundant -
neither the type of the result nor the type of the predicate
are changing.  So you can drop this here I think.

Thanks,
Richard.

>  (for cnd (cond vec_cond)
>   /* (a != b) ? (a - b) : 0 -> (a - b) */
>   (simplify
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[committed] libstdc++: Fix test failures for 32-bit AIX

2025-05-27 Thread Jonathan Wakely
With -maix32 (the default) we only have 16-bit wchar_t so these tests
fail. The debug.cc one is because we use -fwide-exec-charset=UTF-32BE
which tries to encode each wide character as four bytes in a 2-byte
wchar_t. The format.cc one is because the clown face character can't be
encoded in a single 16-bit wchar_t.

libstdc++-v3/ChangeLog:

* testsuite/std/format/debug.cc: Disable for targets with 16-bit
wchar_t.
* testsuite/std/format/functions/format.cc: Use -DUNICODE for
targets with 32-bit wchar_t.
(test_unicode) [UNICODE]: Only run checks when UNICODE is
defined.
---

Tested x86_64-linux and powerpc-aix.
Pushed to trunk.

 libstdc++-v3/testsuite/std/format/debug.cc| 1 +
 libstdc++-v3/testsuite/std/format/functions/format.cc | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/libstdc++-v3/testsuite/std/format/debug.cc 
b/libstdc++-v3/testsuite/std/format/debug.cc
index 6165a2954963..965b4dfbebc5 100644
--- a/libstdc++-v3/testsuite/std/format/debug.cc
+++ b/libstdc++-v3/testsuite/std/format/debug.cc
@@ -1,6 +1,7 @@
 // { dg-options "-fexec-charset=UTF-8 -fwide-exec-charset=UTF-32LE 
-DUNICODE_ENC" { target le } }
 // { dg-options "-fexec-charset=UTF-8 -fwide-exec-charset=UTF-32BE 
-DUNICODE_ENC" { target be } }
 // { dg-do run { target c++23 } }
+// { dg-require-effective-target 4byte_wchar_t }
 // { dg-add-options no_pch }
 // { dg-timeout-factor 2 }
 
diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc 
b/libstdc++-v3/testsuite/std/format/functions/format.cc
index 93c33b456e64..e4adf3aeb706 100644
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@@ -1,6 +1,7 @@
 // { dg-options "-fexec-charset=UTF-8" }
 // { dg-do run { target c++20 } }
 // { dg-add-options no_pch }
+// { dg-additional-options "-DUNICODE" { target 4byte_wchar_t } }
 
 #include 
 
@@ -511,6 +512,7 @@ test_bool()
 void
 test_unicode()
 {
+#ifdef UNICODE
   // Similar to sC example in test_std_examples, but not from the standard.
   // Verify that the character "🤡" has estimated field width 2,
   // rather than estimated field width equal to strlen("🤡"), which would be 
4,
@@ -564,6 +566,7 @@ test_unicode()
 std::string sA = std::format("{:>5}", input[0]);
 VERIFY( sA == input[1] );
   }
+#endif
 }
 
 int main()
-- 
2.49.0



Re: [PATCH] libstdc++: Replace some uses of std::__addressof with std::addressof

2025-05-27 Thread Jonathan Wakely
On Tue, 27 May 2025 at 13:26, Tomasz Kaminski  wrote:
>
>
>
> On Fri, May 23, 2025 at 7:00 PM Jonathan Wakely  wrote:
>>
>> Since r16-154-gc91eb5a5c13f14 std::addressof is no less efficient than
>> std::__addressof, so change some uses of the latter to the former.
>>
>> We can't change them all, because some uses need to compile as C++98
>> which only has std::__addressof.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/bits/stl_construct.h: Replace std::__addressof with
>> std::addressof.
>> * include/bits/stl_uninitialized.h: Likewise.
>> ---
>>
>> I'm undecided whether it's better to use the standard std::addressof for
>> simplicity, or to stick with std::__addressof in files where there's a
>> mix of C++98 code and >= C++11 code.
>
> After giving this a bit of thought: if I touch these files, then most likely we
> are implementing new standard features, and in the patch I will just
> default to std::addressof, as in any other case. So preferring to use
> std::addressof when available seems like the easier policy to maintain.
>
>>
>>
>> Obviously in files that don't need to compile as C++98 (such as
>> ) we could just use std::addressof.
>>
>> Tested x86_64-linux.
>>
>>
>>  libstdc++-v3/include/bits/stl_construct.h |  2 +-
>>  libstdc++-v3/include/bits/stl_uninitialized.h | 28 +--
>>  2 files changed, 15 insertions(+), 15 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/stl_construct.h 
>> b/libstdc++-v3/include/bits/stl_construct.h
>> index 23b8fb754710..a53274e33c0c 100644
>> --- a/libstdc++-v3/include/bits/stl_construct.h
>> +++ b/libstdc++-v3/include/bits/stl_construct.h
>> @@ -82,7 +82,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>if constexpr (__cplusplus > 201703L && is_array_v<_Tp>)
>> {
>>   for (auto& __x : *__location)
>> -   std::destroy_at(std::__addressof(__x));
>> +   std::destroy_at(std::addressof(__x));
>> }
>>else
>> __location->~_Tp();
>
> There are calls to __addressof in _Destroy at lines 212, 216, 268, 272 in 
> calls that are >= C++11.
> If we update them, we should also change all of them in file.

I have another patch locally which replaced those ones (and in my tree
that patch is actually first).

I'll finish the other changes to stl_construct.h that I'm working on,
and then after that I'll replace all >= C++11 uses of __addressof.


>
>>
>> diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h 
>> b/libstdc++-v3/include/bits/stl_uninitialized.h
>> index b1428db48b00..bde787c2beaa 100644
>> --- a/libstdc++-v3/include/bits/stl_uninitialized.h
>> +++ b/libstdc++-v3/include/bits/stl_uninitialized.h
>
> Looks like all of ones that could be updated in this file, are already.
>>
>> @@ -839,7 +839,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  {
>>   _UninitDestroyGuard<_ForwardIterator> __guard(__first);
>>   for (; __first != __last; ++__first)
>> -   std::_Construct(std::__addressof(*__first));
>> +   std::_Construct(std::addressof(*__first));
>>   __guard.release();
>> }
>>  };
>> @@ -856,7 +856,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> return;
>>
>>   typename iterator_traits<_ForwardIterator>::value_type* __val
>> -   = std::__addressof(*__first);
>> +   = std::addressof(*__first);
>>   std::_Construct(__val);
>>   if (++__first != __last)
>> std::fill(__first, __last, *__val);
>> @@ -873,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  {
>>   _UninitDestroyGuard<_ForwardIterator> __guard(__first);
>>   for (; __n > 0; --__n, (void) ++__first)
>> -   std::_Construct(std::__addressof(*__first));
>> +   std::_Construct(std::addressof(*__first));
>>   __guard.release();
>>   return __first;
>> }
>> @@ -890,7 +890,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>   if (__n > 0)
>> {
>>   typename iterator_traits<_ForwardIterator>::value_type* __val
>> -   = std::__addressof(*__first);
>> +   = std::addressof(*__first);
>>   std::_Construct(__val);
>>   ++__first;
>>   __first = std::fill_n(__first, __n - 1, *__val);
>> @@ -955,7 +955,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> __alloc);
>>typedef __gnu_cxx::__alloc_traits<_Allocator> __traits;
>>for (; __first != __last; ++__first)
>> -   __traits::construct(__alloc, std::__addressof(*__first));
>> +   __traits::construct(__alloc, std::addressof(*__first));
>>__guard.release();
>>  }
>>
>> @@ -980,7 +980,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> __alloc);
>>typedef __gnu_cxx::__alloc_traits<_Allocator> __traits;
>>for (; __n > 0; --__n, (void) ++__first)
>> -   __traits::construct(__al

Re: [PATCH] [lra] force reg update after spilling to memory [PR120424]

2025-05-27 Thread Vladimir Makarov



On 5/24/25 11:06 PM, Alexandre Oliva wrote:

In the added C++ testcase, a stack slot at a negative sp offset is
used to hold a value across a call.

There are a couple of causes that directly lead to this outcome:

- the -fstack-clash-protection and -fnon-call-exception options, that
cause arm_frame_pointer_required to flip from false to true when the
first pseudo gets spilled to memory;

- when the affected pseudo is spilled to memory, we fail to update lra
regno info, because the insns that reference it are already on the
lra_constraint_insn_stack;

There is another potentially-related issue:

- when we notice that the frame pointer can no longer be eliminated to
the stack pointer, we immediately clear can_eliminate, and also
prev_can_eliminate, but update_reg_eliminate relied on the latter to
tell that it needs to propagate a previous_offset to the
newly-selected elimination, or restore the original offsets.

This patch ensures that we update insn register info after spilling a
pseudo to memory, and enables update_reg_eliminate to recognize the
case in which a previously-preferred elimination is disabled
regardless of prev_can_eliminate.

Regstrapped on x86_64-linux-gnu, also tested with gcc-14 on arm-vx7r2,
and manually tested with trunk targeting arm-eabi and arm-linux-gnu.
Ok to install?


Yes, Alex.

This is a good explanation of the patch and the bug, and the affected 
code seems very old (from the first commit of LRA). So thank you very 
much for working on and fixing this PR.




for  gcc/ChangeLog

PR rtl-optimization/120424
PR middle-end/118939?
* lra-spills.cc (spill_pseudos): Update insn regno info.
* lra-eliminations.cc (update_reg_eliminate): Recognize
disabling of active elimination regardless of
prev_can_eliminate.

for  gcc/testsuite/ChangeLog

PR rtl-optimization/120424
PR middle-end/118939?
* g++.target/arm/pr120424.C: New.
* gnat.dg/controlled9.adb: New.
* gnat.dg/controlled9_pkg.ads: New.




Re: [committed] libstdc++: Fix test failures for 32-bit AIX

2025-05-27 Thread Tomasz Kaminski
On Tue, May 27, 2025 at 2:38 PM Jonathan Wakely  wrote:

> With -maix32 (the default) we only have 16-bit wchar_t so these tests
> fail. The debug.cc one is because we use -fwide-exec-charset=UTF-32BE
> which tries to encode each wide character as four bytes in a 2-byte
> wchar_t. The format.cc one is because the clown face character can't be
> encoded in a single 16-bit wchar_t.
>
What is the encoding for the wchar_t? If it is UTF-16LE or UTF-16BE we
should
define them as -fwide-exec-charset. What is the box you are using for tests?

>
> libstdc++-v3/ChangeLog:
>
> * testsuite/std/format/debug.cc: Disable for targets with 16-bit
> wchar_t.
> * testsuite/std/format/functions/format.cc: Use -DUNICODE for
> targets with 32-bit wchar_t.
> (test_unicode) [UNICODE]: Only run checks when UNICODE is
> defined.
> ---
>
> Tested x86_64-linux and powerpc-aix.
> Pushed to trunk.
>
>  libstdc++-v3/testsuite/std/format/debug.cc| 1 +
>  libstdc++-v3/testsuite/std/format/functions/format.cc | 3 +++
>  2 files changed, 4 insertions(+)
>
> diff --git a/libstdc++-v3/testsuite/std/format/debug.cc
> b/libstdc++-v3/testsuite/std/format/debug.cc
> index 6165a2954963..965b4dfbebc5 100644
> --- a/libstdc++-v3/testsuite/std/format/debug.cc
> +++ b/libstdc++-v3/testsuite/std/format/debug.cc
> @@ -1,6 +1,7 @@
>  // { dg-options "-fexec-charset=UTF-8 -fwide-exec-charset=UTF-32LE
> -DUNICODE_ENC" { target le } }
>  // { dg-options "-fexec-charset=UTF-8 -fwide-exec-charset=UTF-32BE
> -DUNICODE_ENC" { target be } }
>  // { dg-do run { target c++23 } }
> +// { dg-require-effective-target 4byte_wchar_t }
>  // { dg-add-options no_pch }
>  // { dg-timeout-factor 2 }
>
> diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc
> b/libstdc++-v3/testsuite/std/format/functions/format.cc
> index 93c33b456e64..e4adf3aeb706 100644
> --- a/libstdc++-v3/testsuite/std/format/functions/format.cc
> +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
> @@ -1,6 +1,7 @@
>  // { dg-options "-fexec-charset=UTF-8" }
>  // { dg-do run { target c++20 } }
>  // { dg-add-options no_pch }
> +// { dg-additional-options "-DUNICODE" { target 4byte_wchar_t } }
>
>  #include 
>
> @@ -511,6 +512,7 @@ test_bool()
>  void
>  test_unicode()
>  {
> +#ifdef UNICODE
>// Similar to sC example in test_std_examples, but not from the
> standard.
>// Verify that the character "🤡" has estimated field width 2,
>// rather than estimated field width equal to strlen("🤡"), which would
> be 4,
> @@ -564,6 +566,7 @@ test_unicode()
>  std::string sA = std::format("{:>5}", input[0]);
>  VERIFY( sA == input[1] );
>}
> +#endif
>  }
>
>  int main()
> --
> 2.49.0
>
>


Re: [PATCH] Fix IPA-SRA issue with reverse SSO on specific pattern

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 2:40 PM Martin Jambor  wrote:
>
> Hi,
>
> On Wed, May 21 2025, Eric Botcazou wrote:
> > Hi,
> >
> > IPA-SRA generally works fine in the presence of reverse Scalar_Storage_Order
> > by propagating the relevant flag onto the newly generated MEM_REFs.  However
> > we have been recently faced with a specific Ada pattern that it doesn't 
> > handle
> > correctly: 'Valid applied to a floating-point component of an aggregate type
> > with reverse Scalar_Storage_Order.
> >
> > The attribute is implemented by a call to a specific routine of the runtime
> > that expects a pointer to the object so, in the case of a component with
> > reverse SSO, the compiler first loads it from the aggregate to get back the
> > native storage order, but it does the load using an array of bytes instead 
> > of
> > the floating-point type to prevent the FPU from fiddling with the value, 
> > which
> > yields in the .original dump file:
> >
> >   *(character[1:4] *) &F2b = VIEW_CONVERT_EXPR(item.f);
> >
> > Of course that's a bit convoluted, but it does not seem that another method
> > would be simpler or even work, and using VIEW_CONVERT_EXPR to toggle the SSO
> > is supposed to be supported in any case (unlike aliasing or type punning).
> >
> > The attached patch makes it work.  While the call to storage_order_barrier_p
> > from IPA-SRA is quite natural (the regular SRA has it too), the tweak to the
> > predicate itself is needed to handle the scalar->aggregate conversion, which
> > is admittedly awkward but again without clear alternative.
> >
> > Tested on x86-64/Linux, OK for the mainline and 15 branch?  Technically, 
> > this
> > is a regression in GCC 10.x and later, but the pattern is so specific, even 
> > in
> > Ada, that patching earlier branches does not seem worth the hassle.
> >
> >
> > 2025-05-21  Eric Botcazou  
> >
> >   * ipa-sra.cc (scan_expr_access): Also disqualify storage order
> >   barriers from splitting.
>
> The IPA-SRA change is OK.
>
> >   * tree.h (storage_order_barrier_p): Also return false if the
> >   operand of the VIEW_CONVERT_EXPR has reverse storage order.
>
> I cannot approve this one (but FWIW it looks OKish to me too).

That looks good to me.

Richard.

> Thanks,
>
> Martin
>
>
> >
> >
> > 2025-05-21  Eric Botcazou  
> >
> >   * gnat.dg/sso19.adb: New test.
> >   * gnat.dg/sso19_pkg.ads, gnat.dg/sso19_pkg.adb: New helper.
> >
> > --
> > Eric Botcazou
> > diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> > index 88bfae9502c..6e6cf895988 100644
> > --- a/gcc/ipa-sra.cc
> > +++ b/gcc/ipa-sra.cc
> > @@ -1848,6 +1848,12 @@ scan_expr_access (tree expr, gimple *stmt, 
> > isra_scan_context ctx,
> >if (!desc || !desc->split_candidate)
> >  return;
> >
> > +  if (storage_order_barrier_p (expr))
> > +{
> > +  disqualify_split_candidate (desc, "Encountered a storage order 
> > barrier.");
> > +  return;
> > +}
> > +
> >if (!poffset.is_constant (&offset)
> >|| !psize.is_constant (&size)
> >|| !pmax_size.is_constant (&max_size))
> > diff --git a/gcc/tree.h b/gcc/tree.h
> > index 99f26177628..1e41316b4c9 100644
> > --- a/gcc/tree.h
> > +++ b/gcc/tree.h
> > @@ -5499,7 +5499,7 @@ storage_order_barrier_p (const_tree t)
> >&& TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (op)))
> >  return true;
> >
> > -  return false;
> > +  return reverse_storage_order_for_component_p (op);
> >  }
> >
> >  /* Given a DECL or TYPE, return the scope in which it was declared, or


Re: [PATCH 2/2] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 2:44 PM Robin Dapp  wrote:
>
> > This mangles in the non-SLP path removal, can you please separate that
> > out?
>
> So should patch 1/2 do more than it does, i.e. fully remove the non-slp
> paths rather than just if (0) them?

There should be a separate 2/3 that does this, aka remove the if (0)
and re-indents.
See the few example series for non-SLP cleanup.  Since you only
partially clean up
vectorizable_load, removal of 'nunits' and the other bits usually in
3/3 are not applicable.

Thanks,
Richard.

>
> --
> Regards
>  Robin
>


Re: [PATCH 2/2] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Robin Dapp

On Tue, May 27, 2025 at 2:44 PM Robin Dapp  wrote:


> This mangles in the non-SLP path removal, can you please separate that
> out?

So should patch 1/2 do more than it does, i.e. fully remove the non-slp
paths rather than just if (0) them?


There should be a separate 2/3 that does this, aka remove the if (0)
and re-indents.
See the few example series for non-SLP cleanup.  Since you only
partially cleanup
vectorizable_load removal of 'nunits' and the other bits usually in
3/3 are not applicable.


Ok, maybe then I'm going to clean up vectorizable_load entirely if that's 
better.


--
Regards
Robin



Re: [PATCH v1 1/3] RISC-V: Leverage vaadd.vv for signed standard name avg_floor

2025-05-27 Thread Jeff Law




On 5/27/25 12:27 AM, Robin Dapp wrote:



Apart from that it LGTM, thanks for digging deeper here.
Just wanted to echo this.  I've had a low priority todo to review vaaddu 
and friends after seeing them get used in some hand coded versions of 
various routines in x264.  Even if you're not tackling that specific 
problem the effort in this space is definitely appreciated.


The VXRM mode switching may make this not-so-profitable, though I hope 
those problems are limited to early designs and that it'll be handled in 
a more performant way in the future.  Point being we need to keep an eye 
out for regressions and if seen we'll need to make this behavior 
conditional on a flag in the tuning structure.


Jeff


Re: [PATCH 2/2] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Robin Dapp
That would be appreciated (but is of course a larger task - I was fine with 
the partial thing you did).


Ok.  Then to move things forward I'll do a 2/3 for this one first.  Once we're 
through the review cycle for the series I can work on the non-slp removal for 
the full function.


--
Regards
Robin



Re: [PATCH 2/2] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 2:53 PM Robin Dapp  wrote:
>
> > On Tue, May 27, 2025 at 2:44 PM Robin Dapp  wrote:
> >>
> >> > This mangles in the non-SLP path removal, can you please separate that
> >> > out?
> >>
> >> So should patch 1/2 do more than it does, i.e. fully remove the non-slp
> >> paths rather than just if (0) them?
> >
> > There should be a separate 2/3 that does this, aka remove the if (0)
> > and re-indents.
> > See the few example series for non-SLP cleanup.  Since you only
> > partially cleanup
> > vectorizable_load removal of 'nunits' and the other bits usually in
> > 3/3 are not applicable.
>
> Ok, I maybe then I'm going to cleanup vectorizable_load entirely if that's
> better.

That would be appreciated (but is of course a larger task - I was fine with the
partial thing you did).

Richard.

> --
> Regards
>  Robin
>


[committed] libstdc++: Regenerate include/Makefile.in

2025-05-27 Thread Jonathan Wakely
libstdc++-v3/ChangeLog:

* include/Makefile.in: Regenerate.
---

Pushed to trunk.

 libstdc++-v3/include/Makefile.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index a6e602327b6e..0ef8564f2385 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -558,8 +558,8 @@ bits_freestanding = \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/fs_ops.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/fs_path.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/fstream.tcc \
-@GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/funcwrap.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/funcref_impl.h \
+@GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/funcwrap.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/gslice.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/gslice_array.h \
 @GLIBCXX_HOSTED_TRUE@  ${bits_srcdir}/hashtable.h \
-- 
2.49.0



Re: [PATCH 0/3] Redirect to specific target based on TARGET_VERSION_COMPATIBLE

2025-05-27 Thread Jeff Law




On 5/22/25 9:26 PM, Yangyu Chen wrote:




On 23 May 2025, at 04:02, Jeff Law  wrote:


On 5/22/25 9:05 AM, Alfie Richards wrote:

Hi Jeff,
I sent this patch with my implementation a while ago:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681043.html
There hasn't been any feedback on that patch yet.
These patches are still useful and I would like to go ahead with them. I am in 
favour of using my implementation as it is a bit stronger, but it also requires 
my larger FMV series to be approved first.

Can you ping your larger FMV series?  I strongly suspect everyone is digging 
out from everything that queued up while the trunk was in bugfixing stages.

Yangyu -- what are your thoughts here?  If we went with Alfie's patch, does it 
solve the problems you're interested in, and what patches of yours would still 
be relevant if we incorporated Alfie's work?



I agree with Alfie's approach. We are addressing the same issue.
His patch is more structured and includes test cases.

His patch lacks a target hook for RISC-V, while mine does. However,
I think it's OK if we get his patch accepted, and I will write that
for RISC-V.
So if it's OK with you I'd like to temporarily shift focus over to 
Alfie's patch to get that moved forward, then come back to the RISC-V 
specific stuff?


Jeff




Re: [PATCH 0/3] Redirect to specific target based on TARGET_VERSION_COMPATIBLE

2025-05-27 Thread Yangyu Chen



> On 27 May 2025, at 20:59, Jeff Law  wrote:
> So if it's OK with you I'd like to temporarily shift focus over to Alfie's 
> patch to get that moved forward, then come back to the RISC-V specific stuff?
> 

Sure.

Thanks,
Yangyu Chen

> Jeff
> 



Re: [PATCH RFA] fold: DECL_VALUE_EXPR isn't simple [PR120400]

2025-05-27 Thread Richard Biener
On Mon, May 26, 2025 at 4:27 PM Jason Merrill  wrote:
>
> Tested x86_64-pc-linux-gnu, OK for trunk?

LGTM.

Richard.

> Iain, will you verify that one of your coroutine testcases breaks without this
> fix?  I don't think lambda or anonymous union uses of DECL_VALUE_EXPR can 
> break
> in the same way, though this change is also correct for them.
>
> -- 8< --
>
> This PR noted that fold_truth_andor was wrongly changing && to & where the
> RHS is a VAR_DECL with DECL_VALUE_EXPR; we can't assume that such can be
> evaluated unconditionally.
>
> To be more precise we could recurse into DECL_VALUE_EXPR, but that doesn't
> seem worth bothering with since typical uses involve a COMPONENT_REF, which
> is not simple.
>
> PR c++/120400
>
> gcc/ChangeLog:
>
> * fold-const.cc (simple_operand_p): False for vars with
> DECL_VALUE_EXPR.
> ---
>  gcc/fold-const.cc | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 5f48ced5063..014f4218793 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -5085,6 +5085,11 @@ simple_operand_p (const_tree exp)
>  #pragma weak, etc).  */
>   && ! TREE_PUBLIC (exp)
>   && ! DECL_EXTERNAL (exp)
> + /* DECL_VALUE_EXPR will expand to something non-simple.  */
> + && ! ((VAR_P (exp)
> +|| TREE_CODE (exp) == PARM_DECL
> +|| TREE_CODE (exp) == RESULT_DECL)
> +   && DECL_HAS_VALUE_EXPR_P (exp))
>   /* Weakrefs are not safe to be read, since they can be NULL.
>  They are !TREE_PUBLIC && !DECL_EXTERNAL but still
>  have DECL_WEAK flag set.  */
>
> base-commit: f59ff19bc3d37f4dd159db541ed4f07efb10fcc8
> --
> 2.49.0
>


Re: [PATCH] RISC-V:Add the MIPS P8700 conditional move extension instruction support.

2025-05-27 Thread Umesh Kalappa
Hi all,

Sorry for the noise; it looks like the patch was truncated, so I will be
sending a new email with the proper patch.

Thank you and again my apologies for the noise.
~U

On Tue, May 27, 2025 at 3:41 PM Umesh Kalappa 
wrote:

> The P8700 is a high-performance processor from MIPS by extending RISCV with
> the MIPS custom instruction and the following changes are added to enable
> the conditional move support from mips.
>
> No regression found for "runtest --tool gcc
> --target_board='riscv-sim/-mabi=lp64d/-mcmodel=medlow/-mtune=mips-p8700/-O2
> ' riscv.exp"
>
> gcc/ChangeLog:
>
> *common/config/riscv/riscv-common.cc (riscv_ext_version_table) :
>  Added MIPS specific insns for P8700.
> *config/riscv/riscv-cores.def(RISCV_CORE):Updated the march for
> mips-p8700 tune.
> *config/riscv/riscv-ext-mips.def(DEFINE_RISCV_EXT):
>  New file added the mips conditional mov extension.
> *config/riscv/riscv-ext.def: Likewise.
> *config/riscv/t-riscv:Generates riscv-ext.opt
> *config/riscv/riscv-ext.opt: Generated file.
> *config/riscv/riscv.cc(riscv_expand_conditional_move):Updated for
> mips cmov.
> *config/riscv/riscv.md(movcc):updated expand for MIPS CCMOV.
> *config/riscv/mips-insn.md:New file for mips-p8700 ccmov insn.
> *testsuite/gcc.target/riscv/mipscondmov.c:New file to test the
> ccmov insn.
> *gcc/doc/riscv-ext.texi:Updated for mips cmov.
> ---
>  gcc/config/riscv/mips-insn.md| 37 
>  gcc/config/riscv/riscv-ext-mips.def  | 35 ++
>  gcc/testsuite/gcc.target/riscv/mipscondmov.c | 30 
>  3 files changed, 102 insertions(+)
>  create mode 100644 gcc/config/riscv/mips-insn.md
>  create mode 100644 gcc/config/riscv/riscv-ext-mips.def
>  create mode 100644 gcc/testsuite/gcc.target/riscv/mipscondmov.c
>
> diff --git a/gcc/config/riscv/mips-insn.md b/gcc/config/riscv/mips-insn.md
> new file mode 100644
> index 000..ee106c4221e
> --- /dev/null
> +++ b/gcc/config/riscv/mips-insn.md
> @@ -0,0 +1,37 @@
> +;; Machine description for MIPS custom instructioins.
> +;; Copyright (C) 2025 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; .
> +
> +(define_insn "*movcc_bitmanip"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +   (if_then_else:GPR
> +(match_operator 5 "equality_operator"
> +   [(match_operand:X 1 "register_operand" "r")
> +(match_operand:X 2 "const_0_operand" "J")])
> +(match_operand:GPR 3 "reg_or_0_operand" "rJ")
> +(match_operand:GPR 4 "reg_or_0_operand" "rJ")))]
> +  "TARGET_XMIPSCMOV"
> +{
> +  enum rtx_code code = GET_CODE (operands[5]);
> +  if (code == NE)
> +return "mips.ccmov\t%0,%1,%z3,%z4";
> +  else
> +return "mips.ccmov\t%0,%1,%z4,%z3";
> +}
> +  [(set_attr "type" "condmove")
> +   (set_attr "mode" "")])
> diff --git a/gcc/config/riscv/riscv-ext-mips.def
> b/gcc/config/riscv/riscv-ext-mips.def
> new file mode 100644
> index 000..86492223cb3
> --- /dev/null
> +++ b/gcc/config/riscv/riscv-ext-mips.def
> @@ -0,0 +1,35 @@
> +/* MIPS extension definition file for RISC-V.
> +   Copyright (C) 2025 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +.
> +
> +Please run `make riscv-regen` in build folder to make sure updated
> anything.
> +
> +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def.  */
> +
> +DEFINE_RISCV_EXT(
> +  /* NAME */ xmipscmov,
> +  /* UPPERCAE_NAME */ XMIPSCMOV,
> +  /* FULL_NAME */ "Mips conditional move extension",
> +  /* DESC */ "",
> +  /* URL */ ,
> +  /* DEP_EXTS */ ({}),
> +  /* SUPPORTED_VERSIONS *

[committed] doc: Fix typo in description of nonstring attribute

2025-05-27 Thread Jonathan Wakely
gcc/ChangeLog:

* doc/extend.texi (Common Variable Attributes): Fix typo in
description of nonstring.
---

Pushed as obvious.

 gcc/doc/extend.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 442fce653a40..989df965ed98 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7323,7 +7323,7 @@ truncate the copy without appending the terminating 
@code{NUL} character.
 Using the attribute makes it possible to suppress the warning.  However,
 when the array is declared with the attribute the call to @code{strlen} is
 diagnosed because when the array doesn't contain a @code{NUL}-terminated
-string the call is undefined.  To copy, compare, of search non-string
+string the call is undefined.  To copy, compare, or search non-string
 character arrays use the @code{memcpy}, @code{memcmp}, @code{memchr},
 and other functions that operate on arrays of bytes.  In addition,
 calling @code{strnlen} and @code{strndup} with such arrays is safe
-- 
2.49.0



Re: [PATCH 1/2] forwprop: Change test in loop of optimize_memcpy_to_memset

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 5:02 AM Andrew Pinski  wrote:
>
> This was noticed in the review of copy propagation for aggregates
> patch, instead of checking for a NULL or a non-ssa name of vuse,
> we should instead check if the vuse is a default name and stop
> then.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
> gcc/ChangeLog:
>
> * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Change check
> from NULL/non-ssa name to default name.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/tree-ssa-forwprop.cc | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 4c048a9a298..e457a69ed48 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1226,7 +1226,8 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>gimple *defstmt;
>unsigned limit = param_sccvn_max_alias_queries_per_access;
>do {
> -if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> +/* If the vuse is the default definition, then there is no stores 
> beforhand. */
> +if (SSA_NAME_IS_DEFAULT_DEF (vuse))

Since forwprop does update_ssa in the end I was wondering whether any
bare non-SSA VUSE/VDEFs sneak in - for this the != SSA_NAME check
would be useful.  On a GIMPLE stmt gimple_vuse () will return NULL
when it's not a load or store (or with a novops call), as you are using
gimple_store_p/gimple_assign_load_p there might be a disconnect
between those predicates and the presence of a vuse (I hope not, but ...)

The patch looks OK to me, the comments above apply to the copy propagation case.

Thanks,
Richard.

>return false;
>  defstmt = SSA_NAME_DEF_STMT (vuse);
>  if (is_a (defstmt))
> --
> 2.43.0
>


Re: [PATCH 2/2] forwprop: Add stats for memcpy->memset

2025-05-27 Thread Richard Biener
On Tue, May 27, 2025 at 5:02 AM Andrew Pinski  wrote:
>
> As part of the review of copy prop for aggregates, it was
> mentioned there should be some statistics added, and I noticed
> the memcpy->memset was missing the statistics too. So this adds
> that.

OK.

> gcc/ChangeLog:
>
> * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Adds
> statistics when the statement changed.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/tree-ssa-forwprop.cc | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index e457a69ed48..81ea7d4195e 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1324,6 +1324,7 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>tree ctor = build_constructor (TREE_TYPE (dest), NULL);
>gimple_assign_set_rhs_from_tree (gsip, ctor);
>update_stmt (stmt);
> +  statistics_counter_event (cfun, "copy zeroing propagation of 
> aggregate", 1);
>  }
>else /* If stmt is memcpy, transform it into memset.  */
>  {
> @@ -1333,6 +1334,7 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>gimple_call_set_fntype (call, TREE_TYPE (fndecl));
>gimple_call_set_arg (call, 1, val);
>update_stmt (stmt);
> +  statistics_counter_event (cfun, "memcpy to memset changed", 1);
>  }
>
>if (dump_file && (dump_flags & TDF_DETAILS))
> --
> 2.43.0
>


Re: [PATCH 2/2] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Richard Biener
On Tue, May 20, 2025 at 11:35 AM Robin Dapp  wrote:
>
> This patch enables strided loads for VMAT_STRIDED_SLP.  Instead of
> building vectors from scalars or other vectors we can use strided loads
> directly when applicable.
>
> The current implementation limits strided loads to cases where we can
> load entire groups and not subsets of them.  A future improvement would
> be to e.g. load a group of three uint8_t
>
>   g0 g1  g2, g0 + stride g1 + stride g2 + stride, ...
>
> by
>
>   vlse16 vlse8
>
> and permute those into place (after re-interpreting as vector of
> uint8_t).
>
> For satd_8x4 in particular we can do even better by eliding the strided
> SLP load permutations, essentially turning
>
>   vlse64 v0, (a0)
>   vlse64 v1, (a1)
>   VEC_PERM_EXPR26, 27 }>;
>   VEC_PERM_EXPR30, 31 }>;
>
> into
>
>   vlse32 v0, (a0)
>   vlse32 v1, (a1)
>   vlse32 v0, 4(a0)
>   vlse32 v1, 4(a1)
>
> but that is going to be a follow up.
>
> Bootstrapped and regtested on x86, aarch64, and power10.
> Regtested on rv64gcv_zvl512b.  I'm seeing one additional failure in
> gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
> where we use a larger LMUL than we should but IMHO this can wait.

This mangles in the non-SLP path removal, can you please separate that
out?

Thanks,
Richard.

> PR target/118109
>
> gcc/ChangeLog:
>
> * internal-fn.cc (internal_strided_fn_supported_p): New
> function.
> * internal-fn.h (internal_strided_fn_supported_p): Declare.
> * tree-vect-stmts.cc (vect_supportable_strided_type): New
> function.
> (vectorizable_load): Add strided-load support for strided
> groups.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr118019-2.c: New test.
> ---
>  gcc/internal-fn.cc|  21 ++
>  gcc/internal-fn.h |   2 +
>  .../gcc.target/riscv/rvv/autovec/pr118019-2.c |  51 +
>  gcc/tree-vect-stmts.cc| 203 +++---
>  4 files changed, 247 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 6b04443f7cd..aec90ef87cc 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -5203,6 +5203,27 @@ internal_gather_scatter_fn_supported_p (internal_fn 
> ifn, tree vector_type,
>return ok;
>  }
>
> +/* Return true if the target supports a strided load/store function IFN
> +   with VECTOR_TYPE.  If supported and ELSVALS is nonzero the supported else
> +   values will be added to the vector ELSVALS points to.  */
> +
> +bool
> +internal_strided_fn_supported_p (internal_fn ifn, tree vector_type,
> +vec *elsvals)
> +{
> +  machine_mode mode = TYPE_MODE (vector_type);
> +  optab optab = direct_internal_fn_optab (ifn);
> +  insn_code icode = direct_optab_handler (optab, mode);
> +
> +  bool ok = icode != CODE_FOR_nothing;
> +
> +  if (ok && elsvals)
> +get_supported_else_vals
> +  (icode, internal_fn_else_index (IFN_MASK_LEN_STRIDED_LOAD), *elsvals);
> +
> +  return ok;
> +}
> +
>  /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
> for pointers of type TYPE when the accesses have LENGTH bytes and their
> common byte alignment is ALIGN.  */
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index afd4f8e64c7..7d386246a42 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -242,6 +242,8 @@ extern int internal_fn_stored_value_index (internal_fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> tree, tree, int,
> vec * = nullptr);
> +extern bool internal_strided_fn_supported_p (internal_fn ifn, tree 
> vector_type,
> +vec *elsvals);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
> poly_uint64, unsigned int);
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c
> new file mode 100644
> index 000..9918d4d7f52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv_zvl512b -mabi=lp64d 
> -mno-vector-strict-align" } */
> +
> +/* Ensure we use strided loads.  */
> +
> +typedef unsigned char uint8_t;
> +typedef unsigned short uint16_t;
> +typedef unsigned int uint32_t;
> +
> +#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3)
>   \
> +  {  
>   \
> +int t0 = s0 + s1;
>   \
> +int t1 = s0 - s1;   

[COMMITTED PATCH v4 1/3] sbitmap: Rename bitmap_bit_in_range_p to bitmap_any_bit_in_range_p

2025-05-27 Thread Konstantinos Eleftheriou
This patch renames `bitmap_bit_in_range_p` to `bitmap_any_bit_in_range_p`
to better reflect its purpose.

gcc/ChangeLog:

* sbitmap.cc (bitmap_bit_in_range_p): Renamed the function.
(bitmap_any_bit_in_range_p): New function name.
(bitmap_bit_in_range_p_checking): Renamed the function.
(bitmap_any_bit_in_range_p_checking): New function name.
(test_set_range): Updated function calls to use the new name.
(test_bit_in_range): Likewise.
* sbitmap.h (bitmap_bit_in_range_p): Renamed the function.
(bitmap_any_bit_in_range_p): New function name.
* tree-ssa-dse.cc (live_bytes_read):
Updated function call to use the new name.

Signed-off-by: Konstantinos Eleftheriou 
---

(no changes since v1)

 gcc/sbitmap.cc  | 109 ++--
 gcc/sbitmap.h   |   5 +-
 gcc/tree-ssa-dse.cc |   2 +-
 3 files changed, 59 insertions(+), 57 deletions(-)

diff --git a/gcc/sbitmap.cc b/gcc/sbitmap.cc
index df2e1aa49358..64c7517397b3 100644
--- a/gcc/sbitmap.cc
+++ b/gcc/sbitmap.cc
@@ -330,7 +330,8 @@ bitmap_set_range (sbitmap bmap, unsigned int start, 
unsigned int count)
the simple bitmap BMAP.  Return FALSE otherwise.  */
 
 bool
-bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int 
end)
+bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned int start,
+  unsigned int end)
 {
   gcc_checking_assert (start <= end);
   bitmap_check_index (bmap, end);
@@ -863,14 +864,14 @@ namespace selftest {
 
 /* Selftests for sbitmaps.  */
 
-/* Checking function that uses both bitmap_bit_in_range_p and
+/* Checking function that uses both bitmap_any_bit_in_range_p and
loop of bitmap_bit_p and verifies consistent results.  */
 
 static bool
-bitmap_bit_in_range_p_checking (sbitmap s, unsigned int start,
+bitmap_any_bit_in_range_p_checking (sbitmap s, unsigned int start,
unsigned end)
 {
-  bool r1 = bitmap_bit_in_range_p (s, start, end);
+  bool r1 = bitmap_any_bit_in_range_p (s, start, end);
   bool r2 = false;
 
   for (unsigned int i = start; i <= end; i++)
@@ -893,33 +894,33 @@ test_set_range ()
   bitmap_clear (s);
 
   bitmap_set_range (s, 0, 1);
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 0, 0));
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 1, 15));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 0, 0));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 1, 15));
   bitmap_set_range (s, 15, 1);
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 1, 14));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 15, 15));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 1, 14));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 15, 15));
   sbitmap_free (s);
 
   s = sbitmap_alloc (1024);
   bitmap_clear (s);
   bitmap_set_range (s, 512, 1);
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 0, 511));
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 513, 1023));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 512, 512));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 508, 512));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 508, 513));
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 508, 511));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 0, 511));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 513, 1023));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 512, 512));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 508, 512));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 508, 513));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 508, 511));
 
   bitmap_clear (s);
   bitmap_set_range (s, 512, 64);
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 0, 511));
-  ASSERT_FALSE (bitmap_bit_in_range_p_checking (s, 512 + 64, 1023));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 512, 512));
-  ASSERT_TRUE (bitmap_bit_in_range_p_checking (s, 512 + 63, 512 + 63));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 0, 511));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p_checking (s, 512 + 64, 1023));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 512, 512));
+  ASSERT_TRUE (bitmap_any_bit_in_range_p_checking (s, 512 + 63, 512 + 63));
   sbitmap_free (s);
 }
 
-/* Verify bitmap_bit_in_range_p functions for sbitmap.  */
+/* Verify bitmap_any_bit_in_range_p functions for sbitmap.  */
 
 static void
 test_bit_in_range ()
@@ -927,15 +928,15 @@ test_bit_in_range ()
   sbitmap s = sbitmap_alloc (1024);
   bitmap_clear (s);
 
-  ASSERT_FALSE (bitmap_bit_in_range_p (s, 512, 1023));
+  ASSERT_FALSE (bitmap_any_bit_in_range_p (s, 512, 1023));
   bitmap_set_bit (s, 100);
 
-  ASSERT_FALSE (bitmap_bit_in_range_p (s, 512, 1023));
-  ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 99));
-  ASSERT_FALSE (bitmap_bit_in_range_p (s, 101, 1023));
-  ASSERT_TRUE (bitmap_bit_in_range_p (s, 1, 100));
-  ASSERT_TRUE (bitmap_bit_in_range_p (s, 64, 100));
-  ASSERT

[COMMITTED PATCH v4 3/3] asf: Fix calling of emit_move_insn on registers of different modes [PR119884]

2025-05-27 Thread Konstantinos Eleftheriou
This patch uses `lowpart_subreg` for the base register initialization,
instead of zero-extending it. We had tried this solution before, but
we were leaving undefined bytes in the upper part of the register.
This shouldn't be happening as we are supposed to write the whole
register when the load is eliminated. This was occurring when having
multiple stores with the same offset as the load, generating a
register move for all of them, overwriting the bit inserts that
were inserted before them.

In order to overcome this, we are removing redundant stores from the sequence,
i.e. stores that write to addresses that will be overwritten by stores that
come after them in the sequence. We are using the same bitmap that is used
for the load elimination check, to keep track of the bytes that are written
by each store.

Also, we are now allowing the load to be eliminated even when there are
overlaps between the stores, as there is no obvious reason why we shouldn't
do that, we just want the stores to cover all of the load's bytes.

Bootstrapped/regtested on AArch64 and x86_64.

PR rtl-optimization/119884

gcc/ChangeLog:

* avoid-store-forwarding.cc (process_store_forwarding):
Use `lowpart_subreg` for the base register initialization
and remove redundant stores from the store/load sequence.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr119884.c: New test.

Signed-off-by: Konstantinos Eleftheriou 
---

Changes in v4:
- Merged previous 1/3 and 2/3 patches.
- Rename `bitmap_bit_in_range_p` to `bitmap_any_bit_in_range_p`.
- Rename `bitmap_is_range_set_p` to `bitmap_all_bits_in_range_p`.
- Rename `bitmap_bit_in_range_p_1` to `bitmap_bit_in_range_p`.
- Make `bitmap_bit_in_range_p` static.
- Add comment to the redundant store removal.

Changes in v3:
- Remove redundant stores, instead of generating a register move for
the first store that has the same offset as the load only.

Changes in v2:
- Use `lowpart_subreg` for the base register initialization, but
only for the first store that has the same offset as the load.

Changes in v1:
- Add a check for the register modes to match before calling `emit_move_insn`.

 gcc/avoid-store-forwarding.cc| 51 +++-
 gcc/testsuite/gcc.target/i386/pr119884.c | 13 ++
 2 files changed, 53 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr119884.c

diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc
index 5d960adec359..6825d0426ecc 100644
--- a/gcc/avoid-store-forwarding.cc
+++ b/gcc/avoid-store-forwarding.cc
@@ -176,20 +176,28 @@ process_store_forwarding (vec &stores, 
rtx_insn *load_insn,
   /* Memory sizes should be constants at this stage.  */
   HOST_WIDE_INT load_size = MEM_SIZE (load_mem).to_constant ();
 
-  /* If the stores cover all the bytes of the load without overlap then we can
- eliminate the load entirely and use the computed value instead.  */
+  /* If the stores cover all the bytes of the load, then we can eliminate
+ the load entirely and use the computed value instead.
+ We can also eliminate stores on addresses that are overwritten
+ by later stores.  */
 
   sbitmap forwarded_bytes = sbitmap_alloc (load_size);
   bitmap_clear (forwarded_bytes);
 
   unsigned int i;
   store_fwd_info* it;
+  auto_vec redundant_stores;
+  auto_vec store_ind_to_remove;
   FOR_EACH_VEC_ELT (stores, i, it)
 {
   HOST_WIDE_INT store_size = MEM_SIZE (it->store_mem).to_constant ();
-  if (bitmap_bit_in_range_p (forwarded_bytes, it->offset,
-it->offset + store_size - 1))
-   break;
+  if (bitmap_all_bits_in_range_p (forwarded_bytes, it->offset,
+ it->offset + store_size - 1))
+   {
+ redundant_stores.safe_push (*it);
+ store_ind_to_remove.safe_push (i);
+ continue;
+   }
   bitmap_set_range (forwarded_bytes, it->offset, store_size);
 }
 
@@ -215,6 +223,15 @@ process_store_forwarding (vec &stores, 
rtx_insn *load_insn,
fprintf (dump_file, "(Load elimination candidate)\n");
 }
 
+  /* Remove redundant stores from the vector.  Although this is quadratic,
+ there doesn't seem to be much point optimizing it.  The number of
+ redundant stores is expected to be low and the length of the list is
+ limited by a --param.  The dependence checking that we did earlier is
+ also quadratic in the size of this list.  */
+  store_ind_to_remove.reverse ();
+  for (int i : store_ind_to_remove)
+stores.ordered_remove (i);
+
   rtx load = single_set (load_insn);
   rtx dest;
 
@@ -231,18 +248,16 @@ process_store_forwarding (vec &stores, 
rtx_insn *load_insn,
 {
   it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem));
   rtx_insn *insns = NULL;
+  const bool has_zero_offset = it->offset == 0;
 
   /* If we're eliminating the load then find the store with zero offset
 and use it as the base reg

[COMMITTED PATCH v4 2/3] sbitmap: Add bitmap_all_bits_in_range_p function

2025-05-27 Thread Konstantinos Eleftheriou
This patch adds the `bitmap_all_bits_in_range_p` function in sbitmap,
which checks if all the bits in a range are set.

Helper function `bitmap_bit_in_range_p` has also been added, in order
to be used by `bitmap_all_bits_in_range_p` and `bitmap_any_bit_in_range_p`.
When the function's `any_inverted` parameter is true, the function checks
if any of the bits in the range is unset, otherwise it checks if any of
them is set.

Function `bitmap_any_bit_in_range_p` has been updated to call
`bitmap_bit_in_range_p` with the `any_inverted` parameter set to
false, retaining its previous functionality.

Function `bitmap_all_bits_in_range_p` calls `bitmap_bit_in_range_p`
with `any_inverted` set to true and returns the negation of the
result, i.e. true if all the bits in the range are set.

gcc/ChangeLog:

* sbitmap.cc (bitmap_any_bit_in_range_p):
Call and return the result of `bitmap_bit_in_range_p` with the
`any_inverted` parameter set to false.
(bitmap_bit_in_range_p): New function.
(bitmap_all_bits_in_range_p): New function.
* sbitmap.h (bitmap_all_bits_in_range_p): New function.

Signed-off-by: Konstantinos Eleftheriou 
---

Changes in v4:
- Merged previous 1/3 and 2/3 patches.
- Rename `bitmap_bit_in_range_p` to `bitmap_any_bit_in_range_p`.
- Rename `bitmap_is_range_set_p` to `bitmap_all_bits_in_range_p`.
- Rename `bitmap_bit_in_range_p_1` to `bitmap_bit_in_range_p`.
- Make `bitmap_bit_in_range_p` static.

 gcc/sbitmap.cc | 40 
 gcc/sbitmap.h  |  2 ++
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/gcc/sbitmap.cc b/gcc/sbitmap.cc
index 64c7517397b3..075d0d35b810 100644
--- a/gcc/sbitmap.cc
+++ b/gcc/sbitmap.cc
@@ -326,12 +326,13 @@ bitmap_set_range (sbitmap bmap, unsigned int start, 
unsigned int count)
   bmap->elms[start_word] |= mask;
 }
 
-/* Return TRUE if any bit between START and END inclusive is set within
-   the simple bitmap BMAP.  Return FALSE otherwise.  */
+/* Helper function for bitmap_any_bit_in_range_p and
+   bitmap_all_bits_in_range_p.  If ANY_INVERTED is true, the function checks
+   if any bit in the range is unset.  */
 
-bool
-bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned int start,
-  unsigned int end)
+static bool
+bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start,
+  unsigned int end, bool any_inverted)
 {
   gcc_checking_assert (start <= end);
   bitmap_check_index (bmap, end);
@@ -351,7 +352,8 @@ bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned int 
start,
 
   SBITMAP_ELT_TYPE low_mask = ((SBITMAP_ELT_TYPE)1 << start_bitno) - 1;
   SBITMAP_ELT_TYPE mask = high_mask - low_mask;
-  if (bmap->elms[start_word] & mask)
+  const SBITMAP_ELT_TYPE expected_partial = any_inverted ? mask : 0;
+  if ((bmap->elms[start_word] & mask) != expected_partial)
return true;
   start_word++;
 }
@@ -361,9 +363,10 @@ bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned 
int start,
 
   /* Now test words at a time until we hit a partial word.  */
   unsigned int nwords = (end_word - start_word);
+  const SBITMAP_ELT_TYPE expected = any_inverted ? ~(SBITMAP_ELT_TYPE)0 : 0;
   while (nwords)
 {
-  if (bmap->elms[start_word])
+  if (bmap->elms[start_word] != expected)
return true;
   start_word++;
   nwords--;
@@ -373,7 +376,28 @@ bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned 
int start,
   SBITMAP_ELT_TYPE mask = ~(SBITMAP_ELT_TYPE)0;
   if (end_bitno + 1 < SBITMAP_ELT_BITS)
 mask = ((SBITMAP_ELT_TYPE)1 << (end_bitno + 1)) - 1;
-  return (bmap->elms[start_word] & mask) != 0;
+  const SBITMAP_ELT_TYPE expected_partial = any_inverted ? mask : 0;
+  return (bmap->elms[start_word] & mask) != expected_partial;
+}
+
+/* Return TRUE if all bits between START and END inclusive are set within
+   the simple bitmap BMAP.  Return FALSE otherwise.  */
+
+bool
+bitmap_all_bits_in_range_p (const_sbitmap bmap, unsigned int start,
+   unsigned int end)
+{
+  return !bitmap_bit_in_range_p (bmap, start, end, true);
+}
+
+/* Return TRUE if any bit between START and END inclusive is set within
+   the simple bitmap BMAP.  Return FALSE otherwise.  */
+
+bool
+bitmap_any_bit_in_range_p (const_sbitmap bmap, unsigned int start,
+  unsigned int end)
+{
+  return bitmap_bit_in_range_p (bmap, start, end, false);
 }
 
 #if GCC_VERSION < 3400
diff --git a/gcc/sbitmap.h b/gcc/sbitmap.h
index 927d296f4606..633d84fa1510 100644
--- a/gcc/sbitmap.h
+++ b/gcc/sbitmap.h
@@ -289,6 +289,8 @@ extern bool bitmap_xor (sbitmap, const_sbitmap, 
const_sbitmap);
 extern bool bitmap_subset_p (const_sbitmap, const_sbitmap);
 extern bool bitmap_any_bit_in_range_p (const_sbitmap, unsigned int,
   unsigned int);
+extern bool bitmap_all_bits_in_range_p (const_sbitmap, unsigned int,
+   unsi

[avr,patch,applied] PR120441: Limit f7_exp's expo to 1024 (not to 512).

2025-05-27 Thread Georg-Johann Lay

f7_exp's exponent was limited to |a| < 512, but exponents up to
1024 * ln2 = 709 may occur.

Applied as obvious.

Johann

--

AVR: target/120441 - Fix f7_exp for |x| ≥ 512.

f7_exp limited exponents to 512, but 1023 * ln2 ≈ 709,
hence 1024 is a correct limit.

libgcc/config/avr/libf7/
PR target/120441
* libf7.c (f7_exp): Limit aa->expo to 10 (not to 9).

diff --git a/libgcc/config/avr/libf7/libf7.c 
b/libgcc/config/avr/libf7/libf7.c

index a64554c562f..7d70804a59a 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -1649,10 +1649,10 @@ void f7_exp (f7_t *cc, const f7_t *aa)
 return f7_set_nan (cc);

   /* The maximal exponent of 2 for a double is 1023, hence we may limit
- to  |A| < 1023 * ln2 ~ 709.  We limit to  1024 ~ 1.99 * 2^9  */
+ to  |A| < 1023 * ln2 ~ 709.  We limit to  1024 = 2^10  */

   if (f7_class_inf (a_class)
-  || (f7_class_nonzero (a_class) && aa->expo >= 9))
+  || (f7_class_nonzero (a_class) && aa->expo >= 10))
 {
   if (f7_class_sign (a_class))
return f7_clr (cc);


[PATCH] RISC-V: Avoid division by zero in check_builtin_call [PR120436].

2025-05-27 Thread Robin Dapp

Hi,

in check_builtin_call we eventually perform a division by zero when no
vector modes are present.  This patch just avoids the division in that
case.

Regtested on rv64gcv_zvl512b.  I guess this is obvious enough that it can be 
pushed after the CI approves.


Regards
Robin

PR target/120436

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-shapes.cc (struct vset_def):
Avoid division by zero.
(struct vget_def): Ditto.
* config/riscv/riscv-vector-builtins.h (struct function_group_info):
Use required_extensions_specified instead of duplicating code.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr120436.c: New test.
---
.../riscv/riscv-vector-builtins-shapes.cc |  4 ++
gcc/config/riscv/riscv-vector-builtins.h  | 40 +--
.../gcc.target/riscv/rvv/base/pr120436.c  | 16 
3 files changed, 21 insertions(+), 39 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index b855d4c5fa5..9832eb9e3d1 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -908,6 +908,8 @@ struct vset_def : public build_base
  {
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2));
+if (maybe_eq (inner_size, 0))
+  return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
  }
@@ -920,6 +922,8 @@ struct vget_def : public misc_def
  {
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ());
+if (maybe_eq (inner_size, 0))
+  return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
  }
diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
b/gcc/config/riscv/riscv-vector-builtins.h
index ffc289364b0..1f2587ab6af 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -331,45 +331,7 @@ struct function_group_info
  /* Return true if required extension is enabled */
  bool match (required_ext ext_value) const
  {
-switch (ext_value)
-{
-  case VECTOR_EXT:
-return TARGET_VECTOR;
-  case ZVBB_EXT:
-return TARGET_ZVBB;
-  case ZVBB_OR_ZVKB_EXT:
-return (TARGET_ZVBB || TARGET_ZVKB);
-  case ZVBC_EXT:
-return TARGET_ZVBC;
-  case ZVKG_EXT:
-return TARGET_ZVKG;
-  case ZVKNED_EXT:
-return TARGET_ZVKNED;
-  case ZVKNHA_OR_ZVKNHB_EXT:
-return (TARGET_ZVKNHA || TARGET_ZVKNHB);
-  case ZVKNHB_EXT:
-return TARGET_ZVKNHB;
-  case ZVKSED_EXT:
-return TARGET_ZVKSED;
-  case ZVKSH_EXT:
-return TARGET_ZVKSH;
-  case XTHEADVECTOR_EXT:
-   return TARGET_XTHEADVECTOR;
-  case ZVFBFMIN_EXT:
-   return TARGET_ZVFBFMIN;
-  case ZVFBFWMA_EXT:
-   return TARGET_ZVFBFWMA;
-  case XSFVQMACCQOQ_EXT:
-   return TARGET_XSFVQMACCQOQ;
-  case XSFVQMACCDOD_EXT:
-   return TARGET_XSFVQMACCDOD;
-  case XSFVFNRCLIPXFQF_EXT:
-   return TARGET_XSFVFNRCLIPXFQF;
-  case XSFVCP_EXT:
-   return TARGET_XSFVCP;
-  default:
-gcc_unreachable ();
-}
+return required_extensions_specified (ext_value);
  }
  /* The base name, as a string.  */
  const char *base_name;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
new file mode 100644
index 000..d22091e5949
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O0" } */
+
+/* Use -O0 as otherwise the unused intrinsics get
+   optimized away.  We used to ICE here instead of
+   emitting an error.  */
+
+#include "riscv_vector.h"
+
+void
+clean_subreg (int32_t *in, int32_t *out, size_t m) /* { dg-error {this 
operation requires the RVV ISA extension} } */
+{
+  vint16m8_t v24, v8, v16;
+  vint32m8_t result = __riscv_vle32_v_i32m8 (in, 32); /* { dg-error {built-in 
function '__riscv_vle32_v_i32m8\(in, 32\)' requires the 'v' ISA extension} } */
+  vint32m1_t v0 = __riscv_vget_v_i32m8_i32m1 (result, 0);
+}
--
2.49.0



Re: [PATCH 0/3] Redirect to specific target based on TARGET_VERSION_COMPATIBLE

2025-05-27 Thread Alfie Richards

Hi Jeff,

On 22/05/2025 21:02, Jeff Law wrote:



On 5/22/25 9:05 AM, Alfie Richards wrote:

Hi Jeff,

I sent this patch with my implementation a while ago:
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681043.html

There hasn't been any feedback on that patch yet.

These patches are still useful and I would like to go ahead with them. 
I am in favour of using my implementation as it is a bit stronger, but 
it also requires my larger FMV series to be approved first.
Can you ping your larger FMV series?  I strongly suspect everyone is 
digging out from everything that queued up while the trunk was in 
bugfixing stages.



Here's the series: 
https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681047.html


I'd love any feedback on that and to get it moving.

Kind regards,
Alfie



Yangyu -- what are your thoughts here?  If we went with Alfie's patch, 
does it solve the problems you're interested in, and what patches of 
yours would still be relevant if we incorporated Alfie's work?


Jeff





Re: [PATCH] RISC-V: Avoid division by zero in check_builtin_call [PR120436].

2025-05-27 Thread 钟居哲
LGTM 



juzhe.zh...@rivai.ai
 
From: Robin Dapp
Date: 2025-05-27 16:31
To: gcc-patches
CC: kito.ch...@gmail.com; juzhe.zh...@rivai.ai; jeffreya...@gmail.com; 
pan2...@intel.com; rdapp@gmail.com
Subject: [PATCH] RISC-V: Avoid division by zero in check_builtin_call 
[PR120436].
Hi,
 
in check_builtin_call we eventually perform a division by zero when no
vector modes are present.  This patch just avoids the division in that
case.
 
Regtested on rv64gcv_zvl512b.  I guess this is obvious enough that it can be 
pushed after the CI approves.
 
Regards
Robin
 
PR target/120436
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-shapes.cc (struct vset_def):
Avoid division by zero.
(struct vget_def): Ditto.
* config/riscv/riscv-vector-builtins.h (struct function_group_info):
Use required_extensions_specified instead of duplicating code.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/pr120436.c: New test.
---
.../riscv/riscv-vector-builtins-shapes.cc |  4 ++
gcc/config/riscv/riscv-vector-builtins.h  | 40 +--
.../gcc.target/riscv/rvv/base/pr120436.c  | 16 
3 files changed, 21 insertions(+), 39 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index b855d4c5fa5..9832eb9e3d1 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -908,6 +908,8 @@ struct vset_def : public build_base
   {
 poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
 poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2));
+if (maybe_eq (inner_size, 0))
+  return false;
 unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
 return c.require_immediate (1, 0, nvecs - 1);
   }
@@ -920,6 +922,8 @@ struct vget_def : public misc_def
   {
 poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
 poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ());
+if (maybe_eq (inner_size, 0))
+  return false;
 unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
 return c.require_immediate (1, 0, nvecs - 1);
   }
diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
b/gcc/config/riscv/riscv-vector-builtins.h
index ffc289364b0..1f2587ab6af 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -331,45 +331,7 @@ struct function_group_info
   /* Return true if required extension is enabled */
   bool match (required_ext ext_value) const
   {
-switch (ext_value)
-{
-  case VECTOR_EXT:
-return TARGET_VECTOR;
-  case ZVBB_EXT:
-return TARGET_ZVBB;
-  case ZVBB_OR_ZVKB_EXT:
-return (TARGET_ZVBB || TARGET_ZVKB);
-  case ZVBC_EXT:
-return TARGET_ZVBC;
-  case ZVKG_EXT:
-return TARGET_ZVKG;
-  case ZVKNED_EXT:
-return TARGET_ZVKNED;
-  case ZVKNHA_OR_ZVKNHB_EXT:
-return (TARGET_ZVKNHA || TARGET_ZVKNHB);
-  case ZVKNHB_EXT:
-return TARGET_ZVKNHB;
-  case ZVKSED_EXT:
-return TARGET_ZVKSED;
-  case ZVKSH_EXT:
-return TARGET_ZVKSH;
-  case XTHEADVECTOR_EXT:
- return TARGET_XTHEADVECTOR;
-  case ZVFBFMIN_EXT:
- return TARGET_ZVFBFMIN;
-  case ZVFBFWMA_EXT:
- return TARGET_ZVFBFWMA;
-  case XSFVQMACCQOQ_EXT:
- return TARGET_XSFVQMACCQOQ;
-  case XSFVQMACCDOD_EXT:
- return TARGET_XSFVQMACCDOD;
-  case XSFVFNRCLIPXFQF_EXT:
- return TARGET_XSFVFNRCLIPXFQF;
-  case XSFVCP_EXT:
- return TARGET_XSFVCP;
-  default:
-gcc_unreachable ();
-}
+return required_extensions_specified (ext_value);
   }
   /* The base name, as a string.  */
   const char *base_name;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
new file mode 100644
index 000..d22091e5949
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O0" } */
+
+/* Use -O0 as otherwise the unused intrinsics get
+   optimized away.  We used to ICE here instead of
+   emitting an error.  */
+
+#include "riscv_vector.h"
+
+void
+clean_subreg (int32_t *in, int32_t *out, size_t m) /* { dg-error {this 
operation requires the RVV ISA extension} } */
+{
+  vint16m8_t v24, v8, v16;
+  vint32m8_t result = __riscv_vle32_v_i32m8 (in, 32); /* { dg-error {built-in 
function '__riscv_vle32_v_i32m8\(in, 32\)' requires the 'v' ISA extension} } */
+  vint32m1_t v0 = __riscv_vget_v_i32m8_i32m1 (result, 0);
+}
-- 
2.49.0
 
 


Re: [PATCH v1] libstdc++: Fix bug in default ctor of extents.

2025-05-27 Thread Jonathan Wakely
On Mon, 26 May 2025 at 08:49, Tomasz Kaminski  wrote:
>
>
>
> On Sat, May 24, 2025 at 1:29 PM Luc Grosheintz  
> wrote:
>>
>> The array that stores the dynamic extents used to be default
>> initialized. The standard requires value initialization. This
>> commit fixes the bug and adds a test.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/std/mdspan: Value initialize the array storing the
>> dynamic extents.
>> * testsuite/23_containers/mdspan/extents/ctor_default.cc: New
>> test.
>>
>> Signed-off-by: Luc Grosheintz 
>> ---
>
> LGTM, thanks for noticing and fixing it.
>  We also need approval from the maintainer.

OK for trunk.

>>
>>  libstdc++-v3/include/std/mdspan   |  2 +-
>>  .../mdspan/extents/ctor_default.cc| 41 +++
>>  2 files changed, 42 insertions(+), 1 deletion(-)
>>  create mode 100644 
>> libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
>>
>> diff --git a/libstdc++-v3/include/std/mdspan 
>> b/libstdc++-v3/include/std/mdspan
>> index 47cfa405e44..bcf2fa60fea 100644
>> --- a/libstdc++-v3/include/std/mdspan
>> +++ b/libstdc++-v3/include/std/mdspan
>> @@ -146,7 +146,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>>private:
>> using _S_storage = __array_traits<_IndexType, 
>> _S_rank_dynamic>::_Type;
>> -   [[no_unique_address]] _S_storage _M_dynamic_extents;
>> +   [[no_unique_address]] _S_storage _M_dynamic_extents{};
>
> We know that these are integral types, so we can use {}.
>>
>>};
>>
>>  template
>> diff --git 
>> a/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc 
>> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
>> new file mode 100644
>> index 000..eec300f6896
>> --- /dev/null
>> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/ctor_default.cc
>> @@ -0,0 +1,41 @@
>> +// { dg-do run { target c++23 } }
>> +#include 
>> +
>> +#include 
>> +#include 
>> +
>> +constexpr auto dyn = std::dynamic_extent;
>> +
>> +template
>> +  constexpr void
>> +  test_default_ctor()
>> +  {
>> +Extents exts;
>> +for(size_t i = 0; i < Extents::rank(); ++i)
>> +  if(exts.static_extent(i) == std::dynamic_extent)
>> +   VERIFY(exts.extent(i) == 0);
>> +  else
>> +   VERIFY(exts.extent(i) == Extents::static_extent(i));
>> +  }
>> +
>> +constexpr bool
>> +test_default_ctor_all()
>> +{
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  test_default_ctor>();
>> +  return true;
>> +}
>> +
>> +int
>> +main()
>> +{
>> +  test_default_ctor_all();
>> +  static_assert(test_default_ctor_all());
>> +  return 0;
>> +}
>> --
>> 2.49.0
>>


Re: [committed] libstdc++: Fix test failures for 32-bit AIX

2025-05-27 Thread Jonathan Wakely
On Tue, 27 May 2025 at 13:46, Tomasz Kaminski  wrote:
>
>
>
> On Tue, May 27, 2025 at 2:38 PM Jonathan Wakely  wrote:
>>
>> With -maix32 (the default) we only have 16-bit wchar_t so these tests
>> fail. The debug.cc one is because we use -fwide-exec-charset=UTF-32BE
>> which tries to encode each wide character as four bytes in a 2-byte
>> wchar_t. The format.cc one is because the clown face character can't be
>> encoded in a single 16-bit wchar_t.
>
> What is the encoding for the wchar_t? If it is UTF-16LE or UTF16-BE we should
> define them as fwide-exec-charset. What is the box you are using for tests?

$ g++ -E -dM -x c++ /dev/null | grep WIDE
#define __GNUC_WIDE_EXECUTION_CHARSET_NAME "UTF-16BE"

I use cfarm119 in the compile farm.



Re: [PATCH v1 0/1] Add error message to cmp_* and in_range.

2025-05-27 Thread Luc Grosheintz




On 5/27/25 14:34, Jonathan Wakely wrote:

On Tue, 27 May 2025 at 07:51, Luc Grosheintz  wrote:


While reading the compiler output of

 make check-target-libstdc++-v3

for buggy code, e.g. cmp_equal(1.0, 1.0), the error message
was very short, and I saw no hint that neither of the two
template arguments was an integer. Essentially, the trace
was:

   1. my faulty line
   2. required from here
   3. static_assert(false)

On regular builds with g++ the error message mentions the
static_assert(__is_standard_integer), and is much less
cryptic. Please ignore if this is intended behaviour.


That's because the testsuite runs with -fdiagnostics-plain-output
which is certainly the intended behaviour. That's useful for
machine-readable output (as needed by the testsuite so that additional
diagnostic notes don't confuse the testsuite) but isn't intended to be
used for human-readable output.

As long as the diagnostic is fine for normal use I don't think we need to
optimize the code for the purposes of the testsuite. And since GCC
doesn't support any extended integer types, I'm not really concerned
about that misinterpretation of the __is_standard_integer trait. Maybe
it would make sense to rename that to __is_signed_or_unsigned_integer
though. We do already have __cv_unqual_signed_or_unsigned_integer_type
in .




That makes sense, and sorry for submitting this :) If there's no
extended ints, then maybe the name is quite nice? It's certainly
shorter.






Tested on x86_64 with:

 make check-target-libstdc++-v3

(in a no PCH build).

Luc Grosheintz (1):
   libstdc++: Improve diagnostic message for `cmp_*` and `in_range`.

  libstdc++-v3/include/std/utility | 18 --
  1 file changed, 12 insertions(+), 6 deletions(-)

--
2.49.0







Re: [PATCH v4 1/3][C FE]Extend "counted_by" attribute to pointer fields of structures.

2025-05-27 Thread Qing Zhao
Ping. 

thanks.

Qing

> On May 13, 2025, at 17:03, Qing Zhao  wrote:
> 
> For example:
> struct PP {
>  size_t count2;
>  char other1;
>  char *array2 __attribute__ ((counted_by (count2)));
>  int other2;
> } *pp;
> 
> specifies that the "array2" is an array that is pointed to by the
> pointer field, and its number of elements is given by the field
> "count2" in the same structure.
> 
> gcc/c-family/ChangeLog:
> 
> * c-attribs.cc (handle_counted_by_attribute): Accept counted_by
> attribute for pointer fields.
> 
> gcc/c/ChangeLog:
> 
> * c-decl.cc (verify_counted_by_attribute): Change the 2nd argument
> to a vector of fields with counted_by attribute. Verify all fields
> in this vector.
> (finish_struct): Collect all the fields with counted_by attribute
> to a vector and pass this vector to verify_counted_by_attribute.
> 
> gcc/ChangeLog:
> 
> * doc/extend.texi: Extend counted_by attribute to pointer fields in
> structures. Add one more requirement to pointers with counted_by
>attribute.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/flex-array-counted-by.c: Update test.
> * gcc.dg/pointer-counted-by-2.c: New test.
> * gcc.dg/pointer-counted-by-3.c: New test.
> * gcc.dg/pointer-counted-by.c: New test.
> ---
> gcc/c-family/c-attribs.cc|  25 +++-
> gcc/c/c-decl.cc  |  91 +++--
> gcc/doc/extend.texi  |  38 +-
> gcc/testsuite/gcc.dg/flex-array-counted-by.c |   2 +-
> gcc/testsuite/gcc.dg/pointer-counted-by-2.c  |  10 ++
> gcc/testsuite/gcc.dg/pointer-counted-by-3.c  | 127 +++
> gcc/testsuite/gcc.dg/pointer-counted-by.c|  73 +++
> 7 files changed, 315 insertions(+), 51 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/pointer-counted-by-2.c
> create mode 100644 gcc/testsuite/gcc.dg/pointer-counted-by-3.c
> create mode 100644 gcc/testsuite/gcc.dg/pointer-counted-by.c
> 
> diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
> index 5a0e3d328ba..717133afc59 100644
> --- a/gcc/c-family/c-attribs.cc
> +++ b/gcc/c-family/c-attribs.cc
> @@ -2906,22 +2906,34 @@ handle_counted_by_attribute (tree *node, tree name,
> " declaration %q+D", name, decl);
>   *no_add_attrs = true;
> }
> -  /* This attribute only applies to field with array type.  */
> -  else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
> +  /* This attribute only applies to field with array type or pointer type.  
> */
> +  else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE
> +   && TREE_CODE (TREE_TYPE (decl)) != POINTER_TYPE)
> {
>   error_at (DECL_SOURCE_LOCATION (decl),
> - "%qE attribute is not allowed for a non-array field",
> - name);
> + "%qE attribute is not allowed for a non-array"
> + " or non-pointer field", name);
>   *no_add_attrs = true;
> }
>   /* This attribute only applies to a C99 flexible array member type.  */
> -  else if (! c_flexible_array_member_type_p (TREE_TYPE (decl)))
> +  else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
> +   && !c_flexible_array_member_type_p (TREE_TYPE (decl)))
> {
>   error_at (DECL_SOURCE_LOCATION (decl),
> "%qE attribute is not allowed for a non-flexible"
> " array member field", name);
>   *no_add_attrs = true;
> }
> +  /* This attribute cannot be applied to a pointer type whose pointee type
> + is void.  */
> +  else if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE
> +   && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == VOID_TYPE)
> +{
> +  error_at (DECL_SOURCE_LOCATION (decl),
> + "%qE attribute is not allowed for a pointer field"
> + " with void pointee type", name);
> +  *no_add_attrs = true;
> +}
>   /* The argument should be an identifier.  */
>   else if (TREE_CODE (argval) != IDENTIFIER_NODE)
> {
> @@ -2930,7 +2942,8 @@ handle_counted_by_attribute (tree *node, tree name,
>   *no_add_attrs = true;
> }
>   /* Issue error when there is a counted_by attribute with a different
> - field as the argument for the same flexible array member field.  */
> + field as the argument for the same flexible array member or
> + pointer field.  */
>   else if (old_counted_by != NULL_TREE)
> {
>   tree old_fieldname = TREE_VALUE (TREE_VALUE (old_counted_by));
> diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
> index 8c420f22976..53e7b726ee6 100644
> --- a/gcc/c/c-decl.cc
> +++ b/gcc/c/c-decl.cc
> @@ -9448,56 +9448,62 @@ c_update_type_canonical (tree t)
> }
> }
> 
> -/* Verify the argument of the counted_by attribute of the flexible array
> -   member FIELD_DECL is a valid field of the containing structure,
> -   STRUCT_TYPE, Report error and remove this attribute when it's not.  */
> +/* Verify the argument of the counted_by attribute of each of the
> +   FIELDS_WITH_COUNTED_BY is a valid field of the containing structure,
> +   STRUCT_TYPE, Report error and remove the corresponding attribute
> +   when it's not.  */
> 
> static void
> -verify_counted_by_attribute (tree struct_type, tree f

Re: [PATCH v2] Evaluate the object size by the size of the pointee type when the type is a structure with flexible array member which is annotated with counted_by.

2025-05-27 Thread Qing Zhao
Ping.

thanks.

Qing

> On May 7, 2025, at 12:59, Qing Zhao  wrote:
> 
> Hi, 
> 
> This is the 2nd version of the patch for:
> 
> Evaluate the object size by the size of the pointee type when the type
> is a structure with flexible array member which is annotated with
> counted_by. 
> 
> Per the following discussion: (Questions on replacing a structure
> pointer reference to a call to .ACCESS_WITH_SIZE in C FE)
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2025-April/680540.html
> https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681229.html
> 
> The summary of the above discussion: 
>   A. It’s not safe in general to replace a structure pointer 
> reference to a call to .ACCESS_WITH_SIZE in C FE. 
>  Since data-flow analysis is needed to make sure that the access
> to the size member is valid, i.e, the structure is accessible 
> and initialized, etc.
> 
>   B. It should be safe to generate the reference to field member
> when we evaluate the BDOS builtin as the 1st version of the
> patch . And doing this in tree-object-size should also cover
> -fsanitize=object-size.
> 
>   C. When generating the reference to the field member in tree-object-size,
> we should guard this reference with a check that the pointer to 
> the structure is valid.
> 
> Compare to the 1st version, the major changes are based on the above:
> 
>   1. Update the comments per Sid's suggestions.
>   2. Reorganize the code to make it easier to understand:
> Add a new routine "is_pointee_fam_struct_with_counted_by" to make 
> the checking easier;
>   3. Add one more cond_expr to guard the size_expr as:
> 
>  (ptr == NULL) ? SIZE_UNKNOWN : SIZE_EXPR
> 
>   4. In order to gimplify the above COND_EXPR (the current 
> force_gimple_operand does not work for the new control flow), add 
> one more new routine "insert_cond_and_size" to construct the 
> whole control flow graph for the above cond_expr. Please refer to:
>  https://gcc.gnu.org/pipermail/gcc-patches/2025-April/682021.html
>  for this discussion.
> 
>   5. Add new testing cases for testing the ptr==NULL cases. 
> 
> The patch has been bootstrapped and regression tested on both x86 and aarch64.
> 
> Okay for trunk?
> 
> thanks.
> 
> Qing
> 
> 
> In tree-object-size.cc, if the size is UNKNOWN after evaluating use-def
> chain, We can evaluate the SIZE of the pointee TYPE ONLY when this TYPE
> is a structure type with flexible array member which is attached a
> counted_by attribute, since a structure with FAM can not be an element
> of an array, so, the pointer must point to a single object with this
> structure with FAM.
> 
> This is only available for C now.
> 
> gcc/c/ChangeLog:
> 
> * c-lang.cc (LANG_HOOKS_BUILD_COUNTED_BY_REF):
> Define to below function.
> * c-tree.h (c_build_counted_by_ref): New extern function.
> * c-typeck.cc (build_counted_by_ref): Rename to ...
> (c_build_counted_by_ref): ...this.
> (handle_counted_by_for_component_ref): Call the renamed function.
> 
> gcc/ChangeLog:
> 
> * langhooks-def.h (LANG_HOOKS_BUILD_COUNTED_BY_REF):
> New language hook.
> * langhooks.h (struct lang_hooks_for_types): Add
> build_counted_by_ref.
> * tree-object-size.cc (struct object_size_info): Add a new field
> insert_cf.
> (insert_cond_and_size): New function.
> (gimplify_size_expressions): Handle new field insert_cf.
> (compute_builtin_object_size): Init the new field to false;
> (is_pointee_fam_struct_with_counted_by): New function.
> (record_with_fam_object_size): New function.
> (collect_object_sizes_for): Call record_with_fam_object_size.
> (dynamic_object_sizes_execute_one): Special handling for insert_cf.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/flex-array-counted-by-3.c: Update test for whole object size;
> * gcc.dg/flex-array-counted-by-4.c: Likewise.
> * gcc.dg/flex-array-counted-by-5.c: Likewise.
> * gcc.dg/flex-array-counted-by-10.c: New test.
> ---
> gcc/c/c-lang.cc   |   3 +
> gcc/c/c-tree.h|   1 +
> gcc/c/c-typeck.cc |   6 +-
> gcc/langhooks-def.h   |   4 +-
> gcc/langhooks.h   |   5 +
> .../gcc.dg/flex-array-counted-by-10.c |  41 +++
> .../gcc.dg/flex-array-counted-by-3.c  |   5 +
> .../gcc.dg/flex-array-counted-by-4.c  |  34 +-
> .../gcc.dg/flex-array-counted-by-5.c  |   4 +
> gcc/tree-object-size.cc   | 302 +-
> 10 files changed, 383 insertions(+), 22 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/flex-array-counted-by-10.c
> 
> diff --git a/gcc/c/c-lang.cc b/gcc/c/c-lang.cc
> index c69077b2a93..e9ec9e6e64a 100644
> --- a/gcc/c/c-lang.cc
> +++ b/gcc/c/c-lang.cc
> @@ -51,6 +51,9 @@ enum c_language_kind c_language = clk_c;
> #undef LANG_HOOKS_GET_SARIF_SOURCE_LANGUAGE
> #define LANG_HOOKS_GET_SARIF_SOURCE_LANGUAGE c_get_sarif_source_language
> 
> +#undef LANG_HOOKS_BU

[PATCH] testsuite: arm: remove arm32 check from a few effective-targets

2025-05-27 Thread Christophe Lyon
A few arm effective-targets call check_effective_target_arm32 even
though they would force an -march=XXX flag which supports Arm and/or
Thumb-2, thus making the arm32 check useless.  This has an impact when
the toolchain is configured with a default -march or -mcpu which
supports Thumb-1 only: in such a case, arm32 is false and we skip many
tests, thus reducing coverage.

This patch removes the call to check_effective_target_arm32 where it
is useless, enabling almost 6400 tests.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp
(check_effective_target_arm_neon_ok_nocache): Remove arm32 check.
(check_effective_target_arm_neon_fp16_ok_nocache): Likewise.
(check_effective_target_arm_neon_softfp_fp16_ok_nocache): Likewise.
(check_effective_target_arm_v8_neon_ok_nocache): Likewise.
(check_effective_target_arm_neonv2_ok_nocache): Likewise.
---
 gcc/testsuite/lib/target-supports.exp | 69 ---
 1 file changed, 31 insertions(+), 38 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 245943b7c3f..c7879bfdd5b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5470,23 +5470,21 @@ proc add_options_for_arm_vfp3 { flags } {
 proc check_effective_target_arm_neon_ok_nocache { } {
 global et_arm_neon_flags
 set et_arm_neon_flags ""
-if { [check_effective_target_arm32] } {
-   foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon 
-mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -mcpu=unset -march=armv7-a" 
"-mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard 
-mcpu=unset -march=armv7-a"} {
-   if { [check_no_compiler_messages_nocache arm_neon_ok object {
-   #include 
-   int dummy;
-   #ifndef __ARM_NEON__
-   #error not NEON
-   #endif
-   /* Avoid the case where a test adds -mfpu=neon, but the 
toolchain is
-  configured for -mcpu=arm926ej-s, for example.  */
-   #if __ARM_ARCH < 7 || __ARM_ARCH_PROFILE == 'M'
-   #error Architecture does not support NEON.
-   #endif
-   } "$flags"] } {
-   set et_arm_neon_flags $flags
-   return 1
-   }
+foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon" "-mfpu=neon 
-mfloat-abi=softfp" "-mfpu=neon -mfloat-abi=softfp -mcpu=unset -march=armv7-a" 
"-mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard" "-mfpu=neon -mfloat-abi=hard 
-mcpu=unset -march=armv7-a"} {
+   if { [check_no_compiler_messages_nocache arm_neon_ok object {
+   #include 
+   int dummy;
+   #ifndef __ARM_NEON__
+   #error not NEON
+   #endif
+   /* Avoid the case where a test adds -mfpu=neon, but the toolchain is
+   configured for -mcpu=arm926ej-s, for example.  */
+   #if __ARM_ARCH < 7 || __ARM_ARCH_PROFILE == 'M'
+   #error Architecture does not support NEON.
+   #endif
+   } "$flags"] } {
+   set et_arm_neon_flags $flags
+   return 1
}
 }
 
@@ -5672,8 +5670,7 @@ proc check_effective_target_arm_neon_fp16_ok_nocache { } {
 global et_arm_neon_fp16_flags
 global et_arm_neon_flags
 set et_arm_neon_fp16_flags ""
-if { [check_effective_target_arm32]
-&& [check_effective_target_arm_neon_ok] } {
+if { [check_effective_target_arm_neon_ok] } {
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
   "-mfpu=neon-fp16 -mfloat-abi=softfp"
   "-mfp16-format=ieee"
@@ -5711,8 +5708,7 @@ proc 
check_effective_target_arm_neon_softfp_fp16_ok_nocache { } {
 global et_arm_neon_softfp_fp16_flags
 global et_arm_neon_flags
 set et_arm_neon_softfp_fp16_flags ""
-if { [check_effective_target_arm32]
-&& [check_effective_target_arm_neon_ok] } {
+if { [check_effective_target_arm_neon_ok] } {
foreach flags {"-mfpu=neon-fp16 -mfloat-abi=softfp"
   "-mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee"} 
{
if { [check_no_compiler_messages_nocache arm_neon_softfp_fp16_ok 
object {
@@ -5848,22 +5844,20 @@ proc check_effective_target_arm_fp16_none_ok { } {
 proc check_effective_target_arm_v8_neon_ok_nocache { } {
 global et_arm_v8_neon_flags
 set et_arm_v8_neon_flags ""
-if { [check_effective_target_arm32] } {
-   foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" 
"-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} {
-   if { [check_no_compiler_messages_nocache arm_v8_neon_ok object {
-   #if __ARM_ARCH < 8
-   #error not armv8 or later
-   #endif
-   #include "arm_neon.h"
-   void
-   foo ()
-   {
- __asm__ volatile ("vrintn.f32 q0, q0");
-   }
-   } "

Re: [PATCH v4 0/8] Implement layouts from mdspan.

2025-05-27 Thread Luc Grosheintz

I believe we're now through the larger questions about
how to implement layouts. If reviewing all three over and over
is too painful, it might now make sense to split the patch into
separate patches, one per layout.

On 5/26/25 16:04, Luc Grosheintz wrote:

This follows up on:
https://gcc.gnu.org/pipermail/libstdc++/2025-May/061572.html

Note that this patch series can only be applied after merging:
https://gcc.gnu.org/pipermail/libstdc++/2025-May/061653.html

The important changes since v3 are:
   * Fixed and tested several related overflow issues that occurred in
 extents of size 0 by using `size_t` to compute products.
   * Fixed and tested default ctors.
   * Add missing code for module support.
   * Documented deviation from standard.

The smaller changes include:
   * Squashed the three small commits that make cosmetic changes to
 std::extents.
   * Remove layout_left related changes from the layout_stride commit.
   * Remove superfluous `mapping(extents_type(__exts))`.
   * Fix indenting and improve comment in layout_stride.
   * Add an easy check for representable required_span_size to
 layout_stride.
   * Inline __dynamic_extents_prod

Thank you Tomasz for all the great reviews!

Luc Grosheintz (8):
   libstdc++: Improve naming and whitespace for extents.
   libstdc++: Implement layout_left from mdspan.
   libstdc++: Add tests for layout_left.
   libstdc++: Implement layout_right from mdspan.
   libstdc++: Add tests for layout_right.
   libstdc++: Implement layout_stride from mdspan.
   libstdc++: Add tests for layout_stride.
   libstdc++: Make layout_left(layout_stride) noexcept.

  libstdc++-v3/include/std/mdspan   | 711 +-
  libstdc++-v3/src/c++23/std.cc.in  |   5 +-
  .../mdspan/layouts/class_mandate_neg.cc   |  42 ++
  .../23_containers/mdspan/layouts/ctors.cc | 459 +++
  .../23_containers/mdspan/layouts/empty.cc |  78 ++
  .../23_containers/mdspan/layouts/mapping.cc   | 568 ++
  .../23_containers/mdspan/layouts/stride.cc| 500 
  7 files changed, 2349 insertions(+), 14 deletions(-)
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/stride.cc





Re: [PATCH v4 2/8] libstdc++: Implement layout_left from mdspan.

2025-05-27 Thread Tomasz Kaminski
On Mon, May 26, 2025 at 9:15 PM Luc Grosheintz 
wrote:

>
>
> On 5/26/25 18:17, Tomasz Kaminski wrote:
> > On Mon, May 26, 2025 at 4:15 PM Luc Grosheintz  >
> > wrote:
> >
> >> Implements the parts of layout_left that don't depend on any of the
> >> other layouts.
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >>  * include/std/mdspan (layout_left): New class.
> >>  * src/c++23/std.cc.in: Add layout_left.
> >>
> >> Signed-off-by: Luc Grosheintz 
> >> ---
> >>   libstdc++-v3/include/std/mdspan  | 304 ++-
> >>   libstdc++-v3/src/c++23/std.cc.in |   1 +
> >>   2 files changed, 304 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/libstdc++-v3/include/std/mdspan
> >> b/libstdc++-v3/include/std/mdspan
> >> index 0f49b0e09a0..d81072596b4 100644
> >> --- a/libstdc++-v3/include/std/mdspan
> >> +++ b/libstdc++-v3/include/std/mdspan
> >> @@ -144,6 +144,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>{ return __exts[__i]; });
> >>}
> >>
> >> +   static constexpr span
> >> +   _S_static_extents(size_t __begin, size_t __end) noexcept
> >> +   {
> >> + return {_Extents.data() + __begin, _Extents.data() + __end};
> >> +   }
> >>
> > Oh, I think I was very unclear, regarding removing the dependency on
> > index_type.
> > What I was thinking of, is changing this function to:
> > +   static consteval array&
> > +   _S_static_extents() noexcept
> > +   {
> > + return _Extents;
> > +   }
>
> Sorry, this was clear. I implemented it once, then we found the
> issues with overflow and while cleaning out the mess I'd made
> to compute things with an __unsigned_prod, I reverted this back
> to the original version.
>
> If you look at __static_extents_prod, it already doesn't depend
> on the index_type. Therefore, it a) felt like a change related
> to something we'd agreed to postpone; and b) I preferred the
> symmetry between __static_extents and __dynamic_extents; I'll
> change it to the proposed version.
>
I think we are good keeping the current versions, and we can return to it
later to see if we can make static_extent be folded at compile time,
and look at some runtime instruction/binary size tradeoffs then.

>
> >
> >> +
> >> +   constexpr span
> >> +   _M_dynamic_extents(size_t __begin, size_t __end) const noexcept
> >> +   requires (_Extents.size() > 0)
> >> +   {
> >> + return {_M_dyn_exts + _S_dynamic_index[__begin],
> >> + _M_dyn_exts + _S_dynamic_index[__end]};
> >> +   }
> >> +
> >> private:
> >>  using _S_storage = __array_traits<_IndexType,
> >> _S_rank_dynamic>::_Type;
> >>  [[no_unique_address]] _S_storage _M_dyn_exts{};
> >> @@ -160,6 +174,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>  || _Extent <= numeric_limits<_IndexType>::max();
> >> }
> >>
> >> +  namespace __mdspan
> >> +  {
> >> +template
> >> +  constexpr span
> >> +  __static_extents(size_t __begin = 0, size_t __end =
> >> _Extents::rank())
> >> +  { return _Extents::_S_storage::_S_static_extents(__begin,
> __end); }
> >
> > Also adjusting this one to have:
> > template
> >   constexpr const std::array&
> >  __static_extents()
> > +  { return _Extents::_S_storage::_S_static_extents; }
> >
> >> +
> >> +template
> >> +  constexpr span
> >> +  __dynamic_extents(const _Extents& __exts, size_t __begin = 0,
> >> +   size_t __end = _Extents::rank())
> >> +  {
> >> +   return __exts._M_exts._M_dynamic_extents(__begin, __end);
> >> +  }
> >> +  }
> >> +
> >> template
> >>   class extents
> >>   {
> >> @@ -251,7 +281,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>  : _M_exts(span(__exts))
> >>  { }
> >>
> >> -
> >> template<__mdspan::__valid_index_type _OIndexType,
> >> size_t _Nm>
> >>  requires (_Nm == rank() || _Nm == rank_dynamic())
> >>  constexpr explicit(_Nm != rank_dynamic())
> >> @@ -276,6 +305,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>  }
> >>
> >>   private:
> >> +  friend span
> >> +  __mdspan::__static_extents(size_t, size_t);
> >> +
> >> +  friend span
> >> +  __mdspan::__dynamic_extents(const extents&, size_t,
> >> size_t);
> >> +
> >> using _S_storage = __mdspan::_ExtentsStorage<
> >>  _IndexType, array{_Extents...}>;
> >> [[no_unique_address]] _S_storage _M_exts;
> >> @@ -286,6 +321,58 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>
> >> namespace __mdspan
> >> {
> >> +template
> >> +  constexpr bool
> >> +  __contains_zero(span<_Tp, _Nm> __exts)
> >> +  {
> >> +   for (size_t __i = 0; __i < __exts.size(); ++__i)
> >> + if (__exts[__i] == 0)
> >> +   return true;
> >> +   return false;
> >> +  }
> >> +
> >> +constexpr size_t
> >> +__static_extents_prod(const auto& __sta_exts)
> >>
> > Then this could be implemented as:
> > template

[PATCH] tree-optimization/117965 - phiprop validity checking is too strict

2025-05-27 Thread Richard Biener
The PR shows that when using std::clamp from the C++ standard library
and there is surrounding code using exceptions then phiprop can fail
to simplify the code so phiopt can turn the clamping into efficient
min/max operations.

The validation code is needlessly complicated, stemming from the
time we had memory-SSA with multiple virtual operands.  The following
simplifies this, thereby fixing this issue.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/117965
* tree-ssa-phiprop.cc (phivn_valid_p): Remove.
(propagate_with_phi): Pass in virtual PHI node from BB,
rewrite load motion validity check to require the same
virtual use along all paths.

* g++.dg/tree-ssa/pr117965-1.C: New testcase.
* g++.dg/tree-ssa/pr117965-2.C: Likewise.
---
 gcc/testsuite/g++.dg/tree-ssa/pr117965-1.C | 28 +++
 gcc/testsuite/g++.dg/tree-ssa/pr117965-2.C | 19 +
 gcc/tree-ssa-phiprop.cc| 96 +++---
 3 files changed, 97 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr117965-1.C
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr117965-2.C

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr117965-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr117965-1.C
new file mode 100644
index 000..84f0f2b75df
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr117965-1.C
@@ -0,0 +1,28 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-phiopt1" }
+
+void f();
+void f(int&);
+
+static inline const int &
+clamp(const int &v, const int &min, const int &max)
+{
+  const int &t = (v > min) ? v : min;
+  return t > max ? max : t;
+}
+
+void clamp2(int num1) {
+  try {
+  int low = -12, high = 12;
+  f();
+  num1 = clamp(num1, low, high);
+  f(num1);
+  } catch(...)
+  {
+   __builtin_printf("caught.\n");
+return;
+  }
+}
+
+// { dg-final { scan-tree-dump-times "MAX" 1 "phiopt1" } } */
+// { dg-final { scan-tree-dump-times "MIN" 1 "phiopt1" } } */
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr117965-2.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr117965-2.C
new file mode 100644
index 000..3e94fb3c05c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr117965-2.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++17 } }
+// { dg-options "-O2 -fdump-tree-phiopt1" }
+
+#include 
+#include 
+
+void clamp2 ()
+{
+  float low = -1.0f, high = 1.0f;
+  float num1, num2, num3;
+  std::cin >> num1;
+  num1 = std::clamp(num1, low, high);
+  std::cout << num1;
+}
+
+// { dg-final { scan-tree-dump-times " < -1.0" 1 "phiopt1" } }
+// { dg-final { scan-tree-dump-times " \\\? -1.0e\\\+0 : " 1 "phiopt1" } }
+// { dg-final { scan-tree-dump-times " > 1.0" 1 "phiopt1" } }
+// { dg-final { scan-tree-dump-times " \\\? 1.0e\\\+0 : " 1 "phiopt1" } }
diff --git a/gcc/tree-ssa-phiprop.cc b/gcc/tree-ssa-phiprop.cc
index a2e1fb16a30..897bd583ea7 100644
--- a/gcc/tree-ssa-phiprop.cc
+++ b/gcc/tree-ssa-phiprop.cc
@@ -99,35 +99,6 @@ struct phiprop_d
   tree vuse;
 };
 
-/* Verify if the value recorded for NAME in PHIVN is still valid at
-   the start of basic block BB.  */
-
-static bool
-phivn_valid_p (struct phiprop_d *phivn, tree name, basic_block bb)
-{
-  tree vuse = phivn[SSA_NAME_VERSION (name)].vuse;
-  gimple *use_stmt;
-  imm_use_iterator ui2;
-  bool ok = true;
-
-  /* The def stmts of the virtual uses need to be dominated by bb.  */
-  gcc_assert (vuse != NULL_TREE);
-
-  FOR_EACH_IMM_USE_STMT (use_stmt, ui2, vuse)
-{
-  /* If BB does not dominate a VDEF, the value is invalid.  */
-  if ((gimple_vdef (use_stmt) != NULL_TREE
-  || gimple_code (use_stmt) == GIMPLE_PHI)
- && !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), bb))
-   {
- ok = false;
- break;
-   }
-}
-
-  return ok;
-}
-
 /* Insert a new phi node for the dereference of PHI at basic_block
BB with the virtual operands from USE_STMT.  */
 
@@ -275,12 +246,13 @@ chk_uses (tree, tree *idx, void *data)
   :;
Returns true if a transformation was done and edge insertions
need to be committed.  Global data PHIVN and N is used to track
-   past transformation results.  We need to be especially careful here
+   past transformation results.  VPHI is the virtual PHI node in BB
+   if there is one.  We need to be especially careful here
with aliasing issues as we are moving memory reads.  */
 
 static bool
-propagate_with_phi (basic_block bb, gphi *phi, struct phiprop_d *phivn,
-   size_t n, bitmap dce_ssa_names)
+propagate_with_phi (basic_block bb, gphi *vphi, gphi *phi,
+   struct phiprop_d *phivn, size_t n, bitmap dce_ssa_names)
 {
   tree ptr = PHI_RESULT (phi);
   gimple *use_stmt;
@@ -298,6 +270,7 @@ propagate_with_phi (basic_block bb, gphi *phi, struct 
phiprop_d *phivn,
  && TYPE_MODE (TREE_TYPE (TREE_TYPE (ptr))) == BLKmode))
 return false;
 
+  tree up_vuse = NULL_TREE;
   /* Check if we can "cheaply" dereference all

Re: [PATCH v4 0/8] Implement layouts from mdspan.

2025-05-27 Thread Tomasz Kaminski
On Tue, May 27, 2025 at 4:32 PM Luc Grosheintz 
wrote:

> Since, I believe now we're through the larger questions about
> how to implement layouts. If reviewing all three over and over
> is too painful, it might now make sense to split the patch into
> separate patches, one per layout.
>
I think we are OK. As you mentioned, we are past the general discussion,
so I need to do a more thorough review, checking against the standard.
I will try to book some time for this this week.


> On 5/26/25 16:04, Luc Grosheintz wrote:
> > This follows up on:
> > https://gcc.gnu.org/pipermail/libstdc++/2025-May/061572.html
> >
> > Note that this patch series can only be applied after merging:
> > https://gcc.gnu.org/pipermail/libstdc++/2025-May/061653.html
> >
> > The important changes since v3 are:
> >* Fixed and tested several related overflow issues that occurred in
> >  extents of size 0 by using `size_t` to compute products.
> >* Fixed and tested default ctors.
> >* Add missing code for module support.
> >* Documented deviation from standard.
> >
> > The smaller changes include:
> >* Squashed the three small commits that make cosmetic changes to
> >  std::extents.
> >* Remove layout_left related changes from the layout_stride commit.
> >* Remove superfluous `mapping(extents_type(__exts))`.
> >* Fix indenting and improve comment in layout_stride.
> >* Add an easy check for representable required_span_size to
> >  layout_stride.
> >* Inline __dynamic_extents_prod
> >
> > Thank you Tomasz for all the great reviews!
> >
> > Luc Grosheintz (8):
> >libstdc++: Improve naming and whitespace for extents.
> >libstdc++: Implement layout_left from mdspan.
> >libstdc++: Add tests for layout_left.
> >libstdc++: Implement layout_right from mdspan.
> >libstdc++: Add tests for layout_right.
> >libstdc++: Implement layout_stride from mdspan.
> >libstdc++: Add tests for layout_stride.
> >libstdc++: Make layout_left(layout_stride) noexcept.
> >
> >   libstdc++-v3/include/std/mdspan   | 711 +-
> >   libstdc++-v3/src/c++23/std.cc.in  |   5 +-
> >   .../mdspan/layouts/class_mandate_neg.cc   |  42 ++
> >   .../23_containers/mdspan/layouts/ctors.cc | 459 +++
> >   .../23_containers/mdspan/layouts/empty.cc |  78 ++
> >   .../23_containers/mdspan/layouts/mapping.cc   | 568 ++
> >   .../23_containers/mdspan/layouts/stride.cc| 500 
> >   7 files changed, 2349 insertions(+), 14 deletions(-)
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/class_mandate_neg.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/ctors.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/empty.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/mapping.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/stride.cc
> >
>
>


[PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-27 Thread Yuao Ma
Hi all,

I've reverted the recent format changes, as three reviewers indicated they
caused more harm than good.

Are there any functional problems I need to address?

Thanks,
Yuao



0001-fortran-add-constant-input-support-for-trig-function.patch
Description: 0001-fortran-add-constant-input-support-for-trig-function.patch


Re: [PATCH] libstdc++: Replace some uses of std::__addressof with std::addressof

2025-05-27 Thread Tomasz Kaminski
On Fri, May 23, 2025 at 7:00 PM Jonathan Wakely  wrote:

> Since r16-154-gc91eb5a5c13f14 std::addressof is no less efficient than
> std::__addressof, so change some uses of the latter to the former.
>
> We can't change them all, because some uses need to compile as C++98
> which only has std::__addressof.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/stl_construct.h: Replace std::__addressof with
> std::addressof.
> * include/bits/stl_uninitialized.h: Likewise.
> ---
>
> I'm undecided whether it's better to use the standard std::addressof for
> simplicity, or to stick with std::__addressof in files where there's a
> mix of C++98 code and >= C++11 code.
>
After giving this a bit of thought: if I touch these files, then most likely
we are implementing new standard features, and in the patch I will just
default to std::addressof, as in any other case. So preferring to use
std::addressof when available seems like the easier-to-maintain policy.


>
> Obviously in files that don't need to compile as C++98 (such as
> ) we could just use std::addressof.
>
> Tested x86_64-linux.
>

>  libstdc++-v3/include/bits/stl_construct.h |  2 +-
>  libstdc++-v3/include/bits/stl_uninitialized.h | 28 +--
>  2 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/stl_construct.h
> b/libstdc++-v3/include/bits/stl_construct.h
> index 23b8fb754710..a53274e33c0c 100644
> --- a/libstdc++-v3/include/bits/stl_construct.h
> +++ b/libstdc++-v3/include/bits/stl_construct.h
> @@ -82,7 +82,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>if constexpr (__cplusplus > 201703L && is_array_v<_Tp>)
> {
>   for (auto& __x : *__location)
> -   std::destroy_at(std::__addressof(__x));
> +   std::destroy_at(std::addressof(__x));
> }
>else
> __location->~_Tp();
>
There are calls to __addressof in _Destroy at lines 212, 216, 268, 272 in
calls that are >= C++11.
If we update them, we should also change all of them in file.


> diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h
> b/libstdc++-v3/include/bits/stl_uninitialized.h
> index b1428db48b00..bde787c2beaa 100644
> --- a/libstdc++-v3/include/bits/stl_uninitialized.h
> +++ b/libstdc++-v3/include/bits/stl_uninitialized.h
>
Looks like all of the ones that could be updated in this file already are.

> @@ -839,7 +839,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  {
>   _UninitDestroyGuard<_ForwardIterator> __guard(__first);
>   for (; __first != __last; ++__first)
> -   std::_Construct(std::__addressof(*__first));
> +   std::_Construct(std::addressof(*__first));
>   __guard.release();
> }
>  };
> @@ -856,7 +856,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> return;
>
>   typename iterator_traits<_ForwardIterator>::value_type* __val
> -   = std::__addressof(*__first);
> +   = std::addressof(*__first);
>   std::_Construct(__val);
>   if (++__first != __last)
> std::fill(__first, __last, *__val);
> @@ -873,7 +873,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  {
>   _UninitDestroyGuard<_ForwardIterator> __guard(__first);
>   for (; __n > 0; --__n, (void) ++__first)
> -   std::_Construct(std::__addressof(*__first));
> +   std::_Construct(std::addressof(*__first));
>   __guard.release();
>   return __first;
> }
> @@ -890,7 +890,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   if (__n > 0)
> {
>   typename iterator_traits<_ForwardIterator>::value_type* __val
> -   = std::__addressof(*__first);
> +   = std::addressof(*__first);
>   std::_Construct(__val);
>   ++__first;
>   __first = std::fill_n(__first, __n - 1, *__val);
> @@ -955,7 +955,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> __alloc);
>typedef __gnu_cxx::__alloc_traits<_Allocator> __traits;
>for (; __first != __last; ++__first)
> -   __traits::construct(__alloc, std::__addressof(*__first));
> +   __traits::construct(__alloc, std::addressof(*__first));
>__guard.release();
>  }
>
> @@ -980,7 +980,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> __alloc);
>typedef __gnu_cxx::__alloc_traits<_Allocator> __traits;
>for (; __n > 0; --__n, (void) ++__first)
> -   __traits::construct(__alloc, std::__addressof(*__first));
> +   __traits::construct(__alloc, std::addressof(*__first));
>__guard.release();
>return __first;
>  }
> @@ -1007,7 +1007,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> {
>   _UninitDestroyGuard<_ForwardIterator> __guard(__first);
>   for (; __first != __last; ++__first)
> -   std::_Construct_novalue(std::__addressof(*__first));
> +   st

Re: [PATCH v15] ada: fix timeval timespec on 32 bits archs with 64 bits time_t [PR114065]

2025-05-27 Thread Marc Poulhiès
Hello Nicolas,

Continuing discussion from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114065
but on the mailing list.

After more testing, we've observed a regression on one simple test:

8<8<8<8<8<8<
with Ada.Text_IO; use Ada.Text_IO;
with Interfaces.C.Extensions;
with GNAT.Calendar; use GNAT.Calendar;
procedure Test is
   D, D2 : Duration;
   Tv : aliased timeval;
   procedure P_Dur (D : Duration) is
   begin
  Tv := To_Timeval (D);
  D2 := To_Duration (Tv'Access);

  Put_Line ("Duration IN:" & D'Image);

  Put_Line ("   To_Timeval -> timeval:" & Tv'Image);
  Put_Line ("   To_Duration OUT:" & D2'Image);
  New_Line;
   end P_Dur;

begin
   Put_Line ("Test timeval conv");

   P_Dur (1122334455.779);
   P_Dur (0.0);
   P_Dur (0.9);
   P_Dur (0.999_999_999);
   P_Dur (Duration'Last);
end Test;
8<8<8<8<8<8<

And here are the results using an unmodified vs a modified compiler:

8<8<8<8<8<8<
Test timeval conv
Duration IN: 1122334455.77999
   To_Timeval -> timeval:
[ 1122334455,  77,  0]
   To_Duration OUT: 1122334455.77000

Duration IN: 0.0
   To_Timeval -> timeval:
[ 0,  0,  140725911838016]
   To_Duration OUT: 0.0

Duration IN: 0.9
   To_Timeval -> timeval:
[ 0,  90,  140725911838016]
   To_Duration OUT: 0.9

Duration IN: 0.9
   To_Timeval -> timeval:
[ 0,  99,  140725911838016]
   To_Duration OUT: 0.99000

Duration IN: 9223372036.854775807
   To_Timeval -> timeval:
[ 9223372036,  854775,  140725911838016]
   To_Duration OUT: 9223372036.854775000
8<8<8<8<8<8<

8<8<8<8<8<8<
Test timeval conv
Duration IN: 1122334455.77999
   To_Timeval -> timeval:
(TV_SEC =>  1122334455,
 TV_USEC =>  78)
   To_Duration OUT: 1122334455.78000

Duration IN: 0.0
   To_Timeval -> timeval:
(TV_SEC =>  0,
 TV_USEC =>  0)
   To_Duration OUT: 0.0

Duration IN: 0.9
   To_Timeval -> timeval:
(TV_SEC =>  0,
 TV_USEC =>  90)
   To_Duration OUT: 0.9

Duration IN: 0.9
   To_Timeval -> timeval:
(TV_SEC =>  1,
 TV_USEC =>  0)
   To_Duration OUT: 1.0

raised CONSTRAINT_ERROR : s-c_time.adb:129 overflow check failed
8<8<8<8<8<8<

We can see 2 differences here.

First, the modified version of To_Timeval is rounding values whereas it is
currently truncating values. Is this change intentional?

 Then, To_Timeval(Duration'Last) throws an exception, which is probably
unexpected given that Duration'Last is a valid value. Do you think this can be
fixed? I see that you have an In_Duration function to check the inverse
conversion won't throw. In the case of To_Timeval, I think it should never throw
on valid input.

Regards,
Marc


[pushed] c++, coroutines: Fix typos in TRUTH_ANDIF_EXPRs.

2025-05-27 Thread Iain Sandoe
This needs to be added to r16-775-g18df4a10bc9694 if/when that is
backported to GCC-15.
Tested on powerpc64le-linux, x86_64-darwin, pushed to trunk as
trivial/obvious, thanks
Iain

--- 8< ---

These were typoed to TRUTH_AND_EXPR (and then that got copied).

gcc/cp/ChangeLog:

* coroutines.cc (cp_coroutine_transform::build_ramp_function):
Replace TRUTH_AND_EXPR with TRUTH_ANDIF_EXPR in three places.

Signed-off-by: Iain Sandoe 
---
 gcc/cp/coroutines.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index a6209962228..b1e555cb336 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -4944,7 +4944,7 @@ cp_coroutine_transform::build_ramp_function ()
   tree frame_cleanup = push_stmt_list ();
   tree do_fr_cleanup
= build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, iarc_x);
-  do_fr_cleanup = build2_loc (loc, TRUTH_AND_EXPR, boolean_type_node,
+  do_fr_cleanup = build2_loc (loc, TRUTH_ANDIF_EXPR, boolean_type_node,
  coro_before_return, do_fr_cleanup);
   r = build3 (COND_EXPR, void_type_node, do_fr_cleanup,
 delete_frame_call, void_node);
@@ -5036,7 +5036,7 @@ cp_coroutine_transform::build_ramp_function ()
  tree do_cleanup
= build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, 
iarc_x);
  do_cleanup
-   = build2_loc (loc, TRUTH_AND_EXPR, boolean_type_node,
+   = build2_loc (loc, TRUTH_ANDIF_EXPR, boolean_type_node,
  coro_before_return, do_cleanup);
  r = build3_loc (loc, COND_EXPR, void_type_node, do_cleanup,
  parm.fr_copy_dtor, void_node);
@@ -5096,7 +5096,7 @@ cp_coroutine_transform::build_ramp_function ()
  tree promise_cleanup = push_stmt_list ();
  tree do_cleanup
= build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node, iarc_x);
- do_cleanup = build2_loc (loc, TRUTH_AND_EXPR, boolean_type_node,
+ do_cleanup = build2_loc (loc, TRUTH_ANDIF_EXPR, boolean_type_node,
   coro_before_return, do_cleanup);
  r = build3 (COND_EXPR, void_type_node, do_cleanup,
  promise_dtor, void_node);
-- 
2.39.2 (Apple Git-143)



Re: [PATCH] Fortran: fix parsing of type parameter inquiries of substrings [PR101735]

2025-05-27 Thread Jerry D

On 5/27/25 11:24 AM, Harald Anlauf wrote:

Dear all,

the attached patch fixes a variety of small issues with parsing of
inquiry references of substrings.  The testcase exercises variations
of the examples in the PR and ensures that these are successfully
simplified.

Don't try it with other compilers... ;-)

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

I believe this is sufficiently safe that it can be backported
later to 15-branch, unless someone objects.

Thanks,
Harald


 My only question is why the gcc_assert here:

+  if (sym->ts.type == BT_CHARACTER && tail->type == REF_SUBSTRING)
+   {
+ gcc_assert (sym->attr.dimension);
+ /* Find array reference for substrings of character arrays.  */
+ for (ref = primary->ref; ref && ref->next; ref = ref->next)
+   if (ref->type == REF_ARRAY && ref->next->type == REF_SUBSTRING)
+ {
+   strarr = ref;
+   break;
+ }
+   }
+  else
+   tail->type = REF_ARRAY;

Otherwise, OK

Jerry


Re: [PATCH, fortran] PR120049 - ICE when using IS_C_ASSOCIATED ()

2025-05-27 Thread Harald Anlauf

Hi Jerry!

On 5/27/25 21:02, Jerry D wrote:

On 5/20/25 12:35 PM, Jerry D wrote:

On 5/20/25 12:01 PM, Harald Anlauf wrote:

Hi Jerry!

Am 20.05.25 um 05:23 schrieb Jerry D:

On 5/19/25 1:50 PM, Harald Anlauf wrote:

Hi Jerry,

so contrary to what the name of patch claims (pr120049-final.diff),
it fixes only the case of direct use of iso_c_binding, but not the
indirect one thru the other module, which is the reason for the
original ICE and the PR.

So if you want to push the incremental patch now, go ahead.

Cheers,
Harald


Am 18.05.25 um 23:46 schrieb Jerry D:

On 5/18/25 2:34 PM, Jerry D wrote:

On 5/18/25 2:10 PM, Harald Anlauf wrote:

Hi Jerry,

I found 2 corner invalid cases which are silently accepted with
your patch when iso_c_binding is used indirectly:

   print *, c_associated(c_loc(val), C_NULL_FUNPTR)
   print *, c_associated(C_NULL_FUNPTR, c_loc(val))

These should get rejected, too.  Can you see how to catch these, 
too?


Thanks,
Harald


Yes, will do! I try to think of cases to run through on. This helps.

Thanks,

Jerry
--- snip ---




Attached is the revised patch to fix the additional test cases. I 
had to do some trial and error to get the testsuite directives to 
work the way they should.


One will notice that the file containing the gtk_sup module is 
simplified and gets taken care of with the directives in the 
specific tests.


Regression tested on x86_64.

OK for trunk?


No, not yet.  It rejects too much (consistently).  Consider:


Harald,

Please try the new and improved patch attached.

It does pass regression testing with your additional cases and updated the 
test for gfortran.dg.


Let me know if OK.


Yes, this looks OK now.  I haven't found other simple ways to break it.

Thanks for the patch!  And for bearing my review and comments...

Harald



Regards,

Jerry





Re: [PATCH, fortran] PR120049 - ICE when using IS_C_ASSOCIATED ()

2025-05-27 Thread Jerry D

On 5/27/25 12:39 PM, Harald Anlauf wrote:

Hi Jerry!

On 5/27/25 21:02, Jerry D wrote:

On 5/20/25 12:35 PM, Jerry D wrote:

On 5/20/25 12:01 PM, Harald Anlauf wrote:

Hi Jerry!

Am 20.05.25 um 05:23 schrieb Jerry D:

On 5/19/25 1:50 PM, Harald Anlauf wrote:

Hi Jerry,

so contrary to what the name of patch claims (pr120049-final.diff),
it fixes only the case of direct use of iso_c_binding, but not the
indirect one thru the other module, which is the reason for the
original ICE and the PR.

So if you want to push the incremental patch now, go ahead.

Cheers,
Harald


Am 18.05.25 um 23:46 schrieb Jerry D:

On 5/18/25 2:34 PM, Jerry D wrote:

On 5/18/25 2:10 PM, Harald Anlauf wrote:

Hi Jerry,

I found 2 corner invalid cases which are silently accepted with
your patch when iso_c_binding is used indirectly:

   print *, c_associated(c_loc(val), C_NULL_FUNPTR)
   print *, c_associated(C_NULL_FUNPTR, c_loc(val))

These should get rejected, too.  Can you see how to catch 
these, too?


Thanks,
Harald


Yes, will do! I try to think of cases to run through on. This 
helps.


Thanks,

Jerry
--- snip ---




Attached is the revised patch to fix the additional test cases. I 
had to do some trial and error to get the testsuite directives to 
work the way they should.


One will notice that the file containing the gtk_sup module is 
simplified and gets taken care of with the directives in the 
specific tests.


Regression tested on x86_64.

OK for trunk?


No, not yet.  It rejects too much (consistently).  Consider:


Harald,

Please try the new and improved patch attached.

It does pass regression testing with your additional cases and updated 
the test for gfortran.dg.


Let me know if OK.


Yes, this looks OK now.  I haven't found other simple ways to break it.

Thanks for the patch!  And for bearing my review and comments...

Harald



Regards,

Jerry




In all honesty, I am truly grateful for your comments. I realized today 
that I have been working on gfortran for about 20 years now.


Pondering things, all is good.

Jerry


Re: [PATCH] Fortran: fix parsing of type parameter inquiries of substrings [PR101735]

2025-05-27 Thread Harald Anlauf

Hi Jerry!

On 5/27/25 21:36, Jerry D wrote:

On 5/27/25 11:24 AM, Harald Anlauf wrote:

Dear all,

the attached patch fixes a variety of small issues with parsing of
inquiry references of substrings.  The testcase exercises variations
of the examples in the PR and ensures that these are successfully
simplified.

Don't try it with other compilers... ;-)

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

I believe this is sufficiently safe that it can be backported
later to 15-branch, unless someone objects.

Thanks,
Harald


  My only question is why the gcc_assert here:

+  if (sym->ts.type == BT_CHARACTER && tail->type == REF_SUBSTRING)
+    {
+  gcc_assert (sym->attr.dimension);
+  /* Find array reference for substrings of character arrays.  */
+  for (ref = primary->ref; ref && ref->next; ref = ref->next)
+    if (ref->type == REF_ARRAY && ref->next->type == REF_SUBSTRING)
+  {
+    strarr = ref;
+    break;
+  }
+    }
+  else
+    tail->type = REF_ARRAY;


I put the gcc_assert here because the enclosing if() (primary.cc:2298)
is not easy to understand and there might be situations to which the
new logic is not prepared.  (Possibly some ASSOCIATE-stuff, where types
are inferred, and which I didn't dare to exercise.)

So if it breaks there, I would rather want to see it and find out why.
(The testsuite did not hit it).  But it might be superfluous.


Otherwise, OK

Jerry



Pushed as r16-914-g787a8dec1acedf.

Thanks for the review!

Harald





Disable 'pass_nrv' for offloading compilation [PR119835] (was: [PATCH] Verify 'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p' [PR119835])

2025-05-27 Thread Thomas Schwinge
Hi!

On 2025-05-23T17:01:31+0200, Richard Biener  wrote:
> Am 23.05.2025 um 16:49 schrieb Thomas Schwinge :
>> This fell out of me looking into PR119835.  This doesn't resolve the 
>> underlying
>> issue, but instead of failing GIMPLE semantics verification just by chance in
>> the 'GIMPLE pass: nrv' context, it makes the issue observable generally.
>> (... thereby regressing a small number of offloading test cases where host 
>> vs.
>> offload compilers disagree on 'aggregate_value_p' for functions that return
>> aggregate types.)
>> 
>> This cross-references just the three places in the code that I ran into;
>> likely there are more?
>> 
>> No regressions for powerpc64le-unknown-linux-gnu, x86_64-pc-linux-gnu 
>> bootstrap
>> and 'make check' (without offloading configured).
>
> I think this is a step in the wrong direction in absence of quoting the wrong 
> thing that happens downstream when we violate this (an assert does not 
> qualify).  Esp. when at the same time we allow the actual thing returned to 
> be a register (aka SSA name)

ACK; you certainly understand GIMPLE and RTL expansion semantics better
than I do.  ;-)

You're also implicitly telling me that 
"'GIMPLE_RETURN' vs. 'RESULT_DECL' if 'aggregate_value_p'" isn't actually
a GIMPLE semantics invariant, thanks.  I conclude that in case that this
"invariant" is violated, that's not a problem for RTL expansion of
'GIMPLE_RETURN', which is then handled like all the other cases where
"we are not returning the current function's RESULT_DECL".

I'm not sure whether just disabling the 'assert' in
'gcc/tree-nrv.cc:pass_nrv::execute' is conceptually right (or may
potentially drive that pass into an inconsistent state), and as we of
course intend to eventually fix this issue properly (thanks for your
ideas in PR119835!), so for now, I propose to simply
"Disable 'pass_nrv' for offloading compilation [PR119835]", see attached.
Any comments before I push that?


Grüße
 Thomas


>From d94bb0a7f45ef102c4d44fe1a1eedd1eef041c21 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 27 May 2025 16:02:05 +0200
Subject: [PATCH] Disable 'pass_nrv' for offloading compilation [PR119835]

... to avoid running into ICEs per PR119835, until that's resolved properly.

	PR middle-end/119835
	gcc/
	* tree-nrv.cc (pass_nrv::gate) [ACCEL_COMPILER]: 'return false;'.
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c:
	'#pragma GCC optimize "-fno-inline"'.
	* testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New.
	* testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust.
---
 gcc/tree-nrv.cc| 10 +-
 .../libgomp.c-c++-common/target-abi-struct-1-O0.c  |  2 +-
 .../libgomp.c-c++-common/target-abi-struct-1.c |  1 +
 .../testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c |  6 +-
 4 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c

diff --git a/gcc/tree-nrv.cc b/gcc/tree-nrv.cc
index 180ce39de4c..e78e4b7e56b 100644
--- a/gcc/tree-nrv.cc
+++ b/gcc/tree-nrv.cc
@@ -125,7 +125,15 @@ public:
   {}
 
   /* opt_pass methods: */
-  bool gate (function *) final override { return optimize > 0; }
+  bool gate (function *) final override
+  {
+#ifdef ACCEL_COMPILER
+/* PR119835 */
+return false;
+#else
+return optimize > 0;
+#endif
+  }
 
   unsigned int execute (function *) final override;
 
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
index 35ec75d648d..9bf949a1f06 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c
@@ -1,3 +1,3 @@
 /* { dg-additional-options -O0 } */
 
-#include "../libgomp.oacc-c-c++-common/abi-struct-1.c"
+#include "target-abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
new file mode 100644
index 00000000000..d9268af55cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c
@@ -0,0 +1 @@
+#include "../libgomp.oacc-c-c++-common/abi-struct-1.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
index 80786555fe2..4b541711f36 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c
@@ -1,6 +1,10 @@
 /* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'.  */
 
-/* See also '../libgomp.c-c++-common/target-abi-struct-1-O0.c'.  */
+/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'.  */
+
+/* To exercise PR119835 (if optimizations enabled): disable inlining, so that
+   GIMPLE passes still see the functions that return aggregate types.  */
+#pragma GCC optimize "-fno-inline"

[PATCH v2 0/3] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Robin Dapp
The first patch makes SLP paths unreachable and the second one removes those 
entirely.  The third patch does the actual strided-load work.

Bootstrapped and regtested on x86 and aarch64.
Regtested on rv64gcv_zvl512b.

Robin Dapp (3):
  vect: Make non-SLP paths unreachable in strided slp/elementwise.
  vect: Remove non-SLP paths in strided slp/elementwise.
  vect: Use strided loads for VMAT_STRIDED_SLP.

 gcc/internal-fn.cc|  21 ++
 gcc/internal-fn.h |   2 +
 .../gcc.target/riscv/rvv/autovec/pr118019-2.c |  51 
 gcc/tree-vect-stmts.cc| 219 ++
 4 files changed, 248 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c

-- 
2.49.0



Re: [PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-27 Thread Tobias Burnus

Yuao Ma wrote:

PR113152


If you run your patch through

./contrib/gcc-changelog/git_email.py 
0001-fortran-add-constant-input-support-for-trig-function.patch

you will notice that the PR is not recognized. The format as mentioned before is "PR 
component/number". Namely:

"PR fortran/113152"


gcc/fortran/ChangeLog:

* gfortran.h (enum gfc_isym_id): Add new enum.
* intrinsic.cc (add_functions): Register new intrinsics. Reformatted 
certain
lines that utilize gfc_resolve_trig{2}.


The second part is not what you are doing, you are actually changing the 
call from gfc_resolve_trigd{,2} to gfc_resolve_trig{,2}.



+  gfc_error ("If first argument of ATAN2PI at %L is zero, then the "
+             "second argument must not be zero",
+             &y->where);


I am a non-native speaker, but I think there is a "the" missing before 
"first".


-! Former ICE when simplifying asind, plus wrong function name in error message
-real, parameter :: d = asind(1.1) ! { dg-error "Argument of ASIND at.*must be between -1 and 1" }
-print *, d
+real, parameter :: dacos = acosd(1.1) ! { dg-error "Argument of ACOSD at" }


BTW: If you have '(1)', you need to escape it with '\\(1\\)' or as the 
(...) don't matter, just use '.1.' as pattern. For '[...]' you need to 
make sure that [...] is not read as pattern range (such as '[a-z]'), 
i.e. use '\\\[-1, 1\\\]' (albeit it also works with only two \\).


* * *


+! { dg-options "-std=f2018" }
+! { dg-do compile }
+
+real, parameter :: piacos = acospi(0.0) ! { dg-error "Function 'acospi' in 
initialization expression at" }
+print *, piacos


BTW: You could also use "intrinsic :: acospi" - which tells the compiler 
that the function is supposed to be an intrinsic.


Otherwise, it also looks good to me.

Tobias



Re: [PATCH] libgcc: Add DPD support + fix big-endian support of _BitInt <-> dfp conversions

2025-05-27 Thread Joseph Myers
On Tue, 20 May 2025, Jakub Jelinek wrote:

> Tested on x86_64-linux, i686-linux and s390x-linux with
> make check-gcc dfp.exp
> ok for trunk?

OK.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread Patrick Palka
On Tue, 27 May 2025, David Malcolm wrote:

> On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:
> > On 4/14/25 9:57 AM, Jason Merrill wrote:
> > > On 1/9/25 10:00 PM, Jason Merrill wrote:
> > > > Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK for
> > > > trunk?
> > > 
> > > Ping?
> > 
> > Ping.
> 
> Sorry for the delay in responding; comments below...
> 
> > 
> > > > -- 8< --
> > > > 
> > > > To respect the #pragma diagnostic lines in libstdc++ headers when
> > > > compiling
> > > > with module std, we need to represent them in the module.
> > > > 
> > > > I think it's reasonable to make module_state a friend of
> > > > diagnostic_option_classifier to allow direct access to the data. 
> > > > This 
> > > > is a
> > > > different approach from how Jakub made PCH streaming members of
> > > > diagnostic_option_classifier, but it seems to me that modules
> > > > handling
> > > > belongs in module.cc.
> 
> Putting it in module.cc looks good to me, though perhaps it should be
> just a friend of diagnostic_option_classifier but not of
> diagnostic_context?  Could the functions take a
> diagnostic_option_classifier rather than a diagnostic_context? 
> diagnostic_context is something of a "big blob" of a class.
> 
> [...snip...]
> 
> > > > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> > > > index 78fb21dc22f..49c9c092163 100644
> > > > --- a/gcc/cp/module.cc
> > > > +++ b/gcc/cp/module.cc
> 
> [...snip...]
> 
> > > > @@ -17637,6 +17640,78 @@ module_state::write_ordinary_maps (elf_out 
> > > > *to, range_t &info,
> > > >     dump.outdent ();
> > > >   }
> > > > +/* Write out any #pragma GCC diagnostic info to the .dgc section.  */
> > > > +
> > > > +void
> > > > +module_state::write_diagnostic_classification (elf_out *to,
> > > > +   diagnostic_context *dc,
> > > > +   unsigned *crc_p)
> > > > +{
> > > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > > +
> > > > +  dump () && dump ("Writing diagnostic change locations");
> > > > +  dump.indent ();
> > > > +
> > > > +  bytes_out sec (to);
> > > > +  if (sec.streaming_p ())
> > > > +    sec.begin ();
> > > > +
> > > > +  unsigned len = changes.length ();
> > > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > > +  if (sec.streaming_p ())
> > > > +    sec.u (len);
> > > > +
> > > > +  for (const auto &c: changes)
> > > > +    {
> > > > +  write_location (sec, c.location);
> > > > +  if (sec.streaming_p ())
> > > > +    {
> > > > +  sec.u (c.option);
> > > > +  sec.u (c.kind);
> > > > +    }
> > > > +    }
> 
> I confess I don't fully understand the module code yet - in particular
> the streaming vs non-streaming distinction.  What are the "if
> (sec.streaming_p ())" guards doing here?  It looks it can be false if
> the param "elf_out *to" is null (can that happen?), and if it's false,
> then this function essentially becomes a no-op.  Is that what we want?

When streaming_p is false then we're not serializing, we're just doing
dependency analysis, the same walking code is used for both.  The trees_out
class definition has the following comment:

/* The walk is used for three similar purposes:

  1. The initial scan for dependencies.
  2. Once dependencies have been found, ordering them.
  3. Writing dependencies to file (streaming_p).

 For cases where it matters, these accessors can be used to determine
 which state we're in.  */

But it seems we don't need to check it here, since streaming_p will always
be true at the current call sites of write_diagnostic_classification?

> 
> 
> > > > +
> > > > +  if (sec.streaming_p ())
> > > > +    sec.end (to, to->name (MOD_SNAME_PFX ".dgc"), crc_p);
> > > > +  dump.outdent ();
> > > > +}
> > > > +
> > > > +/* Read any #pragma GCC diagnostic info from the .dgc section.  */
> > > > +
> > > > +bool
> > > > +module_state::read_diagnostic_classification (diagnostic_context *dc)
> > > > +{
> > > > +  bytes_in sec;
> > > > +
> > > > +  if (!sec.begin (loc, from (), MOD_SNAME_PFX ".dgc"))
> > > > +    return false;
> > > > +
> > > > +  dump () && dump ("Reading diagnostic change locations");
> > > > +  dump.indent ();
> > > > +
> > > > +  unsigned len = sec.u ();
> > > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > > +
> > > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > > +  unsigned offset = changes.length ();
> > > > +  changes.reserve (len);
> > > > +  for (unsigned i = 0; i < len; ++i)
> > > > +    {
> > > > +  location_t loc = read_location (sec);
> > > > +  int opt = sec.u ();
> > > > +  diagnostic_t kind = (diagnostic_t) sec.u ();
> > > > +  if (kind == DK_POP)
> > > > +    opt += offset;
> > > > +  changes.quick_push ({ loc, opt, kind });
> > > > +    }
> > > > +
> > > > +  dump.outdent ();
> > > > +  if (!sec.end (from ()))
> > > > +    return false;
> > > > +
> > > > +  return true;
> > > > +}
> > > > +
> > > >   void
> >

Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread Jason Merrill

On 5/27/25 4:47 PM, Jason Merrill wrote:

On 5/27/25 1:33 PM, David Malcolm wrote:

On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:

On 4/14/25 9:57 AM, Jason Merrill wrote:

On 1/9/25 10:00 PM, Jason Merrill wrote:

Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK for
trunk?


Ping?


Ping.


Sorry for the delay in responding; comments below...




-- 8< --

To respect the #pragma diagnostic lines in libstdc++ headers when
compiling
with module std, we need to represent them in the module.

I think it's reasonable to make module_state a friend of
diagnostic_option_classifier to allow direct access to the data.
This
is a
different approach from how Jakub made PCH streaming members of
diagnostic_option_classifier, but it seems to me that modules
handling
belongs in module.cc.


Putting it in module.cc looks good to me, though perhaps it should be
just a friend of diagnostic_option_classifier but not of
diagnostic_context?  Could the functions take a
diagnostic_option_classifier rather than a diagnostic_context?
diagnostic_context is something of a "big blob" of a class.


The friend in diagnostic_context is to be able to name 
m_option_classifier.  We could instead make that member public?



[...snip...]

+  bytes_out sec (to);
+  if (sec.streaming_p ())
+    sec.begin ();


I confess I don't fully understand the module code yet - in particular
the streaming vs non-streaming distinction.  What are the "if
(sec.streaming_p ())" guards doing here?  It looks like it can be false if
the param "elf_out *to" is null (can that happen?), and if it's false,
then this function essentially becomes a no-op.  Is that what we want?


Hmm, perhaps an early if (!sec.streaming_p ()) return would be simpler, 
I'll try that.


That breaks, apparently because we need the early calls to 
write_location to record that we need to represent these locations.


Jason



Re: [PATCH RFC] c++: modules and using-directives

2025-05-27 Thread Nathaniel Shead
On Wed, May 28, 2025 at 12:24:54AM -0400, Jason Merrill wrote:
> On 11/27/24 11:17 AM, Jason Merrill wrote:
> > On 11/27/24 1:43 AM, Nathaniel Shead wrote:
> > > On Wed, Nov 27, 2024 at 12:03:23AM -0500, Jason Merrill wrote:
> > > > Tested x86_64-pc-linux-gnu.
> > > > 
> > > > Does this approach make sense to you?  Any other ideas?
> > > > 
> > > > -- 8< --
> > > > 
> > > > We weren't representing 'using namespace' at all in modules, which broke
> > > > some of the  literals tests.
> > > > 
> > > > I experimented with various approaches to representing them, and
> > > > ended up
> > > > with emitting them as a pseudo-binding for "using", which as a
> > > > keyword can't
> > > > have any real bindings.  Then reading this pseudo-binding adds it to
> > > > using_directives instead of the usual handling.
> > > > 
> > > > +    /* ??? should we try to distinguish whether the using-directive
> > > > +   is purview/exported?  */
> > > > +    add_binding_entity (used, WMB_Flags(WMB_Using|WMB_Purview), &data);
> > > 
> > > I don't think the standard is entirely clear about how using-directives
> > > should interact with modules; they don't declare names, and before P2615
> > > were in fact forbidden from being explicitly exported, which implies to
> > > me that the intention was for them to not be considered outside of the
> > > declaring module.
> > 
> > P2615 is certainly clear about allowing them.  Given that, I think the
> > general rules of [module.interface] apply, so it should be found by name
> > lookup in an importing TU.
> > 
> > > That said, if we were to do this I would think the logic should match
> > > what we do for any other name, in terms of requiring it to be explicitly
> > > exported/purview as required; in particular, I would hope that something
> > > like this doesn't happen:
> > > 
> > >    // m.cpp
> > >    export module M;
> > >    using namespace std;
> > > 
> > >    // test.cpp
> > >    #include 
> > >    import M;
> > >    int main() {
> > >  cout << "hello\n";  // using-directive "inherited" from M?
> > >    }
> > 
> > Good point, I have more work to do.
> > 
> > I think that since ADL doesn't consider using-directives, we only need
> > to represent the exported ones?
> > 
> > > >   name_lookup::search_namespace_only (tree scope)
> > > >   {
> > > >     bool found = false;
> > > > +  if (modules_p () && name && !id_equal (name, "using"))
> > > > +    {
> > > > +  name_lookup u (get_identifier ("using"));
> > > > +  u.search_namespace_only (scope);
> > > > +    }
> > > 
> > > Could we just add to the list of using-directives within read_namespaces
> > > perhaps?  Probably as a second pass after all namespaces have been
> > > created so that we don't run into issues with circular directives.
> > > That would mean we wouldn't need to do this in every lookup.
> > 
> > That was my first thought, but I had trouble figuring out how.  Perhaps
> > I'll try again.
> 
> Done thus.  Any thoughts on this version?

LGTM!

Nathaniel


[PATCH v2] c-decl: Add -Wshadow=used [PR92386]

2025-05-27 Thread Matthew Sotoudeh
This is a small patch to address
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92386 updated thanks to Andrew's
feedback.

This patch implements "-Wshadow=used," which throws a warning for shadowed
variables only if the shadowed variable was previously used in the same scope
where it is being shadowed.

This type of shadowing is particularly bad because it causes GCC to output
incorrect/misleading debug information. The Bugzilla report has a minimal
example and explains why a direct fix was not desirable (debug info size would
blow up).

I tested on x86-64-linux-gnu (Debian 12) with make -k check and saw no
testsuite regressions using ./contrib/compare_tests. I also built Git and Linux
with the flag: Git had 2 benign true positives while Linux had 5 benign true
positives.

Any comments would be much appreciated; I'm new to the codebase.

Changes from v1:

* Implement as part of the normal variable shadowing warnings rather
  than during gimplification.

PR debug/92386 - gdb issue with variable-shadowing

PR debug/92386

gcc/c/ChangeLog:

* c-decl.cc (find_var_usage): helper to walk a tree looking for uses of
  a specific variable.
  (warn_if_shadowing): when Wshadow=used is passed, throw a warning if
  the variable being shadowed is used earlier in the same scope.

gcc/ChangeLog:

* common.opt: Added Wshadow=used option.

gcc/testsuite/ChangeLog:

* gcc.dg/warn-use-before-shadow.c: New test.
---
 gcc/c/c-decl.cc   | 43 ++-
 gcc/common.opt|  7 +++
 gcc/testsuite/gcc.dg/warn-use-before-shadow.c | 28 
 3 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/warn-use-before-shadow.c

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 8c420f22976..8a98300423b 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -3205,6 +3205,20 @@ duplicate_decls (tree newdecl, tree olddecl)
 }
 
 
+/* Helper used by warn_if_shadowing to search for uses of a specific variable
+   in a tree.  */
+tree find_var_usage (tree *px, int *walk_subtrees, void *data)
+{
+  if (*px == (tree) data)
+return *px;
+  if (TREE_CODE (*px) == DECL_EXPR)
+{
+  tree di = DECL_INITIAL ( DECL_EXPR_DECL (*px));
+  return walk_tree (&di, find_var_usage, data, NULL);
+}
+  return 0;
+}
+
 /* Check whether decl-node NEW_DECL shadows an existing declaration.  */
 static void
 warn_if_shadowing (tree new_decl)
@@ -3214,7 +3228,8 @@ warn_if_shadowing (tree new_decl)
   /* Shadow warnings wanted?  */
   if (!(warn_shadow
 || warn_shadow_local
-|| warn_shadow_compatible_local)
+|| warn_shadow_compatible_local
+|| warn_shadow_used)
   /* No shadow warnings for internally generated vars.  */
   || DECL_IS_UNDECLARED_BUILTIN (new_decl))
 return;
@@ -3298,6 +3313,32 @@ warn_if_shadowing (tree new_decl)
if (warned)
  inform (DECL_SOURCE_LOCATION (old_decl),
  "shadowed declaration is here");
+/* If we haven't issued any other shadowing warning for this
+   declaration, but -Wshadow=used was passed, issue a warning if the
+   now-shadowed variable was used earlier in this scope.  */
+else if (warn_shadow_used && building_stmt_list_p ())
+  {
+for (tree_stmt_iterator tsi = tsi_start (cur_stmt_list);
+ !tsi_end_p (tsi); tsi_next (&tsi))
+  {
+tree stmt = tsi_stmt (tsi),
+ found = walk_tree (&stmt, find_var_usage, old_decl, NULL);
+if (found)
+  {
+if (warning_at (EXPR_LOCATION (stmt), OPT_Wshadow_used,
+"variable %qD is used before being "
+"shadowed", found))
+  {
+inform (DECL_SOURCE_LOCATION (new_decl),
+"shadowing declaration is here");
+inform (DECL_SOURCE_LOCATION (old_decl),
+"shadowed declaration being used "
+"instead is here");
+  }
+break;
+  }
+  }
+  }
 
break;
   }
diff --git a/gcc/common.opt b/gcc/common.opt
index e3fa0dacec4..aac3e652ef1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -751,6 +751,13 @@ Warn when one local variable shadows another local 
variable or parameter of comp
 Wshadow-compatible-local
 Common Warning Undocumented Alias(Wshadow=compatible-local)
 
+Wshadow=used
+Common Var(warn_shadow_used) Warning EnabledBy(Wshadow=used)
+Warn if a variable from an outer scope is used before it is shadowed in the 
current scope.
+
+Wshadow-used
+Common Warning Undocumented Alias(Wshadow=used)
+
 Wstack-protector
 Common Var(warn_stack_protect) Warning
 Warn when not iss

Re: [PATCH RFC] c++: modules and using-directives

2025-05-27 Thread Jason Merrill

On 11/27/24 11:17 AM, Jason Merrill wrote:

On 11/27/24 1:43 AM, Nathaniel Shead wrote:

On Wed, Nov 27, 2024 at 12:03:23AM -0500, Jason Merrill wrote:

Tested x86_64-pc-linux-gnu.

Does this approach make sense to you?  Any other ideas?

-- 8< --

We weren't representing 'using namespace' at all in modules, which broke
some of the  literals tests.

I experimented with various approaches to representing them, and 
ended up
with emitting them as a pseudo-binding for "using", which as a 
keyword can't

have any real bindings.  Then reading this pseudo-binding adds it to
using_directives instead of the usual handling.

+    /* ??? should we try to distinguish whether the using-directive
+   is purview/exported?  */
+    add_binding_entity (used, WMB_Flags(WMB_Using|WMB_Purview), &data);


I don't think the standard is entirely clear about how using-directives
should interact with modules; they don't declare names, and before P2615
were in fact forbidden from being explicitly exported, which implies to
me that the intention was for them to not be considered outside of the
declaring module.


P2615 is certainly clear about allowing them.  Given that, I think the 
general rules of [module.interface] apply, so it should be found by name 
lookup in an importing TU.



That said, if we were to do this I would think the logic should match
what we do for any other name, in terms of requiring it to be explicitly
exported/purview as required; in particular, I would hope that something
like this doesn't happen:

   // m.cpp
   export module M;
   using namespace std;

   // test.cpp
   #include <iostream>
   import M;
   int main() {
 cout << "hello\n";  // using-directive "inherited" from M?
   }


Good point, I have more work to do.

I think that since ADL doesn't consider using-directives, we only need 
to represent the exported ones?



  name_lookup::search_namespace_only (tree scope)
  {
    bool found = false;
+  if (modules_p () && name && !id_equal (name, "using"))
+    {
+  name_lookup u (get_identifier ("using"));
+  u.search_namespace_only (scope);
+    }


Could we just add to the list of using-directives within read_namespaces
perhaps?  Probably as a second pass after all namespaces have been
created so that we don't run into issues with circular directives.
That would mean we wouldn't need to do this in every lookup.


That was my first thought, but I had trouble figuring out how.  Perhaps 
I'll try again.


Done thus.  Any thoughts on this version?

From e4711055f683faa2ae747507dfe8b1d51fe26760 Mon Sep 17 00:00:00 2001
From: Jason Merrill 
Date: Wed, 20 Nov 2024 23:46:54 +0100
Subject: [PATCH] c++: modules and using-directives
To: gcc-patches@gcc.gnu.org

We weren't representing 'using namespace' at all in modules, which broke
some of the  literals tests.

This only represents exported using-directives; others should be
irrelevant to importers, as any name lookup in the imported module that
would have cared about them was done while compiling the header unit.

I experimented with various approaches to representing them; this patch
handles them in read/write_namespaces, after the namespaces themselves.  I
spent a while pondering how to deal with the depset code in order to connect
them, but then realized it would be simpler to refer to them based on their
index in the array of namespaces.

Any using-directives from an indirect import are ignored, so in an export
import, any imported using-directives are exported again.

gcc/cp/ChangeLog:

	* module.cc (module_state::write_namespaces): Write
	using-directives.
	(module_state::read_namespaces): And read them.
	* name-lookup.cc (add_using_namespace): Add overload.  Build a
	USING_DECL for modules.
	(name_lookup::search_usings, name_lookup::queue_usings)
	(using_directives_contain_std_p): Strip the USING_DECL.
	* name-lookup.h: Declare it.
	* parser.cc (cp_parser_import_declaration): Set MK_EXPORTING
	for export import.

gcc/testsuite/ChangeLog:

	* g++.dg/modules/namespace-8_a.C: New test.
	* g++.dg/modules/namespace-8_b.C: New test.
	* g++.dg/modules/namespace-9_a.C: New test.
	* g++.dg/modules/namespace-9_b.C: New test.
	* g++.dg/modules/namespace-10_a.C: New test.
	* g++.dg/modules/namespace-10_b.C: New test.
	* g++.dg/modules/namespace-10_c.C: New test.
	* g++.dg/modules/namespace-11_a.C: New test.
	* g++.dg/modules/namespace-11_b.C: New test.
	* g++.dg/modules/namespace-11_c.C: New test.
---
 gcc/cp/name-lookup.h  |  1 +
 gcc/cp/module.cc  | 71 +++
 gcc/cp/name-lookup.cc | 24 ++-
 gcc/cp/parser.cc  |  6 ++
 gcc/testsuite/g++.dg/modules/namespace-10_a.C | 11 +++
 gcc/testsuite/g++.dg/modules/namespace-10_b.C |  9 +++
 gcc/testsuite/g++.dg/modules/namespace-10_c.C |  6 ++
 gcc/testsuite/g++.dg/modules/namespace-11_a.C | 11 +++
 gcc/testsuite/g++.dg/modules/namespace-11_b.C |  9 +++
 gcc/testsuite/g++.dg/modules/namespace-11_c.

Re: [PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys [PR120432]

2025-05-27 Thread Tomasz Kaminski
On Tue, May 27, 2025 at 7:08 PM Patrick Palka  wrote:

> Tested on x86_64-pc-linux-gnu, does this look OK for trunk/15?
>
> The 'volatile' issue from that PR will be fixed in a separate patch as
> operator[] isn't the only operation that's affected.
>
> -- >8 --
>
> The const lvalue operator[] overload wasn't properly forwarding the key
> type to the generic overload.
>
> PR libstdc++/120432
>
> libstdc++-v3/ChangeLog:
>
> * include/std/flat_map (_Flat_map_base::operator[]): Correct
> forwarding from the const lvalue key overload.
> * testsuite/23_containers/flat_map/1.cc (test08): New test.
> * testsuite/23_containers/flat_multimap/1.cc (test08): New test.
> ---
>  libstdc++-v3/include/std/flat_map  |  2 +-
>  libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 10 ++
>  .../testsuite/23_containers/flat_multimap/1.cc | 10 ++
>  3 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/flat_map
> b/libstdc++-v3/include/std/flat_map
> index 6593988d213c..4d9ced1e8191 100644
> --- a/libstdc++-v3/include/std/flat_map
> +++ b/libstdc++-v3/include/std/flat_map
> @@ -1142,7 +1142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>// element access
>mapped_type&
>operator[](const key_type& __x)
> -  { return operator[]<const key_type>(__x); }
> +  { return operator[]<const key_type&>(__x); }
>
Given that the operator[] that we are forwarding to is implemented as:
{ return try_emplace(std::forward<_Key2>(__x)).first->second; }
I would just call try_emplace directly:

>
>mapped_type&
>operator[](key_type&& __x)
> diff --git a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> index a9690208b09f..2af516410279 100644
> --- a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> +++ b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
> @@ -253,6 +253,15 @@ test07()
>VERIFY( std::ranges::equal(m, (std::pair[]){{3,4}}) );
>  }
>
> +void
> +test08()
> +{
> +  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue
> keys
> +  std::flat_map m;
> +  const int k = 42;
> +  m[k] = 0;
> +}
> +
>  int
>  main()
>  {
> @@ -266,4 +275,5 @@ main()
>test05();
>test06();
>test07();
> +  test08();
>  }
> diff --git a/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
> b/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
> index 1c5c9a88ab6b..638b269011d4 100644
> --- a/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
> +++ b/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
> @@ -231,6 +231,15 @@ test07()
>VERIFY( std::ranges::equal(m, (std::pair[]){{3,4},{3,3}}) );
>  }
>
> +void
> +test08()
> +{
> +  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue
> keys
> +  std::flat_multimap m;
> +  const int k = 42;
> +  m[k] = 0;
> +}
> +
>  int
>  main()
>  {
> @@ -244,4 +253,5 @@ main()
>test05();
>test06();
>test07();
> +  test08();
>  }
> --
> 2.49.0.654.g845c48a16a
>
>


Re: [PATCH] Fix IPA-SRA issue with reverse SSO on specific pattern

2025-05-27 Thread Martin Jambor
Hi,

On Wed, May 21 2025, Eric Botcazou wrote:
> Hi,
>
> IPA-SRA generally works fine in the presence of reverse Scalar_Storage_Order 
> by propagating the relevant flag onto the newly generated MEM_REFs.  However
> we have been recently faced with a specific Ada pattern that it doesn't 
> handle 
> correctly: 'Valid applied to a floating-point component of an aggregate type 
> with reverse Scalar_Storage_Order.
>
> The attribute is implemented by a call to a specific routine of the runtime 
> that expects a pointer to the object so, in the case of a component with 
> reverse SSO, the compiler first loads it from the aggregate to get back the 
> native storage order, but it does the load using an array of bytes instead of 
> the floating-point type to prevent the FPU from fiddling with the value, 
> which 
> yields in the .original dump file:
>
>   *(character[1:4] *) &F2b = VIEW_CONVERT_EXPR(item.f);
>
> Of course that's a bit convoluted, but it does not seem that another method 
> would be simpler or even work, and using VIEW_CONVERT_EXPR to toggle the SSO 
> is supposed to be supported in any case (unlike aliasing or type punning).
>
> The attached patch makes it work.  While the call to storage_order_barrier_p 
> from IPA-SRA is quite natural (the regular SRA has it too), the tweak to the
> predicate itself is needed to handle the scalar->aggregate conversion, which 
> is admittedly awkward but again without clear alternative.
>
> Tested on x86-64/Linux, OK for the mainline and 15 branch?  Technically, this 
> is a regression in GCC 10.x and later, but the pattern is so specific, even 
> in 
> Ada, that patching earlier branches does not seem worth the hassle.
>
>
> 2025-05-21  Eric Botcazou  
>
>   * ipa-sra.cc (scan_expr_access): Also disqualify storage order
>   barriers from splitting.

The IPA-SRA change is OK.

>   * tree.h (storage_order_barrier_p): Also return false if the
>   operand of the VIEW_CONVERT_EXPR has reverse storage order.

I cannot approve this one (but FWIW it looks OKish to me too).

Thanks,

Martin


>
>
> 2025-05-21  Eric Botcazou  
>
>   * gnat.dg/sso19.adb: New test.
>   * gnat.dg/sso19_pkg.ads, gnat.dg/sso19_pkg.adb: New helper.
>
> -- 
> Eric Botcazou
> diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> index 88bfae9502c..6e6cf895988 100644
> --- a/gcc/ipa-sra.cc
> +++ b/gcc/ipa-sra.cc
> @@ -1848,6 +1848,12 @@ scan_expr_access (tree expr, gimple *stmt, 
> isra_scan_context ctx,
>if (!desc || !desc->split_candidate)
>  return;
>  
> +  if (storage_order_barrier_p (expr))
> +{
> +  disqualify_split_candidate (desc, "Encountered a storage order 
> barrier.");
> +  return;
> +}
> +
>if (!poffset.is_constant (&offset)
>|| !psize.is_constant (&size)
>|| !pmax_size.is_constant (&max_size))
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 99f26177628..1e41316b4c9 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -5499,7 +5499,7 @@ storage_order_barrier_p (const_tree t)
>&& TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (op)))
>  return true;
>  
> -  return false;
> +  return reverse_storage_order_for_component_p (op);
>  }
>  
>  /* Given a DECL or TYPE, return the scope in which it was declared, or


Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread David Malcolm
On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:
> On 4/14/25 9:57 AM, Jason Merrill wrote:
> > On 1/9/25 10:00 PM, Jason Merrill wrote:
> > > Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK for
> > > trunk?
> > 
> > Ping?
> 
> Ping.

Sorry for the delay in responding; comments below...

> 
> > > -- 8< --
> > > 
> > > To respect the #pragma diagnostic lines in libstdc++ headers when
> > > compiling
> > > with module std, we need to represent them in the module.
> > > 
> > > I think it's reasonable to make module_state a friend of
> > > diagnostic_option_classifier to allow direct access to the data. 
> > > This 
> > > is a
> > > different approach from how Jakub made PCH streaming members of
> > > diagnostic_option_classifier, but it seems to me that modules
> > > handling
> > > belongs in module.cc.

Putting it in module.cc looks good to me, though perhaps it should be
just a friend of diagnostic_option_classifier but not of
diagnostic_context?  Could the functions take a
diagnostic_option_classifier rather than a diagnostic_context? 
diagnostic_context is something of a "big blob" of a class.

[...snip...]

> > > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> > > index 78fb21dc22f..49c9c092163 100644
> > > --- a/gcc/cp/module.cc
> > > +++ b/gcc/cp/module.cc

[...snip...]

> > > @@ -17637,6 +17640,78 @@ module_state::write_ordinary_maps (elf_out 
> > > *to, range_t &info,
> > >     dump.outdent ();
> > >   }
> > > +/* Write out any #pragma GCC diagnostic info to the .dgc section.  */
> > > +
> > > +void
> > > +module_state::write_diagnostic_classification (elf_out *to,
> > > +   diagnostic_context *dc,
> > > +   unsigned *crc_p)
> > > +{
> > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > +
> > > +  dump () && dump ("Writing diagnostic change locations");
> > > +  dump.indent ();
> > > +
> > > +  bytes_out sec (to);
> > > +  if (sec.streaming_p ())
> > > +    sec.begin ();
> > > +
> > > +  unsigned len = changes.length ();
> > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > +  if (sec.streaming_p ())
> > > +    sec.u (len);
> > > +
> > > +  for (const auto &c: changes)
> > > +    {
> > > +  write_location (sec, c.location);
> > > +  if (sec.streaming_p ())
> > > +    {
> > > +  sec.u (c.option);
> > > +  sec.u (c.kind);
> > > +    }
> > > +    }

I confess I don't fully understand the module code yet - in particular
the streaming vs non-streaming distinction.  What are the "if
(sec.streaming_p ())" guards doing here?  It looks like it can be false if
the param "elf_out *to" is null (can that happen?), and if it's false,
then this function essentially becomes a no-op.  Is that what we want?


> > > +
> > > +  if (sec.streaming_p ())
> > > +    sec.end (to, to->name (MOD_SNAME_PFX ".dgc"), crc_p);
> > > +  dump.outdent ();
> > > +}
> > > +
> > > +/* Read any #pragma GCC diagnostic info from the .dgc section.  */
> > > +
> > > +bool
> > > +module_state::read_diagnostic_classification (diagnostic_context *dc)
> > > +{
> > > +  bytes_in sec;
> > > +
> > > +  if (!sec.begin (loc, from (), MOD_SNAME_PFX ".dgc"))
> > > +    return false;
> > > +
> > > +  dump () && dump ("Reading diagnostic change locations");
> > > +  dump.indent ();
> > > +
> > > +  unsigned len = sec.u ();
> > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > +
> > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > +  unsigned offset = changes.length ();
> > > +  changes.reserve (len);
> > > +  for (unsigned i = 0; i < len; ++i)
> > > +    {
> > > +  location_t loc = read_location (sec);
> > > +  int opt = sec.u ();
> > > +  diagnostic_t kind = (diagnostic_t) sec.u ();
> > > +  if (kind == DK_POP)
> > > +    opt += offset;
> > > +  changes.quick_push ({ loc, opt, kind });
> > > +    }
> > > +
> > > +  dump.outdent ();
> > > +  if (!sec.end (from ()))
> > > +    return false;
> > > +
> > > +  return true;
> > > +}
> > > +
> > >   void
> > >   module_state::write_macro_maps (elf_out *to, range_t &info, unsigned 
> > > *crc_p)
> > >   {
> > > @@ -19231,6 +19306,8 @@ module_state::write_begin (elf_out *to, 
> > > cpp_reader *reader,
> > >     if (is_header ())
> > >   macros = prepare_macros (reader);
> > > +  write_diagnostic_classification (nullptr, global_dc, nullptr);
> > > +
> > >     config.num_imports = mod_hwm;
> > >     config.num_partitions = modules->length () - mod_hwm;
> > >     auto map_info = write_prepare_maps (&config, bool 
> > > (config.num_partitions));
> > > @@ -19372,7 +19449,10 @@ module_state::write_begin (elf_out *to, 
> > > cpp_reader *reader,
> > >     /* Write the line maps.  */
> > >     if (config.ordinary_locs)
> > > -    write_ordinary_maps (to, map_info, bool (config.num_partitions), 
> > > &crc);
> > > +    {
> > > +  write_ordinary_maps (to, map_info, bool 
> > > (config.num_partitions), &crc);
> > > +  write_diagnostic_classi

Remove dead code in auto-profile.cc

2025-05-27 Thread Jan Hubicka
Hi,
this code to track which locations were used when reading the auto-fdo profile
seems to have been dead since the initial commit, so I removed it.

Committed as obvious.
Honza

gcc/ChangeLog:

* auto-profile.cc (function_instance::mark_annotated): Remove.
(function_instance::total_annotated_count): Remove.
(autofdo_source_profile::mark_annotated): Remove.
(afdo_set_bb_count): Do not mark annotated locations.
(afdo_annotate_cfg): Likewise.

diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 9966d9312e3..91d829908d2 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -151,7 +151,6 @@ public:
  Each inline stack should only be used to annotate IR once.
  This will be enforced when instruction-level discriminator
  is supported.  */
-  bool annotated;
 };
 
 /* operator< for "const char *".  */
@@ -242,9 +241,6 @@ public:
  MAP, return the total count for all inlined indirect calls.  */
   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
 
-  /* Sum of counts that is used during annotation.  */
-  gcov_type total_annotated_count () const;
-
   /* Mark LOC as annotated.  */
   void mark_annotated (location_t loc);
 
@@ -314,9 +310,6 @@ public:
  Return true if INFO is updated.  */
   bool update_inlined_ind_target (gcall *stmt, count_info *info);
 
-  /* Mark LOC as annotated.  */
-  void mark_annotated (location_t loc);
-
 private:
   /* Map from function_instance name index (in string_table) to
  function_instance.  */
@@ -578,17 +571,6 @@ function_instance::get_count_info (location_t loc, 
count_info *info) const
   return true;
 }
 
-/* Mark LOC as annotated.  */
-
-void
-function_instance::mark_annotated (location_t loc)
-{
-  position_count_map::iterator iter = pos_counts.find (loc);
-  if (iter == pos_counts.end ())
-return;
-  iter->second.annotated = true;
-}
-
 /* Read the inlined indirect call target profile for STMT and store it in
MAP, return the total count for all inlined indirect calls.  */
 
@@ -685,22 +667,6 @@ function_instance::read_function_instance 
(function_instance_stack *stack,
   return s;
 }
 
-/* Sum of counts that is used during annotation.  */
-
-gcov_type
-function_instance::total_annotated_count () const
-{
-  gcov_type ret = 0;
-  for (callsite_map::const_iterator iter = callsites.begin ();
-   iter != callsites.end (); ++iter)
-ret += iter->second->total_annotated_count ();
-  for (position_count_map::const_iterator iter = pos_counts.begin ();
-   iter != pos_counts.end (); ++iter)
-if (iter->second.annotated)
-  ret += iter->second.count;
-  return ret;
-}
-
 /* Member functions for autofdo_source_profile.  */
 
 autofdo_source_profile::~autofdo_source_profile ()
@@ -748,21 +714,6 @@ autofdo_source_profile::get_count_info (location_t 
gimple_loc,
   return s->get_count_info (stack[0].second, info);
 }
 
-/* Mark LOC as annotated.  */
-
-void
-autofdo_source_profile::mark_annotated (location_t loc)
-{
-  inline_stack stack;
-  get_inline_stack (loc, &stack);
-  if (stack.length () == 0)
-return;
-  function_instance *s = get_function_instance_by_inline_stack (stack);
-  if (s == NULL)
-return;
-  s->mark_annotated (stack[0].second);
-}
-
 /* Update value profile INFO for STMT from the inlined indirect callsite.
Return true if INFO is updated.  */
 
@@ -1118,8 +1069,6 @@ static bool
 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
 {
   gimple_stmt_iterator gsi;
-  edge e;
-  edge_iterator ei;
   gcov_type max_count = 0;
   bool has_annotated = false;
 
@@ -1172,20 +1121,6 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
&promoted)
return false;
 }
 
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
-  for (gphi_iterator gpi = gsi_start_phis (bb);
-   !gsi_end_p (gpi);
-   gsi_next (&gpi))
-{
-  gphi *phi = gpi.phi ();
-  size_t i;
-  for (i = 0; i < gimple_phi_num_args (phi); i++)
-afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
-}
-  FOR_EACH_EDGE (e, ei, bb->succs)
-  afdo_source_profile->mark_annotated (e->goto_locus);
-
   bb->count = profile_count::from_gcov_type (max_count).afdo ();
   return true;
 }
@@ -1607,10 +1542,6 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
   = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
   set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
 }
-  afdo_source_profile->mark_annotated (
-  DECL_SOURCE_LOCATION (current_function_decl));
-  afdo_source_profile->mark_annotated (cfun->function_start_locus);
-  afdo_source_profile->mark_annotated (cfun->function_end_locus);
   if (max_count.nonzero_p())
 {
   /* Calculate, propagate count and probability information on CFG.  */


Re: [PATCH v25 0/3] c: Add _Countof and <stdcountof.h>

2025-05-27 Thread Joseph Myers
Thanks, I've committed these patches, with additional commit message 
changes to reference PR117025 in the standard way for GCC so that Bugzilla 
picks up the commits automatically.

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH v25 0/3] c: Add _Countof and <stdcountof.h>

2025-05-27 Thread Jakub Jelinek
On Tue, May 27, 2025 at 08:22:28PM +, Joseph Myers wrote:
> Thanks, I've committed these patches, with additional commit message 
> changes to reference PR117025 in the standard way for GCC so that Bugzilla 
> picks up the commits automatically.

--- a/gcc/c/c-parser.cc 

  
+++ b/gcc/c/c-parser.cc 

  
@@ -10649,6 +10649,10 @@ c_parser_sizeof_or_countof_expression (c_parser 
*parser, enum rid rid)  
 


  
   start = c_parser_peek_token (parser)->location;  

  


  
+  if (rid == RID_COUNTOF)  

  
+pedwarn_c23 (start, OPT_Wpedantic, 

  
+"ISO C does not support %qs before C23", op_name); 

  
+   

  
   c_parser_consume_token (parser); 

  
   c_inhibit_evaluation_warnings++; 

  
   if (rid == RID_COUNTOF)  

  

The C23 in there looks like pasto, should be C2Y.

Jakub



Re: [PATCH] doc: Correct the return type of float comparison

2025-05-27 Thread Joseph Myers
On Fri, 23 May 2025, Trevor Gross wrote:

> +Comparison functions return a CMPtype which is a signed integer of
> +target-depdent size.  Typically CMPtype will be word-sized, but other 
> backends
> +may override this with the TARGET_LIBGCC_CMP_RETURN_MODE hook.  Of note,
> +AArch64 uses an single-int as the return type, and AVR uses a quarter-int.

@code{CMPtype}, @code{TARGET_LIBGCC_CMP_RETURN_MODE}, and 
s/depdent/dependent/.

-- 
Joseph S. Myers
josmy...@redhat.com



Test suite failures.

2025-05-27 Thread Jerry D

After my last commit, I always rerun make check-fortran.

Now I see a bunch of fails. I reverted my patch locally and did a 
rebuild and I still see these. Harald's patch was still in there.


No failures after reverting this:

commit r16-914-g787a8dec1acedf5561c8ee43bed0b3653fca150d
Author: Harald Anlauf 

As my daughter would say. It happens to the best of us.

Jerry

--- snip ---

FAIL: gfortran.dg/associate_68.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/associate_68.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/associate_65.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/coarray_sync.f90   -O  (test for excess errors)
FAIL: gfortran.dg/data_pointer_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/data_inquiry_ref.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O   (test for errors, line 19)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O   (test for errors, line 20)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_4.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O0  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O1  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O2  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -Os  (test for excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O3 -g  (test for excess 
errors)

FAIL: gfortran.dg/parameter_array_init_8.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/pr114739.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/pr87994_2.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)

FAIL: gfortran.dg/pr87994_3.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/pr87945_2.f90   -O   (test for errors, line 5)
FAIL: gfortran.dg/pr87945_2.f90   -O   (test for errors, line 6)
FAIL: gfortran.dg/pr87945_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr91296.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95373_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95502.f90   -O   (test for errors, line 7)
FAIL: gfortran.dg/pr95502.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95503.f90   -O   (test for errors, line 6)
FAIL: gfortran.dg/pr95503.f90   -O  (test for excess errors)
FAIL: gfortran.dg/statement_function_5.f90   -O  (test for excess errors)

Running /home/jerry/dev/trunk/gcc/testsuite/gfortran.dg/f202y/f202y.exp ...
FAIL: gfortran.dg/goacc/ref_inquiry.f90   -O   (test for errors, line 29)
FAIL: gfortran.dg/goacc/ref_inquiry.f90   -O   (test for errors, line 30)
FAIL: gfortran.dg/goacc/ref_inquiry.f90   -O   (test for errors, line 39)
FAIL: gfortran.dg/goacc/ref_inquiry.f90   -O   (test for errors, line 40)
FAIL: gfortran.dg/goacc/ref_inquiry.f90   -O  (test for excess errors)

R

Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread Jason Merrill

On 5/27/25 1:33 PM, David Malcolm wrote:

On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:

On 4/14/25 9:57 AM, Jason Merrill wrote:

On 1/9/25 10:00 PM, Jason Merrill wrote:

Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK for
trunk?


Ping?


Ping.


Sorry for the delay in responding; comments below...




-- 8< --

To respect the #pragma diagnostic lines in libstdc++ headers when
compiling
with module std, we need to represent them in the module.

I think it's reasonable to make module_state a friend of
diagnostic_option_classifier to allow direct access to the data.
This
is a
different approach from how Jakub made PCH streaming members of
diagnostic_option_classifier, but it seems to me that modules
handling
belongs in module.cc.


Putting it in module.cc looks good to me, though perhaps it should be
just a friend of diagnostic_option_classifier but not of
diagnostic_context?  Could the functions take a
diagnostic_option_classifier rather than a diagnostic_context?
diagnostic_context is something of a "big blob" of a class.


The friend in diagnostic_context is to be able to name 
m_option_classifier.  We could instead make that member public?



[...snip...]

+  bytes_out sec (to);
+  if (sec.streaming_p ())
+    sec.begin ();


I confess I don't fully understand the module code yet - in particular
the streaming vs non-streaming distinction.  What are the "if
(sec.streaming_p ())" guards doing here?  It looks it can be false if
the param "elf_out *to" is null (can that happen?), and if it's false,
then this function essentially becomes a no-op.  Is that what we want?


Hmm, perhaps an early if (!sec.streaming_p ()) return would be simpler, 
I'll try that.


Jason



Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread David Malcolm
On Tue, 2025-05-27 at 17:21 -0400, Patrick Palka wrote:
> 
> On Tue, 27 May 2025, Patrick Palka wrote:
> 
> > On Tue, 27 May 2025, David Malcolm wrote:
> > 
> > > On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:
> > > > On 4/14/25 9:57 AM, Jason Merrill wrote:
> > > > > On 1/9/25 10:00 PM, Jason Merrill wrote:
> > > > > > Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK
> > > > > > for
> > > > > > trunk?
> > > > > 
> > > > > Ping?
> > > > 
> > > > Ping.
> > > 
> > > Sorry for the delay in responding; comments below...
> > > 
> > > > 
> > > > > > -- 8< --
> > > > > > 
> > > > > > To respect the #pragma diagnostic lines in libstdc++
> > > > > > headers when
> > > > > > compiling
> > > > > > with module std, we need to represent them in the module.
> > > > > > 
> > > > > > I think it's reasonable to make module_state a friend of
> > > > > > diagnostic_option_classifier to allow direct access to the
> > > > > > data. 
> > > > > > This 
> > > > > > is a
> > > > > > different approach from how Jakub made PCH streaming
> > > > > > members of
> > > > > > diagnostic_option_classifier, but it seems to me that
> > > > > > modules
> > > > > > handling
> > > > > > belongs in module.cc.
> > > 
> > > Putting it in module.cc looks good to me, though perhaps it
> > > should be
> > > just a friend of diagnostic_option_classifier but not of
> > > diagnostic_context?  Could the functions take a
> > > diagnostic_option_classifier rather than a diagnostic_context? 
> > > diagnostic_context is something of a "big blob" of a class.
> > > 
> > > [...snip...]
> > > 
> > > > > > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> > > > > > index 78fb21dc22f..49c9c092163 100644
> > > > > > --- a/gcc/cp/module.cc
> > > > > > +++ b/gcc/cp/module.cc
> > > 
> > > [...snip...]
> > > 
> > > > > > @@ -17637,6 +17640,78 @@ module_state::write_ordinary_maps
> > > > > > (elf_out 
> > > > > > *to, range_t &info,
> > > > > >     dump.outdent ();
> > > > > >   }
> > > > > > +/* Write out any #pragma GCC diagnostic info to the .dgc
> > > > > > section.  */
> > > > > > +
> > > > > > +void
> > > > > > +module_state::write_diagnostic_classification (elf_out
> > > > > > *to,
> > > > > > +   diagnostic_context *dc,
> > > > > > +   unsigned *crc_p)
> > > > > > +{
> > > > > > +  auto &changes = dc-
> > > > > > >m_option_classifier.m_classification_history;
> > > > > > +
> > > > > > +  dump () && dump ("Writing diagnostic change locations");
> > > > > > +  dump.indent ();
> > > > > > +
> > > > > > +  bytes_out sec (to);
> > > > > > +  if (sec.streaming_p ())
> > > > > > +    sec.begin ();
> > > > > > +
> > > > > > +  unsigned len = changes.length ();
> > > > > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > > > > +  if (sec.streaming_p ())
> > > > > > +    sec.u (len);
> > > > > > +
> > > > > > +  for (const auto &c: changes)
> > > > > > +    {
> > > > > > +  write_location (sec, c.location);
> > > > > > +  if (sec.streaming_p ())
> > > > > > +    {
> > > > > > +  sec.u (c.option);
> > > > > > +  sec.u (c.kind);
> > > > > > +    }
> > > > > > +    }
> > > 
> > > I confess I don't fully understand the module code yet - in
> > > particular
> > > the streaming vs non-streaming distinction.  What are the "if
> > > (sec.streaming_p ())" guards doing here?  It looks it can be
> > > false if
> > > the param "elf_out *to" is null (can that happen?), and if it's
> > > false,
> > > then this function essentially becomes a no-op.  Is that what we
> > > want?
> > 
> > When streaming_p is false then we're not serializing, we're just
> > doing
> > dependency analysis, the same walking code is used for both.  The
> > trees_out
> > class definition has the following comment:
> > 
> > /* The walk is used for three similar purposes:
> > 
> >   1. The initial scan for dependencies.
> >   2. Once dependencies have been found, ordering them.
> >   3. Writing dependencies to file (streaming_p).
> > 
> >  For cases where it matters, these accessers can be used to
> > determine
> >  which state we're in.  */
> > 
> > But it seems we don't need to check it here, since streaming_p will
> > always
> > be true at the current call sites of
> > write_diagnostic_classification?
> 
> Never mind, streaming_p is clearly false for the first call.

Thanks for the clarifications

Dave



[PATCH v2 2/3] RISC-V: Reconcile the existing test for avg_floor

2025-05-27 Thread pan2 . li
From: Pan Li 

Some existing avg_floor tests need to be updated due to the change to
leverage vaadd.vv directly.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/avg-1.c: Update asm check
to vaadd.
* gcc.target/riscv/rvv/autovec/vls/avg-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/avg-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c | 5 ++---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c | 5 ++---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c | 5 ++---
 .../gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c   | 7 ++-
 .../gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c   | 7 ++-
 5 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c
index 30e60d520d6..4920fa6ad41 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-1.c
@@ -25,9 +25,8 @@ DEF_AVG_FLOOR (uint8_t, uint16_t, 512)
 DEF_AVG_FLOOR (uint8_t, uint16_t, 1024)
 DEF_AVG_FLOOR (uint8_t, uint16_t, 2048)
 
-/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */
-/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 20 } } */
+/* { dg-final { scan-assembler-times {vaadd\.vv} 10 } } */
 /* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c
index 33df429a634..c6a120b7613 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-2.c
@@ -23,9 +23,8 @@ DEF_AVG_FLOOR (uint16_t, uint32_t, 256)
 DEF_AVG_FLOOR (uint16_t, uint32_t, 512)
 DEF_AVG_FLOOR (uint16_t, uint32_t, 1024)
 
-/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
-/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */
-/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 18 } } */
+/* { dg-final { scan-assembler-times {vaadd\.vv} 9 } } */
 /* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c
index 9058905e3f5..2838c1ed106 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/avg-3.c
@@ -21,9 +21,8 @@ DEF_AVG_FLOOR (uint32_t, uint64_t, 128)
 DEF_AVG_FLOOR (uint32_t, uint64_t, 256)
 DEF_AVG_FLOOR (uint32_t, uint64_t, 512)
 
-/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
-/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */
-/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 16 } } */
+/* { dg-final { scan-assembler-times {vaadd\.vv} 8 } } */
 /* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c
index 5880ccca477..b7246a38dba 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c
@@ -3,9 +3,6 @@
 
 #include "vec-avg-template.h"
 
-/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */
-/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */
-/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */
-/* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 6 } } */
 /* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */
+/* { dg-final { scan-assembler-times {vaadd\.vv} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c
index 916f33d9f13..3ffe0ef39ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c
@@ -3,9 +3,6 @@
 
 #include "vec-avg-template.h"
 
-/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
-/* { dg-final { s

[PATCH v2 1/3] RISC-V: Leverage vaadd.vv for signed standard name avg_floor

2025-05-27 Thread pan2 . li
From: Pan Li 

The signed avg_floor totally matches the semantics of the fixed point
rvv insn vaadd, with round down.  Thus, leverage it directly
to implement the avg_floor.

The spec of RVV is somehow not that clear about the difference
between the float point and fixed point for the rounding that
discard least-significant information.

For float point which is not two's complement, the "discard
least-significant information" indicates truncation round.  For
example as below:

* 3.5 -> 3
* -2.3 -> -2

For fixed point which is two's complement, the "discard
least-significant information" indicates round down.  For
example as below:

* 3.5 -> 3
* -2.3 -> -3

And the vaadd takes the round down which is totally matching
the semantics of the avg_floor.

The below test suites are passed for this patch series.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (avg3_floor): Add insn
expand to leverage vaadd directly.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/autovec.md | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 9e51e3ce6a3..a54f552a80c 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2491,19 +2491,12 @@ (define_expand "avg3_floor"
   (sign_extend:VWEXTI
(match_operand: 2 "register_operand"))]
   "TARGET_VECTOR"
-{
-  /* First emit a widening addition.  */
-  rtx tmp1 = gen_reg_rtx (mode);
-  rtx ops1[] = {tmp1, operands[1], operands[2]};
-  insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, mode);
-  riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
-
-  /* Then a narrowing shift.  */
-  rtx ops2[] = {operands[0], tmp1, const1_rtx};
-  icode = code_for_pred_narrow_scalar (ASHIFTRT, mode);
-  riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
-  DONE;
-})
+  {
+insn_code icode = code_for_pred (UNSPEC_VAADD, mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, 
operands);
+DONE;
+  }
+)
 
 (define_expand "avg3_ceil"
  [(set (match_operand: 0 "register_operand")
-- 
2.43.0



[PATCH v2 0/3] Refine the avg_floor with fixed point vaadd

2025-05-27 Thread pan2 . li
From: Pan Li 

The spec of RVV is somehow not that clear about the difference
between the float point and fixed point for the rounding that
discard least-significant information.

For float point which is not two's complement, the "discard
least-significant information" indicates truncation round.  For
example as below:

* 3.5 -> 3
* -2.3 -> -2

For fixed point which is two's complement, the "discard
least-significant information" indicates round down.  For
example as below:

* 3.5 -> 3
* -2.3 -> -3

And the vaadd takes the round down which is totally matching
the semantics of the avg_floor.  Thus, leverage it to implement
the avg_floor.

The below test suites are passed for this patch series.
* The rv64gcv fully regression test.

Pan Li (3):
  RISC-V: Leverage vaadd.vv for signed standard name avg_floor
  RISC-V: Reconcile the existing test for avg_floor
  RISC-V: Add test cases for avg_floor vaadd implementation

 gcc/config/riscv/autovec.md   |  19 +-
 .../gcc.target/riscv/rvv/autovec/avg.h|  23 +++
 .../gcc.target/riscv/rvv/autovec/avg_data.h   | 185 ++
 .../rvv/autovec/avg_floor-1-i16-from-i32.c|  12 ++
 .../rvv/autovec/avg_floor-1-i16-from-i64.c|  12 ++
 .../rvv/autovec/avg_floor-1-i32-from-i64.c|  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i16.c |  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i32.c |  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i64.c |  12 ++
 .../autovec/avg_floor-run-1-i16-from-i32.c|  16 ++
 .../autovec/avg_floor-run-1-i16-from-i64.c|  16 ++
 .../autovec/avg_floor-run-1-i32-from-i64.c|  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i16.c |  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i32.c |  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i64.c |  16 ++
 .../gcc.target/riscv/rvv/autovec/avg_run.h|  26 +++
 .../gcc.target/riscv/rvv/autovec/vls/avg-1.c  |   5 +-
 .../gcc.target/riscv/rvv/autovec/vls/avg-2.c  |   5 +-
 .../gcc.target/riscv/rvv/autovec/vls/avg-3.c  |   5 +-
 .../riscv/rvv/autovec/widen/vec-avg-rv32gcv.c |   7 +-
 .../riscv/rvv/autovec/widen/vec-avg-rv64gcv.c |   7 +-
 21 files changed, 418 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i32-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i32-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_run.h

-- 
2.43.0



[PATCH v2 3/3] RISC-V: Add test cases for avg_floor vaadd implementation

2025-05-27 Thread pan2 . li
From: Pan Li 

Add asm and run testcase for avg_floor vaadd implementation.

The below test suites are passed for this patch series.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/avg.h: New test.
* gcc.target/riscv/rvv/autovec/avg_data.h: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i32.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i32-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i16.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i32.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i32.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i32-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i16.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i32.c: New test.
* gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i64.c: New test.
* gcc.target/riscv/rvv/autovec/avg_run.h: New test.

Signed-off-by: Pan Li 
---
 .../gcc.target/riscv/rvv/autovec/avg.h|  23 +++
 .../gcc.target/riscv/rvv/autovec/avg_data.h   | 185 ++
 .../rvv/autovec/avg_floor-1-i16-from-i32.c|  12 ++
 .../rvv/autovec/avg_floor-1-i16-from-i64.c|  12 ++
 .../rvv/autovec/avg_floor-1-i32-from-i64.c|  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i16.c |  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i32.c |  12 ++
 .../rvv/autovec/avg_floor-1-i8-from-i64.c |  12 ++
 .../autovec/avg_floor-run-1-i16-from-i32.c|  16 ++
 .../autovec/avg_floor-run-1-i16-from-i64.c|  16 ++
 .../autovec/avg_floor-run-1-i32-from-i64.c|  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i16.c |  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i32.c |  16 ++
 .../rvv/autovec/avg_floor-run-1-i8-from-i64.c |  16 ++
 .../gcc.target/riscv/rvv/autovec/avg_run.h|  26 +++
 15 files changed, 402 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i16-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i32-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-1-i8-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i16-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i32-from-i64.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_floor-run-1-i8-from-i64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_run.h

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h
new file mode 100644
index 000..746c635ae57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg.h
@@ -0,0 +1,23 @@
+#ifndef HAVE_DEFINED_AVG_H
+#define HAVE_DEFINED_AVG_H
+
+#include <stdint.h>
+
+#define DEF_AVG_0(NT, WT, NAME) \
+__attribute__((noinline))   \
+void\
+test_##NAME##_##WT##_##NT##_0(NT * restrict a, NT * restrict b, \
+ NT * restrict out, int n) \
+{   \
+  for (int i = 0; i < n; i++) { \
+out[i] = (NT)(((WT)a[i] + (WT)b[i]) >> 1);  \
+  } \
+}
+#define DEF_AVG_0_WRAP(NT, WT, NAME) DEF_AVG_0(NT, WT, NAME)
+
+#define RUN_AVG_0(NT, WT, NAME, a, b, out, n) \
+  test_##NAME##_##WT##_##NT##_0(a, b, out, n)
+#define RUN_AVG_0_WRAP(NT, WT, NAME, a, b, out, n) \
+  RUN_AVG_0(NT, WT, NAME, a, b, out, n)
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/avg_data.h
new file mode 100644
index 000..cbeed147a56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/a

Re: [PATCH v25 0/3] c: Add _Countof and

2025-05-27 Thread Alejandro Colomar
Hi Jakub, Joseph,

On Tue, May 27, 2025 at 10:28:23PM +0200, Jakub Jelinek wrote:
> On Tue, May 27, 2025 at 08:22:28PM +, Joseph Myers wrote:
> > Thanks, I've committed these patches, with additional commit message 
> > changes to reference PR117025 in the standard way for GCC so that Bugzilla 
> > picks up the commits automatically.

Thanks a lot!!!  :-)

> --- a/gcc/c/c-parser.cc   
>   
>   
> +++ b/gcc/c/c-parser.cc   
>   
>   
> @@ -10649,6 +10649,10 @@ c_parser_sizeof_or_countof_expression (c_parser 
> *parser, enum rid rid)
>
>   
>   
>   
>start = c_parser_peek_token (parser)->location;
>   
>   
>   
>   
>   
> +  if (rid == RID_COUNTOF)
>   
>   
> +pedwarn_c23 (start, OPT_Wpedantic,   
>   
>   
> +"ISO C does not support %qs before C23", op_name);   
>   
>   
> + 
>   
>   
>c_parser_consume_token (parser);   
>   
>   
>c_inhibit_evaluation_warnings++;   
>   
>   
>if (rid == RID_COUNTOF)
>   
>   
> 
> The C23 in there looks like pasto, should be C2Y.

Oopsy!  Sorry!  Please fix, yep, it's a pasto.  :)


Have a lovely night!
Alex

> 
>   Jakub
> 

-- 



signature.asc
Description: PGP signature


[committed, 13 branch] libstdc++: Fix backported test [PR112490]

2025-05-27 Thread Patrick Palka
On the 13 branch and older, C++ >= 20 tests need an explicit dg-options
directive specifying the -std flag, otherwise they won't run by default.

PR libstdc++/112490

libstdc++-v3/ChangeLog:

* testsuite/24_iterators/const_iterator/112490.cc: Add
dg-options directive.
---
 libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc 
b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
index 9bb154847cff..e6e74eb675d4 100644
--- a/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
+++ b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
@@ -1,3 +1,4 @@
+// { dg-options "-std=gnu++23" }
 // { dg-do compile { target c++23 } }
 
 // PR libstdc++/112490 - infinite meta error in
-- 
2.49.0.654.g845c48a16a



Re: [AUTOFDO][AARCH64] Add support for profilebootstrap

2025-05-27 Thread Kugan Vivekanandarajah


> On 26 May 2025, at 2:47 pm, Kugan Vivekanandarajah  
> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> > On 26 May 2025, at 2:25 pm, Andrew Pinski  wrote:
> > 
> > External email: Use caution opening links or attachments
> > 
> > 
> > On Tue, May 20, 2025 at 3:09 AM Kugan Vivekanandarajah
> >  wrote:
> >> 
> >> Thanks Richard for the review.
> >> 
> >>> On 20 May 2025, at 2:47 am, Richard Sandiford  
> >>> wrote:
> >>> 
> >>> External email: Use caution opening links or attachments
> >>> 
> >>> 
> >>> Kugan Vivekanandarajah  writes:
>  diff --git a/Makefile.in b/Makefile.in
>  index b1ed67d3d4f..b5e3e520791 100644
>  --- a/Makefile.in
>  +++ b/Makefile.in
>  @@ -4271,7 +4271,7 @@ all-stageautoprofile-bfd: 
>  configure-stageautoprofile-bfd
>  $(HOST_EXPORTS) \
>  $(POSTSTAGE1_HOST_EXPORTS)  \
>  cd $(HOST_SUBDIR)/bfd && \
>  - $$s/gcc/config/i386/$(AUTO_PROFILE) \
>  + $$s/gcc/config/@cpu_type@/$(AUTO_PROFILE) \
>  $(MAKE) $(BASE_FLAGS_TO_PASS) \
>  CFLAGS="$(STAGEautoprofile_CFLAGS)" \
>  GENERATOR_CFLAGS="$(STAGEautoprofile_GENERATOR_CFLAGS)" \
> >>> 
> >>> The usual style seems to be to assign @foo@ to a makefile variable
> >>> called foo or FOO, rather than to use @foo@ directly in rules.  Otherwise
> >>> the makefile stuff looks good.
> >>> 
> >>> I don't feel qualified to review the script, but some general shell stuff:
> >>> 
>  diff --git a/gcc/config/aarch64/gcc-auto-profile 
>  b/gcc/config/aarch64/gcc-auto-profile
>  new file mode 100755
>  index 000..0ceec035e69
>  --- /dev/null
>  +++ b/gcc/config/aarch64/gcc-auto-profile
>  @@ -0,0 +1,51 @@
>  +#!/bin/sh
>  +# Profile workload for gcc profile feedback (autofdo) using Linux perf.
>  +# Copyright The GNU Toolchain Authors.
>  +#
>  +# This file is part of GCC.
>  +#
>  +# GCC is free software; you can redistribute it and/or modify it under
>  +# the terms of the GNU General Public License as published by the Free
>  +# Software Foundation; either version 3, or (at your option) any later
>  +# version.
>  +
>  +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
>  +# WARRANTY; without even the implied warranty of MERCHANTABILITY or
>  +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
>  +# for more details.
>  +
>  +# You should have received a copy of the GNU General Public License
>  +# along with GCC; see the file COPYING3.  If not see
>  +# .  */
>  +
>  +# Run perf record with branch stack sampling and check for
>  +# specific error message to see if it is supported.
>  +use_brbe=true
>  +output=$(perf record -j any,u ls 2>&1)
> >>> 
> >>> How about using /bin/true rather than ls for the test program?
> >>> 
>  +if [[ "$output" = *"Error::P: PMU Hardware or event type doesn't 
>  support branch stack sampling."* ]]; then
> >>> 
> >>> [[ isn't POSIX, or at least dash doesn't accept it.  Since this script
> >>> is effectively linux-specific, we can probably assume that /bin/bash
> >>> exists and use that in the #! line.
> >>> 
> >>> If we use bash, then the test could use =~ rather than an exact match.
> >>> This could be useful if perf prints other diagnostics besides the
> >>> one being tested for, or if future versions of perf alter the wording
> >>> slightly.
> >>> 
>  +  use_brbe=false
>  +fi
>  +
>  +FLAGS=u
>  +if [ "$1" = "--kernel" ] ; then
>  +  FLAGS=k
>  +  shift
>  +fi
>  +if [ "$1" = "--all" ] ; then
> >>> 
> >>> How about making this an elif, so that we don't accept --kernel --all?
> >>> 
>  +  FLAGS=u,k
>  +  shift
>  +fi
>  +
>  +if [ "$use_brbe" = true ] ; then
>  +  if grep -q hypervisor /proc/cpuinfo ; then
>  +echo >&2 "Warning: branch profiling may not be functional in VMs"
>  +  fi
>  +  set -x
>  +  perf record -j any,$FLAGS "$@"
>  +  set +x
>  +else
>  +  set -x
>  +  echo >&2 "Warning: branch profiling may not be functional without 
>  BRBE"
>  +  perf record "$@"
>  +  set +x
> >>> 
> >>> Putting the set -x after the echo seems better, as for the "then" branch.
> >> 
> >> Here is the revised version that handles the above comments.
> > 
> > 
> >>  * Makefile.def: AUTO_PROFILE based on cpu_type.
> >>  * Makefile.in: Likewise.
> > 
> > Makefile.in is a generated file (from Makefile.def and Makefile.tpl),
> > It looks like you edited the file instead of regenerated it.
> > Can you please regenerate the file and/or provide the corresponding
> > corrected changes to Makefile.def/Makefile.tpl which was used to
> > regenerate Makefile.in?
> > 
> > This is what 
> > https://gcc.gnu.org/pipermail/gcc-testresults/2025-May/848013.html
> > is about too.
> 
> 
> Apolog

Re: Test suite failures.

2025-05-27 Thread Harald Anlauf

Jerry, all,

that was entirely my fault - attempting a last-minute cleanup
that reordered code, trying to use a refactoring.  I've put
on my brown bag and pushed a correction as obvious as:

r16-921-g74a2281ae18c6d.

See attached.

Caveat: this was tested on top of r16-915, as I cannot compile
anything after r16-916.

Let me know if that works for you.

Cheers,
Harald

On 5/27/25 22:42, Jerry D wrote:

After my last commit, I always rerun make check-fortran.

Now I see a bunch of fails. I reverted my patch locally and did a 
rebuild and I still see these. Harald's patch is still in there.


No failures after reverting this:

commit r16-914-g787a8dec1acedf5561c8ee43bed0b3653fca150d
Author: Harald Anlauf 

As my daughter would say. It happens to the best of us.

Jerry

--- snip ---

FAIL: gfortran.dg/associate_68.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -O3 -fomit-frame-pointer -funroll- 
loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)

FAIL: gfortran.dg/associate_68.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/associate_68.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -O3 -fomit-frame-pointer -funroll- 
loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)

FAIL: gfortran.dg/associate_65.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/associate_65.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/coarray_sync.f90   -O  (test for excess errors)
FAIL: gfortran.dg/data_pointer_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/data_inquiry_ref.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O   (test for errors, line 19)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O   (test for errors, line 20)
FAIL: gfortran.dg/inquiry_type_ref_3.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_4.f90   -O  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O0  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O1  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O2  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O3 -fomit-frame-pointer - 
funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
errors)

FAIL: gfortran.dg/inquiry_type_ref_1.f08   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/inquiry_type_ref_1.f08   -Os  (test for excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O0  (test for excess 
errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O1  (test for excess 
errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O2  (test for excess 
errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for 
excess errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -O3 -g  (test for excess 
errors)
FAIL: gfortran.dg/parameter_array_init_8.f90   -Os  (test for excess 
errors)

FAIL: gfortran.dg/pr114739.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -O3 -fomit-frame-pointer -funroll- 
loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)

FAIL: gfortran.dg/pr87994_2.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/pr87994_2.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O0  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O1  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O2  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -O3 -fomit-frame-pointer -funroll- 
loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)

FAIL: gfortran.dg/pr87994_3.f90   -O3 -g  (test for excess errors)
FAIL: gfortran.dg/pr87994_3.f90   -Os  (test for excess errors)
FAIL: gfortran.dg/pr87945_2.f90   -O   (test for errors, line 5)
FAIL: gfortran.dg/pr87945_2.f90   -O   (test for errors, line 6)
FAIL: gfortran.dg/pr87945_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr91296.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95373_2.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95502.f90   -O   (test for errors, line 7)
FAIL: gfortran.dg/pr95502.f90   -O  (test for excess errors)
FAIL: gfortran.dg/pr95503.f90   -O   (test for errors, line 6)
FAIL: gfortran.dg/pr95503.f90   -O  (test for excess errors)
FAIL: gfortran.dg/statement_function_5.f90   -O  (test for excess errors)

Running /home/jerry/dev/trunk/gcc

Re: Test suite failures.

2025-05-27 Thread Jerry D

On 5/27/25 2:19 PM, Harald Anlauf wrote:

Jerry, all,

that was entirely my fault - attempting a last-minute cleanup
that reordered code, trying to use a refactoring.  I've put
on my brown bag and pushed a correction as obvious as:

r16-921-g74a2281ae18c6d.

See attached.

Caveat: this was tested on top of r16-915, as I cannot compile
anything after r16-916.

Let me know if that works for you.

Cheers,
Harald


All fixed here for me. Rebuilt and retested.

Thanks for quick response.

Jerry


Re: [PATCH v25 0/3] c: Add _Countof and

2025-05-27 Thread Jakub Jelinek
On Tue, May 27, 2025 at 11:15:14PM +0200, Alejandro Colomar wrote:
> Oopsy!  Sorry!  Please fix, yep, it's a pasto.  :)

Committed as obvious to trunk:

2025-05-27  Jakub Jelinek  

PR c/117025
* c-parser.cc (c_parser_sizeof_or_countof_expression): Use
C2Y rather than C23 in pedwarn_c23.

--- gcc/c/c-parser.cc.jj2025-05-27 23:08:37.907727004 +0200
+++ gcc/c/c-parser.cc   2025-05-27 23:25:51.540844741 +0200
@@ -10651,7 +10651,7 @@ c_parser_sizeof_or_countof_expression (c
 
   if (rid == RID_COUNTOF)
 pedwarn_c23 (start, OPT_Wpedantic,
-"ISO C does not support %qs before C23", op_name);
+"ISO C does not support %qs before C2Y", op_name);
 
   c_parser_consume_token (parser);
   c_inhibit_evaluation_warnings++;

Jakub



Re: [PATCH RFA (diagnostic)] c++: modules and #pragma diagnostic

2025-05-27 Thread Patrick Palka

On Tue, 27 May 2025, Patrick Palka wrote:

> On Tue, 27 May 2025, David Malcolm wrote:
> 
> > On Fri, 2025-05-23 at 16:58 -0400, Jason Merrill wrote:
> > > On 4/14/25 9:57 AM, Jason Merrill wrote:
> > > > On 1/9/25 10:00 PM, Jason Merrill wrote:
> > > > > Tested x86_64-pc-linux-gnu.  Is the diagnostic.h change OK for
> > > > > trunk?
> > > > 
> > > > Ping?
> > > 
> > > Ping.
> > 
> > Sorry for the delay in responding; comments below...
> > 
> > > 
> > > > > -- 8< --
> > > > > 
> > > > > To respect the #pragma diagnostic lines in libstdc++ headers when
> > > > > compiling
> > > > > with module std, we need to represent them in the module.
> > > > > 
> > > > > I think it's reasonable to make module_state a friend of
> > > > > diagnostic_option_classifier to allow direct access to the data. 
> > > > > This 
> > > > > is a
> > > > > different approach from how Jakub made PCH streaming members of
> > > > > diagnostic_option_classifier, but it seems to me that modules
> > > > > handling
> > > > > belongs in module.cc.
> > 
> > Putting it in module.cc looks good to me, though perhaps it should be
> > just a friend of diagnostic_option_classifier but not of
> > diagnostic_context?  Could the functions take a
> > diagnostic_option_classifier rather than a diagnostic_context? 
> > diagnostic_context is something of a "big blob" of a class.
> > 
> > [...snip...]
> > 
> > > > > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> > > > > index 78fb21dc22f..49c9c092163 100644
> > > > > --- a/gcc/cp/module.cc
> > > > > +++ b/gcc/cp/module.cc
> > 
> > [...snip...]
> > 
> > > > > @@ -17637,6 +17640,78 @@ module_state::write_ordinary_maps (elf_out 
> > > > > *to, range_t &info,
> > > > >     dump.outdent ();
> > > > >   }
> > > > > +/* Write out any #pragma GCC diagnostic info to the .dgc section.  */
> > > > > +
> > > > > +void
> > > > > +module_state::write_diagnostic_classification (elf_out *to,
> > > > > +   diagnostic_context *dc,
> > > > > +   unsigned *crc_p)
> > > > > +{
> > > > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > > > +
> > > > > +  dump () && dump ("Writing diagnostic change locations");
> > > > > +  dump.indent ();
> > > > > +
> > > > > +  bytes_out sec (to);
> > > > > +  if (sec.streaming_p ())
> > > > > +    sec.begin ();
> > > > > +
> > > > > +  unsigned len = changes.length ();
> > > > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > > > +  if (sec.streaming_p ())
> > > > > +    sec.u (len);
> > > > > +
> > > > > +  for (const auto &c: changes)
> > > > > +    {
> > > > > +  write_location (sec, c.location);
> > > > > +  if (sec.streaming_p ())
> > > > > +    {
> > > > > +  sec.u (c.option);
> > > > > +  sec.u (c.kind);
> > > > > +    }
> > > > > +    }
> > 
> > I confess I don't fully understand the module code yet - in particular
> > the streaming vs non-streaming distinction.  What are the "if
> > (sec.streaming_p ())" guards doing here?  It looks it can be false if
> > the param "elf_out *to" is null (can that happen?), and if it's false,
> > then this function essentially becomes a no-op.  Is that what we want?
> 
> When streaming_p is false then we're not serializing, we're just doing
> dependency analysis, the same walking code is used for both.  The trees_out
> class definition has the following comment:
> 
> /* The walk is used for three similar purposes:
> 
>   1. The initial scan for dependencies.
>   2. Once dependencies have been found, ordering them.
>   3. Writing dependencies to file (streaming_p).
> 
>  For cases where it matters, these accessers can be used to determine
>  which state we're in.  */
> 
> But it seems we don't need to check it here, since streaming_p will always
> be true at the current call sites of write_diagnostic_classification?

Never mind, streaming_p is clearly false for the first call.

> 
> > 
> > 
> > > > > +
> > > > > +  if (sec.streaming_p ())
> > > > > +    sec.end (to, to->name (MOD_SNAME_PFX ".dgc"), crc_p);
> > > > > +  dump.outdent ();
> > > > > +}
> > > > > +
> > > > > +/* Read any #pragma GCC diagnostic info from the .dgc section.  */
> > > > > +
> > > > > +bool
> > > > > +module_state::read_diagnostic_classification (diagnostic_context *dc)
> > > > > +{
> > > > > +  bytes_in sec;
> > > > > +
> > > > > +  if (!sec.begin (loc, from (), MOD_SNAME_PFX ".dgc"))
> > > > > +    return false;
> > > > > +
> > > > > +  dump () && dump ("Reading diagnostic change locations");
> > > > > +  dump.indent ();
> > > > > +
> > > > > +  unsigned len = sec.u ();
> > > > > +  dump () && dump ("Diagnostic changes: %u", len);
> > > > > +
> > > > > +  auto &changes = dc->m_option_classifier.m_classification_history;
> > > > > +  unsigned offset = changes.length ();
> > > > > +  changes.reserve (len);
> > > > > +  for (unsigned i = 0; i < len; ++i)
> > > > > +    {
> > > > > +  location_t loc = read_location (sec);
> > > > > +  int opt = sec.

Re: [PATCH] libgcc: Add DPD support + fix big-endian support of _BitInt <-> dfp conversions

2025-05-27 Thread Jakub Jelinek
On Tue, May 27, 2025 at 02:25:14PM +0200, Richard Biener wrote:
> Isn't soft-fp imported from glibc?

Most of it, yes.
Though, the _BitInt specific stuff in there (whether
binary float <-> _BitInt or decimal float <-> _BitInt) is not owned
by glibc, it is an implementation detail of GCC, put into the same
directory as the rest because it uses the same infrastructure.

> I was hoping Joseph would review this one.

Me too.

Jakub



[committed] libstdc++: Fix some names.cc test failures on AIX

2025-05-27 Thread Jonathan Wakely
libstdc++-v3/ChangeLog:

* testsuite/17_intro/names.cc [_AIX] (n): Undefine.
* testsuite/experimental/names.cc [_AIX] (ptr): Undefine.
---

Tested x86_64-linux and powerpc-aix.
Pushed to trunk.

 libstdc++-v3/testsuite/17_intro/names.cc | 2 ++
 libstdc++-v3/testsuite/experimental/names.cc | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/libstdc++-v3/testsuite/17_intro/names.cc 
b/libstdc++-v3/testsuite/17_intro/names.cc
index 0e67c795564d..a61e49dc8191 100644
--- a/libstdc++-v3/testsuite/17_intro/names.cc
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
@@ -248,6 +248,8 @@
 #undef r
 #undef x
 #undef y
+//  defines _LC_weight_t::n
+#undef n
 //  defines pollfd_ext::u on AIX 7.3
 #undef u
 //  defines vario::v
diff --git a/libstdc++-v3/testsuite/experimental/names.cc 
b/libstdc++-v3/testsuite/experimental/names.cc
index e0a7d4f9b880..4bedd530ecc5 100644
--- a/libstdc++-v3/testsuite/experimental/names.cc
+++ b/libstdc++-v3/testsuite/experimental/names.cc
@@ -22,6 +22,12 @@
 // naming variables, parameters etc. in the library.
 
 #include "../17_intro/names.cc"
+
+#ifdef _AIX
+//  declares endnetgrent_r with ptr parameter.
+# undef ptr
+#endif
+
 // Filesystem
 #if __has_include()
 # include 
-- 
2.49.0



[PATCH] Fortran: fix parsing of type parameter inquiries of substrings [PR101735]

2025-05-27 Thread Harald Anlauf

Dear all,

the attached patch fixes a variety of small issues with parsing of
inquiry references of substrings.  The testcase exercises variations
of the examples in the PR and ensures that these are successfully
simplified.

Don't try it with other compilers... ;-)

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

I believe this is sufficiently safe that it can be backported
later to 15-branch, unless someone objects.

Thanks,
Harald

From 48a3bb2f5822b0e69211e89bd92fa3d497321f4c Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Tue, 27 May 2025 19:23:16 +0200
Subject: [PATCH] Fortran: fix parsing of type parameter inquiries of
 substrings [PR101735]

Handling of type parameter inquiries of substrings failed due to either
parsing issues or not following or handling reference chains properly.

	PR fortran/101735

gcc/fortran/ChangeLog:

	* expr.cc (find_inquiry_ref): If an inquiry reference applies to
	a substring, use that, and calculate substring length if needed.
	* primary.cc (extend_ref): Also handle attaching to end of
	reference chain for appending.
	(gfc_match_varspec): Discriminate between arrays of character and
	substrings of them.  If a substring is taken from a character
	component of a derived type, get the proper typespec so that
	inquiry references work correctly.
	(gfc_match_rvalue): Handle corner case where we hit a seemingly
	dangling '%' and missed an inquiry reference. Try another match.

gcc/testsuite/ChangeLog:

	* gfortran.dg/inquiry_type_ref_7.f90: New test.
---
 gcc/fortran/expr.cc   | 26 
 gcc/fortran/primary.cc| 60 --
 .../gfortran.dg/inquiry_type_ref_7.f90| 62 +++
 3 files changed, 142 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/inquiry_type_ref_7.f90

diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc
index 92a9ebdcbe8..bf858ea5791 100644
--- a/gcc/fortran/expr.cc
+++ b/gcc/fortran/expr.cc
@@ -1846,6 +1846,7 @@ find_inquiry_ref (gfc_expr *p, gfc_expr **newp)
   gfc_ref *ref;
   gfc_ref *inquiry = NULL;
   gfc_ref *inquiry_head;
+  gfc_ref *ref_ss = NULL;
   gfc_expr *tmp;
 
   tmp = gfc_copy_expr (p);
@@ -1862,6 +1863,9 @@ find_inquiry_ref (gfc_expr *p, gfc_expr **newp)
 	  {
 	inquiry = ref->next;
 	ref->next = NULL;
+	if (ref->type == REF_SUBSTRING)
+	  ref_ss = ref;
+	break;
 	  }
 }
 
@@ -1891,6 +1895,28 @@ find_inquiry_ref (gfc_expr *p, gfc_expr **newp)
 	  if (!gfc_notify_std (GFC_STD_F2003, "LEN part_ref at %C"))
 	goto cleanup;
 
+	  /* Inquire length of substring?  */
+	  if (ref_ss)
+	{
+	  if (ref_ss->u.ss.start->expr_type == EXPR_CONSTANT
+		  && ref_ss->u.ss.end->expr_type == EXPR_CONSTANT)
+		{
+		  HOST_WIDE_INT istart, iend, length;
+		  istart = gfc_mpz_get_hwi (ref_ss->u.ss.start->value.integer);
+		  iend = gfc_mpz_get_hwi (ref_ss->u.ss.end->value.integer);
+
+		  if (istart <= iend)
+		length = iend - istart + 1;
+		  else
+		length = 0;
+		  *newp = gfc_get_int_expr (gfc_default_integer_kind,
+	NULL, length);
+		  break;
+		}
+	  else
+		goto cleanup;
+	}
+
 	  if (tmp->ts.u.cl->length
 	  && tmp->ts.u.cl->length->expr_type == EXPR_CONSTANT)
 	*newp = gfc_copy_expr (tmp->ts.u.cl->length);
diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index ec4e13548c4..426c994e67d 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
@@ -2102,10 +2102,18 @@ extend_ref (gfc_expr *primary, gfc_ref *tail)
 {
   if (primary->ref == NULL)
 primary->ref = tail = gfc_get_ref ();
+  else if (tail == NULL)
+{
+  /* Set tail to end of reference chain.  */
+  for (gfc_ref *ref = primary->ref; ref; ref = ref->next)
+	if (ref->next == NULL)
+	  {
+	tail = ref;
+	break;
+	  }
+}
   else
 {
-  if (tail == NULL)
-	gfc_internal_error ("extend_ref(): Bad tail");
   tail->next = gfc_get_ref ();
   tail = tail->next;
 }
@@ -2302,9 +2310,22 @@ gfc_match_varspec (gfc_expr *primary, int equiv_flag, bool sub_flag,
   gfc_array_spec *as;
   bool coarray_only = sym->attr.codimension && !sym->attr.dimension
 			  && sym->ts.type == BT_CHARACTER;
+  gfc_ref *ref, *strarr = NULL;
 
   tail = extend_ref (primary, tail);
-  tail->type = REF_ARRAY;
+  if (sym->ts.type == BT_CHARACTER && tail->type == REF_SUBSTRING)
+	{
+	  gcc_assert (sym->attr.dimension);
+	  /* Find array reference for substrings of character arrays.  */
+	  for (ref = primary->ref; ref && ref->next; ref = ref->next)
+	if (ref->type == REF_ARRAY && ref->next->type == REF_SUBSTRING)
+	  {
+		strarr = ref;
+		break;
+	  }
+	}
+  else
+	tail->type = REF_ARRAY;
 
   /* In EQUIVALENCE, we don't know yet whether we are seeing
 	 an array, character variable or array of character
@@ -2317,7 +2338,8 @@ gfc_match_varspec (gfc_expr *primary, int equiv_flag, bool sub_flag,
   else
 	as = sym->as;
 
-  m = gfc_match_array_

Re: [PATCH v6 0/3][Middle-end]Provide more contexts for -Warray-bounds and -Wstringop-* warning messages

2025-05-27 Thread Qing Zhao
Hi, Kees,



> On May 19, 2025, at 14:23, Kees Cook  wrote:
> 
> On Fri, May 16, 2025 at 01:34:14PM +, Qing Zhao wrote:
> >> Adding -fdiagnostics-details into GCC to provide more hints to the
>> end users on how the warnings come from, in order to help the user
>> to locate the exact location in source code on the specific warnings
>> due to compiler optimizations.
> 
> I just needed to examine an unexpected -Wrestrict warning, and
> discovered that this patch didn't help with it, but in looking at the
> implementation details, it turned out to be trivial to expand coverage
> to include -Wrestrict, which worked for me, and got me the
> diagnostics I needed[1].

I am so happy to see that this work can help the -Wrestrict warnings 
as well. 
> 
> Could you include this patch in the next version of the series too? I'll
> put it to use! :)

Yes, I will add the support to -Wrestrict too.

Thanks a lot.

Qing
> 
> -Kees
> 
> [1] https://lore.kernel.org/all/202505191117.C094A90F88@keescook/
> 
> 
> diff --git a/gcc/gimple-ssa-warn-restrict.cc b/gcc/gimple-ssa-warn-restrict.cc
> index a52307866cc4..0f6eddf01e4e 100644
> --- a/gcc/gimple-ssa-warn-restrict.cc
> +++ b/gcc/gimple-ssa-warn-restrict.cc
> @@ -1448,6 +1448,8 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
> 
>   tree func = gimple_call_fndecl (call);
> 
> +  rich_location_with_details richloc (loc, call);
> +
>   /* To avoid a combinatorial explosion of diagnostics format the offsets
>  or their ranges as strings and use them in the warning calls below.  */
>   char offstr[3][64];
> @@ -1493,7 +1495,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
>   if (sizrange[0] == sizrange[1])
> {
>  if (ovlsiz[0] == ovlsiz[1])
> -warning_at (loc, OPT_Wrestrict,
> +warning_at (&richloc, OPT_Wrestrict,
> sizrange[0] == 1
> ? (ovlsiz[0] == 1
>   ? G_("%qD accessing %wu byte at offsets %s "
> @@ -1510,7 +1512,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
> func, sizrange[0],
> offstr[0], offstr[1], ovlsiz[0], offstr[2]);
>  else if (ovlsiz[1] >= 0 && ovlsiz[1] < maxobjsize.to_shwi ())
> -warning_n (loc, OPT_Wrestrict, sizrange[0],
> +warning_n (&richloc, OPT_Wrestrict, sizrange[0],
>   "%qD accessing %wu byte at offsets %s "
>   "and %s overlaps between %wu and %wu bytes "
>   "at offset %s",
> @@ -1520,7 +1522,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
>   func, sizrange[0], offstr[0], offstr[1],
>   ovlsiz[0], ovlsiz[1], offstr[2]);
>  else
> -warning_n (loc, OPT_Wrestrict, sizrange[0],
> +warning_n (&richloc, OPT_Wrestrict, sizrange[0],
>   "%qD accessing %wu byte at offsets %s and "
>   "%s overlaps %wu or more bytes at offset %s",
>   "%qD accessing %wu bytes at offsets %s and "
> @@ -1533,7 +1535,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
>   if (sizrange[1] >= 0 && sizrange[1] < maxobjsize.to_shwi ())
> {
>  if (ovlsiz[0] == ovlsiz[1])
> -warning_n (loc, OPT_Wrestrict, ovlsiz[0],
> +warning_n (&richloc, OPT_Wrestrict, ovlsiz[0],
>   "%qD accessing between %wu and %wu bytes "
>   "at offsets %s and %s overlaps %wu byte at "
>   "offset %s",
> @@ -1543,7 +1545,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
>   func, sizrange[0], sizrange[1],
>   offstr[0], offstr[1], ovlsiz[0], offstr[2]);
>  else if (ovlsiz[1] >= 0 && ovlsiz[1] < maxobjsize.to_shwi ())
> -warning_at (loc, OPT_Wrestrict,
> +warning_at (&richloc, OPT_Wrestrict,
> "%qD accessing between %wu and %wu bytes at "
> "offsets %s and %s overlaps between %wu and %wu "
> "bytes at offset %s",
> @@ -1551,7 +1553,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
> offstr[0], offstr[1], ovlsiz[0], ovlsiz[1],
> offstr[2]);
>  else
> -warning_at (loc, OPT_Wrestrict,
> +warning_at (&richloc, OPT_Wrestrict,
> "%qD accessing between %wu and %wu bytes at "
> "offsets %s and %s overlaps %wu or more bytes "
> "at offset %s",
> @@ -1564,7 +1566,7 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
> ovlsiz[1] = maxobjsize.to_shwi ();
> 
>   if (ovlsiz[0] == ovlsiz[1])
> - warning_n (loc, OPT_Wrestrict, ovlsiz[0],
> + warning_n (&richloc, OPT_Wrestrict, ovlsiz[0],
>   "%qD accessing %wu or more bytes at offsets "
>   "%s and %s overlaps %wu byte at offset %s",
>   "%qD accessing %wu or more bytes at offsets "
> @@ -1572,14 +1574,14 @@ maybe_diag_overlap (location_t loc, gimple *call, 
> builtin_access &acs)
>   func, sizrange[0], offstr[0], offstr[1],
>   ovlsiz[0], offstr[2]);
>   else if (ovlsiz[1] >= 0 && ovlsiz[1] < maxobjsize.to_shwi ())
> - warning_at (loc, OPT_Wrestrict,
> + warning_at (&richloc, OPT_Wrestrict,
>"%qD accessing %wu or more bytes at offsets %s "
>"and %s overlaps between %wu and %wu bytes "
>"at offset %s",
>func, sizra

Re: [PATCH, fortran] PR120049 - ICE when using IS_C_ASSOCIATED ()

2025-05-27 Thread Jerry D

On 5/20/25 12:35 PM, Jerry D wrote:

On 5/20/25 12:01 PM, Harald Anlauf wrote:

Hi Jerry!

Am 20.05.25 um 05:23 schrieb Jerry D:

On 5/19/25 1:50 PM, Harald Anlauf wrote:

Hi Jerry,

so contrary to what the name of the patch claims (pr120049-final.diff),
it fixes only the case of direct use of iso_c_binding, but not the
indirect one thru the other module, which is the reason for the
original ICE and the PR.

So if you want to push the incremental patch now, go ahead.

Cheers,
Harald


Am 18.05.25 um 23:46 schrieb Jerry D:

On 5/18/25 2:34 PM, Jerry D wrote:

On 5/18/25 2:10 PM, Harald Anlauf wrote:

Hi Jerry,

I found 2 corner invalid cases which are silently accepted with
your patch when iso_c_binding is used indirectly:

   print *, c_associated(c_loc(val), C_NULL_FUNPTR)
   print *, c_associated(C_NULL_FUNPTR, c_loc(val))

These should get rejected, too.  Can you see how to catch these, 
too?


Thanks,
Harald


Yes, will do! I try to think of cases to run through on. This helps.

Thanks,

Jerry
--- snip ---




Attached is the revised patch to fix the additional test cases. I had 
to do some trial and error to get the testsuite directives to work 
the way they should.


One will notice that the file containing the gtk_sup module is 
simplified and gets taken care of with the directives in the specific 
tests.


Regression tested on x86_64.

OK for trunk?


No, not yet.  It rejects too much (consistently).  Consider:


Harald,

Please try the new and improved patch attached.

It passes regression testing with your additional cases, and I updated the 
test for gfortran.dg.


Let me know if OK.

Regards,

Jerry

commit 0b7e798489bd1d6c4a5b748e822485fe8974c811
Author: Jerry DeLisle 
Date:   Mon May 19 19:41:16 2025 -0700

Fortran: Fix c_associated argument checks.

PR fortran/120049

gcc/fortran/ChangeLog:

* check.cc (gfc_check_c_associated): Use new helper functions.
Only call check_c_ptr_1 if optional c_ptr_2 tests succeed.
(check_c_ptr_1): Handle only c_ptr_1 checks.
(check_c_ptr_2): Expand checks for c_ptr_2 and handle cases
where there is no derived pointer in the gfc_expr and check
the inmod_sym_id only if it exists.
* misc.cc (gfc_typename): Handle the case for BT_VOID rather
than throw an internal error.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr120049_a.f90: Update test directives.
* gfortran.dg/pr120049_b.f90: Update test directives
* gfortran.dg/pr120049_2.f90: New test.

Co-Authored-By: Steve Kargl 

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index f02a2a33897..c693e421407 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -5952,49 +5952,110 @@ gfc_check_c_sizeof (gfc_expr *arg)
 }
 
 
-bool
-gfc_check_c_associated (gfc_expr *c_ptr_1, gfc_expr *c_ptr_2)
+/* Helper functions check_c_ptr_1 and check_c_ptr_2
+   used in gfc_check_c_associated.  */
+
+static inline
+bool check_c_ptr_1 (gfc_expr *c_ptr_1)
 {
-  if (c_ptr_1)
-{
-  if (c_ptr_1->expr_type == EXPR_FUNCTION && c_ptr_1->ts.type == BT_VOID)
-	return true;
+  if ((c_ptr_1->ts.type == BT_VOID)
+  && (c_ptr_1->expr_type == EXPR_FUNCTION))
+return true;
 
-  if (c_ptr_1->ts.type != BT_DERIVED
-	  || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
-	  || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
-	  && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
-	{
-	  gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
-		 "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
-	  return false;
-	}
-}
+  if (c_ptr_1->ts.type != BT_DERIVED
+  || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
+  || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
+	  && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
+	goto check_1_error;
 
-  if (!scalar_check (c_ptr_1, 0))
+  if ((c_ptr_1->ts.type == BT_DERIVED)
+   && (c_ptr_1->expr_type == EXPR_STRUCTURE)
+   && (c_ptr_1->ts.u.derived->intmod_sym_id
+	   == ISOCBINDING_NULL_FUNPTR))
+goto check_1_error;
+
+  if (scalar_check (c_ptr_1, 0))
+return true;
+  else
+/*  Return since the check_1_error message may not apply here. */
 return false;
 
-  if (c_ptr_2)
-{
-  if (c_ptr_2->expr_type == EXPR_FUNCTION && c_ptr_2->ts.type == BT_VOID)
-	return true;
+check_1_error:
 
-  if (c_ptr_2->ts.type != BT_DERIVED
-	  || c_ptr_2->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
-	  || (c_ptr_1->ts.u.derived->intmod_sym_id
-	  != c_ptr_2->ts.u.derived->intmod_sym_id))
+  gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
+	 "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
+  return false;
+}
+
+static inline
+bool check_c_ptr_2 (gfc_expr *c_ptr_1, gfc_expr *c_ptr_2)
+{
+  switch (c_ptr_2->ts.type)
+  {
+case BT_VOID:
+ 

RE: [PATCH v1 1/3] RISC-V: Leverage vaadd.vv for signed standard name avg_floor

2025-05-27 Thread Li, Pan2
> Couldn't we keep the RTL in order for other optimizations?  I'm not really 
> expecting any but at least we'd still have the opportunity.  Or does that 
> interfere with the tests?

I see, let me have a try in v2.

Pan

-Original Message-
From: Robin Dapp  
Sent: Tuesday, May 27, 2025 2:27 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; jeffreya...@gmail.com; 
rdapp@gmail.com; Chen, Ken ; Liu, Hongtao 
; and...@sifive.com; Robin Dapp 
Subject: Re: [PATCH v1 1/3] RISC-V: Leverage vaadd.vv for signed standard name 
avg_floor

> -(define_expand "avg3_floor"
> - [(set (match_operand: 0 "register_operand")
> -   (truncate:
> -(ashiftrt:VWEXTI
> - (plus:VWEXTI
> -  (sign_extend:VWEXTI
> -   (match_operand: 1 "register_operand"))
> -  (sign_extend:VWEXTI
> -   (match_operand: 2 "register_operand"))]
> +(define_expand "avg3_floor"
> + [(match_operand:V_VLSI 0 "register_operand")
> +  (match_operand:V_VLSI 1 "register_operand")
> +  (match_operand:V_VLSI 2 "register_operand")]
>"TARGET_VECTOR"

Couldn't we keep the RTL in order for other optimizations?  I'm not really 
expecting any but at least we'd still have the opportunity.  Or does that 
interfere with the tests?

Apart from that it LGTM, thanks for digging deeper here.

-- 
Regards
 Robin



[PATCH v2 2/3] vect: Remove non-SLP paths in strided slp/elementwise.

2025-05-27 Thread Robin Dapp
This removes the non-SLP paths that were made unreachable in the
previous patch.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths.
---
 gcc/tree-vect-stmts.cc | 49 --
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 07e309d02e5..3710694ac75 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10689,8 +10689,7 @@ vectorizable_load (vec_info *vinfo,
  first_dr_info = dr_info;
}
 
-  if (1 && grouped_load
- && memory_access_type == VMAT_STRIDED_SLP)
+  if (grouped_load && memory_access_type == VMAT_STRIDED_SLP)
{
  group_size = DR_GROUP_SIZE (first_stmt_info);
  ref_type = get_group_alias_ptr_type (first_stmt_info);
@@ -10830,22 +10829,20 @@ vectorizable_load (vec_info *vinfo,
  ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
}
 
-  if (1)
+  /* For SLP permutation support we need to load the whole group,
+not only the number of vector stmts the permutation result
+fits in.  */
+  if (slp_perm)
{
- /* For SLP permutation support we need to load the whole group,
-not only the number of vector stmts the permutation result
-fits in.  */
- if (slp_perm)
-   {
- /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
-variable VF.  */
- unsigned int const_vf = vf.to_constant ();
- ncopies = CEIL (group_size * const_vf, const_nunits);
- dr_chain.create (ncopies);
-   }
- else
-   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+variable VF.  */
+ unsigned int const_vf = vf.to_constant ();
+ ncopies = CEIL (group_size * const_vf, const_nunits);
+ dr_chain.create (ncopies);
}
+  else
+   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
   unsigned int group_el = 0;
   unsigned HOST_WIDE_INT
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
@@ -10883,14 +10880,13 @@ vectorizable_load (vec_info *vinfo,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
 
  group_el += lnel;
- if (0
- || group_el == group_size)
+ if (group_el == group_size)
{
  n_groups++;
  /* When doing SLP make sure to not load elements from
 the next vector iteration, those will not be accessed
 so just use the last element again.  See PR107451.  */
- if (0 || known_lt (n_groups, vf))
+ if (known_lt (n_groups, vf))
{
  tree newoff = copy_ssa_name (running_off);
  gimple *incr
@@ -10938,19 +10934,10 @@ vectorizable_load (vec_info *vinfo,
 
  if (!costing_p)
{
- if (1)
-   {
- if (slp_perm)
-   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
- else
-   slp_node->push_vec_def (new_stmt);
-   }
+ if (slp_perm)
+   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
  else
-   {
- if (j == 0)
-   *vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
-   }
+   slp_node->push_vec_def (new_stmt);
}
}
   if (slp_perm)
-- 
2.49.0



[PATCH v2 3/3] vect: Use strided loads for VMAT_STRIDED_SLP.

2025-05-27 Thread Robin Dapp
From: Robin Dapp 

This patch enables strided loads for VMAT_STRIDED_SLP.  Instead of
building vectors from scalars or other vectors we can use strided loads
directly when applicable.

The current implementation limits strided loads to cases where we can
load entire groups and not subsets of them.  A future improvement would
be to e.g. load a group of three uint8_t

  g0 g1  g2, g0 + stride g1 + stride g2 + stride, ...

by

  vlse16 vlse8

and permute those into place (after re-interpreting as vector of
uint8_t).

For satd_8x4 in particular we can do even better by eliding the strided
SLP load permutations, essentially turning

  vlse64 v0, (a0)
  vlse64 v1, (a1)
  VEC_PERM_EXPR ;
  VEC_PERM_EXPR ;

into

  vlse32 v0, (a0)
  vlse32 v1, (a1)
  vlse32 v0, 4(a0)
  vlse32 v1, 4(a1)

but that is going to be a follow up.

Bootstrapped and regtested on x86, aarch64, and power10.
Regtested on rv64gcv_zvl512b.  I'm seeing one additional failure in
gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c
where we use a larger LMUL than we should but IMHO this can wait.

PR target/118109

gcc/ChangeLog:

* internal-fn.cc (internal_strided_fn_supported_p): New
function.
* internal-fn.h (internal_strided_fn_supported_p): Declare.
* tree-vect-stmts.cc (vect_supportable_strided_type): New
function.
(vectorizable_load): Add strided-load support for strided
groups.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118019-2.c: New test.
---
 gcc/internal-fn.cc|  21 ++
 gcc/internal-fn.h |   2 +
 .../gcc.target/riscv/rvv/autovec/pr118019-2.c |  51 +
 gcc/tree-vect-stmts.cc| 196 +++---
 4 files changed, 243 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 6b04443f7cd..aec90ef87cc 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5203,6 +5203,27 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, 
tree vector_type,
   return ok;
 }
 
+/* Return true if the target supports a strided load/store function IFN
+   with VECTOR_TYPE.  If supported and ELSVALS is nonzero the supported else
+   values will be added to the vector ELSVALS points to.  */
+
+bool
+internal_strided_fn_supported_p (internal_fn ifn, tree vector_type,
+vec *elsvals)
+{
+  machine_mode mode = TYPE_MODE (vector_type);
+  optab optab = direct_internal_fn_optab (ifn);
+  insn_code icode = direct_optab_handler (optab, mode);
+
+  bool ok = icode != CODE_FOR_nothing;
+
+  if (ok && elsvals)
+get_supported_else_vals
+  (icode, internal_fn_else_index (IFN_MASK_LEN_STRIDED_LOAD), *elsvals);
+
+  return ok;
+}
+
 /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
for pointers of type TYPE when the accesses have LENGTH bytes and their
common byte alignment is ALIGN.  */
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index afd4f8e64c7..7d386246a42 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -242,6 +242,8 @@ extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int,
vec * = nullptr);
+extern bool internal_strided_fn_supported_p (internal_fn ifn, tree vector_type,
+vec *elsvals);
 extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
poly_uint64, unsigned int);
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c
new file mode 100644
index 000..9918d4d7f52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019-2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl512b -mabi=lp64d 
-mno-vector-strict-align" } */
+
+/* Ensure we use strided loads.  */
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3)  
\
+  {
\
+int t0 = s0 + s1;  
\
+int t1 = s0 - s1;  
\
+int t2 = s2 + s3;  
\
+int t3 = s2 - s3;  
\
+d0 = t0 + t2;  
\
+d2 = t0 - t2;  
\
+d1 = t1 + t3;   

[PATCH] libstdc++: Fix flat_map::operator[] for const lvalue keys [PR120432]

2025-05-27 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, does this look OK for trunk/15?

The 'volatile' issue from that PR will be fixed in a separate patch as
operator[] isn't the only operation that's affected.

-- >8 --

The const lvalue operator[] overload wasn't properly forwarding the key
type to the generic overload.

PR libstdc++/120432

libstdc++-v3/ChangeLog:

* include/std/flat_map (_Flat_map_base::operator[]): Correct
forwarding from the const lvalue key overload.
* testsuite/23_containers/flat_map/1.cc (test08): New test.
* testsuite/23_containers/flat_multimap/1.cc (test08): New test.
---
 libstdc++-v3/include/std/flat_map  |  2 +-
 libstdc++-v3/testsuite/23_containers/flat_map/1.cc | 10 ++
 .../testsuite/23_containers/flat_multimap/1.cc | 10 ++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/flat_map 
b/libstdc++-v3/include/std/flat_map
index 6593988d213c..4d9ced1e8191 100644
--- a/libstdc++-v3/include/std/flat_map
+++ b/libstdc++-v3/include/std/flat_map
@@ -1142,7 +1142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // element access
   mapped_type&
   operator[](const key_type& __x)
-  { return operator[](__x); }
+  { return operator[](__x); }
 
   mapped_type&
   operator[](key_type&& __x)
diff --git a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc 
b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
index a9690208b09f..2af516410279 100644
--- a/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
+++ b/libstdc++-v3/testsuite/23_containers/flat_map/1.cc
@@ -253,6 +253,15 @@ test07()
   VERIFY( std::ranges::equal(m, (std::pair[]){{3,4}}) );
 }
 
+void
+test08()
+{
+  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue keys
+  std::flat_map m;
+  const int k = 42;
+  m[k] = 0;
+}
+
 int
 main()
 {
@@ -266,4 +275,5 @@ main()
   test05();
   test06();
   test07();
+  test08();
 }
diff --git a/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc 
b/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
index 1c5c9a88ab6b..638b269011d4 100644
--- a/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
+++ b/libstdc++-v3/testsuite/23_containers/flat_multimap/1.cc
@@ -231,6 +231,15 @@ test07()
   VERIFY( std::ranges::equal(m, (std::pair[]){{3,4},{3,3}}) );
 }
 
+void
+test08()
+{
+  // PR libstdc++/120432 - flat_map operator[] is broken for const lvalue keys
+  std::flat_multimap m;
+  const int k = 42;
+  m[k] = 0;
+}
+
 int
 main()
 {
@@ -244,4 +253,5 @@ main()
   test05();
   test06();
   test07();
+  test08();
 }
-- 
2.49.0.654.g845c48a16a



Re: [PATCH] fortran: add constant input support for trig functions with half-revolutions

2025-05-27 Thread Steve Kargl
On Tue, May 27, 2025 at 02:17:46PM +, Yuao Ma wrote:
> 
> I've reverted the recent format changes, as three reviewers indicated they
> caused more harm than good.
> 

Thank you.

> Are there any functional problems I need to address?

I did not see any additional functional issues.  Patch is
OK with me.

-- 
Steve


[PATCH] testsuite: Add tls_link effective target

2025-05-27 Thread Christophe Lyon
Some tests have 'dg-do link' but currently require 'tls' which is a
compile-only check.

In some configurations of arm-none-eabi, the 'tls' effective-target
can be successful although these tests fail to link with
undefined reference to `__aeabi_read_tp'

This patch adds a new tls_link effective target which makes sure we can
build an executable.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp (check_effective_target_tls_link): New.
* g++.dg/tls/pr102496-1.C: Require tls_link.
* g++.dg/tls/pr77285-1.C: Likewise.

gcc/ChangeLog:

* doc/sourcebuild.texi (tls_link): Add documentation.
---
 gcc/doc/sourcebuild.texi  |  3 +++
 gcc/testsuite/g++.dg/tls/pr102496-1.C |  2 +-
 gcc/testsuite/g++.dg/tls/pr77285-1.C  |  2 +-
 gcc/testsuite/lib/target-supports.exp | 10 ++
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 91fadc6ed01..8038e1b6af2 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1975,6 +1975,9 @@ at plain @option{-O2}.
 @item tls
 Target supports thread-local storage.
 
+@item tls_link
+Target supports linking TLS executables.
+
 @item tls_native
 Target supports native (rather than emulated) thread-local storage.
 
diff --git a/gcc/testsuite/g++.dg/tls/pr102496-1.C 
b/gcc/testsuite/g++.dg/tls/pr102496-1.C
index 8220e1e663a..e015ae95eab 100644
--- a/gcc/testsuite/g++.dg/tls/pr102496-1.C
+++ b/gcc/testsuite/g++.dg/tls/pr102496-1.C
@@ -1,6 +1,6 @@
 // PR c++/102496
 // { dg-do link { target c++11 } }
-// { dg-require-effective-target tls }
+// { dg-require-effective-target tls_link }
 // { dg-add-options tls }
 // { dg-additional-sources pr102496-2.C }
 
diff --git a/gcc/testsuite/g++.dg/tls/pr77285-1.C 
b/gcc/testsuite/g++.dg/tls/pr77285-1.C
index 7a9341429d7..340c88be77b 100644
--- a/gcc/testsuite/g++.dg/tls/pr77285-1.C
+++ b/gcc/testsuite/g++.dg/tls/pr77285-1.C
@@ -1,5 +1,5 @@
 // { dg-do link { target c++11 } }
-// { dg-require-effective-target tls }
+// { dg-require-effective-target tls_link }
 // { dg-add-options tls }
 // { dg-additional-sources pr77285-2.C }
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 6286e361fed..245943b7c3f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1092,6 +1092,16 @@ proc check_effective_target_tls {} {
 }]
 }
 
+# Return 1 if we can link using TLS, 0 otherwise.
+
+proc check_effective_target_tls_link {} {
+return [check_no_compiler_messages tls_link executable {
+   __thread int i;
+   int main (void) { return i; }
+   void g (int j) { i = j; }
+}]
+}
+
 # Return 1 if *native* thread local storage (TLS) is supported, 0 otherwise.
 
 proc check_effective_target_tls_native {} {
-- 
2.34.1



Re: [PATCH,LRA] Do inheritance transformations for any optimization [PR118591]

2025-05-27 Thread Georg-Johann Lay

Am 25.04.25 um 16:37 schrieb Vladimir Makarov:


On 4/19/25 3:29 PM, Denis Chertykov wrote:

Bugfix for PR118591

[...]

It is difficult for me to understand AVR code but I think the reason for 
the bug is in something else.  And the fix should be different.


Hi Vladimir,

let me try to explain the bug.  It occurs with avr-gcc -Os -mlra for
the following C test case:

__attribute__((noipa))
void func2 (long long a1, long long a2, long b)
{
  static unsigned char count = 0;
  if (b != count++)
__builtin_abort ();
}

int main (void)
{
  for (long b = 0; b < 5; ++b)
{
  __asm ("/* some reg pressure */" ::: "r5", "r9");
  func2 (0, 0, b);
}

  return 0;
}

The bug is in main.  Due to the high register pressure, b lives in the
frame (or is spilled to a frame location).  Since the stack pointer (SP)
cannot access the stack (except for PUSH / POP), a frame pointer has to
be set up.  FP is reg Y = r29:r28 which is initialized as FP = SP in
the prologue.
in r28,__SP_L__  ;  FP = Y = r29:r28 := SP  *movhi/7
in r29,__SP_H__

According to the ABI, b has to be passed on the stack, so the code must
read from the frame and push the 4 bytes of b.  The generated code to
read and push b is this (-mlra -Os):
ldd r24,Y+4  ;  63  [c=4 l=1]  movqi_insn/3
push r24 ;  9   [c=4 l=1]  pushqi1/0
ldd r24,Y+4  ;  64  [c=4 l=1]  movqi_insn/3
push r24 ;  11  [c=4 l=1]  pushqi1/0
ldd r24,Y+4  ;  65  [c=4 l=1]  movqi_insn/3
push r24 ;  13  [c=4 l=1]  pushqi1/0
ldd r24,Y+4  ;  66  [c=4 l=1]  movqi_insn/3
push r24 ;  15  [c=4 l=1]  pushqi1/0

So the code is reading 4 times from the *same* location.

LRA misses that the PUSH changes SP but not FP.  They are
different registers, and changing SP does not change FP
magically.  Hence the elimination offset between FP
and SP is no longer 0.  B lives in the frame at Y+1...Y+4.

For reference, here is the code from Reload (-mno-lra -Os):
ldd r24,Y+4  ;  62  [c=4 l=1]  movqi_insn/3
push r24 ;  9   [c=4 l=1]  pushqi1/0
ldd r25,Y+3  ;  63  [c=4 l=1]  movqi_insn/3
push r25 ;  11  [c=4 l=1]  pushqi1/0
ldd r26,Y+2  ;  64  [c=4 l=1]  movqi_insn/3
push r26 ;  13  [c=4 l=1]  pushqi1/0
ldd r27,Y+1  ;  65  [c=4 l=1]  movqi_insn/3
push r27 ;  15  [c=4 l=1]  pushqi1/0

As it seems, lra-eliminations.cc is missing some
setup_can_eliminate (*, false), or does some incorrect
setup_can_eliminate (*, true).

As far as I know, this is the last bug that occurs with AVR+LRA.
When it is fixed, I think we can pull the LRA switch for AVR.

Johann


  1   2   >