[gcc r16-400] tree-optimization/120031 - CTZ pattern matching fails a case

2025-05-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:5e363ffefaceb9ff8fdeeead247c8337538e01db

commit r16-400-g5e363ffefaceb9ff8fdeeead247c8337538e01db
Author: Richard Biener 
Date:   Tue May 6 08:36:01 2025 +0200

tree-optimization/120031 - CTZ pattern matching fails a case

This PR is about the pattern matching in tree-ssa-forwprop.cc not
working for the fallback implementation in ZSTD which uses a cast
around the negation of the value to be tested.  There's a pattern
eliding casts in (T')-(T)x already but that only covered an
inner widening conversion.  The following extends this to other
conversions given the negation will then be carried out in an
unsigned type.

PR tree-optimization/120031
* match.pd ((nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)):
Allow inner conversions that are not widenings when the outer
type is unsigned.

* gcc.target/i386/pr120031.c: New testcase.

Diff:
---
 gcc/match.pd |  9 +
 gcc/testsuite/gcc.target/i386/pr120031.c | 15 +++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index a150de5184f6..ab496d923cc0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1123,9 +1123,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif
 
 /* (nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)
-   if var is smaller in precision.
-   This is always safe for both doing the negative in signed or unsigned
-   as the value for undefined will not show up.
+   If var is smaller in precision this is always safe for both doing
+   the negative in signed or unsigned as the value for undefined will not
+   show up.  Else it is safe if the negation is done in an unsigned type.
Note the outer cast cannot be a boolean type as the only valid values
are 0,-1/1 (depending on the signedness of the boolean) and the negative
is there to get the correct value.  */
@@ -1133,7 +1133,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (convert (negate:s@1 (convert:s @0)))
  (if (INTEGRAL_TYPE_P (type)
   && tree_nop_conversion_p (type, TREE_TYPE (@1))
-  && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+  && (TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ || TYPE_UNSIGNED (type))
   && TREE_CODE (type) != BOOLEAN_TYPE)
 (negate (convert @0
 
diff --git a/gcc/testsuite/gcc.target/i386/pr120031.c 
b/gcc/testsuite/gcc.target/i386/pr120031.c
new file mode 100644
index ..e329cbc2f543
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120031.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi" } */
+
+unsigned int
+ZSTD_countTrailingZeros32_fallback (unsigned int val)
+{
+  static const unsigned int DeBruijn[32]
+= { 0, 1, 28, 2, 29, 14, 24, 3,
+   30, 22, 20, 15, 25, 17, 4, 8,
+   31, 27, 13, 23, 21, 19, 16, 7,
+   26, 12, 18, 6, 11, 5, 10, 9};
+  return DeBruijn[((unsigned int) ((val & -(int) val) * 0x077CB531U)) >> 27];
+}
+
+/* { dg-final { scan-assembler "tzcnt" } } */


[gcc r16-398] Implement Windows TLS

2025-05-06 Thread Jonathan Yong via Gcc-cvs
https://gcc.gnu.org/g:0aea633e146b75016eb0cf1c0bf67050d32bf8b6

commit r16-398-g0aea633e146b75016eb0cf1c0bf67050d32bf8b6
Author: Julian Waters 
Date:   Fri May 2 09:59:13 2025 +0000

Implement Windows TLS

This patch implements native Thread Local Storage access on Windows, as 
motivated by
PR80881. Currently, Thread Local Storage access on Windows relies on 
emulation, which
is detrimental to performance in certain applications, notably the Python 
Interpreter
and the gcc port of the Java Virtual Machine. This patch was heavily 
inspired by Daniel
Green's original work on native Windows Thread Local Storage from over a 
decade ago, which
can be found at 
https://github.com/venix1/MinGW-GDC/blob/master/patches/mingw-tls-gcc-4.8.patch
as a reference.

Co-authored-by: Eric Botcazou 
Co-authored-by: Uroš Bizjak 
Co-authored-by: Liu Hao 
Signed-off-by: Julian Waters 
Signed-off-by: Jonathan Yong <10wa...@gmail.com>

gcc/ChangeLog:

* config/i386/i386.cc (ix86_legitimate_constant_p): Handle new 
UNSPEC.
(legitimate_pic_operand_p): Handle new UNSPEC.
(legitimate_pic_address_disp_p): Handle new UNSPEC.
(ix86_legitimate_address_p): Handle new UNSPEC.
(ix86_tls_index_symbol): New symbol for _tls_index.
(ix86_tls_index): Handle creation of _tls_index symbol.
(legitimize_tls_address): Create thread local access sequence.
(output_pic_addr_const): Handle new UNSPEC.
(i386_output_dwarf_dtprel): Handle new UNSPEC.
(i386_asm_output_addr_const_extra): Handle new UNSPEC.
* config/i386/i386.h (TARGET_WIN32_TLS): Define.
* config/i386/i386.md: New UNSPEC.
* config/i386/predicates.md: Handle new UNSPEC.
* config/mingw/mingw32.h (TARGET_WIN32_TLS): Define.
(TARGET_ASM_SELECT_SECTION): Define.
(DEFAULT_TLS_SEG_REG): Define.
* config/mingw/winnt.cc (mingw_pe_select_section): Select proper 
TLS section.
(mingw_pe_unique_section): Handle TLS section.
* config/mingw/winnt.h (mingw_pe_select_section): Declare.
* configure: Regenerate.
* configure.ac: New check for broken linker thread local support

Diff:
---
 gcc/config/i386/i386.cc   | 61 ++-
 gcc/config/i386/i386.h|  1 +
 gcc/config/i386/i386.md   |  1 +
 gcc/config/i386/predicates.md |  1 +
 gcc/config/mingw/mingw32.h|  9 +++
 gcc/config/mingw/winnt.cc | 14 ++
 gcc/config/mingw/winnt.h  |  1 +
 gcc/configure | 29 
 gcc/configure.ac  | 29 
 9 files changed, 145 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index df3e3efdfd02..f28c92a9d3aa 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -11363,6 +11363,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
x = XVECEXP (x, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+   x = XVECEXP (x, 0, 0);
+   return GET_CODE (x) == SYMBOL_REF;
  default:
return false;
  }
@@ -11499,6 +11502,9 @@ legitimate_pic_operand_p (rtx x)
x = XVECEXP (inner, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_SECREL32:
+   x = XVECEXP (inner, 0, 0);
+   return GET_CODE (x) == SYMBOL_REF;
  case UNSPEC_MACHOPIC_OFFSET:
return legitimate_pic_address_disp_p (x);
  default:
@@ -11679,6 +11685,9 @@ legitimate_pic_address_disp_p (rtx disp)
   disp = XVECEXP (disp, 0, 0);
   return (GET_CODE (disp) == SYMBOL_REF
  && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+case UNSPEC_SECREL32:
+  disp = XVECEXP (disp, 0, 0);
+  return GET_CODE (disp) == SYMBOL_REF;
 }
 
   return false;
@@ -11956,6 +11965,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool 
strict,
  case UNSPEC_INDNTPOFF:
  case UNSPEC_NTPOFF:
  case UNSPEC_DTPOFF:
+ case UNSPEC_SECREL32:
break;
 
  default:
@@ -11981,7 +11991,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool 
strict,
  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
 

[gcc r16-399] Allow a PCH to be mapped to a different address

2025-05-06 Thread Jonathan Yong via Gcc-cvs
https://gcc.gnu.org/g:16e301adf7b653c72d5c3cae1c7287877f3f20ff

commit r16-399-g16e301adf7b653c72d5c3cae1c7287877f3f20ff
Author: LIU Hao 
Date:   Wed May 11 22:42:53 2022 +0800

Allow a PCH to be mapped to a different address

First, try mapping the PCH to its original address. If that fails, try
letting the system choose one; the PCH can be relocated thereafter.

Reference: https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594556.html

2022-05-11  LIU Hao 
Signed-off-by: Jonathan Yong <10wa...@gmail.com>

PR pch/14940

gcc/ChangeLog:
* config/i386/host-mingw32.cc (mingw32_gt_pch_use_address):
Replace the loop that attempted to map the PCH only to its
original address with more adaptive operations

Diff:
---
 gcc/config/i386/host-mingw32.cc | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index e083f49f3da7..87804a5bb755 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int 
fd,
  and earlier, backslashes are invalid in object name.  So, we need
  to check if we are on Windows2000 or higher.  */
   OSVERSIONINFO version_info;
-  int r;
 
   version_info.dwOSVersionInfoSize = sizeof (version_info);
 
@@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int 
fd,
   return -1;
 }
 
-  /* Retry five times, as here might occure a race with multiple gcc's
- instances at same time.  */
-  for (r = 0; r < 5; r++)
-   {
-  mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
-  size, addr);
-  if (mmap_addr == addr)
-   break;
-  if (r != 4)
-Sleep (500);
-   }
-
-  if (mmap_addr != addr)
+  /* Try mapping the file at `addr`.  */
+  mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+  size, addr);
+  if (mmap_addr == NULL)
 {
-  w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
-  CloseHandle(mmap_handle);
-  return  -1;
+  /* We could not map the file at its original address, so let the
+system choose a different one. The PCH can be relocated later.  */
+  mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+  size, NULL);
+  if (mmap_addr == NULL)
+   {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return  -1;
+   }
 }
 
+  addr = mmap_addr;
   return 1;
 }


[gcc r16-401] Fix i386 bootstrap on non-Windows platforms

2025-05-06 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:673d446894c063c92cafce9ba41340c82e960a11

commit r16-401-g673d446894c063c92cafce9ba41340c82e960a11
Author: Jan Hubicka 
Date:   Tue May 6 12:07:15 2025 +0200

Fix i386 bootstrap on non-Windows platforms

* config/i386/i386.cc (ix86_tls_index): Add ifdef.

Diff:
---
 gcc/config/i386/i386.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f28c92a9d3aa..89f518c86b5e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12320,6 +12320,7 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg)
 
 static GTY(()) rtx ix86_tls_index_symbol;
 
+#if TARGET_WIN32_TLS
 static rtx
 ix86_tls_index (void)
 {
@@ -12331,6 +12332,7 @@ ix86_tls_index (void)
   else
 return ix86_tls_index_symbol;
 }
+#endif
 
 /* Construct the SYMBOL_REF for the tls_get_addr function.  */


[gcc r16-402] gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074]

2025-05-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:81475602c3dd57ff6987e5f902814e8e3a0a0dde

commit r16-402-g81475602c3dd57ff6987e5f902814e8e3a0a0dde
Author: Jakub Jelinek 
Date:   Tue May 6 13:00:10 2025 +0200

gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074]

The following testcase ICEs because of a mismatch between wide_int
precision, in particular lr_and_mask has 32-bit precision while sign has
16-bit.

decode_field_reference ensures that {ll,lr,rl,rr}_and_mask has
{ll,lr,rl,rr}_bitsize precision, so the
ll_and_mask |= sign;
and
rl_and_mask |= sign;
and
ll_and_mask &= sign;
and
rl_and_mask &= sign;
cases should work right, sign has in those cases {ll,rl}_bitsize
precision.  The problem is that nothing until much later guarantees
that ll_bitsize == lr_bitsize or rl_bitsize == rr_bitsize.
In the testcase there is
((b ^ a) & 3) < 0
where a is 16-bit and b is 32-bit, so it is the lsignbit handling,
and because of the xor the xor operand is moved to the *r_and_mask, so
with ll_and_mask being 16-bit 3 and lr_and_mask being 32-bit 3.

Now, either b in the above case would be INTEGER_CST, in that case
if rr_arg was also INTEGER_CST we'd use the l_const && r_const case
and try to handle it, or we'd run into (though much later)
  if (ll_bitsize != lr_bitsize || rl_bitsize != rr_bitsize
...
return 0;

One possibility is dealing with a different precision using wide_int::from.

Another option used in this patch as it is safest is
+ if (ll_bitsize != lr_bitsize)
+   return 0;
  if (!lr_and_mask.get_precision ())
lr_and_mask = sign;
  else
lr_and_mask &= sign;
and similarly in the other hunk, i.e. punt if there is a mismatch
early.

And yet another option would be to compute
the sign
  wide_int sign = wi::mask (ll_bitsize - 1, true, ll_bitsize);
  /* If ll_arg is zero-extended and we're testing the sign bit, we know
 what the result should be.  Shifting the sign bit out of sign will 
get
 us to mask the entire field out, yielding zero, i.e., the sign bit 
of
 the zero-extended value.  We know the masked value is being 
compared
 with zero, so the compare will get us the result we're looking
 for: TRUE if EQ_EXPR, FALSE if NE_EXPR.  */
  if (lsignbit > ll_bitsize && ll_unsignedp)
sign <<= 1;
once again for the lr_and_mask and rr_and_mask cases using rl_bitsize.

As we just return 0; anyway unless l_const && r_const, if l_const & r_const
are false it doesn't really matter what is chosen, but for the const
cases it matters and I'm not sure what is right.  So the second option
might be safest.

2025-05-06  Jakub Jelinek  

PR tree-optimization/120074
* gimple-fold.cc (fold_truth_andor_for_ifcombine): For
lsignbit && l_xor case, punt if ll_bitsize != lr_bitsize.  Similarly
for rsignbit && r_xor case, punt if rl_bitsize != rr_bitsize.
Formatting fix.

* gcc.dg/pr120074.c: New test.

Diff:
---
 gcc/gimple-fold.cc  |  6 +-
 gcc/testsuite/gcc.dg/pr120074.c | 20 
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 94d5a1ebbd75..5884b79cce5a 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8334,6 +8334,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
ll_and_mask &= sign;
   if (l_xor)
{
+ if (ll_bitsize != lr_bitsize)
+   return 0;
  if (!lr_and_mask.get_precision ())
lr_and_mask = sign;
  else
@@ -8355,6 +8357,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
rl_and_mask &= sign;
   if (r_xor)
{
+ if (rl_bitsize != rr_bitsize)
+   return 0;
  if (!rr_and_mask.get_precision ())
rr_and_mask = sign;
  else
@@ -8762,7 +8766,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   wide_int lr_mask, rr_mask;
   if (lr_and_mask.get_precision ())
lr_mask = wi::lshift (wide_int::from (lr_and_mask, rnprec, UNSIGNED),
- xlr_bitpos);
+ xlr_bitpos);
   else
lr_mask = wi::shifted_mask (xlr_bitpos, lr_bitsize, false, rnprec);
   if (rr_and_mask.get_precision ())
diff --git a/gcc/testsuite/gcc.dg/pr120074.c b/gcc/testsuite/gcc.dg/pr120074.c
new file mode 100644
index ..3f31516fa56a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120074.c
@@ -0,0 +1,20 @@
+/* PR tree-optimization/120074 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fno-tree-copy-prop -fno-tree-forwprop -fno-tree-ccp" }

[gcc r15-9625] gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074]

2025-05-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:a14d65f81e18e70144ceddfc3142a8103984919d

commit r15-9625-ga14d65f81e18e70144ceddfc3142a8103984919d
Author: Jakub Jelinek 
Date:   Tue May 6 13:00:10 2025 +0200

gimple-fold: Fix fold_truth_andor_for_ifcombine [PR120074]

The following testcase ICEs because of a mismatch between wide_int
precision, in particular lr_and_mask has 32-bit precision while sign has
16-bit.

decode_field_reference ensures that {ll,lr,rl,rr}_and_mask has
{ll,lr,rl,rr}_bitsize precision, so the
ll_and_mask |= sign;
and
rl_and_mask |= sign;
and
ll_and_mask &= sign;
and
rl_and_mask &= sign;
cases should work right, sign has in those cases {ll,rl}_bitsize
precision.  The problem is that nothing until much later guarantees
that ll_bitsize == lr_bitsize or rl_bitsize == rr_bitsize.
In the testcase there is
((b ^ a) & 3) < 0
where a is 16-bit and b is 32-bit, so it is the lsignbit handling,
and because of the xor the xor operand is moved to the *r_and_mask, so
with ll_and_mask being 16-bit 3 and lr_and_mask being 32-bit 3.

Now, either b in the above case would be INTEGER_CST, in that case
if rr_arg was also INTEGER_CST we'd use the l_const && r_const case
and try to handle it, or we'd run into (though much later)
  if (ll_bitsize != lr_bitsize || rl_bitsize != rr_bitsize
...
return 0;

One possibility is dealing with a different precision using wide_int::from.

Another option used in this patch as it is safest is
+ if (ll_bitsize != lr_bitsize)
+   return 0;
  if (!lr_and_mask.get_precision ())
lr_and_mask = sign;
  else
lr_and_mask &= sign;
and similarly in the other hunk, i.e. punt if there is a mismatch
early.

And yet another option would be to compute
the sign
  wide_int sign = wi::mask (ll_bitsize - 1, true, ll_bitsize);
  /* If ll_arg is zero-extended and we're testing the sign bit, we know
 what the result should be.  Shifting the sign bit out of sign will 
get
 us to mask the entire field out, yielding zero, i.e., the sign bit 
of
 the zero-extended value.  We know the masked value is being 
compared
 with zero, so the compare will get us the result we're looking
 for: TRUE if EQ_EXPR, FALSE if NE_EXPR.  */
  if (lsignbit > ll_bitsize && ll_unsignedp)
sign <<= 1;
once again for the lr_and_mask and rr_and_mask cases using rl_bitsize.

As we just return 0; anyway unless l_const && r_const, if l_const & r_const
are false it doesn't really matter what is chosen, but for the const
cases it matters and I'm not sure what is right.  So the second option
might be safest.

2025-05-06  Jakub Jelinek  

PR tree-optimization/120074
* gimple-fold.cc (fold_truth_andor_for_ifcombine): For
lsignbit && l_xor case, punt if ll_bitsize != lr_bitsize.  Similarly
for rsignbit && r_xor case, punt if rl_bitsize != rr_bitsize.
Formatting fix.

* gcc.dg/pr120074.c: New test.

(cherry picked from commit 81475602c3dd57ff6987e5f902814e8e3a0a0dde)

Diff:
---
 gcc/gimple-fold.cc  |  6 +-
 gcc/testsuite/gcc.dg/pr120074.c | 20 
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index b64561396874..a64922aa907a 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8300,6 +8300,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
ll_and_mask &= sign;
   if (l_xor)
{
+ if (ll_bitsize != lr_bitsize)
+   return 0;
  if (!lr_and_mask.get_precision ())
lr_and_mask = sign;
  else
@@ -8321,6 +8323,8 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
rl_and_mask &= sign;
   if (r_xor)
{
+ if (rl_bitsize != rr_bitsize)
+   return 0;
  if (!rr_and_mask.get_precision ())
rr_and_mask = sign;
  else
@@ -8728,7 +8732,7 @@ fold_truth_andor_for_ifcombine (enum tree_code code, tree 
truth_type,
   wide_int lr_mask, rr_mask;
   if (lr_and_mask.get_precision ())
lr_mask = wi::lshift (wide_int::from (lr_and_mask, rnprec, UNSIGNED),
- xlr_bitpos);
+ xlr_bitpos);
   else
lr_mask = wi::shifted_mask (xlr_bitpos, lr_bitsize, false, rnprec);
   if (rr_and_mask.get_precision ())
diff --git a/gcc/testsuite/gcc.dg/pr120074.c b/gcc/testsuite/gcc.dg/pr120074.c
new file mode 100644
index ..3f31516fa56a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120074.c
@@ -0,0 +1,20 @@
+/* PR tree-optimization/120074 */
+/* { dg-do compile } *

[gcc r16-403] tree-optimization/115777 - STLF fails with BB vectorization of loop

2025-05-06 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:76c33109074b8e7cf6c326116b46792070122c7b

commit r16-403-g76c33109074b8e7cf6c326116b46792070122c7b
Author: Richard Biener 
Date:   Mon Mar 17 15:04:28 2025 +0100

tree-optimization/115777 - STLF fails with BB vectorization of loop

The following tries to address us BB vectorizing a loop body that
swaps consecutive elements of an array like for bubble-sort.  This
causes the vector store in the previous iteration to fail to forward
to the vector load in the current iteration since there's a partial
overlap.

We try to detect this situation by looking for a load to store
data dependence and analyze this with respect to the containing loop
for a proven problematic access.  Currently the search for a
problematic pair is limited to loads and stores in the same SLP
instance which means the problematic load happens in the next
loop iteration and larger dependence distances are not considered.

On x86 with generic costing this avoids vectorizing the loop body,
but once you do core-specific tuning the saved cost for the vector
store vs. the scalar stores makes vectorization still profitable,
but at least the STLF issue is avoided.

For example on my Zen4 machine with -O2 -march=znver4 the testcase in
the PR is improving from
  insertion_sort  => 2327
to
  insertion_sort  =>  997
but plain -O2 (or -fno-tree-slp-vectorize) gives
  insertion_sort  =>  183
In the end a better target-side cost model for small vector
vectorization is needed to reject this vectorization from this side.

I'll note this is a machine independent heuristic (similar to the
avoid-store-forwarding RTL optimization pass), I expect that uarchs
implementing vectors will suffer from this kind of issue.  I know
some aarch64 uarchs can forward from upper/lower part stores, this
isn't considered at the moment.  The actual vector size/overlap
distance check could be moved to a target hook if it turns out
necessary.

There might be the chance to use a smaller vector size for the loads
avoiding the penalty rather than falling back to elementwise accesses,
that's not implemented either.

PR tree-optimization/115777
* tree-vectorizer.h (_slp_tree::avoid_stlf_fail): New member.
* tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize it.
(vect_print_slp_tree): Dump it.
* tree-vect-data-refs.cc (vect_slp_analyze_instance_dependence):
For dataflow dependent loads of a store check whether there's
a cross-iteration data dependence that for sure prohibits
store-to-load forwarding and mark involved loads.
* tree-vect-stmts.cc (get_group_load_store_type): For 
avoid_stlf_fail
marked loads use VMAT_ELEMENTWISE.

* gcc.dg/vect/bb-slp-pr115777.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c | 15 +
 gcc/tree-vect-data-refs.cc  | 91 +
 gcc/tree-vect-slp.cc|  4 +-
 gcc/tree-vect-stmts.cc  |  8 +++
 gcc/tree-vectorizer.h   |  3 +
 5 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c
new file mode 100644
index ..bba0dc75f6fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+typedef unsigned int T;
+
+#define SWAP(A, B) do { T tmp = A; A = B; B = tmp; } while (0)
+
+void
+insertion_sort(T *v, int n)
+{
+  for (int i = 1; i < n; ++i)
+for (int k = i; k > 0 && v[k-1] > v[k]; --k)
+  SWAP(v[k-1], v[k]);
+}
+
+/* { dg-final { scan-tree-dump "using element-wise load" "slp1" { target { { 
x86_64-*-* i?86-*-* } && { ! ia32 } } } } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c9395e33fcdf..231a3cab4f80 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1203,6 +1203,97 @@ vect_slp_analyze_instance_dependence (vec_info *vinfo, 
slp_instance instance)
 for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
   gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
 
+  /* If this is a SLP instance with a store check if there's a dependent
+ load that cannot be forwarded from a previous iteration of a loop
+ both are in.  This is to avoid situations like that in PR115777.  */
+  if (res && store)
+{
+  stmt_vec_info store_info
+   = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
+  class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
+  if (! loop_outer (store_loop))
+   return res;
+  vec loop_nest;
+  loop_nest.create (1);
+  loop_nest.quick_push (store_loop);
+  data_refer

[gcc r16-408] RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 0

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1276430a48e4f6bb592d9e3f8c92e62341f09446

commit r16-408-g1276430a48e4f6bb592d9e3f8c92e62341f09446
Author: Pan Li 
Date:   Sat May 3 10:40:20 2025 +0800

RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 0

Add asm dump check and run test for vec_duplicate + vadd.vv combine
to vadd.vx.  Introduce new folder to hold all related testcases.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Add new folder vx_vf for all
vec_dup + vv to vx testcases.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_run.h: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h |  17 +
 .../riscv/rvv/autovec/vx_vf/vx_binary_data.h   | 401 +
 .../riscv/rvv/autovec/vx_vf/vx_binary_run.h|  26 ++
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-i16.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-i32.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-i64.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-i8.c |   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-u16.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-u32.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-u64.c|   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-1-u8.c |   8 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i16.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i32.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i64.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i8.c |  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u16.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u32.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u64.c|  14 +
 .../riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u8.c |  14 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp |   2 +
 20 files changed, 622 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h
new file mode 100644
index ..66654eb90227
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h
@@ -0,0 +1,17 @@
+#ifndef HAVE_DEFINED_VX_VF_BINARY_H
+#define HAVE_DEFINED_VX_VF_BINARY_H
+
+#include 
+
+#define DEF_VX_BINARY(T, OP)\
+void\
+test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
+{   \
+  for (unsigned i = 0; i < n; i++)  \
+out[i] = in[i] OP x;\
+}
+#define DEF_VX_BINARY_WRAP(T, OP) DEF_VX_BINARY(T, OP)
+#define RUN_VX_BINARY(out, in, x, n)  test_vx_binary(out, in, x, n)
+#define RUN_VX_BINARY_WRAP(out, in, x, n) RUN_VX_BINARY(out, in, x, n)
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h
new file mode 100644
index ..11a32cbbf0fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h
@@ -0,0 +1,401 @@
+#ifndef HAVE_DEFINED_VX_BINARY_DATA_H
+#define HAVE_DEFINED_VX_BINARY_DATA_H
+
+#define N 16
+
+#define TEST_BINARY_DATA(T, NAME)  test_##T##_##NAME##_data
+#define TEST_BINARY_DATA_WRAP(T, NAME) TEST_BINARY_DATA(T, NAME)
+
+int8_t TES

[gcc r16-406] RISC-V: Add gr2vr cost helper function

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:9e9eb78bf4b77a049be00fb9ab0047170f19c9ea

commit r16-406-g9e9eb78bf4b77a049be00fb9ab0047170f19c9ea
Author: Pan Li 
Date:   Tue May 6 16:42:16 2025 +0800

RISC-V: Add gr2vr cost helper function

After we introduced the --param=gpr2vr-cost option to set the cost
of an operation that moves from a GPR to a VR, we would like to introduce
a new helper function to get the GR2VR cost, and then make sure
all references to the GR2VR cost go through this helper function.

The helper function returns the value of --param=gpr2vr-cost if that option
is provided; otherwise the default regmove GR2VR cost is returned.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (get_gr2vr_cost): Add new decl to
get the cost of gr2vr.
* config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost):
Leverage the helper function to get the cost of gr2vr.
* config/riscv/riscv.cc (riscv_register_move_cost): Ditto.
(riscv_builtin_vectorization_cost): Ditto.
(get_gr2vr_cost): Add new impl of the helper function.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv-vector-costs.cc |  2 +-
 gcc/config/riscv/riscv.cc  | 19 +--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2e889903eb3a..b0d5bbb8570b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -836,6 +836,7 @@ struct riscv_tune_info {
 const struct riscv_tune_info *
 riscv_parse_tune (const char *, bool);
 const cpu_vector_cost *get_vector_costs ();
+int get_gr2vr_cost ();
 
 enum
 {
diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 167375ca7516..c28eecd1110e 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1121,7 +1121,7 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, 
loop_vec_info loop,
 {
 case scalar_to_vec:
   stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
-   : costs->regmove->GR2VR);
+   : get_gr2vr_cost ());
   break;
 case vec_to_scalar:
   stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a0657323f65f..42d501a1291b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -9690,7 +9690,7 @@ riscv_register_move_cost (machine_mode mode,
   if (to == V_REGS)
 {
   if (from_is_gpr)
-   return get_vector_costs ()->regmove->GR2VR;
+   return get_gr2vr_cost ();
   else if (from_is_fpr)
return get_vector_costs ()->regmove->FR2VR;
 }
@@ -12540,6 +12540,21 @@ get_vector_costs ()
   return costs;
 }
 
+/* Return the cost of operation that move from gpr to vr.
+   It will take the value of --param=gpr2vr_cost if it is provided.
+   Or the default regmove->GR2VR will be returned.  */
+
+int
+get_gr2vr_cost ()
+{
+  int cost = get_vector_costs ()->regmove->GR2VR;
+
+  if (gpr2vr_cost != GPR2VR_COST_UNPROVIDED)
+cost = gpr2vr_cost;
+
+  return cost;
+}
+
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 
 static int
@@ -12606,7 +12621,7 @@ riscv_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
{
  /* TODO: This is too pessimistic in case we can splat.  */
  int regmove_cost = fp ? costs->regmove->FR2VR
-   : costs->regmove->GR2VR;
+   : get_gr2vr_cost ();
  return (regmove_cost + common_costs->scalar_to_vec_cost)
* estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
}


[gcc r16-410] RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 15

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b3a32804bebb9520b327b7cbf3e8f8b4730f9bd6

commit r16-410-gb3a32804bebb9520b327b7cbf3e8f8b4730f9bd6
Author: Pan Li 
Date:   Sat May 3 11:37:09 2025 +0800

RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 15

Add asm dump checks for the vec_duplicate + vadd.vv combine to vadd.vx.
The late-combine will not take action when GR2VR cost is 15.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c  | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u64.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u8.c  | 8 
 8 files changed, 64 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c
new file mode 100644
index ..f3a262711a4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int16_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c
new file mode 100644
index ..490854cfbd7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int32_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c
new file mode 100644
index ..a7448dfa56b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int64_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c
new file mode 100644
index ..72c7cd803fac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int8_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c
new file mode 100644
index ..552b4ed7c2fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(uint16_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c
new file mode 100644
index ..e319672fc044
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=15" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(uint32_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u64.c 
b/gcc

[gcc r16-405] RISC-V: Add new option --param=gpr2vr-cost= for rvv insn

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:17c1602d5e7b237357b94808399a68ab77d42640

commit r16-405-g17c1602d5e7b237357b94808399a68ab77d42640
Author: Pan Li 
Date:   Tue May 6 16:26:06 2025 +0800

RISC-V: Add new option --param=gpr2vr-cost= for rvv insn

While investigating the combine of vec_dup and vop.vv into
vop.vx, we need to depend on the cost of insns that operate
from GPR to VR, for example vadd.vx.  Thus, for better
control and testing, we introduce a new option, as below:

--param=gpr2vr-cost=

It specifies the cost value of insns that operate from
GPR to VR.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (GPR2VR_COST_UNPROVIDED): Add
new macro to indicate the param is not provided.
* config/riscv/riscv.opt: Add new option --param=gpr2vr-cost.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/riscv-opts.h | 2 ++
 gcc/config/riscv/riscv.opt| 4 
 2 files changed, 6 insertions(+)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 26fe228e0f82..9766b89b2dff 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -162,4 +162,6 @@ enum riscv_tls_type {
 #define TARGET_VECTOR_AUTOVEC_SEGMENT \
   (TARGET_VECTOR && riscv_mautovec_segment)
 
+#define GPR2VR_COST_UNPROVIDED -1
+
 #endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8ea13dd..710248099b3c 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -579,6 +579,10 @@ Inline strlen calls if possible.
 Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64)
 Max number of bytes to compare as part of inlined strcmp/strncmp routines 
(default: 64).
 
+-param=gpr2vr-cost=
+Target RejectNegative Joined UInteger Var(gpr2vr_cost) 
Init(GPR2VR_COST_UNPROVIDED)
+Set the cost value of the rvv instruction when operate from GPR to VR.
+
 Enum
 Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum)
 The RVV possible LMUL (-mrvv-max-lmul=):


[gcc r16-409] RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 1

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:c10491efc108bd4fc2d983bcabb50506d09e2a17

commit r16-409-gc10491efc108bd4fc2d983bcabb50506d09e2a17
Author: Pan Li 
Date:   Sat May 3 11:27:50 2025 +0800

RISC-V: Add testcases for vec_duplicate + vadd.vv combine when GR2VR cost 1

Add asm dump checks for the vec_duplicate + vadd.vv combine to vadd.vx.
The late-combine will not take action when GR2VR cost is 1.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u64.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c  | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u64.c | 8 
 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u8.c  | 8 
 8 files changed, 64 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c
new file mode 100644
index ..eb19938afe56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int16_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c
new file mode 100644
index ..24182c51811e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int32_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c
new file mode 100644
index ..b3d3d4b7d209
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int64_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c
new file mode 100644
index ..fb353151b53c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(int8_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c
new file mode 100644
index ..6ba265893f15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(uint16_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c
new file mode 100644
index ..b60412cecfa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=gpr2vr-cost=1" } */
+
+#include "vx_binary.h"
+
+DEF_VX_BINARY(uint32_t, +)
+
+/* { dg-final { scan-assembler-not {vadd.vx} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u64.c 
b/gcc/testsui

[gcc r16-418] libgcobol: Fix bootstrap for targets without program_invocation_short_name

2025-05-06 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:67e79da5a3c0deb93cd6df1557affb6994440357

commit r16-418-g67e79da5a3c0deb93cd6df1557affb6994440357
Author: Iain Sandoe 
Date:   Tue May 6 09:42:40 2025 +0100

libgcobol: Fix bootstrap for targets without program_invocation_short_name

program_invocation_short_name is not widely available, however getprogname()
appears to be a suitable replacement.

Amend the library configuration to look for both.  Use
program_invocation_short_name in preference to getprogname() when it is
available.  If neither is found, fall back to a constant string.

libgcobol/ChangeLog:

* config.h.in: Regenerate.
* configure: Regenerate.
* configure.ac: Check for program_invocation_short_name and
getprogname().
* libgcobol.cc (default_exception_handler): When the platform
has program_invocation_short_name, use it otherwise fall
back to using getprogname() or a constant string (if neither
interface is available).

Signed-off-by: Iain Sandoe 

Diff:
---
 libgcobol/config.h.in  | 10 +++
 libgcobol/configure| 81 --
 libgcobol/configure.ac | 14 -
 libgcobol/libgcobol.cc | 15 --
 4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/libgcobol/config.h.in b/libgcobol/config.h.in
index fdf5e3e7dc1c..ee3dd6b21514 100644
--- a/libgcobol/config.h.in
+++ b/libgcobol/config.h.in
@@ -6,9 +6,16 @@
 /* Define to 1 if you have the <complex.h> header file. */
 #undef HAVE_COMPLEX_H
 
+/* Define to 1 if you have the declaration of `program_invocation_short_name',
+   and to 0 if you don't. */
+#undef HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
+
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Define to 1 if you have the <errno.h> header file. */
+#undef HAVE_ERRNO_H
+
 /* Define to 1 if you have the <fenv.h> header file. */
 #undef HAVE_FENV_H
 
@@ -21,6 +28,9 @@
 /* Define to 1 if you have the <fptrap.h> header file. */
 #undef HAVE_FPTRAP_H
 
+/* Define to 1 if you have the `getprogname' function. */
+#undef HAVE_GETPROGNAME
+
 /* Define if you have the iconv() function and it works. */
 #undef HAVE_ICONV
 
diff --git a/libgcobol/configure b/libgcobol/configure
index 6821591852af..06e7544822cb 100755
--- a/libgcobol/configure
+++ b/libgcobol/configure
@@ -2380,6 +2380,52 @@ $as_echo "$ac_res" >&6; }
 
 } # ac_fn_cxx_check_header_compile
 
+# ac_fn_cxx_check_decl LINENO SYMBOL VAR INCLUDES
+# ---
+# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR
+# accordingly.
+ac_fn_cxx_check_decl ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  as_decl_name=`echo $2|sed 's/ *(.*//'`
+  as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'`
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is 
declared" >&5
+$as_echo_n "checking whether $as_decl_name is declared... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main ()
+{
+#ifndef $as_decl_name
+#ifdef __cplusplus
+  (void) $as_decl_use;
+#else
+  (void) $as_decl_name;
+#endif
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_cxx_check_decl
+
 # ac_fn_cxx_check_func LINENO FUNC VAR
 # 
 # Tests whether FUNC exists, setting the cache variable VAR accordingly
@@ -2796,6 +2842,8 @@ as_fn_append ac_header_list " fenv.h"
 as_fn_append ac_header_list " fptrap.h"
 as_fn_append ac_header_list " complex.h"
 as_fn_append ac_header_list " stdlib.h"
+as_fn_append ac_header_list " errno.h"
+as_fn_append ac_func_list " getprogname"
 as_fn_append ac_func_list " random_r"
 as_fn_append ac_func_list " srandom_r"
 as_fn_append ac_func_list " initstate_r"
@@ -11750,7 +11798,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11753 "configure"
+#line 11801 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11856,7 +11904,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11859 "configure"
+#line 11907 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17016,7 +17064,29 @@ done
 
 
 
-# These are GLIBC
+
+
+# Look for a way to represent the program name
+# First, check the GLIBC case
+ac_fn_cxx_check_decl "$LINENO" "program_invocation_short_name" 
"ac_cv_have_decl_program_invocation_short_

[gcc r16-413] diagnostics: add logical_location_manager; reimplement logical_location

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:8ab6899dce92e6ef8cc55096789fcda60d4129cb

commit r16-413-g8ab6899dce92e6ef8cc55096789fcda60d4129cb
Author: David Malcolm 
Date:   Tue May 6 09:26:17 2025 -0400

diagnostics: add logical_location_manager; reimplement logical_location

Previously we used an abstract base class logical_location with
concrete subclasses to separate the diagnostics subsystem from
implementation details of "tree" and of libgdiagnostics.

This approach required allocating implementation objects on the heap
whenever working with logical locations, and made comparing logical
locations awkward.

This patch reworks things so that the type "logical_location" becomes a
boxed pointer (const void *), and client code provides a single object
implementing a new logical_location_manager abstract base class.  The
manager class has responsibility for providing meaning to the boxed
pointers.  Within the compiler we use a manager in which they are "tree"
pointers, whereas within libgdiagnostics we use a manager in which they
are pointers to instances of libgdiagnostics'
"struct diagnostic_logical_location".  Other kinds of manager could be
implemented.

gcc/analyzer/ChangeLog:
* checker-event.cc (checker_event::checker_event): Update
initialization of m_logical_loc.
(checker_event::maybe_add_sarif_properties): Add "builder" param.
Replace call to make_sarif_logical_location_object with call to
sarif_property_bag::set_logical_location.
(superedge_event::maybe_add_sarif_properties): Add "builder"
param.
* checker-event.h (checker_event::get_logical_location):
Reimplement.
(checker_event::maybe_add_sarif_properties): Add "builder" param.
(checker_event::maybe_add_sarif_properties): Add "builder" param.
(checker_event::m_logical_loc): Convert from tree_logical_location
to logical_location.
(superedge_event::maybe_add_sarif_properties): Add sarif_builder
param.
* checker-path.h (checker_path::checker_path): Add logical_loc_mgr
param.
* diagnostic-manager.cc
(diagnostic_manager::emit_saved_diagnostic): Pass logical location
manager to emission_path ctor.
(diagnostic_manager::get_logical_location_manager): New.
* diagnostic-manager.h
(diagnostic_manager::get_logical_location_manager): New decl.

gcc/ChangeLog:
* diagnostic-client-data-hooks.h: Include "logical-location.h".
(diagnostic_client_data_hooks::get_logical_location_manager): New.
(diagnostic_client_data_hooks::get_current_logical_location):
Convert return type from const logical_location * to
logical_location.
* diagnostic-format-json.cc: Include
"diagnostic-client-data-hooks.h".
(make_json_for_path): Update to use logical_location_manager from
the context.
* diagnostic-format-sarif.cc
(sarif_builder::get_logical_location_manager): New.
(sarif_builder::make_location_object): Update type of logical_loc
from "const logical_location *" to "logical_location".
(sarif_builder::set_any_logical_locs_arr): Likewise.
(sarif_builder::m_logical_loc_mgr): New field.
(sarif_result::on_nested_diagnostic): Use logical_location default
ctor rather than nullptr.
(sarif_builder::sarif_builder): Initialize m_logical_loc_mgr from
context's client data hooks.
(sarif_builder::make_locations_arr): Convert type of logical_loc
from "const logical_location *" to "logical_location".
(sarif_builder::set_any_logical_locs_arr): Likewise.  Pass manager
to make_sarif_logical_location_object.
(sarif_builder::make_location_object): Likewise.
(sarif_property_bag::set_logical_location): New.
(make_sarif_logical_location_object): Update for introduction of
logical_location_manager.
(populate_thread_flow_location_object): Pass builder to
ev.maybe_add_sarif_properties.
(selftest::test_make_location_object): Use logical_location
default ctor rather than nullptr.
* diagnostic-format-sarif.h (class logical_location): Replace
forward decl with include of "logical-location.h".
(class sarif_builder): New forward decl.
(sarif_property_bag::set_logical_location): New.
(make_sarif_logical_location_object): Add "mgr" param.
* diagnostic-path.cc
(diagnostic_path::get_first_event_in_a_function): Update for
change of logical_location type.
(per_thread_summary::per_thread_summary): Pass 

[gcc r16-414] sarif output: capture nesting of logical locations [PR116176]

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:f25e178b8c2cc868168b0a29ab03260fbefa4ff7

commit r16-414-gf25e178b8c2cc868168b0a29ab03260fbefa4ff7
Author: David Malcolm 
Date:   Tue May 6 09:26:18 2025 -0400

sarif output: capture nesting of logical locations [PR116176]

Previously our SARIF output did not capture nesting of logical
locations: any time a result or event referred to a logical location it
would simply put a copy of the logical location into the pertinent
location object without a "parentIndex" property.

With this patch we instead populate such locations with minimal logical
locations with an "index" that refers to theRuns.logicalLocations,
populating theRuns.logicalLocations with the full logical locations,
including "parentIndex", recursively adding entries for the ancestor
locations as needed, so that the SARIF output captures the hierarchical
structure of the logical locations.

gcc/ChangeLog:
PR other/116176
* diagnostic-format-sarif.cc (class sarif_array_of_unique): New
template.
(class sarif_logical_location): Move here from
diagnostic-format-sarif.h.
(sarif_builder::m_cached_logical_locs): New.
(sarif_builder::sarif_builder): Initialize it.
(sarif_builder::set_any_logical_locs_arr): Call
make_minimal_sarif_logical_location rather than
make_sarif_logical_location_object.
(sarif_property_bag::set_logical_location): Likewise.
(make_sarif_logical_location_object): Replace with...
(sarif_builder::ensure_sarif_logical_location_for): ...this.
Capture "parentIndex" property.  Consolidate into
theRuns.logicalLocations.
(sarif_builder::make_minimal_sarif_logical_location): New.
(sarif_builder::make_run_object): Add "index" properties to
m_cached_logical_locs and move it to theRuns.logicalLocations.
(selftest::test_sarif_array_of_unique_1): New.
(selftest::test_sarif_array_of_unique_2): New.
(selftest::diagnostic_format_sarif_cc_tests): Call the new
selftests.
* diagnostic-format-sarif.h (class sarif_logical_location): Move
to diagnostic-format-sarif.cc.
(make_sarif_logical_location_object): Drop decl.
* json.cc (value::compare): New.
(object::compare): New.
(selftest::fail_comparison): New.
(selftest::assert_json_equal): New.
(ASSERT_JSON_EQ): New.
(selftest::assert_json_non_equal): New.
(ASSERT_JSON_NE): New.
(selftest::test_comparisons): New.
(selftest::json_cc_tests): Call the new selftest.
* json.h (json::value::dyn_cast_object): New vfunc.
(json::object::dyn_cast_object): New vfunc impl.
(json::object::compare): New decl.
* libgdiagnostics.cc
(impl_logical_location_manager::get_parent): New.
* logical-location.h (logical_location_manager::get_parent): New
vfunc impl.
* selftest-logical-location.h
(test_logical_location_manager::get_parent): New vfunc impl.
* tree-logical-location.cc (assert_valid_tree): New.
(tree_logical_location_manager::get_short_name): Support types as
well as decls.
(tree_logical_location_manager::get_name_with_scope): Gracefully
handle non-decl nodes.
(tree_logical_location_manager::get_internal_name): Likewise.
(tree_logical_location_manager::get_kind): Don't attempt to handle
null nodes.  Handle NAMESPACE_DECL and RECORD_TYPE.
(tree_logical_location_manager::get_name_for_path_output):
Gracefully handle non-decl nodes.
(tree_logical_location_manager::get_parent): New.
* tree-logical-location.h
(tree_logical_location_manager::get_parent): New vfunc impl.

gcc/testsuite/ChangeLog:
PR other/116176
* g++.dg/sarif-output/logical-locations-1.C: New test.
* g++.dg/sarif-output/logical-locations-1.py: New test script.
* g++.dg/sarif-output/logical-locations-2.C: New test.
* g++.dg/sarif-output/logical-locations-2.py: New test script.
* g++.dg/sarif-output/logical-locations-3.C: New test.
* g++.dg/sarif-output/logical-locations-3.py: New test script.
* g++.dg/sarif-output/sarif-output.exp: New script, adapted
from gcc.dg/sarif-output/sarif-output.exp.
* libgdiagnostics.dg/test-logical-location-c.py: Update for using
theRun.logicalLocations.
* libgdiagnostics.dg/test-warning-with-path-c.py: Likewise.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-format-sarif.cc | 225 --
 gcc/diagnos

[gcc r16-415] diagnostics: support XML and JSON kinds of logical locations

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:9fb44cc4823106729d086713dd022a4590703a4c

commit r16-415-g9fb44cc4823106729d086713dd022a4590703a4c
Author: David Malcolm 
Date:   Tue May 6 09:26:18 2025 -0400

diagnostics: support XML and JSON kinds of logical locations

gcc/ChangeLog:
* diagnostic-format-sarif.cc (maybe_get_sarif_kind): Add cases for
new kinds of logical location.
* doc/libgdiagnostics/topics/logical-locations.rst: Add new kinds
of logical location for handling XML and JSON.
* libgdiagnostics.cc (impl_logical_location_manager::get_kind):
Add cases for new kinds of logical location.
(diagnostic_text_sink::text_starter): Likewise, introducing a
macro for this.
(diagnostic_manager_debug_dump_logical_location): Likewise.
* libgdiagnostics.h (enum diagnostic_logical_location_kind_t): Add
new kinds of logical location for handling XML and JSON.
* libsarifreplay.cc (handle_logical_location_object): Add entries
to "kind_values" for decoding sarif logical location kinds
relating to XML and JSON.
* logical-location.h (enum logical_location_kind): Add new kinds
of logical location for handling XML and JSON.

gcc/testsuite/ChangeLog:
* libgdiagnostics.dg/test-nested-logical-locations-json-c.py: New 
test.
* libgdiagnostics.dg/test-nested-logical-locations-json.c: New test.
* sarif-replay.dg/2.1.0-valid/3.33.7-json-example.sarif: New test.
* sarif-replay.dg/2.1.0-valid/3.33.7-xml-example.sarif: New test.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic-format-sarif.cc |  27 
 .../libgdiagnostics/topics/logical-locations.rst   |  28 
 gcc/libgdiagnostics.cc | 117 +--
 gcc/libgdiagnostics.h  |  18 ++-
 gcc/libsarifreplay.cc  |  27 +++-
 gcc/logical-location.h |  18 ++-
 .../test-nested-logical-locations-json-c.py|  79 ++
 .../test-nested-logical-locations-json.c   | 165 +
 .../2.1.0-valid/3.33.7-json-example.sarif  |  83 +++
 .../2.1.0-valid/3.33.7-xml-example.sarif   |  77 ++
 10 files changed, 626 insertions(+), 13 deletions(-)

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 1b0743cb3c7d..454eaae4d905 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -2734,6 +2734,7 @@ maybe_get_sarif_kind (enum logical_location_kind kind)
 case LOGICAL_LOCATION_KIND_UNKNOWN:
   return nullptr;
 
+/* Kinds within executable code.  */
 case LOGICAL_LOCATION_KIND_FUNCTION:
   return "function";
 case LOGICAL_LOCATION_KIND_MEMBER:
@@ -2750,6 +2751,32 @@ maybe_get_sarif_kind (enum logical_location_kind kind)
   return "parameter";
 case LOGICAL_LOCATION_KIND_VARIABLE:
   return "variable";
+
+/* Kinds within XML or HTML documents.  */
+case LOGICAL_LOCATION_KIND_ELEMENT:
+  return "element";
+case LOGICAL_LOCATION_KIND_ATTRIBUTE:
+  return "attribute";
+case LOGICAL_LOCATION_KIND_TEXT:
+  return "text";
+case LOGICAL_LOCATION_KIND_COMMENT:
+  return "comment";
+case LOGICAL_LOCATION_KIND_PROCESSING_INSTRUCTION:
+  return "processingInstruction";
+case LOGICAL_LOCATION_KIND_DTD:
+  return "dtd";
+case LOGICAL_LOCATION_KIND_DECLARATION:
+  return "declaration";
+
+/* Kinds within JSON documents.  */
+case LOGICAL_LOCATION_KIND_OBJECT:
+  return "object";
+case LOGICAL_LOCATION_KIND_ARRAY:
+  return "array";
+case LOGICAL_LOCATION_KIND_PROPERTY:
+  return "property";
+case LOGICAL_LOCATION_KIND_VALUE:
+  return "value";
 }
 }
 
diff --git a/gcc/doc/libgdiagnostics/topics/logical-locations.rst 
b/gcc/doc/libgdiagnostics/topics/logical-locations.rst
index 85f239d6bb1e..184b56381910 100644
--- a/gcc/doc/libgdiagnostics/topics/logical-locations.rst
+++ b/gcc/doc/libgdiagnostics/topics/logical-locations.rst
@@ -51,6 +51,8 @@ source location
   This roughly corresponds to the ``kind`` property in SARIF v2.1.0
   (`§3.33.7 
`_).
 
+  Kinds within executable code:
+
   .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_FUNCTION
 
   .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_MEMBER
@@ -67,6 +69,32 @@ source location
 
   .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_VARIABLE
 
+  Kinds within XML or HTML documents:
+
+  .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_ELEMENT
+
+  .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_ATTRIBUTE
+
+  .. macro:: DIAGNOSTIC_LOGICAL_LOCATION_KIND_TEXT
+
+  .. macro:: DIAGNOSTIC_LOGICAL

[gcc r16-412] libgdiagnostics: add accessors for diagnostic_logical_location [LIBGDIAGNOSTICS_ABI_1]

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:bf6d85490a2a95d251b88812dbf6d239be54ac18

commit r16-412-gbf6d85490a2a95d251b88812dbf6d239be54ac18
Author: David Malcolm 
Date:   Tue May 6 09:26:17 2025 -0400

libgdiagnostics: add accessors for diagnostic_logical_location 
[LIBGDIAGNOSTICS_ABI_1]

For followup work I need to be able to get at data from a
diagnostic_logical_location after creating it, hence the
need to extend libgdiagnostics with accessor entrypoints.

This is the first extension to libgdiagnostics since the initial
release.  The patch uses symbol versioning to add the new
entrypoints in the same way that libgccjit does.

gcc/ChangeLog:
* doc/libgdiagnostics/topics/compatibility.rst: New file, based
on gcc/jit/docs/topics/compatibility.rst.
* doc/libgdiagnostics/topics/index.rst: Add compatibility.rst.
* doc/libgdiagnostics/topics/logical-locations.rst (Accessors):
New section.
* libgdiagnostics++.h (logical_location::operator bool): New.
(logical_location::operator==): New.
(logical_location::operator!=): New.
(logical_location::get_kind): New.
(logical_location::get_parent): New.
(logical_location::get_short_name): New.
(logical_location::get_fully_qualified_name): New.
(logical_location::get_decorated_name): New.
* libgdiagnostics.cc
(diagnostic_logical_location::get_fully_qualified_name): New.
(diagnostic_logical_location_get_kind): New entrypoint.
(diagnostic_logical_location_get_parent): New entrypoint.
(diagnostic_logical_location_get_short_name): New entrypoint.
(diagnostic_logical_location_get_fully_qualified_name): New
entrypoint.
(diagnostic_logical_location_get_decorated_name): New entrypoint.
* libgdiagnostics.h
(LIBDIAGNOSTICS_HAVE_LOGICAL_LOCATION_ACCESSORS): New define.
(diagnostic_logical_location_get_kind): New entrypoint.
(diagnostic_logical_location_get_parent): New entrypoint.
(diagnostic_logical_location_get_short_name): New entrypoint.
(diagnostic_logical_location_get_fully_qualified_name): New
entrypoint.
(diagnostic_logical_location_get_decorated_name): New entrypoint.
* libgdiagnostics.map (LIBGDIAGNOSTICS_ABI_1): New.

gcc/testsuite/ChangeLog:
* libgdiagnostics.dg/test-logical-location.c: Include
.
(main): Verify that the accessors work.
* libgdiagnostics.dg/test-logical-location.cc: New test.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/doc/libgdiagnostics/topics/compatibility.rst   | 179 +
 gcc/doc/libgdiagnostics/topics/index.rst   |   1 +
 .../libgdiagnostics/topics/logical-locations.rst   |  25 +++
 gcc/libgdiagnostics++.h|  64 
 gcc/libgdiagnostics.cc |  47 ++
 gcc/libgdiagnostics.h  |  26 +++
 gcc/libgdiagnostics.map|  10 ++
 .../libgdiagnostics.dg/test-logical-location.c |  12 ++
 .../libgdiagnostics.dg/test-logical-location.cc|  91 +++
 9 files changed, 455 insertions(+)

diff --git a/gcc/doc/libgdiagnostics/topics/compatibility.rst 
b/gcc/doc/libgdiagnostics/topics/compatibility.rst
new file mode 100644
index ..4df685001e67
--- /dev/null
+++ b/gcc/doc/libgdiagnostics/topics/compatibility.rst
@@ -0,0 +1,179 @@
+.. Copyright (C) 2015-2025 Free Software Foundation, Inc.
+   Originally contributed by David Malcolm 
+
+   This is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see
+   .
+
+.. default-domain:: c
+
+ABI and API compatibility
+=========================
+
+The libgdiagnostics developers strive for ABI and API backward-compatibility:
+programs built against libgdiagnostics.so stand a good chance of running
+without recompilation against newer versions of libgdiagnostics.so, and
+ought to recompile without modification against newer versions of
+libgdiagnostics.h.
+
+.. note:: The libgdiagnostics++.h C++ API is more experimental, and less
+  locked-down at this time.
+
+API compatibility is achieved by extending the API rather than changing
+it.  For ABI compa

[gcc r16-417] diagnostics: use diagnostic_option_id in one more place

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:f4fa41cd5ccbcc1a45e68c15bda2461e8d7215b9

commit r16-417-gf4fa41cd5ccbcc1a45e68c15bda2461e8d7215b9
Author: David Malcolm 
Date:   Tue May 6 09:26:19 2025 -0400

diagnostics: use diagnostic_option_id in one more place

No functional change intended.

gcc/ChangeLog:
* selftest-diagnostic.cc (test_diagnostic_context::report): Use
diagnostic_option_id rather than plain int.
* selftest-diagnostic.h (test_diagnostic_context::report):
Likewise.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/selftest-diagnostic.cc | 2 +-
 gcc/selftest-diagnostic.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/selftest-diagnostic.cc b/gcc/selftest-diagnostic.cc
index 8cf47ab304c6..1a10807243ec 100644
--- a/gcc/selftest-diagnostic.cc
+++ b/gcc/selftest-diagnostic.cc
@@ -69,7 +69,7 @@ bool
 test_diagnostic_context::report (diagnostic_t kind,
 rich_location &richloc,
 const diagnostic_metadata *metadata,
-int option,
+diagnostic_option_id option,
 const char * fmt, ...)
 {
   va_list ap;
diff --git a/gcc/selftest-diagnostic.h b/gcc/selftest-diagnostic.h
index dccad97da029..c8f67a039850 100644
--- a/gcc/selftest-diagnostic.h
+++ b/gcc/selftest-diagnostic.h
@@ -50,7 +50,7 @@ class test_diagnostic_context : public diagnostic_context
   report (diagnostic_t kind,
  rich_location &richloc,
  const diagnostic_metadata *metadata,
- int option,
+ diagnostic_option_id option,
  const char * fmt, ...) ATTRIBUTE_GCC_DIAG(6,7);
 
   const char *test_show_locus (rich_location &richloc);


[gcc r16-416] json: implement JSON pointer; use it in sarif-replay [PR117988]

2025-05-06 Thread David Malcolm via Gcc-cvs
https://gcc.gnu.org/g:52fe9502eb153f87154cfcb6a58ab84164d7e1d9

commit r16-416-g52fe9502eb153f87154cfcb6a58ab84164d7e1d9
Author: David Malcolm 
Date:   Tue May 6 09:26:19 2025 -0400

json: implement JSON pointer; use it in sarif-replay [PR117988]

This patch extends our json class to track JSON pointers (RFC 6901),
and then uses this within sarif-replay to provide logical locations
within the JSON when reporting on issues in the SARIF.

gcc/ChangeLog:
PR sarif-replay/117988
* json.cc (json::pointer::token::token): New ctors.
(json::pointer::token::~token): New.
(json::pointer::token::operator=): New.
(json::object::set): Set the value's m_pointer_token.
(json::array::append): Likewise.
* json.h (json::pointer::token): New struct.
(json::value::get_pointer_token): New accessor.
(json::value::m_pointer_token): New field.
* libsarifreplay.cc (get_logical_location_kind_for_json_kind):
New.
(make_logical_location_from_jv): New.
(sarif_replayer::report_problem): Set the logical location of the
diagnostic.

gcc/testsuite/ChangeLog:
PR sarif-replay/117988
* sarif-replay.dg/2.1.0-invalid/3.1-not-an-object.sarif: Add
expected logical location.
* 
sarif-replay.dg/2.1.0-invalid/3.11.11-missing-arguments-for-placeholders.sarif:
Likewise.
* 
sarif-replay.dg/2.1.0-invalid/3.11.11-not-enough-arguments-for-placeholders.sarif:
Likewise.
* sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif: 
Likewise.
* sarif-replay.dg/2.1.0-invalid/3.13.2-no-version.sarif: Likewise.
* sarif-replay.dg/2.1.0-invalid/3.13.2-version-not-a-string.sarif: 
Likewise.
* sarif-replay.dg/2.1.0-invalid/3.13.4-bad-runs.sarif: Likewise.
* sarif-replay.dg/2.1.0-invalid/3.13.4-no-runs.sarif: Likewise.
* sarif-replay.dg/2.1.0-invalid/3.13.4-non-object-in-runs.sarif: 
Likewise.
* sarif-replay.dg/2.1.0-invalid/3.27.10-bad-level.sarif: Likewise.
* sarif-replay.dg/2.1.0-invalid/3.33.3-index-out-of-range.sarif: 
Likewise.
* sarif-replay.dg/2.1.0-unhandled/3.27.10-none-level.sarif: 
Likewise.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/json.cc| 49 ++
 gcc/json.h | 47 +
 gcc/libsarifreplay.cc  | 79 ++
 .../2.1.0-invalid/3.1-not-an-object.sarif  |  3 +
 11.11-missing-arguments-for-placeholders.sarif |  3 +
 11-not-enough-arguments-for-placeholders.sarif |  3 +
 .../2.1.0-invalid/3.11.5-unescaped-braces.sarif|  3 +
 .../2.1.0-invalid/3.13.2-no-version.sarif  |  3 +
 .../3.13.2-version-not-a-string.sarif  |  3 +
 .../2.1.0-invalid/3.13.4-bad-runs.sarif|  3 +
 .../2.1.0-invalid/3.13.4-no-runs.sarif |  3 +
 .../2.1.0-invalid/3.13.4-non-object-in-runs.sarif  |  3 +
 .../2.1.0-invalid/3.27.10-bad-level.sarif  |  3 +
 .../2.1.0-invalid/3.33.3-index-out-of-range.sarif  |  3 +
 .../2.1.0-unhandled/3.27.10-none-level.sarif   |  3 +
 15 files changed, 211 insertions(+)

diff --git a/gcc/json.cc b/gcc/json.cc
index c54401bc530e..f3f364598569 100644
--- a/gcc/json.cc
+++ b/gcc/json.cc
@@ -74,6 +74,52 @@ print_escaped_json_string (pretty_printer *pp,
   pp_character (pp, '"');
 }
 
+/* class pointer::token.  */
+
+pointer::token::token ()
+{
+  m_parent = nullptr;
+  m_data.u_member = nullptr;
+  m_kind = kind::root_value;
+}
+
+pointer::token::token (json::object &parent, const char *member)
+{
+  m_parent = &parent;
+  m_data.u_member = xstrdup (member); // ideally we'd share
+  m_kind = kind::object_member;
+}
+
+pointer::token::token (json::array &parent, size_t index)
+{
+  m_parent = &parent;
+  m_data.u_index = index;
+  m_kind = kind::array_index;
+}
+
+pointer::token::~token ()
+{
+  if (m_kind == kind::object_member)
+{
+  gcc_assert (m_data.u_member);
+  free (m_data.u_member);
+}
+}
+
+pointer::token &
+pointer::token::operator= (pointer::token &&other)
+{
+  m_parent = other.m_parent;
+  m_data = other.m_data;
+  m_kind = other.m_kind;
+
+  other.m_parent = nullptr;
+  other.m_data.u_member = nullptr;
+  other.m_kind = kind::root_value;
+
+  return *this;
+}
+
 /* class json::value.  */
 
 /* Dump this json::value tree to OUTF.
@@ -268,6 +314,8 @@ object::set (const char *key, value *v)
   m_map.put (owned_key, v);
   m_keys.safe_push (owned_key);
 }
+
+  v->m_pointer_token = pointer::token (*this, key);
 }
 
 /* Get the json::value * for KEY.
@@ -401,6 +449,7 @@ void
 array::append (value *v)
 {
   gcc_assert (v);
+  v->m_pointer_token = pointer::token (*this, m_elements.length ());
   m_elements.safe_push

[gcc r15-9629] Allow IPA_CP to handle UNDEFINED as VARYING.

2025-05-06 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:b6f68c04f440f981b281c5ad4c335761bc7d2882

commit r15-9629-gb6f68c04f440f981b281c5ad4c335761bc7d2882
Author: Andrew MacLeod 
Date:   Fri May 2 15:48:08 2025 -0400

Allow IPA_CP to handle UNDEFINED as VARYING.

When applying a bitmask to reflect ranges, it is sometimes deferred and
this can result in an UNDEFINED result.  IPA is not expecting this, so
add a check for it and convert to VARYING if encountered.

PR tree-optimization/120048
gcc/
* ipa-cp.cc (ipcp_store_vr_results): Check for UNDEFINED.

gcc/testsuite/
* gcc.dg/pr120048.c: New.

Diff:
---
 gcc/ipa-cp.cc   | 10 ++
 gcc/testsuite/gcc.dg/pr120048.c | 12 
 2 files changed, 22 insertions(+)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 806c2bdc97f2..a8ff3c870731 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -6398,6 +6398,11 @@ ipcp_store_vr_results (void)
 TYPE_PRECISION (type),
 TYPE_SIGN (type)));
  tmp.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred.  See PR 120048.
+ if (tmp.undefined_p ())
+   tmp.set_varying (type);
  ipa_vr vr (tmp);
  ts->m_vr->quick_push (vr);
}
@@ -6419,6 +6424,11 @@ ipcp_store_vr_results (void)
 TYPE_PRECISION (type),
 TYPE_SIGN (type)));
  tmp.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred.  See PR 120048.
+ if (tmp.undefined_p ())
+   tmp.set_varying (type);
  ipa_vr vr (tmp);
  ts->m_vr->quick_push (vr);
}
diff --git a/gcc/testsuite/gcc.dg/pr120048.c b/gcc/testsuite/gcc.dg/pr120048.c
new file mode 100644
index ..6bb34b0e1689
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120048.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vrp -fno-tree-fre" } */
+
+int a, b, c;
+static int d(short e) { return e || (a && e) ? 0 : a; }
+static void f(int e) {
+  if (!e) {
+d(0);
+b = d(e);
+  }
+}
+int main() { f(c | 1); }


[gcc r14-11745] Allow IPA_CP to handle UNDEFINED as VARYING.

2025-05-06 Thread Andrew Macleod via Gcc-cvs
https://gcc.gnu.org/g:93b85bdf830477fec8db1d8afcaca13530776161

commit r14-11745-g93b85bdf830477fec8db1d8afcaca13530776161
Author: Andrew MacLeod 
Date:   Mon May 5 12:17:13 2025 -0400

Allow IPA_CP to handle UNDEFINED as VARYING.

When applying a bitmask to reflect ranges, it is sometimes deferred and
this can result in an UNDEFINED result.  IPA is not expecting this, so
add a check for it and convert to VARYING if encountered.

PR tree-optimization/120048
gcc/
* ipa-cp.cc (ipcp_store_vr_results): Check for UNDEFINED.

gcc/testsuite/
* gcc.dg/pr120048.c: New.

Diff:
---
 gcc/ipa-cp.cc   | 10 ++
 gcc/testsuite/gcc.dg/pr120048.c | 12 
 2 files changed, 22 insertions(+)

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 6b772fae88ff..222ba01c576f 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -6355,6 +6355,11 @@ ipcp_store_vr_results (void)
 TYPE_PRECISION (type),
 TYPE_SIGN (type)));
  r.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred.  See PR 120048.
+ if (tmp.undefined_p ())
+   tmp.set_varying (type);
  ipa_vr vr (tmp);
  ts->m_vr->quick_push (vr);
}
@@ -6377,6 +6382,11 @@ ipcp_store_vr_results (void)
 TYPE_PRECISION (type),
 TYPE_SIGN (type)));
  r.update_bitmask (bm);
+ // Reflecting the bitmask on the ranges can sometimes
+ // produce an UNDEFINED value if the bitmask update
+ // was previously deferred.  See PR 120048.
+ if (tmp.undefined_p ())
+   tmp.set_varying (type);
  ipa_vr vr (tmp);
  ts->m_vr->quick_push (vr);
}
diff --git a/gcc/testsuite/gcc.dg/pr120048.c b/gcc/testsuite/gcc.dg/pr120048.c
new file mode 100644
index ..6bb34b0e1689
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr120048.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vrp -fno-tree-fre" } */
+
+int a, b, c;
+static int d(short e) { return e || (a && e) ? 0 : a; }
+static void f(int e) {
+  if (!e) {
+d(0);
+b = d(e);
+  }
+}
+int main() { f(c | 1); }


[gcc r16-420] ipa: Do not emit info about temporary clones to ipa-clones dump (PR119852)

2025-05-06 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:fb5829a01651d427a63a12c44ecc8baa47dbfc83

commit r16-420-gfb5829a01651d427a63a12c44ecc8baa47dbfc83
Author: Martin Jambor 
Date:   Tue May 6 17:28:43 2025 +0200

ipa: Do not emit info about temporary clones to ipa-clones dump (PR119852)

As described in PR 119852, the output of -fdump-ipa-clones can contain
"(null)" as the suffix/reason for cloning when we need to create a
clone to hold the original function during recursive inlining.  Such
clone is never output and so should not be part of the dump output
either.

gcc/ChangeLog:

2025-04-23  Martin Jambor  

PR ipa/119852
* cgraphclones.cc (dump_callgraph_transformation): Document the
function.  Do not dump if suffix is NULL.

gcc/testsuite/ChangeLog:

2025-04-23  Martin Jambor  

PR ipa/119852
* gcc.dg/ipa/pr119852.c: New test.

Diff:
---
 gcc/cgraphclones.cc | 10 +++-
 gcc/testsuite/gcc.dg/ipa/pr119852.c | 50 +
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index e6223fa1f5cc..bf5bc41cde9c 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -307,12 +307,20 @@ cgraph_node::expand_all_artificial_thunks ()
   e = e->next_caller;
 }
 
+/* Dump information about creation of a call graph node clone to the dump file
+   created by the -fdump-ipa-clones option.  ORIGINAL is the function being
+   cloned, CLONE is the new clone.  SUFFIX is a string that helps identify the
+   reason for cloning, often it is the suffix used by a particular IPA pass to
+   create unique function names.  SUFFIX can be NULL and in that case the
+   dumping will not take place, which must be the case only for helper clones
+   which will never be emitted to the output.  */
+
 void
 dump_callgraph_transformation (const cgraph_node *original,
   const cgraph_node *clone,
   const char *suffix)
 {
-  if (symtab->ipa_clones_dump_file)
+  if (suffix && symtab->ipa_clones_dump_file)
 {
   fprintf (symtab->ipa_clones_dump_file,
   "Callgraph clone;%s;%d;%s;%d;%d;%s;%d;%s;%d;%d;%s\n",
diff --git a/gcc/testsuite/gcc.dg/ipa/pr119852.c 
b/gcc/testsuite/gcc.dg/ipa/pr119852.c
new file mode 100644
index ..eab8d21293cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr119852.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-clones"  } */
+
+typedef struct rtx_def *rtx;
+enum rtx_code {
+  LAST_AND_UNUSED_RTX_CODE};
+extern const char * const rtx_format[((int) LAST_AND_UNUSED_RTX_CODE)];
+struct rtx_def {
+  enum rtx_code code;
+};
+typedef int (*rtx_function) (rtx *, void *);
+extern int for_each_rtx (rtx *, rtx_function, void *);
+int
+replace_label (rtx *x, void *data)
+{
+  rtx l = *x;
+  if (l == (rtx) 0)
+{
+ {
+   rtx new_c, new_l;
+   for_each_rtx (&new_c, replace_label, data);
+ }
+}
+}
+static int
+for_each_rtx_1 (rtx exp, int n, rtx_function f, void *data)
+{
+  int result, i, j;
+  const char *format = (rtx_format[(int) (((enum rtx_code) (exp)->code))]);
+  rtx *x;
+  for (; format[n] != '\0'; n++)
+{
+  switch (format[n])
+ {
+ case 'e':
+   result = (*f) (x, data);
+ {
+   result = for_each_rtx_1 (*x, i, f, data);
+ }
+ }
+}
+}
+int
+for_each_rtx (rtx *x, rtx_function f, void *data)
+{
+  int i;
+  return for_each_rtx_1 (*x, i, f, data);
+}
+
+/* { dg-final { scan-ipa-dump-not "(null)"  "ipa-clones"  } } */


[gcc r16-419] Document option -fdump-ipa-clones

2025-05-06 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:6ecc2fee06bdd60da0e9b3fe6660b553dbdca3ca

commit r16-419-g6ecc2fee06bdd60da0e9b3fe6660b553dbdca3ca
Author: Martin Jambor 
Date:   Tue May 6 17:28:42 2025 +0200

Document option -fdump-ipa-clones

I have noticed that the option -fdump-ipa-clones is not documented
although there are users who depend on it.  This patch adds the
missing documentation along with the description of the information it
dumps and the format it uses.

I am never quite sure which of the texinfo mark-ups is the most
appropriate in which situation, I'll of course incorporate any
feedback on this as well as the general wording of the text.

After we settle on a version, I'd like to backport the documentation
also at least to GCC 15, 14 and 13.

Is it perhaps OK for master and the branches or what would better be
changed?

Thanks,

Martin

gcc/ChangeLog:

2025-04-23  Martin Jambor  

* doc/invoke.texi (Developer Options): Document -fdump-ipa-clones.

Diff:
---
 gcc/doc/invoke.texi | 87 +
 1 file changed, 87 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 32bc45725de9..90cbb516bc46 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20774,6 +20774,93 @@ By default, the dump will contain messages about 
successful
 optimizations (equivalent to @option{-optimized}) together with
 low-level details about the analysis.
 
+@opindex fdump-ipa-clones
+@item -fdump-ipa-clones
+
+Create a dump file containing information about creation of call graph
+node clones and removals of call graph nodes during inter-procedural
+optimizations and transformations.  Its main intended use is that tools
+that create live-patches can determine the set of functions that need to
+be live-patched to completely replace a particular function (see
+@option{-flive-patching}).  The file name is generated by appending
+suffix @code{ipa-clones} to the source file name, and the file is
+created in the same directory as the output file.  Each entry in the
+file is on a separate line containing semicolon separated fields.
+
+In the case of call graph clone creation, the individual fields are:
+
+@enumerate
+@item
+String @code{Callgraph clone}.
+
+@item
+Name of the function being cloned as it is presented to the assembler.
+
+@item
+A number that uniquely represents the function being cloned in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file where the function is defined.
+
+@item
+The line on which the function definition is located.
+
+@item
+The column where the function definition is located.
+
+@item
+Name of the new function clone as it is presented to the assembler.
+
+@item
+A number that uniquely represents the new function clone in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file to which the source code location of
+the new clone points.
+
+@item
+The line to which the source code location of the new clone points.
+
+@item
+The column to which the source code location of the new clone points.
+
+@item
+A string that determines the reason for cloning.
+
+@end enumerate
+
+In the case of call graph clone removal, the individual fields are:
+
+@enumerate
+@item
+String @code{Callgraph removal}.
+
+@item
+Name of the function being removed as it would be presented to the assembler.
+
+@item
+A number that uniquely represents the function being removed in the call
+graph.  Note that the number is unique only within a compilation unit or
+within whole-program analysis but is likely to be different in the two
+phases.
+
+@item
+The file name of the source file where the function is defined.
+
+@item
+The line on which the function definition is located.
+
+@item
+The column where the function definition is located.
+
+@end enumerate
+
 @opindex fdump-lang
 @item -fdump-lang
 Dump language-specific information.  The file name is made by appending


[gcc r16-422] ipa: Drop the default value of suffix parameter of create_clone (PR119852)

2025-05-06 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:76c882e341cb330a4e9f677a8c3541d573820255

commit r16-422-g76c882e341cb330a4e9f677a8c3541d573820255
Author: Martin Jambor 
Date:   Tue May 6 17:28:44 2025 +0200

ipa: Drop the default value of suffix parameter of create_clone (PR119852)

In PR 119852 we agreed that since the NULL-ness of the suffix
parameter should prevent creation of a record in the ipa-clones
dump (which is implemented by a previous patch), it should not default
to NULL.

gcc/ChangeLog:

2025-04-25  Martin Jambor  

PR ipa/119852
* cgraph.h (cgraph_node::create_clone): Remove the default value of
argument suffix.  Update function comment.
* cgraphclones.cc (cgraph_node::create_clone): Update function 
comment.
* ipa-inline-transform.cc (clone_inlined_nodes): Pass NULL to suffix
of create_clone explicitly.
* ipa-inline.cc (recursive_inlining): Likewise.
* lto-cgraph.cc (input_node): Likewise.

Diff:
---
 gcc/cgraph.h| 10 +++---
 gcc/cgraphclones.cc |  7 ++-
 gcc/ipa-inline-transform.cc |  2 +-
 gcc/ipa-inline.cc   |  2 +-
 gcc/lto-cgraph.cc   |  2 +-
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 1a59bf609b51..f4ee29e998c3 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -965,15 +965,19 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  If the new node is being inlined into another one, NEW_INLINED_TO should 
be
  the outline function the new one is (even indirectly) inlined to.
  All hooks will see this in node's inlined_to, when invoked.
- Can be NULL if the node is not inlined.  SUFFIX is string that is appended
- to the original name.  */
+ Should be NULL if the node is not inlined.
+
+ SUFFIX is string that is appended to the original name, it should only be
+ NULL if NEW_INLINED_TO is not NULL or if the clone being created is
+ temporary and a record about it should not be added into the ipa-clones
+ dump file.  */
   cgraph_node *create_clone (tree decl, profile_count count,
 bool update_original,
 vec redirect_callers,
 bool call_duplication_hook,
 cgraph_node *new_inlined_to,
 ipa_param_adjustments *param_adjustments,
-const char *suffix = NULL);
+const char *suffix);
 
   /* Create callgraph node clone with new declaration.  The actual body will be
  copied later at compilation stage.  The name of the new clone will be
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index cb457e5f457f..b45ac4977331 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -366,9 +366,14 @@ localize_profile (cgraph_node *n)
 
If the new node is being inlined into another one, NEW_INLINED_TO should be
the outline function the new one is (even indirectly) inlined to.  All hooks
-   will see this in node's inlined_to, when invoked.  Can be NULL if the
+   will see this in node's inlined_to, when invoked.  Should be NULL if the
node is not inlined.
 
+   SUFFIX is string that is appended to the original name, it should only be
+   NULL if NEW_INLINED_TO is not NULL or if the clone being created is
+   temporary and a record about it should not be added into the ipa-clones dump
+   file.
+
If PARAM_ADJUSTMENTS is non-NULL, the parameter manipulation information
will be overwritten by the new structure.  Otherwise the new node will
share parameter manipulation information with the original node.  */
diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc
index e00887be481b..46b8e5bb6790 100644
--- a/gcc/ipa-inline-transform.cc
+++ b/gcc/ipa-inline-transform.cc
@@ -225,7 +225,7 @@ clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
   e->count,
   update_original, vNULL, true,
   inlining_into,
-  NULL);
+  NULL, NULL);
  n->used_as_abstract_origin = e->callee->used_as_abstract_origin;
  e->redirect_callee (n);
}
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index 38fdbfde1b3b..35e5496d8463 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -1865,7 +1865,7 @@ recursive_inlining (struct cgraph_edge *edge,
{
  /* We need original clone to copy around.  */
  master_clone = node->create_clone (node->decl, node->count,
-   false, vNULL, true, NULL, NULL);
+   false, vNULL, true, NULL, NULL, NULL);
  for (e = master_clone->callees; e; e = e->next_callee)
if (!e->inline_failed)
  clone_inlined_nodes (e, 

[gcc r16-421] ipa: Fix create_version_clone_with_body declaration and comment

2025-05-06 Thread Martin Jambor via Gcc-cvs
https://gcc.gnu.org/g:1eaee43dc0c6292ce865b460d52474ca14ea1d71

commit r16-421-g1eaee43dc0c6292ce865b460d52474ca14ea1d71
Author: Martin Jambor 
Date:   Tue May 6 17:28:43 2025 +0200

ipa: Fix create_version_clone_with_body declaration and comment

I noticed that the name of the fifth parameter of
cgraph_node::create_version_clone_with_body is different in the class
definition in cgraph.h and in the actual member function definition in
cgraphclones.cc.  The former (clone_name) is misleading and so this
patch changes it to the latter (suffix) which is also used in related
functions.

The patch also updates the function comment in both places because it
clearly became out of date.

gcc/ChangeLog:

2025-04-25  Martin Jambor  

* cgraph.h (cgraph_node::create_version_clone_with_body): Fix 
function
comment.  Change the name of clone_name to suffix, in line with the
function definition.
* cgraphclones.cc (cgraph_node::create_version_clone_with_body): Fix
function comment.

Diff:
---
 gcc/cgraph.h| 9 +
 gcc/cgraphclones.cc | 7 ---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index f7b67ed0a6c5..1a59bf609b51 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1020,11 +1020,12 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  TREE_MAP is a mapping of tree nodes we want to replace with
  new ones (according to results of prior analysis).
 
- If non-NULL ARGS_TO_SKIP determine function parameters to remove
- from new version.
- If SKIP_RETURN is true, the new version will return void.
+ If non-NULL PARAM_ADJUSTMENTS determine how function formal parameters
+ should be modified in the new version and if it should return void.
  If non-NULL BLOCK_TO_COPY determine what basic blocks to copy.
  If non_NULL NEW_ENTRY determine new entry BB of the clone.
+ SUFFIX is a string that will be used to create a new name for the new
+ function.
 
  If TARGET_ATTRIBUTES is non-null, when creating a new declaration,
  add the attributes to DECL_ATTRIBUTES.  And call valid_attribute_p
@@ -1039,7 +1040,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
 (vec redirect_callers,
  vec *tree_map,
  ipa_param_adjustments *param_adjustments,
- bitmap bbs_to_copy, basic_block new_entry_block, const char *clone_name,
+ bitmap bbs_to_copy, basic_block new_entry_block, const char *suffix,
  tree target_attributes = NULL_TREE, bool version_decl = true);
 
   /* Insert a new cgraph_function_version_info node into cgraph_fnver_htab
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index bf5bc41cde9c..cb457e5f457f 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -1002,11 +1002,12 @@ cgraph_node::create_version_clone (tree new_decl,
TREE_MAP is a mapping of tree nodes we want to replace with
new ones (according to results of prior analysis).
 
-   If non-NULL ARGS_TO_SKIP determine function parameters to remove
-   from new version.
-   If SKIP_RETURN is true, the new version will return void.
+   If non-NULL PARAM_ADJUSTMENTS determine how function formal parameters
+   should be modified in the new version and if it should return void.
If non-NULL BLOCK_TO_COPY determine what basic blocks to copy.
If non_NULL NEW_ENTRY determine new entry BB of the clone.
+   SUFFIX is a string that will be used to create a new name for the new
+   function.
 
If TARGET_ATTRIBUTES is non-null, when creating a new declaration,
add the attributes to DECL_ATTRIBUTES.  And call valid_attribute_p


[gcc r16-423] Fix PR 119928, formal arguments used to be wrongly inferred for CLASS.

2025-05-06 Thread Thomas Koenig via Gcc-cvs
https://gcc.gnu.org/g:e7a2b8b76ae0c8f1e49c780aa82ebb5f0325f515

commit r16-423-ge7a2b8b76ae0c8f1e49c780aa82ebb5f0325f515
Author: Thomas Koenig 
Date:   Tue May 6 18:05:41 2025 +0200

Fix PR 119928, formal arguments used to be wrongly inferred for CLASS.

The problem was indeed that generating a formal from an actual
arglist is a bad idea when classes are involved.  Fixed in the
attached patch.  I think it still makes sense to remove the checks
when the other attributes are present (or PR96073 may come back
in different guise, even if I have no test case at present).
I have also converted the test to a run-time check.

gcc/fortran/ChangeLog:

PR fortran/119928
* interface.cc (gfc_check_dummy_characteristics): Do not issue
error if one dummy symbol has been generated from an actual
argument and the other one has OPTIONAL, INTENT, ALLOCATABLE,
POINTER, TARGET, VALUE, ASYNCHRONOUS or CONTIGUOUS.
(gfc_get_formal_from_actual_arglist): Do nothing if symbol
is a class.

gcc/testsuite/ChangeLog:

PR fortran/119928
* gfortran.dg/interface_60.f90: New test.

Diff:
---
 gcc/fortran/interface.cc   | 135 -
 gcc/testsuite/gfortran.dg/interface_60.f90 |  70 +++
 2 files changed, 143 insertions(+), 62 deletions(-)

diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc
index 1e552a3df861..753f589ff677 100644
--- a/gcc/fortran/interface.cc
+++ b/gcc/fortran/interface.cc
@@ -1403,77 +1403,82 @@ gfc_check_dummy_characteristics (gfc_symbol *s1, 
gfc_symbol *s2,
}
 }
 
-  /* Check INTENT.  */
-  if (s1->attr.intent != s2->attr.intent && !s1->attr.artificial
-  && !s2->attr.artificial)
-{
-  snprintf (errmsg, err_len, "INTENT mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* A lot of information is missing for artificially generated
+ formal arguments, let's not look into that.  */
 
-  /* Check OPTIONAL attribute.  */
-  if (s1->attr.optional != s2->attr.optional)
+  if (!s1->attr.artificial && !s2->attr.artificial)
 {
-  snprintf (errmsg, err_len, "OPTIONAL mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check INTENT.  */
+  if (s1->attr.intent != s2->attr.intent)
+   {
+ snprintf (errmsg, err_len, "INTENT mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check ALLOCATABLE attribute.  */
-  if (s1->attr.allocatable != s2->attr.allocatable)
-{
-  snprintf (errmsg, err_len, "ALLOCATABLE mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check OPTIONAL attribute.  */
+  if (s1->attr.optional != s2->attr.optional)
+   {
+ snprintf (errmsg, err_len, "OPTIONAL mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check POINTER attribute.  */
-  if (s1->attr.pointer != s2->attr.pointer)
-{
-  snprintf (errmsg, err_len, "POINTER mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check ALLOCATABLE attribute.  */
+  if (s1->attr.allocatable != s2->attr.allocatable)
+   {
+ snprintf (errmsg, err_len, "ALLOCATABLE mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check TARGET attribute.  */
-  if (s1->attr.target != s2->attr.target)
-{
-  snprintf (errmsg, err_len, "TARGET mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check POINTER attribute.  */
+  if (s1->attr.pointer != s2->attr.pointer)
+   {
+ snprintf (errmsg, err_len, "POINTER mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check ASYNCHRONOUS attribute.  */
-  if (s1->attr.asynchronous != s2->attr.asynchronous)
-{
-  snprintf (errmsg, err_len, "ASYNCHRONOUS mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check TARGET attribute.  */
+  if (s1->attr.target != s2->attr.target)
+   {
+ snprintf (errmsg, err_len, "TARGET mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check CONTIGUOUS attribute.  */
-  if (s1->attr.contiguous != s2->attr.contiguous)
-{
-  snprintf (errmsg, err_len, "CONTIGUOUS mismatch in argument '%s'",
-   s1->name);
-  return false;
-}
+  /* Check ASYNCHRONOUS attribute.  */
+  if (s1->attr.asynchronous != s2->attr.asynchronous)
+   {
+ snprintf (errmsg, err_len, "ASYNCHRONOUS mismatch in argument '%s'",
+   s1->name);
+ return false;
+   }
 
-  /* Check VALUE attribute.  */
-  if (s1->attr.value != s2->attr.value)
-   

[gcc r16-424] libstdc++: Fix -Wmismatched-tags warnings for _Safe_iterator [PR120112]

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:ccf0b93b3ccf67b98fbd5096852c369d2cac7904

commit r16-424-gccf0b93b3ccf67b98fbd5096852c369d2cac7904
Author: Jonathan Wakely 
Date:   Tue May 6 12:47:32 2025 +0100

libstdc++: Fix -Wmismatched-tags warnings for _Safe_iterator [PR120112]

This causes an ICE as shown in the PR, but it should be fixed in the
library code anyway.

libstdc++-v3/ChangeLog:

PR c++/120112
* include/bits/ptr_traits.h (_Safe_iterator_base): Use class
keyword in class-head of declaration.
* include/debug/debug.h (_Safe_iterator): Likewise.

Diff:
---
 libstdc++-v3/include/bits/ptr_traits.h | 2 +-
 libstdc++-v3/include/debug/debug.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/ptr_traits.h 
b/libstdc++-v3/include/bits/ptr_traits.h
index 4308669e03b7..91da88b8c137 100644
--- a/libstdc++-v3/include/bits/ptr_traits.h
+++ b/libstdc++-v3/include/bits/ptr_traits.h
@@ -36,7 +36,7 @@
 
 #if __cplusplus > 201703L
 #include 
-namespace __gnu_debug { struct _Safe_iterator_base; }
+namespace __gnu_debug { class _Safe_iterator_base; }
 #endif
 
 namespace std _GLIBCXX_VISIBILITY(default)
diff --git a/libstdc++-v3/include/debug/debug.h 
b/libstdc++-v3/include/debug/debug.h
index 0e02d58822a2..0131c0aa59d9 100644
--- a/libstdc++-v3/include/debug/debug.h
+++ b/libstdc++-v3/include/debug/debug.h
@@ -58,7 +58,7 @@ namespace __gnu_debug
   using namespace std::__debug;
 
   template
-struct _Safe_iterator;
+class _Safe_iterator;
 }
 
 #if ! defined _GLIBCXX_DEBUG || ! _GLIBCXX_HOSTED


[gcc r16-427] libstdc++: Rewrite atomic builtin checks [PR70560]

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:86627faec10da53d7532805019e5296fcf15ac09

commit r16-427-g86627faec10da53d7532805019e5296fcf15ac09
Author: Jonathan Wakely 
Date:   Fri Apr 25 21:09:18 2025 +0100

libstdc++: Rewrite atomic builtin checks [PR70560]

Currently the GLIBCXX_ENABLE_ATOMIC_BUILTINS macro checks for a variety
of __atomic built-ins for bool, short and int. If all those checks pass,
then it defines _GLIBCXX_ATOMIC_BUILTINS and uses the definitions from
config/cpu/generic/atomicity_builtins/atomicity.h for the non-inline
versions of __exchange_and_add and __atomic_add that get compiled into
libsupc++.

However, the config/cpu/generic/atomicity_builtins/atomicity.h
definitions only depend on __atomic_fetch_add not on
__atomic_test_and_set or __atomic_compare_exchange. And they only
operate on a variable of type _Atomic_word, which is not necessarily one
of bool, short or int (e.g. for sparcv9 _Atomic_word is 64-bit long).

This means that for a target where _Atomic_word is int but there are no
1-byte or 2-byte atomic instructions, GLIBCXX_ENABLE_ATOMIC_BUILTINS
will fail the checks for bool and short and not define the macro
_GLIBCXX_ATOMIC_BUILTINS. That means that we will use a single global
mutex for reference counting in the COW std::string and std::locale,
even though we could use __atomic_fetch_add to do it lock-free.

This commit removes most of the GLIBCXX_ENABLE_ATOMIC_BUILTINS checks,
so that it only checks __atomic_fetch_add on _Atomic_word. The macro
defined by GLIBCXX_ENABLE_ATOMIC_BUILTINS is renamed from
_GLIBCXX_ATOMIC_BUILTINS to _GLIBCXX_ATOMIC_WORD_BUILTINS to better
reflect what it really means. This will enable the inline versions of
__exchange_and_add and __atomic_add for more targets. This is not an ABI
change, because targets which didn't previously use the inline
definitions of those functions made non-inlined calls to the functions
in the library. If the definitions of those functions now start using
atomics, that doesn't change the semantics for the code calling those
functions.

On affected targets, new code compiled after this change will see the
_GLIBCXX_ATOMIC_WORD_BUILTINS macro and so will use the always-inline
versions of __exchange_and_add and __atomic_add, which use
__atomic_fetch_add directly. That is also compatible with older code
which still calls the non-inline definitions, because those non-inline
definitions now also use __atomic_fetch_add.

The only configuration where this could be an ABI change is for a target
which previously defined _GLIBCXX_ATOMIC_BUILTINS (because all the
atomic built-ins for bool, short and int are supported), but which
defines _Atomic_word to some other type for which __atomic_fetch_add is
/not/ supported. Such a target would have called the inline functions
using __atomic_fetch_add, which would actually have depended on
libatomic (which is what the configure checks were supposed to
prevent!).  After this change, that target would not define the new
macro, _GLIBCXX_ATOMIC_WORD_BUILTINS, and so would make non-inline calls
into the library where __exchange_and_add and __atomic_add would use the
global mutex. That would be an ABI break. I don't consider that a
realistic scenario, because it wouldn't have made any sense to define
_Atomic_word to a wider type than int, when doing so would have required
libatomic to make libstdc++.so work. Surely such a target would have
just used int for its _Atomic_word type.

The GLIBCXX_ENABLE_BACKTRACE macro currently uses the
glibcxx_ac_atomic_int variable defined by the checks that this commit
removes from GLIBCXX_ENABLE_ATOMIC_BUILTINS. That wasn't a good check
anyway, because libbacktrace actually depends on atomic loads+stores for
pointers as well as int, and for atomic stores for size_t. This commit
replaces the glibcxx_ac_atomic_int check with a proper test for all the
required atomic operations on all three of int, void* and size_t. This
ensures that the libbacktrace code used for std::stacktrace will either
use native atomics, or implement those loads and stores only in terms of
__sync_bool_compare_and_swap (possibly requiring that to come from
libatomic or elsewhere).

libstdc++-v3/ChangeLog:

PR libstdc++/70560
PR libstdc++/119667
* acinclude.m4 (GLIBCXX_ENABLE_ATOMIC_BUILTINS): Only check for
__atomic_fetch_add on _Atomic_word. Define new macro
_GLIBCXX_ATOMIC_WORD_BUILTINS and stop defining macro
_GLIBCXX_ATOMIC_BUILTINS.
(GLIBCXX_ENABLE_BACKTRACE): Check for __atomic_load_n and
__atomic_store_n on int, void* and size_t.
* config.h.in: Regenerate.
* configure: Regenerate.
* configure.host: 

[gcc r16-426] libstdc++: Fix parallel algos for move-only values [PR117905]

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:df1d436d17c8280bd835b045bd7babf5058a7154

commit r16-426-gdf1d436d17c8280bd835b045bd7babf5058a7154
Author: Jonathan Wakely 
Date:   Wed Dec 4 21:50:22 2024 +

libstdc++: Fix <numeric> parallel algos for move-only values [PR117905]

All of reduce, transform_reduce, exclusive_scan, inclusive_scan,
transform_exclusive_scan, and transform_inclusive_scan have a
precondition that the type of init meets the Cpp17MoveConstructible
requirements. It isn't required to be copy constructible, so when
passing it to the next internal function it needs to be moved, not
copied. We also need to move when creating local variables on the stack,
and when returning as part of a pair.

libstdc++-v3/ChangeLog:

PR libstdc++/117905
* include/pstl/glue_numeric_impl.h (reduce, transform_reduce)
(transform_reduce, inclusive_scan, transform_exclusive_scan)
(transform_inclusive_scan): Use std::move for __init parameter.
* include/pstl/numeric_impl.h (__brick_transform_reduce)
(__pattern_transform_reduce, __brick_transform_scan)
(__pattern_transform_scan): Likewise.
* include/std/numeric (inclusive_scan, transform_exclusive_scan):
Use std::move to create local copy of the first element.
* testsuite/26_numerics/pstl/numeric_ops/108236.cc: Move test
using move-only type to ...
* testsuite/26_numerics/pstl/numeric_ops/move_only.cc: New test.

Diff:
---
 libstdc++-v3/include/pstl/glue_numeric_impl.h  | 16 ++--
 libstdc++-v3/include/pstl/numeric_impl.h   | 36 -
 libstdc++-v3/include/std/numeric   |  6 +-
 .../26_numerics/pstl/numeric_ops/108236.cc | 25 --
 .../26_numerics/pstl/numeric_ops/move_only.cc  | 90 ++
 5 files changed, 119 insertions(+), 54 deletions(-)

diff --git a/libstdc++-v3/include/pstl/glue_numeric_impl.h 
b/libstdc++-v3/include/pstl/glue_numeric_impl.h
index 10d4912deede..fe2d0fd47e24 100644
--- a/libstdc++-v3/include/pstl/glue_numeric_impl.h
+++ b/libstdc++-v3/include/pstl/glue_numeric_impl.h
@@ -25,7 +25,7 @@ 
__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
 reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator 
__last, _Tp __init,
_BinaryOperation __binary_op)
 {
-return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, 
__last, __init, __binary_op,
+return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, 
__last, std::move(__init), __binary_op,
 __pstl::__internal::__no_op());
 }
 
@@ -33,7 +33,7 @@ template 
 __pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
 reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator 
__last, _Tp __init)
 {
-return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, 
__last, __init, std::plus<_Tp>(),
+return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, 
__last, std::move(__init), std::plus<_Tp>(),
 __pstl::__internal::__no_op());
 }
 
@@ -58,7 +58,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 
__first1, _Forward
 
 typedef typename iterator_traits<_ForwardIterator1>::value_type _InputType;
 return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, 
std::forward<_ExecutionPolicy>(__exec),
-  __first1, __last1, 
__first2, __init, std::plus<_InputType>(),
+  __first1, __last1, 
__first2, std::move(__init), std::plus<_InputType>(),
   
std::multiplies<_InputType>());
 }
 
@@ -70,7 +70,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 
__first1, _Forward
 {
 auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, 
__first1, __first2);
 return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, 
std::forward<_ExecutionPolicy>(__exec),
-  __first1, __last1, 
__first2, __init, __binary_op1,
+  __first1, __last1, 
__first2, std::move(__init), __binary_op1,
   __binary_op2);
 }
 
@@ -81,7 +81,7 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator 
__first, _ForwardIt
 {
 auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, 
__first);
 return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, 
std::forward<_ExecutionPolicy>(__exec),
-  __first, __last, 
__init, __binary_op, __unary_op);
+  __first, __last, 
std::move(

[gcc r16-425] libstdc++: Fix dangling pointer in fs::path::operator+=(*this) [PR120029]

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:a067cbcdcc5f599a2b7d607e89674533d23c652d

commit r16-425-ga067cbcdcc5f599a2b7d607e89674533d23c652d
Author: Jonathan Wakely 
Date:   Wed Apr 30 17:31:01 2025 +0100

libstdc++: Fix dangling pointer in fs::path::operator+=(*this) [PR120029]

When concatenating a path we reallocate the left operand's storage to
make room for the new components being added. When the two operands are
the same object, or the right operand is one of the components of the
left operand, the reallocation invalidates the pointers that refer
into the right operand's storage.

The solution in this commit is to detect these aliasing cases and just
do the concatenation in terms of the contained string, as that code
already handles the case where the string aliases the path. The standard
specifies the concatenation in terms of the native() string, so all this
change does is disable the optimized implementation of concatenation for
path objects which attempts to avoid re-parsing the path from the
concatenated string.

The potential loss of performance for this case isn't likely to be an
issue, because concatenating a path with itself (or one of its existing
components) probably isn't a common use case.

The Filesystem TS implementation doesn't have the optimized form of
concatenation and always does it in terms of the native string and
reparsing the whole thing, so doesn't have this bug. A test is added to
confirm that anyway (that test has some slightly different results due
to different behaviour for trailing slashes and implicit "." filenames
in the TS spec).

libstdc++-v3/ChangeLog:

PR libstdc++/120029
* src/c++17/fs_path.cc (path::operator+=(const path&)): Handle
parameters that alias the path or one of its components.
* testsuite/27_io/filesystem/path/concat/120029.cc: New test.
* testsuite/experimental/filesystem/path/concat/120029.cc: New
test.

Diff:
---
 libstdc++-v3/src/c++17/fs_path.cc  | 10 +++
 .../27_io/filesystem/path/concat/120029.cc | 72 +
 .../experimental/filesystem/path/concat/120029.cc  | 74 ++
 3 files changed, 156 insertions(+)

diff --git a/libstdc++-v3/src/c++17/fs_path.cc 
b/libstdc++-v3/src/c++17/fs_path.cc
index 6582f10209a0..215afa08ad25 100644
--- a/libstdc++-v3/src/c++17/fs_path.cc
+++ b/libstdc++-v3/src/c++17/fs_path.cc
@@ -880,6 +880,16 @@ path::operator+=(const path& p)
   return *this;
 }
 
+  // Handle p += p which would otherwise access dangling pointers after
+  // reallocating _M_cmpts and _M_pathname.
+  if (&p == this) [[unlikely]]
+return *this += p.native();
+  // Handle p += *i where i is in [p.begin(),p.end()), for the same reason.
+  if (_M_type() == _Type::_Multi && p._M_type() != _Type::_Multi)
+for (const path& cmpt : *this)
+  if (&cmpt == &p) [[unlikely]]
+   return *this += p.native();
+
 #if _GLIBCXX_FILESYSTEM_IS_WINDOWS
   if (_M_type() == _Type::_Root_name
   || (_M_type() == _Type::_Filename && _M_pathname.size() == 1))
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/concat/120029.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/path/concat/120029.cc
new file mode 100644
index ..5153d594b50f
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/filesystem/path/concat/120029.cc
@@ -0,0 +1,72 @@
+// { dg-do run { target c++17 } }
+
+// Bug libstdc++/120029
+// Dangling iterator usage in path::operator+=(const path& p) when this == p
+
+#include 
+#include 
+
+namespace fs = std::filesystem;
+
+void
+test_root_dir()
+{
+  fs::path p = "/";
+  p += p;
+  p += p;
+  VERIFY( p == "" );
+  p += p.filename();
+  VERIFY( p == "" );
+  p += *std::prev(p.end());
+  VERIFY( p == "" );
+}
+
+void
+test_root_name()
+{
+  fs::path p = "C:/";
+  p += p;
+  p += p;
+  VERIFY( p == "C:/C:/C:/C:/" );
+  p += p.filename();
+  VERIFY( p == "C:/C:/C:/C:/" );
+  p += *std::prev(p.end());
+  VERIFY( p == "C:/C:/C:/C:/" );
+}
+
+void
+test_filename()
+{
+  fs::path p = "file";
+  p += p;
+  p += p;
+  VERIFY( p == "filefilefilefile" );
+  p += p.filename();
+  VERIFY( p == "filefilefilefilefilefilefilefile" );
+  p += *std::prev(p.end());
+  VERIFY( p == 
"filefilefilefilefilefilefilefilefilefilefilefilefilefilefilefile" );
+}
+
+void
+test_multi()
+{
+  fs::path p = "/home/username/Documents/mu";
+  p += p;
+  p += p;
+  VERIFY( p == 
"/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mu"
 );
+  p += p.filename();
+  VERIFY( p == 
"/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mumu"
 );
+  p += *std::prev(p.end());
+  VERIFY( p == 
"/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mu/home/username/Documents/mumumumu"
 );
+  auto n = std::dis

[gcc r14-11742] libstdc++: Add missing feature-test macro in

2025-05-06 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:1db1249b83096e15c8ef2ae4ba124d3af75ea32f

commit r14-11742-g1db1249b83096e15c8ef2ae4ba124d3af75ea32f
Author: Dhruv Chawla 
Date:   Tue Apr 8 01:01:24 2025 -0700

libstdc++: Add missing feature-test macro in <memory>

Per version.syn#2, <memory> is required to define
__cpp_lib_addressof_constexpr as 201603L.

Bootstrapped and tested on aarch64-linux-gnu.

Signed-off-by: Dhruv Chawla 

libstdc++-v3/ChangeLog:
* include/std/memory: Define __glibcxx_want_addressof_constexpr.
* testsuite/20_util/headers/memory/version.cc: Test for macro
value.

(cherry picked from commit 0e65fef8717f404cf9c85bff51bf87d534f87828)

Diff:
---
 libstdc++-v3/include/std/memory  | 1 +
 libstdc++-v3/testsuite/20_util/headers/memory/version.cc | 4 
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index c984436f6253..b6cdd201e563 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -95,6 +95,7 @@
 #  include 
 #endif
 
+#define __glibcxx_want_addressof_constexpr
 #define __glibcxx_want_allocator_traits_is_always_equal
 #define __glibcxx_want_assume_aligned
 #define __glibcxx_want_atomic_shared_ptr
diff --git a/libstdc++-v3/testsuite/20_util/headers/memory/version.cc 
b/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
index c82c9a018e0a..946955dd2123 100644
--- a/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
+++ b/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
@@ -6,3 +6,7 @@
 #if __cpp_lib_allocator_traits_is_always_equal != 201411L
 # error "Feature-test macro __cpp_lib_allocator_traits_is_always_equal has 
wrong value in "
 #endif
+
+#if __cpp_lib_addressof_constexpr != 201603L
+# error "Feature-test macro __cpp_lib_addressof_constexpr has wrong value in 
"
+#endif


[gcc r15-9627] libstdc++: Remove unnecessary dg-prune-output from tests

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:ebc9606540171f33bf974d3f81e547affc052753

commit r15-9627-gebc9606540171f33bf974d3f81e547affc052753
Author: Jonathan Wakely 
Date:   Thu Apr 24 14:50:36 2025 +0100

libstdc++: Remove unnecessary dg-prune-output from tests

There are no errors matching this pattern in these tests (only in the
deque/48101_neg.cc and vector/48101_neg.cc tests).

libstdc++-v3/ChangeLog:

* testsuite/23_containers/forward_list/48101_neg.cc: Remove
dg-prune-output that doesn't match anything.
* testsuite/23_containers/list/48101_neg.cc: Likewise.
* testsuite/23_containers/multiset/48101_neg.cc: Likewise.
* testsuite/23_containers/set/48101_neg.cc: Likewise.

(cherry picked from commit 8396c80be153b5207d992b7ea6dd4fdb536ce973)

Diff:
---
 libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc | 1 -
 libstdc++-v3/testsuite/23_containers/list/48101_neg.cc | 1 -
 libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc | 1 -
 libstdc++-v3/testsuite/23_containers/set/48101_neg.cc  | 1 -
 4 files changed, 4 deletions(-)

diff --git a/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
index 2f2ea2afb194..d18195ed3540 100644
--- a/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/forward_list/48101_neg.cc
@@ -26,6 +26,5 @@ test01()
 }
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
-// { dg-prune-output "std::allocator<.* has no member named " }
 // { dg-prune-output "must have the same value_type as its allocator" }
 // { dg-prune-output "rebind_alloc" }
diff --git a/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
index 8b2e075ca6ac..cc51705dcc6a 100644
--- a/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/list/48101_neg.cc
@@ -26,5 +26,4 @@ test01()
 }
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
-// { dg-prune-output "std::allocator<.* has no member named " }
 // { dg-prune-output "must have the same value_type as its allocator" }
diff --git a/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
index f0786cfc7fb8..3cc06587526b 100644
--- a/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/multiset/48101_neg.cc
@@ -29,7 +29,6 @@ test01()
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-error "comparison object must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "std::allocator<.* has no member named " }
 // { dg-prune-output "must have the same value_type as its allocator" }
 // { dg-prune-output "no match for call" }
 // { dg-prune-output "invalid conversion" }
diff --git a/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc 
b/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
index e8dec72cce5f..fe38d1af7efa 100644
--- a/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/set/48101_neg.cc
@@ -29,7 +29,6 @@ test01()
 
 // { dg-error "non-const, non-volatile value_type" "" { target *-*-* } 0 }
 // { dg-error "comparison object must be invocable" "" { target *-*-* } 0 }
-// { dg-prune-output "std::allocator<.* has no member named " }
 // { dg-prune-output "must have the same value_type as its allocator" }
 // { dg-prune-output "no match for call" }
 // { dg-prune-output "invalid conversion" }


[gcc r15-9626] libstdc++: fix possible undefined atomic lock-free type aliases in module std

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:f2a69ee64f8b663d14649b7475edb8e09e83db1b

commit r15-9626-gf2a69ee64f8b663d14649b7475edb8e09e83db1b
Author: ZENG Hao 
Date:   Sun Apr 20 17:02:16 2025 +0800

libstdc++: fix possible undefined atomic lock-free type aliases in module std

When building for 'i386-*' targets, all basic types are 'sometimes lock-free'
and thus std::atomic_signed_lock_free and std::atomic_unsigned_lock_free are
not declared. In the header <atomic>, they are guarded by the preprocessor
condition __cpp_lib_atomic_lock_free_type_aliases. In module std, they should
be guarded the same way.

libstdc++-v3/ChangeLog:

* src/c++23/std.cc.in (atomic_signed_lock_free): Guard with
preprocessor check for __cpp_lib_atomic_lock_free_type_aliases.
(atomic_unsigned_lock_free): Likewise.

(cherry picked from commit 466c80a885ccbd5186cd57b0785cf38b8cf2ff2f)

Diff:
---
 libstdc++-v3/src/c++23/std.cc.in | 4 
 1 file changed, 4 insertions(+)

diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in
index 6da6d3829149..930a489ff44b 100644
--- a/libstdc++-v3/src/c++23/std.cc.in
+++ b/libstdc++-v3/src/c++23/std.cc.in
@@ -599,7 +599,9 @@ export namespace std
   using std::atomic_schar;
   using std::atomic_short;
   using std::atomic_signal_fence;
+#ifdef __cpp_lib_atomic_lock_free_type_aliases
   using std::atomic_signed_lock_free;
+#endif
   using std::atomic_size_t;
   using std::atomic_store;
   using std::atomic_store_explicit;
@@ -622,7 +624,9 @@ export namespace std
   using std::atomic_uintptr_t;
   using std::atomic_ullong;
   using std::atomic_ulong;
+#ifdef __cpp_lib_atomic_lock_free_type_aliases
   using std::atomic_unsigned_lock_free;
+#endif
   using std::atomic_ushort;
   using std::atomic_wait;
   using std::atomic_wait_explicit;


[gcc r16-407] RISC-V: Combine vec_duplicate + vadd.vv to vadd.vx on GR2VR cost

2025-05-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:2b5baada614af7a4d0ee49aa962c7e1f7be3

commit r16-407-g2b5baada614af7a4d0ee49aa962c7e1f7be3
Author: Pan Li 
Date:   Thu May 1 21:23:54 2025 +0800

RISC-V: Combine vec_duplicate + vadd.vv to vadd.vx on GR2VR cost

This patch combines vec_duplicate + vadd.vv into vadd.vx, as shown in the
example code below.  The related pattern depends on the cost of
vec_duplicate from GR2VR, as follows:

* The pattern matching will be active by default.
* The cost of GR2VR will be added to the total cost of pattern, aka:
  vec_dup cost = gr2vr_cost
  vadd.vv v, (vec_dup (x)) = gr2vr_cost + 1

Then the late-combine will take action if the cost of GR2VR is zero,
and reject the combination if the GR2VR cost is greater than zero.

Assume we have example code like below, GR2VR cost is 0.

  #define DEF_VX_BINARY(T, OP)\
  void\
  test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
  {   \
for (unsigned i = 0; i < n; i++)  \
  out[i] = in[i] OP x;\
  }

  DEF_VX_BINARY(int32_t, +)

Before this patch:
  10   │ test_binary_vx_add:
  11   │ beq a3,zero,.L8
  12   │ vsetvli a5,zero,e32,m1,ta,ma // Deleted if GR2VR cost zero
  13   │ vmv.v.x v2,a2// Ditto.
  14   │ sllia3,a3,32
  15   │ srlia3,a3,32
  16   │ .L3:
  17   │ vsetvli a5,a3,e32,m1,ta,ma
  18   │ vle32.v v1,0(a1)
  19   │ sllia4,a5,2
  20   │ sub a3,a3,a5
  21   │ add a1,a1,a4
  22   │ vadd.vv v1,v2,v1
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a3,zero,.L3

After this patch:
  10   │ test_binary_vx_add:
  11   │ beq a3,zero,.L8
  12   │ sllia3,a3,32
  13   │ srlia3,a3,32
  14   │ .L3:
  15   │ vsetvli a5,a3,e32,m1,ta,ma
  16   │ vle32.v v1,0(a1)
  17   │ sllia4,a5,2
  18   │ sub a3,a3,a5
  19   │ add a1,a1,a4
  20   │ vadd.vx v1,v1,a2
  21   │ vse32.v v1,0(a0)
  22   │ add a0,a0,a4
  23   │ bne a3,zero,.L3

The following test suites pass with this patch:
* The rv64gcv full regression test.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*_vx_): Add new
combine to convert vec_duplicate + vadd.vv to vadd.vx on GR2VR
cost.
* config/riscv/riscv.cc (riscv_rtx_costs): Take care of the cost
when vec_dup and vadd v, vec_dup(x).
* config/riscv/vector-iterators.md: Add new iterator for vx.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec-opt.md  | 23 +++
 gcc/config/riscv/riscv.cc| 35 ++-
 gcc/config/riscv/vector-iterators.md |  4 
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc7e05f..7cf7e8a92ba1 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,26 @@
 DONE;
   }
   [(set_attr "type" "vandn")])
+
+
+;; 
=
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; 
=
+(define_insn_and_split "*_vx_"
+ [(set (match_operand:V_VLSI0 "register_operand")
+   (any_int_binop_no_shift_vx:V_VLSI
+(vec_duplicate:V_VLSI
+  (match_operand: 1 "register_operand"))
+(match_operand:V_VLSI  2 "")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+rtx ops[] = {operands[0], operands[2], operands[1]};
+riscv_vector::emit_vlmax_insn (code_for_pred_scalar (, mode),
+  riscv_vector::BINARY_OP, ops);
+  }
+  [(set_attr "type" "vialu")])
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 42d501a1291b..3ee88db24fa5 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3863,7 +3863,40 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
  Cost Model need to be well analyzed and supported in the future. */
   if (riscv_v_ext_mode_p (mode))
 {
-  *total = COSTS_N_INSNS (1);
+  int gr2vr_cost = get_gr2vr_cost ();
+
+  switch (outer_code)
+   {
+   case SET:
+ {
+   switch (GET_CODE (x))
+ {
+ case VEC_DUPLICATE:
+   *total = gr2vr_cost * COSTS_N_INSNS (1);
+

[gcc r15-9628] libstdc++: Add missing feature-test macro in

2025-05-06 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:0d46cee2fe38b9a1aa576c8031ed22d298d88954

commit r15-9628-g0d46cee2fe38b9a1aa576c8031ed22d298d88954
Author: Dhruv Chawla 
Date:   Tue Apr 8 01:01:24 2025 -0700

libstdc++: Add missing feature-test macro in <memory>

Per version.syn#2, <memory> is required to define
__cpp_lib_addressof_constexpr as 201603L.

Bootstrapped and tested on aarch64-linux-gnu.

Signed-off-by: Dhruv Chawla 

libstdc++-v3/ChangeLog:
* include/std/memory: Define __glibcxx_want_addressof_constexpr.
* testsuite/20_util/headers/memory/version.cc: Test for macro
value.

(cherry picked from commit 0e65fef8717f404cf9c85bff51bf87d534f87828)

Diff:
---
 libstdc++-v3/include/std/memory  | 1 +
 libstdc++-v3/testsuite/20_util/headers/memory/version.cc | 4 
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index 99f542dcddc6..78a1250d29a1 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -97,6 +97,7 @@
 #  include 
 #endif
 
+#define __glibcxx_want_addressof_constexpr
 #define __glibcxx_want_allocator_traits_is_always_equal
 #define __glibcxx_want_assume_aligned
 #define __glibcxx_want_atomic_shared_ptr
diff --git a/libstdc++-v3/testsuite/20_util/headers/memory/version.cc 
b/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
index c82c9a018e0a..946955dd2123 100644
--- a/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
+++ b/libstdc++-v3/testsuite/20_util/headers/memory/version.cc
@@ -6,3 +6,7 @@
 #if __cpp_lib_allocator_traits_is_always_equal != 201411L
 # error "Feature-test macro __cpp_lib_allocator_traits_is_always_equal has 
wrong value in "
 #endif
+
+#if __cpp_lib_addressof_constexpr != 201603L
+# error "Feature-test macro __cpp_lib_addressof_constexpr has wrong value in 
"
+#endif


[gcc r16-404] libstdc++: Add noexcept to some std::counted_iterator operations

2025-05-06 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:ac9fec014df8d75c2185981c9d191d1080e98094

commit r16-404-gac9fec014df8d75c2185981c9d191d1080e98094
Author: Jonathan Wakely 
Date:   Thu May 1 22:56:56 2025 +0100

libstdc++: Add noexcept to some std::counted_iterator operations

This was inspired by LWG 4245 but goes further. Anything which only
reads or writes the _M_length member can be noexcept. That
member is an iterator difference_type which means it's a signed integer
type or the __max_diff_type integer-like class type, so all arithmetic
and comparisons are non-throwing.

libstdc++-v3/ChangeLog:

* include/bits/stl_iterator.h (counted_iterator): Add noexcept
to friend operators which only access the _M_length member.

Reviewed-by: Tomasz Kamiński 

Diff:
---
 libstdc++-v3/include/bits/stl_iterator.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index bed72955d0c4..478a98fe8a4f 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -2511,17 +2511,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
[[nodiscard]]
friend constexpr iter_difference_t<_It2>
operator-(const counted_iterator& __x,
- const counted_iterator<_It2>& __y)
+ const counted_iterator<_It2>& __y) noexcept
{ return __y._M_length - __x._M_length; }
 
   [[nodiscard]]
   friend constexpr iter_difference_t<_It>
-  operator-(const counted_iterator& __x, default_sentinel_t)
+  operator-(const counted_iterator& __x, default_sentinel_t) noexcept
   { return -__x._M_length; }
 
   [[nodiscard]]
   friend constexpr iter_difference_t<_It>
-  operator-(default_sentinel_t, const counted_iterator& __y)
+  operator-(default_sentinel_t, const counted_iterator& __y) noexcept
   { return __y._M_length; }
 
   constexpr counted_iterator&
@@ -2548,19 +2548,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
[[nodiscard]]
friend constexpr bool
operator==(const counted_iterator& __x,
-  const counted_iterator<_It2>& __y)
+  const counted_iterator<_It2>& __y) noexcept
{ return __x._M_length == __y._M_length; }
 
   [[nodiscard]]
   friend constexpr bool
-  operator==(const counted_iterator& __x, default_sentinel_t)
+  operator==(const counted_iterator& __x, default_sentinel_t) noexcept
   { return __x._M_length == 0; }
 
   template _It2>
[[nodiscard]]
friend constexpr strong_ordering
operator<=>(const counted_iterator& __x,
-   const counted_iterator<_It2>& __y)
+   const counted_iterator<_It2>& __y) noexcept
{ return __y._M_length <=> __x._M_length; }
 
   [[nodiscard]]


[gcc r14-11743] c++: Optimize in maybe_clone_body aliases even when not at_eof [PR113208]

2025-05-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:6d5a6a26e28d15ac05f59ee4d3c3d7d4d42acac3

commit r14-11743-g6d5a6a26e28d15ac05f59ee4d3c3d7d4d42acac3
Author: Jakub Jelinek 
Date:   Wed May 15 18:50:11 2024 +0200

c++: Optimize in maybe_clone_body aliases even when not at_eof [PR113208]

This patch reworks the cdtor alias optimization, such that we can create
aliases even when maybe_clone_body is called not at at_eof time, without
trying to repeat it in maybe_optimize_cdtor.

2024-05-15  Jakub Jelinek  
Jason Merrill  

PR lto/113208
* cp-tree.h (maybe_optimize_cdtor): Remove.
* decl2.cc (tentative_decl_linkage): Call maybe_make_one_only
for implicit instantiations of maybe in charge ctors/dtors
declared inline.
(import_export_decl): Don't call maybe_optimize_cdtor.
(c_parse_final_cleanups): Formatting fixes.
* optimize.cc (can_alias_cdtor): Adjust condition, for
HAVE_COMDAT_GROUP && DECL_ONE_ONLY && DECL_WEAK return true even
if not DECL_INTERFACE_KNOWN.
(maybe_clone_body): Don't clear DECL_SAVED_TREE, instead set it
to void_node.
(maybe_optimize_cdtor): Remove.
* decl.cc (cxx_comdat_group): For DECL_CLONED_FUNCTION_P
functions if SUPPORTS_ONE_ONLY return DECL_COMDAT_GROUP if already
set.

* g++.dg/abi/comdat3.C: New test.
* g++.dg/abi/comdat4.C: New test.

(cherry picked from commit 6ad7ca1bb905736c0f57688e93e9e77cbc71a325)

Diff:
---
 gcc/cp/cp-tree.h   |  1 -
 gcc/cp/decl.cc |  7 +
 gcc/cp/decl2.cc| 32 ++-
 gcc/cp/optimize.cc | 63 ++
 gcc/testsuite/g++.dg/abi/comdat3.C | 22 +
 gcc/testsuite/g++.dg/abi/comdat4.C | 28 +
 6 files changed, 78 insertions(+), 75 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 1d10c27c676c..fae6c6733f13 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7454,7 +7454,6 @@ extern bool handle_module_option (unsigned opt, const 
char *arg, int value);
 /* In optimize.cc */
 extern tree clone_attrs(tree);
 extern bool maybe_clone_body   (tree);
-extern void maybe_optimize_cdtor   (tree);
 
 /* In parser.cc */
 extern tree cp_convert_range_for (tree, tree, tree, cp_decomp *, bool,
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 7a14cb2eb892..5395bdff97b7 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -19280,6 +19280,13 @@ cxx_comdat_group (tree decl)
  else
break;
}
+  /* If a ctor/dtor has already set the comdat group by
+maybe_clone_body, don't override it.  */
+  if (SUPPORTS_ONE_ONLY
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_CLONED_FUNCTION_P (decl))
+   if (tree comdat = DECL_COMDAT_GROUP (decl))
+ return comdat;
 }
 
   return decl;
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 19d3fd3287d6..b54364bebaf9 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -3328,16 +3328,23 @@ tentative_decl_linkage (tree decl)
 linkage of all functions, and as that causes writes to
 the data mapped in from the PCH file, it's advantageous
 to mark the functions at this point.  */
- if (DECL_DECLARED_INLINE_P (decl)
- && (!DECL_IMPLICIT_INSTANTIATION (decl)
- || DECL_DEFAULTED_FN (decl)))
+ if (DECL_DECLARED_INLINE_P (decl))
{
- /* This function must have external linkage, as
-otherwise DECL_INTERFACE_KNOWN would have been
-set.  */
- gcc_assert (TREE_PUBLIC (decl));
- comdat_linkage (decl);
- DECL_INTERFACE_KNOWN (decl) = 1;
+ if (!DECL_IMPLICIT_INSTANTIATION (decl)
+ || DECL_DEFAULTED_FN (decl))
+   {
+ /* This function must have external linkage, as
+otherwise DECL_INTERFACE_KNOWN would have been
+set.  */
+ gcc_assert (TREE_PUBLIC (decl));
+ comdat_linkage (decl);
+ DECL_INTERFACE_KNOWN (decl) = 1;
+   }
+ else if (DECL_MAYBE_IN_CHARGE_CDTOR_P (decl))
+   /* For implicit instantiations of cdtors try to make
+  it comdat, so that maybe_clone_body can use aliases.
+  See PR113208.  */
+   maybe_make_one_only (decl);
}
}
   else if (VAR_P (decl))
@@ -3584,9 +3591,6 @@ import_export_decl (tree decl)
 }
 
   DECL_INTERFACE_KNOWN (decl) = 1;
-
-  if (DECL_CLONED_FUNCTION_P (decl))
-maybe_optimize_cdtor (decl);
 }
 
 /* Return an expression that performs the destruction of DECL, which
@@ -5312,7 +5316

[gcc r14-11744] c++: add fixed testcase [PR109464]

2025-05-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:9d9800938ef61b7978abb33a8b202beeb3efed6b

commit r14-11744-g9d9800938ef61b7978abb33a8b202beeb3efed6b
Author: Patrick Palka 
Date:   Fri Jul 19 11:08:09 2024 -0400

c++: add fixed testcase [PR109464]

Seems to be fixed by r15-521-g6ad7ca1bb90573.

PR c++/109464

gcc/testsuite/ChangeLog:

* g++.dg/template/explicit-instantiation8.C: New test.

(cherry picked from commit 58a9f3ded1a0ccc2c8b0a42f976950041734798e)

Diff:
---
 .../g++.dg/template/explicit-instantiation8.C  | 24 ++
 1 file changed, 24 insertions(+)

diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation8.C 
b/gcc/testsuite/g++.dg/template/explicit-instantiation8.C
new file mode 100644
index ..92152a2992e9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/explicit-instantiation8.C
@@ -0,0 +1,24 @@
+// PR c++/109464
+// { dg-do compile { target c++11 } }
+
+template
+struct shallow
+ {
+   int len;
+   constexpr shallow() : len(0) { }
+  };
+
+template
+struct use_shallow
+  {
+   static constexpr shallow s_zstr = { };
+   static_assert(s_zstr.len == 0, "");
+  };
+
+extern template struct shallow;
+extern template struct use_shallow;
+
+template struct shallow;
+template struct use_shallow;
+
+// { dg-final { scan-assembler "_ZN7shallowIcEC2Ev" } }


[gcc r16-411] [RISC-V][PR middle-end/114512] Recognize more bext idioms for RISC-V

2025-05-06 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4cd741dcbd3729863ab005d7ec61a890e0a193f1

commit r16-411-g4cd741dcbd3729863ab005d7ec61a890e0a193f1
Author: Shreya Munnangi 
Date:   Tue May 6 06:38:00 2025 -0600

[RISC-V][PR middle-end/114512] Recognize more bext idioms for RISC-V

This is Shreya's next chunk of work.  When I was looking for good bugs for
her to chase down I came across PR114512.  While the bug isn't necessarily a
RISC-V specific bug, its testcases did show how we were failing to recognize
certain bit extraction idioms and how the lispy nature of RTL allows us to
tackle these issues in the combiner.

First, the bit position may be masked.  The RISC-V port does not define
SHIFT_COUNT_TRUNCATED for valid reasons.  So if we want to optimize away a 
mask
that matches what the hardware will do, we need suitable insns that include
that explicit masking.

In addition to needing to incorporate masking, the masking may happen in a
subword mode.  So we need to recognize the mask wrapped in a zero extension.

Those two captured the most common cases.

We can also have a single bit extraction implemented as a left shift of the 
bit
into the sign bit, then a right shift by the size of a word - 1.  These are
less common, but we did cover the case derived from the upstream bug report 
as
well as one class seen reviewing the instruction stream for spec2017.

Finally, there is extracting a single bit at a variable position from a
constant, as seen with some regularity in spec2017.  In that scenario,
combine's chosen split point wasn't ideal (I forget what it selected, but it
definitely wasn't helpful).  So we've got a new splitter for this case as well.

Earlier versions of this have gone through my tester as well as a bootstrap 
and
regression cycle.  This version has just gone through a cycle in my tester 
(but
missed today's bootstrap cycle).

Waiting on the upstream pre-commit tester to render its verdict, but the 
plan
is to commit on Shreya's behalf once that's clean.

Co-authored-by: Jeff Law  

PR middle-end/114512
gcc/
* config/riscv/bitmanip.md (bext* patterns): New patterns for
bext recognition plus splitter for extracting variable bit from
a constant.
* config/riscv/predicates.md (bitpos_mask_operand): New predicate.

gcc/testsuite/
* gcc.target/riscv/pr114512.c: New test.

Diff:
---
 gcc/config/riscv/bitmanip.md  |  74 
 gcc/config/riscv/predicates.md|   4 ++
 gcc/testsuite/gcc.target/riscv/pr114512.c | 109 ++
 3 files changed, 187 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 20d03dc8792d..95df5331769e 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1302,3 +1302,77 @@
 }
   DONE;
 })
+
+;; More forms of single bit extraction.  The RISC-V port does not
+;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position
+;; is masked.
+;;
+;; We could in theory use this for rv32 as well, but it probably does
+;; not occur in practice.  The bit position would need to be QI/HI mode,
+;; otherwise we would not need the zero extension.
+;;
+;; One could also argue that the zero extension is redundant and should
+;; have been optimized away during RTL simplification.
+(define_insn "*bextdi_position_ze_masked"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+(const_int 1)
+(zero_extend:DI
+ (and:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 63)]
+  "TARGET_64BIT && TARGET_ZBS"
+  "bext\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
+;; Same as above, but without the extraneous zero_extend.
+(define_insn "*bextdi_position_ze_masked"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (zero_extract:X
+ (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X (match_operand:SI 2 "register_operand" "r")
+(match_operand:SI 3 "bitpos_mask_operand" "n"]
+  "TARGET_64BIT && TARGET_ZBS"
+  "bext\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
+
+;; Single bit extraction by first shifting it into the sign bit, then
+;; shifting it down to the low bit.
+(define_insn "*bext_position_masked"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (lshiftrt:X (ashift:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))
+   (match_operand:X 3 "bitpos_mask_operand" "n")))]
+  "TARGET_ZBS"
+  "bext\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
+;; Single bit extraction by shifting into the low bit, but with the
+;;

[gcc r16-428] Fortran: Fix ICE with use of c_associated.

2025-05-06 Thread Jerry DeLisle via Gcc-cvs
https://gcc.gnu.org/g:d0571638a6bad932b226ada98b167fa47a47d838

commit r16-428-gd0571638a6bad932b226ada98b167fa47a47d838
Author: Jerry DeLisle 
Date:   Mon May 5 20:05:22 2025 -0700

Fortran: Fix ICE with use of c_associated.

PR fortran/120049

gcc/fortran/ChangeLog:

* check.cc (gfc_check_c_associated): Modify checks to avoid
ICE and allow use, intrinsic :: iso_c_binding from a separate
module file.

gcc/testsuite/ChangeLog:

* gfortran.dg/pr120049_a.f90: New test.
* gfortran.dg/pr120049_b.f90: New test.

Diff:
---
 gcc/fortran/check.cc | 42 
 gcc/testsuite/gfortran.dg/pr120049_a.f90 | 15 
 gcc/testsuite/gfortran.dg/pr120049_b.f90 |  8 ++
 3 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 299c216cf36c..f02a2a338974 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -5955,30 +5955,40 @@ gfc_check_c_sizeof (gfc_expr *arg)
 bool
 gfc_check_c_associated (gfc_expr *c_ptr_1, gfc_expr *c_ptr_2)
 {
-  if (c_ptr_1->ts.type != BT_DERIVED
-  || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
-  || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
- && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
+  if (c_ptr_1)
 {
-  gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
-"type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
-  return false;
+  if (c_ptr_1->expr_type == EXPR_FUNCTION && c_ptr_1->ts.type == BT_VOID)
+   return true;
+
+  if (c_ptr_1->ts.type != BT_DERIVED
+ || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
+ || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
+ && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
+   {
+ gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
+"type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
+ return false;
+   }
 }
 
   if (!scalar_check (c_ptr_1, 0))
 return false;
 
-  if (c_ptr_2
-  && (c_ptr_2->ts.type != BT_DERIVED
+  if (c_ptr_2)
+{
+  if (c_ptr_2->expr_type == EXPR_FUNCTION && c_ptr_2->ts.type == BT_VOID)
+   return true;
+
+  if (c_ptr_2->ts.type != BT_DERIVED
  || c_ptr_2->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
  || (c_ptr_1->ts.u.derived->intmod_sym_id
- != c_ptr_2->ts.u.derived->intmod_sym_id)))
-{
-  gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the "
-"same type as C_PTR_1: %s instead of %s", &c_ptr_1->where,
-gfc_typename (&c_ptr_1->ts),
-gfc_typename (&c_ptr_2->ts));
-  return false;
+ != c_ptr_2->ts.u.derived->intmod_sym_id))
+   {
+ gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the "
+  "same type as C_PTR_1: %s instead of %s", &c_ptr_1->where,
+  gfc_typename (&c_ptr_1->ts), gfc_typename (&c_ptr_2->ts));
+ return false;
+   }
 }
 
   if (c_ptr_2 && !scalar_check (c_ptr_2, 1))
diff --git a/gcc/testsuite/gfortran.dg/pr120049_a.f90 
b/gcc/testsuite/gfortran.dg/pr120049_a.f90
new file mode 100644
index ..c404a4dedd9a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr120049_a.f90
@@ -0,0 +1,15 @@
+! { dg-do preprocess }
+! { dg-additional-options "-cpp" }
+!
+! Test the fix for PR86248
+program tests_gtk_sup
+  use gtk_sup
+  implicit none
+  type(c_ptr), target :: val
+  if (c_associated(val, c_loc(val))) then
+stop 1
+  endif
+  if (c_associated(c_loc(val), val)) then
+stop 2
+  endif
+end program tests_gtk_sup
diff --git a/gcc/testsuite/gfortran.dg/pr120049_b.f90 
b/gcc/testsuite/gfortran.dg/pr120049_b.f90
new file mode 100644
index ..127db984077d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr120049_b.f90
@@ -0,0 +1,8 @@
+! { dg-do  run }
+! { dg-additional-sources pr120049_a.f90 }
+!
+! Module for pr120049.f90
+!
+module gtk_sup
+  use, intrinsic :: iso_c_binding
+end module gtk_sup


[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression intrinsic_mmloc_2

2025-05-06 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:570be3a24b978d03da8f25c024928096ffbc5569

commit 570be3a24b978d03da8f25c024928096ffbc5569
Author: Mikael Morin 
Date:   Tue May 6 20:42:15 2025 +0200

Correction régression intrinsic_mmloc_2

Diff:
---
 gcc/fortran/trans-intrinsic.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 12a317440cff..233537664be6 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5007,10 +5007,10 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
   as.rank = 1;
   as.lower[0] = gfc_get_int_expr (gfc_index_integer_kind,
  &arrayexpr->where,
- HOST_WIDE_INT_1);
+ HOST_WIDE_INT_0);
   as.upper[0] = gfc_get_int_expr (gfc_index_integer_kind,
  &arrayexpr->where,
- arrayexpr->rank);
+ arrayexpr->rank - 1);
 
   tree array = gfc_get_nodesc_array_type (type, &as, PACKED_STATIC, true);


[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression oldstyle_1

2025-05-06 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:76bee02bf2873e88b15e0cf74722b8985098ed4f

commit 76bee02bf2873e88b15e0cf74722b8985098ed4f
Author: Mikael Morin 
Date:   Mon May 5 20:22:15 2025 +0200

Correction régression oldstyle_1

Diff:
---
 gcc/fortran/trans-array.cc | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index f75747038219..9a91fa64c2a6 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -6058,6 +6058,14 @@ gfc_conv_array_initializer (tree type, gfc_expr * expr)
  else
tmp1 = gfc_conv_mpz_to_tree (c->offset, gfc_index_integer_kind);
 
+ if (min_index)
+   {
+ tmp1 = fold_build2_loc (input_location, PLUS_EXPR,
+ TREE_TYPE (tmp1), tmp1, min_index);
+ tmp2 = fold_build2_loc (input_location, PLUS_EXPR,
+ TREE_TYPE (tmp2), tmp2, min_index);
+   }
+
  range = fold_build2 (RANGE_EXPR, gfc_array_index_type, tmp1, 
tmp2);
  mpz_clear (maxval);
}


[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression loop_versioning_8

2025-05-06 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:1b73c190a28390ac6f7ff27e1a58df89527773ad

commit 1b73c190a28390ac6f7ff27e1a58df89527773ad
Author: Mikael Morin 
Date:   Tue May 6 17:41:55 2025 +0200

Correction régression loop_versioning_8

Diff:
---
 gcc/gimple-loop-versioning.cc | 187 +-
 1 file changed, 146 insertions(+), 41 deletions(-)

diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 5c9b2fb77ff9..f881e07edcc2 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -172,6 +172,8 @@ struct address_term_info
   /* True if STRIDE == 1 is a versioning opportunity when considered
  in isolation.  */
   bool versioning_opportunity_p;
+
+  unsigned HOST_WIDE_INT versioning_value;
 };
 
 /* Information about an address calculation, and the range of constant
@@ -237,6 +239,13 @@ public:
  (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime.  */
   bitmap_head unity_names;
 
+  /* We'd like to version the loop for the case in which these SSA names
+ (keyed off their SSA_NAME_VERSION) are equal to the respective access size
+ at runtime.  */
+  hash_map , unsigned 
HOST_WIDE_INT>>
+  non_unity_versioning_values;
+
   /* If versioning succeeds, this points the version of the loop that
  assumes the version conditions holds.  */
   class loop *optimized_loop;
@@ -283,7 +292,7 @@ private:
   unsigned int max_insns_for_loop (class loop *);
   bool expensive_stmt_p (gimple *);
 
-  void version_for_unity (gimple *, tree);
+  void version_for_value (gimple *, tree, unsigned HOST_WIDE_INT);
   bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT * = 0);
   bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
@@ -487,7 +496,9 @@ bool
 loop_info::worth_versioning_p () const
 {
   return (!rejected_p
- && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
+ && (!bitmap_empty_p (&unity_names)
+ || !non_unity_versioning_values.is_empty ()
+ || subloops_benefit_p));
 }
 
 loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
@@ -512,9 +523,17 @@ loop_versioning::lv_dom_walker::before_dom_children 
(basic_block bb)
 tree
 loop_versioning::name_prop::value_of_expr (tree val, gimple *)
 {
-  if (TREE_CODE (val) == SSA_NAME
-  && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
+  if (TREE_CODE (val) != SSA_NAME)
+return NULL_TREE;
+
+  if (bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
 return build_one_cst (TREE_TYPE (val));
+
+  unsigned HOST_WIDE_INT *version_size;
+  version_size = m_li.non_unity_versioning_values.get (SSA_NAME_VERSION (val));
+  if (version_size)
+return build_int_cst (TREE_TYPE (val), version_size);
+
   return NULL_TREE;
 }
 
@@ -535,6 +554,7 @@ loop_versioning::loop_versioning (function *fn)
 {
   m_loops[i].outermost = get_loop (m_fn, 0);
   bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
+  m_loops[i].non_unity_versioning_values.empty ();
 }
 
   /* Initialize the list of blocks that belong to each loop.  */
@@ -606,12 +626,40 @@ loop_versioning::expensive_stmt_p (gimple *stmt)
is invariant in the loop.  */
 
 void
-loop_versioning::version_for_unity (gimple *stmt, tree name)
+loop_versioning::version_for_value (gimple *stmt, tree name,
+   unsigned HOST_WIDE_INT value)
 {
   class loop *loop = loop_containing_stmt (stmt);
   loop_info &li = get_loop_info (loop);
 
-  if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
+  bool changed = false;
+  if (value == 1
+  && bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
+changed = true;
+  else
+{
+  bool existed;
+  unsigned HOST_WIDE_INT &versioning_value
+ = li.non_unity_versioning_values.get_or_insert (
+   SSA_NAME_VERSION (name), &existed);
+  if (!existed)
+   {
+ versioning_value = value;
+ changed = true;
+   }
+  else if (versioning_value != value)
+   {
+ li.rejected_p = true;
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, stmt, "disabling versioning of loop with"
+" multiple values for %T: " HOST_WIDE_INT_PRINT_DEC
+" and " HOST_WIDE_INT_PRINT_DEC "\n", name,
+versioning_value, value);
+ return;
+   }
+}
+
+  if (changed)
 {
   /* This is the first time we've wanted to version LOOP for NAME.
 Keep track of the outermost loop that can handle all versioning
@@ -624,7 +672,8 @@ loop_versioning::version_for_unity (gimple *stmt, tree name)
   if (dump_enabled_p ())
{
  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
-  " for when %T == 1", name);
+   

[gcc(refs/users/mikael/heads/refactor_descriptor_v05)] Correction régression pr93671

2025-05-06 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:129921d40fb49d296a65851f227046643bc4439d

commit 129921d40fb49d296a65851f227046643bc4439d
Author: Mikael Morin 
Date:   Tue May 6 21:48:41 2025 +0200

Correction régression pr93671

Diff:
---
 gcc/fortran/trans-array.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 9a91fa64c2a6..18ffdd5c917d 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -8821,13 +8821,13 @@ structure_alloc_comps (gfc_symbol * der_type, tree 
decl, tree dest,
   /* Build the body of the loop.  */
   gfc_init_block (&loopbody);
 
-  vref = gfc_build_array_ref (var, index, true);
+  vref = gfc_build_array_ref (var, index, true, gfc_index_zero_node);
 
   if (purpose == COPY_ALLOC_COMP || purpose == COPY_ONLY_ALLOC_COMP)
{
  tmp = build_fold_indirect_ref_loc (input_location,
 gfc_conv_array_data (dest));
- dref = gfc_build_array_ref (tmp, index, true);
+ dref = gfc_build_array_ref (tmp, index, true, gfc_index_zero_node);
  tmp = structure_alloc_comps (der_type, vref, dref, rank,
   COPY_ALLOC_COMP, caf_mode, args,
   no_finalization);


[gcc r16-430] [PATCH] RISC-V: Recognized svadu and svade extension

2025-05-06 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:aed2a447c7ff4282621aa7941f840cb2ddc90354

commit r16-430-gaed2a447c7ff4282621aa7941f840cb2ddc90354
Author: Mingzhu Yan 
Date:   Tue May 6 16:59:09 2025 -0600

[PATCH] RISC-V: Recognized svadu and svade extension

This patch adds support for the svadu and svade extensions, enabling GCC to
recognize and process them correctly at compile time.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_ext_version_table): New
extension.
(riscv_ext_flag_table): Ditto.
* config/riscv/riscv.opt: New mask.

* doc/invoke.texi (RISC-V Options): New extension.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-45.c: New test.
* gcc.target/riscv/arch-46.c: New test.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 10 +++---
 gcc/config/riscv/riscv.opt   |  4 
 gcc/doc/invoke.texi  |  8 
 gcc/testsuite/gcc.target/riscv/arch-45.c |  5 +
 gcc/testsuite/gcc.target/riscv/arch-46.c |  5 +
 5 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 145a0f2bd95f..58c7d205b336 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -441,6 +441,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"ssstateen", ISA_SPEC_CLASS_NONE, 1, 0},
   {"sstc",  ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"svade",   ISA_SPEC_CLASS_NONE, 1, 0},
+  {"svadu",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
   {"svnapot", ISA_SPEC_CLASS_NONE, 1, 0},
   {"svpbmt",  ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1764,9 +1766,11 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   RISCV_EXT_FLAG_ENTRY ("zcmp", x_riscv_zc_subext, MASK_ZCMP),
   RISCV_EXT_FLAG_ENTRY ("zcmt", x_riscv_zc_subext, MASK_ZCMT),
 
-  RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL),
-  RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT),
-  RISCV_EXT_FLAG_ENTRY ("svvptc", x_riscv_sv_subext, MASK_SVVPTC),
+  RISCV_EXT_FLAG_ENTRY ("svade",   x_riscv_sv_subext, MASK_SVADE),
+  RISCV_EXT_FLAG_ENTRY ("svadu",   x_riscv_sv_subext, MASK_SVADU),
+  RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL),
+  RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT),
+  RISCV_EXT_FLAG_ENTRY ("svvptc",  x_riscv_sv_subext, MASK_SVVPTC),
 
   RISCV_EXT_FLAG_ENTRY ("ztso", x_riscv_ztso_subext, MASK_ZTSO),
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 710248099b3c..9e471be4055c 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -466,6 +466,10 @@ Mask(XCVBI) Var(riscv_xcv_subext)
 TargetVariable
 int riscv_sv_subext
 
+Mask(SVADE) Var(riscv_sv_subext)
+
+Mask(SVADU) Var(riscv_sv_subext)
+
 Mask(SVINVAL) Var(riscv_sv_subext)
 
 Mask(SVNAPOT) Var(riscv_sv_subext)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 90cbb516bc46..b1964b3d3f57 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31532,6 +31532,14 @@ to @samp{zvks} and @samp{zvkg}.
 @tab 1.0
 @tab Supervisor-mode timer interrupts extension.
 
+@item svade
+@tab 1.0
+@tab Cause exception when hardware updating of A/D bits is disabled
+
+@item svadu
+@tab 1.0
+@tab Hardware Updating of A/D Bits extension.
+
 @item svinval
 @tab 1.0
 @tab Fine-grained address-translation cache invalidation extension.
diff --git a/gcc/testsuite/gcc.target/riscv/arch-45.c 
b/gcc/testsuite/gcc.target/riscv/arch-45.c
new file mode 100644
index ..afffb9955785
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-45.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svadu -mabi=lp64" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-46.c 
b/gcc/testsuite/gcc.target/riscv/arch-46.c
new file mode 100644
index ..2a062172e754
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-46.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svade -mabi=lp64" } */
+int foo()
+{
+}


[gcc r16-431] [PATCH] RISC-V: Minimal support for sdtrig and ssstrict extensions.

2025-05-06 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:82126b26d17ed2c5ee48ca51e3fed69e898c9a20

commit r16-431-g82126b26d17ed2c5ee48ca51e3fed69e898c9a20
Author: Dongyan Chen 
Date:   Tue May 6 17:09:54 2025 -0600

[PATCH] RISC-V: Minimal support for sdtrig and ssstrict extensions.

This patch adds support for the sdtrig and ssstrict extensions[1], enabling
GCC to recognize and process them correctly at compile time.

[1] https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: New extension.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-47.c: New test.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 3 +++
 gcc/testsuite/gcc.target/riscv/arch-47.c | 5 +
 2 files changed, 8 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 58c7d205b336..0233e1a108be 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -432,6 +432,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zcmp", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zcmt", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"sdtrig",  ISA_SPEC_CLASS_NONE, 1, 0},
+
   {"smaia", ISA_SPEC_CLASS_NONE, 1, 0},
   {"smepmp",ISA_SPEC_CLASS_NONE, 1, 0},
   {"smstateen", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -440,6 +442,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"sscofpmf",  ISA_SPEC_CLASS_NONE, 1, 0},
   {"ssstateen", ISA_SPEC_CLASS_NONE, 1, 0},
   {"sstc",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"ssstrict",  ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"svade",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"svadu",   ISA_SPEC_CLASS_NONE, 1, 0},
diff --git a/gcc/testsuite/gcc.target/riscv/arch-47.c 
b/gcc/testsuite/gcc.target/riscv/arch-47.c
new file mode 100644
index ..06bc80fe7800
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-47.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_sdtrig_ssstrict -mabi=lp64" } */
+int foo()
+{
+}


[gcc r16-433] [RISC-V] Avoid unnecessary andi with -1 argument

2025-05-06 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:2c46a74d4707bd1e67561ed8514c67efc6164832

commit r16-433-g2c46a74d4707bd1e67561ed8514c67efc6164832
Author: Jeff Law 
Date:   Tue May 6 19:20:14 2025 -0600

[RISC-V] Avoid unnecessary andi with -1 argument

I was preparing to do some testing of Shreya's next patch on spec and
stumbled across another "andi dst,src,-1" case.  I fixed some stuff like this
in the gcc-15 cycle, but this one slipped through.

It's probably about 100M instructions on deepsjeng.  So tiny, but there's no
good reason to leave the clearly extraneous instructions in the output.

As with the other cases, it's a post-reload splitter that's not being
careful enough about the code it generates.

This has gone through my tester successfully.  Waiting on the pre-commit 
tester
before going forward.

gcc/
* config/riscv/riscv.md 
(*branch_shiftedarith_equals_zero):
Avoid generating unnecessary andi.  Fix formatting.

gcc/testsuite
* g++.target/riscv/redundant-andi.C: New test.

Diff:
---
 gcc/config/riscv/riscv.md   | 20 ++-
 gcc/testsuite/g++.target/riscv/redundant-andi.C | 26 +
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 15c89ff4e3de..259997fef68f 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3173,15 +3173,25 @@
   "#"
   "&& reload_completed"
   [(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6)))
-   (set (match_dup 4) (and:X (match_dup 4) (match_dup 7)))
+   (set (match_dup 4) (match_dup 8))
(set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
   (label_ref (match_dup 0)) (pc)))]
 {
-   HOST_WIDE_INT mask = INTVAL (operands[3]);
-   int trailing = ctz_hwi (mask);
+  HOST_WIDE_INT mask = INTVAL (operands[3]);
+  int trailing = ctz_hwi (mask);
+
+  operands[6] = GEN_INT (trailing);
+  operands[7] = GEN_INT (mask >> trailing);
 
-   operands[6] = GEN_INT (trailing);
-   operands[7] = GEN_INT (mask >> trailing);
+  /* This splits after reload, so there's little chance to clean things
+ up.  Rather than emit a ton of RTL here, we can just make a new
+ operand for that RHS and use it.  For the case where the AND would
+ have been redundant, we can make it a NOP move, which does get
+ cleaned up.  */
+  if (operands[7] == CONSTM1_RTX (word_mode))
+operands[8] = operands[4];
+  else
+operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]);
 }
 [(set_attr "type" "branch")])
 
diff --git a/gcc/testsuite/g++.target/riscv/redundant-andi.C 
b/gcc/testsuite/g++.target/riscv/redundant-andi.C
new file mode 100644
index ..fe560a78ce17
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/redundant-andi.C
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcb -mabi=lp64" { target rv64 } } */
+/* { dg-options "-O2 -march=rv32gcb -mabi=ilp32" { target rv32 } } */
+
+
+typedef int move_s;
+struct state_t
+{
+  int npieces[13];
+};
+typedef struct state_t state_t;
+int
+search (state_t *s, int alpha, int beta, int depth, int is_null, int cutnode,
+   int extend, int wpcs, int bpcs, move_s moves[240])
+{
+  int i;
+  if moves[i]) >> 19) & 0x0F) != 13
+  && (((moves[i]) >> 19) & 0x0F) != 1 && (((moves[i]) >> 19) & 0x0F) != 2)
+if ((wpcs + bpcs) == 1)
+  extend += 4;
+  return extend;
+}
+
+/* A splitter was generating an unnecessary andi instruction.  Verify it's
+   not in our output.  */
+/* { dg-final { scan-assembler-not "andi\t\[a-z\]\[0-9\],\[a-z\]\[0-9\],-1" } 
} */


[gcc r16-429] i386: Add costs for integer<->float conversions

2025-05-06 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:62ffaef514aad0eae5d3bc548d8e86d21a3095e7

commit r16-429-g62ffaef514aad0eae5d3bc548d8e86d21a3095e7
Author: Jan Hubicka 
Date:   Wed May 7 00:24:04 2025 +0200

i386: Add costs for integer<->float conversions

Extend ix86_rtx_costs to cost FLOAT, UNSIGNED_FLOAT, FIX, and UNSIGNED_FIX.
There are many variants of integer<->float conversions and it seems
meaningful to start with the typical scalar and vector ones.  On modern CPUs
the variants differ by at most 1 cycle.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_rtx_costs): Cost FLOAT, UNSIGNED_FLOAT,
FIX, UNSIGNED_FIX.
* config/i386/i386.h (struct processor_costs): Add
cvtsi2ss, cvtss2si, cvtpi2ps, cvtps2pi.
* config/i386/x86-tune-costs.h (struct processor_costs): Update 
tables.

Diff:
---
 gcc/config/i386/i386.cc  |  21 +++
 gcc/config/i386/i386.h   |   4 ++
 gcc/config/i386/x86-tune-costs.h | 133 +++
 3 files changed, 158 insertions(+)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 89f518c86b5e..bef95ea18c87 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22794,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
   else
*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   return false;
+case FLOAT:
+case UNSIGNED_FLOAT:
+  if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+   /* TODO: We do not have cost tables for x87.  */
+   *total = cost->fadd;
+  else if (VECTOR_MODE_P (mode))
+   *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+  else
+   *total = cost->cvtsi2ss;
+  return false;
+
+case FIX:
+case UNSIGNED_FIX:
+  if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+   /* TODO: We do not have cost tables for x87.  */
+   *total = cost->fadd;
+  else if (VECTOR_MODE_P (mode))
+   *total = ix86_vec_cost (mode, cost->cvtps2pi);
+  else
+   *total = cost->cvtss2si;
+  return false;
 
 case ABS:
   /* SSE requires memory load for the constant operand. It may make
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 02bf357d7762..6a38de30de43 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -213,6 +213,10 @@ struct processor_costs {
   such as VCVTPD2PS with larger reg in ymm.  */
   const int vcvtps2pd512;  /* cost 512bit packed FP conversions,
   such as VCVTPD2PS with larger reg in zmm.  */
+  const int cvtsi2ss;  /* cost of CVTSI2SS instruction.  */
+  const int cvtss2si;  /* cost of CVT(T)SS2SI instruction.  */
+  const int cvtpi2ps;  /* cost of CVTPI2PS instruction.  */
+  const int cvtps2pi;  /* cost of CVT(T)PS2PI instruction.  */
   const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
   fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index cddcf6173042..6cce70a6c403 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
   COSTS_N_BYTES (4),   /* cost of CVTSS2SD etc.  */
   COSTS_N_BYTES (4),   /* cost of 256bit VCVTPS2PD etc.  */
   COSTS_N_BYTES (6),   /* cost of 512bit VCVTPS2PD etc.  */
+  COSTS_N_BYTES (4),   /* cost of CVTSI2SS instruction.  */
+  COSTS_N_BYTES (4),   /* cost of CVT(T)SS2SI instruction.  */
+  COSTS_N_BYTES (4),   /* cost of CVTPI2PS instruction.  */
+  COSTS_N_BYTES (4),   /* cost of CVT(T)PS2PI instruction.  */
+  
   1, 1, 1, 1,  /* reassoc int, fp, vec_int, vec_fp.  */
   ix86_size_memcpy,
   ix86_size_memset,
@@ -249,6 +254,10 @@ struct processor_costs i386_cost = {   /* 386 specific costs */
   COSTS_N_INSNS (27),  /* cost of CVTSS2SD etc.  */
   COSTS_N_INSNS (54),  /* cost of 256bit VCVTPS2PD etc.  */
   COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (27),  /* cost of CVTSI2SS instruction.  */
+  COSTS_N_INSNS (27),  /* cost of CVT(T)SS2SI instruction.  */
+  COSTS_N_INSNS (27),  /* cost of CVTPI2PS instruction.  */
+  COSTS_N_INSNS (27),  /* cost of CVT(T)PS2PI instruction.  */
   1, 1, 1, 1,  /* reassoc int, fp, vec_int, vec_fp.  */
   i386_memcpy,
   i386_memset,
@@ -365,6 +374,10 @@ struct processor_costs i486_cost = {   /* 486 specific costs */
   COSTS_N_INSNS (8),   /* cost of CVTSS2SD etc.  *