date:20250708

[PATCH 2/3] tree: Add 7 and 8 argument TREE_CHECK_* and TREE_NOT_CHECK_*

2025-07-08 Thread Alex (Waffl3x)


From 0403104f65dd8cce4c71270d492d0922a7f2702c Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:28:12 -0600
Subject: [PATCH 2/3] tree: Add 7 and 8 argument TREE_CHECK_* and
 TREE_NOT_CHECK_*

I needed an 8 argument version of TREE_NOT_CHECK_*, it felt wrong to leave
a gap so the other versions are also added.  Maybe we should add a variadic
version instead though?

gcc/ChangeLog:

	* tree.h (TREE_CHECK7): Define.
	(TREE_NOT_CHECK7): Likewise.
	(TREE_CHECK8): Likewise.
	(TREE_NOT_CHECK8): Likewise.
	(tree_check7): New.
	(tree_not_check7): New.
	(tree_check8): New.
	(tree_not_check8): New.

Signed-off-by: Waffl3x 
---
 gcc/tree.h | 169 +
 1 file changed, 169 insertions(+)

diff --git a/gcc/tree.h b/gcc/tree.h
index 8b2d7fd481f..6c0ce8e0908 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -370,6 +370,23 @@ code_helper::is_builtin_fn () const
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) \
 (tree_not_check6 ((T), __FILE__, __LINE__, __FUNCTION__, \
 			(CODE1), (CODE2), (CODE3), (CODE4), (CODE5), (CODE6)))
+#define TREE_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) \
+(tree_check7 ((T), __FILE__, __LINE__, __FUNCTION__, \
+	      (CODE1), (CODE2), (CODE3), (CODE4), (CODE5), (CODE6), (CODE7)))
+/* Same argument layout as TREE_CHECK7, forwarded to tree_not_check7.  */
+#define TREE_NOT_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) \
+(tree_not_check7 ((T), __FILE__, __LINE__, __FUNCTION__, \
+		  (CODE1), (CODE2), (CODE3), (CODE4), \
+		  (CODE5), (CODE6), (CODE7)))
+#define TREE_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) \
+(tree_check8 ((T), __FILE__, __LINE__, __FUNCTION__, \
+	      (CODE1), (CODE2), (CODE3), (CODE4), \
+	      (CODE5), (CODE6), (CODE7), (CODE8)))
+/* Same argument layout as TREE_CHECK8, forwarded to tree_not_check8.  */
+#define TREE_NOT_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) \
+(tree_not_check8 ((T), __FILE__, __LINE__, __FUNCTION__, \
+		  (CODE1), (CODE2), (CODE3), (CODE4), \
+		  (CODE5), (CODE6), (CODE7), (CODE8)))
 
 #define CONTAINS_STRUCT_CHECK(T, STRUCT) \
 (contains_struct_check ((T), (STRUCT), __FILE__, __LINE__, __FUNCTION__))
@@ -503,6 +520,10 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define TREE_NOT_CHECK5(T, CODE1, CODE2, CODE3, CODE4, CODE5) (T)
 #define TREE_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
+#define TREE_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) (T)
+#define TREE_NOT_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) (T)
+#define TREE_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) (T)
+#define TREE_NOT_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) (T)
 #define TREE_CLASS_CHECK(T, CODE)		(T)
 #define TREE_RANGE_CHECK(T, CODE1, CODE2)	(T)
 #define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2)	(T)
@@ -3863,6 +3884,80 @@ tree_not_check6 (tree __t, const char *__f, int __l, const char *__g,
   return __t;
 }
 
+inline tree
+tree_check7 (tree __t, const char *__f, int __l, const char *__g,
+	     enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+	     enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+	     enum tree_code __c7)
+{
+  if (TREE_CODE (__t) != __c1	/* Fail only if no listed code matches.  */
+      && TREE_CODE (__t) != __c2
+      && TREE_CODE (__t) != __c3
+      && TREE_CODE (__t) != __c4
+      && TREE_CODE (__t) != __c5
+      && TREE_CODE (__t) != __c6
+      && TREE_CODE (__t) != __c7)
+    tree_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5, __c6,
+		       __c7, 0);
+  return __t;
+}
+
+inline tree
+tree_not_check7 (tree __t, const char *__f, int __l, const char *__g,
+		 enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+		 enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+		 enum tree_code __c7)
+{
+  if (TREE_CODE (__t) == __c1	/* Fail if any listed code matches.  */
+  || TREE_CODE (__t) == __c2
+  || TREE_CODE (__t) == __c3
+  || TREE_CODE (__t) == __c4
+  || TREE_CODE (__t) == __c5
+  || TREE_CODE (__t) == __c6
+  || TREE_CODE (__t) == __c7)
+tree_not_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5,
+			   __c6, __c7, 0);
+  return __t;
+}
+
+inline tree
+tree_check8 (tree __t, const char *__f, int __l, const char *__g,
+	     enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+	     enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+	     enum tree_code __c7, enum tree_code __c8)
+{
+  if (TREE_CODE (__t) != __c1	/* Fail only if no listed code matches.  */
+      && TREE_CODE (__t) != __c2
+      && TREE_CODE (__t) != __c3
+      && TREE_CODE (__t) != __c4
+      && TREE_CODE (__t) != __c5
+      && TREE_CODE (__t) != __c6
+      && TREE_CODE (__t) != __c7
+      && TREE_CODE (__t) != __c8)
+    tree_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5, __c6,
+		       __c7, __c8, 0);
+  return __t;
+}
+
+inline tree
+tree_not_check8 (tree __t, const char *__f, int __l, const char *__g,
+		 enum tree_code __c1, enum tree_code __c2

[PATCH 3/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

2025-07-08 Thread Alex (Waffl3x)


From 11f2b46bb882c427f45f194196d89b22ec470240 Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:52:05 -0600
Subject: [PATCH 3/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

TREE_LANG_FLAG_* is only valid for tree codes that do not use other union
fields in tree_base; previously it only checked for TREE_VEC and SSA_NAME.
This adds checks for INTEGER_CST, VECTOR_CST, POLYNOMIAL_CHREC, MEM_REF,
TARGET_MEM_REF and everything from OMP_ATOMIC to OMP_ATOMIC_CAPTURE_NEW
inclusive.

This check is also added to TREE_UNAVAILABLE because it didn't have any
kind of checks.  The accessors for the other flags in tree_base::u::bits
are already constrained appropriately and not modified.

	PR middle-end/121005

gcc/ChangeLog:

	PR middle-end/121005
	* tree.h (TREE_CHECK_BITS_AVAILABLE): Define.
	(TREE_UNAVAILABLE): Use TREE_CHECK_BITS_AVAILABLE.
	(TREE_LANG_FLAG_0): Use TREE_CHECK_BITS_AVAILABLE.
	(TREE_LANG_FLAG_1): Likewise.
	(TREE_LANG_FLAG_2): Likewise.
	(TREE_LANG_FLAG_3): Likewise.
	(TREE_LANG_FLAG_4): Likewise.
	(TREE_LANG_FLAG_5): Likewise.
	(TREE_LANG_FLAG_6): Likewise.

Signed-off-by: Waffl3x 
---
 gcc/tree.h | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/gcc/tree.h b/gcc/tree.h
index 6c0ce8e0908..90ac3d47861 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -571,6 +571,15 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   TREE_CHECK6 (T, INTEGER_TYPE, ENUMERAL_TYPE, BOOLEAN_TYPE, REAL_TYPE,	\
 	   FIXED_POINT_TYPE, BITINT_TYPE)
 
+/* Check that the bits member of tree_base::u is available for NODE.  A
+   CALL_EXPR that uses the ifn union member must not use TREE_LANG_FLAG_*
+   either, but that cannot be checked for here.  NODE is expanded once.  */
+#define TREE_CHECK_BITS_AVAILABLE(NODE) \
+  (TREE_NOT_RANGE_CHECK \
+    (TREE_NOT_CHECK7 (NODE, INTEGER_CST, TREE_VEC, VECTOR_CST, SSA_NAME, \
+		      POLYNOMIAL_CHREC, MEM_REF, TARGET_MEM_REF), \
+     OMP_ATOMIC, OMP_ATOMIC_CAPTURE_NEW))
+
 /* Here is how primitive or already-canonicalized types' hash codes
are made.  */
 #define TYPE_HASH(TYPE) (TYPE_UID (TYPE))
@@ -1101,7 +1110,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 /* Nonzero in a _DECL if the use of the name is defined as an
unavailable feature by __attribute__((unavailable)).  */
 #define TREE_UNAVAILABLE(NODE) \
-  ((NODE)->base.u.bits.unavailable_flag)
+  ((TREE_CHECK_BITS_AVAILABLE (NODE))->base.u.bits.unavailable_flag)
 
 /* Nonzero indicates an IDENTIFIER_NODE that names an anonymous
aggregate, (as created by anon_aggr_name_format).  */
@@ -1154,19 +1163,19 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 
 /* These flags are available for each language front end to use internally.  */
 #define TREE_LANG_FLAG_0(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_0)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_0)
 #define TREE_LANG_FLAG_1(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_1)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_1)
 #define TREE_LANG_FLAG_2(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_2)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_2)
 #define TREE_LANG_FLAG_3(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_3)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_3)
 #define TREE_LANG_FLAG_4(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_4)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_4)
 #define TREE_LANG_FLAG_5(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_5)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_5)
 #define TREE_LANG_FLAG_6(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_6)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_6)
 
 /* Define additional fields and accessors for nodes representing constants.  */
 
-- 
2.49.0

[PATCH 1/3] tree: Add TREE_NOT_RANGE_CHECK

2025-07-08 Thread Alex (Waffl3x)


From d7107c64189f944adfb64f9e8acfb7ed2cbfe796 Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:25:20 -0600
Subject: [PATCH 1/3] tree: Add TREE_NOT_RANGE_CHECK

There was no inverted counterpart to TREE_RANGE_CHECK, this adds one.

gcc/ChangeLog:

	* tree.cc (tree_not_range_check_failed): New.
	* tree.h (TREE_NOT_RANGE_CHECK): Define.
	(tree_not_range_check_failed): New declaration.
	(tree_not_range_check): New.

Signed-off-by: Waffl3x 
---
 gcc/tree.cc | 45 +
 gcc/tree.h  | 29 +
 2 files changed, 74 insertions(+)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 6a055c8c2d0..9a5477df88d 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9246,6 +9246,51 @@ tree_range_check_failed (const_tree node, const char *file, int line,
 		  function, trim_filename (file), line);
 }
 
+/* Similar to tree_not_check_failed, except that instead of specifying a
+   dozen codes, use the knowledge that they're all sequential.
+   C1 and C2 are an inclusive range.  */
+
+void
+tree_not_range_check_failed (const_tree node, const char *file, int line,
+			     const char *function, enum tree_code c1,
+			     enum tree_code c2)
+{
+  gcc_assert (c1 <= c2);
+  static constexpr size_t separator_length = 4; /* strlen (" or ").  */
+  const size_t buffer_length = [&] ()
+    {
+      size_t length = 0;
+      /* Counts a separator for the first name too; that covers the NUL.  */
+      for (unsigned int c = c1; c <= c2; ++c)
+	length += separator_length
+		  + strlen (get_tree_code_name ((enum tree_code) c));
+      return length;
+    } (); /* IILE.  */
+  gcc_assert (buffer_length);
+  char *buffer = (char *) alloca (buffer_length);
+
+  unsigned int c = c1;
+  /* First name is written without a leading separator.  */
+  const char *node_name = get_tree_code_name ((enum tree_code) c);
+  size_t written_length = strlen (node_name);
+  /* Account for null terminator.  */
+  gcc_assert (written_length + 1 <= buffer_length);
+  strcpy (buffer, node_name);
+  ++c;
+  for (; c <= c2; ++c)
+    {
+      char *unwritten_start = buffer + written_length;
+      node_name = get_tree_code_name ((enum tree_code) c);
+      written_length += strlen (node_name) + separator_length;
+      /* Account for null terminator.  */
+      gcc_assert (written_length + 1 <= buffer_length);
+      strcpy (unwritten_start, " or ");
+      strcpy (unwritten_start + separator_length, node_name);
+    }
+  internal_error ("tree check: expected none of %s, have %s in %s, at %s:%d",
+		  buffer, get_tree_code_name (TREE_CODE (node)),
+		  function, trim_filename (file), line);
+}
 
 /* Similar to tree_check_failed, except that we check that a tree does
not have the specified code, given in CL.  */
diff --git a/gcc/tree.h b/gcc/tree.h
index e87fa0f81bc..8b2d7fd481f 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -380,6 +380,10 @@ code_helper::is_builtin_fn () const
 #define TREE_RANGE_CHECK(T, CODE1, CODE2) \
 (tree_range_check ((T), (CODE1), (CODE2), __FILE__, __LINE__, __FUNCTION__))
 
+#define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2) \
+(tree_not_range_check ((T), (CODE1), (CODE2), \
+		   __FILE__, __LINE__, __FUNCTION__))
+
 #define OMP_CLAUSE_SUBCODE_CHECK(T, CODE) \
 (omp_clause_subcode_check ((T), (CODE), __FILE__, __LINE__, __FUNCTION__))
 
@@ -453,6 +457,10 @@ extern void tree_range_check_failed (const_tree, const char *, int,
  const char *, enum tree_code,
  enum tree_code)
 ATTRIBUTE_NORETURN ATTRIBUTE_COLD;
+extern void tree_not_range_check_failed (const_tree, const char *, int,
+	 const char *, enum tree_code,
+	 enum tree_code)
+ATTRIBUTE_NORETURN ATTRIBUTE_COLD;
 extern void tree_not_class_check_failed (const_tree,
 	 const enum tree_code_class,
 	 const char *, int, const char *)
@@ -497,6 +505,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
 #define TREE_CLASS_CHECK(T, CODE)		(T)
 #define TREE_RANGE_CHECK(T, CODE1, CODE2)	(T)
+#define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2)	(T)
 #define EXPR_CHECK(T)(T)
 #define NON_TYPE_CHECK(T)			(T)
 #define TREE_INT_CST_ELT_CHECK(T, I)		((T)->int_cst.val[I])
@@ -3882,6 +3891,16 @@ tree_range_check (tree __t,
   return __t;
 }
 
+inline tree
+tree_not_range_check (tree __t,
+		      enum tree_code __code1, enum tree_code __code2,
+		      const char *__f, int __l, const char *__g)
+{
+  if (TREE_CODE (__t) >= __code1 && TREE_CODE (__t) <= __code2) /* In range.  */
+    tree_not_range_check_failed (__t, __f, __l, __g, __code1, __code2);
+  return __t;
+}
+
 inline tree
 omp_clause_subcode_check (tree __t, enum omp_clause_code __code,
   const char *__f, int __l, const char *__g)
@@ -4170,6 +4189,16 @@ tree_range_check (const_tree __t,
   return __t;
 }
 
+inline const_tree
+tree_not_range_check (const_tree __t,
+		  enum tree_code __code1, enum tree_code __code2,
+		  const char *__f, int __l, const char *__g)
+{
+  if (TRE

RE: [PATCH V3] x86: Enable separate shrink wrapping

2025-07-08 Thread Cui, Lili

> -Original Message-
> From: Segher Boessenkool 
> Sent: Wednesday, July 9, 2025 1:13 AM
> To: Cui, Lili 
> Cc: ubiz...@gmail.com; gcc-patches@gcc.gnu.org; Liu, Hongtao
> ; richard.guent...@gmail.com; Michael Matz
> 
> Subject: Re: [PATCH V3] x86: Enable separate shrink wrapping
> 
> Hi!
> 
> On Tue, Jul 08, 2025 at 08:51:30AM +, Cui, Lili wrote:
> > > rs6000 does not *have* a hard frame pointer!
> >
> > Oh, I see.  The handling of HARD_FRAME_POINTER_REGNUM seems
> redundant for rs6000.
> 
> Neither the Power Architecture, the Power ISA, nor any of our ABIs has a
> frame pointer.  GCC generic code requires one to exist though (for no reason
> at all), so when given -fno-omit-frame-pointer we dedicate a GPR for it (we
> use 31; I even had to look it up to confirm, as we never actually want to
> use it!)
> 
> > Yes, -fomit-frame-pointer does help performance.  Here is a simple small 
> > case
> https://godbolt.org/z/5Tc3jM7qc . Do you mean to optimize the %rbp here?
> 
> You tell GCC you want a frame pointer.  It follows your instructions.
> There is a function call, so the frame pointer is saved before it; the 
> compiler
> does not realise it does not actually use the frame pointer ever, it could
> optimise this whole stuff out (that's what in rs6000 the
> frame_pointer_needed_indeed thing is for:
>   frame_pointer_needed_indeed
> = frame_pointer_needed
>   && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM);
> )
> 

I was confused about this case and wondered if it could be optimized, but you
enlightened me. I will incorporate this logic into x86 as well. Thanks! :-)

Lili.

> 
> Segher

[PATCH 0/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

2025-07-08 Thread Alex (Waffl3x)

This set of patches is not tested quite yet; I'm going to start testing as
soon as I finish this e-mail.

I would really like to implement the multiple argument tree_check* and
tree_not_check* as a variadic template.  I know how to do it in such a
way that would limit the amount of instantiations, and should still
perform well in debug builds.

Would that be acceptable?  If so, I will rework this set of patches to
do that instead of adding 7 and 8 argument versions.

Alex

Re: [AutoFDO] Fix get_original_name to strip only names that are generated after auto-profile

2025-07-08 Thread Kugan Vivekanandarajah

Hi Honza,

> On 28 Jun 2025, at 1:04 pm, Jan Hubicka  wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> HI,
> I have tested your patch on exchange2 and noticed multiple problems:
>  1) with LTO the translation from dwarf names to symbol names is disabled
> since we free lang data sooner.  I moved the offline pass upstream which
> however also may make us miss clones introduced between free lang data
> and annotation.  This is not very important right now and may be further
> fixed by splitting off auto-profile-read and offline passes.
>  2) I noticed that we miss a lot of AFDO inlines because some code compares
> name indexes for equality in the belief that it compares symbol names.  This
> is not true if we drop prefixes.  For this reason I integrated
> get_original_name
> into the renaming machinery which actually updates indexes so the string table
> continues to work as a symbol table.
> This lets me to drop
>afdo_string_table->get_index (afdo_string_table->get_name (other->name 
> ()))
> hops that were introduced at some places
> 
> Now after renaming all afdo instances should go by DECL_ASSEMBLER_NAME
> names.
>  3) Detection of realized offline instances had an ordering issue where we
> omitted marking of those that were offlined later.  Since we can now
> lookup assembler names, I simplified the logic into single-pass.
> 
> autoprofiledbootstrapped/regtested x86_64-linux, committed.
> 
> gcc/ChangeLog:
> 
>* auto-profile.cc (get_original_name): Only strip suffixes introduced
>after auto-fdo annotation.
>(string_table::get_index_by_decl):  Simplify.
>(string_table::add_name): New member function.
>(string_table::read): Micro-optimize allocation.
>(function_instance::get_function_instance_by_decl): Dump reasons
>for failure; try to compensate lost discriminators.
>(function_instance::merge): Simplify sanity check; do not check
>for realized flag; fix merging of targets.
>(function_instance::offline_if_in_set): Simplify.
>(function_instance::dump): Sanity check that names are consistent.
>(autofdo_source_profile::offline_external_functions): Also handle
>stripping suffixes.
>(walk_block): Move up in source.
>(autofdo_source_profile::offline_unrealized_inlines): Also compute
>realized functions.
>(autofdo_source_profile::get_function_instance_by_name_index): 
> Simplify.
>(autofdo_source_profile::add_function_instance): Simplify.
>(autofdo_source_profile::read): Do not strip suffxies; error on 
> duplicates.
>(mark_realized_functions): Remove.
>(auto_profile): Do not call mark_realized_functions.
>* passes.def: Move auto_profile_offline before free_lang_data.
> 
> gcc/testsuite/ChangeLog:
> 
>* gcc.dg/tree-prof/clone-test.c: New test.
> 

I am seeing an ICE in the offline pass.


during IPA pass: afdo_offline
gmsh/src/mesh/meshGEdge.cpp:979:1: internal compiler error: in 
set_call_location, at auto-profile.cc:433
  979 | }
  | ^
0x262582b internal_error(char const*, ...)
../../gcc/gcc/diagnostic-global-context.cc:517
0x864513 fancy_abort(char const*, int, char const*)
../../gcc/gcc/diagnostic.cc:1810
0x22da0e7 autofdo::function_instance::set_call_location(unsigned long)
../../gcc/gcc/auto-profile.cc:433
0x22da0e7 autofdo::function_instance::set_call_location(unsigned long)
../../gcc/gcc/auto-profile.cc:431
0x22da0e7 autofdo::function_instance::match(cgraph_node*, 
vec&, hash_map, int, simple_hashmap_traits >, 
int> >&)
../../gcc/gcc/auto-profile.cc:1498
0x22d8c8b autofdo::function_instance::match(cgraph_node*, 
vec&, hash_map, int, simple_hashmap_traits >, 
int> >&)
../../gcc/gcc/auto-profile.cc:1258
0x22d8c8b autofdo::function_instance::match(cgraph_node*, 
vec&, hash_map, int, simple_hashmap_traits >, 
int> >&)
../../gcc/gcc/auto-profile.cc:1638
0x22ddf6f autofdo::function_instance::match(cgraph_node*, 
vec&, hash_map, int, simple_hashmap_traits >, 
int> >&)
../../gcc/gcc/hash-table.h:994
0x22ddf6f autofdo::autofdo_source_profile::offline_external_functions()
../../gcc/gcc/auto-profile.cc:2032
0x22de0f3 execute
../../gcc/gcc/auto-profile.cc:4066

Here stmt is D.293641 = OBJ_TYPE_REF(_7;(const struct GEdge)from->57B) (from); 
and set_call_location has call_location_ != UNKNOWN_LOCATION 

Thanks,
Kugan

[PATCH v1] RISCV: Remove the v extension requirement for sat scalar run test

2025-07-08 Thread pan2 . li

From: Pan Li 

The sat scalar run test should not require the v extension, thus
take rv32 || rv64 instead of riscv_v for the requirement.

The below test suites are passed for this patch series.
* The rv64gcv fully regression test.
* The rv32gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat/sat_s_add-run-1-i16.c: Take rv32 || rv64
instead of riscv_v for scalar run test.
* gcc.target/riscv/sat/sat_s_add-run-1-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-1-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-1-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-2-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-2-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-2-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-2-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-3-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-3-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-3-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-3-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-4-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-4-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-4-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_add-run-4-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-1-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-1-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-1-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-1-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-2-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-2-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-2-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-2-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-3-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-3-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-3-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-3-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-4-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-4-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-4-i64.c: Ditto.
* gcc.target/riscv/sat/sat_s_sub-run-4-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-1-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-2-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-3-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-4-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-5-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i32-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i64-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i64-to-i32.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-6-i64-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-7-i16-to-i8.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-7-i32-to-i16.c: Ditto.
* gcc.target/riscv/sat/sat_s_trunc-run-7-i32-to-i8.

[PATCH v2 2/3] tree: Add 7 and 8 argument TREE_[NOT_]CHECK_*

2025-07-08 Thread Alex (Waffl3x)


From 979825e4528bebf5ed83b5580e3d39fb738f43b7 Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:28:12 -0600
Subject: [PATCH 2/3] tree: Add 7 and 8 argument TREE_[NOT_]CHECK_*

I needed an 8 argument version of TREE_NOT_CHECK_*, it felt wrong to leave
a gap so the other versions are also added.  Maybe we should add a variadic
version instead though?

gcc/ChangeLog:

	* tree.h (TREE_CHECK7): Define.
	(TREE_NOT_CHECK7): Likewise.
	(TREE_CHECK8): Likewise.
	(TREE_NOT_CHECK8): Likewise.
	(tree_check7): New.
	(tree_not_check7): New.
	(tree_check8): New.
	(tree_not_check8): New.

Signed-off-by: Waffl3x 
---
 gcc/tree.h | 169 +
 1 file changed, 169 insertions(+)

diff --git a/gcc/tree.h b/gcc/tree.h
index 40255b535f6..289dcf320af 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -370,6 +370,23 @@ code_helper::is_builtin_fn () const
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) \
 (tree_not_check6 ((T), __FILE__, __LINE__, __FUNCTION__, \
 			(CODE1), (CODE2), (CODE3), (CODE4), (CODE5), (CODE6)))
+#define TREE_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) \
+(tree_check7 ((T), __FILE__, __LINE__, __FUNCTION__, \
+	      (CODE1), (CODE2), (CODE3), (CODE4), (CODE5), (CODE6), (CODE7)))
+/* Same argument layout as TREE_CHECK7, forwarded to tree_not_check7.  */
+#define TREE_NOT_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) \
+(tree_not_check7 ((T), __FILE__, __LINE__, __FUNCTION__, \
+		  (CODE1), (CODE2), (CODE3), (CODE4), \
+		  (CODE5), (CODE6), (CODE7)))
+#define TREE_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) \
+(tree_check8 ((T), __FILE__, __LINE__, __FUNCTION__, \
+	      (CODE1), (CODE2), (CODE3), (CODE4), \
+	      (CODE5), (CODE6), (CODE7), (CODE8)))
+/* Same argument layout as TREE_CHECK8, forwarded to tree_not_check8.  */
+#define TREE_NOT_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) \
+(tree_not_check8 ((T), __FILE__, __LINE__, __FUNCTION__, \
+		  (CODE1), (CODE2), (CODE3), (CODE4), \
+		  (CODE5), (CODE6), (CODE7), (CODE8)))
 
 #define CONTAINS_STRUCT_CHECK(T, STRUCT) \
 (contains_struct_check ((T), (STRUCT), __FILE__, __LINE__, __FUNCTION__))
@@ -503,6 +520,10 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define TREE_NOT_CHECK5(T, CODE1, CODE2, CODE3, CODE4, CODE5) (T)
 #define TREE_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
+#define TREE_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) (T)
+#define TREE_NOT_CHECK7(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7) (T)
+#define TREE_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) (T)
+#define TREE_NOT_CHECK8(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6, CODE7, CODE8) (T)
 #define TREE_CLASS_CHECK(T, CODE)		(T)
 #define TREE_RANGE_CHECK(T, CODE1, CODE2)	(T)
 #define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2)	(T)
@@ -3863,6 +3884,80 @@ tree_not_check6 (tree __t, const char *__f, int __l, const char *__g,
   return __t;
 }
 
+inline tree
+tree_check7 (tree __t, const char *__f, int __l, const char *__g,
+	     enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+	     enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+	     enum tree_code __c7)
+{
+  if (TREE_CODE (__t) != __c1	/* Fail only if no listed code matches.  */
+      && TREE_CODE (__t) != __c2
+      && TREE_CODE (__t) != __c3
+      && TREE_CODE (__t) != __c4
+      && TREE_CODE (__t) != __c5
+      && TREE_CODE (__t) != __c6
+      && TREE_CODE (__t) != __c7)
+    tree_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5, __c6,
+		       __c7, 0);
+  return __t;
+}
+
+inline tree
+tree_not_check7 (tree __t, const char *__f, int __l, const char *__g,
+		 enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+		 enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+		 enum tree_code __c7)
+{
+  if (TREE_CODE (__t) == __c1	/* Fail if any listed code matches.  */
+  || TREE_CODE (__t) == __c2
+  || TREE_CODE (__t) == __c3
+  || TREE_CODE (__t) == __c4
+  || TREE_CODE (__t) == __c5
+  || TREE_CODE (__t) == __c6
+  || TREE_CODE (__t) == __c7)
+tree_not_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5,
+			   __c6, __c7, 0);
+  return __t;
+}
+
+inline tree
+tree_check8 (tree __t, const char *__f, int __l, const char *__g,
+	     enum tree_code __c1, enum tree_code __c2, enum tree_code __c3,
+	     enum tree_code __c4, enum tree_code __c5, enum tree_code __c6,
+	     enum tree_code __c7, enum tree_code __c8)
+{
+  if (TREE_CODE (__t) != __c1	/* Fail only if no listed code matches.  */
+      && TREE_CODE (__t) != __c2
+      && TREE_CODE (__t) != __c3
+      && TREE_CODE (__t) != __c4
+      && TREE_CODE (__t) != __c5
+      && TREE_CODE (__t) != __c6
+      && TREE_CODE (__t) != __c7
+      && TREE_CODE (__t) != __c8)
+    tree_check_failed (__t, __f, __l, __g, __c1, __c2, __c3, __c4, __c5, __c6,
+		       __c7, __c8, 0);
+  return __t;
+}
+
+inline tree
+tree_not_check8 (tree __t, const char *__f, int __l, const char *__g,
+		 enum tree_code __c1, enum tree_code __c2, enum tree_code

[PATCH v2 1/3] tree: Add TREE_NOT_RANGE_CHECK

2025-07-08 Thread Alex (Waffl3x)

Fixed some very obvious errors that should have been fixed before I
submitted v1, lesson learned this time I hope.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
From 32fa771d65eeca675d268bcf96b654fe5ac64eaf Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:25:20 -0600
Subject: [PATCH 1/3] tree: Add TREE_NOT_RANGE_CHECK

There was no inverted counterpart to TREE_RANGE_CHECK, this adds one.

gcc/ChangeLog:

	* tree.cc (tree_not_range_check_failed): New.
	* tree.h (TREE_NOT_RANGE_CHECK): Define.
	(tree_not_range_check_failed): New declaration.
	(tree_not_range_check): New.

Signed-off-by: Waffl3x 
---
 gcc/tree.cc | 45 +
 gcc/tree.h  | 29 +
 2 files changed, 74 insertions(+)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 6a055c8c2d0..9a5477df88d 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9246,6 +9246,51 @@ tree_range_check_failed (const_tree node, const char *file, int line,
 		  function, trim_filename (file), line);
 }
 
+/* Similar to tree_not_check_failed, except that instead of specifying a
+   dozen codes, use the knowledge that they're all sequential.
+   C1 and C2 are an inclusive range.  */
+
+void
+tree_not_range_check_failed (const_tree node, const char *file, int line,
+			     const char *function, enum tree_code c1,
+			     enum tree_code c2)
+{
+  gcc_assert (c1 <= c2);
+  static constexpr size_t separator_length = 4; /* strlen (" or ").  */
+  const size_t buffer_length = [&] ()
+    {
+      size_t length = 0;
+      /* Counts a separator for the first name too; that covers the NUL.  */
+      for (unsigned int c = c1; c <= c2; ++c)
+	length += separator_length
+		  + strlen (get_tree_code_name ((enum tree_code) c));
+      return length;
+    } (); /* IILE.  */
+  gcc_assert (buffer_length);
+  char *buffer = (char *) alloca (buffer_length);
+
+  unsigned int c = c1;
+  /* First name is written without a leading separator.  */
+  const char *node_name = get_tree_code_name ((enum tree_code) c);
+  size_t written_length = strlen (node_name);
+  /* Account for null terminator.  */
+  gcc_assert (written_length + 1 <= buffer_length);
+  strcpy (buffer, node_name);
+  ++c;
+  for (; c <= c2; ++c)
+    {
+      char *unwritten_start = buffer + written_length;
+      node_name = get_tree_code_name ((enum tree_code) c);
+      written_length += strlen (node_name) + separator_length;
+      /* Account for null terminator.  */
+      gcc_assert (written_length + 1 <= buffer_length);
+      strcpy (unwritten_start, " or ");
+      strcpy (unwritten_start + separator_length, node_name);
+    }
+  internal_error ("tree check: expected none of %s, have %s in %s, at %s:%d",
+		  buffer, get_tree_code_name (TREE_CODE (node)),
+		  function, trim_filename (file), line);
+}
 
 /* Similar to tree_check_failed, except that we check that a tree does
not have the specified code, given in CL.  */
diff --git a/gcc/tree.h b/gcc/tree.h
index e87fa0f81bc..40255b535f6 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -380,6 +380,10 @@ code_helper::is_builtin_fn () const
 #define TREE_RANGE_CHECK(T, CODE1, CODE2) \
 (tree_range_check ((T), (CODE1), (CODE2), __FILE__, __LINE__, __FUNCTION__))
 
+#define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2) \
+(tree_not_range_check ((T), (CODE1), (CODE2), \
+		   __FILE__, __LINE__, __FUNCTION__))
+
 #define OMP_CLAUSE_SUBCODE_CHECK(T, CODE) \
 (omp_clause_subcode_check ((T), (CODE), __FILE__, __LINE__, __FUNCTION__))
 
@@ -453,6 +457,10 @@ extern void tree_range_check_failed (const_tree, const char *, int,
  const char *, enum tree_code,
  enum tree_code)
 ATTRIBUTE_NORETURN ATTRIBUTE_COLD;
+extern void tree_not_range_check_failed (const_tree, const char *, int,
+	 const char *, enum tree_code,
+	 enum tree_code)
+ATTRIBUTE_NORETURN ATTRIBUTE_COLD;
 extern void tree_not_class_check_failed (const_tree,
 	 const enum tree_code_class,
 	 const char *, int, const char *)
@@ -497,6 +505,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define TREE_NOT_CHECK6(T, CODE1, CODE2, CODE3, CODE4, CODE5, CODE6) (T)
 #define TREE_CLASS_CHECK(T, CODE)		(T)
 #define TREE_RANGE_CHECK(T, CODE1, CODE2)	(T)
+#define TREE_NOT_RANGE_CHECK(T, CODE1, CODE2)	(T)
 #define EXPR_CHECK(T)(T)
 #define NON_TYPE_CHECK(T)			(T)
 #define TREE_INT_CST_ELT_CHECK(T, I)		((T)->int_cst.val[I])
@@ -3882,6 +3891,16 @@ tree_range_check (tree __t,
   return __t;
 }
 
+inline tree
+tree_not_range_check (tree __t,
+		  enum tree_code __code1, enum tree_code __code2,
+		  const char *__f, int __l, const char *__g)
+{
+  if (!(TREE_CODE (__t) < __code1 || TREE_CODE (__t) > __code2))
+tree_not_range_check_failed (__t, __f, __l, __g, __code1, __code2);
+  return __t;
+}
+
 inline tree
 omp_clause_subcode_check (tree __t, enum omp_clause_code __code,
   const char *__f, int __l, const char *__g)
@@ -4170,6 +4189,16 @@ tree_range_check (const_tree __t,
   return __t;
 }
 
+

Re: [PATCH v4 6/6] libstdc++: Set FMT for complete C++23 mdspan [PR107761].

2025-07-08 Thread Tomasz Kaminski

On Tue, Jul 8, 2025 at 11:34 AM Jonathan Wakely  wrote:

> On Tue, 8 Jul 2025 at 09:27, Luc Grosheintz 
> wrote:
> >
> > PR libstdc++/107761
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/version.def (mdspan): Set to 202207 and remove
> > no_stdname.
> > * include/bits/version.h: Regenerate.
> > * testsuite/23_containers/mdspan/ftm.cc: Test presence
> > of FTM.
>
> Please spell this out in full as "feature test macro", there's no need
> to use an initialism that some people won't recognise. (And the commit
> summary line says "FMT" not "FTM" anyway ;-)
>
I am also renaming the test file from ftm.cc to version.cc.

>
> I would also prefer the test to be called version.cc not ftm.cc as
> that's what we use elsewhere.
>
> I think we can do that when pushing the commit though, we don't need
> another patch for it.
>
> Thanks for getting <mdspan> done! Great work.
>
> >
> > Signed-off-by: Luc Grosheintz 
> > ---
> >  libstdc++-v3/include/bits/version.def  | 3 +--
> >  libstdc++-v3/include/bits/version.h| 3 ++-
> >  libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
> >  3 files changed, 12 insertions(+), 3 deletions(-)
> >  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> >
> > diff --git a/libstdc++-v3/include/bits/version.def
> b/libstdc++-v3/include/bits/version.def
> > index 64f8190d240..f1015abdbfa 100644
> > --- a/libstdc++-v3/include/bits/version.def
> > +++ b/libstdc++-v3/include/bits/version.def
> > @@ -1007,9 +1007,8 @@ ftms = {
> >
> >  ftms = {
> >name = mdspan;
> > -  no_stdname = true; // FIXME: remove
> >values = {
> > -v = 1; // FIXME: 202207
> > +v = 202207;
> >  cxxmin = 23;
> >};
> >  };
> > diff --git a/libstdc++-v3/include/bits/version.h
> b/libstdc++-v3/include/bits/version.h
> > index 744246a9938..80f6586372d 100644
> > --- a/libstdc++-v3/include/bits/version.h
> > +++ b/libstdc++-v3/include/bits/version.h
> > @@ -1126,8 +1126,9 @@
> >
> >  #if !defined(__cpp_lib_mdspan)
> >  # if (__cplusplus >= 202100L)
> > -#  define __glibcxx_mdspan 1L
> > +#  define __glibcxx_mdspan 202207L
> >  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
> > +#   define __cpp_lib_mdspan 202207L
> >  #  endif
> >  # endif
> >  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan)
> */
> > diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> > new file mode 100644
> > index 000..106ee4010ee
> > --- /dev/null
> > +++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> > @@ -0,0 +1,9 @@
> > +// { dg-do compile { target c++23 } }
> > +#include 
> > +
> > +#ifndef __cpp_lib_mdspan
> > +#error "Feature test macro __cpp_lib_mdspan is missing for "
> > +#if __cpp_lib_mdspan < 202207
> > +#error "Feature test macro __cpp_lib_mdspan has the wrong value"
> > +#endif
> > +#endif
> > --
> > 2.49.0
> >
>
>

Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash structs and members for P1901R2

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 13:24, Jonathan Wakely  wrote:
>
> On Tue, 8 Jul 2025 at 12:54, Paul Keir  wrote:
> >
> > Let me know if this needs a refresh.
>
> The patch fails to apply:
>
> error: patch failed: libstdc++-v3/include/bits/shared_ptr_base.h:1715
> error: libstdc++-v3/include/bits/shared_ptr_base.h: patch does not apply
>
> but I think it's your mail client munging whitespace, not something
> that can be fixed by rebasing on trunk.
> I'll figure it out and apply it by hand.

OK, I added your github fork and did a merge --squash from there


>
>
> >
> > 
> > From: Paul Keir 
> > Sent: 06 June 2025 5:32 PM
> > To: Jonathan Wakely
> > Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
> > Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
> > structs and members for P1901R2
> >
> > No problem. That should be it included below. Github diff for convenience: 
> > https://github.com/gcc-mirror/gcc/compare/e37eb85...pkeir:gcc:1b7c7c1a
> >
> > Signed-off-by: Paul Keir 
> >
> > Tested on x86_64-linux.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/shared_ptr.h: Added owner_equal and owner_hash 
> > members to shared_ptr and weak_ptr.
> > * include/bits/shared_ptr_base.h: Added owner_equal and owner_hash 
> > structs.
> > * include/bits/version.def: Added 
> > __cpp_lib_smart_ptr_owner_equality feature macro.
> > * include/bits/version.h: Update generated for 
> > __cpp_lib_smart_ptr_owner_equality feature macro.
> > * include/std/memory: Added define for 
> > __glibcxx_want_smart_ptr_owner_equality.
> > * testsuite/20_util/owner_equal/version.cc: New test.
> > * testsuite/20_util/owner_equal/cmp.cc: New test.
> > * testsuite/20_util/owner_equal/noexcept.cc: New test.
> > * testsuite/20_util/owner_hash/cmp.cc: New test.
> > * testsuite/20_util/owner_hash/noexcept.cc: New test.
> > * testsuite/20_util/shared_ptr/observers/owner_equal.cc: New test.
> > * testsuite/20_util/shared_ptr/observers/owner_hash.cc: New test.
> > * testsuite/20_util/weak_ptr/observers/owner_equal.cc: New test.
> > * testsuite/20_util/weak_ptr/observers/owner_hash.cc: New test.
> >
> > ---
> >
> >  include/bits/shared_ptr.h  |  57 +++
> >  include/bits/shared_ptr_base.h |  40 
> >  include/bits/version.def   |   9 ++
> >  include/bits/version.h |  10 ++
> >  include/std/memory |   1 +
> >  testsuite/20_util/owner_equal/cmp.cc   | 105 
> > +
> >  testsuite/20_util/owner_equal/noexcept.cc  |  30 ++
> >  testsuite/20_util/owner_equal/version.cc   |  13 +++
> >  testsuite/20_util/owner_hash/cmp.cc|  87 +
> >  testsuite/20_util/owner_hash/noexcept.cc   |  16 
> >  .../20_util/shared_ptr/observers/owner_equal.cc|  74 +++
> >  .../20_util/shared_ptr/observers/owner_hash.cc |  71 ++
> >  .../20_util/weak_ptr/observers/owner_equal.cc  |  52 ++
> >  testsuite/20_util/weak_ptr/observers/owner_hash.cc |  50 ++
> >  14 files changed, 615 insertions(+)
> >
> > diff --git a/libstdc++-v3/include/bits/shared_ptr.h 
> > b/libstdc++-v3/include/bits/shared_ptr.h
> > index a196a0f1212..dd02ab16e59 100644
> > --- a/libstdc++-v3/include/bits/shared_ptr.h
> > +++ b/libstdc++-v3/include/bits/shared_ptr.h
> > @@ -909,6 +909,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  : public _Sp_owner_less, shared_ptr<_Tp>>
> >  { };
> >
> > +#ifdef __glibcxx_smart_ptr_owner_equality // >= C++26
> > +
> > +  /**
> > +   * @brief Provides ownership-based hashing.
> > +   * @headerfile memory
> > +   * @since C++26
> > +   */
> > +  struct owner_hash
> > +  {
> > +template
> > +size_t operator()(const shared_ptr<_Tp>& __s) const noexcept
> > +{ return __s.owner_hash(); }
> > +
> > +template
> > +size_t operator()(const weak_ptr<_Tp>& __s) const noexcept
> > +{ return __s.owner_hash(); }
> > +
> > +using is_transparent = void;
> > +  };
> > +
> > +  /**
> > +   * @brief Provides ownership-based mixed equality comparisons of
> > +   *shared and weak pointers.
> > +   * @headerfile memory
> > +   * @since C++26
> > +   */
> > +  struct owner_equal
> > +  {
> > +template
> > +bool
> > +operator()(const shared_ptr<_Tp1>& __lhs,
> > +   const shared_ptr<_Tp2>& __rhs) const noexcept
> > +{ return __lhs.owner_equal(__rhs); }
> > +
> > +template
> > +bool
> > +operator()(const shared_ptr<_Tp1>& __lhs,
> > +   const   weak_ptr<_Tp2>& __rhs) const noexcept
> > +{ return __lhs.owner_equal(__rhs); }
> > +
> > +template
> > +bool
> > +operator()(const   weak_ptr<_Tp1>& __lhs,
> > +   const shared_ptr<_Tp2>&

[PATCH] Avoid IPA opts around guality plumbing

2025-07-08 Thread Richard Biener

The following avoids inlining the actual main() (renamed to
guality_main) into the guality plumbing.  This can cause
jump threading opportunities to appear and generally increase
the chance that what we actually test isn't what we think.  Likewise
make guality_check noipa instead of just noinline.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  With -m32
for me this adds

+FAIL: gcc.dg/guality/pr41447-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  -DPREVENT_OPTIMIZATION execution test
+FAIL: gcc.dg/guality/pr41447-1.c   -O3 -g  -DPREVENT_OPTIMIZATION  execution test
+FAIL: gcc.dg/guality/pr41447-1.c   -Os  -DPREVENT_OPTIMIZATION  execution test

on my testing machine.  It avoids an additional FAIL with another
patch I am testing that exposes a jump threading opportunity
in main with guality_main inlined.

OK for trunk?

Thanks,
Richard.

gcc/testsuite/
* gcc.dg/guality/guality.h (guality_main): Declare noipa.
(guality_check): Likewise.
---
 gcc/testsuite/gcc.dg/guality/guality.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/guality/guality.h 
b/gcc/testsuite/gcc.dg/guality/guality.h
index d41327c24d5..48b59d2e324 100644
--- a/gcc/testsuite/gcc.dg/guality/guality.h
+++ b/gcc/testsuite/gcc.dg/guality/guality.h
@@ -204,9 +204,10 @@ int volatile guality_attached;
of this wrapping, guality_main may not have an empty argument
list.  */
 
-extern int guality_main (int argc, char *argv[]);
+extern int __attribute__((noipa))
+guality_main (int argc, char *argv[]);
 
-static void __attribute__((noinline))
+static void __attribute__((noipa))
 guality_check (const char *name, gualchk_t value, int unknown_ok);
 
 /* Set things up, run guality_main, then print a summary and quit.  */
-- 
2.43.0

Re: [PATCH] x86: Keep non all 0s/1s redundant vector loads on AMD znverN

2025-07-08 Thread Richard Biener

On Tue, Jul 8, 2025 at 12:48 PM H.J. Lu  wrote:
>
> aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
> commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
> Author: H.J. Lu 
> Date:   Fri May 9 07:17:07 2025 +0800
>
> x86: Extend the remove_redundant_vector pass
>
> which removed non all 0s/1s redundant vector loads, caused SPEC CPU 2017
> 519.lbm_r and 470.lbm performance regressions on AMD znverN processors.
> Add a tuning option to keep non all 0s/1s redundant vector loads on AMD
> znverN processors.

Do we know what actually happens here or is this basically reverting the change
based on a new tunable and the reported regression?

If I read the pass correctly it might insert broadcasts on paths where
they were not originally computed (it inserts after the scalar def, which
might be far away).  ix86_broadcast_inner suggests it replaces extracts
from a broadcast with the original broadcast value/register, which means
it might increase the lifetime of the broadcast register.

Both shouldn't be causing regressions specifically on Zen2, but can be
bad.  I think we need to understand better what the pass does (it's
written without much commentary, so I tried to quickly reverse engineer
it), and improve it, avoiding cases where it obviously increases register
lifetime.

> gcc/
>
> PR target/120941
> * config/i386/i386-features.cc (ix86_broadcast_inner): Keep
> non all 0s/1s redundant vector loads if asked.
> * config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
> New tuning.
>
> gcc/testsuite/
>
> PR target/120941
> * gcc.target/i386/pr120941-1a.c: New test.
> * gcc.target/i386/pr120941-1b.c: Likewise.
> * gcc.target/i386/pr120941-1c.c: Likewise.
> * gcc.target/i386/pr120941-1d.c: Likewise.
>
> OK for master?
>
> Thanks.
>
> --
> H.J.

Re: [PATCH v5 0/2] tree-optimization: extend scalar comparison folding to vectors [PR119196]

2025-07-08 Thread Richard Biener

On Tue, 8 Jul 2025, Icen Zeyada wrote:

> 
> New in V5:
> Added a check to confirm that the target is supported.

OK.

Thanks,
Richard.

> ---
> 
> This patch generalizes existing scalar bitwise comparison simplifications
> to vector types by matching patterns of the form
> 
> ```
> (cmp x y) bit_and (cmp x y)
> (cmp x y) bit_ior (cmp x y)
> (cmp x y) bit_xor (cmp x y)
> ```
> 
> 
> Icen Zeyada (2):
>   tree-simplify: unify simple_comparison ops in vec_cond for bit
> and/or/xor [PR119196]
>   gimple-fold: extend vector simplification to match scalar bitwise
> optimizations [PR119196]
> 
>  gcc/match.pd  | 66 +++---
>  .../gcc.target/aarch64/vector-compare-5.c | 67 +++
>  2 files changed, 122 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH] libstdc++: Make debug iterator pointer sequence const [PR116369]

2025-07-08 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 11:12, Jonathan Wakely  wrote:
>
> On Sat, 5 Jul 2025 at 14:03, François Dumont  wrote:
> >
> > On 01/07/2025 22:51, Jonathan Wakely wrote:
> > > On Mon, 16 Jun 2025 at 18:36, François Dumont  
> > > wrote:
> > >> I eventually wonder if it is such a big deal to add the new symbols for 
> > >> _GLIBCXX_DEBUG mode.
> > > I like this version much more than the one trying to duplicate symbols 
> > > with asm.
> > >
> > >
> > >> Here is the patch doing this. It avoids to add many const_cast which is 
> > >> what we are trying to achieve here.
> > > I'm still not really sure if this is worth it though - is it fixing a
> > > bug or a correctness problem? (using const_cast is safe if the objects
> > > aren't actually const)
> > >
> > > All the new tests already pass, even without this patch. Are these
> > > just tests for const member functions that we aren't currently testing
> > > at all?
> >
> > Those tests are showing the same UB that you fixed as part of your
> > PR116369 patch but this time with local_iterator. Even if tests are
> > passing without this patch it's still UB before it, do you prefer to
> > remove those tests then ?
>
> Ah OK, so they are showing UB ... it's just that the compiler doesn't
> actually complain about it.
>
> Please make the const containers in those tests global variables,
> instead of local variables inside main(). The compiler won't put local
> variables in ROM so the test would never fail. It might put globals in
> ROM (although not after your patch, because of the mutable members,
> which is why the patch is actually fixing something).
>
>
> >
> > Globally this patch is following your recommendations on PR116369 commit
> > where you were saying:
> >
> >  Ideally we would not need the const_cast at all. Instead, the _M_attach
> >  member (and everything it calls) should be const-qualified. That would
> >  work fine now, because the members that it ends up modifying are
> >  mutable. Making that change would require a number of new exports from
> >  the shared library, and would require retaining the old non-const
> > member
> >  functions (maybe as symbol aliases) for backwards compatibility. That
> >  might be worth changing at some point, but isn't done here.
> >
> > In addition to what is said here I made the sequence pointer const too
> > as the added mutable allows that.
> >
> > It was also the occasion to fix some types used in std::forward_list in
> > Debug mode.
> >
> > Do you think it is useless eventually ?
>
> I think it's worth doing, I was just concerned about the __asm__
> solution used in the initial patches.
>
> OK for trunk with the adjusted tests, thanks.


I see a large number of new test failures. It looks like this fix is needed:

--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -144,13 +144,13 @@ namespace __gnu_debug
  //std::swap(_M_this()->_M_version, __other._M_version);
  _Safe_iterator_base* __this_its = _M_this()->_M_iterators;
  _S_swap_aux(__other, __other._M_iterators,
- _M_this(), _M_this()->_M_iterators);
+  *_M_this(), _M_this()->_M_iterators);
  _Safe_iterator_base* __this_const_its = _M_this()->_M_const_iterators;
  _S_swap_aux(__other, __other._M_const_iterators,
- _M_this(), _M_this()->_M_const_iterators);
-  _S_swap_aux(_M_this(), __this_its,
+  *_M_this(), _M_this()->_M_const_iterators);
+  _S_swap_aux(*_M_this(), __this_its,
 __other, __other._M_iterators);
-  _S_swap_aux(_M_this(), __this_const_its,
+  _S_swap_aux(*_M_this(), __this_const_its,
 __other, __other._M_const_iterators);
}

Re: [PATCH] libstdc++: Fix attribute order on __normal_iterator friends [PR120949]

2025-07-08 Thread Tomasz Kaminski

On Sat, Jul 5, 2025 at 1:27 AM Jonathan Wakely  wrote:

> In r16-1911-g6596f5ab746533 I claimed to have reordered some attributes
> for compatibility with Clang, but it looks like I got the Clang
> restriction backwards and put them all in the wrong order.
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/120949
> * include/bits/stl_iterator.h (__normal_iterator): Fix order of
> always_inline and nodiscard attributes for Clang compatibility.
> ---
>
> Tested x86_64-linux, and tested lightly with Clang.
>
LGTM.
I have also checked that this is indeed an order that is accepted by clang.
The fact that this restriction is only put on hidden friends is strange.

>
>  libstdc++-v3/include/bits/stl_iterator.h | 30 +---
>  1 file changed, 16 insertions(+), 14 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/stl_iterator.h
> b/libstdc++-v3/include/bits/stl_iterator.h
> index a7188f46f6db..75e794f6c020 100644
> --- a/libstdc++-v3/include/bits/stl_iterator.h
> +++ b/libstdc++-v3/include/bits/stl_iterator.h
> @@ -1211,7 +1211,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #else
> // Forward iterator requirements
>template
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> _GLIBCXX_CONSTEXPR
> bool
> @@ -1220,7 +1220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() == __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>friend
>_GLIBCXX_CONSTEXPR
>bool
> @@ -1229,7 +1229,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __lhs.base() == __rhs.base(); }
>
>template
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> _GLIBCXX_CONSTEXPR
> bool
> @@ -1238,7 +1238,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() != __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>friend
>_GLIBCXX_CONSTEXPR
>bool
> @@ -1248,15 +1248,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>// Random access iterator requirements
>template
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> _GLIBCXX_CONSTEXPR
> +   _GLIBCXX_CONSTEXPR
> inline bool
> operator<(const __normal_iterator& __lhs,
>   const __normal_iterator<_Iter, _Container>& __rhs)
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() < __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>friend
>_GLIBCXX20_CONSTEXPR
>bool
> @@ -1265,15 +1266,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __lhs.base() < __rhs.base(); }
>
>template
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> _GLIBCXX_CONSTEXPR
> +   _GLIBCXX_CONSTEXPR
> bool
> operator>(const __normal_iterator& __lhs,
>   const __normal_iterator<_Iter, _Container>& __rhs)
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() > __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>friend
>_GLIBCXX_CONSTEXPR
>bool
> @@ -1282,7 +1284,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __lhs.base() > __rhs.base(); }
>
>template
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> _GLIBCXX_CONSTEXPR
> bool
> @@ -1291,7 +1293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() <= __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>friend
>_GLIBCXX_CONSTEXPR
>bool
> @@ -1300,7 +1302,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __lhs.base() <= __rhs.base(); }
>
>template
> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
> friend
> _GLIBCXX_CONSTEXPR
> bool
> @@ -1309,7 +1311,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> _GLIBCXX_NOEXCEPT
> { return __lhs.base() >= __rhs.base(); }
>
> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
> +  _GLIBCXX_NODISCARD __attribute__((__always_in

Re: [PATCH] libstdc++: Ensure pool resources meet alignment requirements [PR118681]

2025-07-08 Thread Tomasz Kaminski

On Sat, Jul 5, 2025 at 1:12 AM Jonathan Wakely  wrote:

> For allocations with size > alignment and size % alignment != 0 we were
> sometimes returning pointers that did not meet the requested alignment.
> For example, allocate(24, 16) would select the pool for 24-byte objects
> and the second allocation from that pool (at offset 24 bytes into the
> pool) is only 8-byte aligned not 16-byte aligned.
>
> The pool resources need to round up the requested allocation size to a
> multiple of the alignment, so that the selected pool will always return
> allocations that meet the alignment requirement.
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/118681
> * src/c++17/memory_resource.cc (choose_block_size): New
> function.
> (synchronized_pool_resource::do_allocate): Use choose_block_size
> to determine appropriate block size.
> (synchronized_pool_resource::do_deallocate): Likewise
> (unsynchronized_pool_resource::do_allocate): Likewise.
> (unsynchronized_pool_resource::do_deallocate): Likewise
> * testsuite/20_util/synchronized_pool_resource/118681.cc: New
> test.
> * testsuite/20_util/unsynchronized_pool_resource/118681.cc: New
> test.
> ---
>
> Tested x86_64-linux.
>
I have expressed my preference for doing this check inside
memory_resource::allocate.
I know that this throws any potential detection of misaligned resources
under the bus,
however I think there is more benefit of not exposing custom resources to
strange alignment values,
and using the wrong pool.

On the standard level, I think we should make behavior erroneous here, with
defined behavior being bit_ceil.

>
>  libstdc++-v3/src/c++17/memory_resource.cc | 26 +++--
>  .../synchronized_pool_resource/118681.cc  |  5 ++
>  .../unsynchronized_pool_resource/118681.cc| 58 +++
>  3 files changed, 85 insertions(+), 4 deletions(-)
>  create mode 100644
> libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
>  create mode 100644
> libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
>
> diff --git a/libstdc++-v3/src/c++17/memory_resource.cc
> b/libstdc++-v3/src/c++17/memory_resource.cc
> index fac4c782c5f7..fddfe2c7dd98 100644
> --- a/libstdc++-v3/src/c++17/memory_resource.cc
> +++ b/libstdc++-v3/src/c++17/memory_resource.cc
> @@ -1242,12 +1242,30 @@ namespace pmr
>  return pools;
>}
>
> +  static inline size_t
> +  choose_block_size(size_t bytes, size_t alignment)
> +  {
> +if (bytes == 0) [[unlikely]]
> +  return alignment;
> +
> +// Use bit_ceil in case alignment is invalid (i.e. not a power of
> two).
> +size_t mask = std::__bit_ceil(alignment) - 1;
> +// Round up to a multiple of alignment.
> +size_t block_size = (bytes + mask) & ~mask;
> +
> +if (block_size >= bytes) [[likely]]
> +  return block_size;
> +
> +// Wrapped around to zero, bytes must have been impossibly large.
> +return numeric_limits::max();
> +  }
> +
>// Override for memory_resource::do_allocate
>void*
>synchronized_pool_resource::
>do_allocate(size_t bytes, size_t alignment)
>{
> -const auto block_size = std::max(bytes, alignment);
> +const auto block_size = choose_block_size(bytes, alignment);
>  const pool_options opts = _M_impl._M_opts;
>  if (block_size <= opts.largest_required_pool_block)
>{
> @@ -1294,7 +1312,7 @@ namespace pmr
>synchronized_pool_resource::
>do_deallocate(void* p, size_t bytes, size_t alignment)
>{
> -size_t block_size = std::max(bytes, alignment);
> +size_t block_size = choose_block_size(bytes, alignment);
>  if (block_size <= _M_impl._M_opts.largest_required_pool_block)
>{
> const ptrdiff_t index = pool_index(block_size, _M_impl._M_npools);
> @@ -1453,7 +1471,7 @@ namespace pmr
>void*
>unsynchronized_pool_resource::do_allocate(size_t bytes, size_t
> alignment)
>{
> -const auto block_size = std::max(bytes, alignment);
> +const auto block_size = choose_block_size(bytes, alignment);
>  if (block_size <= _M_impl._M_opts.largest_required_pool_block)
>{
> // Recreate pools if release() has been called:
> @@ -1470,7 +1488,7 @@ namespace pmr
>unsynchronized_pool_resource::
>do_deallocate(void* p, size_t bytes, size_t alignment)
>{
> -size_t block_size = std::max(bytes, alignment);
> +size_t block_size = choose_block_size(bytes, alignment);
>  if (block_size <= _M_impl._M_opts.largest_required_pool_block)
>{
> if (auto pool = _M_find_pool(block_size))
> diff --git
> a/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
> b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
> new file mode 100644
> index ..6d7434ff9106
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
> @@ -0,0 +1,5 @@
> +// { dg-do run { target c++17 } }

Re: [PATCH 0/1] [RFC][AutoFDO]: Source filename tracking in GCOV

2025-07-08 Thread Jan Hubicka

> Hi Honza,
> 
> > On 8 Jul 2025, at 2:26 am, Jan Hubicka  wrote:
> > 
> > External email: Use caution opening links or attachments
> > 
> > 
> > Hi,
> > as discussed also on the autofdo pull request, LLVM solves the same
> > problem using -funique-internal-linkage-names
> > https://reviews.llvm.org/D73307
> > 
> > All non-public functions get their symbols renamed from
> > .__uniq.
> 
> How is  __uniq. added to static 
> symbols in the profile?

The patch does three things
 1) extends ipa-visibility pass to rename all non-public function
symbols adding the __uniq suffix.
This skips those marked as used so asm statements can work.
 2) makes dwarf2out to always add DW_AT_linkage_name attribute to
inlined to DW_TAG_inlined_subroutine dies
 3) extends auto-profile to accept profiles with unique names
when building without unique names and vice versa.

I think it is pretty much what LLVM does except that I compute hash
based on object file name while LLVM uses filename of the outer
translation unit (which is easy to change, I just wanted to have
something functional to see how it works in practice).

There is a comment on the pull request comment I added
https://github.com/google/autofdo/pull/244#issuecomment-3046121191
So it seems that llvm folks are not that happy with uniq suffixes since
it breaks asm statements in the Linux kernel.  I originally thought renaming
is done in dwarf only but indeed renaming all static symbols is quite
radical.

Their proposal
https://discourse.llvm.org/t/rfc-keep-globalvalue-guids-stable/84801
seems to be equivalent to what we have as profile_id.  It is 64bit
identifier of a function that should be stable across builds and (modulo
conflicts) unique within translated program.  Currently it is assigned
only to functions that may be used as indirect call targets and is used
by normal FDO for resolving cross-unit indirect calls.

One option would be to use profile IDs in auto-profiles too.  I guess
they can be streamed to dwarf via an extension as 64bit IDs. But it is
not clear to me that it is what LLVM folks work on and if it will
eventually get upstreamed.

If we want to finish your solution (adding file names in create_gcov). I
think we need to solve the following:
 1) extend dwarf2out to add DW_AT_linkage_name attributes for all
function symbols.  This is easy to do.
 2) verify that create_gcov can safely determine symbols with public
or static linkage (even inlined ones).  There is DW_AT_public
attribute
and stream file names only for public linkage symbols
 3) instead of streaming filename of file containing the symbol
stream filename of the corresponding translation unit.

I would say that the advantage of profile id is probably shorter gcov
files, advantage of streaming filename:symbol_name pairs is that the
profile info is easier to read.  What do you think?

Honza

Re: [PATCH] Avoid IPA opts around guality plumbing

2025-07-08 Thread Jakub Jelinek

On Tue, Jul 08, 2025 at 03:17:39PM +0200, Richard Biener wrote:
> The following avoids inlining the actual main() (renamed to
> guality_main) into the guality plumbing.  This can cause
> jump threading opportunities to appear and generally increase
> the chance that what we actually test isn't what we think.  Likewise
> make guality_check noipa instead of just noinline.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.  With -m32
> for me this adds
> 
> +FAIL: gcc.dg/guality/pr41447-1.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  -DPREVENT_OPTIMIZATION execution test
> +FAIL: gcc.dg/guality/pr41447-1.c   -O3 -g  -DPREVENT_OPTIMIZATION  execution test
> +FAIL: gcc.dg/guality/pr41447-1.c   -Os  -DPREVENT_OPTIMIZATION  execution test
> 
> on my testing machine.  It avoids an additional FAIL with another
> patch I am testing that exposes a jump threading opportunity
> in main with guality_main inlined.
> 
> OK for trunk?
> 
> Thanks,
> Richard.
> 
> gcc/testsuite/
>   * gcc.dg/guality/guality.h (guality_main): Declare noipa.
>   (guality_check): Likewise.

LGTM.

Jakub

Re: [PATCH] libstdc++: Ensure pool resources meet alignment requirements [PR118681]

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 14:12, Tomasz Kaminski  wrote:
>
>
>
> On Tue, Jul 8, 2025 at 2:48 PM Jonathan Wakely  wrote:
>>
>> On Tue, 8 Jul 2025 at 13:31, Tomasz Kaminski  wrote:
>> >
>> >
>> >
>> > On Sat, Jul 5, 2025 at 1:12 AM Jonathan Wakely  wrote:
>> >>
>> >> For allocations with size > alignment and size % alignment != 0 we were
>> >> sometimes returning pointers that did not meet the requested alignment.
>> >> For example, allocate(24, 16) would select the pool for 24-byte objects
>> >> and the second allocation from that pool (at offset 24 bytes into the
>> >> pool) is only 8-byte aligned not 16-byte aligned.
>> >>
>> >> The pool resources need to round up the requested allocation size to a
>> >> multiple of the alignment, so that the selected pool will always return
>> >> allocations that meet the alignment requirement.
>> >>
>> >> libstdc++-v3/ChangeLog:
>> >>
>> >> PR libstdc++/118681
>> >> * src/c++17/memory_resource.cc (choose_block_size): New
>> >> function.
>> >> (synchronized_pool_resource::do_allocate): Use choose_block_size
>> >> to determine appropriate block size.
>> >> (synchronized_pool_resource::do_deallocate): Likewise
>> >> (unsynchronized_pool_resource::do_allocate): Likewise.
>> >> (unsynchronized_pool_resource::do_deallocate): Likewise
>> >> * testsuite/20_util/synchronized_pool_resource/118681.cc: New
>> >> test.
>> >> * testsuite/20_util/unsynchronized_pool_resource/118681.cc: New
>> >> test.
>> >> ---
>> >>
>> >> Tested x86_64-linux.
>> >
>> > I have expressed my preference of doing this check inside the 
>> > memory_resource::allocate.
>>
>> We can still do that separately. The point of this patch is to fix the
>> misaligned allocations from the pool resources, not improve the EB vs
>> UB position for the memory_resource::allocate API.
>
> Because do_allocate is not publicly accessible, and can be called only via 
> allocate, changing
> memory_resource::allocate API will also fix the misaligned allocations from 
> pool resources.

No it won't, the bug happens with res.allocate(24,16) and 16 is a
valid alignment.

The bug is in the logic that finds the appropriate object pool to
allocate from, and happens for valid alignments.

The reason I added a call to bit_ceil in the patch is because the fix
to round up to a multiple of the alignment assumes that the alignment
is valid (previously we didn't really care if it was valid, we just
used it).

>
> And I believe,  moving the check between specific pool do_allocate to 
> memory_resource::allocate,
> would be ABI break, as we may end up with binary that contains:
>   * memory_resource::allocate without check (check is in Derived::do_allocate)
>   * Derived::do_allocate without check (check is in memory_resource::allocate)
> (Let me know if I misunderstood how this kind of ABI breaks work).
>>
>>
>> > I know that this throws any potential detection of misaligned resources 
>> > under the bus,
>> > however I think there is more benefit of not exposing custom resources to 
>> > strange alignment values,
>> > and using the wrong pool.
>> >
>> > On the standard level, I think we should make behavior erroneous here, 
>> > with defined behavior being bit_ceil.
>>
>> Yeah, I think that's a good idea.
>>
>>
>> >>
>> >>
>> >>  libstdc++-v3/src/c++17/memory_resource.cc | 26 +++--
>> >>  .../synchronized_pool_resource/118681.cc  |  5 ++
>> >>  .../unsynchronized_pool_resource/118681.cc| 58 +++
>> >>  3 files changed, 85 insertions(+), 4 deletions(-)
>> >>  create mode 100644 
>> >> libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
>> >>  create mode 100644 
>> >> libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
>> >>
>> >> diff --git a/libstdc++-v3/src/c++17/memory_resource.cc 
>> >> b/libstdc++-v3/src/c++17/memory_resource.cc
>> >> index fac4c782c5f7..fddfe2c7dd98 100644
>> >> --- a/libstdc++-v3/src/c++17/memory_resource.cc
>> >> +++ b/libstdc++-v3/src/c++17/memory_resource.cc
>> >> @@ -1242,12 +1242,30 @@ namespace pmr
>> >>  return pools;
>> >>}
>> >>
>> >> +  static inline size_t
>> >> +  choose_block_size(size_t bytes, size_t alignment)
>> >> +  {
>> >> +if (bytes == 0) [[unlikely]]
>> >> +  return alignment;
>> >> +
>> >> +// Use bit_ceil in case alignment is invalid (i.e. not a power of 
>> >> two).
>> >> +size_t mask = std::__bit_ceil(alignment) - 1;
>> >> +// Round up to a multiple of alignment.
>> >> +size_t block_size = (bytes + mask) & ~mask;
>> >> +
>> >> +if (block_size >= bytes) [[likely]]
>> >> +  return block_size;
>> >> +
>> >> +// Wrapped around to zero, bytes must have been impossibly large.
>> >> +return numeric_limits::max();
>> >> +  }
>> >> +
>> >>// Override for memory_resource::do_allocate
>> >>void*
>> >>synchronized_pool_resource::
>> >>do_allocate(size_t bytes, size_t alignment)
>> >>{
>> >

[COMMITTED] s390: Always compute address of stack protector guard

2025-07-08 Thread Stefan Schulze Frielinghaus

Computing the address of the thread pointer on s390 involves multiple
instructions and therefore bears the risk that the address of the canary
or intermediate values of it are spilled after prologue in order to be
reloaded for the epilogue.  Since there exists no mechanism to ensure
that a value is not coming from stack, as a precaution compute the
address always twice, i.e., one time for the prologue and one time for
the epilogue.  Note, even if there were such a mechanism, emitting
optimal code is non-trivial since there exist cases with opposing
requirements as e.g. if the thread pointer is not only computed for the
TLS guard but also for other TLS objects.  For the latter accesses it is
desired to spill and reload the thread pointer instead of recomputing it
whereas for the former it is not.

Committed as r16-2105-gbb6075e7115208.

gcc/ChangeLog:

* config/s390/s390.md (stack_protect_get_tpsi): New insn.
(stack_protect_get_tpdi): New insn.
(stack_protect_set): Use new insn.
(stack_protect_test): Use new insn.

gcc/testsuite/ChangeLog:

* gcc.target/s390/stack-protector-guard-tls-1.c: New test.
---
 gcc/config/s390/s390.md   | 47 +--
 .../s390/stack-protector-guard-tls-1.c| 39 +++
 2 files changed, 82 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index f6db36e0ac3..02bc149b0fb 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -308,6 +308,9 @@
UNSPECV_SPLIT_STACK_CALL
 
UNSPECV_OSC_BREAK
+
+   ; Stack Protector
+   UNSPECV_SP_GET_TP
   ])
 
 ;;
@@ -365,6 +368,9 @@
(VR23_REGNUM 45)
(VR24_REGNUM 46)
(VR31_REGNUM 53)
+   ; Access registers
+   (AR0_REGNUM  36)
+   (AR1_REGNUM  37)
   ])
 
 ; Rounding modes for binary floating point numbers
@@ -11924,15 +11930,43 @@
 ; Stack Protector Patterns
 ;
 
+; Insns stack_protect_get_tp{si,di} are similar to *get_tp_{31,64} but still
+; distinct in the sense that they force recomputation of the thread pointer
+; instead of potentially reloading it from stack.
+
+(define_insn_and_split "stack_protect_get_tpsi"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+   (unspec_volatile:SI [(const_int 0)] UNSPECV_SP_GET_TP))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (reg:SI AR0_REGNUM))])
+
+(define_insn_and_split "stack_protect_get_tpdi"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+   (unspec_volatile:DI [(const_int 0)] UNSPECV_SP_GET_TP))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (reg:SI AR0_REGNUM))
+   (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
+   (set (strict_low_part (match_dup 1)) (reg:SI AR1_REGNUM))]
+  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
 (define_expand "stack_protect_set"
   [(set (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))]
   ""
 {
 #ifdef TARGET_THREAD_SSP_OFFSET
+  rtx tp = gen_reg_rtx (Pmode);
+  if (TARGET_64BIT)
+emit_insn (gen_stack_protect_get_tpdi (tp));
+  else
+emit_insn (gen_stack_protect_get_tpsi (tp));
   operands[1]
-= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
-GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+   GEN_INT (TARGET_THREAD_SSP_OFFSET)));
 #endif
   if (TARGET_64BIT)
 emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
@@ -11958,9 +11992,14 @@
 {
   rtx cc_reg, test;
 #ifdef TARGET_THREAD_SSP_OFFSET
+  rtx tp = gen_reg_rtx (Pmode);
+  if (TARGET_64BIT)
+emit_insn (gen_stack_protect_get_tpdi (tp));
+  else
+emit_insn (gen_stack_protect_get_tpsi (tp));
   operands[1]
-= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
-GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+= gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+   GEN_INT (TARGET_THREAD_SSP_OFFSET)));
 #endif
   if (TARGET_64BIT)
 emit_insn (gen_stack_protect_testdi (operands[0], operands[1]));
diff --git a/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c 
b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c
new file mode 100644
index 000..1efd2455144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/stack-protector-guard-tls-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-protector-all" } */
+/* { dg-final { scan-assembler-times {\tear\t%r[0-9]+,%a[01]} 8 { target lp64 
} } } */
+/* { dg-final { scan-assembler-times {\tsllg\t%r[0-9]+,%r[0-9]+,32} 4 { target 
lp64 } } } */
+/* { dg-final { scan-assembler-times {\tear\t%r[0-9]+,%a[01]} 3 { target { ! 
lp64 } } } } */
+/* { dg-

Re: [PATCH] libstdc++: Fix attribute order on __normal_iterator friends [PR120949]

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 13:26, Tomasz Kaminski  wrote:
>
>
>
> On Sat, Jul 5, 2025 at 1:27 AM Jonathan Wakely  wrote:
>>
>> In r16-1911-g6596f5ab746533 I claimed to have reordered some attributes
>> for compatibility with Clang, but it looks like I got the Clang
>> restriction backwards and put them all in the wrong order.
>>
>> libstdc++-v3/ChangeLog:
>>
>> PR libstdc++/120949
>> * include/bits/stl_iterator.h (__normal_iterator): Fix order of
>> always_inline and nodiscard attributes for Clang compatibility.
>> ---
>>
>> Tested x86_64-linux, and tested lightly with Clang.
>
> LGTM.
> I have also checked that this is indeed an order that is accepted by clang.
> The fact that this restriction is only put on hidden friends is strange.

I think they've fixed it on Clang trunk.

But Clang trunk still complains about attributes after the 'friend'
keyword, and it might be correct about that.


>>
>>
>>  libstdc++-v3/include/bits/stl_iterator.h | 30 +---
>>  1 file changed, 16 insertions(+), 14 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
>> b/libstdc++-v3/include/bits/stl_iterator.h
>> index a7188f46f6db..75e794f6c020 100644
>> --- a/libstdc++-v3/include/bits/stl_iterator.h
>> +++ b/libstdc++-v3/include/bits/stl_iterator.h
>> @@ -1211,7 +1211,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>  #else
>> // Forward iterator requirements
>>template
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>> friend
>> _GLIBCXX_CONSTEXPR
>> bool
>> @@ -1220,7 +1220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> _GLIBCXX_NOEXCEPT
>> { return __lhs.base() == __rhs.base(); }
>>
>> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>>friend
>>_GLIBCXX_CONSTEXPR
>>bool
>> @@ -1229,7 +1229,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>{ return __lhs.base() == __rhs.base(); }
>>
>>template
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>> friend
>> _GLIBCXX_CONSTEXPR
>> bool
>> @@ -1238,7 +1238,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> _GLIBCXX_NOEXCEPT
>> { return __lhs.base() != __rhs.base(); }
>>
>> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>>friend
>>_GLIBCXX_CONSTEXPR
>>bool
>> @@ -1248,15 +1248,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>>// Random access iterator requirements
>>template
>> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>> friend
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD 
>> _GLIBCXX_CONSTEXPR
>> +   _GLIBCXX_CONSTEXPR
>> inline bool
>> operator<(const __normal_iterator& __lhs,
>>   const __normal_iterator<_Iter, _Container>& __rhs)
>> _GLIBCXX_NOEXCEPT
>> { return __lhs.base() < __rhs.base(); }
>>
>> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>>friend
>>_GLIBCXX20_CONSTEXPR
>>bool
>> @@ -1265,15 +1266,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>{ return __lhs.base() < __rhs.base(); }
>>
>>template
>> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>> friend
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD 
>> _GLIBCXX_CONSTEXPR
>> +   _GLIBCXX_CONSTEXPR
>> bool
>> operator>(const __normal_iterator& __lhs,
>>   const __normal_iterator<_Iter, _Container>& __rhs)
>> _GLIBCXX_NOEXCEPT
>> { return __lhs.base() > __rhs.base(); }
>>
>> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>>friend
>>_GLIBCXX_CONSTEXPR
>>bool
>> @@ -1282,7 +1284,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>{ return __lhs.base() > __rhs.base(); }
>>
>>template
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +   _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>> friend
>> _GLIBCXX_CONSTEXPR
>> bool
>> @@ -1291,7 +1293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> _GLIBCXX_NOEXCEPT
>> { return __lhs.base() <= __rhs.base(); }
>>
>> -  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
>>friend
>>_GLIBCXX_CONSTEXPR
>>bool
>> @@ -1300,7 +1302,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>{ return __lhs.base() <= __rhs.base(); }
>>
>>template
>> -   __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
>> +   _GLIBCXX_NODISCAR

Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-08 Thread Richard Biener

On Tue, Jul 8, 2025 at 3:45 PM Robert Dubner  wrote:
>
> > -Original Message-
> > From: Andreas Schwab 
> > Sent: Tuesday, July 8, 2025 03:16
> > To: Robert Dubner 
> > Cc: Rainer Orth ; gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.
> >
> > On Jul 07 2025, Robert Dubner wrote:
> >
> > > Furthermore, even if this method did work, I need the
> CXXFLAGS_FOR_COBOL
> > > options to appear at the end of the list, not the beginning, since
> they
> > > may need to override what came before.
> >
> > Why can't you use CXXFLAGS?
>
> Because CXXFLAGS applies to everything.
>
> Not long ago, we added code using std::size() to one of the gcc/cobol
> files.  Jim and I both use GCC-11, which in its default configuration
> provides C++17 features. But somebody compiling using GCC-9 reported that
> it led to a failure.
>
> So, in an attempt to find such things before committing them to
> gcc/master, in my development environment I tried
>
> CXXFLAGS="-std=c++14" ../configure ...
>
> That leads to a build failure of libcody, with the message to the effect
> of "libcody requires C++11".

Still CXXFLAGS is the correct thing to do.  And to fix that libcody
requirement, of course.  By simply dropping NMS_CXX_11 I'd say.

> I have been unable to find any method of doing a complete build while
> setting compilation options that apply only to the gcc/cobol files. So, I
> did the investigation, and created the patch.
>
> It operates as do some other flags, including CXXFLAGS:
>
> CXXFLAGS_FOR_COBOL=xxx ../configure ...
>
> results in gcc/Makefile and gcc/cobol/Makefile that uses xxx for files in
> gcc/cobol, and for no other source code trees, in all subsequent builds
> unless overridden with
>
> make CXXFLAGS_FOR_COBOL=yyy
>
> That's what we want.  I have found no other way of doing it.
>
> >
> > --
> > Andreas Schwab, SUSE Labs, sch...@suse.de
> > GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> > "And now for something completely different."

Re: [PATCH] riscv: allow zero in zacas subword atomic cas

2025-07-08 Thread Andreas Schwab

Please fix your git to remove the subject prefix.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash structs and members for P1901R2

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 12:54, Paul Keir  wrote:
>
> Let me know if this needs a refresh.

The patch fails to apply:

error: patch failed: libstdc++-v3/include/bits/shared_ptr_base.h:1715
error: libstdc++-v3/include/bits/shared_ptr_base.h: patch does not apply

but I think it's your mail client munging whitespace, not something
that can be fixed by rebasing on trunk.
I'll figure it out and apply it by hand.


>
> 
> From: Paul Keir 
> Sent: 06 June 2025 5:32 PM
> To: Jonathan Wakely
> Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
> Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
> structs and members for P1901R2
>
> No problem. That should be it included below. Github diff for convenience: 
> https://github.com/gcc-mirror/gcc/compare/e37eb85...pkeir:gcc:1b7c7c1a
>
> Signed-off-by: Paul Keir 
>
> Tested on x86_64-linux.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/shared_ptr.h: Added owner_equal and owner_hash members 
> to shared_ptr and weak_ptr.
> * include/bits/shared_ptr_base.h: Added owner_equal and owner_hash 
> structs.
> * include/bits/version.def: Added __cpp_lib_smart_ptr_owner_equality 
> feature macro.
> * include/bits/version.h: Update generated for 
> __cpp_lib_smart_ptr_owner_equality feature macro.
> * include/std/memory: Added define for 
> __glibcxx_want_smart_ptr_owner_equality.
> * testsuite/20_util/owner_equal/version.cc: New test.
> * testsuite/20_util/owner_equal/cmp.cc: New test.
> * testsuite/20_util/owner_equal/noexcept.cc: New test.
> * testsuite/20_util/owner_hash/cmp.cc: New test.
> * testsuite/20_util/owner_hash/noexcept.cc: New test.
> * testsuite/20_util/shared_ptr/observers/owner_equal.cc: New test.
> * testsuite/20_util/shared_ptr/observers/owner_hash.cc: New test.
> * testsuite/20_util/weak_ptr/observers/owner_equal.cc: New test.
> * testsuite/20_util/weak_ptr/observers/owner_hash.cc: New test.
>
> ---
>
>  include/bits/shared_ptr.h  |  57 +++
>  include/bits/shared_ptr_base.h |  40 
>  include/bits/version.def   |   9 ++
>  include/bits/version.h |  10 ++
>  include/std/memory |   1 +
>  testsuite/20_util/owner_equal/cmp.cc   | 105 
> +
>  testsuite/20_util/owner_equal/noexcept.cc  |  30 ++
>  testsuite/20_util/owner_equal/version.cc   |  13 +++
>  testsuite/20_util/owner_hash/cmp.cc|  87 +
>  testsuite/20_util/owner_hash/noexcept.cc   |  16 
>  .../20_util/shared_ptr/observers/owner_equal.cc|  74 +++
>  .../20_util/shared_ptr/observers/owner_hash.cc |  71 ++
>  .../20_util/weak_ptr/observers/owner_equal.cc  |  52 ++
>  testsuite/20_util/weak_ptr/observers/owner_hash.cc |  50 ++
>  14 files changed, 615 insertions(+)
>
> diff --git a/libstdc++-v3/include/bits/shared_ptr.h 
> b/libstdc++-v3/include/bits/shared_ptr.h
> index a196a0f1212..dd02ab16e59 100644
> --- a/libstdc++-v3/include/bits/shared_ptr.h
> +++ b/libstdc++-v3/include/bits/shared_ptr.h
> @@ -909,6 +909,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  : public _Sp_owner_less, shared_ptr<_Tp>>
>  { };
>
> +#ifdef __glibcxx_smart_ptr_owner_equality // >= C++26
> +
> +  /**
> +   * @brief Provides ownership-based hashing.
> +   * @headerfile memory
> +   * @since C++26
> +   */
> +  struct owner_hash
> +  {
> +template
> +size_t operator()(const shared_ptr<_Tp>& __s) const noexcept
> +{ return __s.owner_hash(); }
> +
> +template
> +size_t operator()(const weak_ptr<_Tp>& __s) const noexcept
> +{ return __s.owner_hash(); }
> +
> +using is_transparent = void;
> +  };
> +
> +  /**
> +   * @brief Provides ownership-based mixed equality comparisons of
> +   *shared and weak pointers.
> +   * @headerfile memory
> +   * @since C++26
> +   */
> +  struct owner_equal
> +  {
> +template
> +bool
> +operator()(const shared_ptr<_Tp1>& __lhs,
> +   const shared_ptr<_Tp2>& __rhs) const noexcept
> +{ return __lhs.owner_equal(__rhs); }
> +
> +template
> +bool
> +operator()(const shared_ptr<_Tp1>& __lhs,
> +   const   weak_ptr<_Tp2>& __rhs) const noexcept
> +{ return __lhs.owner_equal(__rhs); }
> +
> +template
> +bool
> +operator()(const   weak_ptr<_Tp1>& __lhs,
> +   const shared_ptr<_Tp2>& __rhs) const noexcept
> +{ return __lhs.owner_equal(__rhs); }
> +
> +template
> +bool
> +operator()(const weak_ptr<_Tp1>& __lhs,
> +   const weak_ptr<_Tp2>& __rhs)   const noexcept
> +{ return __lhs.owner_equal(__rhs); }
> +
> +using is_transparent = void;
> +  };
> +
> +#endif
> +
>/**
> * @brief Base class allowing use

Re: [PATCH v2] Handle non default git prefix configurations

2025-07-08 Thread Alexander Monakov



On Mon, 7 Jul 2025, Pierre-Emmanuel Patry wrote:

> Mklog parses the diff content from prepare-commit-msg hook but fails
> when git has been configured with mnemonicPrefix. Forcing the default
> values for the prefixes would set a distinct diff configuration supported
> by mklog and prevent most failures.
> 
> contrib/ChangeLog:
> 
>   * prepare-commit-msg: Force default git prefixes.
> 
> Signed-off-by: Pierre-Emmanuel Patry 

Pushed to trunk.
Alexander

> ---
>  contrib/prepare-commit-msg | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/contrib/prepare-commit-msg b/contrib/prepare-commit-msg
> index 1b878772dcc..75d102559c7 100755
> --- a/contrib/prepare-commit-msg
> +++ b/contrib/prepare-commit-msg
> @@ -78,4 +78,4 @@ else
>  tee="cat"
>  fi
>  
> -git $cmd | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
> +git $cmd --default-prefix | $tee | git gcc-mklog -c "$COMMIT_MSG_FILE"
>

[PATCH v5 2/2] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

2025-07-08 Thread Icen Zeyada

Generalize existing scalar gimple_fold rules to apply the same
bitwise comparison simplifications to vector types.  Previously, an
expression like

(x < y) && (x > y)

would fold to `false` if x and y are scalars, but equivalent vector
comparisons were left untouched.  This patch enables folding of
patterns of the form

(cmp x y) bit_and (cmp x y)
(cmp x y) bit_ior (cmp x y)
(cmp x y) bit_xor (cmp x y)

for vector operands as well, ensuring consistent optimization across
all data types.

PR tree-optimization/119196

gcc/ChangeLog:

  * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply 
to vectors.

gcc/testsuite/ChangeLog:

  * gcc.target/aarch64/vector-compare-5.c: Add new test for vector compare 
simplification.

Signed-off-by: Icen Zeyada 
---
 gcc/match.pd  | 57 +---
 .../gcc.target/aarch64/vector-compare-5.c | 67 +++
 2 files changed, 113 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 99d218ea048..67b33eee5f7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3716,6 +3716,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if ((TREE_CODE (@1) == INTEGER_CST
 && TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   || (VECTOR_TYPE_P (TREE_TYPE (@1))
+   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
 || POINTER_TYPE_P (TREE_TYPE (@1)))
&& bitwise_equal_p (@1, @2)))
 (with
@@ -3754,27 +3756,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (code1 == EQ_EXPR && val) @3)
   (if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); })
   (if (code1 == NE_EXPR && !val && allbits) @4)
-  (if (code1 == NE_EXPR
+  (if ((code1 == NE_EXPR
&& code2 == GE_EXPR
   && cmp == 0
   && allbits)
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(gt @c0 (convert @1)))
-  (if (code1 == NE_EXPR
+  (if ((code1 == NE_EXPR
&& code2 == LE_EXPR
   && cmp == 0
   && allbits)
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(lt @c0 (convert @1)))
   /* (a != (b+1)) & (a > b) -> a > (b+1) */
-  (if (code1 == NE_EXPR
+  (if ((code1 == NE_EXPR
&& code2 == GT_EXPR
   && one_after
   && allbits)
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(gt @c0 (convert @1)))
   /* (a != (b-1)) & (a < b) -> a < (b-1) */
-  (if (code1 == NE_EXPR
+  (if ((code1 == NE_EXPR
&& code2 == LT_EXPR
   && one_before
   && allbits)
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(lt @c0 (convert @1)))
  )
 )
@@ -3793,6 +3807,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if ((TREE_CODE (@1) == INTEGER_CST
&& TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   || (VECTOR_TYPE_P (TREE_TYPE (@1))
+   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
|| POINTER_TYPE_P (TREE_TYPE (@1)))
   && operand_equal_p (@1, @2)))
(with
@@ -3843,6 +3859,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if ((TREE_CODE (@1) == INTEGER_CST
 && TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   || (VECTOR_TYPE_P (TREE_TYPE (@1)))
|| POINTER_TYPE_P (TREE_TYPE (@1)))
&& bitwise_equal_p (@1, @2)))
 (with
@@ -3884,24 +3901,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (code1 == EQ_EXPR
&& code2 == GT_EXPR
   && cmp == 0
-  && allbits)
+  && allbits
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GE_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(ge @c0 @2))
   (if (code1 == EQ_EXPR
&& code2 == LT_EXPR
   && cmp == 0
-  && allbits)
+  && allbits
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LE_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(le @c0 @2))
   /* (a == (b-1)) | (a >= b) -> a >= (b-1) */
   (if (code1 == EQ_EXPR
&& code2 == GE_EXPR
   && one_before
-  && allbits)
+  && allbits
+  && ((VECTOR_BOOLEAN_TYPE_P (type)
+  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GE_EXPR))
+  || !VECTOR_TYPE_P (TREE_TYPE (@1
(ge @c0 (convert @1)

[PATCH v5 0/2] tree-optimization: extend scalar comparison folding to vectors [PR119196]

2025-07-08 Thread Icen Zeyada



New in V5:
Added a check to confirm that the target is supported.

---

This patch generalizes existing scalar bitwise comparison simplifications
to vector types by matching patterns of the form

```
(cmp x y) bit_and (cmp x y)
(cmp x y) bit_ior (cmp x y)
(cmp x y) bit_xor (cmp x y)
```


Icen Zeyada (2):
  tree-simplify: unify simple_comparison ops in vec_cond for bit
and/or/xor [PR119196]
  gimple-fold: extend vector simplification to match scalar bitwise
optimizations [PR119196]

 gcc/match.pd  | 66 +++---
 .../gcc.target/aarch64/vector-compare-5.c | 67 +++
 2 files changed, 122 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c

-- 
2.43.0

[PATCH v5 1/2] tree-simplify: unify simple_comparison ops in vec_cond for bit and/or/xor [PR119196]

2025-07-08 Thread Icen Zeyada

Merge simple_comparison patterns under a single vec_cond_expr for bit_and,
bit_ior, and bit_xor in the simplify pass.

Ensure that when both operands of a bit_and, bit_ior, or bit_xor are
simple_comparison results, they reside within the same vec_cond_expr rather
than separate ones.
This prepares the AST so that subsequent transformations (e.g., folding the
comparisons if possible) can take effect.

PR tree-optimization/119196

gcc/ChangeLog:

* match.pd: Merge multiple vec_cond_expr in a single one for
  bit_and, bit_ior and bit_xor.

Signed-off-by: Icen Zeyada 
---
 gcc/match.pd | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index ec2f5603d9c..99d218ea048 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5981,6 +5981,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0)
(vec_cond @0 (op! @1 @3) (op! @2 @4
 
+/* (@0 ? @2 : @3) lop (@1 ? @2 : @3)  -->  (@0 lop @1) ? @2 : @3.  */
+(for lop (bit_and bit_ior bit_xor)
+   (simplify
+   (lop
+  (vec_cond @0 integer_minus_onep@2 integer_zerop@3)
+  (vec_cond @1 @2 @3))
+  (if (target_supports_op_p (TREE_TYPE (@0), lop, optab_vector))
+   (vec_cond (lop @0 @1) @2 @3
+
 /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
  (simplify
   (op (vec_cond:s @0 @1 @2) @3)
-- 
2.43.0

Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash structs and members for P1901R2

2025-07-08 Thread Paul Keir

Let me know if this needs a refresh.


From: Paul Keir 
Sent: 06 June 2025 5:32 PM
To: Jonathan Wakely
Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
structs and members for P1901R2

No problem. That should be it included below. Github diff for convenience: 
https://github.com/gcc-mirror/gcc/compare/e37eb85...pkeir:gcc:1b7c7c1a

Signed-off-by: Paul Keir 

Tested on x86_64-linux.

libstdc++-v3/ChangeLog:

* include/bits/shared_ptr.h: Added owner_equal and owner_hash members 
to shared_ptr and weak_ptr.
* include/bits/shared_ptr_base.h: Added owner_equal and owner_hash 
structs.
* include/bits/version.def: Added __cpp_lib_smart_ptr_owner_equality 
feature macro.
* include/bits/version.h: Update generated for 
__cpp_lib_smart_ptr_owner_equality feature macro.
* include/std/memory: Added define for 
__glibcxx_want_smart_ptr_owner_equality.
* testsuite/20_util/owner_equal/version.cc: New test.
* testsuite/20_util/owner_equal/cmp.cc: New test.
* testsuite/20_util/owner_equal/noexcept.cc: New test.
* testsuite/20_util/owner_hash/cmp.cc: New test.
* testsuite/20_util/owner_hash/noexcept.cc: New test.
* testsuite/20_util/shared_ptr/observers/owner_equal.cc: New test.
* testsuite/20_util/shared_ptr/observers/owner_hash.cc: New test.
* testsuite/20_util/weak_ptr/observers/owner_equal.cc: New test.
* testsuite/20_util/weak_ptr/observers/owner_hash.cc: New test.

---

 include/bits/shared_ptr.h  |  57 +++
 include/bits/shared_ptr_base.h |  40 
 include/bits/version.def   |   9 ++
 include/bits/version.h |  10 ++
 include/std/memory |   1 +
 testsuite/20_util/owner_equal/cmp.cc   | 105 +
 testsuite/20_util/owner_equal/noexcept.cc  |  30 ++
 testsuite/20_util/owner_equal/version.cc   |  13 +++
 testsuite/20_util/owner_hash/cmp.cc|  87 +
 testsuite/20_util/owner_hash/noexcept.cc   |  16 
 .../20_util/shared_ptr/observers/owner_equal.cc|  74 +++
 .../20_util/shared_ptr/observers/owner_hash.cc |  71 ++
 .../20_util/weak_ptr/observers/owner_equal.cc  |  52 ++
 testsuite/20_util/weak_ptr/observers/owner_hash.cc |  50 ++
 14 files changed, 615 insertions(+)

diff --git a/libstdc++-v3/include/bits/shared_ptr.h 
b/libstdc++-v3/include/bits/shared_ptr.h
index a196a0f1212..dd02ab16e59 100644
--- a/libstdc++-v3/include/bits/shared_ptr.h
+++ b/libstdc++-v3/include/bits/shared_ptr.h
@@ -909,6 +909,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public _Sp_owner_less, shared_ptr<_Tp>>
 { };

+#ifdef __glibcxx_smart_ptr_owner_equality // >= C++26
+
+  /**
+   * @brief Provides ownership-based hashing.
+   * @headerfile memory
+   * @since C++26
+   */
+  struct owner_hash
+  {
+template
+size_t operator()(const shared_ptr<_Tp>& __s) const noexcept
+{ return __s.owner_hash(); }
+
+template
+size_t operator()(const weak_ptr<_Tp>& __s) const noexcept
+{ return __s.owner_hash(); }
+
+using is_transparent = void;
+  };
+
+  /**
+   * @brief Provides ownership-based mixed equality comparisons of
+   *shared and weak pointers.
+   * @headerfile memory
+   * @since C++26
+   */
+  struct owner_equal
+  {
+template
+bool
+operator()(const shared_ptr<_Tp1>& __lhs,
+   const shared_ptr<_Tp2>& __rhs) const noexcept
+{ return __lhs.owner_equal(__rhs); }
+
+template
+bool
+operator()(const shared_ptr<_Tp1>& __lhs,
+   const   weak_ptr<_Tp2>& __rhs) const noexcept
+{ return __lhs.owner_equal(__rhs); }
+
+template
+bool
+operator()(const   weak_ptr<_Tp1>& __lhs,
+   const shared_ptr<_Tp2>& __rhs) const noexcept
+{ return __lhs.owner_equal(__rhs); }
+
+template
+bool
+operator()(const weak_ptr<_Tp1>& __lhs,
+   const weak_ptr<_Tp2>& __rhs)   const noexcept
+{ return __lhs.owner_equal(__rhs); }
+
+using is_transparent = void;
+  };
+
+#endif
+
   /**
* @brief Base class allowing use of the member function `shared_from_this`.
* @headerfile memory
diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index b4be1b49e4d..f820d31e56b 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -1122,6 +1122,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _M_less(const __weak_count<_Lp>& __rhs) const noexcept
   { return std::less<_Sp_counted_base<_Lp>*>()(this->_M_pi, __rhs._M_pi); }

+#ifdef __glibcxx_smart_ptr_owner_equality // >= C++26
+  size_t
+  _M_owner_hash() const noexce

Re: [PATCH] Check backend when setting DECL_ALIGN for PARM_DECL

2025-07-08 Thread Jason Merrill


On 7/8/25 4:35 AM, Richard Biener wrote:

On Mon, Jul 7, 2025 at 11:33 PM H.J. Lu  wrote:


On Tue, Jul 8, 2025 at 5:02 AM H.J. Lu  wrote:


On Mon, Jul 7, 2025 at 11:08 PM Jason Merrill  wrote:


On 7/1/25 5:36 PM, H.J. Lu wrote:

On Tue, Jul 1, 2025 at 9:37 PM Jason Merrill  wrote:


On 6/30/25 7:03 PM, H.J. Lu wrote:

On Mon, Jun 30, 2025 at 10:36 PM Jason Merrill  wrote:


On 6/28/25 7:00 AM, H.J. Lu wrote:

Since a backend may ignore user type alignment for arguments passed on
stack, check backend for argument alignment on stack when evaluating
__alignof.


I assume that's reflected in DECL_ALIGN, so could we just add PARM_DECL to


No.  targetm.calls.function_arg_boundary may have special handling for it.


Why wouldn't we adjust DECL_ALIGN of the PARM_DECL to reflect the actual
alignment of the argument?  Are you saying it could be different from
one call to another?


Function argument alignment is different from other places in memory if
the main variant type alignment is different:


Yes, I understand that function parameter alignment can be different
from other objects of that type.

But since we have a PARM_DECL to represent that particular function
parameter, it seems natural to represent that difference in the
DECL_ALIGN of the PARM_DECL.  If you don't, its DECL_ALIGN is wrong.



__alignof returns TYPE_ALIGN, not DECL_ALIGN.  For PARM_DECL,
TYPE_ALIGN may not be the same as DECL_ALIGN.



How about this patch?



@@ -4097,6 +4097,9 @@ c_alignof_expr (location_t loc, tree expr)
}
   return c_alignof (loc, TREE_TYPE (TREE_TYPE (best)));
 }
+  /* For PARM_DECL, DECL_ALIGN may be different from TYPE_ALIGN.  */
+  else if (TREE_CODE (expr) == PARM_DECL)
+return size_int (DECL_ALIGN (expr) / BITS_PER_UNIT);


I was suggesting that you could add PARM_DECL to this case at the top of 
the function:



  else if (VAR_OR_FUNCTION_DECL_P (expr))
t = size_int (DECL_ALIGN_UNIT (expr));



Since a backend may ignore type alignment for arguments passed on stack,
call targetm.calls.function_arg_boundary to set DECL_ALIGN for PARM_DECL
and change __alignof to return DECL_ALIGN, instead of TYPE_ALIGN, for
PARM_DECL.


I don't think this will work out correctness-wise.  You'd have to patch up
all places.  Also we might turn a reference to the PARM_DECL into
a dereference of its address.  So we rely on the fact that TYPE_ALIGN
is always more conservative than DECL_ALIGN which is not the case
you are caring about.


Hmm, but that's not always true with attribute packed?  Though that only 
applies to fields, so I suppose extending DECL_PACKED handling to 
PARM_DECL might be more work than copying the underaligned passed 
argument into a local variable as you suggest.



So no, I don't think this is good design.  Get the missed copy working instead.

Richard.


gcc/

PR target/120839
* stor-layout.cc (do_type_align): Call
targetm.calls.function_arg_boundary to set DECL_ALIGN for
PARM_DECL.

gcc/c-family/

PR target/120839
* c-common.cc (c_alignof_expr): Return DECL_ALIGN for PARM_DECL.

gcc/testsuite/

PR target/120839
* gcc.target/i386/pr120839-1.c: New test.
* gcc.target/i386/pr120839-2.c: Likewise.

--
H.J.

Re: [PATCH] libstdc++: Ensure pool resources meet alignment requirements [PR118681]

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 14:12, Tomasz Kaminski  wrote:
>
>
>
> On Tue, Jul 8, 2025 at 2:48 PM Jonathan Wakely  wrote:
>>
>> On Tue, 8 Jul 2025 at 13:31, Tomasz Kaminski  wrote:
>> >
>> >
>> >
>> > On Sat, Jul 5, 2025 at 1:12 AM Jonathan Wakely  wrote:
>> >>
>> >> For allocations with size > alignment and size % alignment != 0 we were
>> >> sometimes returning pointers that did not meet the requested alignment.
>> >> For example, allocate(24, 16) would select the pool for 24-byte objects
>> >> and the second allocation from that pool (at offset 24 bytes into the
>> >> pool) is only 8-byte aligned not 16-byte aligned.
>> >>
>> >> The pool resources need to round up the requested allocation size to a
>> >> multiple of the alignment, so that the selected pool will always return
>> >> allocations that meet the alignment requirement.
>> >>
>> >> libstdc++-v3/ChangeLog:
>> >>
>> >> PR libstdc++/118681
>> >> * src/c++17/memory_resource.cc (choose_block_size): New
>> >> function.
>> >> (synchronized_pool_resource::do_allocate): Use choose_block_size
>> >> to determine appropriate block size.
>> >> (synchronized_pool_resource::do_deallocate): Likewise.
>> >> (unsynchronized_pool_resource::do_allocate): Likewise.
>> >> (unsynchronized_pool_resource::do_deallocate): Likewise.
>> >> * testsuite/20_util/synchronized_pool_resource/118681.cc: New
>> >> test.
>> >> * testsuite/20_util/unsynchronized_pool_resource/118681.cc: New
>> >> test.
>> >> ---
>> >>
>> >> Tested x86_64-linux.
>> >
>> > I have expressed my preference of doing this check inside the 
>> > memory_resource::allocate.
>>
>> We can still do that separately. The point of this patch is to fix the
>> misaligned allocations from the pool resources, not improve the EB vs
>> UB position for the memory_resource::allocate API.
>
> Because do_allocate is not publicly accessible, and can be called only via 
> allocate, changing
> memory_resource::allocate API will also fix the misaligned allocations from 
> pool resources.
>
> And I believe moving the check from the specific pool do_allocate to 
> memory_resource::allocate
> would be an ABI break, as we may end up with a binary that contains:
>   * memory_resource::allocate without check (check is in Derived::do_allocate)
>   * Derived::do_allocate without check (check is in memory_resource::allocate)
> (Let me know if I misunderstood how this kind of ABI breaks work).

You understand correctly, but there is no ABI break. Firstly, it only
affects programs that pass invalid alignments, which have UB already.
Secondly, there are already objects out there in the world compiled
without a check in memory_resource::allocate and we can't make them
start enforcing valid alignments now.

So it's only a problem for programs with UB, and the problem already
exists even if we change memory_resource::allocate now.

But it's still a separate issue to this bug fix :-)


>>
>>
>> > I know that this throws any potential detection of misaligned resources 
>> > under the bus,
>> > however I think there is more benefit of not exposing custom resources to 
>> > strange alignment values,
>> > and using the wrong pool.
>> >
>> > On the standard level, I think we should make behavior erroneous here, 
>> > with defined behavior being bit_ceil.
>>
>> Yeah, I think that's a good idea.
>>
>>
>> >>
>> >>
>> >>  libstdc++-v3/src/c++17/memory_resource.cc | 26 +++--
>> >>  .../synchronized_pool_resource/118681.cc  |  5 ++
>> >>  .../unsynchronized_pool_resource/118681.cc| 58 +++
>> >>  3 files changed, 85 insertions(+), 4 deletions(-)
>> >>  create mode 100644 
>> >> libstdc++-v3/testsuite/20_util/synchronized_pool_resource/118681.cc
>> >>  create mode 100644 
>> >> libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
>> >>
>> >> diff --git a/libstdc++-v3/src/c++17/memory_resource.cc 
>> >> b/libstdc++-v3/src/c++17/memory_resource.cc
>> >> index fac4c782c5f7..fddfe2c7dd98 100644
>> >> --- a/libstdc++-v3/src/c++17/memory_resource.cc
>> >> +++ b/libstdc++-v3/src/c++17/memory_resource.cc
>> >> @@ -1242,12 +1242,30 @@ namespace pmr
>> >>  return pools;
>> >>}
>> >>
>> >> +  static inline size_t
>> >> +  choose_block_size(size_t bytes, size_t alignment)
>> >> +  {
>> >> +if (bytes == 0) [[unlikely]]
>> >> +  return alignment;
>> >> +
>> >> +// Use bit_ceil in case alignment is invalid (i.e. not a power of 
>> >> two).
>> >> +size_t mask = std::__bit_ceil(alignment) - 1;
>> >> +// Round up to a multiple of alignment.
>> >> +size_t block_size = (bytes + mask) & ~mask;
>> >> +
>> >> +if (block_size >= bytes) [[likely]]
>> >> +  return block_size;
>> >> +
>> >> +// Wrapped around to zero, bytes must have been impossibly large.
>> >> +return numeric_limits::max();
>> >> +  }
>> >> +
>> >>// Override for memory_resource::do_allocate
>> >>void*
>> >>synchronized_pool_res

Re: [PATCH] libstdc++: Make debug iterator pointer sequence const [PR116369]

2025-07-08 Thread François Dumont


Now fixed as trivial, I set you as author.

François

On 08/07/2025 13:55, Jonathan Wakely wrote:

On Mon, 7 Jul 2025 at 11:12, Jonathan Wakely  wrote:

On Sat, 5 Jul 2025 at 14:03, François Dumont  wrote:

On 01/07/2025 22:51, Jonathan Wakely wrote:

On Mon, 16 Jun 2025 at 18:36, François Dumont  wrote:

I eventually wonder if it is such a big deal to add the new symbols for 
_GLIBCXX_DEBUG mode.

I like this version much more than the one trying to duplicate symbols with asm.



Here is the patch doing this. It avoids adding many const_cast, which is what we 
are trying to achieve here.

I'm still not really sure if this is worth it though - is it fixing a
bug or a correctness problem? (using const_cast is safe if the objects
aren't actually const)

All the new tests already pass, even without this patch. Are these
just tests for const member functions that we aren't currently testing
at all?

Those tests are showing the same UB that you fixed as part of your
PR116369 patch but this time with local_iterator. Even if tests are
passing without this patch it's still UB before it, do you prefer to
remove those tests then ?

Ah OK, so they are showing UB ... it's just that the compiler doesn't
actually complain about it.

Please make the const containers in those tests global variables,
instead of local variables inside main(). The compiler won't put local
variables in ROM so the test would never fail. It might put globals in
ROM (although not after your patch, because of the mutable members,
which is why the patch is actually fixing something).



Globally this patch is following your recommendations on PR116369 commit
where you were saying:

  Ideally we would not need the const_cast at all. Instead, the _M_attach
  member (and everything it calls) should be const-qualified. That would
  work fine now, because the members that it ends up modifying are
  mutable. Making that change would require a number of new exports from
  the shared library, and would require retaining the old non-const
member
  functions (maybe as symbol aliases) for backwards compatibility. That
  might be worth changing at some point, but isn't done here.

In addition to what is said here I made the sequence pointer const too
as the added mutable allows that.

It was also the occasion to fix some types used in std::forward_list in
Debug mode.

Do you think it is useless eventually ?

I think it's worth doing, I was just concerned about the __asm__
solution used in the initial patches.

OK for trunk with the adjusted tests, thanks.


I see a large number of new test failures. It looks like this fix is needed:

--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -144,13 +144,13 @@ namespace __gnu_debug
   //std::swap(_M_this()->_M_version, __other._M_version);
   _Safe_iterator_base* __this_its = _M_this()->_M_iterators;
   _S_swap_aux(__other, __other._M_iterators,
- _M_this(), _M_this()->_M_iterators);
+  *_M_this(), _M_this()->_M_iterators);
   _Safe_iterator_base* __this_const_its = _M_this()->_M_const_iterators;
   _S_swap_aux(__other, __other._M_const_iterators,
- _M_this(), _M_this()->_M_const_iterators);
-  _S_swap_aux(_M_this(), __this_its,
+  *_M_this(), _M_this()->_M_const_iterators);
+  _S_swap_aux(*_M_this(), __this_its,
  __other, __other._M_iterators);
-  _S_swap_aux(_M_this(), __this_const_its,
+  _S_swap_aux(*_M_this(), __this_const_its,
  __other, __other._M_const_iterators);
 }

Re: [PATCH v2] xtensa: Fix B[GE/LT]UI instructions with immediate values of 32768 or 65536 not being emitted

2025-07-08 Thread Max Filippov

On Mon, Jul 7, 2025 at 7:40 AM Takayuki 'January June' Suwa
 wrote:
>
> This is because in canonicalize_comparison() in gcc/expmed.cc, the COMPARE
> rtx_cost() for the immediate values in the title does not change between
> the old and new versions.  This patch fixes that.
>
> (note: Currently, this patch only works if some constant propagation
> optimizations are enabled (-O2 or higher) or if bare large constant
> assignments are possible (-mconst16 or -mauto-litpools).  In the future
> I hope to make it work at -O1...)
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (xtensa_b4const_or_zero):
> Remove.
> (xtensa_b4const): Add a case where the value is 0, and rename
> to xtensa_b4const_or_zero.
> (xtensa_rtx_costs): Fix to also consider the result of
> xtensa_b4constu().
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c: New.
> ---
>   gcc/config/xtensa/xtensa.cc   | 17 +
>   .../gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c   | 19 +++
>   2 files changed, 24 insertions(+), 12 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/xtensa/BGEUI-BLTUI-32k-64k.c

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.

-- 
Thanks.
-- Max

Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash structs and members for P1901R2

2025-07-08 Thread Paul Keir

Thanks Jonathan.



From: Jonathan Wakely 
Sent: 08 July 2025 1:37 PM
To: Paul Keir
Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
structs and members for P1901R2



Warning: Do not open attachments or click on links unless you trust the sender



On Tue, 8 Jul 2025 at 13:24, Jonathan Wakely  wrote:
>
> On Tue, 8 Jul 2025 at 12:54, Paul Keir  wrote:
> >
> > Let me know if this needs a refresh.
>
> The patch fails to apply:
>
> error: patch failed: libstdc++-v3/include/bits/shared_ptr_base.h:1715
> error: libstdc++-v3/include/bits/shared_ptr_base.h: patch does not apply
>
> but I think it's your mail client munging whitespace, not something
> that can be fixed by rebasing on trunk.
> I'll figure it out and apply it by hand.

OK, I added your github fork and did a merge --squash from there


>
>
> >
> > 
> > From: Paul Keir 
> > Sent: 06 June 2025 5:32 PM
> > To: Jonathan Wakely
> > Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org
> > Subject: Re: [PATCH] libstdc++: Add smart ptr owner_equals and owner_hash 
> > structs and members for P1901R2
> >
> > No problem. That should be it included below. Github diff for convenience: 
> > https://github.com/gcc-mirror/gcc/compare/e37eb85...pkeir:gcc:1b7c7c1a
> >
> > Signed-off-by: Paul Keir 
> >
> > Tested on x86_64-linux.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/shared_ptr.h: Added owner_equal and owner_hash 
> > members to shared_ptr and weak_ptr.
> > * include/bits/shared_ptr_base.h: Added owner_equal and owner_hash 
> > structs.
> > * include/bits/version.def: Added 
> > __cpp_lib_smart_ptr_owner_equality feature macro.
> > * include/bits/version.h: Update generated for 
> > __cpp_lib_smart_ptr_owner_equality feature macro.
> > * include/std/memory: Added define for 
> > __glibcxx_want_smart_ptr_owner_equality.
> > * testsuite/20_util/owner_equal/version.cc: New test.
> > * testsuite/20_util/owner_equal/cmp.cc: New test.
> > * testsuite/20_util/owner_equal/noexcept.cc: New test.
> > * testsuite/20_util/owner_hash/cmp.cc: New test.
> > * testsuite/20_util/owner_hash/noexcept.cc: New test.
> > * testsuite/20_util/shared_ptr/observers/owner_equal.cc: New test.
> > * testsuite/20_util/shared_ptr/observers/owner_hash.cc: New test.
> > * testsuite/20_util/weak_ptr/observers/owner_equal.cc: New test.
> > * testsuite/20_util/weak_ptr/observers/owner_hash.cc: New test.
> >
> > ---
> >
> >  include/bits/shared_ptr.h  |  57 +++
> >  include/bits/shared_ptr_base.h |  40 
> >  include/bits/version.def   |   9 ++
> >  include/bits/version.h |  10 ++
> >  include/std/memory |   1 +
> >  testsuite/20_util/owner_equal/cmp.cc   | 105 
> > +
> >  testsuite/20_util/owner_equal/noexcept.cc  |  30 ++
> >  testsuite/20_util/owner_equal/version.cc   |  13 +++
> >  testsuite/20_util/owner_hash/cmp.cc|  87 +
> >  testsuite/20_util/owner_hash/noexcept.cc   |  16 
> >  .../20_util/shared_ptr/observers/owner_equal.cc|  74 +++
> >  .../20_util/shared_ptr/observers/owner_hash.cc |  71 ++
> >  .../20_util/weak_ptr/observers/owner_equal.cc  |  52 ++
> >  testsuite/20_util/weak_ptr/observers/owner_hash.cc |  50 ++
> >  14 files changed, 615 insertions(+)
> >
> > diff --git a/libstdc++-v3/include/bits/shared_ptr.h 
> > b/libstdc++-v3/include/bits/shared_ptr.h
> > index a196a0f1212..dd02ab16e59 100644
> > --- a/libstdc++-v3/include/bits/shared_ptr.h
> > +++ b/libstdc++-v3/include/bits/shared_ptr.h
> > @@ -909,6 +909,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  : public _Sp_owner_less, shared_ptr<_Tp>>
> >  { };
> >
> > +#ifdef __glibcxx_smart_ptr_owner_equality // >= C++26
> > +
> > +  /**
> > +   * @brief Provides ownership-based hashing.
> > +   * @headerfile memory
> > +   * @since C++26
> > +   */
> > +  struct owner_hash
> > +  {
> > +template
> > +size_t operator()(const shared_ptr<_Tp>& __s) const noexcept
> > +{ return __s.owner_hash(); }
> > +
> > +template
> > +size_t operator()(const weak_ptr<_Tp>& __s) const noexcept
> > +{ return __s.owner_hash(); }
> > +
> > +using is_transparent = void;
> > +  };
> > +
> > +  /**
> > +   * @brief Provides ownership-based mixed equality comparisons of
> > +   *shared and weak pointers.
> > +   * @headerfile memory
> > +   * @since C++26
> > +   */
> > +  struct owner_equal
> > +  {
> > +template
> > +bool
> > +operator()(const shared_ptr<_Tp1>& __lhs,
> > +   const shared_ptr<_Tp2>& __rhs) const noexcept
> > +{ re

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-08 Thread Qing Zhao

I just updated PR121000: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121000. 

Yes, the root cause is exactly what you mentioned in the other email:
“
The IL must clearly use the value (the size of the element), otherwise
DCE or other passes will happily optimize it away, they don't keep some
expression computation around just because it is referenced in
TYPE_SIZE_UNIT of some type somewhere.
“

Thanks. 

Qing

> On Jul 8, 2025, at 10:30, Qing Zhao  wrote:
> 
> Hi, 
> 
>> On Jul 8, 2025, at 01:18, Jakub Jelinek  wrote:
>>> 
   5th argument ACCESS_MODE:
-1: Unknown access semantics
 0: none
 1: read_only
 2: write_only
 3: read_write
   6th argument: A constant 0 with the pointer TYPE to the original flexible
 array type.
>>> 
>>> Likewise, wouldn't this always be TREE_TYPE(TREE_TYPE(REF_TO_OBJ))?  For a
>>> FAM, the frontend does array_to_pointer, so with the INDIRECT_REF at the end
>>> of build_access_with_size_for_counted_by gone, I reckon you should be able
>>> to get the type of the array element.  Likewise if it was a pointer and not
>>> a FAM.
>>> 
>>> TYPE_SIZE_UNIT may not work for them like you said, but there ought to be a
>>> usable expression that we can reach from the type, no?
>> 
>> No.  The IL must clearly use the value (the size of the element), otherwise
>> DCE or other passes will happily optimize it away, they don't keep some
>> expression computation around just because it is referenced in
>> TYPE_SIZE_UNIT of some type somewhere.
>> 
>> Consider e.g.
>> void bar (int *);
>> 
>> void
>> foo (int n, int m)
>> {
>> typedef int A[m];
>> struct S { int n, m; A a[2]; A b[] __attribute__((counted_by (n))); } *p;
>> p = __builtin_malloc (sizeof (struct S) + sizeof (A) * n);
>> p->n = n;
>> p->m = m;
>> int *q = &p->b[1][0];
>> bar (q);
>> q = &p->b[0][1];
>> bar (q);
>> }
>> There is a reason why e.g. COMPONENT_REF has 3 arguments rather than 2,
>> the last one used solely for the variable length structures (primarily Ada,
>> or GNU C extension like above).
> 
> Just filed PR121000: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121000
> __builtin_dynamic_object_size should work for FAM with VLA element when 
> annotated with counted_by 
> 
> It looks like the current implementation of counted_by for FAM has a bug 
> when handling this case.
> Will study a little bit along with your comments in the other emails.  And 
> then respond in another email.
> 
> Thanks a lot for the comments and testing case. 
> 
>> Jakub

[PATCH] c++: optional template after :: causing error [PR119838]

2025-07-08 Thread Marek Polacek

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Found while working on Reflection where we currently reject:

  constexpr auto r = ^^::template C::type;

which should work, because "::template C::" should match the

  nested-name-specifier template(opt) simple-template-id ::

production where the template is optional.  This bug is not limited
to Reflection as demonstrated by the attached test case, so I'm
submitting it separately.

The check_template_keyword_in_nested_name_spec call should ensure that
we're dealing with a template-id if we've seen "template".

PR c++/119838

gcc/cp/ChangeLog:

* parser.cc (cp_parser_nested_name_specifier_opt): New global_p
parameter.  Look for "template" when global_p is true.
(cp_parser_simple_type_specifier): Pass global_p to
cp_parser_nested_name_specifier_opt.

gcc/testsuite/ChangeLog:

* g++.dg/parse/template32.C: New test.
---
 gcc/cp/parser.cc| 32 +++--
 gcc/testsuite/g++.dg/parse/template32.C | 13 ++
 2 files changed, 33 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/parse/template32.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 32c6a42b31d..70c670a6f1c 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2519,7 +2519,7 @@ static cp_expr cp_parser_id_expression
 static cp_expr cp_parser_unqualified_id
   (cp_parser *, bool, bool, bool, bool);
 static tree cp_parser_nested_name_specifier_opt
-  (cp_parser *, bool, bool, bool, bool, bool = false);
+  (cp_parser *, bool, bool, bool, bool, bool = false, bool = false);
 static tree cp_parser_nested_name_specifier
   (cp_parser *, bool, bool, bool, bool);
 static tree cp_parser_qualifying_entity
@@ -7242,18 +7242,19 @@ check_template_keyword_in_nested_name_spec (tree name)
  nested-name-specifier template [opt] simple-template-id ::
 
PARSER->SCOPE should be set appropriately before this function is
-   called.  TYPENAME_KEYWORD_P is TRUE if the `typename' keyword is in
-   effect.  TYPE_P is TRUE if we non-type bindings should be ignored
-   in name lookups.
+   called.  TYPENAME_KEYWORD_P is true if the `typename' keyword is in
+   effect.  TYPE_P is true if non-type bindings should be ignored
+   in name lookups.  TEMPLATE_KEYWORD_P is true if the `template' keyword
+   was seen.  GLOBAL_P is true if `::' has already been parsed.
 
Sets PARSER->SCOPE to the class (TYPE) or namespace
(NAMESPACE_DECL) specified by the nested-name-specifier, or leaves
it unchanged if there is no nested-name-specifier.  Returns the new
scope iff there is a nested-name-specifier, or NULL_TREE otherwise.
 
-   If CHECK_DEPENDENCY_P is FALSE, names are looked up in dependent scopes.
+   If CHECK_DEPENDENCY_P is false, names are looked up in dependent scopes.
 
-   If IS_DECLARATION is TRUE, the nested-name-specifier is known to be
+   If IS_DECLARATION is true, the nested-name-specifier is known to be
part of a declaration and/or decl-specifier.  */
 
 static tree
@@ -7262,7 +7263,8 @@ cp_parser_nested_name_specifier_opt (cp_parser *parser,
 bool check_dependency_p,
 bool type_p,
 bool is_declaration,
-bool template_keyword_p /* = false */)
+bool template_keyword_p /* = false */,
+bool global_p /* = false */)
 {
   bool success = false;
   cp_token_position start = 0;
@@ -7310,8 +7312,9 @@ cp_parser_nested_name_specifier_opt (cp_parser *parser,
 
   /* Spot cases that cannot be the beginning of a
 nested-name-specifier.  On the second and subsequent times
-through the loop, we look for the `template' keyword.  */
-  if (success && token->keyword == RID_TEMPLATE)
+(or the first, if '::' has already been parsed) through the
+loop, we look for the `template' keyword.  */
+  if ((success || global_p) && token->keyword == RID_TEMPLATE)
;
   /* A template-id can start a nested-name-specifier.  */
   else if (token->type == CPP_TEMPLATE_ID)
@@ -7359,8 +7362,11 @@ cp_parser_nested_name_specifier_opt (cp_parser *parser,
   cp_parser_parse_tentatively (parser);
 
   /* Look for the optional `template' keyword, if this isn't the
-first time through the loop.  */
-  if (success)
+first time through the loop, or if we've already parsed '::';
+this is then the
+  nested-name-specifier template [opt] simple-template-id ::
+production.  */
+  if (success || global_p)
{
  template_keyword_p = cp_parser_optional_template_keyword (parser);
  /* DR1710: "In a qualified-id used as the name in
@@ -21167,7 +21173,9 @@ cp_parser_simple_type_specifier (cp_parser* parser,
/*typen

Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-08 Thread James K. Lowden

On Tue, 8 Jul 2025 14:11:19 -0500 (CDT)
Robert Dubner  wrote:

> But I have other requirements.  I apologize if I keep repeating
> myself, but I am being forced to.
> 
> I want to be able to, for example,
> 
>   CXXFLAGS='-ggdb -O0'
> CXXFLAGS_FOR_COBOL="-Wsomething_or_other" ../configure 
> 
> I want those warnings to apply to gcc/cobol and not to anything else.
> 
> And then, later on, I want to be able to
> 
>   make CXXFLAGS_FOR_COBOL="-Wreplacement_warnings"
> 
>  I can't do that with the CXXFLAGS= or CXX= solutions.

I think you can, though.  Richard is saying, 

$ CXX='g++ -std=c++14' ../configure

will allow a build of the whole tree, including libcody, and

$ make -C build/gcc/cobol CXXFLAGS='-Werror'

can be used to set options for a particular build. 

I haven't tried it yet, but if that works and meets your needs, then
the problem is solved.  If it doesn't work in some circumstances, that's
another issue.  

Does that not suffice?  

--jkl

[PATCH v2 3/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

2025-07-08 Thread Alex (Waffl3x)


From df25e9a27938123e4e3737f1532b402b0fd320e3 Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Tue, 8 Jul 2025 19:52:05 -0600
Subject: [PATCH 3/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

TREE_LANG_FLAG_* is only valid for tree codes that do not use other union
fields in tree_base, previously it only checked for TREE_VEC and SSA_NAME.
This adds checks for INTEGER_CST, VECTOR_CST, POLYNOMIAL_CHREC, MEM_REF,
TARGET_MEM_REF and everything between and including OMP_ATOMIC to
OMP_ATOMIC_CAPTURE_NEW.

Checking is also added to TREE_UNAVAILABLE because it didn't have any kind
of checking.  The accessors for the other flags in tree_base::u::bits
are already constrained appropriately and not modified.

	PR middle-end/121005

gcc/ChangeLog:

	PR middle-end/121005
	* tree.h (TREE_CHECK_BITS_AVAILABLE): Define.
	(TREE_UNAVAILABLE): Use TREE_CHECK_BITS_AVAILABLE.
	(TREE_LANG_FLAG_0): Use TREE_CHECK_BITS_AVAILABLE.
	(TREE_LANG_FLAG_1): Likewise.
	(TREE_LANG_FLAG_2): Likewise.
	(TREE_LANG_FLAG_3): Likewise.
	(TREE_LANG_FLAG_4): Likewise.
	(TREE_LANG_FLAG_5): Likewise.
	(TREE_LANG_FLAG_6): Likewise.

Signed-off-by: Waffl3x 
---
 gcc/tree.h | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/gcc/tree.h b/gcc/tree.h
index 289dcf320af..e7e55104649 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -571,6 +571,15 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   TREE_CHECK6 (T, INTEGER_TYPE, ENUMERAL_TYPE, BOOLEAN_TYPE, REAL_TYPE,	\
 	   FIXED_POINT_TYPE, BITINT_TYPE)
 
+/* Check if the bits field of tree_base is available for NODE.
+   Note, CALL_EXPR also sometimes makes use of the ifn union member, it would
+   be invalid to use TREE_LANG_FLAG_* when this is the case but it can't be
+   checked for here.  */
+#define TREE_CHECK_BITS_AVAILABLE(NODE) \
+  ((TREE_NOT_CHECK7 (NODE, INTEGER_CST, TREE_VEC, VECTOR_CST, SSA_NAME, \
+		 POLYNOMIAL_CHREC, MEM_REF, TARGET_MEM_REF), void ()), \
+   TREE_NOT_RANGE_CHECK (NODE, OMP_ATOMIC, OMP_ATOMIC_CAPTURE_NEW))
+
 /* Here is how primitive or already-canonicalized types' hash codes
are made.  */
 #define TYPE_HASH(TYPE) (TYPE_UID (TYPE))
@@ -1101,7 +1110,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 /* Nonzero in a _DECL if the use of the name is defined as an
unavailable feature by __attribute__((unavailable)).  */
 #define TREE_UNAVAILABLE(NODE) \
-  ((NODE)->base.u.bits.unavailable_flag)
+  ((TREE_CHECK_BITS_AVAILABLE (NODE))->base.u.bits.unavailable_flag)
 
 /* Nonzero indicates an IDENTIFIER_NODE that names an anonymous
aggregate, (as created by anon_aggr_name_format).  */
@@ -1154,19 +1163,19 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 
 /* These flags are available for each language front end to use internally.  */
 #define TREE_LANG_FLAG_0(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_0)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_0)
 #define TREE_LANG_FLAG_1(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_1)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_1)
 #define TREE_LANG_FLAG_2(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_2)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_2)
 #define TREE_LANG_FLAG_3(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_3)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_3)
 #define TREE_LANG_FLAG_4(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_4)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_4)
 #define TREE_LANG_FLAG_5(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_5)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_5)
 #define TREE_LANG_FLAG_6(NODE) \
-  (TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_6)
+  (TREE_CHECK_BITS_AVAILABLE (NODE)->base.u.bits.lang_flag_6)
 
 /* Define additional fields and accessors for nodes representing constants.  */
 
-- 
2.49.0

Re: [PATCH v2 3/3] middle-end/121005 Add checks for TREE_LANG_FLAG_*

2025-07-08 Thread Jakub Jelinek

On Tue, Jul 08, 2025 at 11:54:23PM -0600, Alex (Waffl3x) wrote:
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -571,6 +571,15 @@ extern void omp_clause_range_check_failed (const_tree, 
> const char *, int,
>TREE_CHECK6 (T, INTEGER_TYPE, ENUMERAL_TYPE, BOOLEAN_TYPE, REAL_TYPE,  
> \
>  FIXED_POINT_TYPE, BITINT_TYPE)
>  
> +/* Check if the bits field of tree_base is available for NODE.
> +   Note, CALL_EXPR also sometimes makes use of the ifn union member, it would
> +   be invalid to use TREE_LANG_FLAG_* when this is the case but it can't be
> +   checked for here.  */
> +#define TREE_CHECK_BITS_AVAILABLE(NODE) \
> +  ((TREE_NOT_CHECK7 (NODE, INTEGER_CST, TREE_VEC, VECTOR_CST, SSA_NAME, \
> +  POLYNOMIAL_CHREC, MEM_REF, TARGET_MEM_REF), void ()), \
> +   TREE_NOT_RANGE_CHECK (NODE, OMP_ATOMIC, OMP_ATOMIC_CAPTURE_NEW))

This evaluates NODE twice rather than once.  I'm quite sure there will be
some uses of the many macros with side-effects in the arguments.
Can't you TREE_NOT_RANGE_CHECK in the first argument of TREE_NOT_CHECK7?

Also, have you looked at how code size grew with these patches and compile
time changed?  Whether the checking doesn't become way too expensive...
E.g. if it wouldn't be better to have a bool array indexed by TREE_CODE
for TREE_CHECK_BITS_AVAILABLE, initialized somewhere.  With C++14 could be
even constexpr.

Jakub

[PATCH 1/2] Change how --param fsm-scale-path-stmts works

2025-07-08 Thread Richard Biener

Currently we scale the number of stmts allowed for forward
jump threading to limit those for backwards jump threading
by applying a factor of two to the counted stmts.  That doesn't
allow fine-grained adjustments, like by a single stmt as needed
for PR109893.  The following changes the factor to be a percentage
of the forward threading number.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Any opinion on whether I should change the param name when I change
its meaning?

* params.opt (fsm-scale-path-stmts): Change to percentage.
* doc/invoke.texi (--param fsm-scale-path-stmts): Adjust.
* tree-ssa-threadbackward.cc
(back_threader_profitability::possibly_profitable_path_p):
Adjust param_fsm_scale_path_stmts uses.
(back_threader_profitability::profitable_path_p): Likewise.
---
 gcc/doc/invoke.texi|  4 ++--
 gcc/params.opt |  4 ++--
 gcc/tree-ssa-threadbackward.cc | 17 +
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 74f5ee26042..1a0a507dddc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -17462,8 +17462,8 @@ Maximum number of arrays per scop.
 Max. size of loc list for which reverse ops should be added.
 
 @item fsm-scale-path-stmts
-Scale factor to apply to the number of statements in a threading path
-crossing a loop backedge when comparing to
+Percentage of max-jump-thread-duplication-stmts to allow for the number of
+statements in a threading path crossing a loop backedge.
 @option{--param=max-jump-thread-duplication-stmts}.
 
 @item uninit-control-dep-attempts
diff --git a/gcc/params.opt b/gcc/params.opt
index 31aa0bd5753..0a1dfb46dd1 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -143,8 +143,8 @@ Common Joined UInteger Var(param_file_cache_lines) Init(0) 
Param
 Max number of lines to index into file cache. When 0 this is automatically 
sized.
 
 -param=fsm-scale-path-stmts=
-Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(2) IntegerRange(1, 
10) Param Optimization
-Scale factor to apply to the number of statements in a threading path crossing 
a loop backedge when comparing to max-jump-thread-duplication-stmts.
+Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(50) 
IntegerRange(1, 100) Param Optimization
+Percentage of max-jump-thread-duplication-stmts to allow for the number of 
statements in a threading path crossing a loop backedge.
 
 -param=fully-pipelined-fma=
 Common Joined UInteger Var(param_fully_pipelined_fma) Init(0) IntegerRange(0, 
1) Param Optimization
diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc
index 3adb83e9712..ce765cb5ded 100644
--- a/gcc/tree-ssa-threadbackward.cc
+++ b/gcc/tree-ssa-threadbackward.cc
@@ -739,8 +739,8 @@ back_threader_profitability::possibly_profitable_path_p
   if ((!m_threaded_multiway_branch
|| !loop->latch
|| loop->latch->index == EXIT_BLOCK)
-  && (m_n_insns * param_fsm_scale_path_stmts
- >= param_max_jump_thread_duplication_stmts))
+  && (m_n_insns * 100 >= (param_max_jump_thread_duplication_stmts
+ * param_fsm_scale_path_stmts)))
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -749,8 +749,9 @@ back_threader_profitability::possibly_profitable_path_p
   return false;
 }
   *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
-   && (m_n_insns * param_fsm_scale_path_stmts
-   >= param_max_jump_thread_duplication_stmts));
+   && (m_n_insns * 100
+   >= (param_max_jump_thread_duplication_stmts
+   * param_fsm_scale_path_stmts)));
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 fputc ('\n', dump_file);
@@ -823,8 +824,8 @@ back_threader_profitability::profitable_path_p (const 
vec &m_path,
   if (!m_threaded_multiway_branch
   && *creates_irreducible_loop
   && (!(cfun->curr_properties & PROP_loop_opts_done)
- || (m_n_insns * param_fsm_scale_path_stmts
- >= param_max_jump_thread_duplication_stmts)))
+ || (m_n_insns * 100 >= (param_max_jump_thread_duplication_stmts
+ * param_fsm_scale_path_stmts
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -839,8 +840,8 @@ back_threader_profitability::profitable_path_p (const 
vec &m_path,
  case, drastically reduce the number of statements we are allowed
  to copy.  */
   if (!(m_threaded_through_latch && m_threaded_multiway_branch)
-  && (m_n_insns * param_fsm_scale_path_stmts
- >= param_max_jump_thread_duplication_stmts))
+  && (m_n_insns * 100 >= (param_max_jump_thread_duplication_stmts
+ * param_fsm_scale_path_stmts)))
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dum

[PATCH 2/2] tree-optimization/109893 - allow more backwards jump threading

2025-07-08 Thread Richard Biener

The following changes the percentage that determines how many
stmts are allowed for backwards jump threading from 50 to 54,
enabling the missed jump threading observed in PR109893.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  It seems that
at least backward threading is prone to profile mismatches; I've
altered two testcases to deal with new ones that pop up (definitely
latent issues).

OK?

PR tree-optimization/109893
* params.opt (fsm-scale-path-stmts): Change from 50 to 54.

* gcc.dg/tree-ssa/pr109893.c: New testcase.
* gcc.dg/tree-prof/cmpsf-1.c: XFAIL.
* gcc.dg/vect/vect-117.c: Remove scan on no profile
mismatches.
---
 gcc/params.opt   |  2 +-
 gcc/testsuite/gcc.dg/tree-prof/cmpsf-1.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr109893.c | 33 
 gcc/testsuite/gcc.dg/vect/vect-117.c |  2 --
 4 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109893.c

diff --git a/gcc/params.opt b/gcc/params.opt
index 0a1dfb46dd1..fed9d6e1826 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -143,7 +143,7 @@ Common Joined UInteger Var(param_file_cache_lines) Init(0) 
Param
 Max number of lines to index into file cache. When 0 this is automatically 
sized.
 
 -param=fsm-scale-path-stmts=
-Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(50) 
IntegerRange(1, 100) Param Optimization
+Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(54) 
IntegerRange(1, 100) Param Optimization
 Percentage of max-jump-thread-duplication-stmts to allow for the number of 
statements in a threading path crossing a loop backedge.
 
 -param=fully-pipelined-fma=
diff --git a/gcc/testsuite/gcc.dg/tree-prof/cmpsf-1.c 
b/gcc/testsuite/gcc.dg/tree-prof/cmpsf-1.c
index 537d15d4bfa..696f459e605 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/cmpsf-1.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/cmpsf-1.c
@@ -181,4 +181,4 @@ main (void)
   exit (0);
 }
 
-/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "dom2" } } */
+/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "dom2" { 
xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109893.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr109893.c
new file mode 100644
index 000..5c98664df72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109893.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom2" } */
+
+void foo(void);
+void bar(void);
+static char a;
+static int b, e, f;
+static int *c = &b, *g;
+int main() {
+int *j = 0;
+if (a) {
+g = 0;
+if (c)
+bar();
+} else {
+j = &e;
+c = 0;
+}
+if (c == &f == b || c == &e)
+;
+else
+__builtin_unreachable();
+if (g || e) {
+if (j == &e || j == 0)
+;
+else
+foo();
+}
+a = 4;
+}
+
+/* Jump threading in thread1 should enable to elide the call to foo.  */
+/* { dg-final { scan-tree-dump-not "foo" "dom2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-117.c 
b/gcc/testsuite/gcc.dg/vect/vect-117.c
index 4755e39f951..ddbf104cfbe 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-117.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-117.c
@@ -60,5 +60,3 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 0 
"vect" } } */
-
-/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { target { lp64 } 
} } } */
-- 
2.43.0

Re: [PATCH v4 1/2] tree-simplify: unify simple_comparison ops in vec_cond for bit and/or/xor [PR119196]

2025-07-08 Thread Richard Biener

On Thu, 3 Jul 2025, Icen Zeyada wrote:

> Merge simple_comparison patterns under a single vec_cond_expr for bit_and,
> bit_ior, and bit_xor in the simplify pass.
> 
> Ensure that when both operands of a bit_and, bit_or, or bit_xor are 
> simple_comparison
> results, they reside within the same vec_cond_expr rather than separate ones.
> This prepares the AST so that subsequent transformations (e.g., folding the
> comparisons if possible) can take effect.
> 
> PR tree-optimization/119196
> 
> gcc/ChangeLog:
> 
>   * match.pd: Merge multiple vec_cond_expr in a single one for
> bit_and, bit_ior and bit_xor.
> 
> Signed-off-by: Icen Zeyada 
> ---
>  gcc/match.pd | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f4416d9172c..36317b9128f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5939,6 +5939,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  && !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0)
> (vec_cond @0 (op! @1 @3) (op! @2 @4
>  
> +/* (@0 ? @2 : @3) lop (@1 ? @2 : @3)  -->  (@0 lop @1) ? @2 : @3.  */
> +(for lop (bit_and bit_ior bit_xor)
> +   (simplify
> +   (lop
> +  (vec_cond @0 integer_minus_onep@2 integer_zerop@3)

Why are you restricting this to integer_minus_onep/zerop?  Is
the assumption that such vec_cond is "cheap", thus we also
do not need to add :s to them?

> +  (vec_cond @1 @2 @3))
> +   (vec_cond (lop @0 @1) @2 @3)))

So this turns a logical operation on the data type to a logical
operation on the predicate type.  You need to check this
operation is supported by the target with

   target_supports_op_p (TREE_TYPE (@0), lop, optab_vector)

I think the transform is sensible for arbitrary @2/@3 though
in that case with :s on the vec_conds.

Richard.

> +
>  /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
>   (simplify
>(op (vec_cond:s @0 @1 @2) @3)
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v4 2/2] gimple-fold: extend vector simplification to match scalar bitwise optimizations [PR119196]

2025-07-08 Thread Richard Biener

On Thu, 3 Jul 2025, Icen Zeyada wrote:

> Generalize existing scalar gimple_fold rules to apply the same
> bitwise comparison simplifications to vector types.  Previously, an
> expression like
> 
> (x < y) && (x > y)
> 
> would fold to `false` if x and y are scalars, but equivalent vector
> comparisons were left untouched.  This patch enables folding of
> patterns of the form
> 
> (cmp x y) bit_and (cmp x y)
> (cmp x y) bit_ior (cmp x y)
> (cmp x y) bit_xor (cmp x y)
> 
> for vector operands as well, ensuring consistent optimization across
> all data types.

This patch looks good to me.

Thanks,
Richard.

> PR tree-optimization/119196
> 
> gcc/ChangeLog:
> 
>   * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply 
> to vectors.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/vector-compare-5.c: Add new test for vector 
> compare simplification.
> 
> Signed-off-by: Icen Zeyada 
> ---
>  gcc/match.pd  | 57 +---
>  .../gcc.target/aarch64/vector-compare-5.c | 67 +++
>  2 files changed, 113 insertions(+), 11 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 36317b9128f..80c02a0ab02 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3674,6 +3674,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
>&& TREE_CODE (@2) == INTEGER_CST)
>   || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
>|| POINTER_TYPE_P (TREE_TYPE (@1)))
>   && bitwise_equal_p (@1, @2)))
>  (with
> @@ -3712,27 +3714,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(if (code1 == EQ_EXPR && val) @3)
>(if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); 
> })
>(if (code1 == NE_EXPR && !val && allbits) @4)
> -  (if (code1 == NE_EXPR
> +  (if ((code1 == NE_EXPR
> && code2 == GE_EXPR
>  && cmp == 0
>  && allbits)
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (gt @c0 (convert @1)))
> -  (if (code1 == NE_EXPR
> +  (if ((code1 == NE_EXPR
> && code2 == LE_EXPR
>  && cmp == 0
>  && allbits)
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (lt @c0 (convert @1)))
>/* (a != (b+1)) & (a > b) -> a > (b+1) */
> -  (if (code1 == NE_EXPR
> +  (if ((code1 == NE_EXPR
> && code2 == GT_EXPR
>  && one_after
>  && allbits)
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (gt @c0 (convert @1)))
>/* (a != (b-1)) & (a < b) -> a < (b-1) */
> -  (if (code1 == NE_EXPR
> +  (if ((code1 == NE_EXPR
> && code2 == LT_EXPR
>  && one_before
>  && allbits)
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (lt @c0 (convert @1)))
>   )
>  )
> @@ -3751,6 +3765,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(if ((TREE_CODE (@1) == INTEGER_CST
>   && TREE_CODE (@2) == INTEGER_CST)
> || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1))
> +   && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
>   || POINTER_TYPE_P (TREE_TYPE (@1)))
>  && operand_equal_p (@1, @2)))
> (with
> @@ -3801,6 +3817,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
>&& TREE_CODE (@2) == INTEGER_CST)
>   || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +   || (VECTOR_TYPE_P (TREE_TYPE (@1)))
>   || POINTER_TYPE_P (TREE_TYPE (@1)))
>   && bitwise_equal_p (@1, @2)))
>  (with
> @@ -3842,24 +3859,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>(if (code1 == EQ_EXPR
> && code2 == GT_EXPR
>  && cmp == 0
> -&& allbits)
> +&& allbits
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GE_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (ge @c0 @2))
>(if (code1 == EQ_EXPR
> && code2 == LT_EXPR
>  && cmp == 0
> -&& allbits)
> +&& allbits
> +  && ((VECTOR_BOOLEAN_TYPE_P (type)
> +  && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LE_EXPR))
> +  || !VECTOR_TYPE_P (TREE_TYPE (@1
> (le @c0 @2))
>/* (a == (b-1)) | (a >= b) -> a >= (b-1) */
>(

Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations

2025-07-08 Thread Richard Sandiford

Tamar Christina  writes:
>> -Original Message-
>> From: Richard Sandiford 
>> Sent: Monday, July 7, 2025 12:55 PM
>> To: Kyrylo Tkachov 
>> Cc: GCC Patches ; Richard Earnshaw
>> ; Alex Coplan ; Andrew
>> Pinski 
>> Subject: Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations
>> 
>> Richard Sandiford  writes:
>> > Kyrylo Tkachov  writes:
>> >> Hi all,
>> >>
>> >> To handle DImode BCAX operations we want to do them on the SIMD side only
>> if
>> >> the incoming arguments don't require a cross-bank move.
>> >> This means we need to split back the combination to separate GP BIC+EOR
>> >> instructions if the operands are expected to be in GP regs through reload.
>> >> The split happens pre-reload if we already know that the destination will 
>> >> be
>> >> a GP reg. Otherwise if reload decides to use the "=r,r" alternative we 
>> >> ensure
>> >> operand 0 is early-clobber.
>> >> This scheme is similar to how we handle the BSL operations elsewhere in
>> >> aarch64-simd.md.
>> >>
>> >> Thus, for the functions:
>> >> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, 
>> >> b, c); }
>> >> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return 
>> >> BCAX (a,
>> b, c); }
>> >>
>> >> we now generate the desired:
>> >> bcax_d_gp:
>> >> bic x1, x1, x2
>> >> eor x0, x1, x0
>> >> ret
>> >>
>> >> bcax_d:
>> >> bcax v0.16b, v0.16b, v1.16b, v2.16b
>> >> ret
>> >>
>> >> When the inputs are in SIMD regs we use BCAX and when they are in GP regs 
>> >> we
>> >> don't force them to SIMD with extra moves.
>> >>
>> >> Bootstrapped and tested on aarch64-none-linux-gnu.
>> >> Ok for trunk?
>> >> Thanks,
>> >> Kyrill
>> >>
>> >> Signed-off-by: Kyrylo Tkachov 
>> >>
>> >> gcc/
>> >>
>> >>   * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>> >>   define_insn_and_split.
>> >>
>> >> gcc/testsuite/
>> >>
>> >>   * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
>> >>
>> >> From 95268cff1261a7724190dd291f9fcb5a7c817917 Mon Sep 17 00:00:00
>> 2001
>> >> From: Kyrylo Tkachov 
>> >> Date: Thu, 3 Jul 2025 09:45:02 -0700
>> >> Subject: [PATCH 3/7] aarch64: Handle DImode BCAX operations
>> >>
>> >> To handle DImode BCAX operations we want to do them on the SIMD side only
>> if
>> >> the incoming arguments don't require a cross-bank move.
>> >> This means we need to split back the combination to separate GP BIC+EOR
>> >> instructions if the operands are expected to be in GP regs through reload.
>> >> The split happens pre-reload if we already know that the destination will 
>> >> be
>> >> a GP reg.  Otherwise if reload decides to use the "=r,r" alternative we 
>> >> ensure
>> >> operand 0 is early-clobber.
>> >> This scheme is similar to how we handle the BSL operations elsewhere in
>> >> aarch64-simd.md.
>> >>
>> >> Thus, for the functions:
>> >> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX (a, 
>> >> b, c); }
>> >> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return 
>> >> BCAX (a,
>> b, c); }
>> >>
>> >> we now generate the desired:
>> >> bcax_d_gp:
>> >> bic x1, x1, x2
>> >> eor x0, x1, x0
>> >> ret
>> >>
>> >> bcax_d:
>> >> bcaxv0.16b, v0.16b, v1.16b, v2.16b
>> >> ret
>> >>
>> >> When the inputs are in SIMD regs we use BCAX and when they are in GP regs 
>> >> we
>> >> don't force them to SIMD with extra moves.
>> >>
>> >> Bootstrapped and tested on aarch64-none-linux-gnu.
>> >>
>> >> Signed-off-by: Kyrylo Tkachov 
>> >>
>> >> gcc/
>> >>
>> >>   * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
>> >>   define_insn_and_split.
>> >>
>> >> gcc/testsuite/
>> >>
>> >>   * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode arguments.
>> >> ---
>> >>  gcc/config/aarch64/aarch64-simd.md| 29 +++
>> >>  .../gcc.target/aarch64/simd/bcax_d.c  |  6 +++-
>> >>  2 files changed, 34 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/gcc/config/aarch64/aarch64-simd.md
>> b/gcc/config/aarch64/aarch64-simd.md
>> >> index 4493e55603d..be6a16b4be8 100644
>> >> --- a/gcc/config/aarch64/aarch64-simd.md
>> >> +++ b/gcc/config/aarch64/aarch64-simd.md
>> >> @@ -9252,6 +9252,35 @@
>> >>[(set_attr "type" "crypto_sha3")]
>> >>  )
>> >>
>> >> +(define_insn_and_split "*bcaxqdi4"
>> >> +  [(set (match_operand:DI 0 "register_operand" "=w,&r")
>> >> + (xor:DI
>> >> +   (and:DI
>> >> + (not:DI (match_operand:DI 3 "register_operand" "w,r"))
>> >> + (match_operand:DI 2 "register_operand" "w,r"))
>> >> +   (match_operand:DI 1 "register_operand" "w,r")))]
>> >
>> > I think the constraint on operand 1 should be "w,r0", so that we allow
>> > operand 1 to be the same as operand 0.  Without that, and with split1
>> > disabled/sidelined, we would end up with an extra move for:
>> >
>> >   uint64_t f(uint64_t x0, uint64_t x1, uint64_t x2) {
>> > return x0 ^ (x1 & ~x2);
>> >   }
>> >
>> > (The only reason split1 avoids the extra move is that combine combines
>> > th

RE: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-08 Thread Robert Dubner

> -Original Message-
> From: Richard Biener 
> Sent: Tuesday, July 8, 2025 14:22
> To: Robert Dubner 
> Cc: Rainer Orth ; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.
>
> On Tue, Jul 8, 2025 at 12:46 AM Robert Dubner  wrote:
> >
> >
> >
> > > -Original Message-
> > > From: Rainer Orth 
> > > Sent: Monday, July 7, 2025 18:08
> > > To: Robert Dubner 
> > > Cc: gcc-patches@gcc.gnu.org
> > > Subject: Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.
> > >
> > > Hi Robert,
> > >
> > > > I have elsewhere described my frustration in trying, during
> > development,
> > > > to set more stringent error-finding and warning-generating
> compilation
> > > > options.  But they seem to require the use of CXXFLAGS, which in at
> > > least
> > > > the case of trying to set -std=c++14, causes libcody's compilation
> to
> > > > fail, since it wants, specifically c++11.
> > > >
> > > > So, I dove in and figured out how to implement CXXFLAGS_FOR_COBOL.
> > This
> > > > new flag applies only to compilations of c++ programs in the
> gcc/cobol
> > > > source code tree.
> > > >
> > > > These changes were tested with a bootstrap build of
> > > > --enable-languages=c,c++,fortran,cobol.
> > > >
> > > > Is this okay for trunk?
> > >
> > > I think there's a far easier way which doesn't require any patch.
> Have
> > > a look at toplevel configure.ac:
> > >
> > > # When bootstrapping with GCC, build stage 1 in C++14 mode to ensure
> > that
> > > a
> > > # C++14 compiler can still start the bootstrap.  Otherwise, if
> building
> > > GCC,
> > > # require C++14 (or higher).
> > > if test "$enable_bootstrap:$GXX" = "yes:yes"; then
> > >   CXX="$CXX -std=c++14"
> > > elif test "$have_compiler" = yes; then
> > >   AX_CXX_COMPILE_STDCXX(14)
> > >
> > >   if test "${build}" != "${host}"; then
> > > AX_CXX_COMPILE_STDCXX(14, [], [], [_FOR_BUILD])
> > >   fi
> > > fi
> > >
> > > You should be able to achieve what you want by building with CXX='g++
> > > -std=c++14', just as one can build a 32-bit gcc with a 64-bit host
> > > compiler using CXX='g++ -m32'.
> >
> > In this case, although I reported my patch works with a bootstrap build,
> I
> > need it to work with an --disable-bootstrap build.  I did the bootstrap
> > just to make sure I hadn't broken something.  I anticipate using
> > CXXFLAGS_FOR_COBOL during development.
> >
> > When I try
> >
> > CXX=banana ../configure ... & make ...
>
> For me:
>
> obj> CXX=banana ~/src/gcc/configure --disable-bootstrap
> configure: loading site script /usr/share/site/x86_64-unknown-linux-gnu
> checking build system type... x86_64-pc-linux-gnu
> checking host system type... x86_64-pc-linux-gnu
> checking target system type... x86_64-pc-linux-gnu
> checking for a BSD-compatible install... /usr/bin/install -c
> checking whether ln works... yes
> ...
> checking whether banana supports C++14 features by default... no
> checking whether banana supports C++14 features with -std=gnu++14... no
> checking whether banana supports C++14 features with -std=gnu++1y... no
> checking whether banana supports C++14 features with -std=c++14... no
> checking whether banana supports C++14 features with +std=c++14... no
> checking whether banana supports C++14 features with -h std=c++14... no
> checking whether banana supports C++14 features with -std=c++1y... no
> checking whether banana supports C++14 features with +std=c++1y... no
> checking whether banana supports C++14 features with -h std=c++1y... no
> configure: error: *** A compiler with support for C++14 language
> features is required.
>
> so setting CXX definitely makes a difference even with --disable-
> bootstrap.
>
> It seems libcody alters CXX to force -std=c++11 but as CXXFLAGS comes
> after it when you add -std=c++14 to it it will break.
>
> So using CXX="g++ -std=c++14" should do the trick.  You do not need to
> re-specify that at make time.  It works for me and builds libcody just
> fine.

I agree that

CXX=banana ../configure

fails quickly regardless of any other configure parameters.  I don't know 
what I did wrong that led me to suggest otherwise.

This apparently does provide a solution for -std=c++14, one that does an end 
run around the problems that show up with

CXXFLAGS="-std=c++14"

although I have to wonder why that's not regarded as a problem.

But I have other requirements.  I apologize if I keep repeating myself, but 
I am being forced to.

I want to be able to, for example,

CXXFLAGS='-ggdb -O0' CXXFLAGS_FOR_COBOL="-Wsomething_or_other" 
../configure 


I want those warnings to apply to gcc/cobol and not to anything else.

And then, later on, I want to be able to

make CXXFLAGS_FOR_COBOL="-Wreplacement_warnings"

 I can't do that with the CXXFLAGS= or CXX= solutions.



>
> Richard.
>
> > and
> > make CXX=banana
> >
> > in both cases, the build succeeds.  The "CXX=banana" seems to do nothing
> > when --disable-bootstrap is active.
> >
>

Re: [PATCH] x86: Keep non all 0s/1s redundant vector loads on AMD znverN

2025-07-08 Thread H.J. Lu

On Tue, Jul 8, 2025 at 7:26 PM Richard Biener
 wrote:
>
> On Tue, Jul 8, 2025 at 12:48 PM H.J. Lu  wrote:
> >
> > aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
> > commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
> > Author: H.J. Lu 
> > Date:   Fri May 9 07:17:07 2025 +0800
> >
> > x86: Extend the remove_redundant_vector pass
> >
> > which removed non all 0s/1s redundant vector loads, caused SPEC CPU 2017
> > 519.lbm_r and 470.lbm performance regressions on AMD znverN processors.
> > Add a tuning option to keep non all 0s/1s redundant vector loads on AMD
> > znverN processors.
>
> Do we know what actually happens here or is this basically reverting the 
> change
> based on a new tunable and the reported regression?
>
> If I read the pass correctly it might insert broadcasts on paths where
> not originally
> computed (it inserts after the scalar def, which might be far away).
> ix86_broadcast_inner
> suggests it replaces extracts from a broadcast with the original
> broadcast value/register
> which means it might increase lifetime of the broadcast register.
>
> Both shouldn't be causing specifically regressions on Zen2, but can be
> bad.   I think
> we need to understand better what the pass does (it's written without
> much commentary,
> so I tried to quickly reverse engineer it), and improve it, avoiding
> cases where it
> obviously increases register lifetime.

The regression doesn't show up on Intel processors.  This regression is specific
to AMD processors.  If there is a small testcase, I will find a
different way to fix
it.

> > gcc/
> >
> > PR target/120941
> > * config/i386/i386-features.cc (ix86_broadcast_inner): Keep
> > non all 0s/1s redundant vector loads if asked.
> > * config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
> > New tuning.
> >
> > gcc/testsuite/
> >
> > PR target/120941
> > * gcc.target/i386/pr120941-1a.c: New test.
> > * gcc.target/i386/pr120941-1b.c: Likewise.
> > * gcc.target/i386/pr120941-1c.c: Likewise.
> > * gcc.target/i386/pr120941-1d.c: Likewise.
> >
> > OK for master?
> >
> > Thanks.
> >
> > --
> > H.J.



-- 
H.J.

[PATCH] libstdc++: Add more template keywords to <mdspan> for Clang

2025-07-08 Thread Jonathan Wakely

This fixes:

include/c++/16.0.0/mdspan:1182:33: error: use 'template' keyword to treat 
'mapping' as a dependent template name
 1182 |   const typename _OLayout::mapping<_OExtents>&>
  |^
include/c++/16.0.0/mdspan:1185:31: error: use 'template' keyword to treat 
'mapping' as a dependent template name
 1185 | const typename _OLayout::mapping<_OExtents>&, mapping_type>
  |  ^

libstdc++-v3/ChangeLog:

* include/std/mdspan (mdspan): Add template keyword for
dependent name.
---

Tested x86_64-linux.

 libstdc++-v3/include/std/mdspan | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 5d16de5d9072..b34116a85e67 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -1179,10 +1179,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
requires is_constructible_v&>
+ const typename _OLayout::template mapping<_OExtents>&>
  && is_constructible_v
constexpr explicit(!is_convertible_v<
-   const typename _OLayout::mapping<_OExtents>&, mapping_type>
+   const typename _OLayout::template mapping<_OExtents>&, mapping_type>
  || !is_convertible_v)
mdspan(const mdspan<_OElementType, _OExtents, _OLayout, _OAccessor>&
 __other)
-- 
2.50.0

[PATCH] libstdc++: Fix __uninitialized_default for constexpr case

2025-07-08 Thread Jonathan Wakely

We should not use the std::fill optimization for trivial types during
constant evaluation, because we need to begin the lifetime of all
objects, even trivially default constructible ones.

This fixes a bug that Clang diagnosed:

include/c++/16.0.0/bits/stl_algobase.h:925:11: note: assignment to object 
outside its lifetime is not allowed in a constant expression
  925 | *__first = __val;
  | ~^~~

I initially just added the #ifdef __cpp_lib_is_constant_evaluated check,
but that gave warnings with GCC because the function isn't constexpr
until C++26. So then I tried checking __glibcxx_raw_memory_algorithms
for the value indicating constexpr uninitialized_value_construct, but
that macro depends on __cpp_constexpr >= 202406 and Clang 19 doesn't
support constexpr placement new, so doesn't define it.

So I decided to just change __uninitialized_default to use
_GLIBCXX20_CONSTEXPR which is consistent with __uninitialized_default_n
(which needs to be constexpr because it's used by std::vector). We don't
currently need to use __uninitialized_default in constexpr contexts for
C++20 code, but we might find uses for it, so now it would be possible.

libstdc++-v3/ChangeLog:

* include/bits/stl_uninitialized.h (__uninitialized_default):
Do not use optimized implementation for constexpr case. Use
_GLIBCXX20_CONSTEXPR instead of _GLIBCXX26_CONSTEXPR.
---

Tested x86_64-linux.

 libstdc++-v3/include/bits/stl_uninitialized.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h 
b/libstdc++-v3/include/bits/stl_uninitialized.h
index 3a37ddc71ba1..351c3a17457f 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -922,11 +922,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // __uninitialized_default
   // Fills [first, last) with value-initialized value_types.
   template
-_GLIBCXX26_CONSTEXPR
+_GLIBCXX20_CONSTEXPR
 inline void
 __uninitialized_default(_ForwardIterator __first,
_ForwardIterator __last)
 {
+#ifdef __cpp_lib_is_constant_evaluated
+  if (std::is_constant_evaluated())
+   return __uninitialized_default_1::
+__uninit_default(__first, __last);
+#endif
+
   typedef typename iterator_traits<_ForwardIterator>::value_type
_ValueType;
   // trivial types can have deleted assignment
-- 
2.50.0

[PATCH] libstdc++: Do not use list-initialization in std::span members [PR120997]

2025-07-08 Thread Jonathan Wakely

As the bug report shows, for span<const bool> the return statements of
the form `return {data(), count};` will use the new C++26 constructor,
span(initializer_list<value_type>).

Although the conversions from data() to bool and count to bool are
narrowing and should be ill-formed, in system headers the narrowing
diagnostics are suppressed. In any case, even if the compiler diagnosed
them as ill-formed, we still don't want the initializer_list constructor
to be used. We want to use the span(element_type*, size_t) constructor
instead.

Replace the braced-init-list uses with S(data(), count) where S is the
correct return type. We need to make similar changes in the C++26
working draft, which will be taken care of via an LWG issue.

libstdc++-v3/ChangeLog:

PR libstdc++/120997
* include/std/span (span::first, span::last, span::subspan): Do
not use braced-init-list for return statements.
* testsuite/23_containers/span/120997.cc: New test.
---

Tested x86_64-linux.

 libstdc++-v3/include/std/span | 15 +++---
 .../testsuite/23_containers/span/120997.cc| 46 +++
 2 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/23_containers/span/120997.cc

diff --git a/libstdc++-v3/include/std/span b/libstdc++-v3/include/std/span
index 5629a71b9bd2..44f9b36a7efe 100644
--- a/libstdc++-v3/include/std/span
+++ b/libstdc++-v3/include/std/span
@@ -376,7 +376,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  else
static_assert(_Count <= extent);
  using _Sp = span;
- return _Sp{ _SizedPtr{this->data()} };
+ return _Sp(_SizedPtr{this->data()});
}
 
   [[nodiscard]]
@@ -384,7 +384,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   first(size_type __count) const noexcept
   {
__glibcxx_assert(__count <= size());
-   return { this->data(), __count };
+   return span(this->data(), __count);
   }
 
   template
@@ -397,7 +397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  else
static_assert(_Count <= extent);
  using _Sp = span;
- return _Sp{ _SizedPtr{this->data() + (this->size() - _Count)} };
+ return _Sp(_SizedPtr{this->data() + (this->size() - _Count)});
}
 
   [[nodiscard]]
@@ -405,7 +405,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   last(size_type __count) const noexcept
   {
__glibcxx_assert(__count <= size());
-   return { this->data() + (this->size() - __count), __count };
+   return span(this->data() + (this->size() - __count),
+ __count);
   }
 
   template
@@ -424,7 +425,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  using _Sp = span()>;
 
  if constexpr (_Count == dynamic_extent)
-   return _Sp{ this->data() + _Offset, this->size() - _Offset };
+   return _Sp(this->data() + _Offset, this->size() - _Offset);
  else
{
  if constexpr (_Extent == dynamic_extent)
@@ -437,7 +438,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  static_assert(_Count <= extent);
  static_assert(_Count <= (extent - _Offset));
}
- return _Sp{ _SizedPtr{this->data() + _Offset} };
+ return _Sp(_SizedPtr{this->data() + _Offset});
}
}
 
@@ -454,7 +455,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__glibcxx_assert(__count <= size());
__glibcxx_assert(__offset + __count <= size());
  }
-   return {this->data() + __offset, __count};
+   return span(this->data() + __offset, __count);
   }
 
 private:
diff --git a/libstdc++-v3/testsuite/23_containers/span/120997.cc 
b/libstdc++-v3/testsuite/23_containers/span/120997.cc
new file mode 100644
index ..fbf194c87388
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/span/120997.cc
@@ -0,0 +1,46 @@
+// { dg-do run { target c++26 } }
+
+#include 
+#include 
+
+void
+test_first()
+{
+  bool arr[5];
+  std::span s(arr);
+  std::span s2 = s.first(5);
+  VERIFY( s2.data() == s.data() );
+  std::span s3 = s.first<5>();
+  VERIFY( s3.data() == s.data() );
+}
+
+void
+test_last()
+{
+  bool arr[5];
+  std::span s(arr);
+  std::span s2 = s.last(5);
+  VERIFY( s2.data() == s.data() );
+  std::span s3 = s.last<5>();
+  VERIFY( s3.data() == s.data() );
+}
+
+void
+test_subspan()
+{
+  bool arr[5];
+  std::span s(arr);
+  std::span s2 = s.subspan(0, 5);
+  VERIFY( s2.data() == s.data() );
+  std::span s3 = s.subspan<0>();
+  VERIFY( s3.data() == s.data() );
+  std::span s4 = s.subspan<0, 5>();
+  VERIFY( s4.data() == s.data() );
+}
+
+int main()
+{
+  test_first();
+  test_last();
+  test_subspan();
+}
-- 
2.50.0

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-08 Thread Siddhesh Poyarekar


On 2025-07-08 17:17, Qing Zhao wrote:

Are the above the correct and efficient updates to the .ACCESS_WITH_SIZE to 
resolve both PR121000 and the issue
we have with counted_by for pointers?


I don't know about PR121000, but for counted_by with pointers, I think 
the REF_TO_OBJ (and the result_type) would also have to be a->fam and 
not &a->fam, i.e. don't generate an INDIRECT_REF to the .ACCESS_WITH_SIZE.


Thanks,
Sid

[PATCH] libstdc++: Implement std::chrono::current_zone() for Windows

2025-07-08 Thread Björn Schäpers

From: Björn Schäpers 

I have based this on my previous (not yet landed) patch, but it only
reuses the #ifdef to include <array>. Since std::array isn't used
anywhere else I thought that was the right place to put it.

I hope the formatting is okay.

I've used wide strings for the Windows zone name and territory, since
the Windows API returns wide strings and thus they can be compared
directly. For the territory there exists a narrow string API, but
internally it calls the wide string version and narrows it down. If
desired I can switch to narrow strings, the conversion can be done by
static_cast per character since only ASCII chars are used.

-- >8 --
On Windows there is no API to get the current time zone as IANA name,
instead Windows has its own zones. But there exists a mapping provided
by the Unicode Consortium. This patch adds a script to convert the XML
file with the mapping to a lookup table and adds a Windows code path to
use that mapping.

libstdc++-v3/Changelog:

Implement std::chrono::current_zone() for Windows

* include/bits/windows_zones-map.h: New file, contains the look
up table.
* scripts/gen_windows_zones_map.py: New file, generates
windows_zones-map.h.
* src/c++20/tzdb.cc (tzdb::current_zone): Add Windows code path.

Signed-off-by: Björn Schäpers 
---
 libstdc++-v3/include/bits/windows_zones-map.h | 407 ++
 libstdc++-v3/scripts/gen_windows_zones_map.py | 128 ++
 libstdc++-v3/src/c++20/tzdb.cc| 102 -
 3 files changed, 635 insertions(+), 2 deletions(-)
 create mode 100644 libstdc++-v3/include/bits/windows_zones-map.h
 create mode 100644 libstdc++-v3/scripts/gen_windows_zones_map.py

diff --git a/libstdc++-v3/include/bits/windows_zones-map.h 
b/libstdc++-v3/include/bits/windows_zones-map.h
new file mode 100644
index 000..7be736b063d
--- /dev/null
+++ b/libstdc++-v3/include/bits/windows_zones-map.h
@@ -0,0 +1,407 @@
+// Generated by scripts/gen_windows_zones_map.py, do not edit.
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// .
+
+/** @file bits/windows_zones-map.h
+ *  This is an internal header file, included by other library headers.
+ *  Do not attempt to use it directly. @headername{chrono}
+ */
+
+#ifndef _GLIBCXX_GET_WINDOWS_ZONES_MAP
+# error "This is not a public header, do not include it directly"
+#endif
+
+struct windows_zone_map_entry
+{
+  wstring_view windows_name;
+  wstring_view territory;
+  string_view iana_name;
+};
+
+static constexpr array windows_zone_map{
+  {
+{L"AUS Central Standard Time", L"001", "Australia/Darwin"},
+{L"AUS Eastern Standard Time", L"001", "Australia/Sydney"},
+{L"Afghanistan Standard Time", L"001", "Asia/Kabul"},
+{L"Alaskan Standard Time", L"001", "America/Anchorage"},
+{L"Aleutian Standard Time", L"001", "America/Adak"},
+{L"Altai Standard Time", L"001", "Asia/Barnaul"},
+{L"Arab Standard Time", L"001", "Asia/Riyadh"},
+{L"Arab Standard Time", L"BH", "Asia/Bahrain"},
+{L"Arab Standard Time", L"KW", "Asia/Kuwait"},
+{L"Arab Standard Time", L"QA", "Asia/Qatar"},
+{L"Arab Standard Time", L"YE", "Asia/Aden"},
+{L"Arabian Standard Time", L"001", "Asia/Dubai"},
+{L"Arabian Standard Time", L"OM", "Asia/Muscat"},
+{L"Arabian Standard Time", L"ZZ", "Etc/GMT-4"},
+{L"Arabic Standard Time", L"001", "Asia/Baghdad"},
+{L"Argentina Standard Time", L"001", "America/Buenos_Aires"},
+{L"Astrakhan Standard Time", L"001", "Europe/Astrakhan"},
+{L"Atlantic Standard Time", L"001", "America/Halifax"},
+{L"Atlantic Standard Time", L"BM", "Atlantic/Bermuda"},
+{L"Atlantic Standard Time", L"GL", "America/Thule"},
+{L"Aus Central W. Standard Time", L"001", "Australia/Eucla"},
+{L"Azerbaijan Standard Time", L"001", "Asia/Baku"},
+{L"Azores Standard Time", L"001", "Atlantic/Azores"},
+{L"Azores Standard Time", L"GL", "America/Scoresbysund"},
+{L"Bahia Standard Time", L"001", "America/Bahia"},
+{L"Bangladesh

RE: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-08 Thread Robert Dubner

> -Original Message-
> From: Andreas Schwab 
> Sent: Tuesday, July 8, 2025 10:01
> To: Robert Dubner 
> Cc: Rainer Orth ; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.
> 
> There is already $(CFLAGS-$@) to add extra flags for a particular
> target.  This could easily be extended by adding $(CFLAGS-$(@D)) to add
> flags for all targets in a directory.

I am unfamiliar with this, and I haven't been able to figure it out from
the configure and make files.

Are you suggesting that I can somehow apply a specific set of flags when
compiling, for example,

gcc/cobol/genapi.cc

If so, how could I do that?



> 
> --
> Andreas Schwab, SUSE Labs, sch...@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."

Re: [PATCH] tree-optimization/120929: Limit MEM_REF handling to .ACCESS_WITH_SIZE

2025-07-08 Thread Qing Zhao

Hi, Jakub,

Thanks a lot for your comments and suggestions. Please see my questions below:

> On Jul 7, 2025, at 17:47, Jakub Jelinek  wrote:
> 
> On Mon, Jul 07, 2025 at 09:18:53PM +, Qing Zhao wrote:
>> From OLD:
>> 
>> _2 = &a->c;
>> _3 = &a->count;
>> _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B);
>> _4 = *_1;
>> D.2964 = __builtin_dynamic_object_size (_4, 1);
>> 
>> To NEW:
>> 
>> _2 = a->c;
>> _3 = &a->count;
>> _1 = .ACCESS_WITH_SIZE (_2, _3, 1, 0, -1, 0B, 0);
> >> D.2964 = __builtin_dynamic_object_size (_1, 1);
>> 
>> 
>> NOTE, in the above, in addition to pass “a->c” instead of “&a->c” as the 
>> first parameter,  I also
>> added one more argument for .ACCESS_WITH_SIZE:
>> 
>> +   the 7th argument of the call is 1 when for FAM, 0 for pointers.
>> 
>> To distinguish whether this .ACCESS_WITH_SIZE is for FAM or for pointers. 
>> And this argument will be used in tree-object-size.cc 
>>  to get the element_type of the associated FAM 
>> or pointer array.
> 
> Even 6 arguments is IMHO too much.
> /* Expand the IFN_ACCESS_WITH_SIZE function:
>   ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE,
> TYPE_OF_SIZE, ACCESS_MODE)
>   which returns the REF_TO_OBJ same as the 1st argument;
> 
>   1st argument REF_TO_OBJ: The reference to the object;
>   2nd argument REF_TO_SIZE: The reference to the size of the object,
>   3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE 
> represents
> 0: the number of bytes.
> 1: the number of the elements of the object type;
>   4th argument TYPE_OF_SIZE: A constant 0 with its TYPE being the same as the 
> TYPE
>of the object referenced by REF_TO_SIZE
>   5th argument ACCESS_MODE:
>-1: Unknown access semantics
> 0: none
> 1: read_only
> 2: write_only
> 3: read_write
>   6th argument: A constant 0 with the pointer TYPE to the original flexible
> array type.
> 
> I agree with argument 1 and 2 and agree we need 2 INTEGER_CST arguments with
> the 2 pointer types.  Nobody says those 2 arguments have to be 0 though,
> they can be some other INTEGER_CST, similarly how MEM_REF's second argument
> is INTEGER_CST with type meaning something and value something different.
> Perhaps one can be that -1/0/1/2/3 and another one a bitmask for the
> remaining flags, or one can be say 0/1/2/3/4 ored with 0/8 ored with 0/16.
> 
> Though, it is unclear to me how the "the number of the elements of the
> object type" actually works.  If the FAM has constant sized elements
> or pointer points to constant sized element, I agree you can just grab the
> size from TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gimple_call_arg (call, 5)))).
> But what if the FAM has a variable length type or it is pointer to VLA?
> Trying to use TYPE_SIZE_UNIT will not really work well in that case, while
> perhaps during gimplification it will be gimplified and exist, later
> optimizations will not see it being used and can optimize it away.
> If all you care is to get the size from that, why don't you just pass
> the size as the argument?  So instead of that 0: the number of bytes
> 1: the number of the elements of the object type + the former 6th
> argument just pass one argument, 1 if it is the "the number of bytes"
> case and some other number, the size of the element.  So in all cases
> the size in bytes is effectively *(type_of_size *)ref_to_size * eltsz
> This argument would be INTEGER_CST whenever it is not VLA or the VLA size.


From my understanding of the above comments, there are mainly two problems
in the current  design of .ACCESS_WITH_SIZE:

1. The correctness issue: As shown in PR121000: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121000:

The size of the element of the FAM actually _cannot_ be reliably gotten 
from 
  TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gimple_call_arg (call, 5))))
When the element of the FAM has a variable length type. 

In order to resolve this correctness issue, in addition to the current 
information that we pass to the call to
.ACCESS_WITH_SIZE, we SHOULD pass an additional information, i.e. the 
original TYPE_SIZE_UNIT of 
the element TYPE of the FAM to the call to  .ACCESS_WITH_SIZE.   

And with this additional information, we don’t need to distinguish whether 
the .ACCESS_WITH_SIZE is for FAMs
or for pointers anymore.

As a result, the new ACCESS_WITH_SIZE is:  (change the 6th argument to the 
TYPE_SIZE_UNIT 
   of the element TYPE of the FAM or the pointer points to)

"ACCESS_WITH_SIZE (REF_TO_OBJ, REF_TO_SIZE, CLASS_OF_SIZE,
TYPE_OF_SIZE, ACCESS_MODE)
  which returns the REF_TO_OBJ same as the 1st argument;

  1st argument REF_TO_OBJ: The reference to the object;
  2nd argument REF_TO_SIZE: The reference to the size of the object,
  3rd argument CLASS_OF_SIZE: The size referenced by the REF_TO_SIZE represents
0: the number of bytes.
1: the number of the elements of the object type;
  4th argument TYPE_OF_SI

[committed] libstdc++: Fix double free in new pool resource test [PR118681]

2025-07-08 Thread Jonathan Wakely

This was supposed to free p1 and p2, not free p2 twice.

libstdc++-v3/ChangeLog:

PR libstdc++/118681
* testsuite/20_util/unsynchronized_pool_resource/118681.cc: Fix
deallocate argument.
---

Tested x86_64-linux. Pushed to trunk.

Thanks to Daniel Boles for noticing this.

 .../testsuite/20_util/unsynchronized_pool_resource/118681.cc| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc 
b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
index 87e1b1d94043..9935f793cf91 100644
--- a/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
+++ b/libstdc++-v3/testsuite/20_util/unsynchronized_pool_resource/118681.cc
@@ -39,7 +39,7 @@ test_alignment(std::pmr::memory_resource& res, bool dealloc)
 
   if (dealloc)
   {
-   res.deallocate(p2, size, alignment);
+   res.deallocate(p1, size, alignment);
res.deallocate(p2, size, alignment);
   }
 }
-- 
2.50.0

Re: [SNAPv4] libstdc++: Add NTTP bind_front, -back, not_fn (P2714) [PR119744]

2025-07-08 Thread Tomasz Kaminski

On Tue, Jul 8, 2025 at 5:41 AM Nathan Myers  wrote:

> This is a snapshot of work in progress, for reference.
> bind_front(...) is uglified directly from the sample
> implementation in P2714, at include/std/functional:1284 .
>
> Test failures:
>
> bind_front/1.cc:53: error: static assertion failed
> bind_front/1.cc:57: error: static assertion failed
> bind_front/1.cc:214: error: static assertion failed
> bind_front/1.cc:215: error: static assertion failed
> bind_front/1.cc:216: required from here
> functional:1301: error: invalid conversion from
> 'std::invoke_result_t&,
> void*&>' {aka 'void*'} to 'int' [-fpermissive]
> [... etc. ]
> Also complains about 218, 220, 231, 233-6, 264, 267
>
The issue is raised on the line:
 int& i6 = g6(vp);
 VERIFY( &i6 == &i );
Where G6 is defined as follows:
 auto g6 = bind_front(std::ref(i)); // bound arg of type int&
  using G6 = decltype(g6);
And f:
  struct F
  {
int& operator()(int& i, void*) { return i; }
void* operator()(int, void* p) const { return p; }
  };
 constexpr static F f{};

As the template parameter object, i.e. what id-expression f refers to  in
bind_front is always constant,
g6(vp) i.e. bind_front(ref(i), vp) calls f(ref(i), vp), and because f is
const qualified, the only viable candidate is:
void* operator()(int, void* p) const { return p; }
So you get void* returned, that int& obviously cannot bind to.


>
> libstdc++-v3/ChangeLog:
> PR libstdc++/119744
> * include/bits/version.def: Redefine __cpp_lib_bind_front etc.
> * include/bits/version.h: Ditto.
> * include/std/functional: Add new bind_front etc. overloads
> * testsuite/20_util/function_objects/bind_front/1.cc
> ---
>  libstdc++-v3/include/bits/version.def |  12 ++
>  libstdc++-v3/include/bits/version.h   |  21 ++-
>  libstdc++-v3/include/std/functional   | 124 +-
>  .../20_util/function_objects/bind_front/1.cc  | 103 ++-
>  4 files changed, 278 insertions(+), 5 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/version.def
> b/libstdc++-v3/include/bits/version.def
> index 5d5758bf203..8ab9a7207e7 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -463,6 +463,10 @@ ftms = {
>
>  ftms = {
>name = not_fn;
> +  values = {
> +v = 202306;
> +cxxmin = 26;
> +  };
>values = {
>  v = 201603;
>  cxxmin = 17;
> @@ -776,6 +780,10 @@ ftms = {
>
>  ftms = {
>name = bind_front;
> +  values = {
> +v = 202306;
> +cxxmin = 26;
> +  };
>values = {
>  v = 201907;
>  cxxmin = 20;
> @@ -784,6 +792,10 @@ ftms = {
>
>  ftms = {
>name = bind_back;
> +  values = {
> +v = 202306;
> +cxxmin = 26;
> +  };
>values = {
>  v = 202202;
>  cxxmin = 23;
> diff --git a/libstdc++-v3/include/bits/version.h
> b/libstdc++-v3/include/bits/version.h
> index 2b00e8419b3..c204ae3c48c 100644
> --- a/libstdc++-v3/include/bits/version.h
> +++ b/libstdc++-v3/include/bits/version.h
> @@ -511,7 +511,12 @@
>  #undef __glibcxx_want_make_from_tuple
>
>  #if !defined(__cpp_lib_not_fn)
> -# if (__cplusplus >= 201703L)
> +# if (__cplusplus >  202302L)
> +#  define __glibcxx_not_fn 202306L
> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_not_fn)
> +#   define __cpp_lib_not_fn 202306L
> +#  endif
> +# elif (__cplusplus >= 201703L)
>  #  define __glibcxx_not_fn 201603L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_not_fn)
>  #   define __cpp_lib_not_fn 201603L
> @@ -866,7 +871,12 @@
>  #undef __glibcxx_want_atomic_value_initialization
>
>  #if !defined(__cpp_lib_bind_front)
> -# if (__cplusplus >= 202002L)
> +# if (__cplusplus >  202302L)
> +#  define __glibcxx_bind_front 202306L
> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_bind_front)
> +#   define __cpp_lib_bind_front 202306L
> +#  endif
> +# elif (__cplusplus >= 202002L)
>  #  define __glibcxx_bind_front 201907L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_bind_front)
>  #   define __cpp_lib_bind_front 201907L
> @@ -876,7 +886,12 @@
>  #undef __glibcxx_want_bind_front
>
>  #if !defined(__cpp_lib_bind_back)
> -# if (__cplusplus >= 202100L) && (__cpp_explicit_this_parameter)
> +# if (__cplusplus >  202302L)
> +#  define __glibcxx_bind_back 202306L
> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_bind_back)
> +#   define __cpp_lib_bind_back 202306L
> +#  endif
> +# elif (__cplusplus >= 202100L) && (__cpp_explicit_this_parameter)
>  #  define __glibcxx_bind_back 202202L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_bind_back)
>  #   define __cpp_lib_bind_back 202202L
> diff --git a/libstdc++-v3/include/std/functional
> b/libstdc++-v3/include/std/functional
> index 307bcb95bcc..21f0b1cb2d5 100644
> --- a/libstdc++-v3/include/std/functional
> +++ b/libstdc++-v3/include/std/functional
> @@ -940,7 +940,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   _M_bound_args(std::forward<_Arg

Re: [Ping, Fortran, Patch, PR120637, v1] Ensure expression in finalizer creation is freed only when unused.

2025-07-08 Thread Andre Vehreschild

Hi Jerry,

thanks for the review. Pushed as gcc-16-2086-gd1f05661fa6.

Thanks again, 
Andre

On Mon, 7 Jul 2025 10:49:50 -0700
Jerry D  wrote:

> On 7/7/25 8:39 AM, Andre Vehreschild wrote:
> > Ping!  
> 
> 
> OK for mainline.
> 
> Thanks,
> 
> Jerry
> > 
> > On Thu, 26 Jun 2025 15:32:47 +0200
> > Andre Vehreschild  wrote:
> >   
> >> Hi,
> >>
> >> I found a bug in the module cleanup expression at the end of the test. In
> >> the attached patch it is corrected.
> >>
> >> Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?
> >>
> >> Regards,
> >>Andre
> >>
> >> On Wed, 25 Jun 2025 15:48:11 +0200
> >> Andre Vehreschild  wrote:
> >>  
> >>> Hi,
> >>>
> >>> Antony Lewis reported this issue and also proposed a patch, that removes
> >>> the was_finalized tracking. While this may lead to the desired effect for
> >>> the issue at hand, I don't believe that the was_finalized tracking code
> >>> has been there for no reason.
> >>>
> >>> This patch fixes the issue that also Antony found, but by ensuring the
> >>> expression stays allocated when used instead of being freed.
> >>>
> >>> The test has been put into the asan directory of gfortran.dg and reliably
> >>> reports the issue without the fix. (With the fix, the asan is quiet).
> >>>
> >>> Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?
> >>>
> >>> Regards,
> >>>   Andre  
> >>
> >>  
> > 
> >   
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de

Re: [PATCH v4 6/6] libstdc++: Set FMT for complete C++23 mdspan [PR107761].

2025-07-08 Thread Luc Grosheintz




On 7/8/25 11:32, Jonathan Wakely wrote:

On Tue, 8 Jul 2025 at 09:27, Luc Grosheintz  wrote:


 PR libstdc++/107761

libstdc++-v3/ChangeLog:

 * include/bits/version.def (mdspan): Set to 202207 and remove
 no_stdname.
 * include/bits/version.h: Regenerate.
 * testsuite/23_containers/mdspan/ftm.cc: Test presence
 of FTM.


Please spell this out in full as "feature test macro", there's no need
to use an initialism that some people won't recognise. (And the commit
summary line says "FMT" not "FTM" anyway ;-)

I would also prefer the test to be called version.cc not ftm.cc as
that's what we use elsewhere.

I think we can do that when pushing the commit though, we don't need
another patch for it.

Thanks for getting <mdspan> done! Great work.


I never thought of it that way: spell it out to prevent
typos.

Thank you for fixing up the mistake when committing the
patches. It's been a nice experience contributing to
libstdc++! Thank you for the patience and thorough &
friendly reviews.

I'll continue with the C++26 parts: padded layouts, sub-
mdspan, etc.





Signed-off-by: Luc Grosheintz 
---
  libstdc++-v3/include/bits/version.def  | 3 +--
  libstdc++-v3/include/bits/version.h| 3 ++-
  libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
  3 files changed, 12 insertions(+), 3 deletions(-)
  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index 64f8190d240..f1015abdbfa 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1007,9 +1007,8 @@ ftms = {

  ftms = {
name = mdspan;
-  no_stdname = true; // FIXME: remove
values = {
-v = 1; // FIXME: 202207
+v = 202207;
  cxxmin = 23;
};
  };
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index 744246a9938..80f6586372d 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1126,8 +1126,9 @@

  #if !defined(__cpp_lib_mdspan)
  # if (__cplusplus >= 202100L)
-#  define __glibcxx_mdspan 1L
+#  define __glibcxx_mdspan 202207L
  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#   define __cpp_lib_mdspan 202207L
  #  endif
  # endif
  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
new file mode 100644
index 000..106ee4010ee
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
@@ -0,0 +1,9 @@
+// { dg-do compile { target c++23 } }
+#include 
+
+#ifndef __cpp_lib_mdspan
+#error "Feature test macro __cpp_lib_mdspan is missing for "
+#if __cpp_lib_mdspan < 202207
+#error "Feature test macro __cpp_lib_mdspan has the wrong value"
+#endif
+#endif
--
2.49.0

[PATCH] testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86

2025-07-08 Thread Rainer Orth

The new tests from

commit 401199377c50045ede560daf3f6e8b51749c2a87
Author: H.J. Lu 
Date:   Tue Jun 17 10:17:17 2025 +0800

x86: Improve vector_loop/unrolled_loop for memset/memcpy

FAIL on 64-bit Solaris/x86:

FAIL: gcc.target/i386/memcpy-pr120683-1.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-2.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-3.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-4.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-5.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-6.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-pr120683-7.c check-function-bodies foo
FAIL: gcc.target/i386/memcpy-strategy-12.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-1.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-10.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-11.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-12.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-13.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-14.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-15.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-16.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-17.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-18.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-19.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-2.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-20.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-21.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-22.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-23.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-3.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-4.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-5.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-6.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-7.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-8.c check-function-bodies foo
FAIL: gcc.target/i386/memset-pr120683-9.c check-function-bodies foo

Like several times before, they need to be compiled with
-fasynchronous-unwind-tables -fdwarf2-cfi-asm.

Tested on i386-pc-solaris2.11 and x86_64-pc-linux-gnu.

Ok for trunk?

I suspect such patches can be considered obvious by now?

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2025-07-08  Rainer Orth  

gcc/testsuite:
* gcc.target/i386/memcpy-pr120683-1.c (dg-options): Add
-fasynchronous-unwind-tables -fdwarf2-cfi-asm.
* gcc.target/i386/memcpy-pr120683-2.c: Likewise.
* gcc.target/i386/memcpy-pr120683-3.c: Likewise.
* gcc.target/i386/memcpy-pr120683-4.c: Likewise.
* gcc.target/i386/memcpy-pr120683-5.c: Likewise.
* gcc.target/i386/memcpy-pr120683-6.c: Likewise.
* gcc.target/i386/memcpy-pr120683-7.c: Likewise.
* gcc.target/i386/memcpy-strategy-12.c: Likewise.
* gcc.target/i386/memset-pr120683-1.c: Likewise.
* gcc.target/i386/memset-pr120683-10.c: Likewise.
* gcc.target/i386/memset-pr120683-11.c: Likewise.
* gcc.target/i386/memset-pr120683-12.c: Likewise.
* gcc.target/i386/memset-pr120683-13.c: Likewise.
* gcc.target/i386/memset-pr120683-14.c: Likewise.
* gcc.target/i386/memset-pr120683-15.c: Likewise.
* gcc.target/i386/memset-pr120683-16.c: Likewise.
* gcc.target/i386/memset-pr120683-17.c: Likewise.
* gcc.target/i386/memset-pr120683-18.c: Likewise.
* gcc.target/i386/memset-pr120683-19.c: Likewise.
* gcc.target/i386/memset-pr120683-2.c: Likewise.
* gcc.target/i386/memset-pr120683-20.c: Likewise.
* gcc.target/i386/memset-pr120683-21.c: Likewise.
* gcc.target/i386/memset-pr120683-22.c: Likewise.
* gcc.target/i386/memset-pr120683-23.c: Likewise.
* gcc.target/i386/memset-pr120683-3.c: Likewise.
* gcc.target/i386/memset-pr120683-4.c: Likewise.
* gcc.target/i386/memset-pr120683-5.c: Likewise.
* gcc.target/i386/memset-pr120683-6.c: Likewise.
* gcc.target/i386/memset-pr120683-7.c: Likewise.
* gcc.target/i386/memset-pr120683-8.c: Likewise.
* gcc.target/i386/memset-pr120683-9.c: Likewise.

diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr120683-1.c b/gcc/testsuite/gcc.target/i386/memcpy-pr120683-1.c
--- a/gcc/testsuite/gcc.target/i386/memcpy-pr120683-1.c
+++ b/gcc/testsuite/gcc.target/i386/memcpy-pr120683-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-sse -mmemcpy-strategy=unrolled_loop:256:noalign,libcall:-1:noalign"

Re: [PATCH v2] libstdc++: Search for tzdata on Windows (msys)

2025-07-08 Thread Björn Schäpers


Am 08.07.2025 um 12:01 schrieb Jonathan Wakely:

On Mon, 7 Jul 2025 at 23:53, Björn Schäpers  wrote:


From: Björn Schäpers 

Windows does not provide a tzdata.zi, but msys does. Use this, if
available, instead of the embedded (and possibly outdated) database.

libstdc++-v3/Changelog:

 Use msys provided time zone information.

 * src/c++20/tzdb.cc (zoneinfo_file): On Windows look relative
 from the DLL path for the time zone information.

Signed-off-by: Björn Schäpers 
---
  libstdc++-v3/src/c++20/tzdb.cc | 34 ++
  1 file changed, 34 insertions(+)

diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc
index 6e244dc656d..9923d14b7a7 100644
--- a/libstdc++-v3/src/c++20/tzdb.cc
+++ b/libstdc++-v3/src/c++20/tzdb.cc
@@ -44,6 +44,12 @@
  # include// getenv
  #endif

+#if _GLIBCXX_HAVE_WINDOWS_H
+# define WIN32_LEAN_AND_MEAN
+# include 
+# include 
+#endif
+
  #if defined __GTHREADS && ATOMIC_POINTER_LOCK_FREE == 2
  # define USE_ATOMIC_LIST_HEAD 1
  // TODO benchmark atomic> vs mutex.
@@ -1144,6 +1150,34 @@ namespace std::chrono
  #ifdef _GLIBCXX_ZONEINFO_DIR
else
 path = _GLIBCXX_ZONEINFO_DIR;
+#endif
+#ifdef _GLIBCXX_HAVE_WINDOWS_H
+  if (path.empty())
+   {
+ HMODULE dll_module;
+ if (GetModuleHandleExA(
+ GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
+ | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ reinterpret_cast(&zoneinfo_file), &dll_module))


Does this assume that libstdc++.dll is installed as part of the msys
installation?

What if users build GCC themselves and install it elsewhere?

Is the correct solution for msys-based systems to build GCC with
--with-libstdcxx-zoneinfo=/path/to/msys/tzdata.zi ?



Yes, the assumption is that libstdc++-6.dll is installed in
$msysPath/$msysSubsystem/bin and the time zones are in ../share/zoneinfo.


If GCC is configured with --with-libstdcxx-zoneinfo=/path/to/msys/tzdata.zi,
path shouldn't be empty but should contain "/path/to/msys/tzdata.zi", and
nothing changes with my patch.


The problem for msys packages is that $msysPath is unknown, since the user can
install msys anywhere. I tried using a long string constant and changing that in
the DLL after installation, so it would remain a constant. But the string
constant size is used to build the std::string and "/tzdata.zi" is appended to
that. So I switched to a runtime approach.





+   {
+ char dll_path[MAX_PATH];
+ if (GetModuleFileNameA(dll_module, dll_path, MAX_PATH) != 0)
+   {
+ string_view dll_path_view = dll_path;
+ auto pos = dll_path_view.find_last_of('\\');
+ dll_path_view = dll_path_view.substr(0, pos);
+ if (dll_path_view.ends_with("\\bin"))
+   {
+ constexpr string_view remaining_path = "share\\zoneinfo";
+ dll_path_view.remove_suffix(3); // Remove bin
+ path.reserve(dll_path_view.size()
+  + remaining_path.size());
+ path = dll_path_view;
+ path += remaining_path;
+   }
+   }
+   }
+   }
  #endif
if (!path.empty())
 path.append(filename);
--
2.50.0

RE: [PATCH 3/7] aarch64: Handle DImode BCAX operations

2025-07-08 Thread Tamar Christina

> -Original Message-
> From: Richard Sandiford 
> Sent: Tuesday, July 8, 2025 10:07 AM
> To: Tamar Christina 
> Cc: Kyrylo Tkachov ; GCC Patches  patc...@gcc.gnu.org>; Richard Earnshaw ; Alex
> Coplan ; Andrew Pinski 
> Subject: Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations
> 
> Tamar Christina  writes:
> >> -Original Message-
> >> From: Richard Sandiford 
> >> Sent: Monday, July 7, 2025 12:55 PM
> >> To: Kyrylo Tkachov 
> >> Cc: GCC Patches ; Richard Earnshaw
> >> ; Alex Coplan ; Andrew
> >> Pinski 
> >> Subject: Re: [PATCH 3/7] aarch64: Handle DImode BCAX operations
> >>
> >> Richard Sandiford  writes:
> >> > Kyrylo Tkachov  writes:
> >> >> Hi all,
> >> >>
> >> >> To handle DImode BCAX operations we want to do them on the SIMD side
> only
> >> if
> >> >> the incoming arguments don't require a cross-bank move.
> >> >> This means we need to split back the combination to separate GP BIC+EOR
> >> >> instructions if the operands are expected to be in GP regs through 
> >> >> reload.
> >> >> The split happens pre-reload if we already know that the destination 
> >> >> will be
> >> >> a GP reg. Otherwise if reload decides to use the "=r,r" alternative we 
> >> >> ensure
> >> >> operand 0 is early-clobber.
> >> >> This scheme is similar to how we handle the BSL operations elsewhere in
> >> >> aarch64-simd.md.
> >> >>
> >> >> Thus, for the functions:
> >> >> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX 
> >> >> (a, b,
> c); }
> >> >> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return 
> >> >> BCAX
> (a,
> >> b, c); }
> >> >>
> >> >> we now generate the desired:
> >> >> bcax_d_gp:
> >> >> bic x1, x1, x2
> >> >> eor x0, x1, x0
> >> >> ret
> >> >>
> >> >> bcax_d:
> >> >> bcax v0.16b, v0.16b, v1.16b, v2.16b
> >> >> ret
> >> >>
> >> >> When the inputs are in SIMD regs we use BCAX and when they are in GP 
> >> >> regs
> we
> >> >> don't force them to SIMD with extra moves.
> >> >>
> >> >> Bootstrapped and tested on aarch64-none-linux-gnu.
> >> >> Ok for trunk?
> >> >> Thanks,
> >> >> Kyrill
> >> >>
> >> >> Signed-off-by: Kyrylo Tkachov 
> >> >>
> >> >> gcc/
> >> >>
> >> >> * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
> >> >> define_insn_and_split.
> >> >>
> >> >> gcc/testsuite/
> >> >>
> >> >> * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode 
> >> >> arguments.
> >> >>
> >> >> From 95268cff1261a7724190dd291f9fcb5a7c817917 Mon Sep 17
> 00:00:00
> >> 2001
> >> >> From: Kyrylo Tkachov 
> >> >> Date: Thu, 3 Jul 2025 09:45:02 -0700
> >> >> Subject: [PATCH 3/7] aarch64: Handle DImode BCAX operations
> >> >>
> >> >> To handle DImode BCAX operations we want to do them on the SIMD side
> only
> >> if
> >> >> the incoming arguments don't require a cross-bank move.
> >> >> This means we need to split back the combination to separate GP BIC+EOR
> >> >> instructions if the operands are expected to be in GP regs through 
> >> >> reload.
> >> >> The split happens pre-reload if we already know that the destination 
> >> >> will be
> >> >> a GP reg.  Otherwise if reload decides to use the "=r,r" alternative 
> >> >> we ensure
> >> >> operand 0 is early-clobber.
> >> >> This scheme is similar to how we handle the BSL operations elsewhere in
> >> >> aarch64-simd.md.
> >> >>
> >> >> Thus, for the functions:
> >> >> uint64_t bcax_d_gp (uint64_t a, uint64_t b, uint64_t c) { return BCAX 
> >> >> (a, b,
> c); }
> >> >> uint64x1_t bcax_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return 
> >> >> BCAX
> (a,
> >> b, c); }
> >> >>
> >> >> we now generate the desired:
> >> >> bcax_d_gp:
> >> >> bic x1, x1, x2
> >> >> eor x0, x1, x0
> >> >> ret
> >> >>
> >> >> bcax_d:
> >> >> bcax v0.16b, v0.16b, v1.16b, v2.16b
> >> >> ret
> >> >>
> >> >> When the inputs are in SIMD regs we use BCAX and when they are in GP 
> >> >> regs
> we
> >> >> don't force them to SIMD with extra moves.
> >> >>
> >> >> Bootstrapped and tested on aarch64-none-linux-gnu.
> >> >>
> >> >> Signed-off-by: Kyrylo Tkachov 
> >> >>
> >> >> gcc/
> >> >>
> >> >> * config/aarch64/aarch64-simd.md (*bcaxqdi4): New
> >> >> define_insn_and_split.
> >> >>
> >> >> gcc/testsuite/
> >> >>
> >> >> * gcc.target/aarch64/simd/bcax_d.c: Add tests for DImode 
> >> >> arguments.
> >> >> ---
> >> >>  gcc/config/aarch64/aarch64-simd.md| 29 +++
> >> >>  .../gcc.target/aarch64/simd/bcax_d.c  |  6 +++-
> >> >>  2 files changed, 34 insertions(+), 1 deletion(-)
> >> >>
> >> >> diff --git a/gcc/config/aarch64/aarch64-simd.md
> >> b/gcc/config/aarch64/aarch64-simd.md
> >> >> index 4493e55603d..be6a16b4be8 100644
> >> >> --- a/gcc/config/aarch64/aarch64-simd.md
> >> >> +++ b/gcc/config/aarch64/aarch64-simd.md
> >> >> @@ -9252,6 +9252,35 @@
> >> >>[(set_attr "type" "crypto_sha3")]
> >> >>  )
> >> >>
> >> >> +(define_insn_and_split "*bcaxqdi4"
> >> >> +  [(set (match_operand:DI 0 "register_oper

Re: [PATCH v2] libstdc++: Search for tzdata on Windows (msys)

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 11:31, Björn Schäpers  wrote:
>
> Am 08.07.2025 um 12:01 schrieb Jonathan Wakely:
> > On Mon, 7 Jul 2025 at 23:53, Björn Schäpers  wrote:
> >>
> >> From: Björn Schäpers 
> >>
> >> Windows does not provide a tzdata.zi, but msys does. Use this, if
> >> available, instead of the embedded (and possibly outdated) database.
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >>  Use msys provided time zone information.
> >>
> >>  * src/c++20/tzdb.cc (zoneinfo_file): On Windows look relative
> >>  from the DLL path for the time zone information.
> >>
> >> Signed-off-by: Björn Schäpers 
> >> ---
> >>   libstdc++-v3/src/c++20/tzdb.cc | 34 ++
> >>   1 file changed, 34 insertions(+)
> >>
> >> diff --git a/libstdc++-v3/src/c++20/tzdb.cc 
> >> b/libstdc++-v3/src/c++20/tzdb.cc
> >> index 6e244dc656d..9923d14b7a7 100644
> >> --- a/libstdc++-v3/src/c++20/tzdb.cc
> >> +++ b/libstdc++-v3/src/c++20/tzdb.cc
> >> @@ -44,6 +44,12 @@
> >>   # include// getenv
> >>   #endif
> >>
> >> +#if _GLIBCXX_HAVE_WINDOWS_H
> >> +# define WIN32_LEAN_AND_MEAN
> >> +# include 
> >> +# include 
> >> +#endif
> >> +
> >>   #if defined __GTHREADS && ATOMIC_POINTER_LOCK_FREE == 2
> >>   # define USE_ATOMIC_LIST_HEAD 1
> >>   // TODO benchmark atomic> vs mutex.
> >> @@ -1144,6 +1150,34 @@ namespace std::chrono
> >>   #ifdef _GLIBCXX_ZONEINFO_DIR
> >> else
> >>  path = _GLIBCXX_ZONEINFO_DIR;
> >> +#endif
> >> +#ifdef _GLIBCXX_HAVE_WINDOWS_H
> >> +  if (path.empty())
> >> +   {
> >> + HMODULE dll_module;
> >> + if (GetModuleHandleExA(
> >> + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
> >> + | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
> >> + reinterpret_cast(&zoneinfo_file), 
> >> &dll_module))
> >
> > Does this assume that libstdc++.dll is installed as part of the msys
> > installation?
> >
> > What if users build GCC themselves and install it elsewhere?
> >
> > Is the correct solution for msys-based systems to build GCC with
> > --with-libstdcxx-zoneinfo=/path/to/msys/tzdata.zi ?
> >
>
> Yes, the assumption is that libstdc++-6.dll is installed in
> $msysPath/$msysSubsystem/bin and the time zones are in ../share/zoneinfo.
>
> If GCC is configured with --with-libstdcxx-zoneinfo=/path/to/msys/tzdata.zi,
> path shouldn't be empty but should contain "/path/to/msys/tzdata.zi", and
> nothing changes with my patch.
>
> The problem for msys packages is that $msysPath is unknown, since the user can
> install msys anywhere. I tried using a long string constant and changing that in
> the DLL after installation, so it would remain a constant. But the string
> constant size is used to build the std::string and "/tzdata.zi" is appended to
> that. So I switched to a runtime approach.

Makes sense. So the configure option might work, but if it doesn't,
the runtime approach might work (if libstdc++-6.dll and tzdata.zi have
the expected relative layout), and if not, we use the static copy of
tzdata.zi in the library.

Thanks, this still seems like a net improvement, even if it's not 100%
guaranteed to work.


>
> >
> >> +   {
> >> + char dll_path[MAX_PATH];
> >> + if (GetModuleFileNameA(dll_module, dll_path, MAX_PATH) != 0)
> >> +   {
> >> + string_view dll_path_view = dll_path;
> >> + auto pos = dll_path_view.find_last_of('\\');
> >> + dll_path_view = dll_path_view.substr(0, pos);
> >> + if (dll_path_view.ends_with("\\bin"))
> >> +   {
> >> + constexpr string_view remaining_path = 
> >> "share\\zoneinfo";
> >> + dll_path_view.remove_suffix(3); // Remove bin
> >> + path.reserve(dll_path_view.size()
> >> +  + remaining_path.size());
> >> + path = dll_path_view;
> >> + path += remaining_path;
> >> +   }
> >> +   }
> >> +   }
> >> +   }
> >>   #endif
> >> if (!path.empty())
> >>  path.append(filename);
> >> --
> >> 2.50.0
> >>
> >
>

Re: [PATCH] testsuite: i386: Fix gcc.target/i386/memcpy-pr120683-1.c etc. on Solaris/x86

2025-07-08 Thread H.J. Lu

On Tue, Jul 8, 2025 at 6:26 PM Rainer Orth  
wrote:
>
> The new tests from
>
> commit 401199377c50045ede560daf3f6e8b51749c2a87
> Author: H.J. Lu 
> Date:   Tue Jun 17 10:17:17 2025 +0800
>
> x86: Improve vector_loop/unrolled_loop for memset/memcpy
>
> FAIL on 64-bit Solaris/x86:
>
> FAIL: gcc.target/i386/memcpy-pr120683-1.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-2.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-3.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-4.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-5.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-6.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-pr120683-7.c check-function-bodies foo
> FAIL: gcc.target/i386/memcpy-strategy-12.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-1.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-10.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-11.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-12.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-13.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-14.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-15.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-16.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-17.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-18.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-19.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-2.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-20.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-21.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-22.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-23.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-3.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-4.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-5.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-6.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-7.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-8.c check-function-bodies foo
> FAIL: gcc.target/i386/memset-pr120683-9.c check-function-bodies foo
>
> Like several times before, they need to be compiled with
> -fasynchronous-unwind-tables -fdwarf2-cfi-asm.
>
> Tested on i386-pc-solaris2.11 and x86_64-pc-linux-gnu.
>
> Ok for trunk?

OK.

Thanks.

> I suspect such patches can be considered obvious by now?
>
> Thanks.
> Rainer
>
> --
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University
>
>
> 2025-07-08  Rainer Orth  
>
> gcc/testsuite:
> * gcc.target/i386/memcpy-pr120683-1.c (dg-options): Add
> -fasynchronous-unwind-tables -fdwarf2-cfi-asm.
> * gcc.target/i386/memcpy-pr120683-2.c: Likewise.
> * gcc.target/i386/memcpy-pr120683-3.c: Likewise.
> * gcc.target/i386/memcpy-pr120683-4.c: Likewise.
> * gcc.target/i386/memcpy-pr120683-5.c: Likewise.
> * gcc.target/i386/memcpy-pr120683-6.c: Likewise.
> * gcc.target/i386/memcpy-pr120683-7.c: Likewise.
> * gcc.target/i386/memcpy-strategy-12.c: Likewise.
> * gcc.target/i386/memset-pr120683-1.c: Likewise.
> * gcc.target/i386/memset-pr120683-10.c: Likewise.
> * gcc.target/i386/memset-pr120683-11.c: Likewise.
> * gcc.target/i386/memset-pr120683-12.c: Likewise.
> * gcc.target/i386/memset-pr120683-13.c: Likewise.
> * gcc.target/i386/memset-pr120683-14.c: Likewise.
> * gcc.target/i386/memset-pr120683-15.c: Likewise.
> * gcc.target/i386/memset-pr120683-16.c: Likewise.
> * gcc.target/i386/memset-pr120683-17.c: Likewise.
> * gcc.target/i386/memset-pr120683-18.c: Likewise.
> * gcc.target/i386/memset-pr120683-19.c: Likewise.
> * gcc.target/i386/memset-pr120683-2.c: Likewise.
> * gcc.target/i386/memset-pr120683-20.c: Likewise.
> * gcc.target/i386/memset-pr120683-21.c: Likewise.
> * gcc.target/i386/memset-pr120683-22.c: Likewise.
> * gcc.target/i386/memset-pr120683-23.c: Likewise.
> * gcc.target/i386/memset-pr120683-3.c: Likewise.
> * gcc.target/i386/memset-pr120683-4.c: Likewise.
> * gcc.target/i386/memset-pr120683-5.c: Likewise.
> * gcc.target/i386/memset-pr120683-6.c: Likewise.
> * gcc.target/i386/memset-pr120683-7.c: Likewise.
> * gcc.target/i386/memset-pr120683-8.c: Likewise.
> * gcc.target/i386/memset-pr120683-9.c: Likewise.
>


-- 
H.J.

Re: [PATCH v2 2/2] libstdc++: Better CTAD for span and mdspan [PR120914].

2025-07-08 Thread Tomasz Kaminski

On Tue, Jul 8, 2025 at 12:08 PM Jonathan Wakely  wrote:

> On Tue, 8 Jul 2025 at 10:54, Luc Grosheintz 
> wrote:
> >
> > This implements P3029R1. In P3029R1, the CTAD for span is refined to
> > permit deducing the extent of the span from an integral constant, e.g.
> >
> >   span((T*) ptr, integral_constant{});
> >
> > is deduced as span. Similarly, in
> >
> >   auto exts = extents(integral_constant);
> >   auto md = mdspan((T*) ptr, integral_constant);
> >
> > exts and md have types extents and mdspan > extents>, respectively.
> >
> > PR libstdc++/120914
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/span (span): Update CTAD to enable
> > integral constants [P3029R1].
> > * include/std/mdspan (extents): ditto.
> > (mdspan): ditto.
> > * testsuite/23_containers/span/deduction.cc: Test deduction
> > guide.
> > * testsuite/23_containers/mdspan/extents/misc.cc: ditto.
> > * testsuite/23_containers/mdspan/mdspan.cc: ditto.
> >
> > Signed-off-by: Luc Grosheintz 
> > ---
> >  libstdc++-v3/include/std/mdspan   |  8 ++
> >  libstdc++-v3/include/std/span | 20 ++-
> >  .../23_containers/mdspan/extents/misc.cc  | 20 +++
> >  .../testsuite/23_containers/mdspan/mdspan.cc  | 25 +++
> >  .../testsuite/23_containers/span/deduction.cc |  3 +++
> >  5 files changed, 69 insertions(+), 7 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> > index 0fd78570b3e..152dcb3e92a 100644
> > --- a/libstdc++-v3/include/std/mdspan
> > +++ b/libstdc++-v3/include/std/mdspan
> > @@ -406,10 +406,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  template
> >auto __build_dextents_type(integer_sequence)
> > -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
> > -
> > -template
> > -  consteval size_t
> > -  __dynamic_extent() { return dynamic_extent; }
> >}
> >
> >template
> > @@ -419,7 +415,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >template
> >  requires (is_convertible_v<_Integrals, size_t> && ...)
> >  explicit extents(_Integrals...) ->
> > -  extents()...>;
> > +  extents...>;
> >
> >struct layout_left
> >{
> > @@ -1316,7 +1312,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >   && (sizeof...(_Integrals) > 0)
> >  explicit mdspan(_ElementType*, _Integrals...)
> >  -> mdspan<_ElementType,
> > - extents __mdspan::__dynamic_extent<_Integrals>()...>>;
> > + extents __detail::__maybe_static_ext<_Integrals>...>>;
> >
> >template
> >  mdspan(_ElementType*, span<_OIndexType, _Nm>)
> > diff --git a/libstdc++-v3/include/std/span
> b/libstdc++-v3/include/std/span
> > index 49ab9109d83..5629a71b9bd 100644
> > --- a/libstdc++-v3/include/std/span
> > +++ b/libstdc++-v3/include/std/span
> > @@ -476,6 +476,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >  };
> >
> >// deduction guides
> > +  namespace __detail
> > +  {
> > +template
> > +  concept __integral_constant_like =
> is_integral_v
> > +   && !is_same_v>
> > +   && convertible_to<_Tp, decltype(_Tp::value)>
> > +   && equality_comparable_with<_Tp, decltype(_Tp::value)>
> > +   && bool_constant<_Tp() == _Tp::value>::value
> > +   && bool_constant(_Tp()) ==
> _Tp::value>
> > +::value;
> > +
> > +template
> > +  constexpr size_t __maybe_static_ext = dynamic_extent;
> > +
> > +template<__integral_constant_like _Tp>
> > +  constexpr size_t __maybe_static_ext<_Tp> = {_Tp::value};
>
> Are the braces here to detect narrowing conversions?
>
> (The paper doesn't mention why they're used, as far as I can see)
>
The information is in revision history (a bit hidden):
Initialized the *maybe-static-ext*'s specialization with {T::value} to
prevent conversions from negative values.

>
>
> > +  }
> >
> >template
> >  span(_Type(&)[_ArrayExtent]) -> span<_Type, _ArrayExtent>;
> > @@ -489,7 +506,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >
> >template
> >  span(_Iter, _End)
> > -  -> span>>;
> > +  -> span>,
> > +__detail::__maybe_static_ext<_End>>;
> >
> >template
> >  span(_Range &&)
> > diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> > index e71fdc54230..bca8901685d 100644
> > --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> > +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> > @@ -97,6 +97,25 @@ test_deduction(Extents... exts)
> >VERIFY(e == expected);
> >  }
> >
> > +constexpr bool
> > +test_integral_constant_deduction()
> > +{
> > +  auto verify = [](auto actual, auto expected)
> > +{
> > +  static_assert(std::same_as);
> > +  VERIFY(actual == expected);
> > +};
> > +
> > +  constexpr auto c1 = std::integral_constant{};
> > +  constexpr

[PATCH] x86: Keep non all 0s/1s redundant vector loads on AMD znverN

2025-07-08 Thread H.J. Lu

aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
Author: H.J. Lu 
Date:   Fri May 9 07:17:07 2025 +0800

x86: Extend the remove_redundant_vector pass

which removed non all 0s/1s redundant vector loads, caused SPEC CPU 2017
519.lbm_r and 470.lbm performance regressions on AMD znverN processors.
Add a tuning option to keep non all 0s/1s redundant vector loads on AMD
znverN processors.

gcc/

PR target/120941
* config/i386/i386-features.cc (ix86_broadcast_inner): Keep
non all 0s/1s redundant vector loads if asked.
* config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
New tuning.

gcc/testsuite/

PR target/120941
* gcc.target/i386/pr120941-1a.c: New test.
* gcc.target/i386/pr120941-1b.c: Likewise.
* gcc.target/i386/pr120941-1c.c: Likewise.
* gcc.target/i386/pr120941-1d.c: Likewise.

OK for master?

Thanks.

-- 
H.J.
From 27ca9842b54b9e3f585e23abe5fa6b21aa043c73 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sat, 5 Jul 2025 04:12:47 +0800
Subject: [PATCH] x86: Keep non all 0s/1s redundant vector loads on AMD znverN

aba3b9d3a48a0703fd565f7c5f0caf604f59970b is the first bad commit
commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b
Author: H.J. Lu 
Date:   Fri May 9 07:17:07 2025 +0800

x86: Extend the remove_redundant_vector pass

which removed non all 0s/1s redundant vector loads, caused SPEC CPU 2017
519.lbm_r and 470.lbm performance regressions on AMD znverN processors.
Add a tuning option to keep non all 0s/1s redundant vector loads on AMD
znverN processors.

gcc/

	PR target/120941
	* config/i386/i386-features.cc (ix86_broadcast_inner): Keep
	non all 0s/1s redundant vector loads if asked.
	* config/i386/x86-tune.def (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD):
	New tuning.

gcc/testsuite/

	PR target/120941
	* gcc.target/i386/pr120941-1a.c: New test.
	* gcc.target/i386/pr120941-1b.c: Likewise.
	* gcc.target/i386/pr120941-1c.c: Likewise.
	* gcc.target/i386/pr120941-1d.c: Likewise.

Signed-off-by: H.J. Lu 
---
 gcc/config/i386/i386-features.cc|  4 
 gcc/config/i386/x86-tune.def|  4 
 gcc/testsuite/gcc.target/i386/pr120941-1a.c | 19 +++
 gcc/testsuite/gcc.target/i386/pr120941-1b.c |  5 +
 gcc/testsuite/gcc.target/i386/pr120941-1c.c |  5 +
 gcc/testsuite/gcc.target/i386/pr120941-1d.c |  5 +
 6 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr120941-1d.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 054f8d5ddc8..574eaf2e4d2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3552,6 +3552,10 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
   return constm1_rtx;
 }
 
+  /* Skip if non all 0s/1s redundant vector loads should be kept.  */
+  if (ix86_tune_features[X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD])
+return nullptr;
+
   mode = GET_MODE (op);
   int nunits = GET_MODE_NUNITS (mode);
   if (nunits < 2)
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 91cdca7fbfc..d0b1da007f9 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -639,6 +639,10 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
 DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues",
 	  m_ZNVER4 | m_ZNVER5)
 
+/* X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD: Keep redundant vector loads.  */
+DEF_TUNE (X86_TUNE_KEEP_REDUNDANT_VECTOR_LOAD, "keep_redundant_vector_load",
+	  m_ZNVER)
+
 /*/
 /*/
 /* Historical relics: tuning flags that helps a specific old CPU designs */
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1a.c b/gcc/testsuite/gcc.target/i386/pr120941-1a.c
new file mode 100644
index 000..daced44b4b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1a.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver5" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include 
+
+extern __m512i sinkz;
+extern __m256i sinky;
+extern char f;
+
+void
+foo(char c, int x)
+{
+  c += f;
+  sinkz = _mm512_set1_epi8(c);
+  if (x == 2)
+f += 3;
+  sinky = _mm256_set1_epi8(c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr120941-1b.c b/gcc/testsuite/gcc.target/i386/pr120941-1b.c
new file mode 100644
index 000..a00ba5eb8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120941-1b.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2 -mtune=znver4" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb" 2 } } */
+
+#include "pr120941-1a.c"
diff --git a/gc

Re: [Fortran, Patch, PR120711, v1] 1/(3) Fix out of bounds access in cleanup of array constructor

2025-07-08 Thread Andre Vehreschild

Hi Harald, hi Mikael,

Why should the testcase be invalid? The `list` is empty. Concatenating with it
is valid in Fortran like in most other programming languages. The mapping of an
empty list in gfortran is to have the array's data pointer set to NULL and the
ubound below the lbound. Current gfortran copes fine with this, so does Intel's
Fortran compiler and flang (all executables tested with valgrind).

In F2018 §7.8 Paragraph 8 it states: "An empty sequence forms a zero-sized
array". This is the closest it gets to an unallocated array, but it does not
exclude nor include it.

Maybe someone with access to other Fortran compilers can test this, too. But
all compilers available to me are fine with an unallocated array in the
array constructor. But I am doing too much wrong at the moment, so don't value
my words too much.

Regards,
Andre

On Tue, 8 Jul 2025 22:17:23 +0200
Harald Anlauf  wrote:

> Am 05.07.25 um 14:55 schrieb Mikael Morin:
> > Hello Andre,
> > 
> > I get a regression on this testcase with a patch that is otherwise 
> > regression-free.
> > I think the testcase is invalid.
> > It does:
> >   
> >>     type(container), allocatable :: list(:)
> >>
> >>     list = [list, new_elem(5)]  
> > 
> > so it's using the variable 'list' unallocated.
> > The original testcase in the PR had an extra zero-sized allocate 
> > statement before the assignment.
> > I think it's the only missing bit.
> >   
> 
> Mikael,
> 
> I believe you are right: the testcase is technically invalid
> without an
> 
>allocate(list(0))
> 
> as in the original testcase in the PR.
> 
> A corresponding fix is approved.
> 
> Thanks,
> Harald
> 

-- 
Andre Vehreschild * Email: vehre ad gmx dot de

Re: [Fortran, Patch, PR120711, v1] 1/(3) Fix out of bounds access in cleanup of array constructor

2025-07-08 Thread Harald Anlauf


Am 09.07.25 um 08:50 schrieb Andre Vehreschild:

Hi Harald, hi Mikael,

Why should the testcase be invalid? The `list` is empty. Concatenating with it
is valid in Fortran like in most other programming languages. The mapping of an
empty list in gfortran is to have the array's data pointer set to NULL and the
ubound below the lbound. Current gfortran copes fine with this, so does Intel's
Fortran compiler and flang (all executables tested with valgrind).

In F2018 §7.8 Paragraph 8 it states: "An empty sequence forms a zero-sized
array". This is the closest it gets to an unallocated array, but it does not
exclude nor include it.

Maybe someone with access to other Fortran compilers can test this, too. But
all compilers available to me are fine with an unallocated array in the
array constructor. But I am doing too much wrong at the moment, so don't value
my words too much.


NAG:

Runtime Error: array_constructor_1.f90, line 12: ALLOCATABLE LIST is not 
currently allocated

Program terminated by fatal error
Aborted



Regards,
Andre

On Tue, 8 Jul 2025 22:17:23 +0200
Harald Anlauf  wrote:


Am 05.07.25 um 14:55 schrieb Mikael Morin:

Hello Andre,

I get a regression on this testcase with a patch that is otherwise
regression-free.
I think the testcase is invalid.
It does:
   

     type(container), allocatable :: list(:)

     list = [list, new_elem(5)]


so it's using the variable 'list' unallocated.
The original testcase in the PR had an extra zero-sized allocate
statement before the assignment.
I think it's the only missing bit.
   


Mikael,

I believe you are right: the testcase is technically invalid
without an

allocate(list(0))

as in the original testcase in the PR.

A corresponding fix is approved.

Thanks,
Harald

Re: [PATCH v2 2/2] libstdc++: Better CTAD for span and mdspan [PR120914].

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 11:46, Tomasz Kaminski  wrote:
>
>
>
> On Tue, Jul 8, 2025 at 12:08 PM Jonathan Wakely  wrote:
>>
>> On Tue, 8 Jul 2025 at 10:54, Luc Grosheintz  wrote:
>> >
>> > This implements P3029R1. In P3029R1, the CTAD for span is refined to
>> > permit deducing the extent of the span from an integral constant, e.g.
>> >
>> >   span((T*) ptr, integral_constant{});
>> >
>> > is deduced as span. Similarly, in
>> >
>> >   auto exts = extents(integral_constant);
>> >   auto md = mdspan((T*) ptr, integral_constant);
>> >
>> > exts and md have types extents and mdspan> > extents>, respectively.
>> >
>> > PR libstdc++/120914
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> > * include/std/span (span): Update CTAD to enable
>> > integral constants [P3029R1].
>> > * include/std/mdspan (extents): ditto.
>> > (mdspan): ditto.
>> > * testsuite/23_containers/span/deduction.cc: Test deduction
>> > guide.
>> > * testsuite/23_containers/mdspan/extents/misc.cc: ditto.
>> > * testsuite/23_containers/mdspan/mdspan.cc: ditto.
>> >
>> > Signed-off-by: Luc Grosheintz 
>> > ---
>> >  libstdc++-v3/include/std/mdspan   |  8 ++
>> >  libstdc++-v3/include/std/span | 20 ++-
>> >  .../23_containers/mdspan/extents/misc.cc  | 20 +++
>> >  .../testsuite/23_containers/mdspan/mdspan.cc  | 25 +++
>> >  .../testsuite/23_containers/span/deduction.cc |  3 +++
>> >  5 files changed, 69 insertions(+), 7 deletions(-)
>> >
>> > diff --git a/libstdc++-v3/include/std/mdspan 
>> > b/libstdc++-v3/include/std/mdspan
>> > index 0fd78570b3e..152dcb3e92a 100644
>> > --- a/libstdc++-v3/include/std/mdspan
>> > +++ b/libstdc++-v3/include/std/mdspan
>> > @@ -406,10 +406,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >  template
>> >auto __build_dextents_type(integer_sequence)
>> > -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
>> > -
>> > -template
>> > -  consteval size_t
>> > -  __dynamic_extent() { return dynamic_extent; }
>> >}
>> >
>> >template
>> > @@ -419,7 +415,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >template
>> >  requires (is_convertible_v<_Integrals, size_t> && ...)
>> >  explicit extents(_Integrals...) ->
>> > -  extents()...>;
>> > +  extents...>;
>> >
>> >struct layout_left
>> >{
>> > @@ -1316,7 +1312,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >   && (sizeof...(_Integrals) > 0)
>> >  explicit mdspan(_ElementType*, _Integrals...)
>> >  -> mdspan<_ElementType,
>> > - extents> > __mdspan::__dynamic_extent<_Integrals>()...>>;
>> > + extents> > __detail::__maybe_static_ext<_Integrals>...>>;
>> >
>> >template
>> >  mdspan(_ElementType*, span<_OIndexType, _Nm>)
>> > diff --git a/libstdc++-v3/include/std/span b/libstdc++-v3/include/std/span
>> > index 49ab9109d83..5629a71b9bd 100644
>> > --- a/libstdc++-v3/include/std/span
>> > +++ b/libstdc++-v3/include/std/span
>> > @@ -476,6 +476,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >  };
>> >
>> >// deduction guides
>> > +  namespace __detail
>> > +  {
>> > +template
>> > +  concept __integral_constant_like = 
>> > is_integral_v
>> > +   && !is_same_v>
>> > +   && convertible_to<_Tp, decltype(_Tp::value)>
>> > +   && equality_comparable_with<_Tp, decltype(_Tp::value)>
>> > +   && bool_constant<_Tp() == _Tp::value>::value
>> > +   && bool_constant(_Tp()) == 
>> > _Tp::value>
>> > +::value;
>> > +
>> > +template
>> > +  constexpr size_t __maybe_static_ext = dynamic_extent;
>> > +
>> > +template<__integral_constant_like _Tp>
>> > +  constexpr size_t __maybe_static_ext<_Tp> = {_Tp::value};
>>
>> Are the braces here to detect narrowing conversions?
>>
>> (The paper doesn't mention why they're used, as far as I can see)
>
> The information is in revision history (a bit hidden):
> Initialized the maybe-static-ext's specialization with {T::value} to prevent 
> conversions from negative values.

Thanks. OK for trunk then.


>>
>>
>>
>> > +  }
>> >
>> >template
>> >  span(_Type(&)[_ArrayExtent]) -> span<_Type, _ArrayExtent>;
>> > @@ -489,7 +506,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >
>> >template
>> >  span(_Iter, _End)
>> > -  -> span>>;
>> > +  -> span>,
>> > +__detail::__maybe_static_ext<_End>>;
>> >
>> >template
>> >  span(_Range &&)
>> > diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc 
>> > b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
>> > index e71fdc54230..bca8901685d 100644
>> > --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
>> > +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
>> > @@ -97,6 +97,25 @@ test_deduction(Extents... exts)
>> >VERIFY(e == expected);
>> >  }
>> >
>> > +constexpr bool
>> > +test_integral_constant_deducti

Re: [PATCH v2 1/2] libstdc++: Silence a warning in a test for span.

2025-07-08 Thread Jonathan Wakely

OK

On Tue, 8 Jul 2025 at 10:50, Luc Grosheintz  wrote:
>
> In a test of span, there's an unused variable myspan. This
> commit silences the warning.
>
> libstdc++-v3/ChangeLog:
>
> * testsuite/23_containers/span/contiguous_range_neg.cc: Silence
> warning about unused variable myspan.
>
> Signed-off-by: Luc Grosheintz 
> ---
>  .../testsuite/23_containers/span/contiguous_range_neg.cc | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc 
> b/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
> index c9e9112ed6d..890fdf8aea0 100644
> --- a/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
> +++ b/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
> @@ -25,6 +25,7 @@ main()
>  {
>std::deque d{};
>std::span myspan(d); // { dg-error "no match" }
> +  (void) myspan;
>  }
>
>  // { dg-prune-output "data" }
> --
> 2.49.0
>

Re: [PATCH v4 6/6] libstdc++: Set FMT for complete C++23 mdspan [PR107761].

2025-07-08 Thread Tomasz Kaminski

On Tue, Jul 8, 2025 at 12:26 PM Luc Grosheintz 
wrote:

>
> On 7/8/25 11:32, Jonathan Wakely wrote:
> > On Tue, 8 Jul 2025 at 09:27, Luc Grosheintz 
> wrote:
> >>
> >>  PR libstdc++/107761
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >>  * include/bits/version.def (mdspan): Set to 202207 and remove
> >>  no_stdname.
> >>  * include/bits/version.h: Regenerate.
> >>  * testsuite/23_containers/mdspan/ftm.cc: Test presence
> >>  of FTM.
> >
> > Please spell this out in full as "feature test macro", there's no need
> > to use an initialism that some people won't recognise. (And the commit
> > summary line says "FMT" not "FTM" anyway ;-)
> >
> > I would also prefer the test to be called version.cc not ftm.cc as
> > that's what we use elsewhere.
> >
> > I think we can do that when pushing the commit though, we don't need
> > another patch for it.
> >
> > Thanks for getting <mdspan> done! Great work.
>
Same for me here: Thank you! This was a really huge amount of work.

>
> I never thought of it that way: spell it out to prevent
> typos.
>
> Thank you for fixing up the mistake when committing the
> patches. It's been a nice experience contributing to
> libstdc++! Thank you for the patience and thorough &
> friendly reviews.
>
Glad to hear that.

>
> I'll continue with the C++26 parts: padded layouts, sub-
> mdspan, etc.
>
Sounds good. Alternatively, looking into optimizing layouts before getting
submdspan also sounds reasonable. For submdspan we should also look into:
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p3663r1.html
>
>
> >
> >>
> >> Signed-off-by: Luc Grosheintz 
> >> ---
> >>   libstdc++-v3/include/bits/version.def  | 3 +--
> >>   libstdc++-v3/include/bits/version.h| 3 ++-
> >>   libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
> >>   3 files changed, 12 insertions(+), 3 deletions(-)
> >>   create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> >>
> >> diff --git a/libstdc++-v3/include/bits/version.def
> b/libstdc++-v3/include/bits/version.def
> >> index 64f8190d240..f1015abdbfa 100644
> >> --- a/libstdc++-v3/include/bits/version.def
> >> +++ b/libstdc++-v3/include/bits/version.def
> >> @@ -1007,9 +1007,8 @@ ftms = {
> >>
> >>   ftms = {
> >> name = mdspan;
> >> -  no_stdname = true; // FIXME: remove
> >> values = {
> >> -v = 1; // FIXME: 202207
> >> +v = 202207;
> >>   cxxmin = 23;
> >> };
> >>   };
> >> diff --git a/libstdc++-v3/include/bits/version.h
> b/libstdc++-v3/include/bits/version.h
> >> index 744246a9938..80f6586372d 100644
> >> --- a/libstdc++-v3/include/bits/version.h
> >> +++ b/libstdc++-v3/include/bits/version.h
> >> @@ -1126,8 +1126,9 @@
> >>
> >>   #if !defined(__cpp_lib_mdspan)
> >>   # if (__cplusplus >= 202100L)
> >> -#  define __glibcxx_mdspan 1L
> >> +#  define __glibcxx_mdspan 202207L
> >>   #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
> >> +#   define __cpp_lib_mdspan 202207L
> >>   #  endif
> >>   # endif
> >>   #endif /* !defined(__cpp_lib_mdspan) &&
> defined(__glibcxx_want_mdspan) */
> >> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> >> new file mode 100644
> >> index 000..106ee4010ee
> >> --- /dev/null
> >> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> >> @@ -0,0 +1,9 @@
> >> +// { dg-do compile { target c++23 } }
> >> +#include 
> >> +
> >> +#ifndef __cpp_lib_mdspan
> >> +#error "Feature test macro __cpp_lib_mdspan is missing for "
> >> +#if __cpp_lib_mdspan < 202207
> >> +#error "Feature test macro __cpp_lib_mdspan has the wrong value"
> >> +#endif
> >> +#endif
> >> --
> >> 2.49.0
> >>
> >
>
>

Re: [PATCH v2 2/2] libstdc++: Better CTAD for span and mdspan [PR120914].

2025-07-08 Thread Tomasz Kaminski

Maybe adding a negative test for the error when converting negative values
would be valuable then.
I think we can add it as a separate commit later; I will try to get
everything merged today.

On Tue, Jul 8, 2025 at 12:56 PM Jonathan Wakely  wrote:

> On Tue, 8 Jul 2025 at 11:46, Tomasz Kaminski  wrote:
> >
> >
> >
> > On Tue, Jul 8, 2025 at 12:08 PM Jonathan Wakely 
> wrote:
> >>
> >> On Tue, 8 Jul 2025 at 10:54, Luc Grosheintz 
> wrote:
> >> >
> >> > This implements P3029R1. In P3029R1, the CTAD for span is refined to
> >> > permit deducing the extent of the span from an integral constant, e.g.
> >> >
> >> >   span((T*) ptr, integral_constant{});
> >> >
> >> > is deduced as span. Similarly, in
> >> >
> >> >   auto exts = extents(integral_constant);
> >> >   auto md = mdspan((T*) ptr, integral_constant);
> >> >
> >> > exts and md have types extents and mdspan >> > extents>, respectively.
> >> >
> >> > PR libstdc++/120914
> >> >
> >> > libstdc++-v3/ChangeLog:
> >> >
> >> > * include/std/span (span): Update CTAD to enable
> >> > integral constants [P3029R1].
> >> > * include/std/mdspan (extents): ditto.
> >> > (mdspan): ditto.
> >> > * testsuite/23_containers/span/deduction.cc: Test deduction
> >> > guide.
> >> > * testsuite/23_containers/mdspan/extents/misc.cc: ditto.
> >> > * testsuite/23_containers/mdspan/mdspan.cc: ditto.
> >> >
> >> > Signed-off-by: Luc Grosheintz 
> >> > ---
> >> >  libstdc++-v3/include/std/mdspan   |  8 ++
> >> >  libstdc++-v3/include/std/span | 20 ++-
> >> >  .../23_containers/mdspan/extents/misc.cc  | 20 +++
> >> >  .../testsuite/23_containers/mdspan/mdspan.cc  | 25
> +++
> >> >  .../testsuite/23_containers/span/deduction.cc |  3 +++
> >> >  5 files changed, 69 insertions(+), 7 deletions(-)
> >> >
> >> > diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> >> > index 0fd78570b3e..152dcb3e92a 100644
> >> > --- a/libstdc++-v3/include/std/mdspan
> >> > +++ b/libstdc++-v3/include/std/mdspan
> >> > @@ -406,10 +406,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> >  template
> >> >auto __build_dextents_type(integer_sequence _Counts...>)
> >> > -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
> >> > -
> >> > -template
> >> > -  consteval size_t
> >> > -  __dynamic_extent() { return dynamic_extent; }
> >> >}
> >> >
> >> >template
> >> > @@ -419,7 +415,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> >template
> >> >  requires (is_convertible_v<_Integrals, size_t> && ...)
> >> >  explicit extents(_Integrals...) ->
> >> > -  extents()...>;
> >> > +  extents...>;
> >> >
> >> >struct layout_left
> >> >{
> >> > @@ -1316,7 +1312,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> >   && (sizeof...(_Integrals) > 0)
> >> >  explicit mdspan(_ElementType*, _Integrals...)
> >> >  -> mdspan<_ElementType,
> >> > - extents __mdspan::__dynamic_extent<_Integrals>()...>>;
> >> > + extents __detail::__maybe_static_ext<_Integrals>...>>;
> >> >
> >> >template
> >> >  mdspan(_ElementType*, span<_OIndexType, _Nm>)
> >> > diff --git a/libstdc++-v3/include/std/span
> b/libstdc++-v3/include/std/span
> >> > index 49ab9109d83..5629a71b9bd 100644
> >> > --- a/libstdc++-v3/include/std/span
> >> > +++ b/libstdc++-v3/include/std/span
> >> > @@ -476,6 +476,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> >  };
> >> >
> >> >// deduction guides
> >> > +  namespace __detail
> >> > +  {
> >> > +template
> >> > +  concept __integral_constant_like =
> is_integral_v
> >> > +   && !is_same_v>
> >> > +   && convertible_to<_Tp, decltype(_Tp::value)>
> >> > +   && equality_comparable_with<_Tp, decltype(_Tp::value)>
> >> > +   && bool_constant<_Tp() == _Tp::value>::value
> >> > +   && bool_constant(_Tp()) ==
> _Tp::value>
> >> > +::value;
> >> > +
> >> > +template
> >> > +  constexpr size_t __maybe_static_ext = dynamic_extent;
> >> > +
> >> > +template<__integral_constant_like _Tp>
> >> > +  constexpr size_t __maybe_static_ext<_Tp> = {_Tp::value};
> >>
> >> Are the braces here to detect narrowing conversions?
> >>
> >> (The paper doesn't mention why they're used, as far as I can see)
> >
> > The information is in revision history (a bit hidden):
> > Initialized the maybe-static-ext's specialization with {T::value} to
> prevent conversions from negative values.
>
> Thanks. OK for trunk then.
>
>
> >>
> >>
> >>
> >> > +  }
> >> >
> >> >template
> >> >  span(_Type(&)[_ArrayExtent]) -> span<_Type, _ArrayExtent>;
> >> > @@ -489,7 +506,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> >
> >> >template
> >> >  span(_Iter, _End)
> >> > -  -> span>>;
> >> > +  -> span>,
> >> > +__detail::__maybe_static_ext<_End>>;
> >> >
> >> >template
> >> >  sp

[PATCH 2/2]middle-end: Use rounding division for ranges for partial vectors [PR120922]

2025-07-08 Thread Tamar Christina

This patch adds support for niters ranges for partial
vector loops.

Because the last iteration may be partial, the range must be at
least 1, with niters / vf rounded up (not truncated) as the maximum.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 with no issues.

Tested testcase on riscv64-unknown-linux-gnu. Should use
the same testcase as patch 1.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/120922
* tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Support range
for partial vectors.

---
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 
9c58a6a01d7c9c3d97a2f8595b1ea6cff2d653f6..2d01a4b0ed1c8431510ad5e6e2175dbe89371618
 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2857,11 +2857,21 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, 
tree niters,
 we set range information to make niters analyzer's life easier.
 Note the number of latch iteration value can be TYPE_MAX_VALUE so
 we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
-  if (stmts != NULL
- && const_vf > 0
- && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+  if (stmts != NULL && const_vf > 0)
{
- if (niters_no_overflow)
+ if (niters_no_overflow
+ && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+   {
+ int_range<1> vr (type,
+  wi::one (TYPE_PRECISION (type)),
+  wi::div_ceil (wi::max_value
+   (TYPE_PRECISION (type),
+TYPE_SIGN (type)),
+const_vf,
+TYPE_SIGN (type)));
+ set_range_info (niters_vector, vr);
+   }
+ else if (niters_no_overflow)
{
  int_range<1> vr (type,
   wi::one (TYPE_PRECISION (type)),


-- 
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9c58a6a01d7c9c3d97a2f8595b1ea6cff2d653f6..2d01a4b0ed1c8431510ad5e6e2175dbe89371618 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2857,11 +2857,21 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
 	 we set range information to make niters analyzer's life easier.
 	 Note the number of latch iteration value can be TYPE_MAX_VALUE so
 	 we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf.  */
-  if (stmts != NULL
-	  && const_vf > 0
-	  && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+  if (stmts != NULL && const_vf > 0)
 	{
-	  if (niters_no_overflow)
+	  if (niters_no_overflow
+	  && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+	{
+	  int_range<1> vr (type,
+			   wi::one (TYPE_PRECISION (type)),
+			   wi::div_ceil (wi::max_value
+			(TYPE_PRECISION (type),
+			 TYPE_SIGN (type)),
+	 const_vf,
+	 TYPE_SIGN (type)));
+	  set_range_info (niters_vector, vr);
+	}
+	  else if (niters_no_overflow)
 	{
 	  int_range<1> vr (type,
 			   wi::one (TYPE_PRECISION (type)),

[PATCH] riscv: allow zero in zacas subword atomic cas

2025-07-08 Thread Andreas Schwab

gcc:
PR target/120995
* config/riscv/sync.md (zacas_atomic_cas_value_strong):
Allow op3 to be zero.

gcc/testsuite:
PR target/120995
* gcc.target/riscv/amo/zabha-zacas-atomic-cas.c: New test.
---
 gcc/config/riscv/sync.md  |  2 +-
 .../gcc.target/riscv/amo/zabha-zacas-atomic-cas.c | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index a75ea6834e4..50ec8b38f72 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -627,7 +627,7 @@
(match_operand:SHORT 1 "memory_operand" "+A"))  
;; memory
(set (match_dup 1)
(unspec_volatile:SHORT [(match_operand:SHORT 2 "register_operand" "0")  
;; expected_val
-   (match_operand:SHORT 3 "register_operand" "rJ") 
;; desired_val
+   (match_operand:SHORT 3 "reg_or_0_operand" "rJ") 
;; desired_val
(match_operand:SI 4 "const_int_operand")
;; mod_s
(match_operand:SI 5 "const_int_operand")]   
;; mod_f
 UNSPEC_COMPARE_AND_SWAP))]
diff --git a/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c 
b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c
new file mode 100644
index 000..d3d84fd3088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/amo/zabha-zacas-atomic-cas.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* PR target/120995 ICE unrecognized subword atomic cas */
+/* { dg-options "-O" } */
+/* { dg-add-options riscv_zacas } */
+/* { dg-add-options riscv_zabha } */
+
+_Bool b;
+void atomic_bool_cmpxchg()
+{
+  __sync_bool_compare_and_swap(&b, 1, 0);
+}
-- 
2.50.0


-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [PATCH v4 1/2] tree-simplify: unify simple_comparison ops in vec_cond for bit and/or/xor [PR119196]

2025-07-08 Thread Icen Zeyada



From: Richard Biener 
Sent: 08 July 2025 10:01
To: Icen Zeyada 
Cc: gcc-patches@gcc.gnu.org ; jeffreya...@gmail.com 
; i...@airs.com ; Richard Earnshaw 
; pins...@gmail.com ; Victor Do 
Nascimento ; Tamar Christina 

Subject: Re: [PATCH v4 1/2] tree-simplify: unify simple_comparison ops in 
vec_cond for bit and/or/xor [PR119196]

On Thu, 3 Jul 2025, Icen Zeyada wrote:

> Merge simple_comparison patterns under a single vec_cond_expr for bit_and,
> bit_ior, and bit_xor in the simplify pass.
>
> Ensure that when both operands of a bit_and, bit_or, or bit_xor are 
> simple_comparison
> results, they reside within the same vec_cond_expr rather than separate ones.
> This prepares the AST so that subsequent transformations (e.g., folding the
> comparisons if possible) can take effect.
>
> PR tree-optimization/119196
>
> gcc/ChangeLog:
>
>* match.pd: Merge multiple vec_cond_expr in a single one for
>  bit_and, bit_ior and bit_xor.
>
> Signed-off-by: Icen Zeyada 
> ---
>  gcc/match.pd | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index f4416d9172c..36317b9128f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5939,6 +5939,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   && !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0)
> (vec_cond @0 (op! @1 @3) (op! @2 @4
>
> +/* (@0 ? @2 : @3) lop (@1 ? @2 : @3)  -->  (@0 lop @1) ? @2 : @3.  */
> +(for lop (bit_and bit_ior bit_xor)
> +   (simplify
> +   (lop
> +  (vec_cond @0 integer_minus_onep@2 integer_zerop@3)

> Why are you restricting this to integer_minus_onep/zerop?  Is
> the assumption that such vec_cond is "cheap", thus we also
> do not need to add :s to them?

From my understanding, the reason we specify those specific ones is that we 
expect a vec_cond, and therefore it can only be true or false, which in this 
case would be integer_minus_onep or integer_zerop. I believe those were the 
values in the original tree when specifying whether an expression with a 
vec_cond was true or not, so when matching I just had those expressions in mind.

> +  (vec_cond @1 @2 @3))
> +   (vec_cond (lop @0 @1) @2 @3)))

> So this turns a logical operation on the data type to a logical
> operation on the predicate type.  You need to check this
> operation is supported by the target with

>   target_supports_op_p (TREE_TYPE (@0), lop, optab_vector)

> I think the transform is sensible for arbitrary @2/@3 though
> in that case with :s on the vec_conds.

> Richard.

I am happy to add the target check, but I am curious how generalising @2 and @3 
would help with the Bugzilla bug we are trying to solve. Or do you 
think this might simply be better in the larger context?

> +
>  /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
>   (simplify
>(op (vec_cond:s @0 @1 @2) @3)
>

--
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v4 1/2] tree-simplify: unify simple_comparison ops in vec_cond for bit and/or/xor [PR119196]

2025-07-08 Thread Richard Biener

On Tue, 8 Jul 2025, Icen Zeyada wrote:

> 
> 
> From: Richard Biener 
> Sent: 08 July 2025 10:01
> To: Icen Zeyada 
> Cc: gcc-patches@gcc.gnu.org ; jeffreya...@gmail.com 
> ; i...@airs.com ; Richard Earnshaw 
> ; pins...@gmail.com ; Victor Do 
> Nascimento ; Tamar Christina 
> 
> Subject: Re: [PATCH v4 1/2] tree-simplify: unify simple_comparison ops in 
> vec_cond for bit and/or/xor [PR119196]
> 
> On Thu, 3 Jul 2025, Icen Zeyada wrote:
> 
> > Merge simple_comparison patterns under a single vec_cond_expr for bit_and,
> > bit_ior, and bit_xor in the simplify pass.
> >
> > Ensure that when both operands of a bit_and, bit_or, or bit_xor are 
> > simple_comparison
> > results, they reside within the same vec_cond_expr rather than separate 
> > ones.
> > This prepares the AST so that subsequent transformations (e.g., folding the
> > comparisons if possible) can take effect.
> >
> > PR tree-optimization/119196
> >
> > gcc/ChangeLog:
> >
> >* match.pd: Merge multiple vec_cond_expr in a single one for
> >  bit_and, bit_ior and bit_xor.
> >
> > Signed-off-by: Icen Zeyada 
> > ---
> >  gcc/match.pd | 8 
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index f4416d9172c..36317b9128f 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -5939,6 +5939,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   && !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0)
> > (vec_cond @0 (op! @1 @3) (op! @2 @4
> >
> > +/* (@0 ? @2 : @3) lop (@1 ? @2 : @3)  -->  (@0 lop @1) ? @2 : @3.  */
> > +(for lop (bit_and bit_ior bit_xor)
> > +   (simplify
> > +   (lop
> > +  (vec_cond @0 integer_minus_onep@2 integer_zerop@3)
> 
> > Why are you restricting this to integer_minus_onep/zerop?  Is
> > the assumption that such vec_cond is "cheap", thus we also
> > do not need to add :s to them?
> 
> From my understanding the reason we specify those specific ones is that 
> we expect a vec_cond and therefore it can only be true or false which in 
> this case would be integer_minus_onep or integer_zerop. I believe those 
> were the values in the original tree when specifying whether an 
> expression with a vec_cond was true or not so when matching i just had 
> those expressions in mind.

Ah, of course it's invalid to turn (a ? 5 : 9) | (b ? 5 : 9)
into (a | b) ? 5 : 9, it's just trivially valid for the minus-one/zero
values.  Sorry for the confusion.

> > +  (vec_cond @1 @2 @3))
> > +   (vec_cond (lop @0 @1) @2 @3)))
> 
> > So this turns a logical operation on the data type to a logical
> > operation on the predicate type.  You need to check this
> > operation is supported by the target with
> 
> >   target_supports_op_p (TREE_TYPE (@0), lop, optab_vector)
> 
> > I think the transform is sensible for arbitrary @2/@3 though
> > in that case with :s on the vec_conds.
> 
> > Richard.
> 
> I am happy to add the target check but I am curious how generalising the 
> @2 @3 would help with the current bug on bugzilla we were trying to 
> solve? Or do you think this just might be better in the larger context?

So adding the target check is all that's needed here.

Thanks,
Richard.

> > +
> >  /* (c ? a : b) op d  -->  c ? (a op d) : (b op d) */
> >   (simplify
> >(op (vec_cond:s @0 @1 @2) @3)
> >
> 
> --
> Richard Biener 
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH v4 6/6] libstdc++: Set FMT for complete C++23 mdspan [PR107761].

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 09:27, Luc Grosheintz  wrote:
>
> PR libstdc++/107761
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/version.def (mdspan): Set to 202207 and remove
> no_stdname.
> * include/bits/version.h: Regenerate.
> * testsuite/23_containers/mdspan/ftm.cc: Test presence
> of FTM.

Please spell this out in full as "feature test macro", there's no need
to use an initialism that some people won't recognise. (And the commit
summary line says "FMT" not "FTM" anyway ;-)

I would also prefer the test to be called version.cc not ftm.cc as
that's what we use elsewhere.

I think we can do that when pushing the commit though, we don't need
another patch for it.

Thanks for getting <mdspan> done! Great work.

>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/bits/version.def  | 3 +--
>  libstdc++-v3/include/bits/version.h| 3 ++-
>  libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
>  3 files changed, 12 insertions(+), 3 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
>
> diff --git a/libstdc++-v3/include/bits/version.def 
> b/libstdc++-v3/include/bits/version.def
> index 64f8190d240..f1015abdbfa 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -1007,9 +1007,8 @@ ftms = {
>
>  ftms = {
>name = mdspan;
> -  no_stdname = true; // FIXME: remove
>values = {
> -v = 1; // FIXME: 202207
> +v = 202207;
>  cxxmin = 23;
>};
>  };
> diff --git a/libstdc++-v3/include/bits/version.h 
> b/libstdc++-v3/include/bits/version.h
> index 744246a9938..80f6586372d 100644
> --- a/libstdc++-v3/include/bits/version.h
> +++ b/libstdc++-v3/include/bits/version.h
> @@ -1126,8 +1126,9 @@
>
>  #if !defined(__cpp_lib_mdspan)
>  # if (__cplusplus >= 202100L)
> -#  define __glibcxx_mdspan 1L
> +#  define __glibcxx_mdspan 202207L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
> +#   define __cpp_lib_mdspan 202207L
>  #  endif
>  # endif
>  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc 
> b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> new file mode 100644
> index 000..106ee4010ee
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> @@ -0,0 +1,9 @@
> +// { dg-do compile { target c++23 } }
> +#include 
> +
> +#ifndef __cpp_lib_mdspan
> +#error "Feature test macro __cpp_lib_mdspan is missing for "
> +#if __cpp_lib_mdspan < 202207
> +#error "Feature test macro __cpp_lib_mdspan has the wrong value"
> +#endif
> +#endif
> --
> 2.49.0
>

[committed] s390: Split tests for 31bit support

2025-07-08 Thread Juergen Christ

The new vector pattern tests used int128 without a guard.  This causes
failures on 31-bit targets.  Split the tests so that the tests
requiring 128-bit support are only executed on targets supporting
them.

Committed as obvious.

Signed-off-by: Juergen Christ 

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/pattern-avg-1.c: Split test.
* gcc.target/s390/vector/pattern-mulh-1.c: Split test.
* gcc.target/s390/vector/pattern-avg-2.c: New test.
* gcc.target/s390/vector/pattern-mulh-2.c: New test.
---
 .../gcc.target/s390/vector/pattern-avg-1.c|  3 +--
 .../gcc.target/s390/vector/pattern-avg-2.c| 23 
 .../gcc.target/s390/vector/pattern-mulh-1.c   |  3 +--
 .../gcc.target/s390/vector/pattern-mulh-2.c   | 26 +++
 4 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c

diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
index a15301aabe54..30c6ed476846 100644
--- a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -21,6 +21,5 @@
 TEST(char,short,16)
 TEST(short,int,8)
 TEST(int,long,4)
-TEST(long,__int128,2)
 
-/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 6 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c
new file mode 100644
index ..1cc614eb1dea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N)   \
+  void  \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,  \
+   signed T1 *__restrict b) \
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;   \
+  } \
+\
+  void  \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \
+unsigned T1 *__restrict b)  \
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \
+  }
+
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 2 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
index cd8e4e7d7a09..f71ef06c8252 100644
--- a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -24,6 +24,5 @@
 TEST(char,short,16,8)
 TEST(short,int,8,16)
 TEST(int,long,4,32)
-TEST(long,__int128,2,64)
 
-/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.MULH" 6 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c
new file mode 100644
index ..6ac6855b1bdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S) \
+  void  \
+  mulh##T1 (signed T1 *__restrict res,  \
+signed T1 *__restrict l,\
+signed T1 *__restrict r)\
+  { \
+for (int i = 0; i < N; ++i) \
+  res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
+  } \
+\
+  void  \
+  umulh##T1 (unsigned T1 *__restrict res,   \
+ unsigned

Re: [PATCH v4 5/6] libstdc++: Implement mdspan and tests [PR107761].

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 09:26, Luc Grosheintz  wrote:
>
> Implements the class mdspan as described in N4950, i.e. without P3029.
> It also adds tests for mdspan. This commit completes the implementation
> of P0009, i.e. the C++23 part.
>
> PR libstdc++/107761
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (mdspan): New class.
> * src/c++23/std.cc.in (mdspan): Add.
> * testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
> * testsuite/23_containers/mdspan/mdspan.cc: New test.
> * testsuite/23_containers/mdspan/layout_like.h: Add class
> LayoutLike which models a user-defined layout.

OK, thanks!

>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan   | 285 
>  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
>  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
>  .../23_containers/mdspan/layout_like.h|  83 +++
>  .../testsuite/23_containers/mdspan/mdspan.cc  | 643 ++
>  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
>  6 files changed, 1078 insertions(+), 1 deletion(-)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index 7e970c2b905..0fd78570b3e 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -1057,6 +1057,291 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>{ return __p + __i; }
>  };
>
> +  namespace __mdspan
> +  {
> +template
> +  constexpr bool
> +  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> 
> __indices)
> +  {
> +   static_assert(__exts.rank() == _Nm);
> +   for (size_t __i = 0; __i < __exts.rank(); ++__i)
> + if (__indices[__i] >= __exts.extent(__i))
> +   return false;
> +   return true;
> +  }
> +  }
> +
> +  template +  typename _LayoutPolicy = layout_right,
> +  typename _AccessorPolicy = default_accessor<_ElementType>>
> +class mdspan
> +{
> +  static_assert(!is_array_v<_ElementType>,
> +   "ElementType must not be an array type");
> +  static_assert(!is_abstract_v<_ElementType>,
> +   "ElementType must not be an abstract class type");
> +  static_assert(__mdspan::__is_extents<_Extents>,
> +   "Extents must be a specialization of std::extents");
> +  static_assert(is_same_v<_ElementType,
> + typename _AccessorPolicy::element_type>);
> +
> +public:
> +  using extents_type = _Extents;
> +  using layout_type = _LayoutPolicy;
> +  using accessor_type = _AccessorPolicy;
> +  using mapping_type = typename layout_type::template 
> mapping;
> +  using element_type = _ElementType;
> +  using value_type = remove_cv_t;
> +  using index_type = typename extents_type::index_type;
> +  using size_type = typename extents_type::size_type;
> +  using rank_type = typename extents_type::rank_type;
> +  using data_handle_type = typename accessor_type::data_handle_type;
> +  using reference = typename accessor_type::reference;
> +
> +  static constexpr rank_type
> +  rank() noexcept { return extents_type::rank(); }
> +
> +  static constexpr rank_type
> +  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
> +
> +  static constexpr size_t
> +  static_extent(rank_type __r) noexcept
> +  { return extents_type::static_extent(__r); }
> +
> +  constexpr index_type
> +  extent(rank_type __r) const noexcept { return extents().extent(__r); }
> +
> +  constexpr
> +  mdspan()
> +  requires (rank_dynamic() > 0)
> +  && is_default_constructible_v
> + && is_default_constructible_v
> + && is_default_constructible_v
> +  : _M_accessor(), _M_mapping(), _M_handle()
> +  { }
> +
> +  constexpr
> +  mdspan(const mdspan& __other) = default;
> +
> +  constexpr
> +  mdspan(mdspan&& __other) = default;
> +
> +  template<__mdspan::__valid_index_type... _OIndexTypes>
> +   requires (sizeof...(_OIndexTypes) == rank()
> +  || sizeof...(_OIndexTypes) == rank_dynamic())
> +&& is_constructible_v
> +&& is_default_constructible_v
> +   constexpr explicit
> +   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
> +   : _M_accessor(),
> + _M_mapping(_Extents(static_cast(std::move(__exts))...)),
> + _M_handle(std::move(__handle))
> +   { }
> +
> +  template<__mdspan::__valid_index_type _OIndexType,
> +  size_t _Nm>
> +   requires (_Nm == rank() || _Nm == rank_dynamic())
> +

Re: [PATCH v4 0/6] Implement mdspan.

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 09:26, Luc Grosheintz  wrote:
>
> Patches 1/5 ... 4/5 aren't sent because corresponding commits from v3
> should be used. The changes for 5/5 are:
>
>   * Fix ADL issue in swap and create tests.
>   * Replace pragma once with include guard in layout_like.h.
>   * Use VERIFY(md.empty()) when checking value initialization.
>
> Since the code for swap is identical for all three members of mdspan,
> this commit intentionally checks that the accessor is ADL swappable, but
> not the other two members. Doing so allows us to have a single, short
> class purely dedicated to ADL. Otherwise we'd need to put OpaqueLayout
> and LayoutLike into a namespace and add ADL related logic. I tried it
> and wasn't too happy with the result. This felt shorter, more directly
> to the point while still checking that the implemented swap function
> allows ADL for swapping all member variables. I tested the effectiveness
> of the test by reverting the fix and making sure the test fails.

Yeah that seems fine. I don't think we'll accidentally re-add the
std:: on the swaps for the other members without realising that's
wrong! So just testing that one of them is swapped via ADL is fine.

>
> This version of the patch series also adds the commit to update the
> FTM. The changes are:
>
>   * Update the commit message to mention the related ticket.
>   * Rename the file.
>   * Implement a more verbose version of the test.
>
> Luc Grosheintz (6):
>   libstdc++: Check prerequisites of layout_*::operator().
>   libstdc++: Check prerequisite of extents::extents.
>   libstdc++: Restructure mdspan tests to reuse IntLike.
>   libstdc++: Implement __mdspan::__size.
>   libstdc++: Implement mdspan and tests [PR107761].
>   libstdc++: Set FTM for complete C++23 mdspan [PR107761].
>
>  libstdc++-v3/include/bits/version.def |   3 +-
>  libstdc++-v3/include/bits/version.h   |   3 +-
>  libstdc++-v3/include/std/mdspan   | 315 -
>  libstdc++-v3/src/c++23/std.cc.in  |   3 +-
>  .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
>  .../mdspan/extents/class_mandates_neg.cc  |   2 +
>  .../mdspan/extents/custom_integer.cc  |  27 +-
>  .../mdspan/extents/extents_mismatch_neg.cc|  35 +
>  .../23_containers/mdspan/extents/int_like.h   |  28 +
>  .../testsuite/23_containers/mdspan/ftm.cc |   9 +
>  .../23_containers/mdspan/layout_like.h|  83 +++
>  .../mdspan/layouts/debug/out_of_bounds_neg.cc |  30 +
>  .../testsuite/23_containers/mdspan/mdspan.cc  | 643 ++
>  .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
>  14 files changed, 1212 insertions(+), 34 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc
>
> --
> 2.49.0
>

Re: [PATCH v2] libstdc++: Search for tzdata on Windows (msys)

2025-07-08 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 23:53, Björn Schäpers  wrote:
>
> From: Björn Schäpers 
>
> Windows does not provide a tzdata.zi, but msys does. Use this, if
> available, instead of the embedded (and possibly outdated) database.
>
> libstdc++-v3/Changelog:
>
> Use msys provided time zone information.
>
> * src/c++20/tzdb.cc (zoneinfo_file): On Windows look relative
> from the DLL path for the time zone information.

Thanks! This looks good now, I'll do some basic testing on mingw-w64
and Wine, and get it pushed to trunk. We should also backport to the
release branches too.



>
> Signed-off-by: Björn Schäpers 
> ---
>  libstdc++-v3/src/c++20/tzdb.cc | 34 ++
>  1 file changed, 34 insertions(+)
>
> diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc
> index 6e244dc656d..9923d14b7a7 100644
> --- a/libstdc++-v3/src/c++20/tzdb.cc
> +++ b/libstdc++-v3/src/c++20/tzdb.cc
> @@ -44,6 +44,12 @@
>  # include// getenv
>  #endif
>
> +#if _GLIBCXX_HAVE_WINDOWS_H
> +# define WIN32_LEAN_AND_MEAN
> +# include 
> +# include 
> +#endif
> +
>  #if defined __GTHREADS && ATOMIC_POINTER_LOCK_FREE == 2
>  # define USE_ATOMIC_LIST_HEAD 1
>  // TODO benchmark atomic> vs mutex.
> @@ -1144,6 +1150,34 @@ namespace std::chrono
>  #ifdef _GLIBCXX_ZONEINFO_DIR
>else
> path = _GLIBCXX_ZONEINFO_DIR;
> +#endif
> +#ifdef _GLIBCXX_HAVE_WINDOWS_H
> +  if (path.empty())
> +   {
> + HMODULE dll_module;
> + if (GetModuleHandleExA(
> + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
> + | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
> + reinterpret_cast(&zoneinfo_file), 
> &dll_module))
> +   {
> + char dll_path[MAX_PATH];
> + if (GetModuleFileNameA(dll_module, dll_path, MAX_PATH) != 0)
> +   {
> + string_view dll_path_view = dll_path;
> + auto pos = dll_path_view.find_last_of('\\');
> + dll_path_view = dll_path_view.substr(0, pos);
> + if (dll_path_view.ends_with("\\bin"))
> +   {
> + constexpr string_view remaining_path = 
> "share\\zoneinfo";
> + dll_path_view.remove_suffix(3); // Remove bin
> + path.reserve(dll_path_view.size()
> +  + remaining_path.size());
> + path = dll_path_view;
> + path += remaining_path;
> +   }
> +   }
> +   }
> +   }
>  #endif
>if (!path.empty())
> path.append(filename);
> --
> 2.50.0
>

[PATCH v2 2/2] libstdc++: Better CTAD for span and mdspan [PR120914].

2025-07-08 Thread Luc Grosheintz

This implements P3029R1. In P3029R1, the CTAD for span is refined to
permit deducing the extent of the span from an integral constant, e.g.

  span((T*) ptr, integral_constant{});

is deduced as span. Similarly, in

  auto exts = extents(integral_constant);
  auto md = mdspan((T*) ptr, integral_constant);

exts and md have types extents and mdspan>, respectively.

PR libstdc++/120914

libstdc++-v3/ChangeLog:

* include/std/span (span): Update CTAD to enable
integral constants [P3029R1].
* include/std/mdspan (extents): ditto.
(mdspan): ditto.
* testsuite/23_containers/span/deduction.cc: Test deduction
guide.
* testsuite/23_containers/mdspan/extents/misc.cc: ditto.
* testsuite/23_containers/mdspan/mdspan.cc: ditto.

Signed-off-by: Luc Grosheintz 
---
 libstdc++-v3/include/std/mdspan   |  8 ++
 libstdc++-v3/include/std/span | 20 ++-
 .../23_containers/mdspan/extents/misc.cc  | 20 +++
 .../testsuite/23_containers/mdspan/mdspan.cc  | 25 +++
 .../testsuite/23_containers/span/deduction.cc |  3 +++
 5 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 0fd78570b3e..152dcb3e92a 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -406,10 +406,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 template
   auto __build_dextents_type(integer_sequence)
-> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
-
-template
-  consteval size_t
-  __dynamic_extent() { return dynamic_extent; }
   }
 
   template
@@ -419,7 +415,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 requires (is_convertible_v<_Integrals, size_t> && ...)
 explicit extents(_Integrals...) ->
-  extents()...>;
+  extents...>;
 
   struct layout_left
   {
@@ -1316,7 +1312,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  && (sizeof...(_Integrals) > 0)
 explicit mdspan(_ElementType*, _Integrals...)
 -> mdspan<_ElementType,
- extents()...>>;
+ extents...>>;
 
   template
 mdspan(_ElementType*, span<_OIndexType, _Nm>)
diff --git a/libstdc++-v3/include/std/span b/libstdc++-v3/include/std/span
index 49ab9109d83..5629a71b9bd 100644
--- a/libstdc++-v3/include/std/span
+++ b/libstdc++-v3/include/std/span
@@ -476,6 +476,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 };
 
   // deduction guides
+  namespace __detail
+  {
+template
+  concept __integral_constant_like = is_integral_v
+   && !is_same_v>
+   && convertible_to<_Tp, decltype(_Tp::value)>
+   && equality_comparable_with<_Tp, decltype(_Tp::value)>
+   && bool_constant<_Tp() == _Tp::value>::value
+   && bool_constant(_Tp()) == _Tp::value>
+::value;
+
+template
+  constexpr size_t __maybe_static_ext = dynamic_extent;
+
+template<__integral_constant_like _Tp>
+  constexpr size_t __maybe_static_ext<_Tp> = {_Tp::value};
+  }
 
   template
 span(_Type(&)[_ArrayExtent]) -> span<_Type, _ArrayExtent>;
@@ -489,7 +506,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 span(_Iter, _End)
-  -> span>>;
+  -> span>,
+__detail::__maybe_static_ext<_End>>;
 
   template
 span(_Range &&)
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
index e71fdc54230..bca8901685d 100644
--- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
@@ -97,6 +97,25 @@ test_deduction(Extents... exts)
   VERIFY(e == expected);
 }
 
+constexpr bool
+test_integral_constant_deduction()
+{
+  auto verify = [](auto actual, auto expected)
+{
+  static_assert(std::same_as);
+  VERIFY(actual == expected);
+};
+
+  constexpr auto c1 = std::integral_constant{};
+  constexpr auto c2 = std::integral_constant{};
+
+  verify(std::extents(1), std::extents{1});
+  verify(std::extents(c1), std::extents{});
+  verify(std::extents(c2), std::extents{});
+  verify(std::extents(c1, 2), std::extents{2});
+  return true;
+}
+
 constexpr bool
 test_deduction_all()
 {
@@ -104,6 +123,7 @@ test_deduction_all()
   test_deduction<1>(1);
   test_deduction<2>(1.0, 2.0f);
   test_deduction<3>(int(1), short(2), size_t(3));
+  test_integral_constant_deduction();
   return true;
 }
 
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
index 9252273bf66..a650fb19bdf 100644
--- a/libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
@@ -245,6 +245,28 @@ test_from_pointer_and_shape()
   return true;
 }
 
+constexpr bool
+test_from_pointer_and_integral_constant()
+{
+  std::array buffer{};
+  double * ptr = buffer.data();
+
+  auto verify = [pt

[PATCH v2 1/2] libstdc++: Silence a warning in a test for span.

2025-07-08 Thread Luc Grosheintz

In a test of span, there's an unused variable myspan. This
commit silences the warning.

libstdc++-v3/ChangeLog:

* testsuite/23_containers/span/contiguous_range_neg.cc: Silence
warning about unused variable myspan.

Signed-off-by: Luc Grosheintz 
---
 .../testsuite/23_containers/span/contiguous_range_neg.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc 
b/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
index c9e9112ed6d..890fdf8aea0 100644
--- a/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/span/contiguous_range_neg.cc
@@ -25,6 +25,7 @@ main()
 {
   std::deque d{};
   std::span myspan(d); // { dg-error "no match" }
+  (void) myspan;
 }
 
 // { dg-prune-output "data" }
-- 
2.49.0

Re: [SNAPv4] libstdc++: Add NTTP bind_front, -back, not_fn (P2714) [PR119744]

2025-07-08 Thread Tomasz Kaminski

To test forwarding, I would suggest using a by-value member:
  struct F
  {
quals operator()(int&) const { return { false, true }; }
quals operator()(int const&) const { return { true, true }; }
quals operator()(int&&) const { return { false, false }; }
quals operator()(int const&&) const { return { true, false }; }
  };
// Note that all functions are const qualified; alternatively they
could be made static.

Then for g equal either to bind_front(F{}, 10) or bind_front(10),
g() should call int&
as_const(g) should call int const&


int i;
And for bind_front(F{}, std::ref(i)) and bind_front(std::ref(i)),
regardless of whether it is called on a const/mutable lvalue or rvalue, you
should always call the int& overload.
And if cref(i) is used, then call the int const& overload.


On Tue, Jul 8, 2025 at 9:45 AM Tomasz Kaminski  wrote:

>
>
> On Tue, Jul 8, 2025 at 5:41 AM Nathan Myers  wrote:
>
>> This is a snapshot of work in progress, for reference.
>> bind_front(...) is uglified directly from the sample
>> implementation in P2714, at include/std/functional:1284 .
>>
>> Test failures:
>>
>> bind_front/1.cc:53: error: static assertion failed
>> bind_front/1.cc:57: error: static assertion failed
>> bind_front/1.cc:214: error: static assertion failed
>> bind_front/1.cc:215: error: static assertion failed
>> bind_front/1.cc:216: required from here
>> functional:1301: error: invalid conversion from
>> 'std::invoke_result_t&,
>> void*&>' {aka 'void*'} to 'int' [-fpermissive]
>> [... etc. ]
>> Also complains about 218, 220, 231, 233-6, 264, 267
>>
> The issue is raised on the line:
>  int& i6 = g6(vp);
>  VERIFY( &i6 == &i );
> Where G6 is defined as follows:
>  auto g6 = bind_front(std::ref(i)); // bound arg of type int&
>   using G6 = decltype(g6);
> And f:
>   struct F
>   {
> int& operator()(int& i, void*) { return i; }
> void* operator()(int, void* p) const { return p; }
>   };
>  constexpr static F f{};
>
> As the template parameter object, i.e. what id-expression f refers to in
> bind_front is always constant,
> g6(vp) i.e. bind_front(ref(i), vp) calls f(ref(i), vp), and because f
> is const qualified, the only viable candidate is:
> void* operator()(int, void* p) const { return p; }
> So you get void* returned, which int& obviously cannot bind to.
>
>
>>
>> libstdc++-v3/ChangeLog:
>> PR libstdc++/119744
>> * include/bits/version.def: Redefine __cpp_lib_bind_front etc.
>> * include/bits/version.h: Ditto.
>> * include/std/functional: Add new bind_front etc. overloads
>> * testsuite/20_util/function_objects/bind_front/1.cc
>> ---
>>  libstdc++-v3/include/bits/version.def |  12 ++
>>  libstdc++-v3/include/bits/version.h   |  21 ++-
>>  libstdc++-v3/include/std/functional   | 124 +-
>>  .../20_util/function_objects/bind_front/1.cc  | 103 ++-
>>  4 files changed, 278 insertions(+), 5 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/version.def
>> b/libstdc++-v3/include/bits/version.def
>> index 5d5758bf203..8ab9a7207e7 100644
>> --- a/libstdc++-v3/include/bits/version.def
>> +++ b/libstdc++-v3/include/bits/version.def
>> @@ -463,6 +463,10 @@ ftms = {
>>
>>  ftms = {
>>name = not_fn;
>> +  values = {
>> +v = 202306;
>> +cxxmin = 26;
>> +  };
>>values = {
>>  v = 201603;
>>  cxxmin = 17;
>> @@ -776,6 +780,10 @@ ftms = {
>>
>>  ftms = {
>>name = bind_front;
>> +  values = {
>> +v = 202306;
>> +cxxmin = 26;
>> +  };
>>values = {
>>  v = 201907;
>>  cxxmin = 20;
>> @@ -784,6 +792,10 @@ ftms = {
>>
>>  ftms = {
>>name = bind_back;
>> +  values = {
>> +v = 202306;
>> +cxxmin = 26;
>> +  };
>>values = {
>>  v = 202202;
>>  cxxmin = 23;
>> diff --git a/libstdc++-v3/include/bits/version.h
>> b/libstdc++-v3/include/bits/version.h
>> index 2b00e8419b3..c204ae3c48c 100644
>> --- a/libstdc++-v3/include/bits/version.h
>> +++ b/libstdc++-v3/include/bits/version.h
>> @@ -511,7 +511,12 @@
>>  #undef __glibcxx_want_make_from_tuple
>>
>>  #if !defined(__cpp_lib_not_fn)
>> -# if (__cplusplus >= 201703L)
>> +# if (__cplusplus >  202302L)
>> +#  define __glibcxx_not_fn 202306L
>> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_not_fn)
>> +#   define __cpp_lib_not_fn 202306L
>> +#  endif
>> +# elif (__cplusplus >= 201703L)
>>  #  define __glibcxx_not_fn 201603L
>>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_not_fn)
>>  #   define __cpp_lib_not_fn 201603L
>> @@ -866,7 +871,12 @@
>>  #undef __glibcxx_want_atomic_value_initialization
>>
>>  #if !defined(__cpp_lib_bind_front)
>> -# if (__cplusplus >= 202002L)
>> +# if (__cplusplus >  202302L)
>> +#  define __glibcxx_bind_front 202306L
>> +#  if defined(__glibcxx_want_all) || defined(__glibcxx_want_bind_front)
>> +#   define __cpp_lib_bind_front 202306L
>> +#  endif
>> +# elif (__cplusplus >= 202002L)
>>  #  define __glibcxx_bind_front 201907L
>>  #  if defined(__glibcxx_want_all) || de

Re: [PATCH v2 2/2] libstdc++: Better CTAD for span and mdspan [PR120914].

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 10:54, Luc Grosheintz  wrote:
>
> This implements P3029R1. In P3029R1, the CTAD for span is refined to
> permit deducing the extent of the span from an integral constant, e.g.
>
>   span((T*) ptr, integral_constant{});
>
> is deduced as span. Similarly, in
>
>   auto exts = extents(integral_constant);
>   auto md = mdspan((T*) ptr, integral_constant);
>
> exts and md have types extents and mdspan extents>, respectively.
>
> PR libstdc++/120914
>
> libstdc++-v3/ChangeLog:
>
> * include/std/span (span): Update CTAD to enable
> integral constants [P3029R1].
> * include/std/mdspan (extents): ditto.
> (mdspan): ditto.
> * testsuite/23_containers/span/deduction.cc: Test deduction
> guide.
> * testsuite/23_containers/mdspan/extents/misc.cc: ditto.
> * testsuite/23_containers/mdspan/mdspan.cc: ditto.
>
> Signed-off-by: Luc Grosheintz 
> ---
>  libstdc++-v3/include/std/mdspan   |  8 ++
>  libstdc++-v3/include/std/span | 20 ++-
>  .../23_containers/mdspan/extents/misc.cc  | 20 +++
>  .../testsuite/23_containers/mdspan/mdspan.cc  | 25 +++
>  .../testsuite/23_containers/span/deduction.cc |  3 +++
>  5 files changed, 69 insertions(+), 7 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
> index 0fd78570b3e..152dcb3e92a 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -406,10 +406,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  template
>auto __build_dextents_type(integer_sequence)
> -> extents<_IndexType, ((void) _Counts, dynamic_extent)...>;
> -
> -template
> -  consteval size_t
> -  __dynamic_extent() { return dynamic_extent; }
>}
>
>template
> @@ -419,7 +415,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>template
>  requires (is_convertible_v<_Integrals, size_t> && ...)
>  explicit extents(_Integrals...) ->
> -  extents()...>;
> +  extents...>;
>
>struct layout_left
>{
> @@ -1316,7 +1312,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   && (sizeof...(_Integrals) > 0)
>  explicit mdspan(_ElementType*, _Integrals...)
>  -> mdspan<_ElementType,
> - extents()...>>;
> + extents...>>;
>
>template
>  mdspan(_ElementType*, span<_OIndexType, _Nm>)
> diff --git a/libstdc++-v3/include/std/span b/libstdc++-v3/include/std/span
> index 49ab9109d83..5629a71b9bd 100644
> --- a/libstdc++-v3/include/std/span
> +++ b/libstdc++-v3/include/std/span
> @@ -476,6 +476,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  };
>
>// deduction guides
> +  namespace __detail
> +  {
> +template
> +  concept __integral_constant_like = is_integral_v
> +   && !is_same_v>
> +   && convertible_to<_Tp, decltype(_Tp::value)>
> +   && equality_comparable_with<_Tp, decltype(_Tp::value)>
> +   && bool_constant<_Tp() == _Tp::value>::value
> +   && bool_constant(_Tp()) == 
> _Tp::value>
> +::value;
> +
> +template
> +  constexpr size_t __maybe_static_ext = dynamic_extent;
> +
> +template<__integral_constant_like _Tp>
> +  constexpr size_t __maybe_static_ext<_Tp> = {_Tp::value};

Are the braces here to detect narrowing conversions?

(The paper doesn't mention why they're used, as far as I can see)


> +  }
>
>template
>  span(_Type(&)[_ArrayExtent]) -> span<_Type, _ArrayExtent>;
> @@ -489,7 +506,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>template
>  span(_Iter, _End)
> -  -> span>>;
> +  -> span>,
> +__detail::__maybe_static_ext<_End>>;
>
>template
>  span(_Range &&)
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc 
> b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> index e71fdc54230..bca8901685d 100644
> --- a/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/extents/misc.cc
> @@ -97,6 +97,25 @@ test_deduction(Extents... exts)
>VERIFY(e == expected);
>  }
>
> +constexpr bool
> +test_integral_constant_deduction()
> +{
> +  auto verify = [](auto actual, auto expected)
> +{
> +  static_assert(std::same_as);
> +  VERIFY(actual == expected);
> +};
> +
> +  constexpr auto c1 = std::integral_constant{};
> +  constexpr auto c2 = std::integral_constant{};
> +
> +  verify(std::extents(1), std::extents{1});
> +  verify(std::extents(c1), std::extents{});
> +  verify(std::extents(c2), std::extents{});
> +  verify(std::extents(c1, 2), std::extents{2});
> +  return true;
> +}
> +
>  constexpr bool
>  test_deduction_all()
>  {
> @@ -104,6 +123,7 @@ test_deduction_all()
>test_deduction<1>(1);
>test_deduction<2>(1.0, 2.0f);
>test_deduction<3>(int(1), short(2), size_t(3));
> +  test_integral_constant_deduction();
>return true;
>  }
>
> diff --git a/libstdc++-v3/testsuite/23_conta

Re: [PATCH v2] libstdc++: Search for tzdata on Windows (msys)

2025-07-08 Thread Jonathan Wakely

On Mon, 7 Jul 2025 at 23:53, Björn Schäpers  wrote:
>
> From: Björn Schäpers 
>
> Windows does not provide a tzdata.zi, but msys does. Use this, if
> available, instead of the embedded (and possibly outdated) database.
>
> libstdc++-v3/Changelog:
>
> Use msys provided time zone information.
>
> * src/c++20/tzdb.cc (zoneinfo_file): On Windows look relative
> from the DLL path for the time zone information.
>
> Signed-off-by: Björn Schäpers 
> ---
>  libstdc++-v3/src/c++20/tzdb.cc | 34 ++
>  1 file changed, 34 insertions(+)
>
> diff --git a/libstdc++-v3/src/c++20/tzdb.cc b/libstdc++-v3/src/c++20/tzdb.cc
> index 6e244dc656d..9923d14b7a7 100644
> --- a/libstdc++-v3/src/c++20/tzdb.cc
> +++ b/libstdc++-v3/src/c++20/tzdb.cc
> @@ -44,6 +44,12 @@
>  # include// getenv
>  #endif
>
> +#if _GLIBCXX_HAVE_WINDOWS_H
> +# define WIN32_LEAN_AND_MEAN
> +# include 
> +# include 
> +#endif
> +
>  #if defined __GTHREADS && ATOMIC_POINTER_LOCK_FREE == 2
>  # define USE_ATOMIC_LIST_HEAD 1
>  // TODO benchmark atomic> vs mutex.
> @@ -1144,6 +1150,34 @@ namespace std::chrono
>  #ifdef _GLIBCXX_ZONEINFO_DIR
>else
> path = _GLIBCXX_ZONEINFO_DIR;
> +#endif
> +#ifdef _GLIBCXX_HAVE_WINDOWS_H
> +  if (path.empty())
> +   {
> + HMODULE dll_module;
> + if (GetModuleHandleExA(
> + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
> + | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
> + reinterpret_cast(&zoneinfo_file), 
> &dll_module))

Does this assume that libstdc++.dll is installed as part of the msys
installation?

What if users build GCC themselves and install it elsewhere?

Is the correct solution for msys-based systems to build GCC with
--with-libstdcxx-zoneinfo=/path/to/msys/tzdata.zi ?


> +   {
> + char dll_path[MAX_PATH];
> + if (GetModuleFileNameA(dll_module, dll_path, MAX_PATH) != 0)
> +   {
> + string_view dll_path_view = dll_path;
> + auto pos = dll_path_view.find_last_of('\\');
> + dll_path_view = dll_path_view.substr(0, pos);
> + if (dll_path_view.ends_with("\\bin"))
> +   {
> + constexpr string_view remaining_path = 
> "share\\zoneinfo";
> + dll_path_view.remove_suffix(3); // Remove bin
> + path.reserve(dll_path_view.size()
> +  + remaining_path.size());
> + path = dll_path_view;
> + path += remaining_path;
> +   }
> +   }
> +   }
> +   }
>  #endif
>if (!path.empty())
> path.append(filename);
> --
> 2.50.0
>

Re: [PATCH] libstdc++: Do not expose set_brackets/set_separator for formatter with format_kind other than sequence [PR119861]

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 09:23, Tomasz Kamiński  wrote:
>
> The standard defines separate specializations of range-default-formatter, out
> of which only the one for range_format::sequence provides the set_brackets and
> set_separator methods. We implemented it as one specialization and exposed
> these methods for range_format other than string or debug_string, i.e. when
> range_formatter was used as the underlying formatter.
>
> PR libstdc++/119861
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (formatter<_Rg, _CharT>::set_separator)
> (formatter<_Rg, _CharT>::set_brackets): Constrain with
> (format_kind<_Rg> == range_format::sequence).
> * testsuite/std/format/ranges/pr119861_neg.cc: New test.
> ---
> Testing on x86_64-linux locally. Format test passed.
> OK for trunk?

OK, thanks

> I do not think it is necessary to backport this to 15,
> as we were providing functions in more cases than needed. Let me know
> what you think.

I agree it's not very important to backport, but it also seems simple
and safe to do so.


>  libstdc++-v3/include/std/format   |  4 +-
>  .../std/format/ranges/pr119861_neg.cc | 52 +++
>  2 files changed, 54 insertions(+), 2 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index 5749aa1995a..d584b81c78a 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -6030,13 +6030,13 @@ namespace __format
>
>constexpr void
>set_separator(basic_string_view<_CharT> __sep) noexcept
> -   requires (!_S_range_format_is_string)
> +   requires (format_kind<_Rg> == range_format::sequence)
>{ _M_under.set_separator(__sep); }
>
>constexpr void
>set_brackets(basic_string_view<_CharT> __open,
>basic_string_view<_CharT> __close) noexcept
> -   requires (!_S_range_format_is_string)
> +   requires (format_kind<_Rg> == range_format::sequence)
>{ _M_under.set_brackets(__open, __close); }
>
>// We deviate from standard, that declares this as template accepting
> diff --git a/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc 
> b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
> new file mode 100644
> index 000..9a6ed16393e
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
> @@ -0,0 +1,52 @@
> +// { dg-do compile { target c++23 } }
> +
> +#include 
> +#include 
> +
> +// only format_kind::sequence provides set_brackets and set_separator methods
> +
> +template
> +struct MyCont : std::vector
> +{
> +  using std::vector::vector;
> +};
> +
> +template
> +constexpr std::range_format std::format_kind> = fk;
> +
> +void test_sequence()
> +{
> +  std::formatter, char> fmtter;
> +  fmtter.set_brackets("{", "}");
> +  fmtter.set_separator(",");
> +}
> +
> +void test_map()
> +{
> +  std::formatter>, char> 
> fmtter;
> +  fmtter.set_brackets("{", "}"); // { dg-error "here" }
> +  fmtter.set_separator(","); // { dg-error "here" }
> +}
> +
> +void test_set()
> +{
> +  std::formatter, char> fmtter;
> +  fmtter.set_brackets("{", "}"); // { dg-error "here" }
> +  fmtter.set_separator(","); // { dg-error "here" }
> +}
> +
> +void test_string()
> +{
> +  std::formatter, char> fmtter;
> +  fmtter.set_brackets("{", "}"); // { dg-error "here" }
> +  fmtter.set_separator(","); // { dg-error "here" }
> +}
> +
> +void test_debug_string()
> +{
> +  std::formatter, char> fmtter;
> +  fmtter.set_brackets("{", "}"); // { dg-error "here" }
> +  fmtter.set_separator(","); // { dg-error "here" }
> +}
> +
> +// { dg-error "no matching function for call to 'std::formatter<" "" { 
> target *-*-* } 0 }
> --
> 2.49.0
>

Re: [PATCH v1 0/3] Implement aligned_accessor [P2897R7].

2025-07-08 Thread Luc Grosheintz


How would you like to track this on bugzilla:

  1. As part of submdspan:
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110352
  https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2630r4.html

  2. As a new issue, because it's a different paper:
  https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2897r7.html

On 7/3/25 12:33, Luc Grosheintz wrote:

This patch series implements the aligned_accessor paper P2897R7 in three
parts:

   - Implement `is_sufficiently_aligned` which is part of <memory>.
   - Prepare the accessor tests for reuse.
   - Implement aligned_accessor.

A couple of remarks:

   - The paper P2897R7 and spec N5008 don't specify that the alignment
   for is_sufficiently_aligned must be a power of two.

   - The reasoning for why is_sufficiently_aligned isn't constexpr is
   nicely described in the paper.

   - Use of `class` in is_sufficiently_aligned is for consistency within
   that file.

   - The tests create new unsupported tests and expected failures. The
   testsuite doesn't have all that many of those; so there's likely a
   strategy to avoid this. However, I don't know how.

   - These changes are independent of mdspan, but due to the precise
   location of the code it might conflict with the mdspan patch series.

   - I skipped updating `cxxapi-data.csv` for is_sufficiently_aligned
   due to consistency with the rest of the mdspan patches, i.e. there
   will be a bulk update of the file later.

   - Each commit was tested with/without PCH and with/without
   _GLIBCXX_DEBUG filtered by 20_util/is_sufficiently_aligned
   and 23_containers/mdspan.
   The last commit was tested fully with/without PCH. All tests on
   x86_64-linux.

As always I'm happy to reorganize into different commits, if the
grouping doesn't make sense.

Luc Grosheintz (3):
   libstdc++: Implement is_sufficiently_aligned.
   libstdc++: Prepare test code for default_accessor for reuse.
   libstdc++: Implement aligned_accessor from mdspan.

  libstdc++-v3/include/bits/align.h |  16 ++
  libstdc++-v3/include/bits/version.def |  18 ++
  libstdc++-v3/include/bits/version.h   |  20 +++
  libstdc++-v3/include/std/mdspan   |  72 
  libstdc++-v3/include/std/memory   |   1 +
  libstdc++-v3/src/c++23/std.cc.in  |   4 +-
  .../20_util/is_sufficiently_aligned/1.cc  |  31 
  .../20_util/is_sufficiently_aligned/2.cc  |   7 +
  .../23_containers/mdspan/accessors/aligned.cc |  43 +
  .../mdspan/accessors/aligned_ftm.cc   |   6 +
  .../mdspan/accessors/aligned_neg.cc   |  33 
  .../accessors/debug/aligned_access_neg.cc |  23 +++
  .../accessors/debug/aligned_offset_neg.cc |  23 +++
  .../23_containers/mdspan/accessors/default.cc |  99 ---
  .../23_containers/mdspan/accessors/generic.cc | 168 ++
  15 files changed, 464 insertions(+), 100 deletions(-)
  create mode 100644 libstdc++-v3/testsuite/20_util/is_sufficiently_aligned/1.cc
  create mode 100644 libstdc++-v3/testsuite/20_util/is_sufficiently_aligned/2.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned_ftm.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned_neg.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/debug/aligned_access_neg.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/debug/aligned_offset_neg.cc
  delete mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/default.cc
  create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc

Re: [PATCH v1 0/3] Implement aligned_accessor [P2897R7].

2025-07-08 Thread Tomasz Kaminski

I have created a separate bugzilla for it:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120994

On Tue, Jul 8, 2025 at 9:58 AM Luc Grosheintz 
wrote:

> How would you like to track this on bugzilla:
>
>1. As part of submdspan:
>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110352
>https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2630r4.html
>
>2. As a new issue, because it's a different paper:
>https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2897r7.html
>
> On 7/3/25 12:33, Luc Grosheintz wrote:
> > This patch series implements the aligned_accessor paper P2897R7 in three
> > parts:
> >
> >- Implement `is_sufficiently_aligned` which is part of <memory>.
> >- Prepare the accessor tests for reuse.
> >- Implement aligned_accessor.
> >
> > A couple of remarks:
> >
> >- The paper P2897R7 and spec N5008 don't specify that the alignment
> >for is_sufficiently_aligned must be a power of two.
> >
> >- The reasoning for why is_sufficiently_aligned isn't constexpr is
> >nicely described in the paper.
> >
> >- Use of `class` in is_sufficiently_aligned is for consistency within
> >that file.
> >
> >- The tests create new unsupported tests and expected failures. The
> >testsuite doesn't have all that many of those; so there's likely a
> >strategy to avoid this. However, I don't know how.
> >
> >- These changes are independent of mdspan, but due to the precise
> >location of the code it might conflict with the mdspan patch series.
> >
> >- I skipped updating `cxxapi-data.csv` for is_sufficiently_aligned
> >due to consistency with the rest of the mdspan patches, i.e. there
> >will be a bulk update of the file later.
> >
> >- Each commit was tested with/without PCH and with/without
> >_GLIBCXX_DEBUG filtered by 20_util/is_sufficiently_aligned
> >and 23_containers/mdspan.
> >The last commit was tested fully with/without PCH. All tests on
> >x86_64-linux.
> >
> > As always I'm happy to reorganize into different commits, if the
> > grouping doesn't make sense.
> >
> > Luc Grosheintz (3):
> >libstdc++: Implement is_sufficiently_aligned.
> >libstdc++: Prepare test code for default_accessor for reuse.
> >libstdc++: Implement aligned_accessor from mdspan.
> >
> >   libstdc++-v3/include/bits/align.h |  16 ++
> >   libstdc++-v3/include/bits/version.def |  18 ++
> >   libstdc++-v3/include/bits/version.h   |  20 +++
> >   libstdc++-v3/include/std/mdspan   |  72 
> >   libstdc++-v3/include/std/memory   |   1 +
> >   libstdc++-v3/src/c++23/std.cc.in  |   4 +-
> >   .../20_util/is_sufficiently_aligned/1.cc  |  31 
> >   .../20_util/is_sufficiently_aligned/2.cc  |   7 +
> >   .../23_containers/mdspan/accessors/aligned.cc |  43 +
> >   .../mdspan/accessors/aligned_ftm.cc   |   6 +
> >   .../mdspan/accessors/aligned_neg.cc   |  33 
> >   .../accessors/debug/aligned_access_neg.cc |  23 +++
> >   .../accessors/debug/aligned_offset_neg.cc |  23 +++
> >   .../23_containers/mdspan/accessors/default.cc |  99 ---
> >   .../23_containers/mdspan/accessors/generic.cc | 168 ++
> >   15 files changed, 464 insertions(+), 100 deletions(-)
> >   create mode 100644
> libstdc++-v3/testsuite/20_util/is_sufficiently_aligned/1.cc
> >   create mode 100644
> libstdc++-v3/testsuite/20_util/is_sufficiently_aligned/2.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned_ftm.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/aligned_neg.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/debug/aligned_access_neg.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/debug/aligned_offset_neg.cc
> >   delete mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/default.cc
> >   create mode 100644
> libstdc++-v3/testsuite/23_containers/mdspan/accessors/generic.cc
> >
>
>

[PATCH] libstdc++: Do not expose set_brackets/set_separator for formatter with format_kind other than sequence [PR119861]

2025-07-08 Thread Tomasz Kamiński

The standard defines separate specializations of range-default-formatter, out
of which only the one for range_format::sequence provides the set_brackets and
set_separator methods. We implemented it as one specialization and exposed
these methods for any range_format other than string or debug_string, i.e. when
range_formatter was used as the underlying formatter.

PR libstdc++/119861

libstdc++-v3/ChangeLog:

* include/std/format (formatter<_Rg, _CharT>::set_separator)
(formatter<_Rg, _CharT>::set_brackets): Constrain with
(format_kind<_Rg> == range_format::sequence).
* testsuite/std/format/ranges/pr119861_neg.cc: New test.
---
Testing on x86_64-linux locally. Format test passed.
OK for trunk?
I do not think it is necessary to backport this to 15,
as we were providing the functions in more cases than needed. Let me know
what you think.

 libstdc++-v3/include/std/format   |  4 +-
 .../std/format/ranges/pr119861_neg.cc | 52 +++
 2 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 5749aa1995a..d584b81c78a 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -6030,13 +6030,13 @@ namespace __format
 
   constexpr void
   set_separator(basic_string_view<_CharT> __sep) noexcept
-   requires (!_S_range_format_is_string)
+   requires (format_kind<_Rg> == range_format::sequence)
   { _M_under.set_separator(__sep); }
 
   constexpr void
   set_brackets(basic_string_view<_CharT> __open,
   basic_string_view<_CharT> __close) noexcept
-   requires (!_S_range_format_is_string)
+   requires (format_kind<_Rg> == range_format::sequence)
   { _M_under.set_brackets(__open, __close); }
 
   // We deviate from standard, that declares this as template accepting
diff --git a/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc 
b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
new file mode 100644
index 000..9a6ed16393e
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/format/ranges/pr119861_neg.cc
@@ -0,0 +1,52 @@
+// { dg-do compile { target c++23 } }
+
+#include 
+#include 
+
+// only format_kind::sequence provides set_brackets and set_separator methods 
+
+template
+struct MyCont : std::vector
+{
+  using std::vector::vector;
+};
+
+template
+constexpr std::range_format std::format_kind> = fk;
+
+void test_sequence()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}");
+  fmtter.set_separator(",");
+}
+
+void test_map()
+{
+  std::formatter>, char> 
fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_set()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_string()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+void test_debug_string()
+{
+  std::formatter, char> fmtter;
+  fmtter.set_brackets("{", "}"); // { dg-error "here" }
+  fmtter.set_separator(","); // { dg-error "here" }
+}
+
+// { dg-error "no matching function for call to 'std::formatter<" "" { target 
*-*-* } 0 }
-- 
2.49.0

RE: [PATCH V3] x86: Enable separate shrink wrapping

2025-07-08 Thread Cui, Lili




> -Original Message-
> From: Segher Boessenkool 
> Sent: Friday, July 4, 2025 9:21 PM
> To: Cui, Lili 
> Cc: ubiz...@gmail.com; gcc-patches@gcc.gnu.org; Liu, Hongtao
> ; richard.guent...@gmail.com; Michael Matz
> 
> Subject: Re: [PATCH V3] x86: Enable separate shrink wrapping
> 
> Hi!
> 
> On Fri, Jul 04, 2025 at 07:23:23AM +, Cui, Lili wrote:
> > > > Initially, I looked at other architectures and disabled the hard
> > > > frame pointer,
> > >
> > > Like aarch?  Yeah I always wondered why they don't do it.  I decided
> > > that that is because of their ABI and architecture stuff they can
> > > save and restore their frame reg (r29) with the same insn as they
> > > use for the link reg (r30).  Of course they could do code to do
> > > tradeoffs there, but apparently they did not see the use for that, or
> > > perhaps from experience knew what way this would fall in the end.
> > >
> >
> > Loongarch/rs6000/riscv/aarch64 all disable
> HARD_FRAME_POINTER_REGNUM.
> 
> rs6000 does not *have* a hard frame pointer!
> 

Oh, I see.  The handling of HARD_FRAME_POINTER_REGNUM seems redundant for 
rs6000.

rs6000_get_separate_components (void)
{
...

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
...

> Generic parts of GCC require a frame pointer to exist, so when people require
> -fno-omit-frame-pointer we dedicate some GPR to it.  Gotta do something,
> eh!  It costs about 2% performance (on average, not worst
> case!)
> 
> The other archs you mention copied their code from aarch.
> 
> > > > but after reconsidering, I realized your point makes sense. If the
> > > > hard frame pointer were enabled,  we would typically emit push
> > > > %rbp and mov %rsp, %rbp at the first of prologue,  there is no
> > > > room for separate shrink wrap, but if the function itself also use
> > > > rbp, there might be room for optimization,
> > >
> > > Yup, when using a frame pointer (hard or otherwise, and a very bad
> > > plan nowadays, a 1970's thing) you typically get the frame pointer
> > > established very first thing, anything that touches the frame needs it 
> > > after
> all!
> > >
> > > But not all code accesses the frame, many early-out paths do not for
> > > example.
> > >
> >
> > Yes, currently we do shrink-wrap for the entire prologue (including the
> HARD_FRAME_POINTER), it can solve some early return issues. But we can't
> do separate-shrink-wrap for HARD_FRAME_POINTER, because
> HARD_FRAME_POINTER needs to record rsp before rsp points to the bottom
> of stack. We have to put it at the beginning of the prologue, and we have no
> chance to shrink it individually.
> 
> I'm not sure what things you mean here.
> 
> In most ABIs (yours as well I think?) the frame of a function is pointed to by
> the stack pointer at function entry, in normal functions.  "Happy functions" 
> :-)
> 

Yes, in a normal function it would be placed at entry bb, but shrink-wrap might 
move the whole prologue to after early return. X86 puts the frame pointer and 
the prologue together. For some early return situations, shrink-wrap can avoid 
the frame pointer setup being executed.

> You can set a pseudo to the stack pointer at function entry and then (either 
> or
> not) copy that to the frame pointer later, or let things be optimised away.
> 
> > I removed these two lines of code and conducted a comparison test,  and
> found that the binary unchanged. Unfortunately, I didn't identify any
> opportunities for optimization, I think it's better to keep them. Not sure if
> there might be any corner case issues.
> 
> For most archs and ABIs it is very beneficial to use -fomit-frame-pointer, I
> thought that was true for x86 even?  There is a special reg for it, sure, but 
> you
> can use that reg as a general reg as well, and that is way useful on an arch
> with so few registers :-)

Yes, -fomit-frame-pointer does help performance.  Here is a simple small case 
https://godbolt.org/z/5Tc3jM7qc . Do you mean to optimize the %rbp here?

Thanks,
Lili.
> 
> Segher

[PATCH v4 0/6] Implement mdspan.

2025-07-08 Thread Luc Grosheintz

Patches 1/5 ... 4/5 aren't sent because corresponding commits from v3
should be used. The changes for 5/5 are:

  * Fix ADL issue in swap and create tests.
  * Replace pragma once with include guard in layout_like.h.
  * Use VERIFY(md.empty()) when checking value initialization.

Since the code for swap is identical for all three members of mdspan,
this commit intentionally checks that the accessor is ADL swappable, but
not the other two members. Doing so allows us to have a single, short
class purely dedicated to ADL. Otherwise we'd need to put OpaqueLayout
and LayoutLike into a namespace and add ADL related logic. I tried it
and wasn't too happy with the result. This felt shorter and more
to the point, while still checking that the implemented swap function
allows ADL for swapping all member variables. I tested the effectiveness
of the test by reverting the fix and making sure the test fails.

This version of the patch series also adds the commit to update the
FTM. The changes are:

  * Update the commit message to mention the related ticket.
  * Rename the file.
  * Implement a more verbose version of the test.

Luc Grosheintz (6):
  libstdc++: Check prerequisites of layout_*::operator().
  libstdc++: Check prerequisite of extents::extents.
  libstdc++: Restructure mdspan tests to reuse IntLike.
  libstdc++: Implement __mdspan::__size.
  libstdc++: Implement mdspan and tests [PR107761].
  libstdc++: Set FTM for complete C++23 mdspan [PR107761].

 libstdc++-v3/include/bits/version.def |   3 +-
 libstdc++-v3/include/bits/version.h   |   3 +-
 libstdc++-v3/include/std/mdspan   | 315 -
 libstdc++-v3/src/c++23/std.cc.in  |   3 +-
 .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
 .../mdspan/extents/class_mandates_neg.cc  |   2 +
 .../mdspan/extents/custom_integer.cc  |  27 +-
 .../mdspan/extents/extents_mismatch_neg.cc|  35 +
 .../23_containers/mdspan/extents/int_like.h   |  28 +
 .../testsuite/23_containers/mdspan/ftm.cc |   9 +
 .../23_containers/mdspan/layout_like.h|  83 +++
 .../mdspan/layouts/debug/out_of_bounds_neg.cc |  30 +
 .../testsuite/23_containers/mdspan/mdspan.cc  | 643 ++
 .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
 14 files changed, 1212 insertions(+), 34 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/extents/extents_mismatch_neg.cc
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/extents/int_like.h
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/layouts/debug/out_of_bounds_neg.cc
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc

-- 
2.49.0

Re: [PATCH] c-family: Check backend for argument alignment on stack

2025-07-08 Thread Richard Biener

On Mon, Jul 7, 2025 at 11:08 PM H.J. Lu  wrote:
>
> On Mon, Jul 7, 2025 at 11:08 PM Jason Merrill  wrote:
> >
> > On 7/1/25 5:36 PM, H.J. Lu wrote:
> > > On Tue, Jul 1, 2025 at 9:37 PM Jason Merrill  wrote:
> > >>
> > >> On 6/30/25 7:03 PM, H.J. Lu wrote:
> > >>> On Mon, Jun 30, 2025 at 10:36 PM Jason Merrill  wrote:
> > 
> >  On 6/28/25 7:00 AM, H.J. Lu wrote:
> > > Since a backend may ignore user type alignment for arguments passed on
> > > stack, check backend for argument alignment on stack when evaluating
> > > __alignof.
> > 
> >  I assume that's reflected in DECL_ALIGN, so could we just add 
> >  PARM_DECL to
> > >>>
> > >>> No.  targetm.calls.function_arg_boundary may have special handling for 
> > >>> it.
> > >>
> > >> Why wouldn't we adjust DECL_ALIGN of the PARM_DECL to reflect the actual
> > >> alignment of the argument?  Are you saying it could be different from
> > >> one call to another?
> > >
> > > Function argument alignment is different from other places in memory if
> > > the main variant type alignment is different:
> >
> > Yes, I understand that function parameter alignment can be different
> > from other objects of that type.
> >
> > But since we have a PARM_DECL to represent that particular function
> > parameter, it seems natural to represent that difference in the
> > DECL_ALIGN of the PARM_DECL.  If you don't, its DECL_ALIGN is wrong.
> >

The parameter visible to the program does not need to share storage with
the parameter space used for parameter passing which can happen in
registers, or even partial in memory/registers or - as in this case - in
memory that is not aligned as the formal parameter is.

In such a case GCC arranges for local storage (with proper alignment)
that is populated from the argument storage (but it doesn't happen for the
case in question due to a bug IMO).

> __alignof returns TYPE_ALIGN, not DECL_ALIGN.  For PARM_DECL,
> TYPE_ALIGN may not be the same as DECL_ALIGN.

I don't think it's wise to try to "fix" this in the frontend.  A DECL_ALIGN
that is smaller than TYPE_ALIGN sounds bogus to me - while DECL_ALIGN
should always be the ultimate source of truth having a larger TYPE_ALIGN
just means the TYPE_ALIGN is wrong.  But the type is what the user
controls here, so we can't support the case in question (lower DECL_ALIGN).

Richard.

>
> --
> H.J.

[PATCH v4 5/6] libstdc++: Implement mdspan and tests [PR107761].

2025-07-08 Thread Luc Grosheintz

Implements the class mdspan as described in N4950, i.e. without P3029.
It also adds tests for mdspan. This commit completes the implementation
of P0009, i.e. the C++23 part of <mdspan>.

PR libstdc++/107761

libstdc++-v3/ChangeLog:

* include/std/mdspan (mdspan): New class.
* src/c++23/std.cc.in (mdspan): Add.
* testsuite/23_containers/mdspan/class_mandate_neg.cc: New test.
* testsuite/23_containers/mdspan/mdspan.cc: New test.
* testsuite/23_containers/mdspan/layout_like.h: Add class
LayoutLike which models a user-defined layout.

Signed-off-by: Luc Grosheintz 
---
 libstdc++-v3/include/std/mdspan   | 285 
 libstdc++-v3/src/c++23/std.cc.in  |   3 +-
 .../23_containers/mdspan/class_mandate_neg.cc |  41 ++
 .../23_containers/mdspan/layout_like.h|  83 +++
 .../testsuite/23_containers/mdspan/mdspan.cc  | 643 ++
 .../23_containers/mdspan/out_of_bounds_neg.cc |  24 +
 6 files changed, 1078 insertions(+), 1 deletion(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/class_mandate_neg.cc
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/layout_like.h
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/mdspan.cc
 create mode 100644 
libstdc++-v3/testsuite/23_containers/mdspan/out_of_bounds_neg.cc

diff --git a/libstdc++-v3/include/std/mdspan b/libstdc++-v3/include/std/mdspan
index 7e970c2b905..0fd78570b3e 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -1057,6 +1057,291 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __p + __i; }
 };
 
+  namespace __mdspan
+  {
+template
+  constexpr bool
+  __is_multi_index(const _Extents& __exts, span<_IndexType, _Nm> __indices)
+  {
+   static_assert(__exts.rank() == _Nm);
+   for (size_t __i = 0; __i < __exts.rank(); ++__i)
+ if (__indices[__i] >= __exts.extent(__i))
+   return false;
+   return true;
+  }
+  }
+
+  template>
+class mdspan
+{
+  static_assert(!is_array_v<_ElementType>,
+   "ElementType must not be an array type");
+  static_assert(!is_abstract_v<_ElementType>,
+   "ElementType must not be an abstract class type");
+  static_assert(__mdspan::__is_extents<_Extents>,
+   "Extents must be a specialization of std::extents");
+  static_assert(is_same_v<_ElementType,
+ typename _AccessorPolicy::element_type>);
+
+public:
+  using extents_type = _Extents;
+  using layout_type = _LayoutPolicy;
+  using accessor_type = _AccessorPolicy;
+  using mapping_type = typename layout_type::template 
mapping;
+  using element_type = _ElementType;
+  using value_type = remove_cv_t;
+  using index_type = typename extents_type::index_type;
+  using size_type = typename extents_type::size_type;
+  using rank_type = typename extents_type::rank_type;
+  using data_handle_type = typename accessor_type::data_handle_type;
+  using reference = typename accessor_type::reference;
+
+  static constexpr rank_type
+  rank() noexcept { return extents_type::rank(); }
+
+  static constexpr rank_type
+  rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
+
+  static constexpr size_t
+  static_extent(rank_type __r) noexcept
+  { return extents_type::static_extent(__r); }
+
+  constexpr index_type
+  extent(rank_type __r) const noexcept { return extents().extent(__r); }
+
+  constexpr
+  mdspan()
+  requires (rank_dynamic() > 0)
+  && is_default_constructible_v
+ && is_default_constructible_v
+ && is_default_constructible_v
+  : _M_accessor(), _M_mapping(), _M_handle()
+  { }
+
+  constexpr
+  mdspan(const mdspan& __other) = default;
+
+  constexpr
+  mdspan(mdspan&& __other) = default;
+
+  template<__mdspan::__valid_index_type... _OIndexTypes>
+   requires (sizeof...(_OIndexTypes) == rank()
+  || sizeof...(_OIndexTypes) == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit
+   mdspan(data_handle_type __handle, _OIndexTypes... __exts)
+   : _M_accessor(),
+ _M_mapping(_Extents(static_cast(std::move(__exts))...)),
+ _M_handle(std::move(__handle))
+   { }
+
+  template<__mdspan::__valid_index_type _OIndexType,
+  size_t _Nm>
+   requires (_Nm == rank() || _Nm == rank_dynamic())
+&& is_constructible_v
+&& is_default_constructible_v
+   constexpr explicit(_Nm != rank_dynamic())
+   mdspan(data_handle_type __handle, span<_OIndexType, _Nm> __exts)
+   : _M_accessor(), _M_mapping(extents_type(__exts)),
+ _M_handle(std::move(__handle))
+   { }
+
+  template<__mdspan::__valid_index_type _OIndexType,
+  size_t _Nm>
+   requires (_Nm == r

Re: [PATCH] libstdc++: Document that LWG 3881 is resolved, by using different approach.

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025 at 08:35, Tomasz Kamiński  wrote:
>
> libstdc++-v3/ChangeLog:
>
> * include/std/queue (formatter, _CharT>)
> (formatter, _CharT>):
> Add _GLIBCXX_RESOLVE_LIB_DEFECTS comments.

OK thanks


> ---
> I would send this as committed, but realized that there is a high chance
> of there being a typo somewhere.
>
>  libstdc++-v3/include/std/queue | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/queue b/libstdc++-v3/include/std/queue
> index 90525897da7..1b76088b31b 100644
> --- a/libstdc++-v3/include/std/queue
> +++ b/libstdc++-v3/include/std/queue
> @@ -105,7 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> { return _M_f.format(__a.c, __fc); }
>
>  private:
> -  // Standard uses formatter, _CharT>.
> +  // Standard uses formatter, _CharT>, but 
> range_formatter
> +  // provides same behavior.
> +  // _GLIBCXX_RESOLVE_LIB_DEFECTS
> +  // 3881. Incorrect formatting of container adapters backed by 
> std::string
>range_formatter<_Tp, _CharT> _M_f;
>  };
>
> @@ -136,7 +139,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> { return _M_f.format(__a.c, __fc); }
>
>  private:
> -  // Standard uses formatter, _CharT>.
> +  // Standard uses formatter, _CharT>, but 
> range_formatter
> +  // provides same behavior.
> +  // _GLIBCXX_RESOLVE_LIB_DEFECTS
> +  // 3881. Incorrect formatting of container adapters backed by 
> std::string
>range_formatter<_Tp, _CharT> _M_f;
>  };
>
> --
> 2.49.0
>

[PATCH v4 6/6] libstdc++: Set FTM for complete C++23 mdspan [PR107761].

2025-07-08 Thread Luc Grosheintz

PR libstdc++/107761

libstdc++-v3/ChangeLog:

* include/bits/version.def (mdspan): Set to 202207 and remove
no_stdname.
* include/bits/version.h: Regenerate.
* testsuite/23_containers/mdspan/ftm.cc: Test presence
of FTM.

Signed-off-by: Luc Grosheintz 
---
 libstdc++-v3/include/bits/version.def  | 3 +--
 libstdc++-v3/include/bits/version.h| 3 ++-
 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
 3 files changed, 12 insertions(+), 3 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc

diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
index 64f8190d240..f1015abdbfa 100644
--- a/libstdc++-v3/include/bits/version.def
+++ b/libstdc++-v3/include/bits/version.def
@@ -1007,9 +1007,8 @@ ftms = {
 
 ftms = {
   name = mdspan;
-  no_stdname = true; // FIXME: remove
   values = {
-v = 1; // FIXME: 202207
+v = 202207;
 cxxmin = 23;
   };
 };
diff --git a/libstdc++-v3/include/bits/version.h 
b/libstdc++-v3/include/bits/version.h
index 744246a9938..80f6586372d 100644
--- a/libstdc++-v3/include/bits/version.h
+++ b/libstdc++-v3/include/bits/version.h
@@ -1126,8 +1126,9 @@
 
 #if !defined(__cpp_lib_mdspan)
 # if (__cplusplus >= 202100L)
-#  define __glibcxx_mdspan 1L
+#  define __glibcxx_mdspan 202207L
 #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
+#   define __cpp_lib_mdspan 202207L
 #  endif
 # endif
 #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc 
b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
new file mode 100644
index 000..106ee4010ee
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
@@ -0,0 +1,9 @@
+// { dg-do compile { target c++23 } }
+#include <mdspan>
+
+#ifndef __cpp_lib_mdspan
+#error "Feature test macro __cpp_lib_mdspan is missing for <mdspan>"
+#if __cpp_lib_mdspan < 202207
+#error "Feature test macro __cpp_lib_mdspan has the wrong value"
+#endif
+#endif
-- 
2.49.0

Re: [PATCH v4 6/6] libstdc++: Set FTM for complete C++23 mdspan [PR107761].

2025-07-08 Thread Tomasz Kaminski

On Tue, Jul 8, 2025 at 10:26 AM Luc Grosheintz 
wrote:

> PR libstdc++/107761
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/version.def (mdspan): Set to 202207 and remove
> no_stdname.
> * include/bits/version.h: Regenerate.
> * testsuite/23_containers/mdspan/ftm.cc: Test presence
> of FTM.
>
> Signed-off-by: Luc Grosheintz 
> ---
>
LGTM.

>  libstdc++-v3/include/bits/version.def  | 3 +--
>  libstdc++-v3/include/bits/version.h| 3 ++-
>  libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc | 9 +
>  3 files changed, 12 insertions(+), 3 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
>
> diff --git a/libstdc++-v3/include/bits/version.def
> b/libstdc++-v3/include/bits/version.def
> index 64f8190d240..f1015abdbfa 100644
> --- a/libstdc++-v3/include/bits/version.def
> +++ b/libstdc++-v3/include/bits/version.def
> @@ -1007,9 +1007,8 @@ ftms = {
>
>  ftms = {
>name = mdspan;
> -  no_stdname = true; // FIXME: remove
>values = {
> -v = 1; // FIXME: 202207
> +v = 202207;
>  cxxmin = 23;
>};
>  };
> diff --git a/libstdc++-v3/include/bits/version.h
> b/libstdc++-v3/include/bits/version.h
> index 744246a9938..80f6586372d 100644
> --- a/libstdc++-v3/include/bits/version.h
> +++ b/libstdc++-v3/include/bits/version.h
> @@ -1126,8 +1126,9 @@
>
>  #if !defined(__cpp_lib_mdspan)
>  # if (__cplusplus >= 202100L)
> -#  define __glibcxx_mdspan 1L
> +#  define __glibcxx_mdspan 202207L
>  #  if defined(__glibcxx_want_all) || defined(__glibcxx_want_mdspan)
> +#   define __cpp_lib_mdspan 202207L
>  #  endif
>  # endif
>  #endif /* !defined(__cpp_lib_mdspan) && defined(__glibcxx_want_mdspan) */
> diff --git a/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> new file mode 100644
> index 000..106ee4010ee
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/mdspan/ftm.cc
> @@ -0,0 +1,9 @@
> +// { dg-do compile { target c++23 } }
> +#include <mdspan>
> +
> +#ifndef __cpp_lib_mdspan
> +#error "Feature test macro __cpp_lib_mdspan is missing for <mdspan>"
> +#if __cpp_lib_mdspan < 202207
> +#error "Feature test macro __cpp_lib_mdspan has the wrong value"
> +#endif
> +#endif
> --
> 2.49.0
>
>

Re: [PATCH] Check backend when setting DECL_ALIGN for PARM_DECL

2025-07-08 Thread Richard Biener

On Mon, Jul 7, 2025 at 11:33 PM H.J. Lu  wrote:
>
> On Tue, Jul 8, 2025 at 5:02 AM H.J. Lu  wrote:
> >
> > On Mon, Jul 7, 2025 at 11:08 PM Jason Merrill  wrote:
> > >
> > > On 7/1/25 5:36 PM, H.J. Lu wrote:
> > > > On Tue, Jul 1, 2025 at 9:37 PM Jason Merrill  wrote:
> > > >>
> > > >> On 6/30/25 7:03 PM, H.J. Lu wrote:
> > > >>> On Mon, Jun 30, 2025 at 10:36 PM Jason Merrill  
> > > >>> wrote:
> > > 
> > >  On 6/28/25 7:00 AM, H.J. Lu wrote:
> > > > Since a backend may ignore user type alignment for arguments passed 
> > > > on
> > > > stack, check backend for argument alignment on stack when evaluating
> > > > __alignof.
> > > 
> > >  I assume that's reflected in DECL_ALIGN, so could we just add 
> > >  PARM_DECL to
> > > >>>
> > > >>> No.  targetm.calls.function_arg_boundary may have special handling 
> > > >>> for it.
> > > >>
> > > >> Why wouldn't we adjust DECL_ALIGN of the PARM_DECL to reflect the 
> > > >> actual
> > > >> alignment of the argument?  Are you saying it could be different from
> > > >> one call to another?
> > > >
> > > > Function argument alignment is different from other places in memory if
> > > > the main variant type alignment is different:
> > >
> > > Yes, I understand that function parameter alignment can be different
> > > from other objects of that type.
> > >
> > > But since we have a PARM_DECL to represent that particular function
> > > parameter, it seems natural to represent that difference in the
> > > DECL_ALIGN of the PARM_DECL.  If you don't, its DECL_ALIGN is wrong.
> > >
> >
> > __alignof returns TYPE_ALIGN, not DECL_ALIGN.  For PARM_DECL,
> > TYPE_ALIGN may not be the same as DECL_ALIGN.
> >
>
> How about this patch?
>
> Since a backend may ignore type alignment for arguments passed on stack,
> call targetm.calls.function_arg_boundary to set DECL_ALIGN for PARM_DECL
> and change __alignof to return DECL_ALIGN, instead of TYPE_ALIGN, for
> PARM_DECL.

I don't think this will work out correctness-wise.  You'd have to patch up
all places.  Also we might turn a reference to the PARM_DECL into
a dereference of its address.  So we rely on the fact that TYPE_ALIGN
is always more conservative than DECL_ALIGN, which does not hold in the
case you care about.

So no, I don't think this is good design.  Get the missed copy working instead.

Richard.

> gcc/
>
> PR target/120839
> * stor-layout.cc (do_type_align): Call
> targetm.calls.function_arg_boundary to set DECL_ALIGN for
> PARM_DECL.
>
> gcc/c-family/
>
> PR target/120839
> * c-common.cc (c_alignof_expr): Return DECL_ALIGN for PARM_DECL.
>
> gcc/testsuite/
>
> PR target/120839
> * gcc.target/i386/pr120839-1.c: New test.
> * gcc.target/i386/pr120839-2.c: Likewise.
>
> --
> H.J.

Re: [PATCH] libstdc++: provide debug impl of P2697 ctor [PR119742]

2025-07-08 Thread Tomasz Kaminski

LGTM. Thanks.

On Mon, Jul 7, 2025 at 11:09 PM Nathan Myers  wrote:

> This adds the new bitset constructor from string_view
> defined in P2697 to the debug version of the type.
>
> libstdc++-v3/Changelog:
> PR libstdc++/119742
> * include/debug/bitset: Add new ctor.
> ---
>  libstdc++-v3/include/debug/bitset | 12 
>  1 file changed, 12 insertions(+)
>
> diff --git a/libstdc++-v3/include/debug/bitset
> b/libstdc++-v3/include/debug/bitset
> index ad9b7b5c4b0..43656a4efd3 100644
> --- a/libstdc++-v3/include/debug/bitset
> +++ b/libstdc++-v3/include/debug/bitset
> @@ -164,6 +164,18 @@ namespace __debug
>_CharT __zero, _CharT __one = _CharT('1'))
> : _Base(__str, __pos, __n, __zero, __one) { }
>
> +#ifdef __cpp_lib_bitset // ... from string_view
> +  template
> +   constexpr explicit
> +   bitset(std::basic_string_view<_CharT, _Traits> __s,
> + std::basic_string_view<_CharT, _Traits>::size_type __position =
> 0,
> + std::basic_string_view<_CharT, _Traits>::size_type __n =
> +   std::basic_string_view<_CharT, _Traits>::npos,
> + _CharT __zero = _CharT('0'), _CharT __one = _CharT('1'))
> +   : _Base(__s.data() + std::min(__position, __s.size()),
> +   std::min(__n, __s.size()), __zero, __one) { }
> +#endif
> +
>_GLIBCXX23_CONSTEXPR
>bitset(const _Base& __x) : _Base(__x) { }
>
> --
> 2.50.0
>
>

Re: [PATCH] cobol: Implement CXXFLAGS_FOR_COBOL.

2025-07-08 Thread Andreas Schwab

On Jul 07 2025, Robert Dubner wrote:

> Furthermore, even if this method did work, I need the CXXFLAGS_FOR_COBOL
> options to appear at the end of the list, not the beginning, since they
> may need to override what came before.

Why can't you use CXXFLAGS?

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [PATCH] libstdc++: provide debug impl of P2697 ctor [PR119742]

2025-07-08 Thread Jonathan Wakely

On Tue, 8 Jul 2025, 08:04 Tomasz Kaminski,  wrote:

> LGTM. Thanks.
>
> On Mon, Jul 7, 2025 at 11:09 PM Nathan Myers  wrote:
>
>> This adds the new bitset constructor from string_view
>> defined in P2697 to the debug version of the type.
>>
>> libstdc++-v3/Changelog:
>> PR libstdc++/119742
>> * include/debug/bitset: Add new ctor.
>> ---
>>  libstdc++-v3/include/debug/bitset | 12 
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/libstdc++-v3/include/debug/bitset
>> b/libstdc++-v3/include/debug/bitset
>> index ad9b7b5c4b0..43656a4efd3 100644
>> --- a/libstdc++-v3/include/debug/bitset
>> +++ b/libstdc++-v3/include/debug/bitset
>> @@ -164,6 +164,18 @@ namespace __debug
>>_CharT __zero, _CharT __one = _CharT('1'))
>> : _Base(__str, __pos, __n, __zero, __one) { }
>>
>> +#ifdef __cpp_lib_bitset // ... from string_view
>> +  template
>> +   constexpr explicit
>> +   bitset(std::basic_string_view<_CharT, _Traits> __s,
>> + std::basic_string_view<_CharT, _Traits>::size_type __position =
>> 0,
>> + std::basic_string_view<_CharT, _Traits>::size_type __n =
>> +   std::basic_string_view<_CharT, _Traits>::npos,
>> + _CharT __zero = _CharT('0'), _CharT __one = _CharT('1'))
>> +   : _Base(__s.data() + std::min(__position, __s.size()),
>> +   std::min(__n, __s.size()), __zero, __one) { }
>> +#endif
>>
>
The base class needs to repeat those length checks anyway, so is there a
reason to do the std::min calls here rather than just pass the arguments to
the base unchanged?


+
>>_GLIBCXX23_CONSTEXPR
>>bitset(const _Base& __x) : _Base(__x) { }
>>
>> --
>> 2.50.0
>>
>>

1 2 >

1 - 100 of 164 matches

Mail list logo