date:20250416

[PATCH] Fix wrong optimization of conditional expression with enumeration type

2025-04-16 Thread Eric Botcazou

Hi,

this is a regression introduced on the mainline and 14 branch by:
  https://gcc.gnu.org/pipermail/gcc-cvs/2023-October/391658.html

The change bypasses int_fits_type_p (essentially) to work around the 
signedness constraints, but in doing so disregards the peculiarities of 
boolean types whose precision is not 1 that are dealt with by the predicate, 
leading to the creation of a problematic conversion here.

Fixed by special-casing boolean types whose precision is not 1, as done in 
several other places.

Tested on x86-64/Linux, OK for the mainline and 14 branch?


2025-04-15  Eric Botcazou  

* tree-ssa-phiopt.cc (factor_out_conditional_operation): Do not
bypass the int_fits_type_p test for boolean types whose precision
is not 1.


2025-04-15  Eric Botcazou  

* gnat.dg/opt105.adb: New test.
* gnat.dg/opt105_pkg.ads, gnat.dg/opt105_pkg.adb: New helper.

-- 
Eric Botcazou
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 7d2d1696ee7..a194bf675e4 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -403,12 +403,15 @@ factor_out_conditional_operation (edge e0, edge e1, basic_block merge,
   if (dominated_by_p (CDI_DOMINATORS, gimple_bb (phi), gimple_bb (arg0_def_stmt)))
 	return false;
 
-  /* Only handle if arg1 is a INTEGER_CST and one that fits
-	 into the new type or if it is the same precision.  */
+  /* If arg1 is an INTEGER_CST, fold it to new type if it fits, or else
+	 if the bits will not be modified during the conversion, except for
+	 boolean types whose precision is not 1 (see int_fits_type_p).  */
   if (!INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
 	  || !(int_fits_type_p (arg1, TREE_TYPE (new_arg0))
 	   || (TYPE_PRECISION (TREE_TYPE (new_arg0))
-		   == TYPE_PRECISION (TREE_TYPE (arg1)))))
+		   == TYPE_PRECISION (TREE_TYPE (arg1))
+		   && (TREE_CODE (TREE_TYPE (new_arg0)) != BOOLEAN_TYPE
+		   || TYPE_PRECISION (TREE_TYPE (new_arg0)) == 1))))
 	return false;
 
   /* For the INTEGER_CST case, we are just moving the
-- { dg-do run }
-- { dg-options "-O" }

with Opt105_Pkg; use Opt105_Pkg;

procedure Opt105 is

  Val : constant Enum :=
  (if Enabled then (if Disabled then Two else One) else Three);

begin
  if Cond1 then
return;
  end if;

  if Cond2 then
return;
  end if;

  case Val is
when One =>
  raise Program_Error;

when Two =>
  raise Constraint_Error;

when Three =>
  null;
  end case;
end;
package Opt105_Pkg is

  type Enum is (One, Two, Three);

  Enabled  : Boolean := False;
  Disabled : Boolean := False;

  function Cond1 return Boolean;
  function Cond2 return Boolean;

end Opt105_Pkg;
package body Opt105_Pkg is

  function Cond1 return Boolean is (False);
  function Cond2 return Boolean is (False);

end Opt105_Pkg;

Stream ipa_return_value_summary

2025-04-16 Thread Jan Hubicka

Hi,
this patch adds streaming of return summaries from compile time to ltrans,
which are now needed for vrp to not output false errors on musttail.

Bootstrapped/regtested x86_64-linux, committed.

Co-authored-by: Jakub Jelinek 

gcc/ChangeLog:
PR tree-optimization/119614

* ipa-prop.cc (ipa_write_return_summaries): New function.
(ipa_record_return_value_range_1): Break out from 
(ipa_record_return_value_range): ... here.
(ipa_read_return_summaries): New function.
(ipa_prop_read_section): Read return summaries.
(read_ipcp_transformation_info): Read return summaries.
(ipcp_write_transformation_summaries): Write return summaries;
do not stream stray 0.

gcc/testsuite/ChangeLog:

* g++.dg/lto/pr119614_0.C: New test.

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 49d68ab044b..0398d69962f 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -5439,6 +5439,49 @@ ipa_read_node_info (class lto_input_block *ib, struct 
cgraph_node *node,
 }
 }
 
+/* Stream out ipa_return_summary.  */
+static void
+ipa_write_return_summaries (output_block *ob)
+{
+  if (!ipa_return_value_sum)
+{
+  streamer_write_uhwi (ob, 0);
+  return;
+}
+
+  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
+  unsigned int count = 0;
+  for (int i = 0; i < lto_symtab_encoder_size (encoder); i++)
+{
+  symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
+  cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
+  ipa_return_value_summary *v;
+
+  if (cnode && cnode->definition && !cnode->alias
+ && (v = ipa_return_value_sum->get (cnode))
+ && v->vr)
+   count++;
+}
+  streamer_write_uhwi (ob, count);
+
+  for (int i = 0; i < lto_symtab_encoder_size (encoder); i++)
+{
+  symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
+  cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
+  ipa_return_value_summary *v;
+
+  if (cnode && cnode->definition && !cnode->alias
+ && (v = ipa_return_value_sum->get (cnode))
+ && v->vr)
+   {
+ streamer_write_uhwi
+   (ob,
+lto_symtab_encoder_encode (encoder, cnode));
+ v->vr->streamer_write (ob);
+   }
+}
+}
+
 /* Write jump functions for nodes in SET.  */
 
 void
@@ -5475,11 +5518,58 @@ ipa_prop_write_jump_functions (void)
  && ipa_node_params_sum->get (node) != NULL)
 ipa_write_node_info (ob, node);
 }
-  streamer_write_char_stream (ob->main_stream, 0);
+  ipa_write_return_summaries (ob);
   produce_asm (ob);
   destroy_output_block (ob);
 }
 
+/* Record that return value range of N is VAL.  */
+
+static void
+ipa_record_return_value_range_1 (cgraph_node *n, value_range val)
+{
+  if (!ipa_return_value_sum)
+{
+  if (!ipa_vr_hash_table)
+   ipa_vr_hash_table = hash_table<ipa_vr_ggc_hash_traits>::create_ggc (37);
+  ipa_return_value_sum = new (ggc_alloc_no_dtor <ipa_return_value_sum_t> ())
+ ipa_return_value_sum_t (symtab, true);
+  ipa_return_value_sum->disable_insertion_hook ();
+}
+  ipa_return_value_sum->get_create (n)->vr = ipa_get_value_range (val);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+{
+  fprintf (dump_file, "Recording return range of %s:", n->dump_name ());
+  val.dump (dump_file);
+  fprintf (dump_file, "\n");
+}
+}
+
+/* Stream in ipa_return_summary.  */
+static void
+ipa_read_return_summaries (lto_input_block *ib,
+  struct lto_file_decl_data *file_data,
+  class data_in *data_in)
+{
+  unsigned int f_count = streamer_read_uhwi (ib);
+  for (unsigned int i = 0; i < f_count; i++)
+{
+  unsigned int index = streamer_read_uhwi (ib);
+  lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
+  struct cgraph_node *node
+ = dyn_cast <cgraph_node *>
+ (lto_symtab_encoder_deref (encoder, index));
+  ipa_vr rvr;
+  rvr.streamer_read (ib, data_in);
+  if (node->prevailing_p ())
+   {
+ value_range tmp;
+ rvr.get_vrange (tmp);
+ ipa_record_return_value_range_1 (node, tmp);
+   }
+}
+}
+
 /* Read section in file FILE_DATA of length LEN with data DATA.  */
 
 static void
@@ -5516,6 +5606,7 @@ ipa_prop_read_section (struct lto_file_decl_data 
*file_data, const char *data,
   gcc_assert (node->definition);
   ipa_read_node_info (&ib_main, node, data_in);
 }
+  ipa_read_return_summaries (&ib_main, file_data, data_in);
   lto_free_section_data (file_data, LTO_section_jump_functions, NULL, data,
 len);
   lto_data_in_delete (data_in);
@@ -5673,7 +5765,7 @@ ipcp_write_transformation_summaries (void)
  && lto_symtab_encoder_encode_body_p (encoder, cnode))
write_ipcp_transformation_info (ob, cnode, ts);
 }
-  streamer_write_char_stream (ob->main_stream, 0);
+  ipa_write_return_summaries (ob);
   produce_asm (ob);
   destroy_output_block (ob);
 }
@@ -571

Re: [PATCH v2]middle-end: Fix incorrect codegen with PFA and VLS [PR119351]

2025-04-16 Thread Richard Biener

On Tue, 15 Apr 2025, Tamar Christina wrote:

> Hi All,
> 
> The following example:
> 
> #define N 512
> #define START 2
> #define END 505
> 
> int x[N] __attribute__((aligned(32)));
> 
> int __attribute__((noipa))
> foo (void)
> {
>   for (signed int i = START; i < END; ++i)
> {
>   if (x[i] == 0)
> return i;
> }
>   return -1;
> }
> 
> generates incorrect code with fixed length SVE because for early break we need
> to know which value to start the scalar loop with if we take an early exit.
> 
> Historically this means that we take the first element of every induction.
> this is because there's an assumption in place, that even with masked loops 
> the
> masks come from a whilel* instruction.
> 
> As such we reduce using a BIT_FIELD_REF <, 0>.
> 
> When PFA was added this assumption was correct for non-masked loop, however we
> assumed that PFA for VLA wouldn't work for now, and disabled it using the
> alignment requirement checks.  We also expected VLS to PFA using scalar loops.
> 
> However as this PR shows, for VLS the vectorizer can, and does in some
> circumstances choose to peel using masks by masking the first iteration of the
> loop with an additional alignment mask.
> 
> When this is done, the first elements of the predicate can be inactive. In 
> this
> example element 1 is inactive based on the calculated misalignment.  hence the
> -1 value in the first vector IV element.
> 
> When we reduce using BIT_FIELD_REF we get the wrong value.
> 
> This patch updates it by creating a new scalar PHI that keeps track of whether
> we are the first iteration of the loop (with the additional masking) or 
> whether
> we have taken a loop iteration already.
> 
> The generated sequence:
> 
> pre-header:
>   bb1:
> i_1 = 
> 
> header:
>   bb2:
> i_2 = PHI 
> …
> 
> early-exit:
>   bb3:
> i_3 = iv_step * i_2 + PHI
> 
> Which eliminates the need to do an expensive mask based reduction.
> 
> This fixes gromacs with one OpenMP thread. But with > 1 there is still an 
> issue.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/119351
>   * tree-vect-loop-manip.cc (vect_can_advance_ivs_p): Record non-linear
>   inductions.
>   * tree-vectorizer.h (LOOP_VINFO_MASK_NITERS_PFA_OFFSET,
>   LOOP_VINFO_NON_LINEAR_IV): New.
>   (class _loop_vec_info): Add mask_skip_niters_pfa_offset and
>   nonlinear_iv.
>   * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize them.
>   (vectorizable_induction): If early break and PFA using masking create a
>   new phi which tracks where the scalar code needs to start...
>   (vectorizable_live_operation): ...and generate the adjustments here.
>   (vect_use_loop_mask_for_alignment_p): Reject non-linear inductions and
>   early break needing peeling.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/119351
>   * gcc.target/aarch64/sve/peel_ind_10.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_10_run.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_5.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_5_run.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_6.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_6_run.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_7.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_7_run.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_8.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_8_run.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_9.c: New test.
>   * gcc.target/aarch64/sve/peel_ind_9_run.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
> new file mode 100644
> index 
> ..b7a7bc5cb0cfdfdb74adb120c54ba15019832cf1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10.c
> @@ -0,0 +1,24 @@
> +/* Fix for PR119351 alignment peeling with vectors and VLS.  */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -msve-vector-bits=256 --param 
> aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
> +
> +#define N 512
> +#define START 0
> +#define END 505
> + 
> +int x[N] __attribute__((aligned(32)));
> +
> +int __attribute__((noipa))
> +foo (int start)
> +{
> +  for (unsigned int i = start; i < END; ++i)
> +{
> +  if (x[i] == 0)
> +return i;
> +}
> +  return -1;
> +}
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
> +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" 
> "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10_run.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_10_run.c
> new file mode 100644
>

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 09:55, Tomasz Kaminski  wrote:
>
>
>
> On Wed, Apr 16, 2025 at 10:47 AM Jonathan Wakely  wrote:
>>
>> On 16/04/25 10:37 +0200, Tomasz Kaminski wrote:
>> >Just to clarify, we still will be missing the formatter for adaptors
>> >(stack, queue, priority_queue).
>>
>> Yes, but I doubt most people want to use those :-)
>
> We are good as long as vector<bool>::reference is formattable ;)

Surely the most important type in the library ;)

Re: [PATCH] libgcobol: mark riscv64--linux as supported target

2025-04-16 Thread Jakub Jelinek

On Wed, Apr 16, 2025 at 02:17:37PM +0200, Richard Biener wrote:
> On Tue, Apr 15, 2025 at 4:33 PM Jeff Law  wrote:
> >
> >
> >
> > On 4/15/25 7:57 AM, Andreas Schwab wrote:
> > >   * configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with
> > >   64-bit multilib.
> > Can't say I'm happy with the amount of Cobol related churn at this phase
> > in our cycle.  But this should be exceedingly safe.  So OK.
> 
> For the record it now builds fine on s390x-linux (big endian) as well, but
> test results are not that good.  At least _some_ tests pass ...
> 
> Native configuration is s390x-ibm-linux-gnu
> 
> === cobol tests ===
> 
> 
> Running target unix
> FAIL: cobol.dg/literal1.cob   -O0  execution test
> FAIL: cobol.dg/literal1.cob   -O1  execution test
> [... many FAILs stripped ...]
> FAIL: cobol.dg/group2/floating-point_literals.cob   -O3 -g   output file test
> FAIL: cobol.dg/group2/floating-point_literals.cob   -Os   output file test
> 
> === cobol Summary ===
> 
> # of expected passes2757
> # of unexpected failures342
> # of expected failures  48
> # of unresolved testcases   54

E.g.
uint128 temp = (uint128)product.i64[i] * multiplier;
product.i64[i] = *(uint64_t *)(&temp);
overflows[i+1] = *(uint64_t *)((uint8_t *)(&temp) + 8);
(I think one of my pending patches fixed this but can submit it
independently) is at least one of the obvious spots where it works
solely for little endian.
This one particular can be obviously done as
uint128 temp = (uint128)product.i64[i] * multiplier;
product.i64[i] = temp;
overflows[i+1] = temp >> 64;
But guess it is pretty much everything that works on the int256s:
typedef struct int256
  {
  union
{
unsigned char  data[32];
uint64_t   i64 [4];
uint128i128[2];
};
  }int256;
whether loops on it iterate with increasing or decreasing index
depends on the endianness, which bit is the sign bit as well, ...

Jakub

Re: [PATCH] libgcobol: mark riscv64--linux as supported target

2025-04-16 Thread Rainer Orth

Hi Richard,

> For the record it now builds fine on s390x-linux (big endian) as well, but
> test results are not that good.  At least _some_ tests pass ...
>
> Native configuration is s390x-ibm-linux-gnu
>
> === cobol tests ===
>
>
> Running target unix
> FAIL: cobol.dg/literal1.cob   -O0  execution test
> FAIL: cobol.dg/literal1.cob   -O1  execution test
> [... many FAILs stripped ...]
> FAIL: cobol.dg/group2/floating-point_literals.cob   -O3 -g   output file test
> FAIL: cobol.dg/group2/floating-point_literals.cob   -Os   output file test
>
> === cobol Summary ===
>
> # of expected passes2757
> # of unexpected failures342
> # of expected failures  48
> # of unresolved testcases   54

things are similar on sparcv9-sun-solaris2.11 (once my remaining build
patches are approved):

=== cobol Summary ===

# of expected passes2684
# of unexpected failures446
# of expected failures  48
# of unresolved testcases   90

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

Re: Questions on replacing a structure pointer reference to a call to .ACCESS_WITH_SIZE in C FE

2025-04-16 Thread Qing Zhao

Hi, Sid:

> On Apr 10, 2025, at 06:56, Siddhesh Poyarekar  wrote:
> 
> 
>> Maybe you could add it when a pointer to an annotated
>> struct is passed as parameter, but also there it is not
>> clear to me that we might want to materialize new
>> accesses to the struct at this point.
>> An alternative approach could be to just do it when
>> such a pointer is explicitly passed to the BDOS builtin.
> 
> I suppose bounds sanitizer won't be affected by this but wouldn't this then 
> exclude the object-size sanitizer?  I don't know if its instrumentation runs 
> early enough.

I just checked this:

1. The object-size sanitizer instruments the code in a quite early stage 
in the middle-end, much earlier than objsz phase. 
2. When object-size sanitizer instruments the code, it  might insert calls to 
__builtin_dynamic_object_size  to acquire the object size, and these 
added __builtin_dynamic_object_size calls will be evaluated in the later
objsz phase. 

So, based on this fact and the previous discussion:

1. It’s not safe in general to replace a structure pointer reference to a call 
to .ACCESS_WITH_SIZE in C FE. 
Since data-flow analysis is needed to make sure that the access to the size 
member is valid, i.e, the structure 
is accessible and initialized, etc. 

2. It should be safe to generate the reference to field member when we evaluate 
the BDOS builtin as my current
approach. And doing this in tree-object-size should also cover 
-fsanitize=object-size. 

3. When generating the reference to the field member in tree-object-size, we 
should guard this reference with a check
that the pointer to the structure is valid, i.e.:

 struct annotated {
  size_t count;
  char array[] __attribute__((counted_by (count)));
 };

static size_t __attribute__((__noinline__)) size_of (struct annotated * obj)
{
   return __builtin_dynamic_object_size (obj, 1);
}

When we try to generate the reference to obj->count when evaluating 
__builtin_dynamic_object_size (obj, 1), 
We should generate the following:

   If (obj != NULL)
 * (&obj->count)

To make sure that the pointer to the structure object is valid first. 

Let me know your comment on this.

thanks.

Qing
> 
> Thanks,
> Sid

Re: [PATCH] Fix wrong optimization of conditional expression with enumeration type

2025-04-16 Thread Richard Biener




> Am 16.04.2025 um 15:24 schrieb Eric Botcazou :
> 
> Hi,
> 
> this is a regression introduced on the mainline and 14 branch by:
>  https://gcc.gnu.org/pipermail/gcc-cvs/2023-October/391658.html
> 
> The change bypasses int_fits_type_p (essentially) to work around the
> signedness constraints, but in doing so disregards the peculiarities of
> boolean types whose precision is not 1 that are dealt with by the predicate,
> leading to the creation of a problematic conversion here.
> 
> Fixed by special-casing boolean types whose precision is not 1, as done in
> several other places.
> 
> Tested on x86-64/Linux, OK for the mainline and 14 branch?

Ok

Richard 

> 
> 2025-04-15  Eric Botcazou  
> 
>* tree-ssa-phiopt.cc (factor_out_conditional_operation): Do not
>bypass the int_fits_type_p test for boolean types whose precision
>is not 1.
> 
> 
> 2025-04-15  Eric Botcazou  
> 
>* gnat.dg/opt105.adb: New test.
>* gnat.dg/opt105_pkg.ads, gnat.dg/opt105_pkg.adb: New helper.
> 
> --
> Eric Botcazou
> 
> 
> 
>

[PATCH] libstdc++: Fix constification in range_formatter::format v2 [PR109162]

2025-04-16 Thread Tomasz Kamiński

Because the _M_format(__rg, __fc) calls were placed outside of the if constexpr,
these methods and their children were instantiated even if
_M_format<const _Range> could be used. Now we put the calls in the else
branches of the if constexpr.

libstdc++-v3/ChangeLog:

* include/std/format (range_formatter::format): Do not instantiate
_M_format for mutable _Rg if const _Rg can be used.
---
Testing on x86_64-linux, but there are no tests that will detect
unnecessary instantiations.

 libstdc++-v3/include/std/format | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 27253f50ea8..d44c4b24abe 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -5636,12 +5636,15 @@ namespace __format
{
  using _Range = remove_reference_t<_Rg>;
  if constexpr (__format::__const_formattable_range<_Range, _CharT>)
- {
-   using _CRef = ranges::range_reference_t<const _Range>;
-   if constexpr (same_as<remove_cvref_t<_CRef>, _Tp>)
- return _M_format<const _Range>(__rg, __fc);
- }
- return _M_format(__rg, __fc);
+   {
+ using _CRef = ranges::range_reference_t<const _Range>;
+ if constexpr (same_as<remove_cvref_t<_CRef>, _Tp>)
+   return _M_format<const _Range>(__rg, __fc);
+ else
+   return _M_format(__rg, __fc);
+   }
+ else
+   return _M_format(__rg, __fc);
}
 
 private:
-- 
2.49.0

Re: [PATCH] libstdc++: Do not use 'not' alternative token in <format>

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 12:59, Tomasz Kaminski  wrote:
>
>
>
> On Wed, Apr 16, 2025 at 1:32 PM Jonathan Wakely  wrote:
>>
>> On Wed, 16 Apr 2025 at 12:18, Jonathan Wakely  wrote:
>> >
>> > This fixes:
>> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++23 (test for 
>> > excess errors)
>> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++26 (test for 
>> > excess errors)
>> >
>> > The purpose of 'not defined<format_kind<_Rg>>' is to be ill-formed (as
>> > required by [format.range.fmtkind]) and to give an error that includes
>> > the string "not defined<format_kind<_Rg>>". That was intended to tell you
>> > that format_kind is not defined, just like it says!
>> >
>> > But user code can use -fno-operator-names so we can't use 'not' here,
>> > and "! defined" in the diagnostic doesn't seem as user-friendly. It also
>> > raises questions about whether it was intended to be the preprocessor
>> > token 'defined' (it's not) or where 'defined' is defined (it's not).
>> >
>> > Replace it with __no_primary_template> and a comment,
>> > which seems almost as good. The diagnostic now looks like:
>> >
>> > In file included from fmt.cc:1:
>> > .../include/c++/15.0.1/format: In instantiation of 'constexpr const auto 
>> > std::format_kind':
>> > fmt.cc:3:15:   required from here
>> > 3 | auto i = std::format_kind;
>> >   |   ^~~~
>> > .../include/c++/15.0.1/format:5164:31: error: use of 
>> > 'std::format_kind' before deduction of 'auto'
>> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
>> > specialization
>> >   |   ^~~~
>> > .../include/c++/15.0.1/format:5164:30: error: '__no_primary_template' was 
>> > not declared in this scope
>> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
>> > specialization
>> >   | ~^~
>>
>> Maybe "must define specialization" isn't really ideal, because the
>> problem might be that users are trying to use format_kind<const R>
>> when they should use format_kind<R>, and telling them to define a
>> specialization for const R is wrong.
>
> I do not think that user's are expected to use format_kind directly
> will be confused by current specialization. I think you could adjust the 
> comment:
> // must define specialization or _Rg is reference or cv-qualified type

In general what appears in the diagnostic will only be one line, so if
we're adding a comment that we expect users to see, it needs to be
quite short.

Although we could force it to span multiple lines like this:

In file included from fmt.cc:1:
.../include/c++/15.0.1/format: In instantiation of 'constexpr const
auto std::format_kind':
fmt.cc:3:15:   required from here
   3 | auto i = std::format_kind;
 |   ^~~~
.../include/c++/15.0.1/format:5165:11: error: use of
'std::format_kind' before deduction of 'auto'
5165 |   format_kind<_Rg> // can specialize this for non-const
input ranges
 |   ^~~~
.../include/c++/15.0.1/format:5164:16: error:
'__primary_template_not_defined' was not declared in this scope
5164 |   = __primary_template_not_defined(
 | ~~~^
5165 |   format_kind<_Rg> // can specialize this for non-const
input ranges
 |
~~
5166 |   );
 |   ~

[PATCH v1][GCC16-Stage-1] RISC-V: Remove unnecessary frm restore volatile define_insn

2025-04-16 Thread pan2 . li

From: Pan Li 

After we add the frm register to the global_regs, we may no longer need
the volatile define_insn to emit the frm restore insns.  The
cooperatively-managed global register will help to handle this, instead
of emitting the volatile define_insn explicitly.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_emit_frm_mode_set): Refactor
the frm mode set by removing fsrmsi_restore_volatile.
* config/riscv/vector-iterators.md (unspecv): Remove as
unnecessary.
* config/riscv/vector.md (fsrmsi_restore_volatile): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-dynamic-frm-49.c: Adjust
the asm dump check times.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-50.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-52.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-74.c: Ditto.
* gcc.target/riscv/rvv/base/float-point-dynamic-frm-75.c: Ditto.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv.cc | 43 ++-
 gcc/config/riscv/vector-iterators.md  |  4 --
 gcc/config/riscv/vector.md| 13 --
 .../rvv/base/float-point-dynamic-frm-49.c |  2 +-
 .../rvv/base/float-point-dynamic-frm-50.c |  2 +-
 .../rvv/base/float-point-dynamic-frm-52.c |  2 +-
 .../rvv/base/float-point-dynamic-frm-74.c |  2 +-
 .../rvv/base/float-point-dynamic-frm-75.c |  2 +-
 8 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 38f3ae7cd84..3878702e3a1 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12047,27 +12047,30 @@ riscv_emit_frm_mode_set (int mode, int prev_mode)
   if (prev_mode == riscv_vector::FRM_DYN_CALL)
 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL.  */
 
-  if (mode != prev_mode)
-{
-  rtx frm = gen_int_mode (mode, SImode);
-
-  if (mode == riscv_vector::FRM_DYN_CALL
-   && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
-   /* No need to emit when prev mode is DYN already.  */
-   emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
-  else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
-   && prev_mode != riscv_vector::FRM_DYN
-   && prev_mode != riscv_vector::FRM_DYN_CALL)
-   /* No need to emit when prev mode is DYN or DYN_CALL already.  */
-   emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
-  else if (mode == riscv_vector::FRM_DYN
-   && prev_mode != riscv_vector::FRM_DYN_CALL)
-   /* Restore frm value from backup when switch to DYN mode.  */
-   emit_insn (gen_fsrmsi_restore (backup_reg));
-  else if (riscv_static_frm_mode_p (mode))
-   /* Set frm value when switch to static mode.  */
-   emit_insn (gen_fsrmsi_restore (frm));
+  if (mode == prev_mode)
+return;
+
+  if (riscv_static_frm_mode_p (mode))
+{
+  /* Set frm value when switch to static mode.  */
+  emit_insn (gen_fsrmsi_restore (gen_int_mode (mode, SImode)));
+  return;
 }
+
+  bool restore_p
+= /* No need to emit when prev mode is DYN.  */
+  (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_CALL
+   && prev_mode != riscv_vector::FRM_DYN)
+  /* No need to emit if prev mode is DYN or DYN_CALL.  */
+  || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_EXIT
+ && prev_mode != riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL)
+  /* Restore frm value when switch to DYN mode.  */
+  || (mode == riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL);
+
+  if (restore_p)
+emit_insn (gen_fsrmsi_restore (backup_reg));
 }
 
 /* Implement Mode switching.  */
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index f8da71b1d65..28f52481952 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -122,10 +122,6 @@ (define_c_enum "unspec" [
   UNSPEC_SF_VFNRCLIPU
 ])
 
-(define_c_enum "unspecv" [
-  UNSPECV_FRM_RESTORE_EXIT
-])
-
 ;; Subset of VI with fractional LMUL types
 (define_mode_iterator VI_FRAC [
   RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64fb122..9dae11a7849 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1116,19 +1116,6 @@ (define_insn "fsrmsi_restore"
(set_attr "mode" "SI")]
  )
 
-;; The volatile fsrmsi restore is used for the exit point for the
-;; dynamic mode switching. It will generate one volatile fsrm a5
-;; which won't be eliminated.
-(define_insn "fsrmsi_restore_volatile"
-  [(set (reg:SI FRM_REGNUM)
-   (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
-   UNSPECV_FRM_RESTORE_EXIT))]
-  "TARGET_VECTOR"
-  "fsrm\t%0"
-  [(set_attr "type" "wrfrm")
-   (set_at

Re: [PATCH] testsuite: Add support for GCOV_UNDER_TEST

2025-04-16 Thread Christophe Lyon

On Wed, 16 Apr 2025 at 16:14, Hans-Peter Nilsson  wrote:
>
> > From: Christophe Lyon 
> > Date: Wed, 16 Apr 2025 14:41:17 +0200
>
> > ping?
>
> Since you directed it at me and CC:ed the list; in case that
> was deliberate: I can only repeat "still ok", but I don't
> have approval rights to the testsuite parts.
>

Oops, no sorry that was not deliberate. Thanks for your confirmation :-)

Christophe

> >
> > On Thu, 10 Apr 2025 at 15:48, Hans-Peter Nilsson  wrote:
> > >
> > > > From: Christophe Lyon 
> > > > Date: Thu, 10 Apr 2025 15:38:48 +0200
> > >
> > > > On Thu, 10 Apr 2025 at 15:29, Hans-Peter Nilsson  wrote:
> > > > >
> > > > > > From: Christophe Lyon 
> > > > > > Date: Thu, 10 Apr 2025 15:21:23 +0200
> > > > >
> > > > > Not sure why I'm CC:ed on this one, not being a maintainer
> > > > > of the testsuite or targets where gcov tests are exercised,
> > > >
> > > > Because you fixed a problem in r13-4103-ge91d51457532da,
> > > > so I wanted to make sure my patch was OK for you.
> > >
> > > I've forgot everything about that commit and the context. :}
> > >
> > > So, I now had an extra glance from the transform-name
> > > perspective: still ok.
> > >
> > > brgds, H-P
> >

Re: [PATCH] Fix wrong optimization of conditional expression with enumeration type

2025-04-16 Thread Andrew Pinski

On Wed, Apr 16, 2025 at 6:25 AM Eric Botcazou  wrote:
>
> Hi,
>
> this is a regression introduced on the mainline and 14 branch by:
>   https://gcc.gnu.org/pipermail/gcc-cvs/2023-October/391658.html
>
> The change bypasses int_fits_type_p (essentially) to work around the
> signedness constraints, but in doing so disregards the peculiarities of
> boolean types whose precision is not 1 that are dealt with by the predicate,
> leading to the creation of a problematic conversion here.
>
> Fixed by special-casing boolean types whose precision is not 1, as done in
> several other places.
>
> Tested on x86-64/Linux, OK for the mainline and 14 branch?

LGTM. I do wonder if this could be documented somewhere if not
already. Because I suspect there are other places which miss that if
TYPE_PRECISION are equal, you might still need a cast for boolean
types. Maybe a helper function might be useful too.

Thanks,
Andrew

>
>
> 2025-04-15  Eric Botcazou  
>
> * tree-ssa-phiopt.cc (factor_out_conditional_operation): Do not
> bypass the int_fits_type_p test for boolean types whose precision
> is not 1.
>
>
> 2025-04-15  Eric Botcazou  
>
> * gnat.dg/opt105.adb: New test.
> * gnat.dg/opt105_pkg.ads, gnat.dg/opt105_pkg.adb: New helper.
>
> --
> Eric Botcazou

Re: [PATCH] libstdc++: Do not use 'not' alternative token in

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 15:22, Jonathan Wakely  wrote:
>
> On Wed, 16 Apr 2025 at 12:59, Tomasz Kaminski  wrote:
> >
> >
> >
> > On Wed, Apr 16, 2025 at 1:32 PM Jonathan Wakely  wrote:
> >>
> >> On Wed, 16 Apr 2025 at 12:18, Jonathan Wakely  wrote:
> >> >
> >> > This fixes:
> >> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++23 (test for 
> >> > excess errors)
> >> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++26 (test for 
> >> > excess errors)
> >> >
> >> > The purpose of 'not defined<format_kind<R>>' is to be ill-formed (as
> >> > required by [format.range.fmtkind]) and to give an error that includes
> >> > the string "not defined<format_kind<R>>". That was intended to tell you
> >> > that format_kind<R> is not defined, just like it says!
> >> >
> >> > But user code can use -fno-operator-names so we can't use 'not' here,
> >> > and "! defined" in the diagnostic doesn't seem as user-friendly. It also
> >> > raises questions about whether it was intended to be the preprocessor
> >> > token 'defined' (it's not) or where 'defined' is defined (it's not).
> >> >
> >> > Replace it with __no_primary_template> and a comment,
> >> > which seems almost as good. The diagnostic now looks like:
> >> >
> >> > In file included from fmt.cc:1:
> >> > .../include/c++/15.0.1/format: In instantiation of 'constexpr const auto 
> >> > std::format_kind':
> >> > fmt.cc:3:15:   required from here
> >> > 3 | auto i = std::format_kind;
> >> >   |   ^~~~
> >> > .../include/c++/15.0.1/format:5164:31: error: use of 
> >> > 'std::format_kind' before deduction of 'auto'
> >> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
> >> > specialization
> >> >   |   ^~~~
> >> > .../include/c++/15.0.1/format:5164:30: error: '__no_primary_template' 
> >> > was not declared in this scope
> >> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
> >> > specialization
> >> >   | ~^~
> >>
> >> Maybe "must define specialization" isn't really ideal, because the
> >> problem might be that users are trying to use format_kind<const R>
> >> when they should use format_kind<R>, and telling them to define a
> >> specialization for const R is wrong.
> >
> > I do not think that users who are expected to use format_kind directly
> > will be confused by current specialization. I think you could adjust the 
> > comment:
> > // must define specialization or _Rg is reference or cv-qualified type
>
> In general what appears in the diagnostic will only be one line, so if
> we're adding a comment that we expect users to see, it needs to be
> quite short.
>
> Although we could force it to span multiple lines like this:
>
> In file included from fmt.cc:1:
> .../include/c++/15.0.1/format: In instantiation of 'constexpr const
> auto std::format_kind':
> fmt.cc:3:15:   required from here
>3 | auto i = std::format_kind;
>  |   ^~~~
> .../include/c++/15.0.1/format:5165:11: error: use of
> 'std::format_kind' before deduction of 'auto'
> 5165 |   format_kind<_Rg> // can specialize this for non-const
> input ranges
>  |   ^~~~
> .../include/c++/15.0.1/format:5164:16: error:
> '__primary_template_not_defined' was not declared in this scope
> 5164 |   = __primary_template_not_defined(
>  | ~~~^
> 5165 |   format_kind<_Rg> // can specialize this for non-const
> input ranges
>  |
> ~~
> 5166 |   );
>  |   ~


This looks reasonable with Clang too:

In file included from fmt.cc:1:
.../include/c++/15.0.1/format:5164:9: error: use of undeclared
identifier '__primary_template_not_defined'
5164 |   = __primary_template_not_defined(
 | ^
.../include/c++/15.0.1/format:5165:11: note: in instantiation of
variable template specialization 'std::format_kind' requested
here
5165 |   format_kind<_Rg> // can specialize this for non-const
input ranges
 |   ^
fmt.cc:3:15: note: in instantiation of variable template
specialization 'std::format_kind' requested here
   3 | auto i = std::format_kind;
 |   ^
1 error generated.

For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106] (was: [committed][nvptx] Use .alias directive for mptx >= 6.3)

2025-04-16 Thread Thomas Schwinge

Hi!

On 2025-03-25T11:51:26+0100, Tom de Vries  wrote:
> On 3/25/25 11:18, Thomas Schwinge wrote:
>> On 2022-03-22T14:41:46+0100, Tom de Vries via Gcc-patches 
>>  wrote:
>>> Starting with ptx isa version 6.3, a ptx directive .alias is available.
>> 
>> Regarding the following item specifically:
>> 
>>> Unreferenced aliases are not emitted (these can occur f.i. when inlining a
>>> call to an alias).  This avoids driver link error "Internal error: reference
>>> to deleted section".
>> 
>>> --- a/gcc/config/nvptx/nvptx.cc
>>> +++ b/gcc/config/nvptx/nvptx.cc
>> 
>>> +void
>>> +nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value)
>>> +{
>>> +  [...]
>>> +  if (!cgraph_node::get (name)->referred_to_p ())
>>> +/* Prevent "Internal error: reference to deleted section".  */
>>> +return;
>>> +  [...]
>>> +}
>> 
>> I understand the high-level rationale (PR105019) behind this early
>> return, but I'm curious why you chose cgraph 'referred_to_p ()' here,
>> instead of 'TREE_USED', or 'TREE_SYMBOL_REFERENCED' (on identifier), or
>> some such?  (All untested.)  Is there any specific reason that you
>> remember, or did it just happen to do the right thing?
>
> sorry, I don't remember, my best guess is that it happened to DTRT.

That's fair.

>> In an offloading test case (PR106445), I'm running into the case that a
>> C++ constructor alias gets called (via 'nvptx_output_call_insn', and per
>> that one's 'assemble_name' call, 'TREE_SYMBOL_REFERENCED' gets set, for
>> example) -- but that alias isn't '[cgraph]->referred_to_p ()', so no
>> PTX '.alias' gets emitted, resulting in link failure.  To make that work,
>> I might just add '|| TREE_SYMBOL_REFERENCED ([identifier])' next to the
>> existing cgraph 'referred_to_p ()', but I'd like to understand this
>> better.  For example, is it actually the very problem that this alias
>> isn't cgraph 'referred_to_p ()', but it should it be?  (I have not much
>> clue about the cgraph machinery generally, and even less about the
>> applicability of it/of its state in offloading compilation,
>> specifically.)
>
> Grepping though the source code for referred_to_p, I see that it is 
> sometimes used in conjunction with needed_p.  So I wonder if that one 
> returns true for the case you're describing.

No, that one ICEs.  ;-)

lto1: internal compiler error: in needed_p, at cgraphunit.cc:247

236 /* Determine if symbol declaration is needed.  That is, visible to something
237    either outside this translation unit, something magic in the system
238    configury */
239 bool
240 symtab_node::needed_p (void)
241 {
242   /* Double check that no one output the function into assembly file
243  early.  */
244   if (!native_rtl_p ())
245   gcc_checking_assert
246 (!DECL_ASSEMBLER_NAME_SET_P (decl)
247  || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)));

I intend to further look into this topic, later on, but for now have
pushed to trunk branch commit ca9cffe737d20953082333dacebb65d4261e0d0c
"For nvptx offloading, make sure to emit C++ constructor, destructor aliases 
[PR97106]",
see attached.


Grüße
 Thomas


>From ca9cffe737d20953082333dacebb65d4261e0d0c Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Wed, 16 Apr 2025 14:00:31 +0200
Subject: [PATCH] For nvptx offloading, make sure to emit C++ constructor,
 destructor aliases [PR97106]

	PR target/97106
	gcc/
	* config/nvptx/nvptx.cc (nvptx_asm_output_def_from_decls)
	[ACCEL_COMPILER]: Make sure to emit C++ constructor, destructor
	aliases.
	libgomp/
	* testsuite/libgomp.c++/pr96390.C: Un-XFAIL nvptx offloading.
	* testsuite/libgomp.c-c++-common/pr96390.c: Adjust.
---
 gcc/config/nvptx/nvptx.cc| 12 
 libgomp/testsuite/libgomp.c++/pr96390.C  |  2 --
 libgomp/testsuite/libgomp.c-c++-common/pr96390.c |  2 +-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 28da43ca740..d1e25b99701 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -7789,6 +7789,18 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
 #endif
 
   cgraph_node *cnode = cgraph_node::get (name);
+#ifdef ACCEL_COMPILER
+  /* For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106]
+
+ For some reason (yet to be analyzed), they're not 'cnode->referred_to_p ()'.
+ (..., or that's not the right approach at all;
+ 
+ "Re: [committed][nvptx] Use .alias directive for mptx >= 6.3").  */
+  if (DECL_CXX_CONSTRUCTOR_P (name)
+  || DECL_CXX_DESTRUCTOR_P (name))
+;
+  else
+#endif
   if (!cnode->referred_to_p ())
 /* Prevent "Internal error: reference to deleted section".  */
 return;
diff --git a/libgomp/testsuite/libgomp.c++/pr96390.C b/libgomp/testsuite/libg

Re: [PATCH] libstdc++: Fix constification in range_formatter::format v2 [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 15:17, Tomasz Kaminski  wrote:
>
>
>
> On Wed, Apr 16, 2025 at 3:47 PM Tomasz Kamiński  wrote:
>>
>> Because the _M_format(__rg, __fc) calls were placed outside of if constexpr,
>> this method and its children were instantiated, even if _M_format<const _Range>
>> could be used. Now we put the calls in the else branch of if constexpr.
>>
>> libstdc++-v3/ChangeLog:
>>
>> * include/std/format (range_formatter::format): Do not instantiate
>> _M_format for mutable _Rg if const _Rg can be used.
>> ---
>> Testing on x86_64-linux, but there are no tests that will detect
>> unnecessary instantiations.
>
> This is not a correctness issue, only some dead code elimination, so
> does not need to be in v15.

OK for trunk. If that's pushed before gcc-15 branches, great.

>>
>>
>>  libstdc++-v3/include/std/format | 15 +--
>>  1 file changed, 9 insertions(+), 6 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/std/format 
>> b/libstdc++-v3/include/std/format
>> index 27253f50ea8..d44c4b24abe 100644
>> --- a/libstdc++-v3/include/std/format
>> +++ b/libstdc++-v3/include/std/format
>> @@ -5636,12 +5636,15 @@ namespace __format
>> {
>>   using _Range = remove_reference_t<_Rg>;
>>   if constexpr (__format::__const_formattable_range<_Range, _CharT>)
>> - {
>> -   using _CRef = ranges::range_reference_t;
>> -   if constexpr (same_as, _Tp>)
>> - return _M_format(__rg, __fc);
>> - }
>> - return _M_format(__rg, __fc);
>> +   {
>> + using _CRef = ranges::range_reference_t;
>> + if constexpr (same_as, _Tp>)
>> +   return _M_format(__rg, __fc);
>> + else
>> +   return _M_format(__rg, __fc);
>> +   }
>> + else
>> +   return _M_format(__rg, __fc);
>> }
>>
>>  private:
>> --
>> 2.49.0
>>

Re: [PATCH] Docs: Document omp::allocator::* and ompx::allocator::* allocators.

2025-04-16 Thread Tobias Burnus


Alex wrote:

Here is a follow up patch for documentation of the omp.h allocators,
I'm not super happy with it but I wanted to get eyes on it before I go
to sleep tonight.

I want the table in there somewhere but I'm not confident that where I
put it was the right place.


I think having the C++ template classes listed under the OMP_ALLOCATOR 
environment variable feels odd.


I think it is best to move the two tables, the existing one under
"OMP_ALLOCATOR – Set the default allocator",
https://gcc.gnu.org/onlinedocs/libgomp/OMP_005fALLOCATOR.html
and the one you added to "11.3 Memory allocation",
https://gcc.gnu.org/onlinedocs/libgomp/Memory-allocation.html

In OMP_ALLOCATOR, I think a sentence such as the following would suffice:
"For the list of available predefined allocators and memory spaces, see
@ref{Memory allocation}."

And then in "Memory Allocator" changing:
"For the available predefined allocators and, as applicable, their
associated predefined memory spaces and for the available traits and
their default values, see "OMP_ALLOCATOR – Set the default allocator."
Predefined allocators without an associated memory space use the
omp_default_mem_space memory space. See additionally Offload-Target 
Specifics."


to something like:

"For the available traits and their default values, see ..."


Or we move the traits as well - and then refer to the available traits
and their default in OMP_ALLOCATOR, which seems to be even a bit
cleaner.


And I would add a first bullet point (before the "OpenMP API routines") to

@item The environment variable @ref{OMP_ALLOCATOR}.

(which expands to "OMP_ALLOCATOR" or "OMP_ALLOCATOR – Set the default 
allocator") - which seems to be missing from the list.




These are located in the omp::allocator namespace, […] extensions.

std::vector> vec;


I think an intro/lead to the example is missing - like: "For instance" 
or "The allocator templates can be used with allocator-aware containers 
like".


Otherwise, the example comes a bit sudden and without context.


Except for some additional @code and the other comments by Sandra,
changes LGTM.

Thanks,

Tobias

Re: Questions on replacing a structure pointer reference to a call to .ACCESS_WITH_SIZE in C FE

2025-04-16 Thread Qing Zhao




>> 
>> Thanks, will file bugs and fix them first.
> 
> I filed two bugs against GCC15:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119716
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119717
> 
> And assigned to myself.

Both the above were resolved.

Qing

Remove 'ALWAYS_INLINE' workaround in 'libgomp.c++/target-exceptions-pr118794-1.C' (was: [PUSHED] GCN, nvptx: Support '-mfake-exceptions', and use it for offloading compilation [PR118794])

2025-04-16 Thread Thomas Schwinge

Hi!

On 2025-04-15T00:22:26+0200, I wrote:
> --- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
> +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C

> +/* Help nvptx offloading overcome a code generation issue;
> +   PR106445, PR118518.  */
> +#define ALWAYS_INLINE __attribute__((always_inline))
> +
>  #pragma omp begin declare target
>  
>  bool ok = false;
> @@ -19,10 +20,12 @@ bool ok = false;
>  template 
>  struct C
>  {
> +  ALWAYS_INLINE
>C()
>{
>  ok = true;
>}
> +  ALWAYS_INLINE
>C(int) {};
>~C() {};

Pushed to trunk branch commit 518efed8cb7d003cd85477060b1fe926a2d7a53b
"Remove 'ALWAYS_INLINE' workaround in 
'libgomp.c++/target-exceptions-pr118794-1.C'",
see attached.


Grüße
 Thomas


>From 518efed8cb7d003cd85477060b1fe926a2d7a53b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Wed, 16 Apr 2025 16:52:08 +0200
Subject: [PATCH] Remove 'ALWAYS_INLINE' workaround in
 'libgomp.c++/target-exceptions-pr118794-1.C'

With commit ca9cffe737d20953082333dacebb65d4261e0d0c
"For nvptx offloading, make sure to emit C++ constructor, destructor aliases [PR97106]",
we're able to remove the 'ALWAYS_INLINE' workaround added in
commit fe283dba774be57b705a7a871b000d2894d2e553
"GCN, nvptx: Support '-mfake-exceptions', and use it for offloading compilation [PR118794]".

	libgomp/
	* testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Remove
	'ALWAYS_INLINE' workaround.
---
 .../testsuite/libgomp.c++/target-exceptions-pr118794-1.C| 6 --
 1 file changed, 6 deletions(-)

diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
index a73e7f897be..24e3d076a1b 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
@@ -9,10 +9,6 @@
 /* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C',
'../../../gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C'.  */
 
-/* Help nvptx offloading overcome a code generation issue;
-   PR106445, PR118518.  */
-#define ALWAYS_INLINE __attribute__((always_inline))
-
 #pragma omp begin declare target
 
 bool ok = false;
@@ -20,12 +16,10 @@ bool ok = false;
 template 
 struct C
 {
-  ALWAYS_INLINE
   C()
   {
 ok = true;
   }
-  ALWAYS_INLINE
   C(int) {};
   ~C() {};
 
-- 
2.34.1

Re: [PATCH] testsuite: Add support for GCOV_UNDER_TEST

2025-04-16 Thread Hans-Peter Nilsson

> From: Christophe Lyon 
> Date: Wed, 16 Apr 2025 14:41:17 +0200

> ping?

Since you directed it at me and CC:ed the list; in case that
was deliberate: I can only repeat "still ok", but I don't
have approval rights to the testsuite parts.

> 
> On Thu, 10 Apr 2025 at 15:48, Hans-Peter Nilsson  wrote:
> >
> > > From: Christophe Lyon 
> > > Date: Thu, 10 Apr 2025 15:38:48 +0200
> >
> > > On Thu, 10 Apr 2025 at 15:29, Hans-Peter Nilsson  wrote:
> > > >
> > > > > From: Christophe Lyon 
> > > > > Date: Thu, 10 Apr 2025 15:21:23 +0200
> > > >
> > > > Not sure why I'm CC:ed on this one, not being a maintainer
> > > > of the testsuite or targets where gcov tests are exercised,
> > >
> > > Because you fixed a problem in r13-4103-ge91d51457532da,
> > > so I wanted to make sure my patch was OK for you.
> >
> > I've forgot everything about that commit and the context. :}
> >
> > So, I now had an extra glance from the transform-name
> > perspective: still ok.
> >
> > brgds, H-P
>

Re: [PATCH] libstdc++: Fix constification in range_formatter::format v2 [PR109162]

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 3:47 PM Tomasz Kamiński  wrote:

> Because the _M_format(__rg, __fc) calls were placed outside of if constexpr,
> this method and its children were instantiated, even if _M_format<const _Range>
> could be used. Now we put the calls in the else branch of if constexpr.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (range_formatter::format): Do not instantiate
> _M_format for mutable _Rg if const _Rg can be used.
> ---
> Testing on x86_64-linux, but there are no tests that will detect
> unnecessary instantiations.
>
This is not a correctness issue, only some dead code elimination, so
does not need to be in v15.

>
>  libstdc++-v3/include/std/format | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> index 27253f50ea8..d44c4b24abe 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -5636,12 +5636,15 @@ namespace __format
> {
>   using _Range = remove_reference_t<_Rg>;
>   if constexpr (__format::__const_formattable_range<_Range,
> _CharT>)
> - {
> -   using _CRef = ranges::range_reference_t;
> -   if constexpr (same_as, _Tp>)
> - return _M_format(__rg, __fc);
> - }
> - return _M_format(__rg, __fc);
> +   {
> + using _CRef = ranges::range_reference_t;
> + if constexpr (same_as, _Tp>)
> +   return _M_format(__rg, __fc);
> + else
> +   return _M_format(__rg, __fc);
> +   }
> + else
> +   return _M_format(__rg, __fc);
> }
>
>  private:
> --
> 2.49.0
>
>

[PUSHED] Add 'libgomp.c++/pr106445-1{,-O0}.C' [PR106445]

2025-04-16 Thread Thomas Schwinge

PR target/106445
libgomp/
* testsuite/libgomp.c++/pr106445-1.C: New.
* testsuite/libgomp.c++/pr106445-1-O0.C: Likewise.
---
 libgomp/testsuite/libgomp.c++/pr106445-1-O0.C |  3 +++
 libgomp/testsuite/libgomp.c++/pr106445-1.C| 18 ++
 2 files changed, 21 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.c++/pr106445-1-O0.C
 create mode 100644 libgomp/testsuite/libgomp.c++/pr106445-1.C

diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C 
b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C
new file mode 100644
index 000..bcd499c664c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C
@@ -0,0 +1,3 @@
+// { dg-additional-options -O0 }
+
+#include "pr106445-1.C"
diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1.C 
b/libgomp/testsuite/libgomp.c++/pr106445-1.C
new file mode 100644
index 000..329ce62eb7b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/pr106445-1.C
@@ -0,0 +1,18 @@
+#include 
+
+int main()
+{
+#pragma omp target
+  {
+{
+  std::vector v;
+  if (!v.empty())
+   __builtin_abort();
+}
+{
+  std::vector v(100);
+  if (v.capacity() < 100)
+   __builtin_abort();
+}
+  }
+}
-- 
2.34.1

Re: [PATCH] RISC-V: Add pattern for vector-scalar multiply-add/sub [PR119100]

2025-04-16 Thread Paul-Antoine Arras


Hi Jeff, Robin,

Thanks for your comments.

On 30/03/2025 01:30, Jeff Law wrote:

On 3/27/25 1:39 PM, Robin Dapp wrote:

Hi Paul-Antoine,

This pattern enables the combine pass to merge a vec_duplicate into a 
plus-mult

or minus-mult RTL instruction.

Before this patch, we have two instructions, e.g.:
  vfmv.v.f    v6,fa0
  vfmadd.vv   v9,v6,v7

After, we get only one:
  vfmadd.vf   v9,fa0,v7

On SPEC2017's 503.bwaves_r, depending on the workload, the reduction 
in dynamic

instruction count varies from -4.66% to -4.75%.


The general issue with this kind of optimization (we have discussed it 
a few times already) is that, depending on the uarch, we want the 
local combine optimization that you show but not the fwprop/late- 
combine one where we propagate a vector broadcast into a loop.


So IMHO in order to continue with this and similar patterns we need at 
least accompanying rtx_cost handling that would allow us to tune per 
uarch.


Please find attached an updated patch with an additional cost model. By 
default, an instruction is 4 and the penalty for moving data from 
floating-point to vector register is 2; thus, vfmadd.vf costs 6, which 
still makes it cheaper than vec_duplicate + vfmadd.vv. Different tuning 
parameters can alter this tradeoff though.


Pan Li sent a similar patch for vadd.vv/vadd.vx I think in November 
and I believe he intended to continue when stage 1 opens.


An outstanding question is how to distinguish the combine case from 
the late-combine case.  I haven't yet thought about that in detail.


I experimented with a few variations around the testcase of the PR. When 
the loop body shrinks, the lower register pressure allows the 
vec_duplicate to be hoisted to the loop preamble. In such cases, I did 
not observe that the vec_duplicate got propagated into the loop body.


The other thing we should consider is that we can certainly theorize 
that this kind of register file crossing case can have an extra penalty 
(it traditionally does), we don't have actual evidence that it's causing 
a problem on any RISC-V designs.


So maybe the way to go is to add a field to the uarch tuning structure 
indicating the additional cost (if any) of a register file crossing 
vector op of this nature.  Then query that in riscv_rtx_costs or 
whatever our rtx_cost function is named.


Default that additional cost to zero initially.  Then uarch experts can 
fill in the appropriate value.  Yea, such a simplistic approach wouldn't 
handle cases like ours where you really need nearby context to be sure, 
but I don't think we want to over-engineer this solution too badly right 
now.


Note that since this isn't a regression it'll need to wait for gcc-16 
development to open before the patch can go forward.


Thanks!
Jeff


ps.  I know Baylibre's remit was to test dynamic icounts and there were 
good reasons for that.  So don't worry about not having run it on 
design.  If you happen to still have executables, pass them along 
privately, I can run them on a BPI.  ROMS is a few hours of runtime, but 
that's not a big deal.


We recently received our own BPI board, so I was able to run 
503.bwaves_r on it. Unfortunately, the DIC reduction does not translate 
into similar execution time gains. The vector-scalar is only faster by 
0.33% on average over 3 iterations.


Thanks,
--
PA
commit b0f1dbf8b4ad12c0eff459d0bf6b3d9c466fd5ad
Author: Paul-Antoine Arras 
Date:   Tue Feb 25 16:38:54 2025 +0100

RISC-V: Add pattern for vector-scalar multiply-add/sub [PR119100]

This pattern enables the combine pass to merge a vec_duplicate into a plus-mult
or minus-mult RTL instruction.

Before this patch, we have two instructions, e.g.:
  vfmv.v.f    v6,fa0
  vfmadd.vv   v9,v6,v7

After, we get only one:
  vfmadd.vf   v9,fa0,v7

On SPEC2017's 503.bwaves_r, depending on the workload, the reduction in dynamic
instruction count varies from -4.66% to -4.75%.

gcc/ChangeLog:

PR target/119100
* config/riscv/vector.md (*pred__scalar): Define.
* config/riscv/riscv.cc (riscv_rtx_costs): Add cost for moving data from
a scalar floating-point to a vector register.

diff --git gcc/config/riscv/riscv.cc gcc/config/riscv/riscv.cc
index 38f3ae7cd84..0f0cf04bdd9 100644
--- gcc/config/riscv/riscv.cc
+++ gcc/config/riscv/riscv.cc
@@ -3864,6 +3864,18 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
   if (riscv_v_ext_mode_p (mode))
 {
   *total = COSTS_N_INSNS (1);
+  if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS) && outer_code == SET)
+	{
+	  rtx plus_op0 = XEXP (x, 0);
+	  if (GET_CODE (plus_op0) == MULT)
+	{
+	  rtx mult_op0 = XEXP (plus_op0, 0);
+	  if (GET_CODE (mult_op0) == VEC_DUPLICATE)
+		{
+		  *total += get_vector_costs ()->regmove->FR2VR;
+		}
+	}
+	}
   return true;
 }
 
diff --git gcc/config/riscv/vector.md gcc/config/ri

Re: [PATCH] cobol: Eliminate gcc/cobol/LICENSE. [PR119759]

2025-04-16 Thread Jakub Jelinek

On Wed, Apr 16, 2025 at 01:49:50PM -0500, Robert Dubner wrote:
> I am not well-versed in license and legal issues.  But I see that except
> for the GO language, gcc/cobol is almost unique in that there is a LICENSE
> file.
> 
> This patch gets rid of it.
> 
> Okay for trunk?
> 
> 
> Subject: [PATCH] cobol: Eliminate gcc/cobol/LICENSE. [PR119759]
> 
> gcc/cobol
> 
> PR cobol/119759
> * LICENSE: Deleted.

Ok.

Jakub

[PUSHED/12 1/6] phiopt: Reset the number of iterations information of a loop when changing an exit from the loop [PR117243]

2025-04-16 Thread Andrew Pinski

After r12-5300-gf98f373dd822b3, phiopt could get the following bb structure:
  |
middle-bb -----|
  |            |
  |   |----|   |
phi<1, 2>  |   |
cond       |   |
  |        |   |
  |--------+---|

Which was considered 2 loops. The inner loop had an estimate of upper_bound
of 8, due to the original `for (b = 0; b <= 7; b++)`. The outer loop was
already an infinite one.
So phiopt would come along and change the condition to be unconditionally true;
we change the inner loop to being an infinite one but don't reset the estimate
on the loop, and cleanup cfg comes along and changes it into one loop but also
does not reset the estimate of the loop. Then the loop unrolling uses the old
estimate and decides to add an unreachable there.
So the fix is, when phiopt changes an exit to a loop, to reset the estimates,
similar to how cleanupcfg does it when merging some basic blocks.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/117243
PR tree-optimization/116749

gcc/ChangeLog:

* tree-ssa-phiopt.cc (replace_phi_edge_with_variable): Reset loop
estimates if the cond_block was an exit to a loop.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr117243-1.c: New test.
* gcc.dg/torture/pr117243-2.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit b7c69cc072ef0da36439ebc55c513b48e68391b7)
---
 gcc/testsuite/gcc.dg/torture/pr117243-1.c | 30 
 gcc/testsuite/gcc.dg/torture/pr117243-2.c | 34 +++
 gcc/tree-ssa-phiopt.cc| 12 
 3 files changed, 76 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr117243-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr117243-2.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr117243-1.c 
b/gcc/testsuite/gcc.dg/torture/pr117243-1.c
new file mode 100644
index 000..c4bbc31467c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117243-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-optimized" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } { "" } } */
+
+/* PR tree-optimization/117243 */
+/* foo should be an infinite but sometimes it gets optimized incorrectly into
+   an __builtin_unreachable(); which is not valid.  */
+void
+foo (unsigned int a, unsigned char b)
+{
+  lbl:
+  for (b = 0; b <= 7; b++)
+{
+  unsigned char c[1][1];
+  int i, j;
+  for (i = 0; i < 1; i++)
+for (j = 0; j < 1; j++)
+  c[i][j] = 1;
+  if (b)
+   goto lbl;
+}
+}
+
+int
+main ()
+{
+  foo (1, 2);
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_unreachable " "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/torture/pr117243-2.c 
b/gcc/testsuite/gcc.dg/torture/pr117243-2.c
new file mode 100644
index 000..d9b0d3eeb98
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr117243-2.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-fno-tree-ch -fdump-tree-optimized" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } { "" } } */
+
+/* PR tree-optimization/117243 */
+/* PR tree-optimization/116749 */
+
+/* main1 should be an infinite but sometimes it gets optimized incorrectly into
+   an __builtin_unreachable(); which is not valid.  */
+int main1 (void)
+{
+int g=0;
+int l1[1];
+int *l2 = &g;
+int i;
+for (i=0; i<1; i++)
+l1[i] = (1);
+for (g=0; g; ++g)
+{
+int *l3[1] = {&l1[0]};
+}
+*l2 = *l1;
+b:
+for (i=0; i<2; ++i)
+{ 
+if (i)
+goto b;
+if (g)
+continue;
+}
+return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_unreachable " "optimized"} } */
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 3ef7d6b28fc..167b0b3be74 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-match.h"
 #include "dbgcnt.h"
 #include "tree-ssa-propagate.h"
+#include "tree-ssa-loop-niter.h"
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
 static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -429,6 +430,17 @@ replace_phi_edge_with_variable (basic_block cond_block,
 edge_to_remove = EDGE_SUCC (cond_block, 1);
   else
 edge_to_remove = EDGE_SUCC (cond_block, 0);
+
+  /* If we are removing the cond on a loop exit,
+ reset number of iteration information of the loop. */
+  if (loop_exits_from_bb_p (cond_block->loop_father, cond_block))
+{
+  auto loop = cond_block->loop_father;
+  free_numbers_of_iterations_estimates (loop);
+  loop->any_upper_bound = false;
+  loop->any_likely_upper_bound = false;
+}
+
   if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
 {
   e->flags |= EDGE_FALLTHRU;
-- 
2.43.0

[PUSHED/12 2/6] backprop: Fix deleting of a phi node [PR116922]

2025-04-16 Thread Andrew Pinski

The problem here is remove_unused_var is called on a name that is
defined by a phi node but it deletes it like removing a normal statement.
remove_phi_node should be called rather than gsi_remove for phinodes.

Note there is a possibility of using simple_dce_from_worklist instead
but that is for another day.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/116922

gcc/ChangeLog:

* gimple-ssa-backprop.cc (remove_unused_var): Handle phi
nodes correctly.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116922.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit cea87c84eacdb422caeada734ba5138c994d7022)
---
 gcc/gimple-ssa-backprop.cc  | 10 --
 gcc/testsuite/gcc.dg/torture/pr116922.c | 19 +++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116922.c

diff --git a/gcc/gimple-ssa-backprop.cc b/gcc/gimple-ssa-backprop.cc
index 68ea403e847..f8d5841db42 100644
--- a/gcc/gimple-ssa-backprop.cc
+++ b/gcc/gimple-ssa-backprop.cc
@@ -657,8 +657,14 @@ remove_unused_var (tree var)
   print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
 }
   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
-  gsi_remove (&gsi, true);
-  release_defs (stmt);
+  if (gimple_code (stmt) == GIMPLE_PHI)
+remove_phi_node (&gsi, true);
+  else
+{
+  unlink_stmt_vdef (stmt);
+  gsi_remove (&gsi, true);
+  release_defs (stmt);
+}
 }
 
 /* Note that we're replacing OLD_RHS with NEW_RHS in STMT.  */
diff --git a/gcc/testsuite/gcc.dg/torture/pr116922.c 
b/gcc/testsuite/gcc.dg/torture/pr116922.c
new file mode 100644
index 000..0fcf912930f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116922.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* PR tree-optimization/116922 */
+
+
+static int g;
+
+void
+foo (int c, double v, double *r)
+{
+b:
+  do
+v /= g - v;
+  while (c);
+  *r = v;
+
+  double x;
+  foo (5, (double)0, &x);
+}
-- 
2.43.0

[PUSHED/12 4/6] phiopt: Fix value_replacement for middle bb having phi nodes [PR118922]

2025-04-16 Thread Andrew Pinski

After r12-5300-gf98f373dd822b3, value_replacement would be able to look at the
following cfg structure:
```
   [local count: 1014686024]:
  if (h_6 != 0)
goto ; [94.50%]
  else
goto ; [5.50%]

   [local count: 114863530]:
  # h_6 = PHI <0(4), 1(5)>

   [local count: 1073741824]:
  # f_8 = PHI <0(5), h_6(6)>
  _9 = f_8 ^ 1;
  a.0_10 = a;
  _11 = _9 + a.0_10;
  if (_11 != -117)
goto ; [94.50%]
  else
goto ; [5.50%]
```

value_replacement would incorrectly think the middle bb (6) was empty and so it
decides to remove the condition in bb5 and replace it with 0, as the function
thought it was `h_6 ? 0 : h_6`.
But since there is an incoming phi node to bb6 defining h_6, that is
incorrect.

The fix is to check whether there are phi nodes in the middle bb and, if so,
set empty_or_with_defined_p to false.
This was not needed before r12-5300-gf98f373dd822b3 because the phi would have
been dead otherwise due to other checks.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/118922

gcc/ChangeLog:

* tree-ssa-phiopt.cc (value_replacement): Set empty_or_with_defined_p
to false when there is phi nodes for the middle bb.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr118922-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit 7232c005afb5002cdfd0a2dbd0e8b8f2d80250ce)
---
 gcc/testsuite/gcc.dg/torture/pr118922-1.c | 57 +++
 gcc/tree-ssa-phiopt.cc|  3 ++
 2 files changed, 60 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr118922-1.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr118922-1.c 
b/gcc/testsuite/gcc.dg/torture/pr118922-1.c
new file mode 100644
index 000..27e8c78c0e4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr118922-1.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* PR tree-optimization/118922 */
+
+/* Phi-opt would convert:
+   [local count: 1014686024]:
+  if (h_6 != 0)
+goto ; [94.50%]
+  else
+goto ; [5.50%]
+
+   [local count: 114863530]:
+  # h_6 = PHI <0(4), 1(5)>
+
+   [local count: 1073741824]:
+  # f_8 = PHI <0(5), h_6(6)>
+  _9 = f_8 ^ 1;
+  a.0_10 = a;
+  _11 = _9 + a.0_10;
+  if (_11 != -117)
+goto ; [94.50%]
+  else
+goto ; [5.50%]
+
+into:
+
+   [local count: 59055799]:
+  c = d_3;
+
+   [local count: 1073741824]:
+  # f_8 = PHI <0(5), 0(4)>
+  _9 = f_8 ^ 1;
+  a.0_10 = a;
+  _11 = _9 + a.0_10;
+  if (_11 != -117)
+goto ; [94.50%]
+  else
+goto ; [5.50%]
+
+as it thought the middle bb was empty as there was only a phi node there. */
+
+
+int a = -117, b, c, e;
+void g(int h) {
+  int f = 0;
+  while (!f + a - -117) {
+f = h == 0;
+if (h == 0)
+  h = 1;
+  }
+}
+int main() {
+  int d = 8;
+  for (; e;)
+d = 0;
+  c = d;
+  g(0);
+}
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 167b0b3be74..aec0350010b 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1322,6 +1322,9 @@ value_replacement (basic_block cond_bb, basic_block 
middle_bb,
&& jump_function_from_stmt (&arg1, stmt)))
empty_or_with_defined_p = false;
 }
+  /* The middle bb is not empty if there are any phi nodes. */
+  if (phi_nodes (middle_bb))
+empty_or_with_defined_p = false;
 
   cond = last_stmt (cond_bb);
   code = gimple_cond_code (cond);
-- 
2.43.0

[PUSHED/12 6/6] testcase: Add testcase for already fixed PR [PR118476]

2025-04-16 Thread Andrew Pinski

This testcase was fixed by r15-3052-gc7b76a076cb2c6ded but is
a testcase that failed in a different fashion and a much older
failure than the one added with r15-3052.

Pushed as obvious after a quick test.

PR tree-optimization/118476

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr118476-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit d45a6502d1ec87d43f1a39f87cca58f1e28369c8)
---
 gcc/testsuite/gcc.dg/torture/pr118476-1.c | 14 ++
 1 file changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr118476-1.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr118476-1.c 
b/gcc/testsuite/gcc.dg/torture/pr118476-1.c
new file mode 100644
index 000..33509403b61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr118476-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+
+/* PR tree-optimization/118476 */
+
+typedef unsigned long long poly64x1 
__attribute__((__vector_size__(1*sizeof(long long))));
+
+poly64x1 vext_p64(poly64x1 a, poly64x1 b, const int n)
+{
+  poly64x1 r = a;
+  unsigned src = (unsigned)n;
+  long long t = b[0];
+  r[0] = (src < 1) ? a[src] : t;
+  return r;
+}
-- 
2.43.0

[PUSHED/12 0/6] Backport of regression fixes to GCC 12

2025-04-16 Thread Andrew Pinski

This includes all of the regression fixes that were backported yesterday to GCC 
14
and 13 that are also regressions in GCC 12.

Andrew Pinski (6):
  phiopt: Reset the number of iterations information of a loop when
changing an exit from the loop [PR117243]
  backprop: Fix deleting of a phi node [PR116922]
  vec-lowering: Fix ABSU lowering [PR111285]
  phiopt: Fix value_replacement for middle bb having phi nodes
[PR118922]
  match: Reject non-ssa name/min invariants in gimple_extract [PR116412]
  testcase: Add testcase for already fixed PR [PR118476]

 gcc/gimple-match-head.cc   |  6 +++
 gcc/gimple-ssa-backprop.cc | 10 +++-
 gcc/testsuite/g++.dg/torture/vect-absu-1.C | 29 +++
 gcc/testsuite/gcc.dg/torture/pr116412-1.c  |  6 +++
 gcc/testsuite/gcc.dg/torture/pr116922.c| 19 
 gcc/testsuite/gcc.dg/torture/pr117243-1.c  | 30 
 gcc/testsuite/gcc.dg/torture/pr117243-2.c  | 34 +
 gcc/testsuite/gcc.dg/torture/pr118476-1.c  | 14 ++
 gcc/testsuite/gcc.dg/torture/pr118922-1.c  | 57 ++
 gcc/tree-ssa-phiopt.cc | 15 ++
 gcc/tree-vect-generic.cc   | 10 +++-
 11 files changed, 227 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/vect-absu-1.C
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116412-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116922.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr117243-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr117243-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr118476-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr118922-1.c

-- 
2.43.0

Re: [PATCH] PR tree-optimization/119712 - Always reflect lower bits from mask in subranges.

2025-04-16 Thread Sam James

Andrew MacLeod  writes:

> This was a fun one!   An actual bug, and it took a while to sort out. 
> After chasing down some red herrings, this turns out to be an issue of
> interaction between the range and value masks and intervening
> calculations.

(Just want to say thanks for the detailed commit message, it helps a lot
with understanding how things interact.)

> [...]
>
> It also causes a few ripples in the testsuite so 3 test cases also
> needed adjustment:
>
> [...]
>
> * gcc.dg/tree-ssa/phi-opt-value-5.c  : WIth the expanded ranges, CCP2
>   pass use to export:
>    Global Exported: d_3 = [irange] int [-INF, +INF] MASK 0xfffe
> VALUE 0x1
> and now
>    Global Exported: d_3 = [irange] int [-INF, -1][1, +INF] MASK
> 0xfffe VALUE 0x1
> which in turn makes the following comment obsolete as the optimization
> does happen earlier.:
> /* fdiv1 requires until later than phiopt2 to be able to detect that
>    d is non-zero. to be able to remove the conditional.  */
> Adjusted the testcase to expect everything to be taken care of by
> phi-opt2 pass.

The comment looks like it's still there in the patch -- does it need
dropping?

[PATCH] PR tree-optimization/119712 - Always reflect lower bits from mask in subranges.

2025-04-16 Thread Andrew MacLeod

This was a fun one!   An actual bug, and it took a while to sort out.  
After chasing down some red herrings, this turns out to be an issue of 
interaction between the range and value masks and intervening calculations.


The original patch from 11/2023 adjusts intersection so that it can 
enhance subranges based on the value mask.  ie in this testcase


[irange] int [-INF, 2147483644] MASK 0xfffffffc VALUE 0x1

 If adjust_range() were called on this, it would eliminate the trailing 
mask/value bit ranges that are invalid and turn it into :


[-INF, -3][1, 1][4, 2147483644] MASK 0xfffffffc VALUE 0x1

reflecting the lower bits into the range.   The problem develops because
we only apply adjust_range () during intersection in an attempt to
avoid expensive work when it isn't needed.


Unfortunately, that is what triggers this infinite loop.  Ranger's cache
propagates changes, and the algorithm is designed to always improve the
range.  In this case, the first iteration through, _11 receives the
above value, [irange] int [-INF, 2147483644] MASK 0xfffffffc VALUE 0x1,
which, via the mask, excludes 0, 2 and 3.


The ensuing calculations in block 7 do not trigger a successful
intersection operation, and thus the range pairs are never expanded to
eliminate the lower ranges, and it triggers another change in values
which leads to the next iteration being less precise, but not obviously
so. [irange] int [-INF, 2147483644] MASK 0xfffffffd VALUE 0x0 is a
result of the calculation.   As ranges are supposed to always get better
with this algorithm, we simply compare for difference... and this range
is different, and thus we replace it. It only excludes 2 and 3.


Next iteration through, the less precise range DOES trigger an
intersection operation in block 7, and when that is expanded to [irange]
int [-INF, 1][4, 2147483644] MASK 0xfffffffd VALUE 0x0, using that we can
again create the more precise range for _11 that started the cycle, and
we go on and on and on.


If we fix this so that we always expand subranges to reflect the lower 
bits in a bitmask, the initial value starts with


[irange] int [-INF, -3][1, 1][4, 2147483644] MASK 0xfffffffc VALUE 0x1

And everything falls into place as it should.  The fix is to be 
consistent about expanding those lower subranges.


I also added a couple of minor performance tweaks to avoid unnecessary
work, along with folding adjust_range () directly into
set_range_from_bitmask ().


I started at a 0.2% overall compilation increase (1.8% in VRP). In the 
end, this patch is down to 0.6% in VRP, and only 0.08% overall, so 
manageable for all the extra work.


It also causes a few ripples in the testsuite so 3 test cases also 
needed adjustment:


 * gcc.dg/pr83072-2.c :  With the addition of the expanded ranges, CCP
used to export a global:
    Global Exported: c_3 = [irange] int [-INF, +INF] MASK 0xfffffffe
VALUE 0x1

and now
   Global Exported: c_3 = [irange] int [-INF, -1][1, +INF] MASK
0xfffffffe VALUE 0x1
Which in turn enables forwprop to collapse part of the testcase much
earlier. So I turned off forwprop for the testcase.


* gcc.dg/tree-ssa/phi-opt-value-5.c  : With the expanded ranges, the CCP2
pass used to export:
   Global Exported: d_3 = [irange] int [-INF, +INF] MASK 0xfffffffe
VALUE 0x1

and now
   Global Exported: d_3 = [irange] int [-INF, -1][1, +INF] MASK
0xfffffffe VALUE 0x1
which in turn makes the following comment obsolete as the optimization
does happen earlier:

/* fdiv1 requires until later than phiopt2 to be able to detect that
   d is non-zero. to be able to remove the conditional.  */
Adjusted the testcase to expect everything to be taken care of by 
phi-opt2 pass.


 * gcc.dg/tree-ssa/vrp122.c : Previously, CCP exported:
   Global Exported: g_4 = [irange] unsigned int [0, +INF] MASK
0xfffffff0 VALUE 0x0

and then EVRP refined that and stored it, then the testcase tested for:
   Global Exported: g_4 = [irange] unsigned int [0, 0][16, +INF] MASK
0xfffffff0 VALUE 0x0
Now, CCP itself exported the expanded range, so there is nothing for VRP
to do.

adjusted the testcase to look for the expanded range in CCP.

Now we never get into this situation where the bitmask is explicitly 
applied in some places and not others.


Bootstraps on x86_64-pc-linux-gnu with no regressions. Finally.   Is 
this OK for trunk, or should I hold off a little bit?


Andrew
From 36e4b77565a1965d5bca15d196f32d5758393063 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Mon, 14 Apr 2025 16:25:15 -0400
Subject: [PATCH 3/3] Always reflect lower bits from mask in subranges.

During intersection, we expand the subranges to exclude the lower values
from a bitmask with trailing zeros.  This leads to inconsistent evaluations
and, in the case of this PR, that led to an infinite cycle.

Always expand the lower subranges in set_range_from_bitmask instead.

	PR tree-optimization/119712
	gcc/
	* value-range.cc (range_bitmask::adjust_range): Delete.
	(irange::set_range_from_bi

Re: [PATCH] [testsuite] [ppc] block-cmp-8 should require powerpc64

2025-04-16 Thread Peter Bergner

On 4/16/25 12:27 AM, Alexandre Oliva wrote:
> Since that sort of broad change will presumably not make gcc-15 (it
> wouldn't fix a regression, not even the problem addressed by the
> upthread patch), 

Yes, the patch to change powerpc64 -> powerpc64_hw is definitely a
gcc-16 patch.

> ...may I understand your initial response in this thread
> as approval of that patch?  That wasn't clear either.
> 
> (Sorry if that comes across as asking something obvious; I've noticed
> misalignments between my expectations of obviousness and those of other
> ppc maintainers before, so I've learned to be extra cautious)

It's never a bad thing asking for clarity!  That said, I am not an
official maintainer of the GCC rs6000 port.  Even so, I believe this
would fall under the "obvious" rule, since this is a "dg-do run" test case
and the "has_arch_ppc64" test we're using is clearly a compile time only test
and the "powerpc64" is the correct hw test to check for 64-bit instruction
support on the test system.

If it were me, I'd give Segher and the others a couple of days to disagree
and not hearing any objections, I'd push it under the "obvious" rule.

Peter

[COMMITTED v2] combine: Correct comments about combine_validate_cost

2025-04-16 Thread Hans-Peter Nilsson

> From: Richard Sandiford 
> Date: Tue, 15 Apr 2025 09:23:21 +0100

> > Ok to commit?
> 
> OK, thanks.

Thanks!

Though, I noticed another "cheaper" in the function header.
Fixing that one was a more obvious correction (thus
committed as such), as per the commit message: what the
function determines isn't whether replacements are *cheaper*,
it's whether they are *more expensive*.  Can't help but
wonder about the consequences of trusting those comments.
Since the rewording in the previous patch was approved, I
amended this change and committed the combination.

-- >8 --
Fix misleading comments.  That function only determines whether
replacements cost more; it doesn't actually *validate* costs as being
cheaper.

For example, it returns true also if it for various reasons cannot
determine the costs, or if the new cost is the same, like when doing
an identity replacement.  The code has been the same since
r0-59417-g64b8935d4809f3.

* combine.cc: Correct comments about combine_validate_cost.
---
 gcc/combine.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5f085187cfef..e1186087dff4 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -815,7 +815,7 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link 
*newval)
 #define SUBST_LINK(oldval, newval) do_SUBST_LINK (&oldval, newval)
 
 /* Subroutine of try_combine.  Determine whether the replacement patterns
-   NEWPAT, NEWI2PAT and NEWOTHERPAT are cheaper according to insn_cost
+   NEWPAT, NEWI2PAT and NEWOTHERPAT are more expensive according to insn_cost
than the original sequence I0, I1, I2, I3 and undobuf.other_insn.  Note
that I0, I1 and/or NEWI2PAT may be NULL_RTX.  Similarly, NEWOTHERPAT and
undobuf.other_insn may also both be NULL_RTX.  Return false if the cost
@@ -4129,8 +4129,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
}
 }
 
-  /* Only allow this combination if insn_cost reports that the
- replacement instructions are cheaper than the originals.  */
+  /* Reject this combination if insn_cost reports that the replacement
+ instructions are more expensive than the originals.  */
   if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
 {
   undo_all ();
-- 
2.30.2

Re: [PATCH] [testsuite] [ppc] require ifunc for target_clones test

2025-04-16 Thread Peter Bergner

On 4/11/25 1:08 PM, Alexandre Oliva wrote:
> 
> gcc.target/powerpc/power11-3.c uses target_clones, that depends on
> ifunc.  Require ifunc support.

This looks "obvious" to me.

The only systems we (IBM) have access to build and test on all have ifunc
support, so we clearly didn't hit this ourselves.  I know we have other
test cases that use target_clones that should probably get this update
too.  Not for you to worry about, I'll add that to my team's TODO once
stage1 reopens.

Peter

Re: [PATCH] PR tree-optimization/119712 - Always reflect lower bits from mask in subranges.

2025-04-16 Thread Andrew MacLeod




On 4/16/25 16:55, Andrew MacLeod wrote:



I started at a 0.2% overall compilation increase (1.8% in VRP). In the 
end, this patch is down to 0.6% in VRP, and only 0.08% overall, so 
manageable for all the extra work.



Final version performance run...

VRP slowed down by 0.28%
Threading sped up by 0.14%
Final compilation slowdown 0.04%...   almost noise :-)

Andrew

[COMMITTED] Doc: Document raw string literals as GNU C extension [PR88382]

2025-04-16 Thread Sandra Loosemore

gcc/ChangeLog
PR c/88382
* doc/extend.texi (Syntax Extensions): Adjust menu.
(Raw String Literals): New section.
---
 gcc/doc/extend.texi | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 5bc2785f802..0978c4c41b2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12978,6 +12978,7 @@ C and/or C++ standards, while others remain specific to 
GNU C.
 * Binary constants::Binary constants using the @samp{0b} prefix.
 * Dollar Signs::Dollar sign is allowed in identifiers.
 * Character Escapes::   @samp{\e} stands for the character @key{ESC}.
+* Raw String Literals::C++ raw string literals are supported in C.
 * Alternate Keywords::  @code{__const__}, @code{__asm__}, etc., for header 
files.
 * Function Names::  Printable strings which are the name of the current
 function.
@@ -13999,6 +14000,25 @@ machines, typically because the target assembler does 
not allow them.
 You can use the sequence @samp{\e} in a string or character constant to
 stand for the ASCII character @key{ESC}.
 
+@node Raw String Literals
+@subsection Raw String Literals
+@cindex raw string literals
+@cindex string literals, raw
+
+The C++11 standard added syntax for raw string literals prefixed
+with @samp{R}.  This syntax allows you to use an arbitrary delimiter
+sequence instead of escaping special characters within the string.
+For example, these string constants are all equivalent:
+
+@smallexample
+const char *s1 = "\\";
+const char *s2 = R"(\)";
+const char *s3 = R"foo(\)foo";
+@end smallexample
+
+As an extension, GCC also accepts raw string literals in C with
+@option{-std=gnu99} or later.
+
 @node Alternate Keywords
 @subsection Alternate Keywords
 @cindex alternate keywords
-- 
2.34.1

Re: [PATCH] s390: Use match_scratch instead of scratch in define_split [PR119834]

2025-04-16 Thread Jakub Jelinek

On Wed, Apr 16, 2025 at 08:52:17PM +0200, Jakub Jelinek wrote:
> The following testcase ICEs since r15-1579 (addition of late combiner),
> because *clrmem_short can't be split.
> The problem is that the define_insn uses
>(use (match_operand 1 "nonmemory_operand" "n,a,a,a"))
>(use (match_operand 2 "immediate_operand" "X,R,X,X"))
>(clobber (match_scratch:P 3 "=X,X,X,&a"))
> and define_split assumed that if operands[1] is const_int_operand,
> match_scratch will be always scratch, and it will be reg only if
> it was the last alternative where operands[1] is a reg.
> The pattern doesn't guarantee it though, of course RA will not try to
> uselessly assign a reg there if it is not needed, but during RA
> on the testcase below we match the last alternative, but then comes
> late combiner and propagates const_int 3 into operands[1].  And that
> matches fine, match_scratch matches either scratch or reg and the constraint
> in that case is X for the first variant, so still just fine.  But we won't
> split that because the splitters only expect scratch.
> 
> The following patch fixes it by using match_scratch instead of scratch,
> so that it accepts either.
> 
> Bootstrapped on s390x-linux, ok for trunk if regtesting passes as well?

Regtested successfully as well.

> 2025-04-16  Jakub Jelinek  
> 
>   PR target/119834
>   * config/s390/s390.md (define_split after *cpymem_short): Use
>   (clobber (match_scratch N)) instead of (clobber (scratch)).  Use
>   (match_dup 4) and operands[4] instead of (match_dup 3) and operands[3]
>   in the last of those.
>   (define_split after *clrmem_short): Use (clobber (match_scratch N))
>   instead of (clobber (scratch)).
>   (define_split after *cmpmem_short): Likewise.
> 
>   * g++.target/s390/pr119834.C: New test.

Jakub

[PING][PATCH][gcc-14] libcpp: Fix incorrect line numbers in large files [PR108900]

2025-04-16 Thread Yash . Shinde

From: Jeremy Bettis 

This patch addresses an issue in the C preprocessor where incorrect
line number information is generated when processing files with a
large number of lines. The problem arises from improper handling
of location intervals in the line map, particularly when locations
exceed LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.

By ensuring that the highest location is not decremented if it
would move to a different ordinary map, this fix resolves
the line number discrepancies observed in certain test cases.
This change improves the accuracy of line number reporting, benefiting
users relying on precise code coverage and debugging information.

Tested x86_64-linux.

libcpp/ChangeLog:

PR preprocessor/108900
* files.cc (_cpp_stack_file): Do not decrement highest_location
across distinct maps.

Signed-off-by: Jeremy Bettis 
Signed-off-by: Yash Shinde 
(cherry picked from commit d9b56c65a2697e0d7a6c0f15f1977803dc94579b)
---
 libcpp/files.cc | 9 +
 1 file changed, 9 insertions(+)

diff --git a/libcpp/files.cc b/libcpp/files.cc
index c61df339e20..5d53d7f6279 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -1005,6 +1005,15 @@ _cpp_stack_file (cpp_reader *pfile, _cpp_file *file, 
include_type type,
&& type < IT_DIRECTIVE_HWM
&& (pfile->line_table->highest_location
!= LINE_MAP_MAX_LOCATION - 1));
+
+  if (decrement && LINEMAPS_ORDINARY_USED (pfile->line_table))
+{
+  const line_map_ordinary *map
+   = LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table);
+  if (map && map->start_location == pfile->line_table->highest_location)
+   decrement = false;
+}
+
   if (decrement)
 pfile->line_table->highest_location--;
 
-- 
2.43.0

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 08:15, Tomasz Kamiński  wrote:
>
> This patch implements formatter specializations for pair and tuple form
> P2286R8. In addition using 'm` and range_format::map (from P2585R1) for
> ranges are now supported.
>
> The formatters for pairs and tuples whose corresponding elements are the same
> (after applying remove_cvref_t) derive from the same __tuple_formatter class.
> This reduce the code duplication, as most of the parsing and formatting is the
> same in such cases. We use a custom reduced implementation of the tuple
> (__formatters_storage) to store the elements formatters.
>
> Handling of the padding (width and fill) options, is extracted to
> __format::__format_padded function, that is used both by __tuple_formatter and
> range_formatter. To reduce number of instantations range_formatter::format
> triggers, we cast incoming range to __format::__maybe_const_range<_Rg, 
> _CharT>&,
> before formatting it.
>
> As in the case of previous commits, the signatures of the user-facing parse
> and format methods of the provided formatters deviate from the standard by
> constraining types of parameters:
> * _CharT is constrained __formatter::__char
> * basic_format_parse_context<_CharT> for parse argument
> * basic_format_context<_Out, _CharT> for format second argument
> The standard specifies last three of above as unconstrained types.
>
> Finally, test for tuple-like std::array and std::ranges::subrange,
> that illustrate that they remain formatted as ranges.
>
> PR libstdc++/PR109162
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (__formatter_int::_M_format_character_escaped)
> (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
> (__format::__format_padded, __format::maybe_const)
> (__format::__indexed_formatter_storage, __format::__tuple_formatter)
> (std::formatter, _CharT>>)
> (std::formatter, _CharT): Define.
> (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> __format::__maybe_const_range<_Rg, _CharT>&.
> (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> and use __format_padded.
> (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
> __format_padded.
> * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> Define.
> * testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
> * testsuite/std/format/ranges/sequence.cc: Tests for array and 
> subrange.
> * testsuite/std/format/ranges/map.cc: New test.
> * testsuite/std/format/tuple.cc: New test.
> ---
> Testing on x86_64-linux, tests matched by `*format*` passes.
> OK for trunk? Should I wait for 16?

I'd like to get this in trunk before we branch for gcc-15, if possible.

>
>  libstdc++-v3/include/std/format   | 357 +++---
>  .../testsuite/std/format/ranges/formatter.cc  |   6 +-
>  .../testsuite/std/format/ranges/map.cc| 209 ++
>  .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
>  libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
>  .../testsuite/util/testsuite_iterators.h  |   3 +
>  6 files changed, 806 insertions(+), 80 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
>  create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index 096dda4f989..5b93eb8bc2d 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -1350,8 +1350,7 @@ namespace __format
> __fc, _M_spec);
>
>   __format::_Str_sink<_CharT> __sink;
> - __format::_Sink_iter<_CharT> __out(__sink);
> - __format::__write_escaped(__out, __s, __term);
> + __format::__write_escaped(__sink.out(), __s, __term);
>   basic_string_view<_CharT> __escaped(__sink.view().data(),
>   __sink.view().size());
>   const size_t __escaped_width = _S_trunc(__escaped, __prec);
> @@ -1387,13 +1386,13 @@ namespace __format
> {
>   ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
>   ranges::subrange __sub(__first, __first + __n);
> - return format(_String(from_range, __sub), __fc);
> + return format(_String(from_range, __sub), __fc);
> }
>   else
> {
>   // N.B. preserve the computed size
>   ranges::subrange __sub(__rg, __n);
> - return format(_String(from_range, __sub), __fc);
> + return format(_String(from_range, __sub), __fc);
> }
>

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely


On 16/04/25 09:13 +0200, Tomasz Kamiński wrote:

This patch implements formatter specializations for pair and tuple from
P2286R8. In addition, using `m` and range_format::map (from P2585R1) for
ranges is now supported.

The formatters for pairs and tuples whose corresponding elements are the same
(after applying remove_cvref_t) derive from the same __tuple_formatter class.
This reduces the code duplication, as most of the parsing and formatting is the
same in such cases. We use a custom reduced implementation of the tuple
(__formatters_storage) to store the element formatters.


This is a nice solution, the code for __formatters_storage and
__indexed_formatter_storage is clear and concise.


Handling of the padding (width and fill) options is extracted to the
__format::__format_padded function, which is used both by __tuple_formatter and
range_formatter. To reduce the number of instantiations range_formatter::format
triggers, we cast the incoming range to __format::__maybe_const_range<_Rg, _CharT>&,
before formatting it.

As in the case of previous commits, the signatures of the user-facing parse
and format methods of the provided formatters deviate from the standard by
constraining types of parameters:
* _CharT is constrained __formatter::__char
* basic_format_parse_context<_CharT> for parse argument
* basic_format_context<_Out, _CharT> for format second argument
The standard specifies last three of above as unconstrained types.

Finally, test for tuple-like std::array and std::ranges::subrange,
that illustrate that they remain formatted as ranges.

PR libstdc++/PR109162

libstdc++-v3/ChangeLog:

* include/std/format (__formatter_int::_M_format_character_escaped)
(__formatter_str::format): Use __sink.out() to produce _Sink_iter.
(__format::__format_padded, __format::maybe_const)
(__format::__indexed_formatter_storage, __format::__tuple_formatter)
(std::formatter, _CharT>>)
(std::formatter, _CharT): Define.
(std::formatter<_Rg, _CharT>::format): Cast incoming range to
__format::__maybe_const_range<_Rg, _CharT>&.
(std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
and use __format_padded.
(std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
(std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
(std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
__format_padded.
* testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
Define.
* testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
* testsuite/std/format/ranges/sequence.cc: Tests for array and subrange.
* testsuite/std/format/ranges/map.cc: New test.
* testsuite/std/format/tuple.cc: New test.
---
Testing on x86_64-linux, tests matched by `*format*` passes.
OK for trunk? Should I wait for 16?


A few minor comments below ...


libstdc++-v3/include/std/format   | 357 +++---
.../testsuite/std/format/ranges/formatter.cc  |   6 +-
.../testsuite/std/format/ranges/map.cc| 209 ++
.../testsuite/std/format/ranges/sequence.cc   |  52 ++-
libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
.../testsuite/util/testsuite_iterators.h  |   3 +
6 files changed, 806 insertions(+), 80 deletions(-)
create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 096dda4f989..5b93eb8bc2d 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1350,8 +1350,7 @@ namespace __format
__fc, _M_spec);

  __format::_Str_sink<_CharT> __sink;
- __format::_Sink_iter<_CharT> __out(__sink);
- __format::__write_escaped(__out, __s, __term);
+ __format::__write_escaped(__sink.out(), __s, __term);
  basic_string_view<_CharT> __escaped(__sink.view().data(),
  __sink.view().size());
  const size_t __escaped_width = _S_trunc(__escaped, __prec);
@@ -1387,13 +1386,13 @@ namespace __format
{
  ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
  ranges::subrange __sub(__first, __first + __n);
- return format(_String(from_range, __sub), __fc);
+ return format(_String(from_range, __sub), __fc);
}
  else
{
  // N.B. preserve the computed size
  ranges::subrange __sub(__rg, __n);
- return format(_String(from_range, __sub), __fc);
+ return format(_String(from_range, __sub), __fc);
}
}
  else
@@ -1698,7 +1697,7 @@ namespace __format
  template
typename basic_format_context<_Out,

Re: [PATCH] Locality cloning pass

2025-04-16 Thread Richard Biener

On Wed, Apr 16, 2025 at 11:08 AM xionghuluo  wrote:
>
> Hi, the bootstrap-lto-locality is much longer compared to bootstrap-lto
> and bootstrap, and
>
> It seems that stage2 and stage3 only produced 5 partitions in LTO, is
> this reasonable...

Likely due to the high default of

-param=lto-max-locality-partition=
Common Joined UInteger Var(param_max_locality_partition_size)
Init(100) Param
Maximal size of a locality partition for LTO (in estimated
instructions). Value of 0 results in default value being used.

or the failure to apply the smaller param_min_partition_size to fill
up param_lto_partitions (128).

> Also could you please inform how much is the exact performance gain, please?
>
>
> make bootstrap:   27m56.054s
> make BUILD_CONFIG=bootstrap-lto: 38m25.048s
> make BUILD_CONFIG=bootstrap-lto-locality:71m1.882s
>
>
> On 2025/4/15 22:38, Kyrylo Tkachov wrote:
> >
> >> On 15 Apr 2025, at 15:42, Richard Biener  
> >> wrote:
> >>
> >> On Mon, Apr 14, 2025 at 3:11 PM Kyrylo Tkachov  wrote:
> >>> Hi Honza,
> >>>
>  On 13 Apr 2025, at 23:19, Jan Hubicka  wrote:
> 
> > +@opindex fipa-reorder-for-locality
> > +@item -fipa-reorder-for-locality
> > +Group call chains close together in the binary layout to improve code 
> > code
> > +locality.  This option is incompatible with an explicit
> > +@option{-flto-partition=} option since it enforces a custom 
> > partitioning
> > +scheme.
>  Please also cross-link this with -fprofile-reorder-functions and
>  -freorder-functions, which does similar thing.
>  If you see how to clean-up the description of the other two so user is
>  not confused.
> 
>  Perhaps say that -freorder-functions only partitions functions into
>  never-executed/cold/normal/hot and -fprofile-reorder-functions is aiming
>  for program startup optimization (it reorders by measured first time the
>  function is executed.  By accident it seems to kind of work for
>  locality.
> >>> Yeah, the option names are quite similar aren't they?
> >>> I’ve attempted to disambiguate them a bit in their description.
> >>> I’m attaching a diff from the previous version (as the full updated 
> >>> patch) to make it easier to see what’s adjusted.
> >>>
> >>>
> > +
> > +/* Helper function of to accumulate call counts.  */
> > +static bool
> > +accumulate_profile_counts_after_cloning (cgraph_node *node, void *data)
> > +{
> > +  struct profile_stats *stats = (struct profile_stats *) data;
> > +  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
> > +{
> > +  if (e->caller == stats->target)
> > + {
> > +  if (stats->rec_count.compatible_p (e->count.ipa ()))
> > +stats->rec_count += e->count.ipa ();
> > + }
> > +  else
> > + {
> > +  if (stats->nonrec_count.compatible_p (e->count.ipa ()))
> > +stats->nonrec_count += e->count.ipa ();
> > + }
>  In case part of profile is missing (which may happen if one unit has -O0
>  or so) , we may have counts to be uninitialized. Uninitialized counts are
>  compatible with everything, but any arithmetics with it will produce
>  uninitialized result which will likely confuse code later.  So I would
>  skip edges with uninitialized counts.
> 
>  On the other hand ipa counts are always compatible, so compatible_p
>  should be redundant. Main reason for existence of compatible_p is that we
>  can have local profiles that are 0 or unknown at IPA level.  The ipa ()
>  conversion turns all counts into IPA counts and those are compatible
>  with each other.
> 
>  I suppose compatible_p test is there since the code ICEd in past, but I
>  think it was because of missing ipa() conversion.
> 
> 
> > +}
> > +  return false;
> > +}
> > +
> > +/* NEW_NODE is a previously created clone of ORIG_NODE already present 
> > in
> > +   current partition.  EDGES contains newly redirected edges to 
> > NEW_NODE.
> > +   Adjust profile information for both nodes and the edge.  */
> > +
> > +static void
> > +adjust_profile_info_for_non_self_rec_edges (auto_vec 
> > &edges,
> > +cgraph_node *new_node,
> > +cgraph_node *orig_node)
> > +{
> > +  profile_count orig_node_count = orig_node->count.ipa ();
> > +  profile_count edge_count = profile_count::zero ();
> > +  profile_count final_new_count = profile_count::zero ();
> > +  profile_count final_orig_count = profile_count::zero ();
> > +
> > +  for (unsigned i = 0; i < edges.length (); ++i)
> > +edge_count += edges[i]->count.ipa ();
>  Here I would again skip uninitialized.  It is probably legal for -O0
>  function to end up in partition.
> > +
> > +  final_orig_count = orig_node_count - edge_count;
> > +
> > +  /* NEW_NODE->count was adjuste

[PATCH] tree-optimization/115777 - STLF fails with BB vectorization of loop

2025-04-16 Thread Richard Biener

The following tries to address us BB vectorizing a loop body that
swaps consecutive elements of an array like for bubble-sort.  This
causes the vector store in the previous iteration to fail to forward
to the vector load in the current iteration since there's a partial
overlap.

We try to detect this situation by looking for a load to store
data dependence and analyze this with respect to the containing loop
for a proven problematic access.  Currently the search for a
problematic pair is limited to loads and stores in the same SLP
instance which means the problematic load happens in the next
loop iteration and larger dependence distances are not considered.

On x86 with generic costing this avoids vectorizing the loop body,
but once you do core-specific tuning the saved cost for the vector
store vs. the scalar stores makes vectorization still profitable,
but at least the STLF issue is avoided.

For example on my Zen4 machine with -O2 -march=znver4 the testcase in
the PR is improving from
  insertion_sort  => 2327
to
  insertion_sort  =>  997
but plain -O2 (or -fno-tree-slp-vectorize) gives
  insertion_sort  =>  183
In the end a better target-side cost model for small vector
vectorization is needed to reject this vectorization from this side.

I'll note this is a machine independent heuristic (similar to the
avoid-store-forwarding RTL optimization pass), I expect that uarchs
implementing vectors will suffer from this kind of issue.  I know
some aarch64 uarchs can forward from upper/lower part stores, this
isn't considered at the moment.  The actual vector size/overlap
distance check could be moved to a target hook if it turns out
necessary.

There might be the chance to use a smaller vector size for the loads
avoiding the penalty rather than falling back to elementwise accesses,
that's not implemented either.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  At this point
queued for stage1, possibly for backport for 15.2.

Richard.

PR tree-optimization/115777
* tree-vectorizer.h (_slp_tree::avoid_stlf_fail): New member.
* tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize it.
(vect_print_slp_tree): Dump it.
* tree-vect-data-refs.cc (vect_slp_analyze_instance_dependence):
For dataflow dependent loads of a store check whether there's
a cross-iteration data dependence that for sure prohibits
store-to-load forwarding and mark involved loads.
* tree-vect-stmts.cc (get_group_load_store_type): For avoid_stlf_fail
marked loads use VMAT_ELEMENTWISE.

* gcc.dg/vect/bb-slp-pr115777.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c | 15 
 gcc/tree-vect-data-refs.cc  | 91 +
 gcc/tree-vect-slp.cc|  4 +-
 gcc/tree-vect-stmts.cc  |  8 ++
 gcc/tree-vectorizer.h   |  3 +
 5 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c
new file mode 100644
index 000..bba0dc75f6f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr115777.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+typedef unsigned int T;
+
+#define SWAP(A, B) do { T tmp = A; A = B; B = tmp; } while (0)
+
+void
+insertion_sort(T *v, int n)
+{
+  for (int i = 1; i < n; ++i)
+for (int k = i; k > 0 && v[k-1] > v[k]; --k)
+  SWAP(v[k-1], v[k]);
+}
+
+/* { dg-final { scan-tree-dump "using element-wise load" "slp1" { target { { 
x86_64-*-* i?86-*-* } && { ! ia32 } } } } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c9395e33fcd..231a3cab4f8 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1203,6 +1203,97 @@ vect_slp_analyze_instance_dependence (vec_info *vinfo, 
slp_instance instance)
 for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
   gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
 
+  /* If this is a SLP instance with a store check if there's a dependent
+ load that cannot be forwarded from a previous iteration of a loop
+ both are in.  This is to avoid situations like that in PR115777.  */
+  if (res && store)
+{
+  stmt_vec_info store_info
+   = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
+  class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
+  if (! loop_outer (store_loop))
+   return res;
+  vec loop_nest;
+  loop_nest.create (1);
+  loop_nest.quick_push (store_loop);
+  data_reference *drs = nullptr;
+  for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
+   {
+ if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
+   continue;
+ stmt_vec_info load_info
+   = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
+

RE: [PATCH]middle-end: fix masking for partial vectors and early break [PR119351]

2025-04-16 Thread Tamar Christina

> -Original Message-
> From: Richard Biener 
> Sent: Wednesday, April 16, 2025 9:57 AM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd 
> Subject: Re: [PATCH]middle-end: fix masking for partial vectors and early 
> break
> [PR119351]
> 
> On Wed, 16 Apr 2025, Tamar Christina wrote:
> 
> > Hi All,
> >
> > The following testcase shows an incorrect masked codegen:
> >
> > #define N 512
> > #define START 1
> > #define END 505
> >
> > int x[N] __attribute__((aligned(32)));
> >
> > int __attribute__((noipa))
> > foo (void)
> > {
> >   int z = 0;
> >   for (unsigned int i = START; i < END; ++i)
> > {
> >   z++;
> >   if (x[i] > 0)
> > continue;
> >
> >   return z;
> > }
> >   return -1;
> > }
> >
> > notice how there's a continue there instead of a break.  This means we 
> > generate
> > a control flow where success stays within the loop iteration:
> 
> I think that's just a fancy way of rotating the loop.
> 
> So the key issue is that when the exit test is "inverted", aka it
> stays in the loop when true and exits when false, we do
> 
>   if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
>  exit_true_edge->dest))
> {
>   new_code = EQ_EXPR;
>   reduc_optab = and_optab;
>   reduc_op = BIT_AND_EXPR;
>   cst = build_minus_one_cst (vectype);
> 
> and with PFA with mask and the initial loop mask of { 0, 0, -1, -1 }
> we then exit early and the scalar loop does not correctly handle
> this case (apart from it being a missed optimization).  For the
> regular non-inverted case we use

Indeed. 

> 
>   auto new_code = NE_EXPR;
>   auto reduc_optab = ior_optab;
>   auto reduc_op = BIT_IOR_EXPR;
>   tree cst = build_zero_cst (vectype);
> 
> and that is fine.
> 
> >
> >   mask_patt_9.12_46 = vect__1.11_45 > { 0, 0, 0, 0 };
> >   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
> >   if (vec_mask_and_47 == { -1, -1, -1, -1 })
> > goto ; [41.48%]
> >   else
> > goto ; [58.52%]
> >
> > However when loop_mask_41 is a partial mask this comparison can lead to an
> > incorrect match.  In this case the mask is:
> >
> >   # loop_mask_41 = PHI 
> >
> > due to peeling for alignment with masking and compiling with
> > -msve-vector-bits=128.
> >
> > At codegen time we generate:
> >
> > ptrue   p15.s, vl4
> > ptrue   p7.b, vl1
> > not p7.b, p15/z, p7.b
> > .L5:
> > ld1wz29.s, p7/z, [x1, x0, lsl 2]
> > cmpgt   p7.s, p7/z, z29.s, #0
> > not p7.b, p15/z, p7.b
> > ptest   p15, p7.b
> > b.none  .L2
> > ..
> >
> > notice how at expand time the basic blocks are inverted and a not is 
> > generated.
> > But the generated not is unmasked (or predicated over an ALL true mask in 
> > this
> > case).  This has the unintended side-effect of flipping the results of the
> > inactive lanes (which were zero'd by the cmpgt) into -1.  Which then 
> > incorrectly
> > causes us to not take the branch to .L2.
> >
> > This is happening because the expander has no context about the mask, and
> since
> > we can't mask a gcond, we do the next best thing which is to mask both
> operands.
> 
> So you make this sound as if it were a bug in the expander because
> "it doesn't know"?  I think a compare against {-1,...} is flawed,
> this case needs to compare against loop_mask, not all-ones, no?
> 
> So instead of
> 
> >   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
> >   if (vec_mask_and_47 == { -1, -1, -1, -1 })
> 
> do
> 
> >   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
> >   if (vec_mask_and_47 == loop_mask_41)
> 
> which is sort-of what you do, of course, just in an odd way (IMO).

Yeah, but the reason I did this is that the RVV vec_len helper 
vect_gen_loop_len_mask
doesn't give me the mask.  So I guess the question what do I use in this case.

I will have to refactor vect_gen_loop_len_mask to split it into something that 
can get the
mask and does the masking like SVE.

I did it this way as I was expecting the optimizers to take care of it anyway.

Do you still want this change?

Thanks,
Tamar

> 
> Richard.
> 
> > We already mask the compare, but this patch now also masks the constant.  In
> the
> > normal case this means we drop it since {0, ..} & mask = {0, ..} but in the 
> > case
> > of an forall comparison we'll keep the mask, allowing the generated code to
> > correctly mask the results.
> >
> > For the above we now generate:
> >
> > .L5:
> > ld1wz28.s, p7/z, [x1, x0, lsl 2]
> > cmpgt   p14.s, p7/z, z28.s, #0
> > eorsp7.b, p15/z, p7.b, p14.b
> > b.none  .L2
> >
> > This fixes gromacs with > 1 OpenMP threads and improves performance.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> > -m32, -m64 and no issues.
> >
> > Ok for master? and backport to GCC-14?
> >
> > Thanks,
> > Tamar
> >
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/119351
> > * tree-vect-stmts.cc (vector

[PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kamiński

This patch implements formatter specializations for pair and tuple from
P2286R8. In addition, using 'm' and range_format::map (from P2585R1) for
ranges is now supported.

The formatters for pairs and tuples whose corresponding elements are the same
(after applying remove_cvref_t) derive from the same __tuple_formatter class.
This reduces code duplication, as most of the parsing and formatting is the
same in such cases. We use a custom reduced implementation of the tuple
(__formatters_storage) to store the element formatters.

Handling of the padding (width and fill) options is extracted to the
__format::__format_padded function, which is used both by __tuple_formatter and
range_formatter. To reduce the number of instantiations range_formatter::format
triggers, we cast the incoming range to __format::__maybe_const_range<_Rg, _CharT>&,
before formatting it.

As in the case of previous commits, the signatures of the user-facing parse
and format methods of the provided formatters deviate from the standard by
constraining types of parameters:
* _CharT is constrained __formatter::__char
* basic_format_parse_context<_CharT> for parse argument
* basic_format_context<_Out, _CharT> for format second argument
The standard specifies last three of above as unconstrained types.

Finally, test for tuple-like std::array and std::ranges::subrange,
that illustrate that they remain formatted as ranges.

PR libstdc++/PR109162

libstdc++-v3/ChangeLog:

* include/std/format (__formatter_int::_M_format_character_escaped)
(__formatter_str::format): Use __sink.out() to produce _Sink_iter.
(__format::__format_padded, __format::maybe_const)
(__format::__indexed_formatter_storage, __format::__tuple_formatter)
(std::formatter, _CharT>>)
(std::formatter, _CharT): Define.
(std::formatter<_Rg, _CharT>::format): Cast incoming range to
__format::__maybe_const_range<_Rg, _CharT>&.
(std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
and use __format_padded.
(std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
(std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
(std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
__format_padded.
* testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
Define.
* testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
* testsuite/std/format/ranges/sequence.cc: Tests for array and subrange.
* testsuite/std/format/ranges/map.cc: New test.
* testsuite/std/format/tuple.cc: New test.
---
Testing on x86_64-linux, tests matched by `*format*` passes.
OK for trunk? Should I wait for 16?

 libstdc++-v3/include/std/format   | 357 +++---
 .../testsuite/std/format/ranges/formatter.cc  |   6 +-
 .../testsuite/std/format/ranges/map.cc| 209 ++
 .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
 libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
 .../testsuite/util/testsuite_iterators.h  |   3 +
 6 files changed, 806 insertions(+), 80 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
 create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 096dda4f989..5b93eb8bc2d 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1350,8 +1350,7 @@ namespace __format
__fc, _M_spec);
 
  __format::_Str_sink<_CharT> __sink;
- __format::_Sink_iter<_CharT> __out(__sink);
- __format::__write_escaped(__out, __s, __term);
+ __format::__write_escaped(__sink.out(), __s, __term);
  basic_string_view<_CharT> __escaped(__sink.view().data(),
  __sink.view().size());
  const size_t __escaped_width = _S_trunc(__escaped, __prec);
@@ -1387,13 +1386,13 @@ namespace __format
{
  ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
  ranges::subrange __sub(__first, __first + __n);
- return format(_String(from_range, __sub), __fc);
+ return format(_String(from_range, __sub), __fc);
}
  else
{
  // N.B. preserve the computed size
  ranges::subrange __sub(__rg, __n);
- return format(_String(from_range, __sub), __fc);
+ return format(_String(from_range, __sub), __fc);
}
}
  else
@@ -1698,7 +1697,7 @@ namespace __format
   template
typename basic_format_context<_Out, _CharT>::iterator
_M_format_character_escaped(_CharT __c,
-  basic_format_context<_Out, _CharT>& __fc) 
const
+   bas

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

Just to clarify, we still will be missing the formatter for adaptors
(stack, queue, priority_queue).

On Wed, Apr 16, 2025 at 10:04 AM Jonathan Wakely  wrote:

> On Wed, 16 Apr 2025 at 08:15, Tomasz Kamiński  wrote:
> >
> > This patch implements formatter specializations for pair and tuple form
> > P2286R8. In addition using 'm` and range_format::map (from P2585R1) for
> > ranges are now supported.
> >
> > The formatters for pairs and tuples whose corresponding elements are the
> same
> > (after applying remove_cvref_t) derive from the same __tuple_formatter
> class.
> > This reduce the code duplication, as most of the parsing and formatting
> is the
> > same in such cases. We use a custom reduced implementation of the tuple
> > (__formatters_storage) to store the elements formatters.
> >
> > Handling of the padding (width and fill) options, is extracted to
> > __format::__format_padded function, that is used both by
> __tuple_formatter and
> > range_formatter. To reduce number of instantations
> range_formatter::format
> > triggers, we cast incoming range to __format::__maybe_const_range<_Rg,
> _CharT>&,
> > before formatting it.
> >
> > As in the case of previous commits, the signatures of the user-facing
> parse
> > and format methods of the provided formatters deviate from the standard
> by
> > constraining types of parameters:
> > * _CharT is constrained __formatter::__char
> > * basic_format_parse_context<_CharT> for parse argument
> > * basic_format_context<_Out, _CharT> for format second argument
> > The standard specifies last three of above as unconstrained types.
> >
> > Finally, test for tuple-like std::array and std::ranges::subrange,
> > that illustrate that they remain formatted as ranges.
> >
> > PR libstdc++/PR109162
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/format
> (__formatter_int::_M_format_character_escaped)
> > (__formatter_str::format): Use __sink.out() to produce
> _Sink_iter.
> > (__format::__format_padded, __format::maybe_const)
> > (__format::__indexed_formatter_storage,
> __format::__tuple_formatter)
> > (std::formatter, _CharT>>)
> > (std::formatter, _CharT): Define.
> > (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> > __format::__maybe_const_range<_Rg, _CharT>&.
> > (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> > and use __format_padded.
> > (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> > (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> > (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted
> as
> > __format_padded.
> > * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> > Define.
> > * testsuite/std/format/ranges/formatter.cc: Tests for `m`
> specifier.
> > * testsuite/std/format/ranges/sequence.cc: Tests for array and
> subrange.
> > * testsuite/std/format/ranges/map.cc: New test.
> > * testsuite/std/format/tuple.cc: New test.
> > ---
> > Testing on x86_64-linux, tests matched by `*format*` passes.
> > OK for trunk? Should I wait for 16?
>
> I'd like to get this in trunk before we branch for gcc-15, if possible.
>
> >
> >  libstdc++-v3/include/std/format   | 357 +++---
> >  .../testsuite/std/format/ranges/formatter.cc  |   6 +-
> >  .../testsuite/std/format/ranges/map.cc| 209 ++
> >  .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
> >  libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
> >  .../testsuite/util/testsuite_iterators.h  |   3 +
> >  6 files changed, 806 insertions(+), 80 deletions(-)
> >  create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
> >  create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
> >
> > diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> > index 096dda4f989..5b93eb8bc2d 100644
> > --- a/libstdc++-v3/include/std/format
> > +++ b/libstdc++-v3/include/std/format
> > @@ -1350,8 +1350,7 @@ namespace __format
> > __fc, _M_spec);
> >
> >   __format::_Str_sink<_CharT> __sink;
> > - __format::_Sink_iter<_CharT> __out(__sink);
> > - __format::__write_escaped(__out, __s, __term);
> > + __format::__write_escaped(__sink.out(), __s, __term);
> >   basic_string_view<_CharT> __escaped(__sink.view().data(),
> >   __sink.view().size());
> >   const size_t __escaped_width = _S_trunc(__escaped, __prec);
> > @@ -1387,13 +1386,13 @@ namespace __format
> > {
> >   ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
> >   ranges::subrange __sub(__first, __first + __n);
> > - return format(_String(from_range, __sub), __fc);
> > + return format(_String(fro

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely


On 16/04/25 10:37 +0200, Tomasz Kaminski wrote:

Just to clarify, we still will be missing the formatter for adaptors
(stack, queue, priority_queue).


Yes, but I doubt most people want to use those :-)

Re: [PATCH] libatomic: Fix up libat_{,un}lock_n for mingw [PR119796]

2025-04-16 Thread Richard Biener

On Mon, 14 Apr 2025, Jakub Jelinek wrote:

> Hi!
> 
> Here is just a port of the previously posted patch to mingw which
> clearly has the same problems.
> 
> Untested though, I don't have Windows anywhere around.

OK.

Richard.

> 2025-04-14  Jakub Jelinek  
> 
>   PR libstdc++/119796
>   * config/mingw/lock.c (libat_lock_n, libat_unlock_n): Start with
>   computing how many locks will be needed and take into account
>   ((uintptr_t)ptr % WATCH_SIZE).  If some locks from the end of the
>   locks array and others from the start of it will be needed, first
>   lock the ones from the start followed by ones from the end.
> 
> --- libatomic/config/mingw/lock.c.jj  2025-04-08 14:09:40.988589457 +0200
> +++ libatomic/config/mingw/lock.c 2025-04-14 15:39:08.244686172 +0200
> @@ -87,21 +87,30 @@ libat_lock_n (void *ptr, size_t n)
>  {
>uintptr_t h = addr_hash (ptr);
>size_t i = 0;
> +  size_t nlocks
> += (n + ((uintptr_t)ptr % WATCH_SIZE) + WATCH_SIZE - 1) / WATCH_SIZE;
>  
>/* Don't lock more than all the locks we have.  */
> -  if (n > PAGE_SIZE)
> -n = PAGE_SIZE;
> +  if (nlocks > NLOCKS)
> +nlocks = NLOCKS;
>  
> -  do
> +  if (__builtin_expect (h + nlocks > NLOCKS, 0))
> +{
> +  size_t j = h + nlocks - NLOCKS;
> +  for (; i < j; ++i)
> + {
> +   if (!locks[i].mutex)
> + locks[i].mutex = CreateMutex (NULL, FALSE, NULL);
> +   WaitForSingleObject (locks[i].mutex, INFINITE);
> + }
> +}
> +
> +  for (; i < nlocks; ++i)
>  {
>if (!locks[h].mutex)
> - locks[h].mutex = CreateMutex  (NULL, FALSE, NULL);
> -  WaitForSingleObject (locks[h].mutex, INFINITE);
> -  if (++h == NLOCKS)
> - h = 0;
> -  i += WATCH_SIZE;
> + locks[h].mutex = CreateMutex (NULL, FALSE, NULL);
> +  WaitForSingleObject (locks[h++].mutex, INFINITE);
>  }
> -  while (i < n);
>  }
>  
>  void
> @@ -109,17 +118,22 @@ libat_unlock_n (void *ptr, size_t n)
>  {
>uintptr_t h = addr_hash (ptr);
>size_t i = 0;
> +  size_t nlocks
> += (n + ((uintptr_t)ptr % WATCH_SIZE) + WATCH_SIZE - 1) / WATCH_SIZE;
>  
> -  if (n > PAGE_SIZE)
> -n = PAGE_SIZE;
> +  /* Don't lock more than all the locks we have.  */
> +  if (nlocks > NLOCKS)
> +nlocks = NLOCKS;
>  
> -  do
> +  if (__builtin_expect (h + nlocks > NLOCKS, 0))
>  {
> -  if (locks[h].mutex)
> - ReleaseMutex (locks[h].mutex);
> -  if (++h == NLOCKS)
> - h = 0;
> -  i += WATCH_SIZE;
> +  size_t j = h + nlocks - NLOCKS;
> +  for (; i < j; ++i)
> + if (locks[i].mutex)
> +   ReleaseMutex (locks[i].mutex);
>  }
> -  while (i < n);
> +
> +  for (; i < nlocks; ++i, ++h)
> +if (locks[h].mutex)
> +  ReleaseMutex (locks[h].mutex);
>  }
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH] libstdc++: Fix constraint recursion in basic_const_iterator operator- [PR115046]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025, 03:17 Hans-Peter Nilsson,  wrote:

> On Tue, 8 Apr 2025, Patrick Palka wrote:
>
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk/14?
>
> It's not mentioned very often, but is a general rule:
>
> Pretty please, add new files for new tests, don't just edit
> existing files.  (For one: if they start failing, they look like
> regressions.)
>

The reason we don't always do that is performance. Some of these ranges
tests take several seconds to compile. Adding another small test to the end
of the file doesn't increase that too much, because all the templates have
already been compiled once. Adding a new file means compiling the whole
thing (and doing the dejagnu startup checks) all over again.

The libstdc++ tests are slow enough already, editing existing test files is
a way to mitigate that.

It's even worse for me, because I run every single test with three
permutations in --target_board and then multiply that by up to seven -std
modes.

It's true that new failures in existing tests can look like regressions but
that's usually a one-off cost that is easily resolved. Does it do any real
harm apart from initially miscategorizing a bug? The ever-increasing size
of the testsuite and the time it takes is a problem forever, not a one-off
cost.

Re: [PATCH] bitintlower: Fix interaction of gimple_assign_copy_p stmts vs. has_single_use [PR119808]

2025-04-16 Thread Richard Biener

On Wed, 16 Apr 2025, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled, because we emit a CLOBBER in a place
> where it shouldn't be emitted.
> Before lowering we have:
>   b_5 = 0;
>   b.0_6 = b_5;
>   b.1_1 = (unsigned _BitInt(129)) b.0_6;
> ...
>= b_5;
> The bitint coalescing assigns the same partition/underlying variable
> for both b_5 and b.0_6 (possible because there is a copy assignment)
> and of course a different one for b.1_1 (and other SSA_NAMEs in between).
> This is -O0 so stmts aren't DCEd and aren't propagated that much etc.
> It is -O0 so we also don't try to optimize and omit some names from m_names
> and handle multiple stmts at once, so the expansion emits essentially
>   bitint.4 = {};
>   bitint.4 = bitint.4;
>   bitint.2 = cast of bitint.4;
>   bitint.4 = CLOBBER;
> ...
>= bitint.4;
> and the CLOBBER is the problem because bitint.4 is still live afterwards.
> We emit the clobbers to improve code generation, but do it only for
> (initially) has_single_use SSA_NAMEs (remembered in m_single_use_names)
> being used, if they don't have the same partition on the lhs and a few
> other conditions.
> The problem above is that b.0_6 which is used in the cast has_single_use
> and so was in m_single_use_names bitmask and the lhs in that case is
> bitint.2, so a different partition.  But there is gimple_assign_copy_p
> with SSA_NAME rhs1 and the partitioning special cases those and while
> b.0_6 is single use, b_5 has multiple uses.  I believe this ought to be
> a problem solely in the case of such copy stmts and its special case
> by the partitioning, if instead of b.0_6 = b_5; there would be
> b.0_6 = b_5 + 1; or whatever other stmts that performs or may perform
> changes on the value, partitioning couldn't assign the same partition
> to b.0_6 and b_5 if b_5 is used later, it couldn't have two different
> (or potentially different) values in the same bitint.N var.  With
> copy that is possible though.
> 
> So the following patch fixes it by being more careful when we set
> m_single_use_names, don't set it if it is a has_single_use SSA_NAME
> but SSA_NAME_DEF_STMT of it is a copy stmt with SSA_NAME rhs1 and that
> rhs1 doesn't have single use, or has_single_use but SSA_NAME_DEF_STMT of it
> is a copy stmt etc.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Just to make sure it doesn't change code generation too much, I've gathered
> statistics how many times
>   if (m_first
>   && m_single_use_names
>   && m_vars[p] != m_lhs
>   && m_after_stmt
>   && bitmap_bit_p (m_single_use_names, SSA_NAME_VERSION (op)))
> {
>   tree clobber = build_clobber (TREE_TYPE (m_vars[p]),
> CLOBBER_STORAGE_END);
>   g = gimple_build_assign (m_vars[p], clobber);
>   gimple_stmt_iterator gsi = gsi_for_stmt (m_after_stmt);
>   gsi_insert_after (&gsi, g, GSI_SAME_STMT);
> }
> emits a clobber on
> make check-gcc GCC_TEST_RUN_EXPENSIVE=1 
> RUNTESTFLAGS="--target_board=unix\{-m64,-m32\} GCC_TEST_RUN_EXPENSIVE=1 
> dg.exp='*bitint* pr112673.c builtin-stdc-bit-*.c pr112566-2.c pr112511.c 
> pr116588.c pr116003.c pr113693.c pr113602.c flex-array-counted-by-7.c' 
> dg-torture.exp='*bitint* pr116480-2.c pr114312.c pr114121.c' dfp.exp=*bitint* 
> i386.exp='pr118017.c pr117946.c apx-ndd-x32-2a.c' 
> vect.exp='vect-early-break_99-pr113287.c' tree-ssa.exp=pr113735.c"
> and before this patch it was 41010 clobbers and after it is 40968,
> so difference is 42 clobbers, 0.1% fewer.

OK.

Richard.

> 2025-04-16  Jakub Jelinek  
> 
>   PR middle-end/119808
>   * gimple-lower-bitint.cc (gimple_lower_bitint): Don't set
>   m_single_use_names bits for SSA_NAMEs which have single use but
>   their SSA_NAME_DEF_STMT is a copy from another SSA_NAME which doesn't
>   have a single use, or single use which is such a copy etc.
> 
>   * gcc.dg/bitint-121.c: New test.
> 
> --- gcc/gimple-lower-bitint.cc.jj 2025-04-12 13:13:47.543814860 +0200
> +++ gcc/gimple-lower-bitint.cc2025-04-15 21:00:32.779348865 +0200
> @@ -6647,10 +6647,28 @@ gimple_lower_bitint (void)
> bitmap_set_bit (large_huge.m_names, SSA_NAME_VERSION (s));
> if (has_single_use (s))
>   {
> -   if (!large_huge.m_single_use_names)
> - large_huge.m_single_use_names = BITMAP_ALLOC (NULL);
> -   bitmap_set_bit (large_huge.m_single_use_names,
> -   SSA_NAME_VERSION (s));
> +   tree s2 = s;
> +   /* The coalescing hook special cases SSA_NAME copies.
> +  Make sure not to mark in m_single_use_names single
> +  use SSA_NAMEs copied from non-single use SSA_NAMEs.  */
> +   while (gimple_assign_copy_p (SSA_NAME_DEF_STMT (s2)))
> + {
> +   s2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (s2));
> +   if (TREE_CODE (s2) != SSA_NA

[PATCH] libstdc++: Do not use 'not' alternative token in

2025-04-16 Thread Jonathan Wakely

This fixes:
FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++23 (test for excess 
errors)
FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++26 (test for excess 
errors)

The purpose of 'not defined>' is to be ill-formed (as
required by [format.range.fmtkind]) and to give an error that includes
the string "not defined>". That was intended to tell you
that format_kind is not defined, just like it says!

But user code can use -fno-operator-names so we can't use 'not' here,
and "! defined" in the diagnostic doesn't seem as user-friendly. It also
raises questions about whether it was intended to be the preprocessor
token 'defined' (it's not) or where 'defined' is defined (it's not).

Replace it with __no_primary_template> and a comment,
which seems almost as good. The diagnostic now looks like:

In file included from fmt.cc:1:
.../include/c++/15.0.1/format: In instantiation of 'constexpr const auto 
std::format_kind':
fmt.cc:3:15:   required from here
3 | auto i = std::format_kind;
  |   ^~~~
.../include/c++/15.0.1/format:5164:31: error: use of 'std::format_kind' 
before deduction of 'auto'
 5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
specialization
  |   ^~~~
.../include/c++/15.0.1/format:5164:30: error: '__no_primary_template' was not 
declared in this scope
 5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
specialization
  | ~^~

libstdc++-v3/ChangeLog:

* include/std/format (format_kind): Do not use 'not'
alternative token to make the primary template ill-formed.
Use __no_primary_template as the undefined identifier that will
appear in diagnostics.
---

Testing now on x86_64-linux.

 libstdc++-v3/include/std/format | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index b1455977c65..9ce9b3cfed1 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -5160,7 +5160,8 @@ namespace __format
 
   /// @cond undocumented
   template
-constexpr auto format_kind = not defined(format_kind<_Rg>);
+constexpr auto format_kind
+  = __no_primary_template(format_kind<_Rg>); // must define specialization
 
   template
 consteval range_format
-- 
2.49.0

[PATCH] libstdc++: Fix constification in range_formatter::format [PR109162]

2025-04-16 Thread Tomasz Kamiński

The _Rg is deduced to lvalue reference for the lvalue arguments,
and in such case __format::__maybe_const_range<_Rg, _CharT> is always _Rg
(adding const to reference does not change behavior).

Now we correctly check if _Range = const remove_reference_t<_Rg> is a
formattable range. Furthermore, as range_formatter can only format
ranges of values of type (possibly const) _Tp, we additionally check if
remove_cvref_t<range_reference_t<_Range>> is _Tp.

The range_reference_t<R> and range_reference_t<const R> have different
types (modulo remove_cvref_t) for std::vector<bool> (::reference and bool)
or flat_map<T, U> (pair<const T&, U&> and pair<const T&, const U&>).

PR libstdc++/PR109162

libstdc++-v3/ChangeLog:

* include/std/format (range_formatter::format): Format const range,
only if reference type is not changed.
* testsuite/std/format/ranges/formatter.cc: New tests.
---
Testing on x86_64-linux, test for *format* passed.
OK for trunk?

 libstdc++-v3/include/std/format   | 11 +++---
 .../testsuite/std/format/ranges/formatter.cc  | 22 +++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index b1455977c65..3ae1fc1877c 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -5634,9 +5634,14 @@ namespace __format
typename basic_format_context<_Out, _CharT>::iterator
format(_Rg&& __rg, basic_format_context<_Out, _CharT>& __fc) const
{
- using __maybe_const_range
-   = __format::__maybe_const_range<_Rg, _CharT>;
- return _M_format<__maybe_const_range>(__rg, __fc);
+ using _Range = remove_reference<_Rg>;
+ if constexpr (__format::__const_formattable_range<_Range, _CharT>)
+ {
+   using _CRef = ranges::range_reference_t;
+   if constexpr (same_as, _Tp>)
+ return _M_format(__rg, __fc);
+ }
+ return _M_format(__rg, __fc);
}
 
 private:
diff --git a/libstdc++-v3/testsuite/std/format/ranges/formatter.cc 
b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
index a4f5d9210dd..00ce9f6dd0c 100644
--- a/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
+++ b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
@@ -1,5 +1,6 @@
 // { dg-do run { target c++23 } }
 
+#include 
 #include 
 #include 
 #include 
@@ -138,6 +139,26 @@ test_nested()
   VERIFY( res == "+<01; 02; 11; 12>+" );
 }
 
+struct MyFlatMap : std::flat_map
+{
+  using std::flat_map::flat_map;
+};
+
+template
+struct std::formatter
+  // This cannot apply format BitVector const&, because formatted type would
+  // be std::pair, and formatter for
+  // pair cannot format it.
+  : std::range_formatter
+{};
+
+void test_const_ref_type_mismatch()
+{
+  MyFlatMap m{{1, 11}, {2, 22}};
+  std::string res = std::format("{:m}", m);
+  VERIFY( res == "{1: 11, 2: 22}" );
+}
+
 template
 using VectorFormatter = std::formatter, CharT>;
 
@@ -146,4 +167,5 @@ int main()
   test_outputs();
   test_outputs();
   test_nested();
+  test_const_ref_type_mismatch();
 }
-- 
2.49.0

Re: [PATCH] libstdc++: Fix constification in range_formatter::format [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 12:54, Tomasz Kamiński  wrote:
>
> The _Rg is deduced to lvalue reference for the lvalue arguments,
> and in such case __format::__maybe_const_range<_Rg, _CharT> is always _Rg
> (adding const to reference does not change behavior).
>
> Now we correctly check if _Range = const remove_reference_t<_Rg> is
> formattable range, furthermore as range_formatter can only format
> ranges of values of type (possibly const) _Tp, we additional check if the
> remove_cvref_t> is _Tp.
>
> The range_reference_t and range_reference_t have different
> type (module remove_cvref_t) for std::vector (::reference and bool)

s/module/modulo/ ?

OK with that tweak.

> or flat_map (pair and pair).
>
> PR libstdc++/PR109162
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (range_formatter::format): Format const range,
> only if reference type is not changed.
> * testsuite/std/format/ranges/formatter.cc: New tests.
> ---
> Testing on x86_64-linux, test for *format* passed.
> OK for trunk?
>
>  libstdc++-v3/include/std/format   | 11 +++---
>  .../testsuite/std/format/ranges/formatter.cc  | 22 +++
>  2 files changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index b1455977c65..3ae1fc1877c 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -5634,9 +5634,14 @@ namespace __format
> typename basic_format_context<_Out, _CharT>::iterator
> format(_Rg&& __rg, basic_format_context<_Out, _CharT>& __fc) const
> {
> - using __maybe_const_range
> -   = __format::__maybe_const_range<_Rg, _CharT>;
> - return _M_format<__maybe_const_range>(__rg, __fc);
> + using _Range = remove_reference<_Rg>;
> + if constexpr (__format::__const_formattable_range<_Range, _CharT>)
> + {
> +   using _CRef = ranges::range_reference_t;
> +   if constexpr (same_as, _Tp>)
> + return _M_format(__rg, __fc);
> + }
> + return _M_format(__rg, __fc);
> }
>
>  private:
> diff --git a/libstdc++-v3/testsuite/std/format/ranges/formatter.cc 
> b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
> index a4f5d9210dd..00ce9f6dd0c 100644
> --- a/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
> +++ b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc
> @@ -1,5 +1,6 @@
>  // { dg-do run { target c++23 } }
>
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -138,6 +139,26 @@ test_nested()
>VERIFY( res == "+<01; 02; 11; 12>+" );
>  }
>
> +struct MyFlatMap : std::flat_map
> +{
> +  using std::flat_map::flat_map;
> +};
> +
> +template
> +struct std::formatter
> +  // This cannot apply format BitVector const&, because formatted type would
> +  // be std::pair, and formatter for
> +  // pair cannot format it.
> +  : std::range_formatter
> +{};
> +
> +void test_const_ref_type_mismatch()
> +{
> +  MyFlatMap m{{1, 11}, {2, 22}};
> +  std::string res = std::format("{:m}", m);
> +  VERIFY( res == "{1: 11, 2: 22}" );
> +}
> +
>  template
>  using VectorFormatter = std::formatter, CharT>;
>
> @@ -146,4 +167,5 @@ int main()
>test_outputs();
>test_outputs();
>test_nested();
> +  test_const_ref_type_mismatch();
>  }
> --
> 2.49.0
>

Re: [PATCH] libstdc++: Do not use 'not' alternative token in

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 1:32 PM Jonathan Wakely  wrote:

> On Wed, 16 Apr 2025 at 12:18, Jonathan Wakely  wrote:
> >
> > This fixes:
> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++23 (test for
> excess errors)
> > FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++26 (test for
> excess errors)
> >
> > The purpose of 'not defined>' is to be ill-formed (as
> > required by [format.range.fmtkind]) and to give an error that includes
> > the string "not defined>". That was intended to tell you
> > that format_kind is not defined, just like it says!
> >
> > But user code can use -fno-operator-names so we can't use 'not' here,
> > and "! defined" in the diagnostic doesn't seem as user-friendly. It also
> > raises questions about whether it was intended to be the preprocessor
> > token 'defined' (it's not) or where 'defined' is defined (it's not).
> >
> > Replace it with __no_primary_template> and a comment,
> > which seems almost as good. The diagnostic now looks like:
> >
> > In file included from fmt.cc:1:
> > .../include/c++/15.0.1/format: In instantiation of 'constexpr const auto
> std::format_kind':
> > fmt.cc:3:15:   required from here
> > 3 | auto i = std::format_kind;
> >   |   ^~~~
> > .../include/c++/15.0.1/format:5164:31: error: use of
> 'std::format_kind' before deduction of 'auto'
> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define
> specialization
> >   |   ^~~~
> > .../include/c++/15.0.1/format:5164:30: error: '__no_primary_template'
> was not declared in this scope
> >  5164 |   = __no_primary_template(format_kind<_Rg>); // must define
> specialization
> >   | ~^~
>
> Maybe "must define specialization" isn't really ideal, because the
> problem might be that users are trying to use format_kind
> when they should use format_kind, and telling them to define a
> specialization for const R is wrong.

I do not think that users who are expected to use format_kind directly
will be confused by the current specialization. I think you could adjust the
comment:
// must define specialization or _Rg is reference or cv-qualified type


> They should just stop using const
> R there. Similarly, if they try to use it for a type which is not an
> input_range (like int in the error above) then we don't want to
> encourage them to specialize the template for int, they should stop
> their nonsense instead.
>
> So maybe this instead?
>
> .../include/c++/15.0.1/format:5164:50: error:
> '__only_defined_for_non_const_input_ranges' was not declared
> in this scope
> 5164 |   = __only_defined_for_non_const_input_ranges(format_kind<_Rg>);
>  | ~^~
>
>
>
>
>
>
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/std/format (format_kind): Do not use 'not'
> > alternative token to make the primary template ill-formed.
> > Use __no_primary_template as the undefined identifier that will
> > appear in diagnostics.
> > ---
> >
> > Testing now on x86_64-linux.
> >
> >  libstdc++-v3/include/std/format | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> > index b1455977c65..9ce9b3cfed1 100644
> > --- a/libstdc++-v3/include/std/format
> > +++ b/libstdc++-v3/include/std/format
> > @@ -5160,7 +5160,8 @@ namespace __format
> >
> >/// @cond undocumented
> >template
> > -constexpr auto format_kind = not defined(format_kind<_Rg>);
> > +constexpr auto format_kind
> > +  = __no_primary_template(format_kind<_Rg>); // must define
> specialization
> >
> >template
> >  consteval range_format
> > --
> > 2.49.0
> >
>
>

Re: [committed] libstdc++: Do not define __cpp_lib_ranges_iota in

2025-04-16 Thread Jonathan Wakely

On Tue, 15 Apr 2025 at 17:38, Jonathan Wakely wrote:
>
> In r14-7153-gadbc46942aee75 we removed a duplicate definition of
> __glibcxx_want_range_iota from , but __cpp_lib_ranges_iota
> should be defined in  at all.

Oops, that was meant to say "should not be defined"

>
> libstdc++-v3/ChangeLog:
>
> * include/std/ranges (__glibcxx_want_ranges_iota): Do not
> define.
> ---
>
> Tested x86_64-linux. Pushed to trunk.
>
>  libstdc++-v3/include/std/ranges | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
> index 7a339c51368..9300c364a16 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -64,7 +64,6 @@
>  #define __glibcxx_want_ranges_chunk
>  #define __glibcxx_want_ranges_chunk_by
>  #define __glibcxx_want_ranges_enumerate
> -#define __glibcxx_want_ranges_iota
>  #define __glibcxx_want_ranges_join_with
>  #define __glibcxx_want_ranges_repeat
>  #define __glibcxx_want_ranges_slide
> --
> 2.49.0
>

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 11:17 AM Jonathan Wakely  wrote:

> On 16/04/25 09:13 +0200, Tomasz Kamiński wrote:
> >This patch implements formatter specializations for pair and tuple form
> >P2286R8. In addition using 'm` and range_format::map (from P2585R1) for
> >ranges are now supported.
> >
> >The formatters for pairs and tuples whose corresponding elements are the
> same
> >(after applying remove_cvref_t) derive from the same __tuple_formatter
> class.
> >This reduce the code duplication, as most of the parsing and formatting
> is the
> >same in such cases. We use a custom reduced implementation of the tuple
> >(__formatters_storage) to store the elements formatters.
>
> This is a nice solution, the code for __formatters_storage and
> __indexed_formatter_storage is clear and concise.
>
> >Handling of the padding (width and fill) options, is extracted to
> >__format::__format_padded function, that is used both by
> __tuple_formatter and
> >range_formatter. To reduce number of instantations range_formatter::format
> >triggers, we cast incoming range to __format::__maybe_const_range<_Rg,
> _CharT>&,
> >before formatting it.
> >
> >As in the case of previous commits, the signatures of the user-facing
> parse
> >and format methods of the provided formatters deviate from the standard by
> >constraining types of parameters:
> >* _CharT is constrained __formatter::__char
> >* basic_format_parse_context<_CharT> for parse argument
> >* basic_format_context<_Out, _CharT> for format second argument
> >The standard specifies last three of above as unconstrained types.
> >
> >Finally, test for tuple-like std::array and std::ranges::subrange,
> >that illustrate that they remain formatted as ranges.
> >
> >   PR libstdc++/PR109162
> >
> >libstdc++-v3/ChangeLog:
> >
> >   * include/std/format (__formatter_int::_M_format_character_escaped)
> >   (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
> >   (__format::__format_padded, __format::maybe_const)
> >   (__format::__indexed_formatter_storage,
> __format::__tuple_formatter)
> >   (std::formatter, _CharT>>)
> >   (std::formatter, _CharT): Define.
> >   (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> >   __format::__maybe_const_range<_Rg, _CharT>&.
> >   (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> >   and use __format_padded.
> >   (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> >   (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> >   (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
> >   __format_padded.
> >   * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> >   Define.
> >   * testsuite/std/format/ranges/formatter.cc: Tests for `m`
> specifier.
> >   * testsuite/std/format/ranges/sequence.cc: Tests for array and
> subrange.
> >   * testsuite/std/format/ranges/map.cc: New test.
> >   * testsuite/std/format/tuple.cc: New test.
> >---
> >Testing on x86_64-linux, tests matched by `*format*` passes.
> >OK for trunk? Should I wait for 16?
>
> A few minor comments below ...
>
> > libstdc++-v3/include/std/format   | 357 +++---
> > .../testsuite/std/format/ranges/formatter.cc  |   6 +-
> > .../testsuite/std/format/ranges/map.cc| 209 ++
> > .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
> > libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
> > .../testsuite/util/testsuite_iterators.h  |   3 +
> > 6 files changed, 806 insertions(+), 80 deletions(-)
> > create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
> > create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
> >
> >diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> >index 096dda4f989..5b93eb8bc2d 100644
> >--- a/libstdc++-v3/include/std/format
> >+++ b/libstdc++-v3/include/std/format
> >@@ -1350,8 +1350,7 @@ namespace __format
> >   __fc, _M_spec);
> >
> > __format::_Str_sink<_CharT> __sink;
> >-__format::_Sink_iter<_CharT> __out(__sink);
> >-__format::__write_escaped(__out, __s, __term);
> >+__format::__write_escaped(__sink.out(), __s, __term);
> > basic_string_view<_CharT> __escaped(__sink.view().data(),
> > __sink.view().size());
> > const size_t __escaped_width = _S_trunc(__escaped, __prec);
> >@@ -1387,13 +1386,13 @@ namespace __format
> >   {
> > ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
> > ranges::subrange __sub(__first, __first + __n);
> >-return format(_String(from_range, __sub), __fc);
> >+return format(_String(from_range, __sub), __fc);
> >   }
> > else
> >   {
> > // N.B. preserve the computed size
> >

[PATCH v2] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kamiński

This patch implements formatter specializations for pair and tuple from
P2286R8. In addition, using 'm' and range_format::map (from P2585R1) for
ranges is now supported.

The formatters for pairs and tuples whose corresponding elements are the same
(after applying remove_cvref_t) derive from the same __tuple_formatter class.
This reduces code duplication, as most of the parsing and formatting is the
same in such cases. We use a custom reduced implementation of the tuple
(__formatters_storage) to store the element formatters.

Handling of the padding (width and fill) options is extracted to the
__format::__format_padded function, which is used by both __tuple_formatter and
range_formatter. To reduce the number of instantiations that range_formatter::format
triggers, we cast the incoming range to __format::__maybe_const_range<_Rg, _CharT>&,
before formatting it.

As in the case of previous commits, the signatures of the user-facing parse
and format methods of the provided formatters deviate from the standard by
constraining types of parameters:
* _CharT is constrained __formatter::__char
* basic_format_parse_context<_CharT> for parse argument
* basic_format_context<_Out, _CharT> for format second argument
The standard specifies the last three of the above as unconstrained types.

Finally, add tests for tuple-like std::array and std::ranges::subrange
that illustrate that they remain formatted as ranges.

PR libstdc++/PR109162

libstdc++-v3/ChangeLog:

* include/std/format (__formatter_int::_M_format_character_escaped)
(__formatter_str::format): Use __sink.out() to produce _Sink_iter.
(__format::__const_formattable_range): Moved closer to range_formatter.
(__format::__maybe_const_range): Use `__conditional_t` and moved closer
to range_formatter.
(__format::__format_padded, __format::maybe_const)
(__format::__indexed_formatter_storage, __format::__tuple_formatter)
(std::formatter, _CharT>>)
(std::formatter, _CharT): Define.
(std::formatter<_Rg, _CharT>::format): Cast incoming range to
__format::__maybe_const_range<_Rg, _CharT>&.
(std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
and use __format_padded.
(std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
(std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
(std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
__format_padded.
* testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
Define.
* testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
* testsuite/std/format/ranges/sequence.cc: Tests for array and subrange.
* testsuite/std/format/ranges/map.cc: New test.
* testsuite/std/format/tuple.cc: New test.
---
I dodged the discussion of which is safe, static_cast or const_cast, by
calling:
+ using __maybe_const_range
+   = __format::__maybe_const_range<_Rg, _CharT>;
+ return _M_format<__maybe_const_range>(__rg, __fc);
I have also applied `__conditional_t` to __maybe_const_range,
and moved these two helpers closer to range_formatter.

OK for trunk?
---

 libstdc++-v3/include/std/format   | 377 ++
 .../testsuite/std/format/ranges/formatter.cc  |   6 +-
 .../testsuite/std/format/ranges/map.cc| 209 ++
 .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
 libstdc++-v3/testsuite/std/format/tuple.cc| 259 
 .../testsuite/util/testsuite_iterators.h  |   3 +
 6 files changed, 813 insertions(+), 93 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
 create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc

diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index 096dda4f989..58ac9b2a48f 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1350,8 +1350,7 @@ namespace __format
__fc, _M_spec);
 
  __format::_Str_sink<_CharT> __sink;
- __format::_Sink_iter<_CharT> __out(__sink);
- __format::__write_escaped(__out, __s, __term);
+ __format::__write_escaped(__sink.out(), __s, __term);
  basic_string_view<_CharT> __escaped(__sink.view().data(),
  __sink.view().size());
  const size_t __escaped_width = _S_trunc(__escaped, __prec);
@@ -1387,13 +1386,13 @@ namespace __format
{
  ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
  ranges::subrange __sub(__first, __first + __n);
- return format(_String(from_range, __sub), __fc);
+ return format(_String(from_range, __sub), __fc);
}
  else
{
  // N.B. preserve the computed size
  ranges::subrange __sub(__rg, __n);
-

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 10:38, Tomasz Kaminski  wrote:
>
>
>
> On Wed, Apr 16, 2025 at 11:17 AM Jonathan Wakely  wrote:
>>
>> On 16/04/25 09:13 +0200, Tomasz Kamiński wrote:
>> >This patch implements formatter specializations for pair and tuple form
>> >P2286R8. In addition using 'm` and range_format::map (from P2585R1) for
>> >ranges are now supported.
>> >
>> >The formatters for pairs and tuples whose corresponding elements are the 
>> >same
>> >(after applying remove_cvref_t) derive from the same __tuple_formatter 
>> >class.
>> >This reduce the code duplication, as most of the parsing and formatting is 
>> >the
>> >same in such cases. We use a custom reduced implementation of the tuple
>> >(__formatters_storage) to store the elements formatters.
>>
>> This is a nice solution, the code for __formatters_storage and
>> __indexed_formatter_storage is clear and concise.
>>
>> >Handling of the padding (width and fill) options, is extracted to
>> >__format::__format_padded function, that is used both by __tuple_formatter 
>> >and
>> >range_formatter. To reduce number of instantations range_formatter::format
>> >triggers, we cast incoming range to __format::__maybe_const_range<_Rg, 
>> >_CharT>&,
>> >before formatting it.
>> >
>> >As in the case of previous commits, the signatures of the user-facing parse
>> >and format methods of the provided formatters deviate from the standard by
>> >constraining types of parameters:
>> >* _CharT is constrained __formatter::__char
>> >* basic_format_parse_context<_CharT> for parse argument
>> >* basic_format_context<_Out, _CharT> for format second argument
>> >The standard specifies last three of above as unconstrained types.
>> >
>> >Finally, test for tuple-like std::array and std::ranges::subrange,
>> >that illustrate that they remain formatted as ranges.
>> >
>> >   PR libstdc++/PR109162
>> >
>> >libstdc++-v3/ChangeLog:
>> >
>> >   * include/std/format (__formatter_int::_M_format_character_escaped)
>> >   (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
>> >   (__format::__format_padded, __format::maybe_const)
>> >   (__format::__indexed_formatter_storage, __format::__tuple_formatter)
>> >   (std::formatter, _CharT>>)
>> >   (std::formatter, _CharT): Define.
>> >   (std::formatter<_Rg, _CharT>::format): Cast incoming range to
>> >   __format::__maybe_const_range<_Rg, _CharT>&.
>> >   (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
>> >   and use __format_padded.
>> >   (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
>> >   (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
>> >   (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
>> >   __format_padded.
>> >   * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
>> >   Define.
>> >   * testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
>> >   * testsuite/std/format/ranges/sequence.cc: Tests for array and 
>> > subrange.
>> >   * testsuite/std/format/ranges/map.cc: New test.
>> >   * testsuite/std/format/tuple.cc: New test.
>> >---
>> >Testing on x86_64-linux, tests matched by `*format*` passes.
>> >OK for trunk? Should I wait for 16?
>>
>> A few minor comments below ...
>>
>> > libstdc++-v3/include/std/format   | 357 +++---
>> > .../testsuite/std/format/ranges/formatter.cc  |   6 +-
>> > .../testsuite/std/format/ranges/map.cc| 209 ++
>> > .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
>> > libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
>> > .../testsuite/util/testsuite_iterators.h  |   3 +
>> > 6 files changed, 806 insertions(+), 80 deletions(-)
>> > create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
>> > create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
>> >
>> >diff --git a/libstdc++-v3/include/std/format 
>> >b/libstdc++-v3/include/std/format
>> >index 096dda4f989..5b93eb8bc2d 100644
>> >--- a/libstdc++-v3/include/std/format
>> >+++ b/libstdc++-v3/include/std/format
>> >@@ -1350,8 +1350,7 @@ namespace __format
>> >   __fc, _M_spec);
>> >
>> > __format::_Str_sink<_CharT> __sink;
>> >-__format::_Sink_iter<_CharT> __out(__sink);
>> >-__format::__write_escaped(__out, __s, __term);
>> >+__format::__write_escaped(__sink.out(), __s, __term);
>> > basic_string_view<_CharT> __escaped(__sink.view().data(),
>> > __sink.view().size());
>> > const size_t __escaped_width = _S_trunc(__escaped, __prec);
>> >@@ -1387,13 +1386,13 @@ namespace __format
>> >   {
>> > ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
>> > ranges::subrange __sub(__first, __first + __n);
>> >-return format(_String(from_range, __sub), __fc);
>> >+return forma

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 11:13 AM Jonathan Wakely  wrote:

> On Wed, 16 Apr 2025 at 09:55, Tomasz Kaminski  wrote:
> >
> >
> >
> > On Wed, Apr 16, 2025 at 10:47 AM Jonathan Wakely 
> wrote:
> >>
> >> On 16/04/25 10:37 +0200, Tomasz Kaminski wrote:
> >> >Just to clarify, we still will be missing the formatter for adaptors
> >> >(stack, queue, piority_queue).
> >>
> >> Yes, but I doubt most people want to use those :-)
> >
> > We are good as long as vector::reference is formattable ;)
>
> Surely the most important type in the library ;)
>
Interestingly, this formatter is not used when formatting vector<bool>,
because we use a vector<bool> const& argument, and the const_iterator value type
is bool.

Re: [PATCH] libstdc++: Do not use 'not' alternative token in

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 12:18, Jonathan Wakely  wrote:
>
> This fixes:
> FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++23 (test for 
> excess errors)
> FAIL: 17_intro/headers/c++1998/operator_names.cc  -std=gnu++26 (test for 
> excess errors)
>
> The purpose of 'not defined>' is to be ill-formed (as
> required by [format.range.fmtkind]) and to give an error that includes
> the string "not defined>". That was intended to tell you
> that format_kind is not defined, just like it says!
>
> But user code can use -fno-operator-names so we can't use 'not' here,
> and "! defined" in the diagnostic doesn't seem as user-friendly. It also
> raises questions about whether it was intended to be the preprocessor
> token 'defined' (it's not) or where 'defined' is defined (it's not).
>
> Replace it with __no_primary_template> and a comment,
> which seems almost as good. The diagnostic now looks like:
>
> In file included from fmt.cc:1:
> .../include/c++/15.0.1/format: In instantiation of 'constexpr const auto 
> std::format_kind':
> fmt.cc:3:15:   required from here
> 3 | auto i = std::format_kind;
>   |   ^~~~
> .../include/c++/15.0.1/format:5164:31: error: use of 'std::format_kind' 
> before deduction of 'auto'
>  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
> specialization
>   |   ^~~~
> .../include/c++/15.0.1/format:5164:30: error: '__no_primary_template' was not 
> declared in this scope
>  5164 |   = __no_primary_template(format_kind<_Rg>); // must define 
> specialization
>   | ~^~

Maybe "must define specialization" isn't really ideal, because the
problem might be that users are trying to use format_kind<const R>
when they should use format_kind<R>, and telling them to define a
specialization for const R is wrong. They should just stop using const
R there. Similarly, if they try to use it for a type which is not an
input_range (like int in the error above) then we don't want to
encourage them to specialize the template for int, they should stop
their nonsense instead.

So maybe this instead?

.../include/c++/15.0.1/format:5164:50: error:
'__only_defined_for_non_const_input_ranges' was not declared
in this scope
5164 |   = __only_defined_for_non_const_input_ranges(format_kind<_Rg>);
 | ~^~






>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (format_kind): Do not use 'not'
> alternative token to make the primary template ill-formed.
> Use __no_primary_template as the undefined identifier that will
> appear in diagnostics.
> ---
>
> Testing now on x86_64-linux.
>
>  libstdc++-v3/include/std/format | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index b1455977c65..9ce9b3cfed1 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -5160,7 +5160,8 @@ namespace __format
>
>/// @cond undocumented
>template
> -constexpr auto format_kind = not defined(format_kind<_Rg>);
> +constexpr auto format_kind
> +  = __no_primary_template(format_kind<_Rg>); // must define 
> specialization
>
>template
>  consteval range_format
> --
> 2.49.0
>

[PATCH] libstdc++: Adjust comment in

2025-04-16 Thread Jonathan Wakely

We don't need to mention ranges::out_value_result in this comment,
because <numeric> doesn't care about that name.

libstdc++-v3/ChangeLog:

* include/std/numeric: Only mention ranges::iota in comment.
---

Pushed to gcc-14 as obvious.

 libstdc++-v3/include/std/numeric | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index ab6ae22609b..def9a39b05c 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -90,7 +90,7 @@
 #include 
 
 #if __glibcxx_ranges_iota >= 202202L // C++ >= 23
-# include  // for ranges::out_value_result, 
ranges::iota
+# include  // for ranges::iota
 #endif
 
 #ifdef __glibcxx_saturation_arithmetic // C++ >= 26
-- 
2.49.0

Re: [PATCH v2] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely


On 16/04/25 11:45 +0200, Tomasz Kamiński wrote:

This patch implements formatter specializations for pair and tuple from
P2286R8. In addition, using 'm' and range_format::map (from P2585R1) for
ranges is now supported.

The formatters for pairs and tuples whose corresponding elements are the same
(after applying remove_cvref_t) derive from the same __tuple_formatter class.
This reduces the code duplication, as most of the parsing and formatting is the
same in such cases. We use a custom reduced implementation of the tuple
(__formatters_storage) to store the elements formatters.

Handling of the padding (width and fill) options, is extracted to
__format::__format_padded function, that is used both by __tuple_formatter and
range_formatter. To reduce the number of instantiations range_formatter::format
triggers, we cast incoming range to __format::__maybe_const_range<_Rg, _CharT>&,
before formatting it.

As in the case of previous commits, the signatures of the user-facing parse
and format methods of the provided formatters deviate from the standard by
constraining types of parameters:
* _CharT is constrained __formatter::__char
* basic_format_parse_context<_CharT> for parse argument
* basic_format_context<_Out, _CharT> for format second argument
The standard specifies last three of above as unconstrained types.

Finally, test for tuple-like std::array and std::ranges::subrange,
that illustrate that they remain formatted as ranges.

PR libstdc++/PR109162

libstdc++-v3/ChangeLog:

* include/std/format (__formatter_int::_M_format_character_escaped)
(__formatter_str::format): Use __sink.out() to produce _Sink_iter.
(__format::__const_formattable_range): Moved closer to range_formatter.
(__format::__maybe_const_range): Use `__conditional_t` and moved closer
to range_formatter.
(__format::__format_padded, __format::maybe_const)
(__format::__indexed_formatter_storage, __format::__tuple_formatter)
(std::formatter, _CharT>>)
(std::formatter, _CharT): Define.
(std::formatter<_Rg, _CharT>::format): Cast incoming range to
__format::__maybe_const_range<_Rg, _CharT>&.
(std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
and use __format_padded.
(std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
(std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
(std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
__format_padded.
* testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
Define.
* testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
* testsuite/std/format/ranges/sequence.cc: Tests for array and subrange.
* testsuite/std/format/ranges/map.cc: New test.
* testsuite/std/format/tuple.cc: New test.
---
I dodged the "which is safe, static_cast or const_cast" discussion by
calling:
+ using __maybe_const_range
+   = __format::__maybe_const_range<_Rg, _CharT>;
+ return _M_format<__maybe_const_range>(__rg, __fc);


OK.


I have also applied the `__conditional_t` to __maybe_const_range,
and moved these two helpers closer to range_formatter.


Thanks.

And addressof is qualified, and trigger is renamed to _M_trigger.


OK for trunk?


OK, thanks.

Re: [PATCH] testsuite: force AMDGCN test for vect-early-break_18.c to consistent architecture [PR119286]

2025-04-16 Thread Richard Biener

On Wed, 16 Apr 2025, Tamar Christina wrote:

> Hi All,
> 
> The given test is intended to test vectorization of a strided access done by
> having a step of > 1.
> 
> GCN target doesn't support load lanes, so the testcase is expected to fail,
> other targets create a permuted load here which we then reject.
> 
> However some GCN arch don't seem to support the permuted loads either, so the
> vectorizer tries a gather/scatter.  But the indices aren't supported by some
> target, so instead the vectorizer scalarizes the loads.
> 
> I can't really test for which architecture is being used by the compiler, so
> instead this updates the testcase to use one single architecture so we get a
> consistent result.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Cross checked the failing case on amdgcn-amdhsa
> and all pass now.
> 
> Ok for master?

OK.

> Thanks,
> Tamar
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/119286
>   * gcc.dg/vect/vect-early-break_18.c: Force -march=gfx908 for amdgcn.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
> index 
> edddb44bad66aa419d097f69ca850e5eaa66e014..cd397049c84c47cbd3e9facb87419de58ba5b148
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
> @@ -2,7 +2,7 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target vect_early_break } */
>  /* { dg-require-effective-target vect_int } */
> -
> +/* { dg-additional-options "-march=gfx908" { target amdgcn*-*-* } } */
>  /* { dg-additional-options "-Ofast" } */
>  
>  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target 
> vect_load_lanes } } } */
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH]middle-end: fix masking for partial vectors and early break [PR119351]

2025-04-16 Thread Richard Biener

On Wed, 16 Apr 2025, Tamar Christina wrote:

> Hi All,
> 
> The following testcase shows an incorrect masked codegen:
> 
> #define N 512
> #define START 1
> #define END 505
>  
> int x[N] __attribute__((aligned(32)));
> 
> int __attribute__((noipa))
> foo (void)
> {
>   int z = 0;
>   for (unsigned int i = START; i < END; ++i)
> {
>   z++;
>   if (x[i] > 0)
> continue;
> 
>   return z;
> }
>   return -1;
> }
> 
> notice how there's a continue there instead of a break.  This means we 
> generate
> a control flow where success stays within the loop iteration:

I think that's just a fancy way of rotating the loop.

So the key issue is that when the exit test is "inverted", aka it
stays in the loop when true and exits when false, we do

  if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
 exit_true_edge->dest))
{
  new_code = EQ_EXPR;
  reduc_optab = and_optab;
  reduc_op = BIT_AND_EXPR;
  cst = build_minus_one_cst (vectype);

and with PFA with mask and the initial loop mask of { 0, 0, -1, -1 }
we then exit early and the scalar loop does not correctly handle
this case (apart from it being a missed optimization).  For the
regular non-inverted case we use

  auto new_code = NE_EXPR;
  auto reduc_optab = ior_optab;
  auto reduc_op = BIT_IOR_EXPR;
  tree cst = build_zero_cst (vectype);

and that is fine.

> 
>   mask_patt_9.12_46 = vect__1.11_45 > { 0, 0, 0, 0 };
>   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
>   if (vec_mask_and_47 == { -1, -1, -1, -1 })
> goto ; [41.48%]
>   else
> goto ; [58.52%]
> 
> However when loop_mask_41 is a partial mask this comparison can lead to an
> incorrect match.  In this case the mask is:
> 
>   # loop_mask_41 = PHI 
> 
> due to peeling for alignment with masking and compiling with
> -msve-vector-bits=128.
> 
> At codegen time we generate:
> 
>   ptrue   p15.s, vl4
>   ptrue   p7.b, vl1
>   not p7.b, p15/z, p7.b
> .L5:
>   ld1wz29.s, p7/z, [x1, x0, lsl 2]
>   cmpgt   p7.s, p7/z, z29.s, #0
>   not p7.b, p15/z, p7.b
>   ptest   p15, p7.b
>   b.none  .L2
>   ..
> 
> notice how at expand time the basic blocks are inverted and a not is 
> generated.
> But the generated not is unmasked (or predicated over an ALL true mask in this
> case).  This has the unintended side-effect of flipping the results of the
> inactive lanes (which were zero'd by the cmpgt) into -1.  Which then 
> incorrectly
> causes us to not take the branch to .L2.
> 
> This is happening because the expander has no context about the mask, and 
> since
> we can't mask a gcond, we do the next best thing which is to mask both 
> operands.

So you make this sound as if it were a bug in the expander because
"it doesn't know"?  I think a compare against {-1,...} is flawed,
this case needs to compare against loop_mask, not all-ones, no?

So instead of

>   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
>   if (vec_mask_and_47 == { -1, -1, -1, -1 })

do

>   vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
>   if (vec_mask_and_47 == loop_mask_41)

which is sort-of what you do, of course, just in an odd way (IMO).

Richard.

> We already mask the compare, but this patch now also masks the constant.  In 
> the
> normal case this means we drop it since {0, ..} & mask = {0, ..} but in the 
> case
> of a forall comparison we'll keep the mask, allowing the generated code to
> correctly mask the results.
> 
> For the above we now generate:
> 
> .L5:
> ld1wz28.s, p7/z, [x1, x0, lsl 2]
> cmpgt   p14.s, p7/z, z28.s, #0
> eorsp7.b, p15/z, p7.b, p14.b
> b.none  .L2
> 
> This fixes gromacs with > 1 OpenMP threads and improves performance.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> 
> Ok for master? and backport to GCC-14?
> 
> Thanks,
> Tamar
> 
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/119351
>   * tree-vect-stmts.cc (vectorizable_early_exit): Mask both operands of
>   the gcond for partial masking support.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/119351
>   * gcc.target/aarch64/sve/pr119351.c: New test.
>   * gcc.target/aarch64/sve/pr119351_run.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
> new file mode 100644
> index 
> ..198f7edb0fc01bfc74ae231db7823e9a6f0bc119
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
> @@ -0,0 +1,38 @@
> +/* Fix for PR119351 alignment peeling with vectors and VLS.  */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -msve-vector-bits=256 --param 
> aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
> +/* { dg-final { check-function-bodies "**" "" ""} } */
> +
> +#define N 512
> +#define START 1
> +#d

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 10:47 AM Jonathan Wakely  wrote:

> On 16/04/25 10:37 +0200, Tomasz Kaminski wrote:
> >Just to clarify, we still will be missing the formatter for adaptors
> >(stack, queue, priority_queue).
>
> Yes, but I doubt most people want to use those :-)
>
We are good as long as vector::reference is formattable ;)

Re: [PATCH] testsuite: force AMDGCN test for vect-early-break_18.c to consistent architecture [PR119286]

2025-04-16 Thread Andrew Stubbs


On 16/04/2025 08:57, Tamar Christina wrote:

Hi All,

The given test is intended to test vectorization of a strided access done by
having a step of > 1.

GCN target doesn't support load lanes, so the testcase is expected to fail,
other targets create a permuted load here which we then reject.

However some GCN arch don't seem to support the permuted loads either, so the
vectorizer tries a gather/scatter.  But the indices aren't supported by some
target, so instead the vectorizer scalarizes the loads.

I can't really test for which architecture is being used by the compiler, so
instead this updates the testcase to use one single architecture so we get a
consistent result.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Cross checked the failing case on amdgcn-amdhsa
and all pass now.

Ok for master?


This is OK.

Andrew


Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR target/119286
* gcc.dg/vect/vect-early-break_18.c: Force -march=gfx908 for amdgcn.

---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
index 
edddb44bad66aa419d097f69ca850e5eaa66e014..cd397049c84c47cbd3e9facb87419de58ba5b148
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
@@ -2,7 +2,7 @@
  /* { dg-do compile } */
  /* { dg-require-effective-target vect_early_break } */
  /* { dg-require-effective-target vect_int } */
-
+/* { dg-additional-options "-march=gfx908" { target amdgcn*-*-* } } */
  /* { dg-additional-options "-Ofast" } */
  
  /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_load_lanes } } } */

Re: [PATCH] Locality cloning pass

2025-04-16 Thread xionghuluo

Hi, the bootstrap-lto-locality is much longer compared to bootstrap-lto
and bootstrap, and


It seems that stage2 and stage3 only produced 5 partitions in LTO, is 
this reasonable...


Also could you please inform how much is the exact performance gain, please?


make bootstrap:   27m56.054s
make BUILD_CONFIG=bootstrap-lto:     38m25.048s
make BUILD_CONFIG=bootstrap-lto-locality:    71m1.882s


On 2025/4/15 22:38, Kyrylo Tkachov wrote:



On 15 Apr 2025, at 15:42, Richard Biener  wrote:

On Mon, Apr 14, 2025 at 3:11 PM Kyrylo Tkachov  wrote:

Hi Honza,


On 13 Apr 2025, at 23:19, Jan Hubicka  wrote:


+@opindex fipa-reorder-for-locality
+@item -fipa-reorder-for-locality
+Group call chains close together in the binary layout to improve code code
+locality.  This option is incompatible with an explicit
+@option{-flto-partition=} option since it enforces a custom partitioning
+scheme.

Please also cross-link this with -fprofile-reorder-functions and
-freorder-functions, which does similar thing.
If you see how to clean-up the description of the other two so user is
not confused.

Perhaps say that -freorder-functions only partitions functions into
never-executed/cold/normal/hot and -fprofile-reorder-functions is aiming
for program startup optimization (it reorders by the measured first time the
function is executed).  By accident it seems to kind of work for
locality.

Yeah, the option names are quite similar aren't they?
I’ve attempted to disambiguate them a bit in their description.
I’m attaching a diff from the previous version (as the full updated patch) to 
make it easier to see what’s adjusted.



+
+/* Helper function of to accumulate call counts.  */
+static bool
+accumulate_profile_counts_after_cloning (cgraph_node *node, void *data)
+{
+  struct profile_stats *stats = (struct profile_stats *) data;
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+{
+  if (e->caller == stats->target)
+ {
+  if (stats->rec_count.compatible_p (e->count.ipa ()))
+stats->rec_count += e->count.ipa ();
+ }
+  else
+ {
+  if (stats->nonrec_count.compatible_p (e->count.ipa ()))
+stats->nonrec_count += e->count.ipa ();
+ }

In case part of the profile is missing (which may happen if one unit has -O0
or so), we may have uninitialized counts. Uninitialized counts are
compatible with everything, but any arithmetic with them will produce
an uninitialized result which will likely confuse code later.  So I would
skip edges with uninitialized counts.

On the other hand ipa counts are always compatible, so compatible_p
should be redundant. The main reason for the existence of compatible_p is that we
can have local profiles that are 0 or unknown at IPA level.  The ipa ()
conversion turns all counts into IPA counts and those are compatible
with each other.

I suppose the compatible_p test is there since the code ICEd in the past, but I
think it was because of a missing ipa() conversion.



+}
+  return false;
+}
+
+/* NEW_NODE is a previously created clone of ORIG_NODE already present in
+   current partition.  EDGES contains newly redirected edges to NEW_NODE.
+   Adjust profile information for both nodes and the edge.  */
+
+static void
+adjust_profile_info_for_non_self_rec_edges (auto_vec &edges,
+cgraph_node *new_node,
+cgraph_node *orig_node)
+{
+  profile_count orig_node_count = orig_node->count.ipa ();
+  profile_count edge_count = profile_count::zero ();
+  profile_count final_new_count = profile_count::zero ();
+  profile_count final_orig_count = profile_count::zero ();
+
+  for (unsigned i = 0; i < edges.length (); ++i)
+edge_count += edges[i]->count.ipa ();

Here I would again skip uninitialized.  It is probably legal for -O0
function to end up in partition.

+
+  final_orig_count = orig_node_count - edge_count;
+
+  /* NEW_NODE->count was adjusted for other callers when the clone was
+ first created.  Just add the new edge count.  */
+  if (new_node->count.compatible_p (edge_count))
+final_new_count = new_node->count + edge_count;

And here compatible_p should be unnecessary.

+/* Accumulate frequency of all edges from EDGE->caller to EDGE->callee.  */
+
+static sreal
+accumulate_incoming_edge_frequency (cgraph_edge *edge)
+{
+  sreal count = 0;
+  struct cgraph_edge *e;
+  for (e = edge->callee->callers; e; e = e->next_caller)
+{
+  /* Make a local decision about all edges for EDGE->caller but not the
+ other nodes already in the partition.  Their edges will be visited
+ later or may have been visited before and not fit the
+ cut-off criteria.  */
+  if (e->caller == edge->caller)
+ {
+  profile_count caller_count = e->caller->inlined_to
+ ? e->caller->inlined_to->count
+ : e->caller->count;
+  if (e->count.compatible_p (caller_count))

Here again the compatibility check should not be necessary, since the counts
belong to one function body (after inlining) and should be compatible.
inliner calls e->sreal_frequency all the time without further chec

Re: [PATCH v2] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 11:10, Jonathan Wakely  wrote:
>
> On 16/04/25 11:45 +0200, Tomasz Kamiński wrote:
> >This patch implements formatter specializations for pair and tuple from
> >P2286R8. In addition, using 'm' and range_format::map (from P2585R1) for
> >ranges is now supported.
> >
> >The formatters for pairs and tuples whose corresponding elements are the same
> >(after applying remove_cvref_t) derive from the same __tuple_formatter class.
> >This reduces the code duplication, as most of the parsing and formatting is
> >the
> >same in such cases. We use a custom reduced implementation of the tuple
> >(__formatters_storage) to store the elements formatters.
> >
> >Handling of the padding (width and fill) options, is extracted to
> >__format::__format_padded function, that is used both by __tuple_formatter 
> >and
> >range_formatter. To reduce the number of instantiations range_formatter::format
> >triggers, we cast incoming range to __format::__maybe_const_range<_Rg, 
> >_CharT>&,
> >before formatting it.
> >
> >As in the case of previous commits, the signatures of the user-facing parse
> >and format methods of the provided formatters deviate from the standard by
> >constraining types of parameters:
> >* _CharT is constrained __formatter::__char
> >* basic_format_parse_context<_CharT> for parse argument
> >* basic_format_context<_Out, _CharT> for format second argument
> >The standard specifies last three of above as unconstrained types.
> >
> >Finally, test for tuple-like std::array and std::ranges::subrange,
> >that illustrate that they remain formatted as ranges.
> >
> >   PR libstdc++/PR109162
> >
> >libstdc++-v3/ChangeLog:
> >
> >   * include/std/format (__formatter_int::_M_format_character_escaped)
> >   (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
> >   (__format::__const_formattable_range): Moved closer to 
> > range_formatter.
> >   (__format::__maybe_const_range): Use `__conditional_t` and moved 
> > closer
> >   to range_formatter.
> >   (__format::__format_padded, __format::maybe_const)
> >   (__format::__indexed_formatter_storage, __format::__tuple_formatter)
> >   (std::formatter, _CharT>>)
> >   (std::formatter, _CharT): Define.
> >   (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> >   __format::__maybe_const_range<_Rg, _CharT>&.
> >   (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> >   and use __format_padded.
> >   (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> >   (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> >   (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
> >   __format_padded.
> >   * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> >   Define.
> >   * testsuite/std/format/ranges/formatter.cc: Tests for `m` specifier.
> >   * testsuite/std/format/ranges/sequence.cc: Tests for array and 
> > subrange.
> >   * testsuite/std/format/ranges/map.cc: New test.
> >   * testsuite/std/format/tuple.cc: New test.
> >---
> >I dodged the "which is safe, static_cast or const_cast" discussion by
> >calling:
> >+ using __maybe_const_range
> >+   = __format::__maybe_const_range<_Rg, _CharT>;
> >+ return _M_format<__maybe_const_range>(__rg, __fc);
>
> OK.
>
> >I have also applied the `__conditional_t` to __maybe_const_range,
> >and moved these two helpers closer to range_formatter.
>
> Thanks.
>
> And addressof is qualified, and trigger is renamed to _M_trigger.
>
> >OK for trunk?
>
> OK, thanks.

(With the base class removed from __indexed_formatter_storage of course)

[PUSHED] MAINTAINERS: Add myself to Write After Approval

2025-04-16 Thread Alex

I forgot to do this yesterday, it is now done.
Thank you to everyone who helped me get this far,
Alex
From 0e8b6f0dad11ece6c693e4765f3c58309ff8ef12 Mon Sep 17 00:00:00 2001
From: Waffl3x 
Date: Wed, 16 Apr 2025 07:26:50 -0600
Subject: [PATCH] MAINTAINERS: Add myself to Write After Approval

ChangeLog:

	* MAINTAINERS: Add myself.
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 756227e0a50..6ff4770ed5d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -862,6 +862,7 @@ Ville Voutilainen   ville   
 Tom de Vriesvries   
 Nenad Vukicevic nenadv  
 Dmitry Vyukov   dvyukov 
+Waffl3x waffl3x 
 Jonathan Wakely redi
 Krister Walfridsson kristerw
 Feng Wang   -   
-- 
2.49.0

Put znver5 ADDSS cost back to 3

2025-04-16 Thread Jan Hubicka

Hi,

Znver5 has latency of addss 2 in typical case while all earlier versions have
latency 3.
Unfortunately addss cost is used to cost many other SSE instructions than just
addss and
setting the cost to 2 makes us vectorize 4 64bit stores into one 256bit
store which
in turn regresses imagemagick.

This patch sets the cost back to 3.  Next stage1 we can untie addss from the 
other operations
and set it correctly.

bootstrapped/regtested x86_64-linux and also benchmarked on SPEC2k17

gcc/ChangeLog:

PR target/119298
* config/i386/x86-tune-costs.h (znver5_cost): Set ADDSS cost to 3.

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb738d7c..9477345bdd7 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = {
   COSTS_N_INSNS (1),   /* cost of cheap SSE instruction.  */
   /* ADDSS has throughput 2 and latency 2
  (in some cases when source is another addition).  */
-  COSTS_N_INSNS (2),   /* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (3),   /* cost of ADDSS/SD SUBSS/SD insns.  */
   /* MULSS has throughput 2 and latency 3.  */
   COSTS_N_INSNS (3),   /* cost of MULSS instruction.  */
   COSTS_N_INSNS (3),   /* cost of MULSD instruction.  */

[PATCH] cobol: Eliminate gcc/cobol/LICENSE. [PR119759]

2025-04-16 Thread Robert Dubner

I am not well-versed in license and legal issues.  But I see that except
for the GO language, gcc/cobol is almost unique in that there is a LICENSE
file.

This patch gets rid of it.

Okay for trunk?


Subject: [PATCH] cobol: Eliminate gcc/cobol/LICENSE. [PR119759]

gcc/cobol

PR cobol/119759
* LICENSE: Deleted.
---
 gcc/cobol/LICENSE | 29 -
 1 file changed, 29 deletions(-)
 delete mode 100644 gcc/cobol/LICENSE

diff --git a/gcc/cobol/LICENSE b/gcc/cobol/LICENSE
deleted file mode 100644
index aa5ba6024cca..
--- a/gcc/cobol/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-#
-# Copyright (c) 2021-2025 Symas Corporation
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-#   copyright notice, this list of conditions and the following
disclaimer
-#   in the documentation and/or other materials provided with the
-#   distribution.
-# * Neither the name of the Symas Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
2.34.1

Re: RISC-V: adjust testcase for gcc-14 [PR118182]

2025-04-16 Thread Jeff Law





On 4/14/25 10:22 PM, Alexandre Oliva wrote:

On Apr 14, 2025, Jeff Law  wrote:


No strong opinion.  I'd lean towards xfail or twiddling the test since
that's obviously super-safe WRT codegen on the gcc-14 release branch.


Twiddling it is, then (pending approval ;-)

The pr118182-2.c testcase backported from gcc-15 depended on the late
combine pass after register allocation to substitute the zero constant
into the pred_broadcast to get to the expected vmv.s.x instruction.
Without that pass, we get a mfmv.s.f instead.  Expect that on gcc-14.

Tested both riscv64-elf and riscv32-elf on x86_64-linux-gnu.
Ok for gcc-14?


for  gcc/testsuite/ChangeLog

PR target/118182
* gcc.target/riscv/rvv/autovec/pr118182-2.c: Adjust.

Yea, this is fine and obviously safe for gcc-14 :-)

jeff

Re: RISC-V: revert pr114194 tests on gcc-14 [PR118601]

2025-04-16 Thread Jeff Law





On 4/14/25 10:24 PM, Alexandre Oliva wrote:


And here's another that came up more recently:

The gcc-14 backport that split the pr114194 testcase for rv32 and rv64
would only generate the expected rv32 sequence if commit
6b315907c0353f71169a7555e653d29a981fef67 had also been backported, but
it wasn't.  Without it, we get the same code as before on both rv32
and rv64, so revert to the original test.

Also tested on riscv64-elf and riscv32-elf on x86_64-linux-gnu.
Ok for gcc-14?


for  gcc/testsuite/ChangeLog

PR target/118601
* gcc.target/riscv/rvv/xtheadvector/pr114194.c: Restore.
* gcc.target/riscv/rvv/xtheadvector/pr114194-rv64.c: Remove.
* gcc.target/riscv/rvv/xtheadvector/pr114194-rv32.c: Likewise.

OK.
jeff

[pushed] c++: templates, attributes, #pragma target [PR114772]

2025-04-16 Thread Jason Merrill

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Since r12-5426 apply_late_template_attributes suppresses various global
state to avoid applying active pragmas to earlier declarations; we also
need to override target_option_current_node.

PR c++/114772
PR c++/101180

gcc/cp/ChangeLog:

* pt.cc (apply_late_template_attributes): Also override
target_option_current_node.

gcc/testsuite/ChangeLog:

* g++.dg/ext/pragma-target2.C: New test.
---
 gcc/cp/pt.cc  |  2 ++
 gcc/testsuite/g++.dg/ext/pragma-target2.C | 18 ++
 2 files changed, 20 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/pragma-target2.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 4349b19119b..51433e7c4ec 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -12429,6 +12429,8 @@ apply_late_template_attributes (tree *decl_p, tree 
attributes, int attr_flags,
   auto o4 = make_temp_override (scope_chain->omp_declare_target_attribute,
NULL);
   auto o5 = make_temp_override (scope_chain->omp_begin_assumes, NULL);
+  auto o6 = make_temp_override (target_option_current_node,
+   target_option_default_node);
 
   cplus_decl_attributes (decl_p, late_attrs, attr_flags);
 
diff --git a/gcc/testsuite/g++.dg/ext/pragma-target2.C 
b/gcc/testsuite/g++.dg/ext/pragma-target2.C
new file mode 100644
index 000..53eb7dd96a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pragma-target2.C
@@ -0,0 +1,18 @@
+// PR c++/114772
+// { dg-do compile { target x86_64-*-* } }
+
+template
+inline __attribute__((always_inline))
+__attribute__((warn_unused_result))
+int walk_document(V visitor) {return 0;}
+
+template
+void parse_document() {
+int r = walk_document(false);
+}
+
+void stage2_next() {
+parse_document();
+}
+
+#pragma GCC target("pclmul")

base-commit: 6b4569a3ebdd0df44d87d67a18272ec0b878f2ee
-- 
2.49.0

[pushed] c++: format attribute redeclaration [PR116954]

2025-04-16 Thread Jason Merrill

Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Here when merging the two decls, remove_contract_attributes loses
ATTR_IS_DEPENDENT on the format attribute, so apply_late_template_attributes
just returns, so the attribute doesn't get propagated to the type where the
warning looks for it.

Fixed by using copy_node instead of tree_cons to preserve flags.

PR c++/116954

gcc/cp/ChangeLog:

* contracts.cc (remove_contract_attributes): Preserve flags
on the attribute list.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wformat-3.C: New test.
---
 gcc/cp/contracts.cc   |  6 +-
 gcc/testsuite/g++.dg/warn/Wformat-3.C | 19 +++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wformat-3.C

diff --git a/gcc/cp/contracts.cc b/gcc/cp/contracts.cc
index f2b126c8d6b..3ca2102e866 100644
--- a/gcc/cp/contracts.cc
+++ b/gcc/cp/contracts.cc
@@ -863,7 +863,11 @@ remove_contract_attributes (tree fndecl)
   tree list = NULL_TREE;
   for (tree p = DECL_ATTRIBUTES (fndecl); p; p = TREE_CHAIN (p))
 if (!cxx_contract_attribute_p (p))
-  list = tree_cons (TREE_PURPOSE (p), TREE_VALUE (p), list);
+  {
+   tree nl = copy_node (p);
+   TREE_CHAIN (nl) = list;
+   list = nl;
+  }
   DECL_ATTRIBUTES (fndecl) = nreverse (list);
 }
 
diff --git a/gcc/testsuite/g++.dg/warn/Wformat-3.C 
b/gcc/testsuite/g++.dg/warn/Wformat-3.C
new file mode 100644
index 000..e308530761c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wformat-3.C
@@ -0,0 +1,19 @@
+// PR c++/116954
+// { dg-additional-options -Wformat }
+
+#ifndef WORKS
+template
+int fn(char (&buf)[N], const char fmt[], ...)
+  __attribute__ ((__format__ (__printf__, 2, 3)));
+#endif
+
+template
+__attribute__ ((__format__ (__printf__, 2, 3)))
+int fn(char (&)[N], const char [], ...)
+{ return 0; }
+
+int main()
+{
+  char buf[20];
+  return fn(buf, "%s", 42); /* { dg-warning "Wformat" } */
+}

base-commit: 6b4569a3ebdd0df44d87d67a18272ec0b878f2ee
-- 
2.49.0

Re: [PATCH v5 1/2] i386: Prefer PLT indirection for fentry calls under -fPIC

2025-04-16 Thread Uros Bizjak

On Tue, Apr 15, 2025 at 2:19 PM Ard Biesheuvel  wrote:
>
> On Tue, 15 Apr 2025 at 09:48, Uros Bizjak  wrote:
> >
> > On Thu, Apr 10, 2025 at 2:27 PM Ard Biesheuvel  wrote:
> > >
> > > From: Ard Biesheuvel 
> > >
> > > Commit bde21de1205 ("i386: Honour -mdirect-extern-access when calling
> > > __fentry__") updated the logic that emits mcount() / __fentry__() calls
> > > into function prologues when profiling is enabled, to avoid GOT-based
> > > indirect calls when a direct call would suffice.
> > >
> > > There are two problems with that change:
> > > - it relies on -mdirect-extern-access rather than -fno-plt to decide
> > >   whether or not a direct [PLT based] call is appropriate;
> > > - for the PLT case, it falls through to x86_print_call_or_nop(), which
> > >   does not emit the @PLT suffix, resulting in the wrong relocation to be
> > >   used (R_X86_64_PC32 instead of R_X86_64_PLT32)
> > >
> > > Fix this by testing flag_plt instead of ix86_direct_extern_access, and
> > > updating x86_print_call_or_nop() to take flag_pic and flag_plt into
> > > account. This also ensures that -mnop-mcount works as expected when
> > > emitting the PLT based profiling calls.
> > >
> > > While at it, fix the 32-bit logic as well, and issue a PLT call unless
> > > PLTs are explicitly disabled.
> > >
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119386
> > >
> > > Signed-off-by: Ard Biesheuvel 
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/119386
> > > * config/i386/i386.cc (x86_print_call_or_nop): Add @PLT suffix
> > > where appropriate.
> > > (x86_function_profiler): Fall through to x86_print_call_or_nop()
> > > for PIC codegen when flag_plt is set.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR target/119386
> > > * gcc.target/i386/pr119386-1.c: New test.
> > > * gcc.target/i386/pr119386-2.c: New test.
> >
> > OK if there are no further comments in the next day or two.
> >
>
> Thanks
>
> > BTW: Do you have commit rights?
> >
>
> No I do not.

Both patches pushed to the mainline, will be backported to gcc-14.

Thanks,
Uros.

Re: [PATCH] Add _GLIBCXX_DEBUG checks on unordered container local_iterator

2025-04-16 Thread Jonathan Wakely

On Wed, 16 Apr 2025 at 20:57, François Dumont  wrote:
>
>  libstdc++: Add _GLIBCXX_DEBUG checks on unordered container
> local_iterator
>
>  Some _GLIBCXX_DEBUG checks in include/debug/safe_local_iterator.h
> were not properly
>  tested.
>
>  Fix several tests that were not testing the container corresponding to
>  their location in the testsuite.

This mostly looks good, except ...

>  libstdc++-v3/ChangeLog:
>
>  * testsuite/util/debug/unordered_checks.h
>  (invalid_local_iterator_arrow_operator): New test function.
>  (invalid_local_iterator_copy_instantiation): New test function.
>  (invalid_local_iterator_move_instantiation): New test function.

Should these be called invalid_local_iterator_copy_construction and
invalid_local_iterator_move_construction instead of "instantiation"?

And similarly for the filenames of the corresponding tests.


>  (invalid_local_iterator_copy_assignment): New test function.
>  (invalid_local_iterator_move_assignment): New test function.
>  (invalid_local_iterator_const_conversion): New test function.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_arrow_operator_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_const_conversion_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_copy_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_copy_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_move_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/invalid_local_iterator_move_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_map/debug/max_load_factor_neg.cc: Test
> unordered_map.
>  *
> testsuite/23_containers/unordered_multimap/debug/begin2_neg.cc: Test
> unordered_multimap.
>  *
> testsuite/23_containers/unordered_multimap/debug/bucket_size_neg.cc:
> Likewise.
>  *
> testsuite/23_containers/unordered_multimap/debug/cbegin_neg.cc: Likewise.
>  *
> testsuite/23_containers/unordered_multimap/debug/cend_neg.cc: Likewise.
>  *
> testsuite/23_containers/unordered_multimap/debug/end1_neg.cc: Likewise.
>  *
> testsuite/23_containers/unordered_multimap/debug/end2_neg.cc: Likewise.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_arrow_operator_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_const_conversion_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_copy_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_copy_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_move_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_move_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multimap/debug/max_load_factor_neg.cc:
> Test unordered_multimap.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_arrow_operator_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_const_conversion_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_copy_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_copy_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_move_assignment_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_move_instantiation_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_set/debug/invalid_local_iterator_arrow_operator_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_set/debug/invalid_local_iterator_const_conversion_neg.cc:
>  New test case.
>  *
> testsuite/23_containers/unordered_set/debug/invalid_local_iterator_copy_assignment_neg.cc:
>  New test case.
>  *

Re: [RFC] [C]New syntax for the argument of counted_by attribute for C language

2025-04-16 Thread Bill Wendling

Here is the proposal on the Clang Discourse:

https://discourse.llvm.org/t/rfc-bounds-safety-in-c-syntax-compatibility-with-gcc/85885

-bw

[COMMITTED] Doc: Add pointer to --help use to main entry for -Q option [PR90465]

2025-04-16 Thread Sandra Loosemore

-Q does something completely different in conjunction with --help than it
does otherwise; its main entry in the manual didn't mention that, nor did
-Q have an entry in the index for the --help usage.

gcc/ChangeLog
PR driver/90465
* doc/invoke.texi (Overall Options): Add a @cindex for -Q in
connection with --help=.
(Developer Options): Point at --help= documentation for the
other use of -Q.
---
 gcc/doc/invoke.texi | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0b6644b0315..14a78fd236f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2199,6 +2199,7 @@ those that have already been displayed.  If 
@option{--help} is also
 specified anywhere on the command line then this takes precedence
 over any @option{--help=} option.
 
+@opindex Q
 If the @option{-Q} option appears on the command line before the
 @option{--help=} option, then the descriptive text displayed by
 @option{--help=} is changed.  Instead of describing the displayed
@@ -21311,8 +21312,13 @@ Toggle @option{-fvar-tracking-assignments}, in the 
same way that
 
 @opindex Q
 @item -Q
-Makes the compiler print out each function name as it is compiled, and
-print some statistics about each pass when it finishes.
+When used on the command line prior to @option{--help=}, @option{-Q}
+acts as a modifier to the help output.  @xref{Overall Options},
+for details about @option{--help=}.
+
+Otherwise, this option makes the compiler print out each function name
+as it is compiled, and print some statistics about each pass when it
+finishes.
 
 @opindex ftime-report
 @item -ftime-report
-- 
2.34.1

[PUSHED/12 3/6] vec-lowering: Fix ABSU lowering [PR111285]

2025-04-16 Thread Andrew Pinski

ABSU_EXPR lowering incorrectly used the result type when extracting
the operand for the new expression, but for ABSU the result type is
an unsigned type, and ABSU of an unsigned operand is folded away.
The fix is to use a signed type for the operand instead.

Bootstrapped and tested on x86_64-linux-gnu.

PR middle-end/111285

gcc/ChangeLog:

* tree-vect-generic.cc (do_unop): Use a signed type for the
operand if the operation was ABSU_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/torture/vect-absu-1.C: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit ad0084337e901ddaedd48c14e7a5dad9fc2a093e)
---
 gcc/testsuite/g++.dg/torture/vect-absu-1.C | 29 ++
 gcc/tree-vect-generic.cc   | 10 +++-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/vect-absu-1.C

diff --git a/gcc/testsuite/g++.dg/torture/vect-absu-1.C 
b/gcc/testsuite/g++.dg/torture/vect-absu-1.C
new file mode 100644
index 000..0b2035f638f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vect-absu-1.C
@@ -0,0 +1,29 @@
+// { dg-do run }
+// PR middle-end/111285
+
+// The lowering of vect absu was done incorrectly
+
+#define vect1 __attribute__((vector_size(sizeof(int))))
+
+#define negabs(a) a < 0 ? a : -a
+
+__attribute__((noinline))
+int s(int a)
+{
+  return negabs(a);
+}
+__attribute__((noinline))
+vect1 int v(vect1 int a)
+{
+  return negabs(a);
+}
+
+int main(void)
+{
+for(int i = -10; i < 10; i++)
+{
+  vect1 int t = {i};
+  if (v(t)[0] != s(i))
+__builtin_abort();
+}
+}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 9a3ca26d414..5a6256a0b70 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -202,7 +202,15 @@ do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree 
a,
 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
 enum tree_code code, tree type ATTRIBUTE_UNUSED)
 {
-  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
+  tree rhs_type = inner_type;
+
+  /* For ABSU_EXPR, use the signed type for the rhs if the rhs was signed. */
+  if (code == ABSU_EXPR
+  && ANY_INTEGRAL_TYPE_P (TREE_TYPE (a))
+  && !TYPE_UNSIGNED (TREE_TYPE (a)))
+rhs_type = signed_type_for (rhs_type);
+
+  a = tree_vec_extract (gsi, rhs_type, a, bitsize, bitpos);
   return gimplify_build1 (gsi, code, inner_type, a);
 }
 
-- 
2.43.0

[PUSHED/12 5/6] match: Reject non-ssa name/min invariants in gimple_extract [PR116412]

2025-04-16 Thread Andrew Pinski

After the conversion of phiopt's conditional operand handling
to use maybe_push_res_to_seq, it was found that gimple_extract
will extract a memory load out of REALPART_EXPR/IMAGPART_EXPR/VCE and
BIT_FIELD_REF. But that extraction is not needed, as memory loads are not
simplified in match and simplify. So gimple_extract should return false
in those cases.

Changes since v1:
* Move the rejection to gimple_extract from factor_out_conditional_operation.

GCC13: the function is in gimple-match-head.cc rather than 
gimple-match-exports.cc.

Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/116412

gcc/ChangeLog:

* gimple-match-head.cc (gimple_extract): Return false if op0
is neither an SSA name nor a min invariant for
REALPART_EXPR/IMAGPART_EXPR/VCE and BIT_FIELD_REF.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr116412-1.c: New test.

Signed-off-by: Andrew Pinski 
(cherry picked from commit c7b76a076cb2c6ded7ae208464019b04cb0531a2)
---
 gcc/gimple-match-head.cc  | 6 ++
 gcc/testsuite/gcc.dg/torture/pr116412-1.c | 6 ++
 2 files changed, 12 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116412-1.c

diff --git a/gcc/gimple-match-head.cc b/gcc/gimple-match-head.cc
index 2fd27fcbacc..451d736ffe4 100644
--- a/gcc/gimple-match-head.cc
+++ b/gcc/gimple-match-head.cc
@@ -943,6 +943,9 @@ gimple_extract (gimple *stmt, gimple_match_op *res_op,
|| code == VIEW_CONVERT_EXPR)
  {
tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
+   /* op0 needs to be a SSA name or an min invariant. */
+   if (TREE_CODE (op0) != SSA_NAME && !is_gimple_min_invariant 
(op0))
+ return false;
res_op->set_op (code, type, valueize_op (op0));
return true;
  }
@@ -950,6 +953,9 @@ gimple_extract (gimple *stmt, gimple_match_op *res_op,
  {
tree rhs1 = gimple_assign_rhs1 (stmt);
tree op0 = valueize_op (TREE_OPERAND (rhs1, 0));
+   /* op0 needs to be a SSA name or an min invariant. */
+   if (TREE_CODE (op0) != SSA_NAME && !is_gimple_min_invariant 
(op0))
+ return false;
res_op->set_op (code, type, op0,
TREE_OPERAND (rhs1, 1),
TREE_OPERAND (rhs1, 2),
diff --git a/gcc/testsuite/gcc.dg/torture/pr116412-1.c 
b/gcc/testsuite/gcc.dg/torture/pr116412-1.c
new file mode 100644
index 000..3bc26ecd8b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116412-1.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+double f(_Complex double a, _Complex double *b, int c)
+{
+  if (c) return __real__ a;
+  return __real__ *b;
+}
-- 
2.43.0

Re: [PATCH] [testsuite] [ppc] compile [PR112822] with -mvsx

2025-04-16 Thread Peter Bergner

On 4/15/25 11:44 PM, Alexandre Oliva wrote:
> On Apr 15, 2025, Peter Bergner  wrote:
>> I have verified the modified test case ICEs with the exact same
>> error as the original test case using the commit immediately
>> before the commit that fixed the ICE.
> 
> Awesome, thanks!  I hereby withdraw the proposed patch, in favor of yours.

Ok, pushed as obvious after testing.  Thanks.

Peter

Re: [PATCH] x86: Update gcc.target/i386/apx-interrupt-1.c

2025-04-16 Thread H.J. Lu

On Tue, Apr 15, 2025 at 12:19 PM Uros Bizjak  wrote:
>
> On Tue, Apr 15, 2025 at 2:23 PM H.J. Lu  wrote:
> >
> > On Tue, Apr 15, 2025 at 12:45 AM Uros Bizjak  wrote:
> > >
> > > On Tue, Apr 15, 2025 at 1:06 AM H.J. Lu  wrote:
> > > >
> > > > ix86_add_cfa_restore_note omits the REG_CFA_RESTORE REG note for 
> > > > registers
> > > > pushed in red-zone.  Since
> > > >
> > > > commit 0a074b8c7e79f9d9359d044f1499b0a9ce9d2801
> > > > Author: H.J. Lu 
> > > > Date:   Sun Apr 13 12:20:42 2025 -0700
> > > >
> > > > APX: Don't use red-zone with 32 GPRs and no caller-saved registers
> > > >
> > > > disabled red-zone, update gcc.target/i386/apx-interrupt-1.c to expect
> > > > 31 .cfi_restore directives.
> > >
> > > Hm, did you also account for RED_ZONE_RESERVE? The last 8-byte slot is
> > > reserved for internal use by the compiler.
> >
> > There is no red-zone in this case.
> >
> > > Uros.
> > >
> > > >
> > > > PR target/119784
> > > > * gcc.target/i386/apx-interrupt-1.c: Expect 31 .cfi_restore
> > > > directives.
>
> OK.

I am backporting this to GCC 14 together with the other PR target/119784 fix.

> Thanks,
> Uros.
>
> > > >
> > > > Signed-off-by: H.J. Lu 
> > > > ---
> > > >  gcc/testsuite/gcc.target/i386/apx-interrupt-1.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c 
> > > > b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c
> > > > index fefe2e6d6fc..fa1acc7a142 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c
> > > > @@ -66,7 +66,7 @@ void foo (void *frame)
> > > >  /* { dg-final { scan-assembler-times {\t\.cfi_offset 132, -120} 1 } } 
> > > > */
> > > >  /* { dg-final { scan-assembler-times {\t\.cfi_offset 131, -128} 1 } } 
> > > > */
> > > >  /* { dg-final { scan-assembler-times {\t\.cfi_offset 130, -136} 1 } } 
> > > > */
> > > > -/* { dg-final { scan-assembler-times ".cfi_restore" 15} } */
> > > > +/* { dg-final { scan-assembler-times ".cfi_restore" 31 } } */
> > > >  /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)ax" 1 
> > > > } } */
> > > >  /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)bx" 1 
> > > > } } */
> > > >  /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)cx" 1 
> > > > } } */
> > > > --
> > > > 2.49.0
> > > >
> >
> >
> >
> > --
> > H.J.



-- 
H.J.

[PATCH] c: Allow $@` in GNU23/GNU2Y raw string delimiters [PR110343]

2025-04-16 Thread Jakub Jelinek

Hi!

Aaron mentioned in the PR that late in C23 N3124 was adopted and
$@` are now part of the basic character set.  The paper has been implemented
in GCC from what I can see, but we should allow for GNU23/2Y $@` in
raw string delimiters as well, like they are allowed for C++26, because
the delimiters can contain anything from the basic character set but space,
()\, tab, form-feed, newline and backspace.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-04-16  Jakub Jelinek  

PR c++/110343
* lex.cc (lex_raw_string): For C allow $@` in raw string delimiters
if CPP_OPTION (pfile, low_ucns) i.e. for C23 and later.

* gcc.dg/raw-string-1.c: New test.

--- libcpp/lex.cc.jj2025-04-08 14:09:47.173503355 +0200
+++ libcpp/lex.cc   2025-04-16 17:18:04.556931275 +0200
@@ -2711,8 +2711,9 @@ lex_raw_string (cpp_reader *pfile, cpp_t
   || c == '!' || c == '=' || c == ','
   || c == '"' || c == '\''
   || ((c == '$' || c == '@' || c == '`')
-  && CPP_OPTION (pfile, cplusplus)
-  && CPP_OPTION (pfile, lang) > CLK_CXX23)))
+  && (CPP_OPTION (pfile, cplusplus)
+  ? CPP_OPTION (pfile, lang) > CLK_CXX23
+  : CPP_OPTION (pfile, low_ucns)))))
prefix[prefix_len++] = c;
  else
{
--- gcc/testsuite/gcc.dg/raw-string-1.c.jj  2025-04-16 17:32:20.595541753 
+0200
+++ gcc/testsuite/gcc.dg/raw-string-1.c 2025-04-16 17:35:09.266302136 +0200
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu23" } */
+
+const void *s0 = R"0123456789abcdefg()0123456789abcdefg" 0;
+   /* { dg-error "raw string delimiter longer" "longer" { target *-*-* } 
.-1 } */
+   /* { dg-error "stray" "stray" { target *-*-* } .-2 } */
+const void *s1 = R" () " 0;
+   /* { dg-error "invalid character" "invalid" { target *-*-* } .-1 } */
+   /* { dg-error "stray" "stray" { target *-*-* } .-2 } */
+const void *s2 = R"()  " 0;
+   /* { dg-error "invalid character" "invalid" { target *-*-* } .-1 } */
+   /* { dg-error "stray" "stray" { target *-*-* } .-2 } */
+const void *s3 = R")())" 0;
+   /* { dg-error "invalid character" "invalid" { target *-*-* } .-1 } */
+   /* { dg-error "stray" "stray" { target *-*-* } .-2 } */
+const char *s4 = R"@()@";
+const char *s5 = R"$()$";
+const char *s6 = R"`()`";
+const void *s7 = R"\u0040()\u0040" 0;
+   /* { dg-error "invalid character" "invalid" { target *-*-* } .-1 } */
+   /* { dg-error "stray" "stray" { target *-*-* } .-2 } */
+const char *s8 = R"`@$$@`@`$()`@$$@`@`$";
+
+int main () {}

Jakub

[committed, gcc-13] libstdc++: Add dg-options "-std=gnu++20" to backported tests

2025-04-16 Thread Jonathan Wakely

These tests were backported from gcc-14 where the testsuite
automatically adds -std=gnu++20 as needed. That doesn't happen on the
older release branches, so an explicit dg-options directive is needed to
ensure the tests are run by default. Otherwise they'll only be run when
somebody uses a custom --target_board that includes -std=gnu++20.

For 23_containers/span/nodiscard.cc we need to remove the dg-warning
lines that only match for -std=gnu++23, because with the explicit
dg-options to set -std=gnu++20 we never actually test it with C++23
mode.

For 29_atomics/headers/stdatomic.h/115807.cc we need to compile with
-std=gnu++23 instead.

libstdc++-v3/ChangeLog:

* testsuite/20_util/integer_sequence/112473.cc: Compile with
-std=gnu++20.
* testsuite/21_strings/char_traits/requirements/113200.cc:
Likewise.
* testsuite/23_containers/array/comparison_operators/106212.cc:
Likewise.
* testsuite/23_containers/array/creation/115522.cc: Likewise.
* testsuite/23_containers/span/117966.cc: Likewise.
* testsuite/23_containers/span/nodiscard.cc: Likewise. Remove
dg-warning directives that only match for c++23.
* testsuite/23_containers/vector/cons/113841.cc: Compile with
-std=gnu++20.
* testsuite/24_iterators/common_iterator/101527.cc: Likewise.
* testsuite/24_iterators/counted_iterator/101527.cc: Likewise.
* testsuite/24_iterators/move_iterator/lwg3736.cc: Likewise.
* testsuite/25_algorithms/lexicographical_compare_three_way/113960.cc:
Likewise.
* testsuite/27_io/filesystem/iterators/lwg3480.cc: Likewise.
* testsuite/29_atomics/headers/stdatomic.h/115807.cc: Compile
with -std=gnu++23.
* testsuite/experimental/scopeguard/114152.cc: Compile with
-std=gnu++20.
* testsuite/std/format/arguments/112607.cc: Likewise.
* testsuite/std/format/arguments/args_neg.cc: Likewise.
* testsuite/std/format/context.cc: Likewise.
* testsuite/std/format/formatter/112832.cc: Likewise.
* testsuite/std/format/formatter/basic.cc: Likewise.
* testsuite/std/ranges/subrange/111948.cc: Likewise.
* testsuite/std/ranges/subrange/lwg3589.cc: Likewise.
* testsuite/std/time/format/pr117085.cc: Likewise.
* testsuite/std/time/month/2.cc: Likewise.
* testsuite/std/time/time_zone/sys_info_abbrev.cc: Likewise.
* testsuite/std/time/tzdb/links.cc: Likewise.
* testsuite/std/time/weekday/2.cc: Likewise.
---

Tested x86_64-linux. Pushed to gcc-13.

 libstdc++-v3/testsuite/20_util/integer_sequence/112473.cc | 1 +
 .../21_strings/char_traits/requirements/113200.cc | 1 +
 .../23_containers/array/comparison_operators/106212.cc| 2 +-
 .../testsuite/23_containers/array/creation/115522.cc  | 1 +
 libstdc++-v3/testsuite/23_containers/span/117966.cc   | 2 +-
 libstdc++-v3/testsuite/23_containers/span/nodiscard.cc| 8 +---
 .../testsuite/23_containers/vector/cons/113841.cc | 1 +
 .../testsuite/24_iterators/common_iterator/101527.cc  | 1 +
 .../testsuite/24_iterators/counted_iterator/101527.cc | 1 +
 .../testsuite/24_iterators/move_iterator/lwg3736.cc   | 1 +
 .../lexicographical_compare_three_way/113960.cc   | 1 +
 .../testsuite/27_io/filesystem/iterators/lwg3480.cc   | 1 +
 .../testsuite/29_atomics/headers/stdatomic.h/115807.cc| 1 +
 libstdc++-v3/testsuite/experimental/scopeguard/114152.cc  | 1 +
 libstdc++-v3/testsuite/std/format/arguments/112607.cc | 1 +
 libstdc++-v3/testsuite/std/format/arguments/args_neg.cc   | 1 +
 libstdc++-v3/testsuite/std/format/context.cc  | 1 +
 libstdc++-v3/testsuite/std/format/formatter/112832.cc | 1 +
 libstdc++-v3/testsuite/std/format/formatter/basic.cc  | 1 +
 libstdc++-v3/testsuite/std/ranges/subrange/111948.cc  | 1 +
 libstdc++-v3/testsuite/std/ranges/subrange/lwg3589.cc | 1 +
 libstdc++-v3/testsuite/std/time/format/pr117085.cc| 1 +
 libstdc++-v3/testsuite/std/time/month/2.cc| 1 +
 .../testsuite/std/time/time_zone/sys_info_abbrev.cc   | 1 +
 libstdc++-v3/testsuite/std/time/tzdb/links.cc | 1 +
 libstdc++-v3/testsuite/std/time/weekday/2.cc  | 1 +
 26 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/libstdc++-v3/testsuite/20_util/integer_sequence/112473.cc 
b/libstdc++-v3/testsuite/20_util/integer_sequence/112473.cc
index 14abfbc8149..1c8035c6b1e 100644
--- a/libstdc++-v3/testsuite/20_util/integer_sequence/112473.cc
+++ b/libstdc++-v3/testsuite/20_util/integer_sequence/112473.cc
@@ -1,3 +1,4 @@
+// { dg-options "-std=gnu++20" }
 // { dg-do compile { target c++20 } }
 
 // PR libstdc++/112473 - integer_sequence accepts non-integer types
diff --git 
a/libstdc++-v3/testsuite/21_strings/char_traits/requirements/113200.cc 
b/libstdc++-v3/testsuite/21_strings/char_traits/requirements/113200.cc
index 0fe765d53bc..6a1b03293ef 100644
--- a/

[PATCH] Add _GLIBCXX_DEBUG checks on unordered container local_iterator

2025-04-16 Thread François Dumont

    libstdc++: Add _GLIBCXX_DEBUG checks on unordered container 
local_iterator


    Some _GLIBCXX_DEBUG checks in include/debug/safe_local_iterator.h 
were not properly

    tested.

    Fix several tests that were not testing the container corresponding to
    their location in the testsuite.

    libstdc++-v3/ChangeLog:

    * testsuite/util/debug/unordered_checks.h
    (invalid_local_iterator_arrow_operator): New test function.
    (invalid_local_iterator_copy_instantiation): New test function.
    (invalid_local_iterator_move_instantiation): New test function.
    (invalid_local_iterator_copy_assignment): New test function.
    (invalid_local_iterator_move_assignment): New test function.
    (invalid_local_iterator_const_conversion): New test function.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_arrow_operator_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_const_conversion_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_copy_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_copy_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_move_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/invalid_local_iterator_move_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_map/debug/max_load_factor_neg.cc: Test 
unordered_map.
    * 
testsuite/23_containers/unordered_multimap/debug/begin2_neg.cc: Test 
unordered_multimap.
    * 
testsuite/23_containers/unordered_multimap/debug/bucket_size_neg.cc: 
Likewise.
    * 
testsuite/23_containers/unordered_multimap/debug/cbegin_neg.cc: Likewise.
    * 
testsuite/23_containers/unordered_multimap/debug/cend_neg.cc: Likewise.
    * 
testsuite/23_containers/unordered_multimap/debug/end1_neg.cc: Likewise.
    * 
testsuite/23_containers/unordered_multimap/debug/end2_neg.cc: Likewise.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_arrow_operator_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_const_conversion_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_copy_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_copy_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_move_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/invalid_local_iterator_move_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multimap/debug/max_load_factor_neg.cc: 
Test unordered_multimap.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_arrow_operator_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_const_conversion_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_copy_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_copy_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_move_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_multiset/debug/invalid_local_iterator_move_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_arrow_operator_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_const_conversion_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_copy_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_copy_instantiation_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_move_assignment_neg.cc:

    New test case.
    * 
testsuite/23_containers/unordered_set/debug/invalid_local_iterator_move_instantiation_neg.cc:

    New test case.

Tested under Linux x86_64, ok to commit ?

François
diff --git 
a/libstdc++-v3/testsuite/23_containers/u

Re: [PATCH] rx: avoid adding setpsw for rx_cmpstrn when len is const

2025-04-16 Thread Jeff Law





On 4/13/25 10:50 PM, Keith Packard wrote:

pattern using rx_cmpstrn is cmpstrsi for which len is a constant -1,
so we'll be moving the setpsw instructions from rx_cmpstrn to
cmpstrnsi as follows:

  1. Adjust the predicate on the length operand from "register_operand"
 to "nonmemory_operand". This will allow constants to appear here,
 instead of having them already transferred into a register.

  2. Check to see if the len value is constant, and then check if it is
 actually zero. In that case, short-circuit the rest of the pattern
 and set the result register to 0.

  3. Emit 'setpsw c' and 'setpsw z' instructions when the len is not a
 constant, in case it turns out to be zero at runtime.

  4. Remove the two 'setpsw' instructions from rx_cmpstrn.
Thanks.  I made some minor stylistic adjustments and pushed the change 
to the trunk.  Thanks!


jeff

[PATCH] s390: Use match_scratch instead of scratch in define_split [PR119834]

2025-04-16 Thread Jakub Jelinek

Hi!

The following testcase ICEs since r15-1579 (addition of late combiner),
because *clrmem_short can't be split.
The problem is that the define_insn uses
   (use (match_operand 1 "nonmemory_operand" "n,a,a,a"))
   (use (match_operand 2 "immediate_operand" "X,R,X,X"))
   (clobber (match_scratch:P 3 "=X,X,X,&a"))
and define_split assumed that if operands[1] is const_int_operand,
match_scratch will be always scratch, and it will be reg only if
it was the last alternative where operands[1] is a reg.
The pattern doesn't guarantee it though, of course RA will not try to
uselessly assign a reg there if it is not needed, but during RA
on the testcase below we match the last alternative, but then comes
late combiner and propagates const_int 3 into operands[1].  And that
matches fine, match_scratch matches either scratch or reg and the constraint
in that case is X for the first variant, so still just fine.  But we won't
split that because the splitters only expect scratch.

The following patch fixes it by using match_scratch instead of scratch,
so that it accepts either.

Bootstrapped on s390x-linux, ok for trunk if regtesting passes as well?

2025-04-16  Jakub Jelinek  

PR target/119834
* config/s390/s390.md (define_split after *cpymem_short): Use
(clobber (match_scratch N)) instead of (clobber (scratch)).  Use
(match_dup 4) and operands[4] instead of (match_dup 3) and operands[3]
in the last of those.
(define_split after *clrmem_short): Use (clobber (match_scratch N))
instead of (clobber (scratch)).
(define_split after *cmpmem_short): Likewise.

* g++.target/s390/pr119834.C: New test.

--- gcc/config/s390/s390.md.jj  2025-04-14 07:26:46.447883840 +0200
+++ gcc/config/s390/s390.md 2025-04-16 13:41:04.215127231 +0200
@@ -3597,7 +3597,7 @@ (define_split
 (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
-   (clobber (scratch))]
+   (clobber (match_scratch 4))]
   "reload_completed"
   [(parallel
 [(set (match_dup 0) (match_dup 1))
@@ -3609,7 +3609,7 @@ (define_split
 (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
-   (clobber (scratch))]
+   (clobber (match_scratch 4))]
   "reload_completed"
   [(parallel
 [(unspec [(match_dup 2) (match_dup 3)
@@ -3623,14 +3623,14 @@ (define_split
 (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
-   (clobber (scratch))]
+   (clobber (match_scratch 3))]
   "TARGET_Z10 && reload_completed"
   [(parallel
 [(unspec [(match_dup 2) (const_int 0)
-  (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
  (set (match_dup 0) (match_dup 1))
  (use (const_int 1))])]
-  "operands[3] = gen_label_rtx ();")
+  "operands[4] = gen_label_rtx ();")
 
 (define_split
   [(set (match_operand:BLK 0 "memory_operand" "")
@@ -3852,7 +3852,7 @@ (define_split
 (const_int 0))
(use (match_operand 1 "const_int_operand" ""))
(use (match_operand 2 "immediate_operand" ""))
-   (clobber (scratch))
+   (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
   "reload_completed"
   [(parallel
@@ -3866,7 +3866,7 @@ (define_split
 (const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (match_operand 2 "memory_operand" ""))
-   (clobber (scratch))
+   (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
   "reload_completed"
   [(parallel
@@ -3882,7 +3882,7 @@ (define_split
 (const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
-   (clobber (scratch))
+   (clobber (match_scratch 2))
(clobber (reg:CC CC_REGNUM))]
   "TARGET_Z10 && reload_completed"
   [(parallel
@@ -4047,7 +4047,7 @@ (define_split
  (match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
-   (clobber (scratch))]
+   (clobber (match_scratch 4))]
   "reload_completed"
   [(parallel
 [(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
@@ -4060,7 +4060,7 @@ (define_split
  (match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
-   (clobber (scratch))]
+   (clobber (match_scratch 4))]
   "reload_completed"
   [(parallel
 [(unspec [(match_dup 2) (match_dup 3)
@@ -4075,7 +4075,7 @@ (define_split
  (match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
-   (clobber (scratch))]
+   (clobber (match_

Re: [PATCH] PR tree-optimization/119712 - Always reflect lower bits from mask in subranges.

2025-04-16 Thread Andrew MacLeod




On 4/16/25 17:28, Sam James wrote:

Andrew MacLeod  writes:

* gcc.dg/tree-ssa/phi-opt-value-5.c  : With the expanded ranges, CCP2
   pass used to export:
    Global Exported: d_3 = [irange] int [-INF, +INF] MASK 0xfffe
VALUE 0x1
and now
    Global Exported: d_3 = [irange] int [-INF, -1][1, +INF] MASK
0xfffe VALUE 0x1
which in turn makes the following comment obsolete as the optimization
does happen earlier.:
/* fdiv1 requires until later than phiopt2 to be able to detect that
    d is non-zero. to be able to remove the conditional.  */
Adjusted the testcase to expect everything to be taken care of by
phi-opt2 pass.
The comment looks like it's still there in the patch -- does it need
dropping?


Yep, thanks.

Andrew

Re: [PATCH] libgcobol: mark riscv64--linux as supported target

2025-04-16 Thread Richard Biener

On Tue, Apr 15, 2025 at 4:33 PM Jeff Law  wrote:
>
>
>
> On 4/15/25 7:57 AM, Andreas Schwab wrote:
> >   * configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with
> >   64-bit multilib.
> Can't say I'm happy with the amount of Cobol related churn at this phase
> in our cycle.  But this should be exceedingly safe.  So OK.

For the record it now builds fine on s390x-linux (big endian) as well, but
test results are not that good.  At least _some_ tests pass ...

Native configuration is s390x-ibm-linux-gnu

=== cobol tests ===

Running target unix
FAIL: cobol.dg/literal1.cob   -O0  execution test
FAIL: cobol.dg/literal1.cob   -O1  execution test
[... many FAILs stripped ...]
FAIL: cobol.dg/group2/floating-point_literals.cob   -O3 -g   output file test
FAIL: cobol.dg/group2/floating-point_literals.cob   -Os   output file test

=== cobol Summary ===

# of expected passes		2757
# of unexpected failures	342
# of expected failures		48
# of unresolved testcases	54

> jeff
>

Re: [PATCH] libatomic: Fix up libat_{,un}lock_n [PR119796]

2025-04-16 Thread Richard Biener

On Mon, 14 Apr 2025, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR (and I think in PR101075 too), we can run into
> deadlock with libat_lock_n calls with larger n.
> As mentioned in PR66842, we use multiple locks (normally 64 mutexes
> for each 64 byte cache line in 4KiB page) and currently can lock more
> than one lock, in particular for n [0, 64] a single lock, for n [65, 128]
> 2 locks, for n [129, 192] 3 locks etc.
> There are two problems with this:
> 1) we can deadlock if there is some wrap-around, because the locks are
>acquired always in the order from addr_hash (ptr) up to
>locks[NLOCKS-1].mutex and then if needed from locks[0].mutex onwards;
>so if e.g. 2 threads perform libat_lock_n with n = 2048+64, in one
>case at pointer starting at page boundary and in another case at
>page boundary + 2048 bytes, the first thread can lock the first
>32 mutexes, the second thread can lock the last 32 mutexes and
>then first thread wait for the lock 32 held by second thread and
>second thread wait for the lock 0 held by the first thread;
>fixed below by always locking the locks in order of increasing
>index, if there is a wrap-around, by locking in 2 loops, first
>locking some locks at the start of the array and second at the
>end of it
> 2) the number of locks seems to be determined solely depending on the
>n value, I think that is wrong, we don't know the structure alignment
>on the libatomic side, it could very well be 1 byte aligned struct,
>and so how many cachelines are actually (partly or fully) occupied
>by the atomic access depends not just on the size, but also on
>ptr % WATCH_SIZE, e.g. 2 byte structure at address page_boundary+63
>should IMHO lock 2 locks because it occupies the first and second
>cacheline
> 
> Note, before this patch it locked exactly one lock for n = 0, while
> with this patch it could lock either no locks at all (if it is at cacheline
> boundary) or 1 (otherwise).
> Dunno if libatomic APIs can be called for zero sizes and whether
> we actually care that much how many mutexes are locked in that case,
> because one can't actually read/write anything into zero sized memory.
> If you think it is important, I could add else if (nlocks == 0) nlocks = 1;
> in both spots.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2025-04-14  Jakub Jelinek  
> 
>   PR libstdc++/119796
>   * config/posix/lock.c (libat_lock_n, libat_unlock_n): Start with
>   computing how many locks will be needed and take into account
>   ((uintptr_t)ptr % WATCH_SIZE).  If some locks from the end of the
>   locks array and others from the start of it will be needed, first
>   lock the ones from the start followed by ones from the end.
> 
> --- libatomic/config/posix/lock.c.jj  2025-04-08 14:09:40.988589457 +0200
> +++ libatomic/config/posix/lock.c 2025-04-14 14:27:11.933210898 +0200
> @@ -81,19 +81,22 @@ libat_lock_n (void *ptr, size_t n)
>  {
>uintptr_t h = addr_hash (ptr);
>size_t i = 0;
> +  size_t nlocks
> += (n + ((uintptr_t)ptr % WATCH_SIZE) + WATCH_SIZE - 1) / WATCH_SIZE;
>  
>/* Don't lock more than all the locks we have.  */
> -  if (n > PAGE_SIZE)
> -n = PAGE_SIZE;
> +  if (nlocks > NLOCKS)
> +nlocks = NLOCKS;
>  
> -  do
> +  if (__builtin_expect (h + nlocks > NLOCKS, 0))
>  {
> -  pthread_mutex_lock (&locks[h].mutex);
> -  if (++h == NLOCKS)
> - h = 0;
> -  i += WATCH_SIZE;
> +  size_t j = h + nlocks - NLOCKS;
> +  for (; i < j; ++i)
> + pthread_mutex_lock (&locks[i].mutex);
>  }
> -  while (i < n);
> +
> +  for (; i < nlocks; ++i)
> +pthread_mutex_lock (&locks[h++].mutex);
>  }
>  
>  void
> @@ -101,16 +104,20 @@ libat_unlock_n (void *ptr, size_t n)
>  {
>uintptr_t h = addr_hash (ptr);
>size_t i = 0;
> +  size_t nlocks
> += (n + ((uintptr_t)ptr % WATCH_SIZE) + WATCH_SIZE - 1) / WATCH_SIZE;
>  
> -  if (n > PAGE_SIZE)
> -n = PAGE_SIZE;
> +  /* Don't lock more than all the locks we have.  */
> +  if (nlocks > NLOCKS)
> +nlocks = NLOCKS;
>  
> -  do
> +  if (__builtin_expect (h + nlocks > NLOCKS, 0))
>  {
> -  pthread_mutex_unlock (&locks[h].mutex);
> -  if (++h == NLOCKS)
> - h = 0;
> -  i += WATCH_SIZE;
> +  size_t j = h + nlocks - NLOCKS;
> +  for (; i < j; ++i)
> + pthread_mutex_unlock (&locks[i].mutex);
>  }
> -  while (i < n);
> +
> +  for (; i < nlocks; ++i)
> +pthread_mutex_unlock (&locks[h++].mutex);
>  }
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Re: [PATCH][GCC14] Extend check-function-bodies to allow label and directives

2025-04-16 Thread Richard Biener

On Tue, 15 Apr 2025, H.J. Lu wrote:

> Hi,
> 
> I'd like to backport this testsuite enhancement to GCC 14 so that
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2025-April/680896.html
> 
> can be backported to GCC 14 with testcases unchanged.

OK.

> 
> H.J.
> ---
> As PR target/116174 has shown, we may need to verify labels and the directive
> order.  Extend check-function-bodies to support matched output lines to
> allow label and directives.
> 
> gcc/
> 
>   * doc/sourcebuild.texi (check-function-bodies): Add an optional
>   argument for matched output lines.
> 
> gcc/testsuite/
> 
>   * gcc.target/i386/pr116174.c: Use check-function-bodies.
>   * lib/scanasm.exp (parse_function_bodies): Append the line if
>   $up_config(matched) matches the line.
>   (check-function-bodies): Add an argument for matched.  Set
>   up_config(matched) to $matched.  Append the expected line without
>   $config(line_prefix) to function_regexp if it starts with ".L".
> 
> Signed-off-by: H.J. Lu 
> (cherry picked from commit d6bb1e257fc414d21bc31faa7ddecbc93a197e3c)
> ---
>  gcc/doc/sourcebuild.texi |  9 ++---
>  gcc/testsuite/gcc.target/i386/pr116174.c | 18 +++---
>  gcc/testsuite/lib/scanasm.exp| 15 +--
>  3 files changed, 34 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 23dedef4161..c8130dc1ba9 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -3440,7 +3440,7 @@ assembly output.
>  Passes if @var{symbol} is not defined as a hidden symbol in the test's
>  assembly output.
>  
> -@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ 
> target/xfail @var{selector} @}]]
> +@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ 
> target/xfail @var{selector} @} [@var{matched}]]]
>  Looks through the source file for comments that give the expected assembly
>  output for selected functions.  Each line of expected output starts with the
>  prefix string @var{prefix} and the expected output for a function as a whole
> @@ -3467,8 +3467,11 @@ Depending on the configuration (see
>  @code{configure_check-function-bodies} in
>  @file{gcc/testsuite/lib/scanasm.exp}), the test may discard from the
>  compiler's assembly output directives such as @code{.cfi_startproc},
> -local label definitions such as @code{.LFB0}, and more.
> -It then matches the result against the expected
> +local label definitions such as @code{.LFB0}, and more.  This behavior
> +can be overridden using the optional @var{matched} argument, which
> +specifies a regexp for lines that should not be discarded in this way.
> +
> +The test then matches the result against the expected
>  output for a function as a single regular expression.  This means that
>  later lines can use backslashes to refer back to @samp{(@dots{})}
>  captures on earlier lines.  For example:
> diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c 
> b/gcc/testsuite/gcc.target/i386/pr116174.c
> index 8877d0b51af..686aeb9ff31 100644
> --- a/gcc/testsuite/gcc.target/i386/pr116174.c
> +++ b/gcc/testsuite/gcc.target/i386/pr116174.c
> @@ -1,6 +1,20 @@
>  /* { dg-do compile { target *-*-linux* } } */
> -/* { dg-options "-O2 -fcf-protection=branch" } */
> +/* { dg-options "-O2 -g0 -fcf-protection=branch" } */
> +/* Keep labels and directives ('.p2align', '.cfi_startproc').
> +/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} 
>  } } */
>  
> +/*
> +**foo:
> +**.LFB0:
> +**   .cfi_startproc
> +** (
> +**   endbr64
> +**   .p2align 5
> +** |
> +**   endbr32
> +** )
> +**...
> +*/
>  char *
>  foo (char *dest, const char *src)
>  {
> @@ -8,5 +22,3 @@ foo (char *dest, const char *src)
>  /* nothing */;
>return --dest;
>  }
> -
> -/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */
> diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
> index 6cf9997240d..d1c8e3b5079 100644
> --- a/gcc/testsuite/lib/scanasm.exp
> +++ b/gcc/testsuite/lib/scanasm.exp
> @@ -952,6 +952,9 @@ proc parse_function_bodies { config filename result } {
>   verbose "parse_function_bodies: $function_name:\n$function_body"
>   set up_result($function_name) $function_body
>   set in_function 0
> + } elseif { $up_config(matched) ne "" \
> +&& [regexp $up_config(matched) $line] } {
> + append function_body $line "\n"
>   } elseif { [regexp $up_config(fluff) $line] } {
>   verbose "parse_function_bodies: $function_name: ignoring fluff 
> line: $line"
>   } else {
> @@ -982,7 +985,7 @@ proc check_function_body { functions name body_regexp } {
>  
>  # Check the implementations of functions against expected output.  Used as:
>  #
> -# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } }
> +# { dg-do { check-function-bodies PREFIX

[PATCH]middle-end: fix masking for partial vectors and early break [PR119351]

2025-04-16 Thread Tamar Christina

Hi All,

The following testcase shows an incorrect masked codegen:

#define N 512
#define START 1
#define END 505
 
int x[N] __attribute__((aligned(32)));

int __attribute__((noipa))
foo (void)
{
  int z = 0;
  for (unsigned int i = START; i < END; ++i)
{
  z++;
  if (x[i] > 0)
continue;

  return z;
}
  return -1;
}

notice how there's a continue there instead of a break.  This means we generate
a control flow where success stays within the loop iteration:

  mask_patt_9.12_46 = vect__1.11_45 > { 0, 0, 0, 0 };
  vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
  if (vec_mask_and_47 == { -1, -1, -1, -1 })
goto ; [41.48%]
  else
goto ; [58.52%]

However when loop_mask_41 is a partial mask this comparison can lead to an
incorrect match.  In this case the mask is:

  # loop_mask_41 = PHI 

due to peeling for alignment with masking and compiling with
-msve-vector-bits=128.

At codegen time we generate:

ptrue   p15.s, vl4
ptrue   p7.b, vl1
not p7.b, p15/z, p7.b
.L5:
ld1w    z29.s, p7/z, [x1, x0, lsl 2]
cmpgt   p7.s, p7/z, z29.s, #0
not p7.b, p15/z, p7.b
ptest   p15, p7.b
b.none  .L2
..

notice how at expand time the basic blocks are inverted and a not is generated.
But the generated not is unmasked (or predicated over an ALL true mask in this
case).  This has the unintended side-effect of flipping the results of the
inactive lanes (which were zero'd by the cmpgt) into -1.  Which then incorrectly
causes us to not take the branch to .L2.

This is happening because the expander has no context about the mask, and since
we can't mask a gcond, we do the next best thing which is to mask both operands.

We already mask the compare, but this patch now also masks the constant.  In the
normal case this means we drop it since {0, ..} & mask = {0, ..} but in the case
of a forall comparison we'll keep the mask, allowing the generated code to
correctly mask the results.

For the above we now generate:

.L5:
ld1w    z28.s, p7/z, [x1, x0, lsl 2]
cmpgt   p14.s, p7/z, z28.s, #0
eors    p7.b, p15/z, p7.b, p14.b
b.none  .L2

This fixes gromacs with > 1 OpenMP threads and improves performance.

Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.

Ok for master? and backport to GCC-14?

Thanks,
Tamar


gcc/ChangeLog:

PR tree-optimization/119351
* tree-vect-stmts.cc (vectorizable_early_exit): Mask both operands of
the gcond for partial masking support.

gcc/testsuite/ChangeLog:

PR tree-optimization/119351
* gcc.target/aarch64/sve/pr119351.c: New test.
* gcc.target/aarch64/sve/pr119351_run.c: New test.

---
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
new file mode 100644
index 
..198f7edb0fc01bfc74ae231db7823e9a6f0bc119
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
@@ -0,0 +1,38 @@
+/* Fix for PR119351 alignment peeling with vectors and VLS.  */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=256 --param 
aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" ""} } */
+
+#define N 512
+#define START 1
+#define END 505
+ 
+int x[N] __attribute__((aligned(32)));
+
+/*
+** foo:
+** ...
+** ld1wz[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
+** cmpgt   p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** eorsp[0-9]+.b, p[0-9]+/z, p[0-9]+.b, p[0-9]+.b
+** ...
+*/
+
+int __attribute__((noipa))
+foo (void)
+{
+  int z = 0;
+  for (unsigned int i = START; i < END; ++i)
+{
+  z++;
+  if (x[i] > 0)
+continue;
+
+  return z;
+}
+  return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" 
"vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c
new file mode 100644
index 
..d36ab0eb7a900504e7dc2266ec5a19d1beeb5123
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c
@@ -0,0 +1,20 @@
+/* Fix for PR119351 alignment peeling with vectors and VLS.  */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw 
} } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw 
} } */
+
+#include "pr119351.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  x[0] = 1;
+  x[1] = 21;
+  x[2] = 39;
+  x[3] = 59;
+  int res = foo ();
+  if (res != 4)
+__builtin_abort ();

Re: [PATCH] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

There are few whitespace changes that are caused by me mass removing spaces
that are followed by tabs.
They seem to affect only the code that was recently added by me in debug
string and range support, so I think it is ok to keep them.

On Wed, Apr 16, 2025 at 9:16 AM Tomasz Kamiński  wrote:

> This patch implements formatter specializations for pair and tuple from
> P2286R8. In addition, using `m` and range_format::map (from P2585R1) for
> ranges is now supported.
>
> The formatters for pairs and tuples whose corresponding elements are the
> same
> (after applying remove_cvref_t) derive from the same __tuple_formatter
> class.
> This reduces the code duplication, as most of the parsing and formatting is
> the
> same in such cases. We use a custom reduced implementation of the tuple
> (__formatters_storage) to store the elements formatters.
>
> Handling of the padding (width and fill) options, is extracted to
> __format::__format_padded function, that is used both by __tuple_formatter
> and
> range_formatter. To reduce number of instantiations range_formatter::format
> triggers, we cast incoming range to __format::__maybe_const_range<_Rg,
> _CharT>&,
> before formatting it.
>
> As in the case of previous commits, the signatures of the user-facing parse
> and format methods of the provided formatters deviate from the standard by
> constraining types of parameters:
> * _CharT is constrained __formatter::__char
> * basic_format_parse_context<_CharT> for parse argument
> * basic_format_context<_Out, _CharT> for format second argument
> The standard specifies last three of above as unconstrained types.
>
> Finally, test for tuple-like std::array and std::ranges::subrange,
> that illustrate that they remain formatted as ranges.
>
> PR libstdc++/PR109162
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (__formatter_int::_M_format_character_escaped)
> (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
> (__format::__format_padded, __format::maybe_const)
> (__format::__indexed_formatter_storage,
> __format::__tuple_formatter)
> (std::formatter, _CharT>>)
> (std::formatter, _CharT): Define.
> (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> __format::__maybe_const_range<_Rg, _CharT>&.
> (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> and use __format_padded.
> (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
> __format_padded.
> * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> Define.
> * testsuite/std/format/ranges/formatter.cc: Tests for `m`
> specifier.
> * testsuite/std/format/ranges/sequence.cc: Tests for array and
> subrange.
> * testsuite/std/format/ranges/map.cc: New test.
> * testsuite/std/format/tuple.cc: New test.
> ---
> Testing on x86_64-linux, tests matched by `*format*` passes.
> OK for trunk? Should I wait for 16?
>
>  libstdc++-v3/include/std/format   | 357 +++---
>  .../testsuite/std/format/ranges/formatter.cc  |   6 +-
>  .../testsuite/std/format/ranges/map.cc| 209 ++
>  .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
>  libstdc++-v3/testsuite/std/format/tuple.cc| 259 +
>  .../testsuite/util/testsuite_iterators.h  |   3 +
>  6 files changed, 806 insertions(+), 80 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
>  create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
>
> diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> index 096dda4f989..5b93eb8bc2d 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -1350,8 +1350,7 @@ namespace __format
> __fc, _M_spec);
>
>   __format::_Str_sink<_CharT> __sink;
> - __format::_Sink_iter<_CharT> __out(__sink);
> - __format::__write_escaped(__out, __s, __term);
> + __format::__write_escaped(__sink.out(), __s, __term);
>   basic_string_view<_CharT> __escaped(__sink.view().data(),
>   __sink.view().size());
>   const size_t __escaped_width = _S_trunc(__escaped, __prec);
> @@ -1387,13 +1386,13 @@ namespace __format
> {
>   ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
>   ranges::subrange __sub(__first, __first + __n);
> - return format(_String(from_range, __sub), __fc);
> + return format(_String(from_range, __sub), __fc);
> }
>   else
> {
>   // N.B. preserve the computed size
>   ranges::subrange __sub(__

[PATCH] testsuite: force AMDGCN test for vect-early-break_18.c to consistent architecture [PR119286]

2025-04-16 Thread Tamar Christina

Hi All,

The given test is intended to test vectorization of a strided access done by
having a step of > 1.

GCN target doesn't support load lanes, so the testcase is expected to fail,
other targets create a permuted load here which we then reject.

However some GCN arch don't seem to support the permuted loads either, so the
vectorizer tries a gather/scatter.  But the indices aren't supported by some
target, so instead the vectorizer scalarizes the loads.

I can't really test for which architecture is being used by the compiler, so
instead this updates the testcase to use one single architecture so we get a
consistent result.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Cross checked the failing case on amdgcn-amdhsa
and all pass now.

Ok for master?

Thanks,
Tamar

gcc/testsuite/ChangeLog:

PR target/119286
* gcc.dg/vect/vect-early-break_18.c: Force -march=gfx908 for amdgcn.

---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
index 
edddb44bad66aa419d097f69ca850e5eaa66e014..cd397049c84c47cbd3e9facb87419de58ba5b148
 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
@@ -2,7 +2,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_early_break } */
 /* { dg-require-effective-target vect_int } */
-
+/* { dg-additional-options "-march=gfx908" { target amdgcn*-*-* } } */
 /* { dg-additional-options "-Ofast" } */
 
 /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target 
vect_load_lanes } } } */


-- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
index edddb44bad66aa419d097f69ca850e5eaa66e014..cd397049c84c47cbd3e9facb87419de58ba5b148 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c
@@ -2,7 +2,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target vect_early_break } */
 /* { dg-require-effective-target vect_int } */
-
+/* { dg-additional-options "-march=gfx908" { target amdgcn*-*-* } } */
 /* { dg-additional-options "-Ofast" } */
 
 /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_load_lanes } } } */

Re: [PATCH v2] libstdc++: Implement formatters for pair and tuple [PR109162]

2025-04-16 Thread Tomasz Kaminski

On Wed, Apr 16, 2025 at 11:49 AM Tomasz Kamiński 
wrote:

> This patch implements formatter specializations for pair and tuple from
> P2286R8. In addition, using `m` and range_format::map (from P2585R1) for
> ranges is now supported.
>
> The formatters for pairs and tuples whose corresponding elements are the
> same
> (after applying remove_cvref_t) derive from the same __tuple_formatter
> class.
> This reduces the code duplication, as most of the parsing and formatting is
> the
> same in such cases. We use a custom reduced implementation of the tuple
> (__formatters_storage) to store the elements formatters.
>
> Handling of the padding (width and fill) options, is extracted to
> __format::__format_padded function, that is used both by __tuple_formatter
> and
> range_formatter. To reduce number of instantiations range_formatter::format
> triggers, we cast incoming range to __format::__maybe_const_range<_Rg,
> _CharT>&,
> before formatting it.
>
> As in the case of previous commits, the signatures of the user-facing parse
> and format methods of the provided formatters deviate from the standard by
> constraining types of parameters:
> * _CharT is constrained __formatter::__char
> * basic_format_parse_context<_CharT> for parse argument
> * basic_format_context<_Out, _CharT> for format second argument
> The standard specifies last three of above as unconstrained types.
>
> Finally, test for tuple-like std::array and std::ranges::subrange,
> that illustrate that they remain formatted as ranges.
>
> PR libstdc++/PR109162
>
> libstdc++-v3/ChangeLog:
>
> * include/std/format (__formatter_int::_M_format_character_escaped)
> (__formatter_str::format): Use __sink.out() to produce _Sink_iter.
> (__format::__const_formattable_range): Moved closer to
> range_formatter.
> (__format::__maybe_const_range): Use `__conditional_t` and moved
> closer
> to range_formatter.
> (__format::__format_padded, __format::maybe_const)
> (__format::__indexed_formatter_storage,
> __format::__tuple_formatter)
> (std::formatter, _CharT>>)
> (std::formatter, _CharT): Define.
> (std::formatter<_Rg, _CharT>::format): Cast incoming range to
> __format::__maybe_const_range<_Rg, _CharT>&.
> (std::formatter<_Rg, _CharT>::_M_format): Extracted from format,
> and use __format_padded.
> (std::formatter<_Rg, _CharT>::_M_format_no_padding): Rename...
> (std::formatter<_Rg, _CharT>::_M_format_elems): ...to this.
> (std::formatter<_Rg, _CharT>::_M_format_with_padding): Extracted as
> __format_padded.
> * testsuite/util/testsuite_iterators.h (test_input_range_nocopy):
> Define.
> * testsuite/std/format/ranges/formatter.cc: Tests for `m`
> specifier.
> * testsuite/std/format/ranges/sequence.cc: Tests for array and
> subrange.
> * testsuite/std/format/ranges/map.cc: New test.
> * testsuite/std/format/tuple.cc: New test.
> ---
> I dodged the discussion of which is safe, static_cast or const_cast, by
> calling:
> + using __maybe_const_range
> +   = __format::__maybe_const_range<_Rg, _CharT>;
> + return _M_format<__maybe_const_range>(__rg, __fc);
> I have also applied the `__conditional_t` to __maybe_const_range,
> and moved these two helpers closer to range_formatter.
>
> OK for trunk?
> ---
>
>  libstdc++-v3/include/std/format   | 377 ++
>  .../testsuite/std/format/ranges/formatter.cc  |   6 +-
>  .../testsuite/std/format/ranges/map.cc| 209 ++
>  .../testsuite/std/format/ranges/sequence.cc   |  52 ++-
>  libstdc++-v3/testsuite/std/format/tuple.cc| 259 
>  .../testsuite/util/testsuite_iterators.h  |   3 +
>  6 files changed, 813 insertions(+), 93 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/std/format/ranges/map.cc
>  create mode 100644 libstdc++-v3/testsuite/std/format/tuple.cc
>
> diff --git a/libstdc++-v3/include/std/format
> b/libstdc++-v3/include/std/format
> index 096dda4f989..58ac9b2a48f 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -1350,8 +1350,7 @@ namespace __format
> __fc, _M_spec);
>
>   __format::_Str_sink<_CharT> __sink;
> - __format::_Sink_iter<_CharT> __out(__sink);
> - __format::__write_escaped(__out, __s, __term);
> + __format::__write_escaped(__sink.out(), __s, __term);
>   basic_string_view<_CharT> __escaped(__sink.view().data(),
>   __sink.view().size());
>   const size_t __escaped_width = _S_trunc(__escaped, __prec);
> @@ -1387,13 +1386,13 @@ namespace __format
> {
>   ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
>   ranges::subrange __sub(__first, __first + __n);
> - return format(_String(from_ran

Re: [PATCH] testsuite: Add support for GCOV_UNDER_TEST

2025-04-16 Thread Christophe Lyon

ping?

On Thu, 10 Apr 2025 at 15:48, Hans-Peter Nilsson  wrote:
>
> > From: Christophe Lyon 
> > Date: Thu, 10 Apr 2025 15:38:48 +0200
>
> > On Thu, 10 Apr 2025 at 15:29, Hans-Peter Nilsson  wrote:
> > >
> > > > From: Christophe Lyon 
> > > > Date: Thu, 10 Apr 2025 15:21:23 +0200
> > >
> > > Not sure why I'm CC:ed on this one, not being a maintainer
> > > of the testsuite or targets where gcov tests are exercised,
> >
> > Because you fixed a problem in r13-4103-ge91d51457532da,
> > so I wanted to make sure my patch was OK for you.
>
> I've forgotten everything about that commit and the context. :}
>
> So, I now had an extra glance from the transform-name
> perspective: still ok.
>
> brgds, H-P

Re: [PATCH] doc: say "compatible types" for -fstrict-aliasing

2025-04-16 Thread Sam James

Jakub Jelinek  writes:

> On Wed, Mar 26, 2025 at 10:41:52AM +, Sam James wrote:
>> Include the term used in the standard to ease further research for users.
>> 
>> gcc/ChangeLog:
>> 
>>  * doc/invoke.texi: Use "compatible types" term.
>> ---
>>  gcc/doc/invoke.texi | 8 
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>> 
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index b3f7f0479cc4..ad749f2fd258 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -14552,10 +14552,10 @@ Allow the compiler to assume the strictest 
>> aliasing rules applicable to
>>  the language being compiled.  For C (and C++), this activates
>>  optimizations based on the type of expressions.  In particular, an
>>  object of one type is assumed never to reside at the same address as an
>> -object of a different type, unless the types are almost the same.  For
>> -example, an @code{unsigned int} can alias an @code{int}, but not a
>> -@code{void*} or a @code{double}.  A character type may alias any other
>> -type.
>> +object of a different type, unless the types are almost the same
>> +(``compatible types'').  For example, an @code{unsigned int} can alias an
>
> IMHO `` '' quoting does not belong to texi docs.
> Use some @emph{} or @dfn{} or @strong{} or something like that.

Will do (though I used this quoting as it was in the file already).

[PATCH v2] doc: say "compatible types" for -fstrict-aliasing

2025-04-16 Thread Sam James

Include the term used in the standard to ease further research for users,
and while at it, rephrase the description of the rule entirely using
Alexander Monakov's suggestion: it was previously wrong (and imprecise) as
"the same address" may well be re-used later on, and the issue is the
access via an expression of the wrong type.

gcc/ChangeLog:

* doc/invoke.texi: Use "compatible types" term. Rephrase to be
more precise (and correct).
---
v2: Use @dfn{} rather than ``quotes'', as suggested by Jakub. Rephrase using
Alexander Monakov's suggestion (as both my change wasn't quite right before,
and the previous text wasn't accurate either).

 gcc/doc/invoke.texi | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 14a78fd236f6..6952f4b6b02c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14649,12 +14649,14 @@ Enabled at levels @option{-O2}, @option{-O3}, 
@option{-Os}.
 @item -fstrict-aliasing
 Allow the compiler to assume the strictest aliasing rules applicable to
 the language being compiled.  For C (and C++), this activates
-optimizations based on the type of expressions.  In particular, an
-object of one type is assumed never to reside at the same address as an
-object of a different type, unless the types are almost the same.  For
-example, an @code{unsigned int} can alias an @code{int}, but not a
-@code{void*} or a @code{double}.  A character type may alias any other
-type.
+optimizations based on the type of expressions.  In particular, accessing
+an object of one type via an expression of a different type is not allowed,
+unless the types are @dfn{compatible types}, differ in signedness or
+qualifiers, or the expression has a character type.  Accessing scalar
+objects via a corresponding vector type is also allowed.
+
+For example, an @code{unsigned int} can alias an @code{int}, but not a
+@code{void*} or a @code{double}.  A character type may alias any other type.
 
 @anchor{Type-punning}Pay special attention to code like this:
 @smallexample

base-commit: 7b9d8d43154efcb56cee1787e3267183dd6a372e
-- 
2.49.0

Re: [PATCH] libstdc++: Constrain formatters for chrono types [PR119517]

2025-04-16 Thread Tomasz Kaminski

On Fri, Mar 28, 2025 at 9:33 PM Jonathan Wakely  wrote:

> On 28/03/25 16:31 +0100, Tomasz Kamiński wrote:
> >The formatters for chrono types defined the parse/format methods
> >as accepting unconstrained types; this, in combination with the lack
> >of a constraint on _CharT, led to them falsely satisfying the formattable
> >requirements for any type used as character.
> >
> >This patch adjusts the formatter::parse signature to:
> > constexpr typename basic_format_parse_context<_CharT>::iterator
> > parse(basic_format_parse_context<_CharT>& __pc);
> >And formatter::format to:
> > template<typename _Out>
> >   typename basic_format_context<_Out, _CharT>::iterator
> >   format(const T& __t,
> >  basic_format_context<_Out, _CharT>& __fc) const;
> >
> >Furthermore, we constrain _CharT with __format::__char (char or wchar_t).
> >
> >   PR libstdc++/119517
> >
> >libstdc++-v3/ChangeLog:
> >
> >   * include/bits/chrono_io.h (formatter):
> >   Add __format::__char for _CharT and adjust parse and format
> >   method signatures.
> >   * testsuite/std/time/format/pr119517.cc: New test.
> >---
> >Testing on x86_64-linux, std/time/format tests passed.
> >OK for trunk?
> >
> > libstdc++-v3/include/bits/chrono_io.h | 448 +-
> > .../testsuite/std/time/format/pr119517.cc |  44 ++
> > 2 files changed, 262 insertions(+), 230 deletions(-)
> > create mode 100644 libstdc++-v3/testsuite/std/time/format/pr119517.cc
> >
> >diff --git a/libstdc++-v3/include/bits/chrono_io.h
> b/libstdc++-v3/include/bits/chrono_io.h
> >index c55b651d049..3a5bc5695fb 100644
> >--- a/libstdc++-v3/include/bits/chrono_io.h
> >+++ b/libstdc++-v3/include/bits/chrono_io.h
> >@@ -1785,277 +1785,272 @@ namespace __format
> >   __format::__formatter_chrono<_CharT> _M_f;
> > };
> >
> >-  template
> >+  template<__format::__char _CharT>
> > struct formatter
> > {
> >-  template
> >-  constexpr typename _ParseContext::iterator
> >-  parse(_ParseContext& __pc)
> >-  { return _M_f._M_parse(__pc, __format::_Day); }
> >+  constexpr typename basic_format_parse_context<_CharT>::iterator
> >+  parse(basic_format_parse_context<_CharT>& __pc)
> >+  { return _M_f._M_parse(__pc, __format::_Day); }
> >
> >-  template
> >-  typename _FormatContext::iterator
> >-  format(const chrono::day& __t, _FormatContext& __fc) const
> >+  template
> >+  typename basic_format_context<_Out, _CharT>::iterator
> >+  format(const chrono::day& __t,
> >+ basic_format_context<_Out, _CharT>& __fc) const
> >   { return _M_f._M_format(__t, __fc); }
> >
> > private:
> >   __format::__formatter_chrono<_CharT> _M_f;
> > };
> >
> >-  template
> >+  template<__format::__char _CharT>
> > struct formatter
> > {
> >-  template
> >-  constexpr typename _ParseContext::iterator
> >-  parse(_ParseContext& __pc)
> >-  { return _M_f._M_parse(__pc, __format::_Month); }
> >+  constexpr typename basic_format_parse_context<_CharT>::iterator
> >+  parse(basic_format_parse_context<_CharT>& __pc)
> >+  { return _M_f._M_parse(__pc, __format::_Month); }
> >
> >-  template
> >-  typename _FormatContext::iterator
> >-  format(const chrono::month& __t, _FormatContext& __fc) const
> >+  template
> >+  typename basic_format_context<_Out, _CharT>::iterator
> >+  format(const chrono::month& __t,
> >+ basic_format_context<_Out, _CharT>& __fc) const
> >   { return _M_f._M_format(__t, __fc); }
> >
> > private:
> >   __format::__formatter_chrono<_CharT> _M_f;
> > };
> >
> >-  template
> >+  template<__format::__char _CharT>
> > struct formatter
> > {
> >-  template
> >-  constexpr typename _ParseContext::iterator
> >-  parse(_ParseContext& __pc)
> >-  { return _M_f._M_parse(__pc, __format::_Year); }
> >+  constexpr typename basic_format_parse_context<_CharT>::iterator
> >+  parse(basic_format_parse_context<_CharT>& __pc)
> >+  { return _M_f._M_parse(__pc, __format::_Year); }
> >
> >-  template
> >-  typename _FormatContext::iterator
> >-  format(const chrono::year& __t, _FormatContext& __fc) const
> >+  template
> >+  typename basic_format_context<_Out, _CharT>::iterator
> >+  format(const chrono::year& __t,
> >+ basic_format_context<_Out, _CharT>& __fc) const
> >   { return _M_f._M_format(__t, __fc); }
> >
> > private:
> >   __format::__formatter_chrono<_CharT> _M_f;
> > };
> >
> >-  template
> >+  template<__format::__char _CharT>
> > struct formatter
> > {
> >-  template
> >-  constexpr typename _ParseContext::iterator
> >-  parse(_ParseContext& __pc)
> >-  { return _M_f._M_parse(__pc, __format::_Weekday); }
> >+  constexpr typename basic_format_parse_context<_CharT>::iterator
> >+  parse(basic_format_parse_context<_CharT>& __pc)
> >+  { return _M_f._M_parse(__pc, __format::_Weekday); }
> >
> >-  template

Re: [PATCH]middle-end: fix masking for partial vectors and early break [PR119351]

2025-04-16 Thread Richard Biener




> Am 16.04.2025 um 23:37 schrieb Tamar Christina :
> 
> Hi All,
> 
> The following testcase shows an incorrect masked codegen:
> 
> #define N 512
> #define START 1
> #define END 505
> 
> int x[N] __attribute__((aligned(32)));
> 
> int __attribute__((noipa))
> foo (void)
> {
>  int z = 0;
>  for (unsigned int i = START; i < END; ++i)
>{
>  z++;
>  if (x[i] > 0)
>continue;
> 
>  return z;
>}
>  return -1;
> }
> 
> Notice how there's a continue there instead of a break.  This means we
> generate a control flow where success stays within the loop iteration:
> 
>  mask_patt_9.12_46 = vect__1.11_45 > { 0, 0, 0, 0 };
>  vec_mask_and_47 = mask_patt_9.12_46 & loop_mask_41;
>  if (vec_mask_and_47 == { -1, -1, -1, -1 })
>goto ; [41.48%]
>  else
>goto ; [58.52%]
> 
> However when loop_mask_41 is a partial mask this comparison can lead to an
> incorrect match.  In this case the mask is:
> 
>  # loop_mask_41 = PHI 
> 
> due to peeling for alignment with masking and compiling with
> -msve-vector-bits=128.
> 
> At codegen time we generate:
> 
>ptrue   p15.s, vl4
>ptrue   p7.b, vl1
>not p7.b, p15/z, p7.b
> .L5:
>ld1w    z29.s, p7/z, [x1, x0, lsl 2]
>cmpgt   p7.s, p7/z, z29.s, #0
>not p7.b, p15/z, p7.b
>ptest   p15, p7.b
>b.none  .L2
>..
> 
> Here the basic blocks are rotated and a not is generated.
> But the generated not is unmasked (or predicated over an ALL true mask in this
> case).  This has the unintended side-effect of flipping the results of the
> inactive lanes (which were zero'd by the cmpgt) into -1, which then
> incorrectly causes us to not take the branch to .L2.
> 
> This is happening because we're not comparing against the right value for the
> forall case.  This patch gets rid of the forall case by rewriting the
> if(all(mask)) into if (!all(mask)) which is the same as if (any(~mask)) by
> negating the masks and flipping the branches.
> 
>1. For unmasked loops we simply reduce the ~mask.
>2. For masked loops we reduce (~mask & loop_mask) which is the same as
>   doing (mask & loop_mask) ^ loop_mask.   
> 
> For the above we now generate:
> 
> .L5:
>ld1w    z28.s, p7/z, [x1, x0, lsl 2]
>cmple   p7.s, p7/z, z28.s, #0
>ptest   p15, p7.b
>b.none  .L2
> 
> This fixes gromacs with > 1 OpenMP threads and improves performance.
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 with no issues.
> 
> Ok for master? and backport to GCC-14?

Ok

Thanks,
Richard 

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>PR tree-optimization/119351
>* tree-vect-stmts.cc (vectorizable_early_exit): Mask both operands of
>the gcond for partial masking support.
> 
> gcc/testsuite/ChangeLog:
> 
>PR tree-optimization/119351
>* gcc.target/aarch64/sve/pr119351.c: New test.
>* gcc.target/aarch64/sve/pr119351_run.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
> new file mode 100644
> index 
> ..85aab355f95f83e1fa65d280f14fb8ade7f7e658
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c
> @@ -0,0 +1,39 @@
> +/* Fix for PR119351 alignment peeling with vectors and VLS.  */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -msve-vector-bits=256 --param 
> aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
> +/* { dg-final { check-function-bodies "**" "" ""} } */
> +
> +#define N 512
> +#define START 1
> +#define END 505
> +
> +int x[N] __attribute__((aligned(32)));
> +
> +/*
> +** foo:
> +**...
> +**ld1wz[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
> +**cmplep[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
> +**ptestp[0-9]+, p[0-9]+.b
> +**...
> +*/
> +
> +int __attribute__((noipa))
> +foo (void)
> +{
> +  int z = 0;
> +  for (unsigned int i = START; i < END; ++i)
> +{
> +  z++;
> +  if (x[i] > 0)
> +continue;
> +
> +  return z;
> +}
> +  return -1;
> +}
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
> +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" 
> "vect" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c
> new file mode 100644
> index 
> ..d36ab0eb7a900504e7dc2266ec5a19d1beeb5123
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351_run.c
> @@ -0,0 +1,20 @@
> +/* Fix for PR119351 alignment peeling with vectors and VLS.  */
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
> +/* { dg-additional-options "-msve-vector-bits=256" { target 
> aarch64_sve256_hw } } */
> +/* { dg-additio

1 2 >

1 - 100 of 102 matches

Mail list logo