[PATCH v5] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Siddhesh Poyarekar
Denormal behaviour is well defined for IEEE128 long doubles, so don't
XFAIL some gfortran tests on ppc64le when configured with the IEEE128
long double ABI.

gcc/testsuite/ChangeLog:

PR testsuite/118127
* lib/target-supports.exp
(check_effective_target_ppc_not_well_defined_denormals): New
procedure.
* gfortran.dg/default_format_2.f90: xfail for
ppc_not_well_defined_denormals.
* gfortran.dg/default_format_denormal_2.f90: Likewise.
* gfortran.dg/large_real_kind_form_io_2.f90: Likewise.

Signed-off-by: Siddhesh Poyarekar 
---
Change from v4:
- PASS only for IEEE128 instead of XFAILing only for IBM128.

 gcc/testsuite/gfortran.dg/default_format_2.f90 |  2 +-
 .../gfortran.dg/default_format_denormal_2.f90  |  2 +-
 .../gfortran.dg/large_real_kind_form_io_2.f90  |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 14 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/default_format_2.f90 
b/gcc/testsuite/gfortran.dg/default_format_2.f90
index 5ad7b3a6429..c2100cb8710 100644
--- a/gcc/testsuite/gfortran.dg/default_format_2.f90
+++ b/gcc/testsuite/gfortran.dg/default_format_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-apple-darwin* powerpc*-*-linux* } }
+! { dg-do run { xfail ppc_not_well_defined_denormals } }
 ! { dg-require-effective-target fortran_large_real }
 ! Test XFAILed on these platforms because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
diff --git a/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90 
b/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
index e9ccf5e8f61..addfe0860d6 100644
--- a/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
+++ b/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-*-* } }
+! { dg-do run { xfail ppc_not_well_defined_denormals } }
 ! { dg-require-effective-target fortran_large_real }
 ! Test XFAILed on this platform because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
diff --git a/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90 
b/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
index 34b8aec462c..eb0632b615a 100644
--- a/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
+++ b/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-apple-darwin* powerpc*-*-linux* } }
+! { dg-do run { xfail ppc_not_well_defined_denormals } }
 ! Test XFAILed on these platforms because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
 ! { dg-require-effective-target fortran_large_real }
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 9ad1e1965bb..62efbf557cb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1854,6 +1854,20 @@ proc check_effective_target_fortran_integer_16 { } {
 }]
 }
 
+# Check if the PPC target defaults to the IBM long double format.
+
+proc check_effective_target_ppc_not_well_defined_denormals { } {
+if { ![istarget powerpc*-*-*] } {
+  return 0
+}
+
+return [check_no_compiler_messages ppc_not_well_defined_denormals assembly 
{
+  #if defined(__LONG_DOUBLE_IEEE128__)
+  #error "__LONG_DOUBLE_IEEE128__ defined, denormals are well defined"
+  #endif
+}]
+}
+
 # Return 1 if we can statically link libgfortran, 0 otherwise.
 #
 # When the target name changes, replace the cached result.
-- 
2.47.1



Re: [PATCH v5] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Peter Bergner
On 1/29/25 6:18 AM, Siddhesh Poyarekar wrote:
> Denormal behaviour is well defined for IEEE128 long doubles, so don't
> XFAIL some gfortran tests on ppc64le when configured with the IEEE128
> long double ABI.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/118127
>   * lib/target-supports.exp
>   (check_effective_target_ppc_not_well_defined_denormals): New
>   procedure.
>   * gfortran.dg/default_format_2.f90: xfail for
>   ppc_not_well_defined_denormals.
>   * gfortran.dg/default_format_denormal_2.f90: Likewise.
>   * gfortran.dg/large_real_kind_form_io_2.f90: Likewise.

LGTM, although I cannot approve it.

Peter




Re: [PATCH] libstdc++: Fix views::transform(move_only_fn{}) forwarding [PR118413]

2025-01-29 Thread Jonathan Wakely
On Tue, 28 Jan 2025 at 16:00, Patrick Palka  wrote:
>
> On Mon, 27 Jan 2025, Patrick Palka wrote:
>
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk/14?
> >
> > -- >8 --
> >
> > This case was incorrectly failing in C++23 mode even after P2492R2
> > because the perfect forwarding simplification mechanism assumed bound
> > arguments are copy-constructible which is no longer necessarily true
> > after that paper.
>
> On closer inspection this isn't quite right: the mechanism has for a
> while intended to check for copy-constructibility of the bound arguments
> (even before P2492R2), but it used the wrong predicate:
> is_trivially_copyable instead of is_trivially_copy_constructible.
> Here's a simpler fix that's more suitable for backporting.
>
> Disabling the mechanism outright in C++23 mode and relying on deducing
> "this" is still desirable but I reckon that's more stage 1 material.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk/14?

OK for both, thanks.

>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Fix views::transform(move_only_fn{}) forwarding
>  [PR118413]
>
> The range adaptor perfect forwarding simplification mechanism is currently
> only enabled for trivially copyable bound arguments, to prevent undesirable
> copies of complex objects.  But "trivially copyable" is the wrong property
> to check for here, since a move-only type with a trivial move constructor
> is considered trivially copyable, and after P2492R2 we can't assume that the
> bound arguments are copy-constructible.  This patch makes the mechanism
> more specifically check for trivial copy constructibility instead so that
> it's properly disabled for move-only bound arguments.
>
> PR libstdc++/118413
>
> libstdc++-v3/ChangeLog:
>
> * include/std/ranges (views::__adaptor::_Partial): Refine
> constraints on the "simple" partial specializations to require
> is_trivially_copy_constructible_v instead of
> is_trivially_copyable_v.
> * testsuite/std/ranges/adaptors/adjacent_transform/1.cc (test04):
> Test partial application of a move-only function as well.
> * testsuite/std/ranges/adaptors/transform.cc (test09): Likewise.
> ---
>  libstdc++-v3/include/std/ranges   | 4 ++--
>  .../testsuite/std/ranges/adaptors/adjacent_transform/1.cc | 1 +
>  libstdc++-v3/testsuite/std/ranges/adaptors/transform.cc   | 2 ++
>  3 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
> index ad69a94b21f..5c795a90fbc 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -1145,7 +1145,7 @@ namespace views::__adaptor
>// which makes overload resolution failure diagnostics more concise.
>template
>  requires __adaptor_has_simple_extra_args<_Adaptor, _Args...>
> -  && (is_trivially_copyable_v<_Args> && ...)
> +  && (is_trivially_copy_constructible_v<_Args> && ...)
>  struct _Partial<_Adaptor, _Args...> : 
> _RangeAdaptorClosure<_Partial<_Adaptor, _Args...>>
>  {
>tuple<_Args...> _M_args;
> @@ -1176,7 +1176,7 @@ namespace views::__adaptor
>// where _Adaptor accepts a single extra argument.
>template
>  requires __adaptor_has_simple_extra_args<_Adaptor, _Arg>
> -  && is_trivially_copyable_v<_Arg>
> +  && is_trivially_copy_constructible_v<_Arg>
>  struct _Partial<_Adaptor, _Arg> : 
> _RangeAdaptorClosure<_Partial<_Adaptor, _Arg>>
>  {
>_Arg _M_arg;
> diff --git 
> a/libstdc++-v3/testsuite/std/ranges/adaptors/adjacent_transform/1.cc 
> b/libstdc++-v3/testsuite/std/ranges/adaptors/adjacent_transform/1.cc
> index a5791b3da70..772e4b3b6a0 100644
> --- a/libstdc++-v3/testsuite/std/ranges/adaptors/adjacent_transform/1.cc
> +++ b/libstdc++-v3/testsuite/std/ranges/adaptors/adjacent_transform/1.cc
> @@ -110,6 +110,7 @@ test04()
>};
>// P2494R2 Relaxing range adaptors to allow for move only types
>static_assert( requires { views::pairwise_transform(x, move_only{}); } );
> +  static_assert( requires { x | views::pairwise_transform(move_only{}); } );
>  }
>
>  int
> diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/transform.cc 
> b/libstdc++-v3/testsuite/std/ranges/adaptors/transform.cc
> index dfc91fb5e1f..934d2f65dcf 100644
> --- a/libstdc++-v3/testsuite/std/ranges/adaptors/transform.cc
> +++ b/libstdc++-v3/testsuite/std/ranges/adaptors/transform.cc
> @@ -191,8 +191,10 @@ test09()
>  #if __cpp_lib_ranges >= 202207L
>// P2494R2 Relaxing range adaptors to allow for move only types
>static_assert( requires { transform(x, move_only{}); } );
> +  static_assert( requires { x | transform(move_only{}); } ); // PR 
> libstdc++/118413
>  #else
>static_assert( ! requires { transform(x, move_only{}); } );
> +  static_assert( ! requires { x | transform(move_only{}); } );
>  #endif
>  }
>
> --
> 2.48.1.91.g5f8f7081f7
>
>
> > It'd be easy enough to fix the me

[PATCH] RX: Restrict displacement ranges in "Q" constraint

2025-01-29 Thread Yoshinori Sato
When using the "Q" constraint in the inline assembler, the displacement value
could exceed the range specified by the instruction.
To avoid this issue, a displacement range check is added to the "Q" constraint.

Signed-off-by: Yoshinori Sato 
---
 gcc/config/rx/constraints.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md
index c5188a3bc78..8f42ade56a8 100644
--- a/gcc/config/rx/constraints.md
+++ b/gcc/config/rx/constraints.md
@@ -80,7 +80,8 @@
(ior (match_code "reg" "0")
(and (match_code "plus" "0")
 (and (match_code "reg,subreg" "00")
- (match_code "const_int" "01")
+ (and (match_code "const_int" "01")
+  (match_test "rx_is_restricted_memory_address (XEXP 
(op, 0), mode)"))
 )
)
)
-- 
2.39.5



Re: [PATCH v5] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Siddhesh Poyarekar

On 2025-01-29 07:34, Jakub Jelinek wrote:

Denormal behaviour is well defined for IEEE128 long doubles, so don't
XFAIL some gfortran tests on ppc64le when configured with the IEEE128
long double ABI.


If long double is just IEEE754 double, then I think denormals
are equally well behaved, it is solely the case of IBM double double.
Of course, these 3 tests won't run anyway in that case because
fortran_large_real effective target will be false.


Yes, that's what I see in tests.


So I think you want an effective target which is solely about IBM double
double being the default long double.  No need to have ppc in the
name of the effective target.
Now, long_double_ibm128 effective target is not what you want to use
because that isn't what the long double is, but what long double can be if
one dg-add-options long_double_ibm128.
So, perhaps long_double_is_ibm128 effective target with
#ifndef __LONG_DOUBLE_IBM128__
#error ...
#endif
in the test?


So basically you're saying v4[1] with the name as long_double_is_ibm128 
instead of ppc_default_long_double_ibm?


Sid

[1] 
https://inbox.sourceware.org/gcc-patches/20250128225735.156840-1-siddh...@gotplt.org/T/#u


Re: [PATCH] c++: wrong-code with consteval constructor [PR117501]

2025-01-29 Thread Jason Merrill

On 1/27/25 6:19 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/14?

-- >8 --
We've had a wrong-code problem since r14-4140, due to which we
forget to initialize a variable.

In consteval39.C, we evaluate

 struct QQQ q;
   <>>
   (const char *) "" ) >;

into

 struct QQQ q;
   <;

and then the useless expr_stmt is dropped on the floor, so q isn't
initialized.  As pre-r14-4140, we need to handle constructors specially.


Hmm, why didn't the following code in eval_outermost make this a 
rejects-valid bug rather than wrong-code?


  /* If T is calling a constructor to initialize an object, reframe  
 it as an AGGR_INIT_EXPR to avoid trying to modify an object 
 from outside the constant evaluation, which will fail even if   
 the value is actually constant (is_constant_evaluated3.C).  */


Your change should share code with this block doing the same thing in 
cp_fold_r:



if (TREE_CODE (r) != CALL_EXPR)
  {
if (DECL_CONSTRUCTOR_P (callee))
  {
loc = EXPR_LOCATION (x);
tree a = CALL_EXPR_ARG (x, 0);
bool return_this = targetm.cxx.cdtor_returns_this ();
if (return_this)
  a = cp_save_expr (a);
tree s = build_fold_indirect_ref_loc (loc, a);
r = cp_build_init_expr (s, r);
if (return_this)
  r = build2_loc (loc, COMPOUND_EXPR, TREE_TYPE (x), r,
  fold_convert_loc (loc, TREE_TYPE (x), a));
  }
x = r;
break;
  }


Like there, we shouldn't need this for AGGR_INIT_EXPR, only CALL_EXPR.

Jason



[PATCH v2] get source line for diagnostic from preprocessed file [PR preprocessor/79106]

2025-01-29 Thread Bader, Lucas
Hi,

as discussed, I rebased and tested my patch against current master. 
Additionally, it now has the Signed-off-by tag.
Looking forward to your comments.

Best
Lucas



Within a compile cluster, only the preprocessed output of GCC is transferred
to remote nodes for compilation. When GCC produces advanced diagnostics
(with -fdiagnostics-show-caret), e.g. prints out the affected source
line and fixit hints, it attempts to read the source file again, even when
compiling a preprocessed file (-fpreprocessed). This leads to wrong
diagnostics when building with a compile cluster, or, more generally,
when changing or deleting the original source file.

This patch alters GCC to read from the preprocessed file instead by
calculating the corresponding source line. This behavior is consistent
with clang.

gcc/c-family/ChangeLog:

* c-opts.cc (c_common_handle_option): pass -fpreprocessed
option value to global diagnostic configuration

gcc/ChangeLog:

* diagnostic-show-locus.cc (layout::calculate_x_offset_display): read 
line from source or preprocessed
file based on -fpreprocessed value
(source_line::source_line): read line from source or preprocessed
file based on -fpreprocessed value
(layout_printer::print_line): read line from source or preprocessed
file based on -fpreprocessed value

* diagnostic.cc (diagnostic_context::initialize): initialize new members
* diagnostic.h: new members for reading
source lines from preprocessed files

* input.cc (file_cache::get_source_line_preprocessed): new function
to read source lines from preprocessed files
(test_reading_source_line_preprocessed): new test case
(input_cc_tests): execute new test case
* input.h (class file_cache): add new member function

* opts-global.cc (read_cmdline_options): pass input filename to global
diagnostic context

Signed-off-by: Lucas Bader 
---
 gcc/c-family/c-opts.cc   |   1 +
 gcc/diagnostic-show-locus.cc |  25 --
 gcc/diagnostic.cc|   2 +
 gcc/diagnostic.h |   6 ++
 gcc/input.cc | 169 +++
 gcc/input.h  |   2 +
 gcc/opts-global.cc   |   4 +
 7 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 87b231861a6..105d8fa6eff 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, 
HOST_WIDE_INT value,
 
 case OPT_fpreprocessed:
   cpp_opts->preprocessed = value;
+  global_dc->m_is_preprocessed = value;
   break;
 
 case OPT_fdebug_cpp:
diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
index 898efe74acf..7ac183a126f 100644
--- a/gcc/diagnostic-show-locus.cc
+++ b/gcc/diagnostic-show-locus.cc
@@ -1786,8 +1786,13 @@ layout::calculate_x_offset_display ()
   return;
 }
 
-  const char_span line = m_file_cache.get_source_line (m_exploc.file,
-  m_exploc.line);
+  char_span line (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+line = m_file_cache.get_source_line_preprocessed (
+ m_exploc.file, global_dc->m_main_input_file_path, m_exploc.line);
+  else
+line = m_file_cache.get_source_line (m_exploc.file, m_exploc.line);
+
   if (!line)
 {
   /* Nothing to do, we couldn't find the source line.  */
@@ -2780,7 +2785,12 @@ public:
 
 source_line::source_line (file_cache &fc, const char *filename, int line)
 {
-  char_span span = fc.get_source_line (filename, line);
+  char_span span (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+span = fc.get_source_line_preprocessed (
+   filename, global_dc->m_main_input_file_path, line);
+  else
+span = fc.get_source_line (filename, line);
   chars = span.get_buffer ();
   width = span.length ();
 }
@@ -3132,8 +3142,13 @@ layout_printer::show_ruler (int max_column)
 void
 layout_printer::print_line (linenum_type row)
 {
-  char_span line
-= m_layout.m_file_cache.get_source_line (m_layout.m_exploc.file, row);
+  char_span line (NULL, 0);
+  if (global_dc->m_main_input_file_path != NULL && 
global_dc->m_is_preprocessed)
+line = m_layout.m_file_cache.get_source_line_preprocessed (
+   m_layout.m_exploc.file, global_dc->m_main_input_file_path, row);
+  else
+line = m_layout.m_file_cache.get_source_line (m_layout.m_exploc.file, row);
+
   if (!line)
 return;
 
diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index c2f6714c24a..c8a93d407ad 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -290,6 +290,8 @@ diagnostic_context::initialize (int n_opts)
   m_diagrams.m_theme = nullptr;
   m_original_argv = nullptr;
   m_diagnostic_buffer = nullptr;
+  m_main_input_file_path = nullptr;

Re: [PATCH v5] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 08:01:17AM -0500, Siddhesh Poyarekar wrote:
> On 2025-01-29 07:34, Jakub Jelinek wrote:
> > > Denormal behaviour is well defined for IEEE128 long doubles, so don't
> > > XFAIL some gfortran tests on ppc64le when configured with the IEEE128
> > > long double ABI.
> > 
> > If long double is just IEEE754 double, then I think denormals
> > are equally well behaved, it is solely the case of IBM double double.
> > Of course, these 3 tests won't run anyway in that case because
> > fortran_large_real effective target will be false.
> 
> Yes, that's what I see in tests.
> 
> > So I think you want an effective target which is solely about IBM double
> > double being the default long double.  No need to have ppc in the
> > name of the effective target.
> > Now, long_double_ibm128 effective target is not what you want to use
> > because that isn't what the long double is, but what long double can be if
> > one dg-add-options long_double_ibm128.
> > So, perhaps long_double_is_ibm128 effective target with
> > #ifndef __LONG_DOUBLE_IBM128__
> > #error ...
> > #endif
> > in the test?
> 
> So basically you're saying v4[1] with the name as long_double_is_ibm128
> instead of ppc_default_long_double_ibm?

And some comment adjustment.
# Check if the PPC target defaults to the IBM long double format.
changed to maybe
# Check if long double on the target defaults to the IBM extended format.

Inside of it it surely can just filter out non-powerpc targets like done in
the patch because we know no other targets uses that format for long double
(at least right now).  And hopefully no target does that in the future ;)

Jakub



Re: [PATCH v6] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Siddhesh Poyarekar

On 2025-01-29 08:24, Jakub Jelinek wrote:

On Wed, Jan 29, 2025 at 08:19:38AM -0500, Siddhesh Poyarekar wrote:

Denormal behaviour is well defined for IEEE128 long doubles, so
XFAIL some gfortran tests only for targets with the IBM128 long double
ABI.

gcc/testsuite/ChangeLog:

PR testsuite/118127
* lib/target-supports.exp
(check_effective_target_long_double_is_ibm128): New
procedure.
* gfortran.dg/default_format_2.f90: xfail for
long_double_is_ibm128.
* gfortran.dg/default_format_denormal_2.f90: Likewise.
* gfortran.dg/large_real_kind_form_io_2.f90: Likewise.

Signed-off-by: Siddhesh Poyarekar 


Ok, thanks.


Thanks, may I backport this to gcc14 as well?

Sid


Re: [PATCH v6] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 10:25:39AM -0500, Siddhesh Poyarekar wrote:
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   PR testsuite/118127
> > >   * lib/target-supports.exp
> > >   (check_effective_target_long_double_is_ibm128): New
> > >   procedure.
> > >   * gfortran.dg/default_format_2.f90: xfail for
> > >   long_double_is_ibm128.
> > >   * gfortran.dg/default_format_denormal_2.f90: Likewise.
> > >   * gfortran.dg/large_real_kind_form_io_2.f90: Likewise.
> > > 
> > > Signed-off-by: Siddhesh Poyarekar 
> > 
> > Ok, thanks.
> 
> Thanks, may I backport this to gcc14 as well?

Yes.

Jakub



Re: [PATCH] middle-end/118692 - ICE with out-of-bound ref expansion

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 03:46:46PM +0100, Richard Biener wrote:
> The following guards the BIT_FIELD_REF expansion fallback for
> MEM_REFs of entities expanded to register (or constant) further,
> avoiding large out-of-bound offsets by, when the access does not
> overlap the base object, expanding the offset as if it were zero.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> 
> I didn't feel lucky enough trying to simply return const0_rtx for
> fully out-of-bound accesses.
> 
> OK?
> 
> Thanks,
> Richard.
> 
>   PR middle-end/118692
>   * expr.cc (expand_expr_real_1): When expanding a MEM_REF
>   as BIT_FIELD_REF avoid large offsets for accesses not
>   overlapping the base object.
> 
>   * gcc.dg/pr118692.c: New testcase.
> ---
>  gcc/expr.cc |  8 
>  gcc/testsuite/gcc.dg/pr118692.c | 10 ++
>  2 files changed, 18 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr118692.c
> 
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 7f3149b85ee..10467f82c0d 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -11806,6 +11806,14 @@ expand_expr_real_1 (tree exp, rtx target, 
> machine_mode tmode,
> set_mem_size (temp, int_size_in_bytes (type));
>   return temp;
> }
> + /* When the access is fully outside of the underlying object
> +expand the offset as zero.  This avoids out-of-bound
> +BIT_FIELD_REFs and generates smaller code for these cases
> +with UB.  */
> + type_size = tree_to_poly_uint64 (TYPE_SIZE_UNIT (type));
> + if (!ranges_maybe_overlap_p (offset, type_size, 0,
> +  GET_MODE_SIZE (DECL_MODE (base
> +   offset = 0;
>   exp = build3 (BIT_FIELD_REF, type, base, TYPE_SIZE (type),
> bitsize_int (offset * BITS_PER_UNIT));
>   REF_REVERSE_STORAGE_ORDER (exp) = reverse;
> diff --git a/gcc/testsuite/gcc.dg/pr118692.c b/gcc/testsuite/gcc.dg/pr118692.c
> new file mode 100644
> index 000..49cff35b8b7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr118692.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +_Complex cf;

Please use _Complex double here.
> +
> +void
> +foo(char c)
> +{
> +  cf += *(_Complex *)__builtin_memcpy(8143523003042804629 + &c, 0, 0);

Ditto here.
I think it might be a good idea to use LL suffix after the constant too.

Otherwise LGTM.

Jakub



Welcome to the Chinese WOS, Scopus Journal (Multidisciplinary)

2025-01-29 Thread Venian Lin
*The Journal of Hunan University Natural Sciences *is the leading Chinese
peer-reviewed journal that publishes articles in all areas of natural
sciences. The Journal is meant to serve as a means of communication and
discussion of important issues related to science and scientific
activities. The Journal publishes only original articles in English and
Chinese which have international importance. In addition to full-length
research papers, the Journal publishes review articles. Papers can be
focused on fundamental research leading to new methods, or adaptation of
existing methods for new applications.
Currently, The Journal of Hunan University (Natural Sciences) contains such
columns as biotechnologies and chemical engineering, mathematics, computer
science, engineering, mechanical engineering, electrical engineering, civil
engineering and materials science and management, and so on.

  


*E-mail:* editorial-off...@jonuns..com
*tel:* +86-731-88-223-132
Address: 85, Lushan Road (S), Yuelu District, Changsha, Hunan Province, Zip
Code: 410082 (Department of the Journal)


[PATCH] middle-end/118692 - ICE with out-of-bound ref expansion

2025-01-29 Thread Richard Biener
The following guards the BIT_FIELD_REF expansion fallback for
MEM_REFs of entities expanded to register (or constant) further,
avoiding large out-of-bound offsets by, when the access does not
overlap the base object, expanding the offset as if it were zero.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I didn't feel lucky enough trying to simply return const0_rtx for
fully out-of-bound accesses.

OK?

Thanks,
Richard.

PR middle-end/118692
* expr.cc (expand_expr_real_1): When expanding a MEM_REF
as BIT_FIELD_REF avoid large offsets for accesses not
overlapping the base object.

* gcc.dg/pr118692.c: New testcase.
---
 gcc/expr.cc |  8 
 gcc/testsuite/gcc.dg/pr118692.c | 10 ++
 2 files changed, 18 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr118692.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7f3149b85ee..10467f82c0d 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -11806,6 +11806,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
  set_mem_size (temp, int_size_in_bytes (type));
return temp;
  }
+   /* When the access is fully outside of the underlying object
+  expand the offset as zero.  This avoids out-of-bound
+  BIT_FIELD_REFs and generates smaller code for these cases
+  with UB.  */
+   type_size = tree_to_poly_uint64 (TYPE_SIZE_UNIT (type));
+   if (!ranges_maybe_overlap_p (offset, type_size, 0,
+GET_MODE_SIZE (DECL_MODE (base
+ offset = 0;
exp = build3 (BIT_FIELD_REF, type, base, TYPE_SIZE (type),
  bitsize_int (offset * BITS_PER_UNIT));
REF_REVERSE_STORAGE_ORDER (exp) = reverse;
diff --git a/gcc/testsuite/gcc.dg/pr118692.c b/gcc/testsuite/gcc.dg/pr118692.c
new file mode 100644
index 000..49cff35b8b7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr118692.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+_Complex cf;
+
+void
+foo(char c)
+{
+  cf += *(_Complex *)__builtin_memcpy(8143523003042804629 + &c, 0, 0);
+}
-- 
2.43.0


[PATCH] middle-end/118684 - fix fallout of wrong stack local alignment fix

2025-01-29 Thread Richard Biener
When we expand BIT_FIELD_REF  we can end up creating
a stack local, running into the fix.  But get_object_alignment
will return 8 for any SSA_NAME because that's not an "object" we
handle.  Deal with handled components on registers by singling out
SSA_NAME bases, using their type alignment instead of
get_object_alignment (I considered "robustifying" get_object_alignment,
but decided not to at this point).

This fixes an ICE on gcc.dg/pr41123.c on arm as reported by the CI.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR middle-end/118684
* expr.cc (expand_expr_real_1): When creating a stack local
during expansion of a handled component, when the base is
a SSA_NAME use its type alignment and avoid calling
get_object_alignment.

* gcc.dg/pr118684.c: Require automatic_stack_alignment.
---
 gcc/expr.cc | 4 +++-
 gcc/testsuite/gcc.dg/pr118684.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 95f41f69fcf..7f3149b85ee 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12152,7 +12152,9 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
if (!poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (tem)), &size))
  size = max_int_size_in_bytes (TREE_TYPE (tem));
memloc = assign_stack_local (TYPE_MODE (TREE_TYPE (tem)), size,
-get_object_alignment (tem));
+TREE_CODE (tem) == SSA_NAME
+? TYPE_ALIGN (TREE_TYPE (tem))
+: get_object_alignment (tem));
emit_move_insn (memloc, op0);
op0 = memloc;
clear_mem_expr = true;
diff --git a/gcc/testsuite/gcc.dg/pr118684.c b/gcc/testsuite/gcc.dg/pr118684.c
index 08cc24dc061..28fd76e366d 100644
--- a/gcc/testsuite/gcc.dg/pr118684.c
+++ b/gcc/testsuite/gcc.dg/pr118684.c
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-do run { target automatic_stack_alignment } } */
 /* { dg-options "-O2" } */
 
 typedef int v4si __attribute__((vector_size(16)));
-- 
2.43.0


RE: [PATCH v3 1/4] RISC-V: Refactor SAT_* operand rtx extend to reg help func [NFC]

2025-01-29 Thread Li, Pan2
> I think you meant "the value extended into" rather than "the extended to".
> OK with that fix.

Thanks Jeff, will commit the series with that fix.

Pan

-Original Message-
From: Jeff Law  
Sent: Tuesday, January 28, 2025 11:18 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; rdapp@gmail.com
Subject: Re: [PATCH v3 1/4] RISC-V: Refactor SAT_* operand rtx extend to reg 
help func [NFC]



On 1/27/25 4:07 AM, pan2...@intel.com wrote:
> From: Pan Li 
> 
> This patch would like to refactor the helper function of the SAT_*
> scalar.  The helper function will convert the define_pattern ops
> to the xmode reg for the underlying code-gen.  This patch add
> new parameter for ZERO_EXTEND or SIGN_EXTEND if the input is const_int
> or the mode is non-Xmode.
> 
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv.cc (riscv_gen_zero_extend_rtx): Rename from ...
>   (riscv_extend_to_xmode_reg): Rename to and add rtx_code for
>   zero/sign extend if non-Xmode.
>   (riscv_expand_usadd): Leverage the renamed function with ZERO_EXTEND.
>   (riscv_expand_ussub): Ditto.
> 
> Signed-off-by: Pan Li 
> ---
>   gcc/config/riscv/riscv.cc | 77 ---
>   1 file changed, 48 insertions(+), 29 deletions(-)
> 
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index dd50fe4eddf..34f0a888c5c 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -12644,14 +12644,26 @@ riscv_get_raw_result_mode (int regno)
>   /* Generate a REG rtx of Xmode from the given rtx and mode.
>  The rtx x can be REG (QI/HI/SI/DI) or const_int.
>  The machine_mode mode is the original mode from define pattern.
> +   The rtx_code can be ZERO_EXTEND or SIGN_EXTEND.
>   
> -   If rtx is REG and Xmode, the RTX x will be returned directly.
> +   If rtx is REG:
>   
> -   If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be
> -   returned.
> +   1.  If rtx Xmode, the RTX x will be returned directly.
> +   2.  If rtx non-Xmode, the extended to new REG of Xmode will be returned.
I think you meant "the value extended into" rather than "the extended to".

OK with that fix.

Jeff


[PATCH] tree-optimization/114052 - consider infinite sub-loops when lowering iter bound

2025-01-29 Thread Richard Biener
When we walk stmts to find always executed stmts with UB in the last
iteration to be able to reduce the iteration count by one we fail
to consider infinite subloops in the last iteration that would make
such stmt not execute.  The following adds this and also changes
the DFS walk (which could end up stop iterating prematurely?) with
a walk over the DOM order of the loop body.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR tree-optimization/114052
* tree-ssa-loop-niter.cc (estimate_numbers_of_iterations):
Collect the loop body in DOM order.
(maybe_lower_iteration_bound): Walk the loop body in DOM
order.  Check for infinite subloops we might not exit.

* gcc.dg/pr114052-1.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr114052-1.c | 41 +
 gcc/tree-ssa-loop-niter.cc| 51 ---
 2 files changed, 75 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr114052-1.c

diff --git a/gcc/testsuite/gcc.dg/pr114052-1.c 
b/gcc/testsuite/gcc.dg/pr114052-1.c
new file mode 100644
index 000..98e93bf670d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114052-1.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-require-effective-target signal } */
+/* { dg-require-effective-target alarm } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+#include 
+#include 
+
+volatile int y;
+void __attribute__((noipa)) put(int x)
+{
+  if (y)
+__builtin_printf ("%i\n", x);
+}
+
+void __attribute__((noipa)) f(void)
+{
+  int counter = 0;
+  while (1) {
+  if (counter >= 2) continue;
+  put (counter++);
+  }
+}
+
+void do_exit (int i)
+{
+  exit (0);
+}
+
+int main()
+{
+  struct sigaction s;
+  sigemptyset (&s.sa_mask);
+  s.sa_handler = do_exit;
+  s.sa_flags = 0;
+  sigaction (SIGALRM, &s, NULL);
+  alarm (1);
+  f();
+}
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index de8d5ae6233..4264d9acb44 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -4686,13 +4686,11 @@ discover_iteration_bound_by_body_walk (class loop *loop)
count by 1.  */
 
 static void
-maybe_lower_iteration_bound (class loop *loop)
+maybe_lower_iteration_bound (class loop *loop, basic_block *bbs)
 {
   hash_set *not_executed_last_iteration = NULL;
   class nb_iter_bound *elt;
   bool found_exit = false;
-  auto_vec queue;
-  bitmap visited;
 
   /* Collect all statements with interesting (i.e. lower than
  nb_iterations_upper_bound) bound on them.
@@ -4714,22 +4712,45 @@ maybe_lower_iteration_bound (class loop *loop)
   if (!not_executed_last_iteration)
 return;
 
-  /* Start DFS walk in the loop header and see if we can reach the
+  /* Walk the loop body collected in DOM order and see if we can reach the
  loop latch or any of the exits (including statements with side
  effects that may terminate the loop otherwise) without visiting
  any of the statements known to have undefined effect on the last
  iteration.  */
-  queue.safe_push (loop->header);
-  visited = BITMAP_ALLOC (NULL);
-  bitmap_set_bit (visited, loop->header->index);
   found_exit = false;
-
-  do
+  class loop *inn_loop = loop;
+  for (unsigned i = 0; i < loop->num_nodes && !found_exit; ++i)
 {
-  basic_block bb = queue.pop ();
+  basic_block bb = bbs[i];
   gimple_stmt_iterator gsi;
   bool stmt_found = false;
 
+  if (!flow_bb_inside_loop_p (inn_loop, bb))
+   {
+ /* When we are leaving a possibly infinite inner loop
+we have to stop processing.  */
+ if (!finite_loop_p (inn_loop))
+   {
+ found_exit = true;
+ break;
+   }
+ /* If the loop was finite we can continue with processing
+the loop we exited to.  */
+ inn_loop = bb->loop_father;
+   }
+
+  if (bb->loop_father->header == bb)
+   /* Record that we enter into a subloop since it might not
+  be finite.  */
+   inn_loop = bb->loop_father;
+
+  /* An irreducible region might be infinite.  */
+  if (bb->flags & BB_IRREDUCIBLE_LOOP)
+   {
+ found_exit = true;
+ break;
+   }
+
   /* Loop for possible exits and statements bounding the execution.  */
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
@@ -4762,12 +4783,9 @@ maybe_lower_iteration_bound (class loop *loop)
  found_exit = true;
  break;
}
- if (bitmap_set_bit (visited, e->dest->index))
-   queue.safe_push (e->dest);
}
}
 }
-  while (queue.length () && !found_exit);
 
   /* If every path through the loop reach bounding statement before exit,
  then we know the last iteration of the loop will have undefined effect
@@ -4783,7 +4801,6 @@ maybe_lower_iteration_bound (class loop *loop)
  false, true);
 }
 
-  

[PATCH] tree-optimization/114052 - consider infinite sub-loops when lowering iter bound

2025-01-29 Thread Richard Biener
When we walk stmts to find always executed stmts with UB in the last
iteration to be able to reduce the iteration count by one we fail
to consider infinite subloops in the last iteration that would make
such stmt not execute.  The following adds this.

Third try - bootstrapped and tested on x86_64-unknown-linux-gnu.  I'm
reasonably happy with this and will push tomorrow unless I hear
otherwise.

Richard.

PR tree-optimization/114052
* tree-ssa-loop-niter.cc (maybe_lower_iteration_bound): Check
for infinite subloops we might not exit.

* gcc.dg/pr114052-1.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr114052-1.c | 41 +++
 gcc/tree-ssa-loop-niter.cc|  9 ++-
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr114052-1.c

diff --git a/gcc/testsuite/gcc.dg/pr114052-1.c 
b/gcc/testsuite/gcc.dg/pr114052-1.c
new file mode 100644
index 000..98e93bf670d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114052-1.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-require-effective-target signal } */
+/* { dg-require-effective-target alarm } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+#include 
+#include 
+
+volatile int y;
+void __attribute__((noipa)) put(int x)
+{
+  if (y)
+__builtin_printf ("%i\n", x);
+}
+
+void __attribute__((noipa)) f(void)
+{
+  int counter = 0;
+  while (1) {
+  if (counter >= 2) continue;
+  put (counter++);
+  }
+}
+
+void do_exit (int i)
+{
+  exit (0);
+}
+
+int main()
+{
+  struct sigaction s;
+  sigemptyset (&s.sa_mask);
+  s.sa_handler = do_exit;
+  s.sa_flags = 0;
+  sigaction (SIGALRM, &s, NULL);
+  alarm (1);
+  f();
+}
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index de8d5ae6233..7743970bf3d 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -4757,7 +4757,14 @@ maybe_lower_iteration_bound (class loop *loop)
   FOR_EACH_EDGE (e, ei, bb->succs)
{
  if (loop_exit_edge_p (loop, e)
- || e == loop_latch_edge (loop))
+ || e == loop_latch_edge (loop)
+ /* When exiting an inner loop, verify it is finite.  */
+ || (!flow_bb_inside_loop_p (bb->loop_father, e->dest)
+ && !finite_loop_p (bb->loop_father))
+ /* When we enter an irreducible region and the entry
+does not contain a bounding stmt assume it might be
+infinite.  */
+ || (bb->flags & BB_IRREDUCIBLE_LOOP))
{
  found_exit = true;
  break;
-- 
2.43.0


Re: [PATCH v2] get source line for diagnostic from preprocessed file [PR preprocessor/79106]

2025-01-29 Thread David Malcolm
On Wed, 2025-01-29 at 13:05 +, Bader, Lucas wrote:
> Hi,
> 
> as discussed, I rebased and tested my patch against current master.
> Additionally, it now has the Signed-off-by tag.
> Looking forward to your comments.
> 
> Best
> Lucas

Thanks for the updated patch.

Various notes inline throughout below...

> 
> 
> 
> Within a compile cluster, only the preprocessed output of GCC is
> transferred
> to remote nodes for compilation. When GCC produces advanced
> diagnostics
> (with -fdiagnostics-show-caret), e.g. prints out the affected source
> line and fixit hints, it attempts to read the source file again, even
> when
> compiling a preprocessed file (-fpreprocessed). This leads to wrong
> diagnostics when building with a compile cluster, or, more generally,
> when changing or deleting the original source file.
> 
> This patch alters GCC to read from the preprocessed file instead by
> calculating the corresponding source line. This behavior is
> consistent
> with clang.
> 
> gcc/c-family/ChangeLog:
> 
>   * c-opts.cc (c_common_handle_option): pass -fpreprocessed
>   option value to global diagnostic configuration
> 
> gcc/ChangeLog:
> 
>   * diagnostic-show-locus.cc
> (layout::calculate_x_offset_display): read line from source or
> preprocessed
>   file based on -fpreprocessed value
>   (source_line::source_line): read line from source or
> preprocessed
>   file based on -fpreprocessed value
>   (layout_printer::print_line): read line from source or
> preprocessed
>   file based on -fpreprocessed value
> 
>   * diagnostic.cc (diagnostic_context::initialize): initialize
> new members
>   * diagnostic.h: new members for reading
>   source lines from preprocessed files
> 
>   * input.cc (file_cache::get_source_line_preprocessed): new
> function
>   to read source lines from preprocessed files
>   (test_reading_source_line_preprocessed): new test case
>   (input_cc_tests): execute new test case
>   * input.h (class file_cache): add new member function
> 
>   * opts-global.cc (read_cmdline_options): pass input filename
> to global
>   diagnostic context


Some minor ChangeLog nits:
* you should add "PR preprocessor/79106" (without quotes) to the start
of each ChangeLog entry; have a look at the existing ChangeLog entries
to see how we do it
* the entries should read as sentences, so please capitalize the first
letter, and add periods
* please wrap at 74 columns.

You can check the patch using
  ./contrib/gcc-changelog/git_check_commit.py
(which is run as a server-side pre-commit hook, I believe)


> 
> Signed-off-by: Lucas Bader 
> ---
>  gcc/c-family/c-opts.cc   |   1 +
>  gcc/diagnostic-show-locus.cc |  25 --
>  gcc/diagnostic.cc    |   2 +
>  gcc/diagnostic.h |   6 ++
>  gcc/input.cc | 169
> +++
>  gcc/input.h  |   2 +
>  gcc/opts-global.cc   |   4 +
>  7 files changed, 204 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
> index 87b231861a6..105d8fa6eff 100644
> --- a/gcc/c-family/c-opts.cc
> +++ b/gcc/c-family/c-opts.cc
> @@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char
> *arg, HOST_WIDE_INT value,
>  
>  case OPT_fpreprocessed:
>    cpp_opts->preprocessed = value;
> +  global_dc->m_is_preprocessed = value;
>    break;
>  
>  case OPT_fdebug_cpp:
> diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-
> locus.cc
> index 898efe74acf..7ac183a126f 100644
> --- a/gcc/diagnostic-show-locus.cc
> +++ b/gcc/diagnostic-show-locus.cc
> @@ -1786,8 +1786,13 @@ layout::calculate_x_offset_display ()
>    return;
>  }
>  
> -  const char_span line = m_file_cache.get_source_line
> (m_exploc.file,
> -   
> m_exploc.line);
> +  char_span line (NULL, 0);
> +  if (global_dc->m_main_input_file_path != NULL && global_dc-
> >m_is_preprocessed)
> +    line = m_file_cache.get_source_line_preprocessed (
> +   m_exploc.file, global_dc->m_main_input_file_path,
> m_exploc.line);
> +  else
> +    line = m_file_cache.get_source_line (m_exploc.file,
> m_exploc.line);
> +

Here and in two other places below in this file the patch adds logic
of the form:

  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
line = location_get_source_line_preprocessed (some file,
  
global_dc->main_input_file_path,
  some line);
  else
line = location_get_source_line (some file, some line);

Please introduce a subroutine for this to avoid the repetition,
something like:

static char_span
get_line_maybe_preprocessed (const char *filename, int line_num);

I wonder if this logic should be moved into the file_cache itself, but
let's leave that for now.


>    if (!line)
>  {
>    /* Nothing to do, 

Re: [PATCH] asf: Enable pass at O2 or higher

2025-01-29 Thread Richard Biener
On Wed, 29 Jan 2025, Christoph Müllner wrote:

> The avoid-store-forwarding pass is disabled by default and therefore
> in the risk of bit-rotting.  This patch addresses this by enabling
> the pass at O2 or higher.
> 
> The assembly patterns in `bitfield-bitint-abi-align16.c` and
> `bitfield-bitint-abi-align8.c` have been updated to account for
> the ASF transformations.
> 
> This was bootstrapped on x86-64 and AArch64 and showed no
> regressions in the test suite (--enable-checking=yes,extra and
> all languages).

OK for GCC 16 stage1.

Richard.

> gcc/ChangeLog:
> 
>   * doc/invoke.texi: Document asf as an O2 enabled option.
>   * opts.cc: Enable asf at O2.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/bitfield-bitint-abi-align16.c:
>   Modify testcases to account for the asf transformations.
>   * gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise.
>   * gcc.target/aarch64/avoid-store-forwarding-6.c: New test.
> 
> Co-developed-by: Konstantinos Eleftheriou 
> Signed-off-by: Christoph Müllner 
> ---
>  gcc/doc/invoke.texi   |  3 +-
>  gcc/opts.cc   |  1 +
>  .../aarch64/avoid-store-forwarding-6.c| 29 +++
>  .../aarch64/bitfield-bitint-abi-align16.c | 28 ++
>  .../aarch64/bitfield-bitint-abi-align8.c  | 28 ++
>  5 files changed, 64 insertions(+), 25 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e54a287..52d0489ab24 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12716,6 +12716,7 @@ also turns on the following optimization flags:
>  @c Please keep the following list alphabetized!
>  @gccoptlist{-falign-functions  -falign-jumps
>  -falign-labels  -falign-loops
> +-favoid-store-forwarding
>  -fcaller-saves
>  -fcode-hoisting
>  -fcrossjumping
> @@ -12876,7 +12877,7 @@ Many CPUs will stall for many cycles when a load 
> partially depends on previous
>  smaller stores.  This pass tries to detect such cases and avoid the penalty 
> by
>  changing the order of the load and store and then fixing up the loaded value.
>  
> -Disabled by default.
> +Enabled by default at @option{-O2} and higher.
>  
>  @opindex ffp-contract
>  @item -ffp-contract=@var{style}
> diff --git a/gcc/opts.cc b/gcc/opts.cc
> index 23900c7b1c0..9914d20ad47 100644
> --- a/gcc/opts.cc
> +++ b/gcc/opts.cc
> @@ -627,6 +627,7 @@ static const struct default_options 
> default_options_table[] =
>  { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
>  
>  /* -O2 and -Os optimizations.  */
> +{ OPT_LEVELS_2_PLUS, OPT_favoid_store_forwarding, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fcode_hoisting, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fcrossjumping, NULL, 1 },
> diff --git a/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c 
> b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> new file mode 100644
> index 000..320fa5e101e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +/* Same as avoid-store-forwarding-1.c but without -favoid-store-forwarding.  
> */
> +
> +typedef union {
> +char arr_8[8];
> +long long_value;
> +} DataUnion;
> +
> +long ssll_1 (DataUnion *data, char x)
> +{
> +  data->arr_8[0] = x;
> +  return data->long_value;
> +}
> +
> +long ssll_2 (DataUnion *data, char x)
> +{
> +  data->arr_8[1] = x;
> +  return data->long_value;
> +}
> +
> +long ssll_3 (DataUnion *data, char x)
> +{
> +  data->arr_8[7] = x;
> +  return data->long_value;
> +}
> +
> +/* { dg-final { scan-assembler-times {ldr\tx[0-9]+, 
> \[x[0-9]+\]\n\tstrb\tw[0-9]+, \[x[0-9]+(, \d+)?\]\n\tbfi\tx[0-9]+, x[0-9]+, 
> \d+, \d+} 3 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c 
> b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> index c29a230a771..b4501d81c45 100644
> --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> @@ -91,10 +91,11 @@
>  **   mov (w[0-9]+), 0
>  **   bfi \3, w\2, 0, 1
>  **   and x3, x\2, 9223372036854775807
> -**   mov x2, 0
> +**   mov (x[0-9]+), 0
> +**   bfi \4, (x[0-9]+), 0, 8
>  **   str xzr, \[sp\]
>  **   strb\3, \[sp\]
> -**   ldr x1, \[sp\]
> +**   mov \5, 0
>  **   add sp, sp, 16
>  **   b   fp
>  */
> @@ -183,19 +184,21 @@
>  **   sxtw(x[0-9]+), w1
>  **   mov x0, \2
>  **   and x7, \2, 9223372036854775807
> +**   mov (x[0-9]+), 0
>  **   mov (w[0-9]+), 0
> -**   bfi \3, w\1, 0, 1
> +**   bfi \4, w\1, 0, 1
> +**   mov (x[0-9]+), \3
> +**   bfi \5, (x[0-9]+), 0, 8
> +**   stp x7, \5,

Re: [PATCH v8] Target-independent store forwarding avoidance.

2025-01-29 Thread Christoph Müllner
On Thu, Nov 28, 2024 at 8:37 AM Richard Biener 
wrote:

> On Mon, Nov 25, 2024 at 3:28 AM Philipp Tomsich
>  wrote:
> >
> > Pushed to master with the following fixups:
> >   - new timevar added
> >   - nits addressed
> >   - whitespace fixes
>
> The pass seems to be disabled by default everywhere - I thought we
> decided to avoid adding
> passes like this because they tend to bit-rot quickly and become a
> maintenance burden.
>
> What was the plan here?
>

A patch for enabling ASF for O2 or higher is on the list:
  https://gcc.gnu.org/pipermail/gcc-patches/2025-January/674692.html

BR
Christoph


>
> Richard.
>
> > Philipp.
> >
> >
> > On Mon, 25 Nov 2024 at 03:30, Jeff Law  wrote:
> > >
> > >
> > >
> > > On 11/9/24 2:48 AM, Konstantinos Eleftheriou wrote:
> > > > From: kelefth 
> > > >
> > > > This pass detects cases of expensive store forwarding and tries to
> avoid them
> > > > by reordering the stores and using suitable bit insertion sequences.
> > > > For example it can transform this:
> > > >
> > > >   strbw2, [x1, 1]
> > > >   ldr x0, [x1]  # Expensive store forwarding to larger
> load.
> > > >
> > > > To:
> > > >
> > > >   ldr x0, [x1]
> > > >   strbw2, [x1]
> > > >   bfi x0, x2, 0, 8
> > > >
> > > > Assembly like this can appear with bitfields or type punning /
> unions.
> > > > On stress-ng when running the cpu-union microbenchmark the following
> speedups
> > > > have been observed.
> > > >
> > > >Neoverse-N1:  +29.4%
> > > >Intel Coffeelake: +13.1%
> > > >AMD 5950X:+17.5%
> > > >
> > > > The transformation is rejected on cases that would cause
> store_bit_field
> > > > to generate subreg expressions on different register classes.
> > > > Files avoid-store-forwarding-4.c and avoid-store-forwarding-5.c
> contain
> > > > such cases and have been marked as XFAIL.
> > > >
> > > > There is a special handling for machines with BITS_BIG_ENDIAN !=
> > > > BYTES_BIG_ENDIAN. The need for this came up from an issue in H8
> > > > architecture, which uses big-endian ordering, but BITS_BIG_ENDIAN
> > > > is false. In that case, the START parameter of store_bit_field
> > > > needs to be calculated from the end of the destination register.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >   * Makefile.in (OBJS): Add avoid-store-forwarding.o.
> > > >   * common.opt (favoid-store-forwarding): New option.
> > > >   * common.opt.urls: Regenerate.
> > > >   * doc/invoke.texi: New param store-forwarding-max-distance.
> > > >   * doc/passes.texi: Document new pass.
> > > >   * doc/tm.texi: Regenerate.
> > > >   * doc/tm.texi.in: Document new pass.
> > > >   * params.opt (store-forwarding-max-distance): New param.
> > > >   * passes.def: Add pass_rtl_avoid_store_forwarding before
> > > >   pass_early_remat.
> > > >   * target.def (avoid_store_forwarding_p): New DEFHOOK.
> > > >   * target.h (struct store_fwd_info): Declare.
> > > >   * targhooks.cc (default_avoid_store_forwarding_p): New
> function.
> > > >   * targhooks.h (default_avoid_store_forwarding_p): Declare.
> > > >   * tree-pass.h (make_pass_rtl_avoid_store_forwarding): Declare.
> > > >   * avoid-store-forwarding.cc: New file.
> > > >   * avoid-store-forwarding.h: New file.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >   * gcc.target/aarch64/avoid-store-forwarding-1.c: New test.
> > > >   * gcc.target/aarch64/avoid-store-forwarding-2.c: New test.
> > > >   * gcc.target/aarch64/avoid-store-forwarding-3.c: New test.
> > > >   * gcc.target/aarch64/avoid-store-forwarding-4.c: New test.
> > > >   * gcc.target/aarch64/avoid-store-forwarding-5.c: New test.
> > > >   * gcc.target/x86_64/abi/callabi/avoid-store-forwarding-1.c:
> New test.
> > > >  * gcc.target/x86_64/abi/callabi/avoid-store-forwarding-2.c:
> New test.
> > > >
> > > > Signed-off-by: Philipp Tomsich 
> > > > Signed-off-by: Konstantinos Eleftheriou <
> konstantinos.elefther...@vrull.eu>
> > > >
> > > > Series-version: 8
> > > >
> > > > Series-changes: 8
> > > >   - Fix store_bit_field call for big-endian targets, where
> > > > BITS_BIG_ENDIAN is false.
> > > >   - Handle store_forwarding_max_distance = 0 as a special case
> that
> > > > disables cost checks for avoid-store-forwarding.
> > > >   - Update testcases for AArch64 and add testcases for x86-64.
> > > >
> > > > Series-changes: 7
> > > >   - Fix bug when copying back the load register, in the case
> that the
> > > > load is eliminated.
> > > >
> > > > Series-changes: 6
> > > >   - Reject the transformation on cases that would cause
> store_bit_field
> > > > to generate subreg expressions on different register classes.
> > > > Files avoid-store-forwarding-4.c and
> avoid-store-forwarding-5.c
> > > >contain such cases and have been marked as XFAIL.
> > > >   - Use optimize_bb_for_speed_p instead of
> 

[PATCH] asf: Enable pass at O2 or higher

2025-01-29 Thread Christoph Müllner
The avoid-store-forwarding pass is disabled by default and therefore
in the risk of bit-rotting.  This patch addresses this by enabling
the pass at O2 or higher.

The assembly patterns in `bitfield-bitint-abi-align16.c` and
`bitfield-bitint-abi-align8.c` have been updated to account for
the ASF transformations.

This was bootstrapped on x86-64 and AArch64 and showed no
regressions in the test suite (--enable-checking=yes,extra and
all languages).

gcc/ChangeLog:

* doc/invoke.texi: Document asf as an O2 enabled option.
* opts.cc: Enable asf at O2.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/bitfield-bitint-abi-align16.c:
Modify testcases to account for the asf transformations.
* gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise.
* gcc.target/aarch64/avoid-store-forwarding-6.c: New test.

Co-developed-by: Konstantinos Eleftheriou 
Signed-off-by: Christoph Müllner 
---
 gcc/doc/invoke.texi   |  3 +-
 gcc/opts.cc   |  1 +
 .../aarch64/avoid-store-forwarding-6.c| 29 +++
 .../aarch64/bitfield-bitint-abi-align16.c | 28 ++
 .../aarch64/bitfield-bitint-abi-align8.c  | 28 ++
 5 files changed, 64 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e54a287..52d0489ab24 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12716,6 +12716,7 @@ also turns on the following optimization flags:
 @c Please keep the following list alphabetized!
 @gccoptlist{-falign-functions  -falign-jumps
 -falign-labels  -falign-loops
+-favoid-store-forwarding
 -fcaller-saves
 -fcode-hoisting
 -fcrossjumping
@@ -12876,7 +12877,7 @@ Many CPUs will stall for many cycles when a load 
partially depends on previous
 smaller stores.  This pass tries to detect such cases and avoid the penalty by
 changing the order of the load and store and then fixing up the loaded value.
 
-Disabled by default.
+Enabled by default at @option{-O2} and higher.
 
 @opindex ffp-contract
 @item -ffp-contract=@var{style}
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 23900c7b1c0..9914d20ad47 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -627,6 +627,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
 
 /* -O2 and -Os optimizations.  */
+{ OPT_LEVELS_2_PLUS, OPT_favoid_store_forwarding, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fcode_hoisting, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fcrossjumping, NULL, 1 },
diff --git a/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c 
b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
new file mode 100644
index 000..320fa5e101e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Same as avoid-store-forwarding-1.c but without -favoid-store-forwarding.  */
+
+typedef union {
+char arr_8[8];
+long long_value;
+} DataUnion;
+
+long ssll_1 (DataUnion *data, char x)
+{
+  data->arr_8[0] = x;
+  return data->long_value;
+}
+
+long ssll_2 (DataUnion *data, char x)
+{
+  data->arr_8[1] = x;
+  return data->long_value;
+}
+
+long ssll_3 (DataUnion *data, char x)
+{
+  data->arr_8[7] = x;
+  return data->long_value;
+}
+
+/* { dg-final { scan-assembler-times {ldr\tx[0-9]+, 
\[x[0-9]+\]\n\tstrb\tw[0-9]+, \[x[0-9]+(, \d+)?\]\n\tbfi\tx[0-9]+, x[0-9]+, 
\d+, \d+} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c 
b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
index c29a230a771..b4501d81c45 100644
--- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
+++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
@@ -91,10 +91,11 @@
 ** mov (w[0-9]+), 0
 ** bfi \3, w\2, 0, 1
 ** and x3, x\2, 9223372036854775807
-** mov x2, 0
+** mov (x[0-9]+), 0
+** bfi \4, (x[0-9]+), 0, 8
 ** str xzr, \[sp\]
 ** strb\3, \[sp\]
-** ldr x1, \[sp\]
+** mov \5, 0
 ** add sp, sp, 16
 ** b   fp
 */
@@ -183,19 +184,21 @@
 ** sxtw(x[0-9]+), w1
 ** mov x0, \2
 ** and x7, \2, 9223372036854775807
+** mov (x[0-9]+), 0
 ** mov (w[0-9]+), 0
-** bfi \3, w\1, 0, 1
+** bfi \4, w\1, 0, 1
+** mov (x[0-9]+), \3
+** bfi \5, (x[0-9]+), 0, 8
+** stp x7, \5, \[sp\]
 ** strbwzr, \[sp, 16\]
 ** mov x6, x7
 ** mov x5, x7
 ** mov x4, x7
-** mov x3, x7
-** mov x2, x7
-** str xzr, \[sp, 48\]
-** strb\3, \[sp, 48\]
-** ldr (x[0-9]+), \[sp, 48\]
-** stp x7, \4, \[sp\]
-** mov x1, 

Re: [PATCH v2 04/12] AArch64: Diagnose OpenMP offloading when SVE types involved.

2025-01-29 Thread Richard Sandiford
Tejas Belagod  writes:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index e7c703c987e..2c169ea3806 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -4956,12 +4956,35 @@ handle_arm_sve_vector_bits_attribute (tree *node, 
> tree, tree args, int,
>return NULL_TREE;
>  }
>  
> +
> +/* Return true if OpenMP context types.  */
> +
> +static bool
> +omp_type_context (type_context_kind context)
> +{
> +  switch (context)
> +{
> +case TCTX_OMP_MAP:
> +case TCTX_OMP_MAP_IMP_REF:
> +case TCTX_OMP_PRIVATE:
> +case TCTX_OMP_FIRSTPRIVATE:
> +case TCTX_OMP_DEVICE_ADDR:
> +  return true;
> +default:
> +  return false;;

Double semicolon.

> +}
> +}

I think it'd be good to make this an inline function in target.h,
since it isn't target-specific.

> +
>  /* Implement TARGET_VERIFY_TYPE_CONTEXT for SVE types.  */
>  bool
>  verify_type_context (location_t loc, type_context_kind context,
>const_tree type, bool silent_p)
>  {
> -  if (!sizeless_type_p (type))
> +  const_tree tmp = type;
> +  if (omp_type_context (context) && POINTER_TYPE_P (type))
> +tmp = strip_pointer_types (tmp);
> +
> +  if (!sizeless_type_p (tmp))
>  return true;
>  
>switch (context)
> @@ -5021,6 +5044,33 @@ verify_type_context (location_t loc, type_context_kind 
> context,
>if (!silent_p)
>   error_at (loc, "capture by copy of SVE type %qT", type);
>return false;
> +
> +case TCTX_OMP_MAP:
> +  if (!silent_p)
> + error_at (loc, "SVE type %qT not allowed in map clause", type);
> +  return false;
> +
> +case TCTX_OMP_MAP_IMP_REF:
> +  /* The diagnosis is done in the caller.  */

Not sure about this.  The documentation says:

  If defined, this hook returns false if there is a target-specific reason
  why type @var{type} cannot be used in the source language context described
  by @var{context}.  When @var{silent_p} is false, the hook also reports an
  error against @var{loc} for invalid uses of @var{type}.

So I think we should report an error even for this case, and not report
an error in the caller.  That unfortunately loses the decl information,
but the location would hopefully be enough in practice.  On that:

> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index 3f602469d57..ace43cf78a0 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -8430,11 +8430,13 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, 
> tree decl, bool in_code)
> | GOVD_MAP_ALLOC_ONLY)) == flags)
>   {
> tree type = TREE_TYPE (decl);
> +   location_t dummy = UNKNOWN_LOCATION;
>  
> if (gimplify_omp_ctxp->target_firstprivatize_array_bases
> && omp_privatize_by_reference (decl))
>   type = TREE_TYPE (type);
> -   if (!omp_mappable_type (type))
> +   if (!omp_mappable_type (type)
> +   || !verify_type_context (dummy, TCTX_OMP_MAP_IMP_REF, type))

I think we should be passing DECL_SOURCE_LOCATION (decl) here.

The target-specific bits look good otherwise.

Thanks,
Richard



Re: [PATCH] asf: Enable pass at O2 or higher

2025-01-29 Thread Philipp Tomsich
+JiangNing Liu

On Wed, 29 Jan 2025 at 10:38, Richard Biener  wrote:
>
> On Wed, 29 Jan 2025, Christoph Müllner wrote:
>
> > The avoid-store-forwarding pass is disabled by default and therefore
> > in the risk of bit-rotting.  This patch addresses this by enabling
> > the pass at O2 or higher.
> >
> > The assembly patterns in `bitfield-bitint-abi-align16.c` and
> > `bitfield-bitint-abi-align8.c` have been updated to account for
> > the ASF transformations.
> >
> > This was bootstrapped on x86-64 and AArch64 and showed no
> > regressions in the test suite (--enable-checking=yes,extra and
> > all languages).
>
> OK for GCC 16 stage1.
>
> Richard.
>
> > gcc/ChangeLog:
> >
> >   * doc/invoke.texi: Document asf as an O2 enabled option.
> >   * opts.cc: Enable asf at O2.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/aarch64/bitfield-bitint-abi-align16.c:
> >   Modify testcases to account for the asf transformations.
> >   * gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise.
> >   * gcc.target/aarch64/avoid-store-forwarding-6.c: New test.
> >
> > Co-developed-by: Konstantinos Eleftheriou 
> > 
> > Signed-off-by: Christoph Müllner 
> > ---
> >  gcc/doc/invoke.texi   |  3 +-
> >  gcc/opts.cc   |  1 +
> >  .../aarch64/avoid-store-forwarding-6.c| 29 +++
> >  .../aarch64/bitfield-bitint-abi-align16.c | 28 ++
> >  .../aarch64/bitfield-bitint-abi-align8.c  | 28 ++
> >  5 files changed, 64 insertions(+), 25 deletions(-)
> >  create mode 100644 
> > gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> >
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index e54a287..52d0489ab24 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -12716,6 +12716,7 @@ also turns on the following optimization flags:
> >  @c Please keep the following list alphabetized!
> >  @gccoptlist{-falign-functions  -falign-jumps
> >  -falign-labels  -falign-loops
> > +-favoid-store-forwarding
> >  -fcaller-saves
> >  -fcode-hoisting
> >  -fcrossjumping
> > @@ -12876,7 +12877,7 @@ Many CPUs will stall for many cycles when a load 
> > partially depends on previous
> >  smaller stores.  This pass tries to detect such cases and avoid the 
> > penalty by
> >  changing the order of the load and store and then fixing up the loaded 
> > value.
> >
> > -Disabled by default.
> > +Enabled by default at @option{-O2} and higher.
> >
> >  @opindex ffp-contract
> >  @item -ffp-contract=@var{style}
> > diff --git a/gcc/opts.cc b/gcc/opts.cc
> > index 23900c7b1c0..9914d20ad47 100644
> > --- a/gcc/opts.cc
> > +++ b/gcc/opts.cc
> > @@ -627,6 +627,7 @@ static const struct default_options 
> > default_options_table[] =
> >  { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
> >
> >  /* -O2 and -Os optimizations.  */
> > +{ OPT_LEVELS_2_PLUS, OPT_favoid_store_forwarding, NULL, 1 },
> >  { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 },
> >  { OPT_LEVELS_2_PLUS, OPT_fcode_hoisting, NULL, 1 },
> >  { OPT_LEVELS_2_PLUS, OPT_fcrossjumping, NULL, 1 },
> > diff --git a/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c 
> > b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> > new file mode 100644
> > index 000..320fa5e101e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +/* Same as avoid-store-forwarding-1.c but without 
> > -favoid-store-forwarding.  */
> > +
> > +typedef union {
> > +char arr_8[8];
> > +long long_value;
> > +} DataUnion;
> > +
> > +long ssll_1 (DataUnion *data, char x)
> > +{
> > +  data->arr_8[0] = x;
> > +  return data->long_value;
> > +}
> > +
> > +long ssll_2 (DataUnion *data, char x)
> > +{
> > +  data->arr_8[1] = x;
> > +  return data->long_value;
> > +}
> > +
> > +long ssll_3 (DataUnion *data, char x)
> > +{
> > +  data->arr_8[7] = x;
> > +  return data->long_value;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {ldr\tx[0-9]+, 
> > \[x[0-9]+\]\n\tstrb\tw[0-9]+, \[x[0-9]+(, \d+)?\]\n\tbfi\tx[0-9]+, x[0-9]+, 
> > \d+, \d+} 3 } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c 
> > b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> > index c29a230a771..b4501d81c45 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> > @@ -91,10 +91,11 @@
> >  **   mov (w[0-9]+), 0
> >  **   bfi \3, w\2, 0, 1
> >  **   and x3, x\2, 9223372036854775807
> > -**   mov x2, 0
> > +**   mov (x[0-9]+), 0
> > +**   bfi \4, (x[0-9]+), 0, 8
> >  **   str xzr, \[sp\]
> >  **   strb\3, \[sp\]
> > -**   ldr x1, \[sp\]
> > +**   mov \5, 0
> >  **   add sp, sp, 16
> >  **   b   fp

[PATCH] arm: Add multilib for _Float16 typeinfo for v8.1-m.main (PR 116447)

2025-01-29 Thread Christophe Lyon
Enabling 'fp' on v8.1-m.main (either via +fp, +fp.dp or +mve.dp)
enables support for fp16 types (and registers _Float16 in C++).

As the g++.dg/cpp23/ext-floating13.C testcase shows, this pulls
typeinfo for _Float16 at link time, but a toolchain built using
t-rmprofile currently relies on v8-m.main+fp libraries for
v8.1-m.main+fp, and v8-m does not support fp16, leading to undefined
reference.

To avoid this, the patch adds -march=armv8.1-m.main+fp and
-march=armv8.1-m.main+fp.dp multilibs in addition to the existing
march=armv8.1-m.main+fp/march=armv8.1-m.main+mve.

Instead of matching v8-main.fp for +fp +dsp+fp +mve.fp +fp+mve
versions of v8.1-m.main, they now match v8.1-m.main+fp (+fp is of
course removed from the list of matches, since it matches itself).
The same applies to +fp.dp +dsp+fp.dp +fp.dp+mve +fp.dp+mve.fp.

gcc/ChangeLog:

PR target/116447
* config/arm/t-rmprofile: Add -march=armv8.1-m.main+fp and
-march=armv8.1-m.main+fp.dp multilibs.
---
 gcc/config/arm/t-rmprofile | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/config/arm/t-rmprofile b/gcc/config/arm/t-rmprofile
index 9cddaaf85ef..2097846b0c8 100644
--- a/gcc/config/arm/t-rmprofile
+++ b/gcc/config/arm/t-rmprofile
@@ -27,8 +27,8 @@
 
 # Arch and FPU variants to build libraries with
 
-MULTI_ARCH_OPTS_RM = 
march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7e-m+fp/march=armv7e-m+fp.dp/march=armv8-m.base/march=armv8-m.main/march=armv8-m.main+fp/march=armv8-m.main+fp.dp/march=armv8.1-m.main+mve/march=armv8.1-m.main+pacbti/march=armv8.1-m.main+pacbti+fp/march=armv8.1-m.main+pacbti+fp.dp/march=armv8.1-m.main+pacbti+mve
-MULTI_ARCH_DIRS_RM = v6-m v7-m v7e-m v7e-m+fp v7e-m+dp v8-m.base v8-m.main 
v8-m.main+fp v8-m.main+dp v8.1-m.main+mve v8.1-m.main+pacbti 
v8.1-m.main+pacbti+fp v8.1-m.main+pacbti+dp v8.1-m.main+pacbti+mve
+MULTI_ARCH_OPTS_RM = 
march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7e-m+fp/march=armv7e-m+fp.dp/march=armv8-m.base/march=armv8-m.main/march=armv8-m.main+fp/march=armv8-m.main+fp.dp/march=armv8.1-m.main+fp/march=armv8.1-m.main+fp.dp/march=armv8.1-m.main+mve/march=armv8.1-m.main+pacbti/march=armv8.1-m.main+pacbti+fp/march=armv8.1-m.main+pacbti+fp.dp/march=armv8.1-m.main+pacbti+mve
+MULTI_ARCH_DIRS_RM = v6-m v7-m v7e-m v7e-m+fp v7e-m+dp v8-m.base v8-m.main 
v8-m.main+fp v8-m.main+dp v8.1-m.main+fp v8.1-m.main+dp v8.1-m.main+mve 
v8.1-m.main+pacbti v8.1-m.main+pacbti+fp v8.1-m.main+pacbti+dp 
v8.1-m.main+pacbti+mve
 
 MULTI_ARCH_OPTS_RM += mbranch-protection=standard
 MULTI_ARCH_DIRS_RM += bp
@@ -51,6 +51,8 @@ MULTILIB_REQUIRED += 
mthumb/march=armv8-m.main+fp/mfloat-abi=hard
 MULTILIB_REQUIRED  += mthumb/march=armv8-m.main+fp/mfloat-abi=softfp
 MULTILIB_REQUIRED  += mthumb/march=armv8-m.main+fp.dp/mfloat-abi=hard
 MULTILIB_REQUIRED  += mthumb/march=armv8-m.main+fp.dp/mfloat-abi=softfp
+MULTILIB_REQUIRED  += mthumb/march=armv8.1-m.main+fp/mfloat-abi=hard
+MULTILIB_REQUIRED  += mthumb/march=armv8.1-m.main+fp.dp/mfloat-abi=hard
 MULTILIB_REQUIRED  += mthumb/march=armv8.1-m.main+mve/mfloat-abi=hard
 
 MULTILIB_REQUIRED  += 
mthumb/march=armv8.1-m.main+pacbti/mbranch-protection=standard/mfloat-abi=soft
@@ -86,16 +88,16 @@ MULTILIB_MATCHES+= 
march?armv8-m.main=mlibarch?armv8.1-m.main+dsp
 MULTILIB_REUSE += 
mthumb/march.armv8-m\.main/mfloat-abi.soft=mthumb/march.armv8\.1-m\.main+mve/mfloat-abi.soft
 MULTILIB_REUSE += 
mthumb/march.armv8-m\.main/mfloat-abi.soft=mthumb/march.armv8\.1-m\.main+mve/mfloat-abi.softfp
 
-v8_1m_sp_variants = +fp +dsp+fp +mve.fp +fp+mve
-v8_1m_dp_variants = +fp.dp +dsp+fp.dp +fp.dp+mve +fp.dp+mve.fp
+v8_1m_sp_variants = +dsp+fp +mve.fp +fp+mve
+v8_1m_dp_variants = +dsp+fp.dp +fp.dp+mve +fp.dp+mve.fp
 
-# Map all v8.1-m.main FP sp variants down to v8-m.
+# Map all v8.1-m.main FP sp variants down to v8.1-m.main+fp.
 MULTILIB_MATCHES += $(foreach FP, $(v8_1m_sp_variants), \
-march?armv8-m.main+fp=mlibarch?armv8.1-m.main$(FP))
+
march?armv8.1-m.main+fp=mlibarch?armv8.1-m.main$(FP))
 
-# Map all v8.1-m.main FP dp variants down to v8-m.
+# Map all v8.1-m.main FP dp variants down to v8.1-m.main+fp.dp.
 MULTILIB_MATCHES += $(foreach FP, $(v8_1m_dp_variants), \
-
march?armv8-m.main+fp.dp=mlibarch?armv8.1-m.main$(FP))
+
march?armv8.1-m.main+fp.dp=mlibarch?armv8.1-m.main$(FP))
 
 # Map all mbranch-protection values other than 'none' to 'standard'.
 MULTILIB_MATCHES   += mbranch-protection?standard=mbranch-protection?bti
-- 
2.34.1



Re: [PATCH 1/2] split-path: CALL_EXPR can't show up in gimple_assign

2025-01-29 Thread Richard Biener
On Wed, Jan 29, 2025 at 8:56 AM Andrew Pinski  wrote:
>
> While working on split path, I noticed that poor_ifcvt_candidate_code
> would check for CALL_EXPR but that can't show up in gimple_assign
> so this removes that check.
>
> This could be a very very small compile time improvement.

OK

> Bootstrapped and tested on x86_64-linux-gnu.
>
> gcc/ChangeLog:
>
> * gimple-ssa-split-paths.cc (poor_ifcvt_candidate_code): Remove 
> CALL_EXPR handling.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/gimple-ssa-split-paths.cc | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/gcc/gimple-ssa-split-paths.cc b/gcc/gimple-ssa-split-paths.cc
> index 018e59f98cb..7c5bc1d842c 100644
> --- a/gcc/gimple-ssa-split-paths.cc
> +++ b/gcc/gimple-ssa-split-paths.cc
> @@ -138,8 +138,7 @@ poor_ifcvt_candidate_code (enum tree_code code)
>return (code == MIN_EXPR
>   || code == MAX_EXPR
>   || code == ABS_EXPR
> - || code == COND_EXPR
> - || code == CALL_EXPR);
> + || code == COND_EXPR);
>  }
>
>  /* Return TRUE if PRED of BB is an poor ifcvt candidate. */
> --
> 2.43.0
>


Re: [PATCH 2/2] split-path: Small fix for poor_ifcvt_pred (tsvc s258) [PR118505]

2025-01-29 Thread Richard Biener
On Wed, Jan 29, 2025 at 8:54 AM Andrew Pinski  wrote:
>
> After r15-3436-gb2b20b277988ab, poor_ifcvt_pred returns false for
> the case where the statement could trap but in this case trapping
> instructions cannot be made unconditional so it is a poor ifcvt.
>
> This fixes a small preformance regression with TSVC s258 at
> `-O3 -ftrapping-math` on aarch64 where ifcvt would not happen
> and we would still have a branch.
>
> On a specific aarch64, we go from 0.145s down to 0.118s.
>
> Bootstrapped and tested on x86_64-linux-gnu.


OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
> PR tree-optimization/118505
> * gimple-ssa-split-paths.cc (poor_ifcvt_pred): Return
> true for trapping statements.
>
> Signed-off-by: Andrew Pinski 
> ---
>  gcc/gimple-ssa-split-paths.cc | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/gcc/gimple-ssa-split-paths.cc b/gcc/gimple-ssa-split-paths.cc
> index 7c5bc1d842c..9db73fdcc6d 100644
> --- a/gcc/gimple-ssa-split-paths.cc
> +++ b/gcc/gimple-ssa-split-paths.cc
> @@ -160,6 +160,11 @@ poor_ifcvt_pred (basic_block pred, basic_block bb)
>gimple *stmt = last_and_only_stmt (pred);
>if (!stmt || gimple_code (stmt) != GIMPLE_ASSIGN)
>  return true;
> +
> +  /* If the statement could trap, then this is a poor ifcvt candidate. */
> +  if (gimple_could_trap_p (stmt))
> +return true;
> +
>tree_code code = gimple_assign_rhs_code (stmt);
>if (poor_ifcvt_candidate_code (code))
>  return true;
> --
> 2.43.0
>


[PATCH 1/2] split-path: CALL_EXPR can't show up in gimple_assign

2025-01-29 Thread Andrew Pinski
While working on split path, I noticed that poor_ifcvt_candidate_code
would check for CALL_EXPR but that can't show up in gimple_assign
so this removes that check.

This could be a very very small compile time improvement.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* gimple-ssa-split-paths.cc (poor_ifcvt_candidate_code): Remove 
CALL_EXPR handling.

Signed-off-by: Andrew Pinski 
---
 gcc/gimple-ssa-split-paths.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/gimple-ssa-split-paths.cc b/gcc/gimple-ssa-split-paths.cc
index 018e59f98cb..7c5bc1d842c 100644
--- a/gcc/gimple-ssa-split-paths.cc
+++ b/gcc/gimple-ssa-split-paths.cc
@@ -138,8 +138,7 @@ poor_ifcvt_candidate_code (enum tree_code code)
   return (code == MIN_EXPR
  || code == MAX_EXPR
  || code == ABS_EXPR
- || code == COND_EXPR
- || code == CALL_EXPR);
+ || code == COND_EXPR);
 }
 
 /* Return TRUE if PRED of BB is an poor ifcvt candidate. */
-- 
2.43.0



[committed] [PR testsuite/116860] Testsuite adjustment for recently added tests

2025-01-29 Thread Jeff Law
There's two new tests that are dependent on logical-op-non-short-circuit 
settings.  The BZ is reported against ppc64 and ppc64le, but also 
applies to a goodly number of the other targets.


The "regression" fix is trivial, just add the appropriate param to force 
the behavior we're expecting.  I'm committing that fix momentarily. 
It's been verified on ppc64, ppc64le and x86_64 as well as the various 
embedded targets in my tester where many FAILS flip to PASS.


I'm leaving the bug open without the regression marker as Jakub has 
noted a couple of improvements that we can and probably should make.


Jeffcommit 15dba7dfba8b7800ac7b74213171e4df9bc32bb9
Author: Jeff Law 
Date:   Wed Jan 29 19:42:11 2025 -0700

[PR testsuite/116860] Testsuite adjustment for recently added tests

There's two new tests that are dependent on logical-op-non-short-circuit
settings.  The BZ is reported against ppc64 and ppc64le, but also applies 
to a
goodly number of the other targets.

The "regression" fix is trivial, just add the appropriate param to force the
behavior we're expecting.  I'm committing that fix momentarily.  It's been
verified on ppc64, ppc64le and x86_64 as well as the various embedded 
targets
in my tester where many FAILS flip to PASS.

I'm leaving the bug open without the regression marker as Jakub has noted a
couple of improvements that we can and probably should make.

PR target/116860
gcc/testsuite
* gcc.dg/tree-ssa/fold-xor-and-or.c: Set 
logical-op-non-short-circuit.
* gcc.dg/tree-ssa/fold-xor-or.c: Similarly.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
index e5dc98e7541..99e83d8e5aa 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-options "-O3 -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
 
 typedef unsigned long int uint64_t;
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c 
b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
index c55cfbcc84c..51b7373af0d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-options "-O3 -fdump-tree-optimized --param 
logical-op-non-short-circuit=1" } */
 
 typedef unsigned long int uint64_t;
 


Re: [PATCH v2] wwwdocs: add a Python postprocessing script

2025-01-29 Thread David Malcolm
On Thu, 2025-01-30 at 00:31 +0800, Gerald Pfeifer wrote:
> On Fri, 24 Jan 2025, David Malcolm wrote:
> > Changed in v2: rather than replacing "mhc", this version runs the
> > output from mhc through the Python script.
> 
> Unless I'm missing something that makes sense, yes.
> 
> > With this approach we could gradually move parts of the mhc
> > functionality into the python script, at whatever pace is
> > comfortable.
> 
> That sounds like a plan. It would be really great if you could find
> some 
> time on merging your original script into this. MetaHTML really has
> become 
> quite some technical debt.
> 
> > Gerald: can you test this with mhc?  Alternatively, can I go ahead
> > and try pushing this?
> 
> Yes, and sorry for the delay. Though ... 
> 
> > +    if ! python3 $SOURCETREE/bin/process_html.py
> > $TMPDIR/output.raw $TMPDIR/output.after-py; then
> > +    echo "bin/process_html.py failed; aborting."
> > +    exit 1
> > +    fi
> 
> ... great you didn't push it yet:
> 
> python3: can't open file '/www/gcc/htdocs-
> preformatted/bin/process_html.py': [Errno 2] No such file or
> directory
> bin/process_html.py failed; aborting.
> 
> I tried replacing this with just "process_html.py" or
> "./process_html.py", 
> alas neither did the job.

I used SOURCETREE; do we need a different variable?

> 
> So for now I used "/www/gcc/bin/process_html.py" (on gcc.gnu.org),
> which 
> is a bit heavy handed, but works.

OK

> 
> 
> I then preprocessed the whole htdocs tree into a new target directory
> and 
> manually inspected the diff which essentially features two kinds of
> change:
> 
>   < The host system
>   > The host system
> 
> which is what you are adding, and
> 
>   < Last modified 2022-11-14.
>   > Last modified 2025-01-29.
> 
> in the footers of pages.
> 
> 
> I'm fine if you commit just hardcoding the name of the script and
> will 
> then take it from there.
> 
> If you prefer me to commit your patch with necessary updates and then
> double test on the live system, I'll also be happy to do so.
> 
> Simply let me know!

If you've got it working, then please go ahead and do what's needed to
safely push this and get the changes live.  Once the python script is
in place, I can take spend some cycles looking at reimplementing parts
of the mhc functionality in python.

> 
> 
> 
> Just one question: Are you very attached to the current name of the
> Python 
> script or can we call it preprocess-html for consistency with the
> main 
> script?

It runs after mhc, so it could arguably be *post*processing.

That said I really don't care :)

> 
> (The comment at the top already reads "script to preprocess .html
> files 
> below htdocs". :-)
> 
> Cheers,
> Gerald

Thanks
Dave



Re: [PATCH] c++: friend vs inherited guide confusion [PR117855]

2025-01-29 Thread Bernhard Reutner-Fischer
On 29 January 2025 20:43:12 CET, Bernhard Reutner-Fischer 
 wrote:
>
>>> deduction_guide_p doesn't look like a significant expense to me; I wouldn't
>>> bother trying to optimize it without profile data as evidence.
>>
>>Yep, can confirm that it's insignificant.  And DECL_FRIEND_CONTEXT isn't
>>used in any particularly hot code so the implied memory barrier of the
>>deduction_guide_p call doesn't matter either (though I guess it'd be good
>>to mark it and other such predicates 'pure' anyway).
>
>hear, hear &

Why does predict fail on implying that on its own?



[PATCH] c++: remove LAMBDA_EXPR_CAPTURES_THIS_P

2025-01-29 Thread Patrick Palka
Built on x86_64-pc-linux-gnu, does this look OK for trunk?

-- >8 --

This unused accessor is just a simple alias of LAMBDA_EXPR_THIS_CAPTURE
and contrary to the documentation doesn't actually use TREE_LANG_FLAG_0.
Might as well remove it.

gcc/cp/ChangeLog:

* cp-tree.h (LAMBDA_EXPR_CAPTURES_THIS_P): Remove.
---
 gcc/cp/cp-tree.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 21011d0b003..ec976928f5f 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -429,7 +429,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   LAMBDA_CAPTURE_EXPLICIT_P (in a TREE_LIST in LAMBDA_EXPR_CAPTURE_LIST)
   PARENTHESIZED_LIST_P (in the TREE_LIST for a parameter-declaration-list)
   CONSTRUCTOR_IS_DIRECT_INIT (in CONSTRUCTOR)
-  LAMBDA_EXPR_CAPTURES_THIS_P (in LAMBDA_EXPR)
   DECLTYPE_FOR_LAMBDA_CAPTURE (in DECLTYPE_TYPE)
   VEC_INIT_EXPR_IS_CONSTEXPR (in VEC_INIT_EXPR)
   DECL_OVERRIDE_P (in FUNCTION_DECL)
@@ -1543,10 +1542,6 @@ enum cp_lambda_default_capture_mode_type {
 #define LAMBDA_EXPR_THIS_CAPTURE(NODE) \
   (((struct tree_lambda_expr *)LAMBDA_EXPR_CHECK (NODE))->this_capture)
 
-/* Predicate tracking whether `this' is in the effective capture set.  */
-#define LAMBDA_EXPR_CAPTURES_THIS_P(NODE) \
-  LAMBDA_EXPR_THIS_CAPTURE(NODE)
-
 /* True iff uses of a const variable capture were optimized away.  */
 #define LAMBDA_EXPR_CAPTURE_OPTIMIZED(NODE) \
   TREE_LANG_FLAG_2 (LAMBDA_EXPR_CHECK (NODE))
-- 
2.48.1.131.gda898a5c64



[PATCH v6] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Siddhesh Poyarekar
Denormal behaviour is well defined for IEEE128 long doubles, so
XFAIL some gfortran tests only for targets with the IBM128 long double
ABI.

gcc/testsuite/ChangeLog:

PR testsuite/118127
* lib/target-supports.exp
(check_effective_target_long_double_is_ibm128): New
procedure.
* gfortran.dg/default_format_2.f90: xfail for
long_double_is_ibm128.
* gfortran.dg/default_format_denormal_2.f90: Likewise.
* gfortran.dg/large_real_kind_form_io_2.f90: Likewise.

Signed-off-by: Siddhesh Poyarekar 
---
Change from v5:
- bring back v4 and change proc name and comment.

 gcc/testsuite/gfortran.dg/default_format_2.f90 |  2 +-
 .../gfortran.dg/default_format_denormal_2.f90  |  2 +-
 .../gfortran.dg/large_real_kind_form_io_2.f90  |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 14 ++
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/default_format_2.f90 
b/gcc/testsuite/gfortran.dg/default_format_2.f90
index 5ad7b3a6429..dd04d3aae98 100644
--- a/gcc/testsuite/gfortran.dg/default_format_2.f90
+++ b/gcc/testsuite/gfortran.dg/default_format_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-apple-darwin* powerpc*-*-linux* } }
+! { dg-do run { xfail long_double_is_ibm128 } }
 ! { dg-require-effective-target fortran_large_real }
 ! Test XFAILed on these platforms because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
diff --git a/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90 
b/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
index e9ccf5e8f61..ae056d506a2 100644
--- a/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
+++ b/gcc/testsuite/gfortran.dg/default_format_denormal_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-*-* } }
+! { dg-do run { xfail long_double_is_ibm128 } }
 ! { dg-require-effective-target fortran_large_real }
 ! Test XFAILed on this platform because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
diff --git a/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90 
b/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
index 34b8aec462c..7b5ca645b62 100644
--- a/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
+++ b/gcc/testsuite/gfortran.dg/large_real_kind_form_io_2.f90
@@ -1,4 +1,4 @@
-! { dg-do run { xfail powerpc*-apple-darwin* powerpc*-*-linux* } }
+! { dg-do run { xfail long_double_is_ibm128 } }
 ! Test XFAILed on these platforms because the system's printf() lacks
 ! proper support for denormalized long doubles. See PR24685
 ! { dg-require-effective-target fortran_large_real }
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 9ad1e1965bb..60e24129bd5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1854,6 +1854,20 @@ proc check_effective_target_fortran_integer_16 { } {
 }]
 }
 
+# Check if long double on the target defaults to the IBM extended format.
+
+proc check_effective_target_long_double_is_ibm128 { } {
+if { ![istarget powerpc*-*-*] } {
+  return 0
+}
+
+return [check_no_compiler_messages long_double_is_ibm128 assembly {
+  #ifndef __LONG_DOUBLE_IBM128__
+  #error "__LONG_DOUBLE_IBM128__ not defined"
+  #endif
+}]
+}
+
 # Return 1 if we can statically link libgfortran, 0 otherwise.
 #
 # When the target name changes, replace the cached result.
-- 
2.47.1



Re: [PATCH v6] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 08:19:38AM -0500, Siddhesh Poyarekar wrote:
> Denormal behaviour is well defined for IEEE128 long doubles, so
> XFAIL some gfortran tests only for targets with the IBM128 long double
> ABI.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/118127
>   * lib/target-supports.exp
>   (check_effective_target_long_double_is_ibm128): New
>   procedure.
>   * gfortran.dg/default_format_2.f90: xfail for
>   long_double_is_ibm128.
>   * gfortran.dg/default_format_denormal_2.f90: Likewise.
>   * gfortran.dg/large_real_kind_form_io_2.f90: Likewise.
> 
> Signed-off-by: Siddhesh Poyarekar 

Ok, thanks.

Jakub



Re: [PATCH] lra: initialize allocated_hard_reg_p[] for hard regs referenced in RTL [PR118533]

2025-01-29 Thread Michael Matz
Hello,

On Tue, 28 Jan 2025, Surya Kumari Jangala wrote:

> The following patch has been bootstrapped and regtested on 
> powerpc64le-linux, aarch64-linux and x86_64-linux. This patch is a 
> proposed fix for PR118533. Request you to please review the patch.

A full insn-scan should not be needed for that.  df_regs_ever_live(x) 
should be true for a hardreg 'x' randomly being already used in the 
function before IRA (after df is initialized which it should be in 
ira_color).  (df_hard_reg_used_p(x) might also be acceptable here, 
depending on what's precisely needed)

So, just test for that in addition to allocated_hardreg_p[] within 
calculate_saved_nregs?


Ciao,
Michael.


Re: [PATCH] RX: Restrict displacement ranges in "Q" constraint

2025-01-29 Thread Jeff Law




On 1/29/25 3:47 AM, Yoshinori Sato wrote:

When using the "Q" constraint in the inline assembler, the displacement value
could exceed the range specified by the instruction.
To avoid this issue, a displacement range check is added to the "Q" constraint.

Thanks. I've pushed this to the trunk, even though it's not a regression 
as it's limited to the rx port and fixes a clear bug.


In the future, if you could include a testcase it'd be useful.

Thanks again,
Jeff



Re: [PATCH v2] get source line for diagnostic from preprocessed file [PR preprocessor/79106]

2025-01-29 Thread David Malcolm
On Wed, 2025-01-29 at 09:16 -0500, David Malcolm wrote:
> On Wed, 2025-01-29 at 13:05 +, Bader, Lucas wrote:
> > Hi,
> > 
> > as discussed, I rebased and tested my patch against current master.
> > Additionally, it now has the Signed-off-by tag.
> > Looking forward to your comments.
> > 
> > Best
> > Lucas
> 
> Thanks for the updated patch.
> 
> Various notes inline throughout below...
> 
> > 
> > 
> > 
> > Within a compile cluster, only the preprocessed output of GCC is
> > transferred
> > to remote nodes for compilation. When GCC produces advanced
> > diagnostics
> > (with -fdiagnostics-show-caret), e.g. prints out the affected
> > source
> > line and fixit hints, it attempts to read the source file again,
> > even
> > when
> > compiling a preprocessed file (-fpreprocessed). This leads to wrong
> > diagnostics when building with a compile cluster, or, more
> > generally,
> > when changing or deleting the original source file.
> > 
> > This patch alters GCC to read from the preprocessed file instead by
> > calculating the corresponding source line. This behavior is
> > consistent
> > with clang.
> > 
> > gcc/c-family/ChangeLog:
> > 
> > * c-opts.cc (c_common_handle_option): pass -fpreprocessed
> > option value to global diagnostic configuration
> > 
> > gcc/ChangeLog:
> > 
> > * diagnostic-show-locus.cc
> > (layout::calculate_x_offset_display): read line from source or
> > preprocessed
> > file based on -fpreprocessed value
> > (source_line::source_line): read line from source or
> > preprocessed
> > file based on -fpreprocessed value
> > (layout_printer::print_line): read line from source or
> > preprocessed
> > file based on -fpreprocessed value
> > 
> > * diagnostic.cc (diagnostic_context::initialize):
> > initialize
> > new members
> > * diagnostic.h: new members for reading
> > source lines from preprocessed files
> > 
> > * input.cc (file_cache::get_source_line_preprocessed): new
> > function
> > to read source lines from preprocessed files
> > (test_reading_source_line_preprocessed): new test case
> > (input_cc_tests): execute new test case
> > * input.h (class file_cache): add new member function
> > 
> > * opts-global.cc (read_cmdline_options): pass input
> > filename
> > to global
> > diagnostic context
> 
> 
> Some minor ChangeLog nits:
> * you should add "PR preprocessor/79106" (without quotes) to the
> start
> of each ChangeLog entry; have a look at the existing ChangeLog
> entries
> to see how we do it
> * the entries should read as sentences, so please capitalize the
> first
> letter, and add periods
> * please wrap at 74 columns.
> 
> You can check the patch using
>   ./contrib/gcc-changelog/git_check_commit.py
> (which is run as a server-side pre-commit hook, I believe)
> 
> 
> > 
> > Signed-off-by: Lucas Bader 
> > ---
> >  gcc/c-family/c-opts.cc   |   1 +
> >  gcc/diagnostic-show-locus.cc |  25 --
> >  gcc/diagnostic.cc    |   2 +
> >  gcc/diagnostic.h |   6 ++
> >  gcc/input.cc | 169
> > +++
> >  gcc/input.h  |   2 +
> >  gcc/opts-global.cc   |   4 +
> >  7 files changed, 204 insertions(+), 5 deletions(-)
> > 
> > diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
> > index 87b231861a6..105d8fa6eff 100644
> > --- a/gcc/c-family/c-opts.cc
> > +++ b/gcc/c-family/c-opts.cc
> > @@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const
> > char
> > *arg, HOST_WIDE_INT value,
> >  
> >  case OPT_fpreprocessed:
> >    cpp_opts->preprocessed = value;
> > +  global_dc->m_is_preprocessed = value;
> >    break;
> >  
> >  case OPT_fdebug_cpp:
> > diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-
> > locus.cc
> > index 898efe74acf..7ac183a126f 100644
> > --- a/gcc/diagnostic-show-locus.cc
> > +++ b/gcc/diagnostic-show-locus.cc
> > @@ -1786,8 +1786,13 @@ layout::calculate_x_offset_display ()
> >    return;
> >  }
> >  
> > -  const char_span line = m_file_cache.get_source_line
> > (m_exploc.file,
> > -     
> > m_exploc.line);
> > +  char_span line (NULL, 0);
> > +  if (global_dc->m_main_input_file_path != NULL && global_dc-
> > > m_is_preprocessed)
> > +    line = m_file_cache.get_source_line_preprocessed (
> > +     m_exploc.file, global_dc->m_main_input_file_path,
> > m_exploc.line);
> > +  else
> > +    line = m_file_cache.get_source_line (m_exploc.file,
> > m_exploc.line);
> > +
> 
> Here and in two other places below in this file the patch adds logic
> of the form:
> 
>   if (global_dc->main_input_file_path != NULL && global_dc-
> >is_preprocessed)
>     line = location_get_source_line_preprocessed (some file,
>   global_dc-
> >main_input_file_path,
>     some line);
>   else
>     line = location_get_source_line (some file,

[PATCH] pair-fusion: A couple of fixes for sp updates [PR118429]

2025-01-29 Thread Richard Sandiford
The PR showed two issues with pair-fusion.  The first is that the pass
treated stack pointer deallocations as ordinary register updates, and so
might move them earlier than another stack access (through a different
base register) that doesn't alias the pair candidate.

The simplest fix for that seems to be to prevent the stack deallocation
from being moved.  This part might (or might not) be a latent source of
wrong code and so worth backporting in some form.  (The patch as-is
won't work for GCC 14.)

The second issue only started with r15-6551, which added a memory
write to stack allocations and deallocations.  We should use the
existing tombstone mechanism to preserve the associated memory
definition.  (Deleting definitions immediately would have quadratic
complexity in the worst case.)

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
PR rtl-optimization/118429
* pair-fusion.cc (latest_hazard_before): Add an extra parameter
to say whether the instruction is a load or a store.  If the
instruction is not a load or store and has memory side effects,
prevent it from being moved earlier.
(pair_fusion::find_trailing_add): Update call accordingly.
(pair_fusion_bb_info::fuse_pair): If the trailng addition had
a memory side-effect, use a tombstone to preserve it.

gcc/testsuite/
PR rtl-optimization/118429
* gcc.c-torture/compile/pr118429.c: New test.
---
 gcc/pair-fusion.cc| 45 ++-
 .../gcc.c-torture/compile/pr118429.c  |  7 +++
 2 files changed, 40 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr118429.c

diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc
index b6643ca4812..602e572ab6c 100644
--- a/gcc/pair-fusion.cc
+++ b/gcc/pair-fusion.cc
@@ -573,11 +573,13 @@ pair_fusion_bb_info::track_access (insn_info *insn, bool 
load_p, rtx mem)
 // If IGNORE_INSN is non-NULL, we should further ignore any hazards arising
 // from that insn.
 //
-// N.B. we ignore any defs/uses of memory here as we deal with that separately,
-// making use of alias disambiguation.
+// IS_LOAD_STORE is true if INSN is one of the loads or stores in the
+// candidate pair.  We ignore any defs/uses of memory in such instructions
+// as we deal with that separately, making use of alias disambiguation.
 static insn_info *
 latest_hazard_before (insn_info *insn, rtx *ignore,
- insn_info *ignore_insn = nullptr)
+ insn_info *ignore_insn = nullptr,
+ bool is_load_store = true)
 {
   insn_info *result = nullptr;
 
@@ -588,6 +590,10 @@ latest_hazard_before (insn_info *insn, rtx *ignore,
   && find_reg_note (insn->rtl (), REG_EH_REGION, NULL_RTX))
 return insn->prev_nondebug_insn ();
 
+  if (!is_load_store
+  && accesses_include_memory (insn->defs ()))
+return insn->prev_nondebug_insn ();
+
   // Return true if we registered the hazard.
   auto hazard = [&](insn_info *h) -> bool
 {
@@ -1238,7 +1244,7 @@ pair_fusion::find_trailing_add (insn_info *insns[2],
 insns[0]->uid (), insns[1]->uid ());
 };
 
-  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1]);
+  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1], false);
   if (!hazard || *hazard <= *pair_dst)
 {
   if (dump_file)
@@ -1633,7 +1639,7 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
   insn_info *insns[2] = { first, second };
 
   auto_vec changes;
-  auto_vec tombstone_uids (2);
+  auto_vec tombstone_uids;
 
   rtx pats[2] = {
 PATTERN (first->rtl ()),
@@ -1824,6 +1830,16 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
 validate_change (rti, ®_NOTES (rti), reg_notes, true);
   };
 
+  // Turn CHANGE into a memory definition tombstone.
+  auto make_tombstone = [&](insn_change *change)
+{
+  tombstone_uids.quick_push (change->insn ()->uid ());
+  rtx_insn *rti = change->insn ()->rtl ();
+  validate_change (rti, &PATTERN (rti), gen_tombstone (), true);
+  validate_change (rti, ®_NOTES (rti), NULL_RTX, true);
+  change->new_uses = use_array ();
+};
+
   if (load_p)
 {
   changes.safe_push (make_delete (first));
@@ -1879,11 +1895,7 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
}
case Action::TOMBSTONE:
{
- tombstone_uids.quick_push (change->insn ()->uid ());
- rtx_insn *rti = change->insn ()->rtl ();
- validate_change (rti, &PATTERN (rti), gen_tombstone (), true);
- validate_change (rti, ®_NOTES (rti), NULL_RTX, true);
- change->new_uses = use_array (nullptr, 0);
+ make_tombstone (change);
  break;
}
case Action::INSERT:
@@ -1934,7 +1946,17 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
 }
 
   if (trailing_add)
-changes.safe_push (make_delete (trailing_add));
+{
+  if (aut

Re: [PATCH] c++: friend vs inherited guide confusion [PR117855]

2025-01-29 Thread Bernhard Reutner-Fischer


>> deduction_guide_p doesn't look like a significant expense to me; I wouldn't
>> bother trying to optimize it without profile data as evidence.
>
>Yep, can confirm that it's insignificant.  And DECL_FRIEND_CONTEXT isn't
>used in any particularly hot code so the implied memory barrier of the
>deduction_guide_p call doesn't matter either (though I guess it'd be good
>to mark it and other such predicates 'pure' anyway).

hear, hear &


Re: [PATCH v5] testsuite/118127: Pass fortran tests on ppc64le for IEEE128 long doubles

2025-01-29 Thread Jakub Jelinek
> Denormal behaviour is well defined for IEEE128 long doubles, so don't
> XFAIL some gfortran tests on ppc64le when configured with the IEEE128
> long double ABI.

If long double is just IEEE754 double, then I think denormals
are equally well behaved, it is solely the case of IBM double double.
Of course, these 3 tests won't run anyway in that case because
fortran_large_real effective target will be false.
So I think you want an effective target which is solely about IBM double
double being the default long double.  No need to have ppc in the
name of the effective target.
Now, long_double_ibm128 effective target is not what you want to use
because that isn't what the long double is, but what long double can be if
one dg-add-options long_double_ibm128.
So, perhaps long_double_is_ibm128 effective target with
#ifndef __LONG_DOUBLE_IBM128__
#error ...
#endif
in the test?
Or if you call it long_double_is_composite (for MODE_COMPOSITE_P on the
long double's mode), or long_double_has_variable_precision or something like
that.

> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/118127
>   * lib/target-supports.exp
>   (check_effective_target_ppc_not_well_defined_denormals): New
>   procedure.
>   * gfortran.dg/default_format_2.f90: xfail for
>   ppc_not_well_defined_denormals.
>   * gfortran.dg/default_format_denormal_2.f90: Likewise.
>   * gfortran.dg/large_real_kind_form_io_2.f90: Likewise.
> 
> Signed-off-by: Siddhesh Poyarekar 

Jakub



[pushed] c++: add fixed test [PR57533]

2025-01-29 Thread Marek Polacek
Tested x86_64-pc-linux-gnu, applying to trunk.

-- >8 --
Fixed by r11-2412.

PR c++/57533

gcc/testsuite/ChangeLog:

* g++.dg/eh/throw5.C: New test.
---
 gcc/testsuite/g++.dg/eh/throw5.C | 23 +++
 1 file changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/eh/throw5.C

diff --git a/gcc/testsuite/g++.dg/eh/throw5.C b/gcc/testsuite/g++.dg/eh/throw5.C
new file mode 100644
index 000..554e8700df0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/eh/throw5.C
@@ -0,0 +1,23 @@
+// PR c++/57533
+// { dg-do run { target c++11 } }
+
+struct X
+{
+bool moved = false;
+
+X() = default;
+X(const X&) = default;
+X(X&& x) { x.moved = true; }
+};
+
+int main()
+{
+X x;
+try {
+throw x;
+}
+catch(...) {
+}
+if (x.moved)
+__builtin_abort();
+}

base-commit: d4d4e874dee2d5b0abe5ceb9f2a78e5602e86030
-- 
2.48.1



C++ patch ping

2025-01-29 Thread Jakub Jelinek
I'd like to ping 4 C++ patches:

https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662379.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662380.html
https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662381.html
  P2552R3 - On the ignorability of standard attributes - series [PR110345]
  Note, all the FE changes are already in, the above are solely missing
  tests which would allow us to mark the paper as resolved

https://gcc.gnu.org/pipermail/gcc-patches/2024-December/671089.html
  P2 - c++: check_flexarray fixes [PR117516]

Thanks

Jakub



Re: [PATCH] pair-fusion: Check for invalid use arrays [PR118320]

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 06:46:30PM +, Richard Sandiford wrote:
> As Andrew says in the bugzilla comments, this PR is about a case where
> we tried to fuse two stores of x0, one in which x0 was defined and one
> in which it was undefined.  merge_access_arrays failed on the conflict,
> but the failure wasn't caught.
> 
> Normally the hazard detection code would fail if the instructions
> had incompatible uses.  However, an undefined use doesn't impose
> many restrictions on movements.  I think this is likely to be the
> only case where hazard detection isn't enough.
> 
> As Andrew notes in bugzilla, it might be possible to allow uses
> of defined and undefined values to be merged to the defined value.
> But that sounds dangerous in the general case, as an rtl-ssa-level
> decision.  We might run the risk of turning conditional UB into
> unconditional UB.  And LLVM proves that the definition of "undef"
> isn't simple.
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> gcc/
>   PR rtl-optimization/118320
>   * pair-fusion.cc (pair_fusion_bb_info::fuse_pair): Commonize
>   the merge of input_uses and return early if it fails.
> 
> gcc/testsuite/
>   PR rtl-optimization/118320
>   * g++.dg/torture/pr118320.C: New test.

Ok.

Jakub



Re: [PATCH v2] wwwdocs: add a Python postprocessing script

2025-01-29 Thread Gerald Pfeifer
On Fri, 24 Jan 2025, David Malcolm wrote:
> Changed in v2: rather than replacing "mhc", this version runs the
> output from mhc through the Python script.

Unless I'm missing something that makes sense, yes.

> With this approach we could gradually move parts of the mhc
> functionality into the python script, at whatever pace is comfortable.

That sounds like a plan. It would be really great if you could find some 
time on merging your original script into this. MetaHTML really has become 
quite some technical debt.

> Gerald: can you test this with mhc?  Alternatively, can I go ahead
> and try pushing this?

Yes, and sorry for the delay. Though ... 

> +if ! python3 $SOURCETREE/bin/process_html.py $TMPDIR/output.raw 
> $TMPDIR/output.after-py; then
> +echo "bin/process_html.py failed; aborting."
> +exit 1
> +fi

... great you didn't push it yet:

python3: can't open file '/www/gcc/htdocs-preformatted/bin/process_html.py': 
[Errno 2] No such file or directory
bin/process_html.py failed; aborting.

I tried replacing this with just "process_html.py" or "./process_html.py", 
alas neither did the job.

So for now I used "/www/gcc/bin/process_html.py" (on gcc.gnu.org), which 
is a bit heavy handed, but works.


I then preprocessed the whole htdocs tree into a new target directory and 
manually inspected the diff which essentially features two kinds of change:

  < The host system
  > The host system

which is what you are adding, and

  < Last modified 2022-11-14.
  > Last modified 2025-01-29.

in the footers of pages.


I'm fine if you commit just hardcoding the name of the script and will 
then take it from there.

If you prefer me to commit your patch with necessary updates and then 
double test on the live system, I'll also be happy to do so.

Simply let me know!



Just one question: Are you very attached to the current name of the Python 
script or can we call it preprocess-html for consistency with the main 
script?

(The comment at the top already reads "script to preprocess .html files 
below htdocs". :-)

Cheers,
Gerald


Re: [PATCH] asf: Enable pass at O2 or higher

2025-01-29 Thread Richard Sandiford
Christoph Müllner  writes:
> The avoid-store-forwarding pass is disabled by default and therefore
> in the risk of bit-rotting.  This patch addresses this by enabling
> the pass at O2 or higher.
>
> The assembly patterns in `bitfield-bitint-abi-align16.c` and
> `bitfield-bitint-abi-align8.c` have been updated to account for
> the ASF transformations.
>
> This was bootstrapped on x86-64 and AArch64 and showed no
> regressions in the test suite (--enable-checking=yes,extra and
> all languages).
>
> gcc/ChangeLog:
>
>   * doc/invoke.texi: Document asf as an O2 enabled option.
>   * opts.cc: Enable asf at O2.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/bitfield-bitint-abi-align16.c:
>   Modify testcases to account for the asf transformations.
>   * gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise.
>   * gcc.target/aarch64/avoid-store-forwarding-6.c: New test.

Thanks for doing this.  Just a question about the testsuite updates:

> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c 
> b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> index c29a230a771..b4501d81c45 100644
> --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c
> @@ -91,10 +91,11 @@
>  **   mov (w[0-9]+), 0
>  **   bfi \3, w\2, 0, 1
>  **   and x3, x\2, 9223372036854775807
> -**   mov x2, 0
> +**   mov (x[0-9]+), 0
> +**   bfi \4, (x[0-9]+), 0, 8
>  **   str xzr, \[sp\]
>  **   strb\3, \[sp\]
> -**   ldr x1, \[sp\]
> +**   mov \5, 0
>  **   add sp, sp, 16
>  **   b   fp
>  */

This looks odd, in that \5 is used to match an output, whereas:

> +**   bfi \4, (x[0-9]+), 0, 8

allows any input.  I would have expected (x[0-9]+) to only be used for
temporary results, with hard-coded registers being used for ABI-facing
results.  Similarly, all uses were previously \n or hard-coded registers.

Thanks,
Richard

> @@ -183,19 +184,21 @@
>  **   sxtw(x[0-9]+), w1
>  **   mov x0, \2
>  **   and x7, \2, 9223372036854775807
> +**   mov (x[0-9]+), 0
>  **   mov (w[0-9]+), 0
> -**   bfi \3, w\1, 0, 1
> +**   bfi \4, w\1, 0, 1
> +**   mov (x[0-9]+), \3
> +**   bfi \5, (x[0-9]+), 0, 8
> +**   stp x7, \5, \[sp\]
>  **   strbwzr, \[sp, 16\]
>  **   mov x6, x7
>  **   mov x5, x7
>  **   mov x4, x7
> -**   mov x3, x7
> -**   mov x2, x7
> -**   str xzr, \[sp, 48\]
> -**   strb\3, \[sp, 48\]
> -**   ldr (x[0-9]+), \[sp, 48\]
> -**   stp x7, \4, \[sp\]
> -**   mov x1, x7
> +**   mov \5, x7
> +**   str \3, \[sp, 48\]
> +**   strb\4, \[sp, 48\]
> +**   mov \3, x7
> +**   mov \6, x7
>  **   bl  fp_stack
>  **   sbfxx0, x0, 0, 63
>  **...
> @@ -343,10 +346,11 @@
>  **   mov w0, w1
>  **   mov (w[0-9]+), 0
>  **   bfi \2, w\1, 0, 1
> -**   mov x2, 0
> +**   mov (x[0-9]+), 0
> +**   bfi \3, (x[0-9]+), 0, 8
>  **   str xzr, \[sp\]
>  **   strb\2, \[sp\]
> -**   ldr x1, \[sp\]
> +**   mov \4, 0
>  **...
>  **   b   fp_stdarg
>  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c 
> b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> index 13ffbf416ca..a9ac917d3a6 100644
> --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c
> @@ -91,10 +91,11 @@
>  **   mov (w[0-9]+), 0
>  **   bfi \3, w\2, 0, 1
>  **   and x3, x\2, 9223372036854775807
> -**   mov x2, 0
> +**   mov (x[0-9]+), 0
> +**   bfi \4, (x[0-9]+), 0, 8
>  **   str xzr, \[sp\]
>  **   strb\3, \[sp\]
> -**   ldr x1, \[sp\]
> +**   mov \5, 0
>  **   add sp, sp, 16
>  **   b   fp
>  */
> @@ -183,19 +184,21 @@
>  **   sxtw(x[0-9]+), w1
>  **   mov x0, \2
>  **   and x7, \2, 9223372036854775807
> +**   mov (x[0-9]+), 0
>  **   mov (w[0-9]+), 0
> -**   bfi \3, w\1, 0, 1
> +**   bfi \4, w\1, 0, 1
> +**   mov (x[0-9]+), \3
> +**   bfi \5, (x[0-9]+), 0, 8
> +**   stp x7, \5, \[sp\]
>  **   strbwzr, \[sp, 16\]
>  **   mov x6, x7
>  **   mov x5, x7
>  **   mov x4, x7
> -**   mov x3, x7
> -**   mov x2, x7
> -**   str xzr, \[sp, 48\]
> -**   strb\3, \[sp, 48\]
> -**   ldr (x[0-9]+), \[sp, 48\]
> -**   stp x7, \4, \[sp\]
> -**   mov x1, x7
> +**   mov \5, x7
> +**   str \3, \[sp, 48\]
> +**   strb\4, \[sp, 48\]
> +**   mov \3, x7
> +**   mov \6, x7
>  **   bl  fp_stack
>  **   sbfxx0, x0, 0, 63
>  **...
> @@ -345,10 +348,11 @@
>  **   mov w0, w1
>  **   mov (w[0-9]+), 0
>  **   bfi \2, w\1, 0, 1
> -**   mov x2, 0
> +**   mov (x[0-9]+), 0
> +**   bfi \3, (x[0-9]+), 0, 8
>  **   str xzr, \[sp\]
>  **   strb\2, \[sp\]
> -**   ldr x1, \[sp\]
> +**   m

[PATCH] testsuite: Use int size instead of alignment for pr116357.c

2025-01-29 Thread Dimitar Dimitrov
The test case assumes that alignof(int)=sizeof(int).  But for some
targets this is not valid.  For example, for PRU target,
alignof(int)=1 but sizeof(int)=4.

Fix the test case to align to twice the size of int, as the expected
dg-error messages suggest.

This patch fixes the test failures for PRU target.

Ok for trunk?

gcc/testsuite/ChangeLog:

* gcc.dg/pr116357.c: Use sizeof(int) instead of alignof(int).

Cc: Jakub Jelinek 
Signed-off-by: Dimitar Dimitrov 
---
 gcc/testsuite/gcc.dg/pr116357.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr116357.c b/gcc/testsuite/gcc.dg/pr116357.c
index 07effa13254..12aaf62c20d 100644
--- a/gcc/testsuite/gcc.dg/pr116357.c
+++ b/gcc/testsuite/gcc.dg/pr116357.c
@@ -2,9 +2,9 @@
 /* { dg-do compile } */
 /* { dg-options "" } */
 
-typedef int A __attribute__((aligned (2 * alignof (int;
+typedef int A __attribute__((aligned (2 * sizeof (int;
 A a[4];/* { dg-error "alignment of array elements is greater than 
element size" } */
-typedef volatile int B __attribute__((aligned (2 * alignof (int;
+typedef volatile int B __attribute__((aligned (2 * sizeof (int;
 B b[4];/* { dg-error "alignment of array elements is greater than 
element size" } */
-typedef const int C __attribute__((aligned (2 * alignof (int;
+typedef const int C __attribute__((aligned (2 * sizeof (int;
 C c[4];/* { dg-error "alignment of array elements is greater than 
element size" } */
-- 
2.48.1



Re: [PATCH] pair-fusion: A couple of fixes for sp updates [PR118429]

2025-01-29 Thread Jakub Jelinek
On Wed, Jan 29, 2025 at 04:44:55PM +, Richard Sandiford wrote:
> The PR showed two issues with pair-fusion.  The first is that the pass
> treated stack pointer deallocations as ordinary register updates, and so
> might move them earlier than another stack access (through a different
> base register) that doesn't alias the pair candidate.
> 
> The simplest fix for that seems to be to prevent the stack deallocation
> from being moved.  This part might (or might not) be a latent source of
> wrong code and so worth backporting in some form.  (The patch as-is
> won't work for GCC 14.)
> 
> The second issue only started with r15-6551, which added a memory
> write to stack allocations and deallocations.  We should use the
> existing tombstone mechanism to preserve the associated memory
> definition.  (Deleting definitions immediately would have quadratic
> complexity in the worst case.)
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> gcc/
>   PR rtl-optimization/118429
>   * pair-fusion.cc (latest_hazard_before): Add an extra parameter
>   to say whether the instruction is a load or a store.  If the
>   instruction is not a load or store and has memory side effects,
>   prevent it from being moved earlier.
>   (pair_fusion::find_trailing_add): Update call accordingly.
>   (pair_fusion_bb_info::fuse_pair): If the trailng addition had
>   a memory side-effect, use a tombstone to preserve it.
> 
> gcc/testsuite/
>   PR rtl-optimization/118429
>   * gcc.c-torture/compile/pr118429.c: New test.

Ok.

Jakub



[PATCH] pair-fusion: Check for invalid use arrays [PR118320]

2025-01-29 Thread Richard Sandiford
As Andrew says in the bugzilla comments, this PR is about a case where
we tried to fuse two stores of x0, one in which x0 was defined and one
in which it was undefined.  merge_access_arrays failed on the conflict,
but the failure wasn't caught.

Normally the hazard detection code would fail if the instructions
had incompatible uses.  However, an undefined use doesn't impose
many restrictions on movements.  I think this is likely to be the
only case where hazard detection isn't enough.

As Andrew notes in bugzilla, it might be possible to allow uses
of defined and undefined values to be merged to the defined value.
But that sounds dangerous in the general case, as an rtl-ssa-level
decision.  We might run the risk of turning conditional UB into
unconditional UB.  And LLVM proves that the definition of "undef"
isn't simple.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
PR rtl-optimization/118320
* pair-fusion.cc (pair_fusion_bb_info::fuse_pair): Commonize
the merge of input_uses and return early if it fails.

gcc/testsuite/
PR rtl-optimization/118320
* g++.dg/torture/pr118320.C: New test.
---
 gcc/pair-fusion.cc  | 32 -
 gcc/testsuite/g++.dg/torture/pr118320.C | 15 
 2 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr118320.C

diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc
index 602e572ab6c..5708d0f3b67 100644
--- a/gcc/pair-fusion.cc
+++ b/gcc/pair-fusion.cc
@@ -1730,6 +1730,24 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
   input_uses[i] = remove_note_accesses (attempt, input_uses[i]);
 }
 
+  // Get the uses that the pair instruction would have, and punt if
+  // the unpaired instructions use different definitions of the same
+  // register.  That would normally be caught as a side-effect of
+  // hazard detection below, but this check also deals with cases
+  // in which one use is undefined and the other isn't.
+  auto new_uses = merge_access_arrays (attempt,
+  drop_memory_access (input_uses[0]),
+  drop_memory_access (input_uses[1]));
+  if (!new_uses.is_valid ())
+{
+  if (dump_file)
+   fprintf (dump_file,
+"  load pair: i%d and i%d use different definiitions of"
+" the same register\n",
+insns[0]->uid (), insns[1]->uid ());
+  return false;
+}
+
   // Edge case: if the first insn is a writeback load and the
   // second insn is a non-writeback load which transfers into the base
   // register, then we should drop the writeback altogether as the
@@ -1852,11 +1870,7 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
   input_defs[1]);
   gcc_assert (pair_change->new_defs.is_valid ());
 
-  pair_change->new_uses
-   = merge_access_arrays (attempt,
-  drop_memory_access (input_uses[0]),
-  drop_memory_access (input_uses[1]));
-  gcc_assert (pair_change->new_uses.is_valid ());
+  pair_change->new_uses = new_uses;
   set_pair_pat (pair_change);
 }
   else
@@ -1877,9 +1891,7 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
case Action::CHANGE:
{
  set_pair_pat (change);
- change->new_uses = merge_access_arrays (attempt,
- input_uses[0],
- input_uses[1]);
+ change->new_uses = new_uses;
  auto d1 = drop_memory_access (input_defs[0]);
  auto d2 = drop_memory_access (input_defs[1]);
  change->new_defs = merge_access_arrays (attempt, d1, d2);
@@ -1907,9 +1919,7 @@ pair_fusion_bb_info::fuse_pair (bool load_p,
  auto new_insn = crtl->ssa->create_insn (attempt, INSN, pair_pat);
  change = make_change (new_insn);
  change->move_range = move_range;
- change->new_uses = merge_access_arrays (attempt,
- input_uses[0],
- input_uses[1]);
+ change->new_uses = new_uses;
  gcc_assert (change->new_uses.is_valid ());
 
  auto d1 = drop_memory_access (input_defs[0]);
diff --git a/gcc/testsuite/g++.dg/torture/pr118320.C 
b/gcc/testsuite/g++.dg/torture/pr118320.C
new file mode 100644
index 000..228d79860b5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr118320.C
@@ -0,0 +1,15 @@
+// { dg-additional-options "-fno-tree-sra" } */
+
+struct T{
+  long f[2];
+};
+void f1(int);
+void g(T&);
+void f1()
+{
+  T a;
+  a.~T(); // To force the clobber
+  a.f[1] = 1;
+  T b = a;
+  g(b);
+}
-- 
2.25.1



[PATCH] libstdc++: Use __is_invocable/nothrow_invocable builtins more

2025-01-29 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, I suppose this is stage 1 material?

-- >8 --

As a follow-up to r15-1253 and r15-1254.

libstdc++-v3/ChangeLog:

* include/std/type_traits (__is_invocable): Define in terms of
corresponding builtin if available.
(__is_nothrow_invocable): Likewise.
(is_invocable_v): Likewise.
(is_nothrow_invocable_v): Likewise.
---
 libstdc++-v3/include/std/type_traits | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 33892818257..f576d5b1426 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -3211,7 +3211,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 struct __is_invocable
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_invocable)
+: __bool_constant<__is_invocable(_Fn, _ArgTypes...)>
+#else
 : __is_invocable_impl<__invoke_result<_Fn, _ArgTypes...>, void>::type
+#endif
 { };
 
   template
@@ -3262,8 +3266,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // __is_nothrow_invocable (std::is_nothrow_invocable for C++11)
   template
 struct __is_nothrow_invocable
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_nothrow_invocable)
+: __bool_constant<__is_nothrow_invocable(_Fn, _Args...)>
+#else
 : __and_<__is_invocable<_Fn, _Args...>,
  __call_is_nothrow_<_Fn, _Args...>>::type
+#endif
 { };
 
 #pragma GCC diagnostic push
@@ -3702,10 +3710,19 @@ template 
   inline constexpr bool is_convertible_v = is_convertible<_From, _To>::value;
 #endif
 template
-  inline constexpr bool is_invocable_v = is_invocable<_Fn, _Args...>::value;
+  inline constexpr bool is_invocable_v
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_invocable)
+= __is_invocable(_Fn, _Args...);
+#else
+= is_invocable<_Fn, _Args...>::value;
+#endif
 template
   inline constexpr bool is_nothrow_invocable_v
+#if _GLIBCXX_USE_BUILTIN_TRAIT(__is_nothrow_invocable)
+= __is_nothrow_invocable(_Fn, _Args...);
+#else
 = is_nothrow_invocable<_Fn, _Args...>::value;
+#endif
 template
   inline constexpr bool is_invocable_r_v
 = is_invocable_r<_Ret, _Fn, _Args...>::value;
-- 
2.48.1.131.gda898a5c64