[PATCH v2] [x86] Movement between GENERAL_REGS and SSE_REGS for TImode doesn't need secondary reload.

2024-08-15 Thread liuhongt
It results in 2 failures for x86_64-pc-linux-gnu{\
-march=cascadelake};

gcc: gcc.target/i386/extendditi3-1.c scan-assembler cqt?o
gcc: gcc.target/i386/pr113560.c scan-assembler-times \tmulq 1

For pr113560.c, now GCC generates mulx instead of mulq with
-march=cascadelake, which should be optimal, so adjust testcase for
that.
For gcc.target/i386/extendditi2-1.c, RA happens to choose another
register instead of rax and results in

movq %rdi, %rbp
movq %rdi, %rax
sarq $63, %rbp
movq %rbp, %rdx

The patch adds a new define_peephole2 for that.

gcc/ChangeLog:

PR target/116274
* config/i386/i386-expand.cc (ix86_expand_vector_move):
Restrict special case TImode to 128-bit vector conversions via
V2DI under ix86_pre_reload_split ().
* config/i386/i386.cc (inline_secondary_memory_needed):
Movement between GENERAL_REGS and SSE_REGS for TImode doesn't
need secondary reload.
* config/i386/i386.md (*extendsidi2_rex64): Add a
define_peephole2 after it.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr116274.c: New test.
* gcc.target/i386/pr113560.c: Scan either mulq or mulx.
---
 gcc/config/i386/i386-expand.cc   |  2 +-
 gcc/config/i386/i386.cc  | 18 --
 gcc/config/i386/i386.md  | 19 +++
 gcc/testsuite/gcc.target/i386/pr113560.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr116274.c | 12 
 5 files changed, 45 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr116274.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index bdbc1423267..ed546eeed6b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -751,7 +751,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
   && SUBREG_P (op1)
   && GET_MODE (SUBREG_REG (op1)) == TImode
   && TARGET_64BIT && TARGET_SSE
-  && can_create_pseudo_p ())
+  && ix86_pre_reload_split ())
 {
   rtx tmp = gen_reg_rtx (V2DImode);
   rtx lo = gen_reg_rtx (DImode);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f044826269c..4821892d1e0 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20292,6 +20292,18 @@ inline_secondary_memory_needed (machine_mode mode, 
reg_class_t class1,
   if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
return true;
 
+  /* If the target says that inter-unit moves are more expensive
+than moving through memory, then don't generate them.  */
+  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
+ || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
+   return true;
+
+  /* Under SSE4.1, *movti_internal supports movement between
+SSE_REGS and GENERAL_REGS with pinsrq and pextrq.  */
+  if (TARGET_SSE4_1
+ && (TARGET_64BIT ? mode == TImode : mode == DImode))
+   return false;
+
   int msize = GET_MODE_SIZE (mode);
 
   /* Between SSE and general, we have moves no larger than word size.  */
@@ -20304,12 +20316,6 @@ inline_secondary_memory_needed (machine_mode mode, 
reg_class_t class1,
 
   if (msize < minsize)
return true;
-
-  /* If the target says that inter-unit moves are more expensive
-than moving through memory, then don't generate them.  */
-  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
- || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
-   return true;
 }
 
   return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index db7789c17d2..1962a7ba5c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5041,6 +5041,25 @@ (define_split
   DONE;
 })
 
+(define_peephole2
+  [(set (match_operand:DI 0 "general_reg_operand")
+   (match_operand:DI 1 "general_reg_operand"))
+   (parallel [(set (match_dup 0)
+  (ashiftrt:DI (match_dup 0)
+   (const_int 63)))
+  (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:DI 2 "general_reg_operand") (match_dup 1))
+   (set (match_operand:DI 3 "general_reg_operand") (match_dup 0))]
+  "(optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+   && REGNO (operands[2]) == AX_REG
+   && REGNO (operands[3]) == DX_REG
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_mentioned_p (operands[0], operands[1])
+   && !reg_mentioned_p (operands[2], operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 3) (ashiftrt:DI (match_dup 2) (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "extend<mode>di2"
   [(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
diff --git a/gcc/testsuite/gcc.target/i386/pr113560.c 
b/gcc/testsuite/gcc.target/i386/pr113560.c
index ac2e01a4589..9431a2d1d90 100644
--- a/gcc/testsuite/gcc.

Re: [PATCH v3] aarch64: Improve popcount for bytes [PR113042]

2024-08-15 Thread Richard Sandiford
Andrew Pinski  writes:
> For popcount for bytes, we don't need the reduction addition
> after the vector cnt instruction as we are only counting one
> byte's popcount.
> This changes the popcount expand to cover all ALLI rather than GPI.
>
> Changes since v1:
> * v2 - Use ALLI iterator and combine all into one pattern.
>Add new testcases popcnt[6-8].c.
> * v3 - Simplify TARGET_CSSC path.
>Use convert_to_mode instead of gen_zero_extend* directly.
>Some other small cleanups.
>
> Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
>   PR target/113042
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64.md (popcount<mode>2): Update pattern
>   to support ALLI modes.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/popcnt5.c: New test.
>   * gcc.target/aarch64/popcnt6.c: New test.
>   * gcc.target/aarch64/popcnt7.c: New test.
>   * gcc.target/aarch64/popcnt8.c: New test.

LGTM, thanks.  OK if there are no comments in 24 hours.

Richard

> Signed-off-by: Andrew Pinski 
> ---
>  gcc/config/aarch64/aarch64.md  | 37 ++
>  gcc/testsuite/gcc.target/aarch64/popcnt5.c | 19 +++
>  gcc/testsuite/gcc.target/aarch64/popcnt6.c | 19 +++
>  gcc/testsuite/gcc.target/aarch64/popcnt7.c | 18 +++
>  gcc/testsuite/gcc.target/aarch64/popcnt8.c | 18 +++
>  5 files changed, 98 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt5.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt6.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt7.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt8.c
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 665a333903c..12dcc16529a 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5341,9 +5341,9 @@ (define_insn "*aarch64_popcount<mode>2_cssc_insn"
>  ;; MOV   w0, v2.b[0]
>  
>  (define_expand "popcount<mode>2"
> -  [(set (match_operand:GPI 0 "register_operand")
> - (popcount:GPI (match_operand:GPI 1 "register_operand")))]
> -  "TARGET_CSSC || TARGET_SIMD"
> +  [(set (match_operand:ALLI 0 "register_operand")
> + (popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
> +  "TARGET_CSSC ? GET_MODE_BITSIZE (<MODE>mode) >= 32 : TARGET_SIMD"
>  {
>if (!TARGET_CSSC)
>  {
> @@ -5351,18 +5351,29 @@ (define_expand "popcount2"
>rtx v1 = gen_reg_rtx (V8QImode);
>rtx in = operands[1];
>rtx out = operands[0];
> -  if(<MODE>mode == SImode)
> - {
> -   rtx tmp;
> -   tmp = gen_reg_rtx (DImode);
> -   /* If we have SImode, zero extend to DImode, pop count does
> -  not change if we have extra zeros. */
> -   emit_insn (gen_zero_extendsidi2 (tmp, in));
> -   in = tmp;
> - }
> +  /* SImode and HImode should be zero extended to DImode.
> +  popcount does not change if we have extra zeros.  */
> +  if (<MODE>mode == SImode || <MODE>mode == HImode)
> + in = convert_to_mode (DImode, in, true);
> +
>emit_move_insn (v, gen_lowpart (V8QImode, in));
>emit_insn (gen_popcountv8qi2 (v1, v));
> -  emit_insn (gen_aarch64_zero_extend_reduc_plus_v8qi (out, v1));
> +  /* QImode, just extract from the v8qi vector.  */
> +  if (<MODE>mode == QImode)
> + emit_move_insn (out, gen_lowpart (QImode, v1));
> +  /* HI and SI, reduction is zero extended to SImode. */
> +  else if (<MODE>mode == SImode || <MODE>mode == HImode)
> + {
> +   rtx out1 = gen_reg_rtx (SImode);
> +   emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1));
> +   emit_move_insn (out, gen_lowpart (<MODE>mode, out1));
> + }
> +  /* DImode, reduction is zero extended to DImode. */
> +  else
> + {
> +   gcc_assert (<MODE>mode == DImode);
> +   emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1));
> + }
>DONE;
>  }
>  })
> diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c 
> b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
> new file mode 100644
> index 000..406369d9b29
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +/* PR target/113042 */
> +
> +#pragma GCC target "+nocssc"
> +
> +/*
> +** h8:
> +**   ldr b[0-9]+, \[x0\]
> +**   cnt v[0-9]+.8b, v[0-9]+.8b
> +**   smov w0, v[0-9]+.b\[0\]
> +**   ret
> +*/
> +/* We should not need the addv here since we only need a byte popcount. */
> +
> +unsigned h8 (const unsigned char *a) {
> +   return __builtin_popcountg (a[0]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c 
> b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
> new file mode 100644
> index 000..e882cb24126
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/

Re: [PATCH] libstdc++-v3: testsuite: Prune uncapitalized "in function" linker warning

2024-08-15 Thread Jonathan Wakely
On Wed, 14 Aug 2024, 18:00 Hans-Peter Nilsson,  wrote:

> (CC to the dejagnu project as a heads-up)
>
> Regtested cris-elf with a fresh newlib checkout where 2640
> libstdc++-v3 tests otherwise fail due to the stubbed newlib
> _getentropy.  Ok to commit?
>

OK for trunk and release branches too if you want. Thanks.



> -- >8 --
> Newer newlib versions trigger warnings about certain functions not being
> implemented (_getentropy) when testing libstdc++-v3.
>
> Since 2018 (circa binutils-2.10) the "in function" prefix isn't
> capitalized for those "not implemented" warnings when generated from
> the linker (a GNU ld feature used by newlib).  Dejagnu up to and
> including at least dejagnu-1.6.3 (and git @ 42979bd3b9) assumes a
> capital "In function", leaving that part unpruned, and boom we have
> thousands of "excess errors" from the libstdc++-v3 testsuite.
>
> While gcc/testsuite/lib/prune.exp:prune_gcc_output already deals with
> this quirk with a vastly more generic pattern, I choose this simpler
> tweak.
>
> libstdc++-v3:
> * testsuite/lib/prune.exp (libstdc++-dg-prune): Prune
> uncapitalized "in function" warning from linker.
> ---
>  libstdc++-v3/testsuite/lib/prune.exp | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/libstdc++-v3/testsuite/lib/prune.exp
> b/libstdc++-v3/testsuite/lib/prune.exp
> index 071dcf34c1e8..4250e2d39e7d 100644
> --- a/libstdc++-v3/testsuite/lib/prune.exp
> +++ b/libstdc++-v3/testsuite/lib/prune.exp
> @@ -80,6 +80,15 @@ proc libstdc++-dg-prune { system text } {
>  # Ignore dsymutil warning (tool bug is actually in the linker)
>  regsub -all "(^|\n)\[^\n\]*could not find object file symbol for
> symbol\[^\n\]*" $text "" text
>
> +# This pattern, except requiring a capitalized "In" and with a
> +# sub-pattern matching a subsequent line "is not implemented and will
> +# always fail", is part of the standard dejagnu prune_warnings
> function.
> +# There's also a separate single-line pattern pruning the "is not
> +# implemented and will always fail".  Since that pattern is processed
> +# before this ${tool}-dg-prune function is called, we have to handle
> +# the single uncapitalized "in function" line.
> +regsub -all "(^|\n)\[^\n\]*: in function\[^\n\]*" $text "" text
> +
>  # If exceptions are disabled, mark tests expecting exceptions to be
> enabled
>  # as unsupported.
>  if { ![check_effective_target_exceptions_enabled] } {
> --
> 2.30.2
>
>


Re: [PATCH] libstdc++-v3: Handle iconv as optional for newlib builds [PR116362]

2024-08-15 Thread Jonathan Wakely
On Wed, 14 Aug 2024, 16:01 Hans-Peter Nilsson,  wrote:

> Regtested cris-elf, both an older newlib (FWIW: before the
> getentropy issue that I hoped to investigate before
> summer...maybe next summer) and a fresh checkout, both
> with/without --enable-newlib-iconv.  I'm pleasantly
> surprised that it works (there are no regressions) with
> newlib iconv enabled compared to without: I had to
> double-check the different libstdc++-v3/config.log that it
> actually *was* enabled.
>
> Ok to commit?
>

Nice! OK for trunk, thanks.


> -- >8 --
> Support for iconv in newlib seems to have been always
> assumed present by libstdc++-v3, but is default off.
>
> Though, it hasn't been used before recent libstdc++ changes
> that actually call iconv functions.  This now leads to
> failures exposed by running the test-suite, unless the
> newlib being used has been explicitly configured with
> --enable-newlib-iconv.  When failing, there are undefined
> references to iconv, iconv_open or iconv_close for multiple
> tests.
>
> Thankfully there's a macro in newlib.h that we can check to
> detect presence of iconv support for the newlib build that's
> used.
>
> libstdc++-v3:
> PR libstdc++/116362
> * configure.ac: Check newlib configuration whether iconv is
> enabled.
> * configure: Regenerate.
> ---
>  libstdc++-v3/configure| 26 +-
>  libstdc++-v3/configure.ac | 10 +-
>  2 files changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
> index ccb24a82be79..4049f54bd5a3 100644
> --- a/libstdc++-v3/configure.ac
> +++ b/libstdc++-v3/configure.ac
> @@ -376,7 +376,15 @@ dnl # rather than hardcoding that information.
>frexpl hypotl ldexpl log10l logl modfl powl sinhl sinl sqrtl
>tanhl tanl])
>
> -AC_DEFINE(HAVE_ICONV)
> +# Support for iconv in newlib is configurable.
> +AC_TRY_COMPILE([#include <newlib.h>], [
> +  #ifndef _ICONV_ENABLED
> +  #error
> +  #endif], [ac_newlib_iconv_enabled=yes],
> [ac_newlib_iconv_enabled=no])
> +if test "$ac_newlib_iconv_enabled" = yes; then
> +  AC_DEFINE(HAVE_ICONV)
> +fi
> +
>  AC_DEFINE(HAVE_MEMALIGN)
>
>  case "${target}" in
> diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
> index fe525308ae28..305675eaa1e1 100755
> --- a/libstdc++-v3/configure
> +++ b/libstdc++-v3/configure
> @@ -28571,7 +28571,31 @@ _ACEOF
>
>
>
> -$as_echo "#define HAVE_ICONV 1" >>confdefs.h
> +# Support for iconv in newlib is configurable.
> +cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +#include <newlib.h>
> +int
> +main ()
> +{
> +
> +  #ifndef _ICONV_ENABLED
> +  #error
> +  #endif
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +if ac_fn_c_try_compile "$LINENO"; then :
> +  ac_newlib_iconv_enabled=yes
> +else
> +  ac_newlib_iconv_enabled=no
> +fi
> +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
> +if test "$ac_newlib_iconv_enabled" = yes; then
> +  $as_echo "#define HAVE_ICONV 1" >>confdefs.h
> +
> +fi
>
>  $as_echo "#define HAVE_MEMALIGN 1" >>confdefs.h
>
> --
> 2.30.2
>
>


Re: [PATCH V2 01/10] optabs: Make all `*dot_prod_optab's modeled as conversions

2024-08-15 Thread Richard Sandiford
Victor Do Nascimento  writes:
> Given the specification in the GCC internals manual defines the
> {u|s}dot_prod standard name as taking "two signed elements of the
> same mode, adding them to a third operand of wider mode", there is
> currently ambiguity in the relationship between the mode of the first
> two arguments and that of the third.
>
> This vagueness means that, in theory, different modes may be
> supportable in the third argument.  This flexibility would allow for a
> given backend to add to the accumulator a different number of
> vectorized products, e.g. A backend may provide instructions for both:
>
>   accum += a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]
>
> and
>
>   accum += a[0] * b[0] + a[1] * b[1],
>
> as is now seen in the SVE2.1 extension to AArch64.  In spite of the
> aforementioned flexibility, modeling the dot-product operation as a
> direct optab means that we have no way to encode both input and the
> accumulator data modes into the backend pattern name, which prevents
> us from harnessing this flexibility.
>
> We therefore make all dot_prod optabs conversions, allowing, for
> example, for the encoding of both 2-way and 4-way dot product backend
> patterns.
>
> gcc/ChangeLog:
>
>   * optabs.def (sdot_prod_optab): Convert from OPTAB_D to
>   OPTAB_CD.
>   (udot_prod_optab): Likewise.
>   (usdot_prod_optab): Likewise.
>   * doc/md.texi (Standard Names): update entries for u,s and us
>   dot_prod names.
> ---
>  gcc/doc/md.texi | 46 +-
>  gcc/optabs.def  |  6 +++---
>  2 files changed, 24 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 5dc0d55edd6..aa1181a3320 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5760,15 +5760,14 @@ for (i = 0; i < LEN + BIAS; i++)
>  operand0 += operand2[i];
>  @end smallexample
>  
> -@cindex @code{sdot_prod@var{m}} instruction pattern
> -@item @samp{sdot_prod@var{m}}
> -
> -Compute the sum of the products of two signed elements.
> -Operand 1 and operand 2 are of the same mode. Their
> -product, which is of a wider mode, is computed and added to operand 3.
> -Operand 3 is of a mode equal or wider than the mode of the product. The
> -result is placed in operand 0, which is of the same mode as operand 3.
> -@var{m} is the mode of operand 1 and operand 2.
> +@cindex @code{sdot_prod@var{m}@var{n}} instruction pattern
> +@item @samp{sdot_prod@var{m}@var{n}}
> +
> +Multiply operand 1 by operand 2 without loss of precision, given that
> +both operands contain signed elements.  Add each product to the overlapping
> +element of operand 3 and store the result in operand 0.  Operands 0 and 3
> +have mode @var{m} and operands 1 and 2 have mode @var{n}, with @var{n}
> +having narrower elements than @var{m}.
>  
>  Semantically the expressions perform the multiplication in the following 
> signs
>  
> @@ -5778,15 +5777,14 @@ sdot 
> ==
>  @dots{}
>  @end smallexample
>  
> -@cindex @code{udot_prod@var{m}} instruction pattern
> -@item @samp{udot_prod@var{m}}
> +@cindex @code{udot_prod@var{m}@var{n}} instruction pattern
> +@item @samp{udot_prod@var{m}@var{n}}
>  
> -Compute the sum of the products of two unsigned elements.
> -Operand 1 and operand 2 are of the same mode. Their
> -product, which is of a wider mode, is computed and added to operand 3.
> -Operand 3 is of a mode equal or wider than the mode of the product. The
> -result is placed in operand 0, which is of the same mode as operand 3.
> -@var{m} is the mode of operand 1 and operand 2.
> +Multiply operand 1 by operand 2 without loss of precision, given that
> +both operands contain unsigned elements.  Add each product to the overlapping
> +element of operand 3 and store the result in operand 0.  Operands 0 and 3
> +have mode @var{m} and operands 1 and 2 have mode @var{n}, with @var{n}
> +having narrower elements than @var{m}.
>  
>  Semantically the expressions perform the multiplication in the following 
> signs
>  
> @@ -5796,14 +5794,12 @@ udot unsigned op3> ==
>  @dots{}
>  @end smallexample
>  
> -@cindex @code{usdot_prod@var{m}} instruction pattern
> -@item @samp{usdot_prod@var{m}}
> -Compute the sum of the products of elements of different signs.
> -Operand 1 must be unsigned and operand 2 signed. Their
> -product, which is of a wider mode, is computed and added to operand 3.
> -Operand 3 is of a mode equal or wider than the mode of the product. The
> -result is placed in operand 0, which is of the same mode as operand 3.
> -@var{m} is the mode of operand 1 and operand 2.
> +@cindex @code{usdot_prod@var{m}@var{n}} instruction pattern
> +@item @samp{usdot_prod@var{m}@var{n}}
> +Multiply operand 1 by operand 2.  Add each product to the overlapping

The new paragraph drops the information that operand 1 is unsigned and
operand 2 is signed.  Maybe change this sentence to:

  Multiply operand 1 by operand 2 without loss of precision, given that
  operand 1 is unsigned and 

[PATCH 0/4] Prime path coverage in gcc/gcov

2024-08-15 Thread Jørgen Kvalsvik
Ping. Since the last patch I have fixed a few bugs in the path count
limit aborting, and a few minor rephrases in docs.

Jørgen Kvalsvik (4):
  testsuite: Use dg-compile, not gcc -c
  gcov: Cache source files
  gcov: branch, conds, calls in function summaries
  Add prime path coverage to gcc/gcov

 gcc/Makefile.in|6 +-
 gcc/builtins.cc|2 +-
 gcc/collect2.cc|5 +-
 gcc/common.opt |   16 +
 gcc/doc/gcov.texi  |  155 ++
 gcc/doc/invoke.texi|   36 +
 gcc/gcc.cc |4 +-
 gcc/gcov-counter.def   |3 +
 gcc/gcov-io.h  |3 +
 gcc/gcov.cc|  537 ++-
 gcc/ipa-inline.cc  |2 +-
 gcc/passes.cc  |4 +-
 gcc/path-coverage.cc   |  782 +
 gcc/prime-paths.cc | 2031 
 gcc/profile.cc |6 +-
 gcc/selftest-run-tests.cc  |1 +
 gcc/selftest.h |1 +
 gcc/testsuite/g++.dg/gcov/gcov-22.C|  170 ++
 gcc/testsuite/gcc.misc-tests/gcov-23.c |3 +-
 gcc/testsuite/gcc.misc-tests/gcov-29.c |  869 ++
 gcc/testsuite/gcc.misc-tests/gcov-30.c |  869 ++
 gcc/testsuite/gcc.misc-tests/gcov-31.c |   35 +
 gcc/testsuite/gcc.misc-tests/gcov-32.c |   24 +
 gcc/testsuite/lib/gcov.exp |   92 +-
 gcc/tree-profile.cc|   11 +-
 25 files changed, 5627 insertions(+), 40 deletions(-)
 create mode 100644 gcc/path-coverage.cc
 create mode 100644 gcc/prime-paths.cc
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-22.C
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-29.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-30.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-31.c
 create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-32.c

-- 
2.39.2



[PATCH 1/4] testsuite: Use dg-compile, not gcc -c

2024-08-15 Thread Jørgen Kvalsvik
Since this is a pure compile test it makes sense to inform dejagnu of
it.

gcc/testsuite/ChangeLog:

* gcc.misc-tests/gcov-23.c: Use dg-compile, not gcc -c
---
 gcc/testsuite/gcc.misc-tests/gcov-23.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.misc-tests/gcov-23.c 
b/gcc/testsuite/gcc.misc-tests/gcov-23.c
index 72849d80e3a..72ba0aa1389 100644
--- a/gcc/testsuite/gcc.misc-tests/gcov-23.c
+++ b/gcc/testsuite/gcc.misc-tests/gcov-23.c
@@ -1,4 +1,5 @@
-/* { dg-options "-fcondition-coverage -ftest-coverage -O2 -c" } */
+/* { dg-options "-fcondition-coverage -ftest-coverage -O2" } */
+/* { dg-do compile } */
 
 #include 
 #include 
-- 
2.39.2



[PATCH 3/4] gcov: branch, conds, calls in function summaries

2024-08-15 Thread Jørgen Kvalsvik
The gcov function summaries only output the covered lines, not the
branches and calls. Since function summaries are opt-in, it probably
makes sense to also include branch coverage, calls, and condition
coverage.

$ gcc --coverage -fpath-coverage hello.c -o hello
$ ./hello

Before:
$ gcov -f hello
Function 'main'
Lines executed:100.00% of 4

Function 'fn'
Lines executed:100.00% of 7

File 'hello.c'
Lines executed:100.00% of 11
Creating 'hello.c.gcov'

After:
$ gcov -f hello
Function 'main'
Lines executed:100.00% of 3
No branches
Calls executed:100.00% of 1

Function 'fn'
Lines executed:100.00% of 7
Branches executed:100.00% of 4
Taken at least once:50.00% of 4
No calls

File 'hello.c'
Lines executed:100.00% of 10
Creating 'hello.c.gcov'

Lines executed:100.00% of 10

With conditions:
$ gcov -fg hello
Function 'main'
Lines executed:100.00% of 3
No branches
Calls executed:100.00% of 1
No conditions

Function 'fn'
Lines executed:100.00% of 7
Branches executed:100.00% of 4
Taken at least once:50.00% of 4
Condition outcomes covered:100.00% of 8
No calls

File 'hello.c'
Lines executed:100.00% of 10
Creating 'hello.c.gcov'

Lines executed:100.00% of 10

gcc/ChangeLog:

* gcov.cc (generate_results): Count branches, conditions.
(function_summary): Output branch, calls, condition count.
---
 gcc/gcov.cc | 32 +---
 1 file changed, 29 insertions(+), 3 deletions(-)
---
 gcc/gcov.cc | 48 +++-
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index 19019f404ee..7215a00c702 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -1687,11 +1687,19 @@ generate_results (const char *file_name)
   memset (&coverage, 0, sizeof (coverage));
   coverage.name = fn->get_name ();
   add_line_counts (flag_function_summary ? &coverage : NULL, fn);
-  if (flag_function_summary)
-   {
- function_summary (&coverage);
- fnotice (stdout, "\n");
-   }
+
+  if (!flag_function_summary)
+   continue;
+
+  for (const block_info& block : fn->blocks)
+   for (arc_info *arc = block.succ; arc; arc = arc->succ_next)
+ add_branch_counts (&coverage, arc);
+
+  for (const block_info& block : fn->blocks)
+   add_condition_counts (&coverage, &block);
+
+  function_summary (&coverage);
+  fnotice (stdout, "\n");
 }
 
   name_map needle;
@@ -2764,6 +2772,36 @@ function_summary (const coverage_info *coverage)
 {
   fnotice (stdout, "%s '%s'\n", "Function", coverage->name);
   executed_summary (coverage->lines, coverage->lines_executed);
+
+  if (coverage->branches)
+{
+  fnotice (stdout, "Branches executed:%s of %d\n",
+  format_gcov (coverage->branches_executed, coverage->branches, 2),
+  coverage->branches);
+  fnotice (stdout, "Taken at least once:%s of %d\n",
+  format_gcov (coverage->branches_taken, coverage->branches, 2),
+   coverage->branches);
+}
+  else
+fnotice (stdout, "No branches\n");
+
+  if (coverage->calls)
+fnotice (stdout, "Calls executed:%s of %d\n",
+format_gcov (coverage->calls_executed, coverage->calls, 2),
+coverage->calls);
+  else
+fnotice (stdout, "No calls\n");
+
+  if (flag_conditions)
+{
+  if (coverage->conditions)
+   fnotice (stdout, "Condition outcomes covered:%s of %d\n",
+format_gcov (coverage->conditions_covered,
+ coverage->conditions, 2),
+coverage->conditions);
+  else
+   fnotice (stdout, "No conditions\n");
+}
 }
 
 /* Output summary info for a file.  */
-- 
2.39.2



[PATCH 1/3] gcov: Cache source files

2024-08-15 Thread Jørgen Kvalsvik
Cache the source files as they are read, rather than discarding them at
the end of output_lines (), and move the reading of the source file to
the new function slurp.

This patch does not really change anything other than moving the file
reading out of output_file, but sets gcov up for more interaction with
the source file. The motivating example is reporting coverage on
functions from different source files, notably C++ headers and
((always_inline)).

Here is an example of what gcov does today:

hello.h:
inline __attribute__((always_inline))
int hello (const char *s)
{
  if (s)
printf ("hello, %s!\n", s);
  else
printf ("hello, world!\n");
  return 0;
}

hello.c:
int notmain(const char *entity)
{
  return hello (entity);
}

int main()
{
  const char *empty = 0;
  if (!empty)
hello (empty);
  else
puts ("Goodbye!");
}

$ gcov -abc hello
function notmain called 0 returned 0% blocks executed 0%
#:4:int notmain(const char *entity)
%:4-block 2
branch  0 never executed (fallthrough)
branch  1 never executed
-:5:{
#:6:  return hello (entity);
%:6-block 7
-:7:}

Clearly there is a branch in notmain, but the branch comes from the
inlining of hello. This is not very obvious from looking at the output.
Here is hello.h.gcov:

-:3:inline __attribute__((always_inline))
-:4:int hello (const char *s)
-:5:{
#:6:  if (s)
%:6-block 3
branch  0 never executed (fallthrough)
branch  1 never executed
%:6-block 2
branch  2 never executed (fallthrough)
branch  3 never executed
#:7:printf ("hello, %s!\n", s);
%:7-block 4
call0 never executed
%:7-block 3
call1 never executed
-:8:  else
#:9:printf ("hello, world!\n");
%:9-block 5
call0 never executed
%:9-block 4
call1 never executed
#:   10:  return 0;
%:   10-block 6
%:   10-block 5
-:   11:}

The blocks from the different call sites have all been interleaved.

The reporting could be tuned to list the inlined function, too, like
this:

1:4:int notmain(const char *entity)
-: == inlined from hello.h ==
1:6:  if (s)
branch  0 taken 0 (fallthrough)
branch  1 taken 1
#:7:printf ("hello, %s!\n", s);
%:7-block 3
call0 never executed
-:8:  else
1:9:printf ("hello, world!\n");
1:9-block 4
call0 returned 1
1:   10:  return 0;
1:   10-block 5
-: == inlined from hello.h (end) ==
-:5:{
1:6:  return hello (entity);
1:6-block 7
-:7:}

Implementing something to this effect relies on having the sources for
both files (hello.c, hello.h) available, which is what this patch sets
up.

Note that the previous reading code would leak the source file content,
and explicitly storing it is neither a huge departure nor a notable
performance cost. I verified this with valgrind:

With slurp:

$ valgrind gcov ./hello
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'
== ==
== == HEAP SUMMARY:
== == in use at exit: 84,907 bytes in 54 blocks
== ==   total heap usage: 254 allocs, 200 frees, 137,156 bytes allocated
== ==
== == LEAK SUMMARY:
== ==definitely lost: 1,237 bytes in 22 blocks
== ==indirectly lost: 562 bytes in 18 blocks
== ==  possibly lost: 0 bytes in 0 blocks
== ==still reachable: 83,108 bytes in 14 blocks
== ==   of which reachable via heuristic:
== == newarray   : 1,544 bytes in 1 blocks
== == suppressed: 0 bytes in 0 blocks
== == Rerun with --leak-check=full to see details of leaked memory
== ==
== == For lists of detected and suppressed errors, rerun with: -s
== == ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

Without slurp:

$ valgrind gcov ./demo
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'

Lines executed:87.50% of 8
== ==
== == HEAP SUMMARY:
== == in use at exit: 85,316 bytes in 82 blocks
== ==   total heap usage: 250 allocs, 168 frees, 137,084 bytes allocated
== ==
== == LEAK SUMMARY:
== ==definitely lost: 1,646 bytes in 50 blocks
== ==indirectly lost: 562 bytes in 18 blocks
== ==  possibly lost: 0 bytes in 0 blocks
==

[PATCH 2/4] gcov: Cache source files

2024-08-15 Thread Jørgen Kvalsvik
Cache the source files as they are read, rather than discarding them at
the end of output_lines (), and move the reading of the source file to
the new function slurp.

This patch does not really change anything other than moving the file
reading out of output_file, but sets gcov up for more interaction with
the source file. The motivating example is reporting coverage on
functions from different source files, notably C++ headers and
((always_inline)).

Here is an example of what gcov does today:

hello.h:
inline __attribute__((always_inline))
int hello (const char *s)
{
  if (s)
printf ("hello, %s!\n", s);
  else
printf ("hello, world!\n");
  return 0;
}

hello.c:
int notmain(const char *entity)
{
  return hello (entity);
}

int main()
{
  const char *empty = 0;
  if (!empty)
hello (empty);
  else
puts ("Goodbye!");
}

$ gcov -abc hello
function notmain called 0 returned 0% blocks executed 0%
#:4:int notmain(const char *entity)
%:4-block 2
branch  0 never executed (fallthrough)
branch  1 never executed
-:5:{
#:6:  return hello (entity);
%:6-block 7
-:7:}

Clearly there is a branch in notmain, but the branch comes from the
inlining of hello. This is not very obvious from looking at the output.
Here is hello.h.gcov:

-:3:inline __attribute__((always_inline))
-:4:int hello (const char *s)
-:5:{
#:6:  if (s)
%:6-block 3
branch  0 never executed (fallthrough)
branch  1 never executed
%:6-block 2
branch  2 never executed (fallthrough)
branch  3 never executed
#:7:printf ("hello, %s!\n", s);
%:7-block 4
call0 never executed
%:7-block 3
call1 never executed
-:8:  else
#:9:printf ("hello, world!\n");
%:9-block 5
call0 never executed
%:9-block 4
call1 never executed
#:   10:  return 0;
%:   10-block 6
%:   10-block 5
-:   11:}

The blocks from the different call sites have all been interleaved.

The reporting could be tuned to list the inlined function, too, like
this:

1:4:int notmain(const char *entity)
-: == inlined from hello.h ==
1:6:  if (s)
branch  0 taken 0 (fallthrough)
branch  1 taken 1
#:7:printf ("hello, %s!\n", s);
%:7-block 3
call0 never executed
-:8:  else
1:9:printf ("hello, world!\n");
1:9-block 4
call0 returned 1
1:   10:  return 0;
1:   10-block 5
-: == inlined from hello.h (end) ==
-:5:{
1:6:  return hello (entity);
1:6-block 7
-:7:}

Implementing something to this effect relies on having the sources for
both files (hello.c, hello.h) available, which is what this patch sets
up.

Note that the previous reading code would leak the source file content,
so explicitly storing it is neither a huge departure nor a performance
concern. I verified this with valgrind:

With slurp:

$ valgrind gcov ./hello
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'
== ==
== == HEAP SUMMARY:
== == in use at exit: 84,907 bytes in 54 blocks
== ==   total heap usage: 254 allocs, 200 frees, 137,156 bytes allocated
== ==
== == LEAK SUMMARY:
== ==definitely lost: 1,237 bytes in 22 blocks
== ==indirectly lost: 562 bytes in 18 blocks
== ==  possibly lost: 0 bytes in 0 blocks
== ==still reachable: 83,108 bytes in 14 blocks
== ==   of which reachable via heuristic:
== == newarray   : 1,544 bytes in 1 blocks
== == suppressed: 0 bytes in 0 blocks
== == Rerun with --leak-check=full to see details of leaked memory
== ==
== == For lists of detected and suppressed errors, rerun with: -s
== == ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

Without slurp:

$ valgrind gcov ./demo
== == Memcheck, a memory error detector
== == Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
== == Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
== == Command: ./gcc/gcov demo
== ==
File 'hello.c'
Lines executed:100.00% of 4
Creating 'hello.c.gcov'

File 'hello.h'
Lines executed:75.00% of 4
Creating 'hello.h.gcov'

Lines executed:87.50% of 8
== ==
== == HEAP SUMMARY:
== == in use at exit: 85,316 bytes in 82 blocks
== ==   total heap usage: 250 allocs, 168 frees, 137,084 bytes allocated
== ==
== == LEAK SUMMARY:
== ==definitely lost: 1,646 bytes in 50 blocks
== ==indirectly lost: 562 bytes in 18 blocks
== ==  possibly lost: 0 bytes in 0 blocks
==

[PATCH 2/3] gcov: branch, conds, calls in function summaries

2024-08-15 Thread Jørgen Kvalsvik
The gcov function summaries only output the covered lines, not the
branches and calls. Since the function summaries are opt-in, it
probably makes sense to also include branch coverage, calls, and
condition coverage.

$ gcc --coverage -fpath-coverage hello.c -o hello
$ ./hello

Before:
$ gcov -f hello
Function 'main'
Lines executed:100.00% of 4

Function 'fn'
Lines executed:100.00% of 7

File 'hello.c'
Lines executed:100.00% of 11
Creating 'hello.c.gcov'

After:
$ gcov -f hello
Function 'main'
Lines executed:100.00% of 3
No branches
Calls executed:100.00% of 1

Function 'fn'
Lines executed:100.00% of 7
Branches executed:100.00% of 4
Taken at least once:50.00% of 4
No calls

File 'hello.c'
Lines executed:100.00% of 10
Creating 'hello.c.gcov'

Lines executed:100.00% of 10

With conditions:
$ gcov -fg hello
Function 'main'
Lines executed:100.00% of 3
No branches
Calls executed:100.00% of 1
No conditions

Function 'fn'
Lines executed:100.00% of 7
Branches executed:100.00% of 4
Taken at least once:50.00% of 4
Condition outcomes covered:100.00% of 8
No calls

File 'hello.c'
Lines executed:100.00% of 10
Creating 'hello.c.gcov'

Lines executed:100.00% of 10

gcc/ChangeLog:

* gcov.cc (generate_results): Count branches, conditions.
(function_summary): Output branch, calls, condition count.
---
 gcc/gcov.cc | 32 +---
 1 file changed, 29 insertions(+), 3 deletions(-)
---
 gcc/gcov.cc | 48 +++-
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index 19019f404ee..7215a00c702 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -1687,11 +1687,19 @@ generate_results (const char *file_name)
   memset (&coverage, 0, sizeof (coverage));
   coverage.name = fn->get_name ();
   add_line_counts (flag_function_summary ? &coverage : NULL, fn);
-  if (flag_function_summary)
-   {
- function_summary (&coverage);
- fnotice (stdout, "\n");
-   }
+
+  if (!flag_function_summary)
+   continue;
+
+  for (const block_info& block : fn->blocks)
+   for (arc_info *arc = block.succ; arc; arc = arc->succ_next)
+ add_branch_counts (&coverage, arc);
+
+  for (const block_info& block : fn->blocks)
+   add_condition_counts (&coverage, &block);
+
+  function_summary (&coverage);
+  fnotice (stdout, "\n");
 }
 
   name_map needle;
@@ -2764,6 +2772,36 @@ function_summary (const coverage_info *coverage)
 {
   fnotice (stdout, "%s '%s'\n", "Function", coverage->name);
   executed_summary (coverage->lines, coverage->lines_executed);
+
+  if (coverage->branches)
+{
+  fnotice (stdout, "Branches executed:%s of %d\n",
+  format_gcov (coverage->branches_executed, coverage->branches, 2),
+  coverage->branches);
+  fnotice (stdout, "Taken at least once:%s of %d\n",
+  format_gcov (coverage->branches_taken, coverage->branches, 2),
+   coverage->branches);
+}
+  else
+fnotice (stdout, "No branches\n");
+
+  if (coverage->calls)
+fnotice (stdout, "Calls executed:%s of %d\n",
+format_gcov (coverage->calls_executed, coverage->calls, 2),
+coverage->calls);
+  else
+fnotice (stdout, "No calls\n");
+
+  if (flag_conditions)
+{
+  if (coverage->conditions)
+   fnotice (stdout, "Condition outcomes covered:%s of %d\n",
+format_gcov (coverage->conditions_covered,
+ coverage->conditions, 2),
+coverage->conditions);
+  else
+   fnotice (stdout, "No conditions\n");
+}
 }
 
 /* Output summary info for a file.  */
-- 
2.39.2



Re: [PATCH V2 03/10] aarch64: Fix aarch64 backend-use of (u|s|us)dot_prod patterns

2024-08-15 Thread Richard Sandiford
Victor Do Nascimento  writes:
> Given recent changes to the dot_prod standard pattern name, this patch
> fixes the aarch64 back-end by implementing the following changes:
>
> 1. Add 2nd mode to all (u|s|us)dot_prod patterns in .md files.
> 2. Rewrite initialization and function expansion mechanism for simd
> builtins.
> 3. Fix all direct calls to back-end `dot_prod' patterns in SVE
> builtins.
>
> Finally, given that it is now possible for the compiler to
> differentiate between the two- and four-way dot product, we add a test
> to ensure that autovectorization picks up on dot-product patterns
> where the result is twice the width of the operands.
>
> gcc/ChangeLog:
>
>   * config/aarch64/aarch64-simd.md
>   (dot_prod): Renamed to...
>   (dot_prod): ...this.
>   (usdot_prod): Renamed to...
>   (usdot_prod): ...this.
>   (sadv16qi): Adjust call to gen_udot_prod take second mode.
>   (popcount): fix use of `udot_prod_optab'.
>   * gcc/config/aarch64/aarch64-sve.md
>   (dot_prod): Renamed to...
>   (dot_prod): ...this.
>   (@dot_prod): Renamed to...
>   (@dot_prod): ...this.
>   (sad): Adjust call to gen_udot_prod take second mode.
>   * gcc/config/aarch64/aarch64-sve2.md
>   (@aarch64_sve_dotvnx4sivnx8hi): Renamed to...
>   (dot_prodvnx4sivnx8hi): ...this.
>   * config/aarch64/aarch64-simd-builtins.def: Modify macro
>   expansion-based initialization and expansion
>   of (u|s|us)dot_prod builtins.
>   * config/aarch64/aarch64-sve-builtins-base.cc
>   (svdot_impl::expand): s/direct/convert/ in
>   `convert_optab_handler_for_sign' function call.
>   (svusdot_impl::expand): add second mode argument in call to
>   `code_for_dot_prod'.
>   * config/aarch64/aarch64-sve-builtins.cc
>   (function_expander::convert_optab_handler_for_sign): New class
>   method.
>   * config/aarch64/aarch64-sve-builtins.h
>   (class function_expander): Add prototype for new
>   `convert_optab_handler_for_sign' method.
>
> gcc/testsuite/ChangeLog:
>   * gcc.target/aarch64/sme/vect-dotprod-twoway.c (udot2): New.

Could you run the patch through contrib/check_GNU_style.py to catch
the long lines?

> ---
>  gcc/config/aarch64/aarch64-builtins.cc|  7 ++
>  gcc/config/aarch64/aarch64-simd-builtins.def  |  6 ++---
>  gcc/config/aarch64/aarch64-simd.md|  9 ---
>  .../aarch64/aarch64-sve-builtins-base.cc  | 13 +-
>  gcc/config/aarch64/aarch64-sve-builtins.cc| 17 +
>  gcc/config/aarch64/aarch64-sve-builtins.h |  3 +++
>  gcc/config/aarch64/aarch64-sve.md |  6 ++---
>  gcc/config/aarch64/aarch64-sve2.md|  2 +-
>  .../aarch64/sme/vect-dotprod-twoway.c | 25 +++
>  9 files changed, 71 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c
> [...]
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 0a560eaedca..975eca0bbd6 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -3745,6 +3745,23 @@ function_expander::direct_optab_handler_for_sign 
> (optab signed_op,
>return ::direct_optab_handler (op, mode);
>  }
>  
> +/* Choose between signed and unsigned convert optabs SIGNED_OP and
> +   UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
> +   pick the appropriate optab handler for the mode.  Use MODE as the
> +   mode if given, otherwise use the mode of type suffix SUFFIX_I.  */

The last sentence needs to be adapted for this function.  Also, because
there is no longer a single mode, I don't think it makes sense to allow
a default.  So how about:

/* Choose between signed and unsigned convert optabs SIGNED_OP and
   UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
   pick the appropriate optab handler for "converting" from FROM_MODE
   to TO_MODE.  */

> +insn_code
> +function_expander::convert_optab_handler_for_sign (optab signed_op,
> +optab unsigned_op,
> +unsigned int suffix_i,
> +machine_mode to_mode,
> +machine_mode from_mode)
> +{
> +  if (from_mode == VOIDmode)
> +from_mode = vector_mode (suffix_i);

This code would then be removed.

> +  optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op;
> +  return ::convert_optab_handler (op, to_mode, from_mode);
> +}
> +
>  /* Return true if X overlaps any input.  */
>  bool
>  function_expander::overlaps_input_p (rtx x)
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h 
> b/gcc/config/aarch64/aarch64-sve-builtins.h
> index 9ab6f202c30..7534a58c3d7 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> +++ b/gcc/config/aarch64/aarch64-sve-

[PATCH V3 00/10] optabs: Make all `*dot_prod_optab's modeled as conversions

2024-08-15 Thread Victor Do Nascimento
Changes in this revision:

* [PATCH 2/10] - Make use of overloaded `directly_supported_p' in
`vect_supportable_conv_optab_p' to avoid code duplication.

-

Given the specification in the GCC internals manual defines the
{u|s}dot_prod standard name as taking "two signed elements of the
same mode, adding them to a third operand of wider mode", there is
currently ambiguity in the relationship between the mode of the first
two arguments and that of the third.

This vagueness means that, in theory, different modes may be
supportable in the third argument.  This flexibility would allow for a
given backend to add to the accumulator a different number of
vectorized products, e.g. a backend may provide instructions for both:

  accum += a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]

and

  accum += a[0] * b[0] + a[1] * b[1],

as is now seen in the SVE2.1 extension to AArch64.  In spite of the
aforementioned flexibility, modeling the dot-product operation as a
direct optab means that we have no way to encode both input and the
accumulator data modes into the backend pattern name, which prevents
us from harnessing this flexibility.

The purpose of this patch-series is therefore to remedy this current
shortcoming, moving the `dot_prod' from its current implementation as
a direct optab to an implementation where, as a conversion optab, we
are able to differentiate between dot products taking the same input
mode but resulting in a different output mode.

Regression-tested on x86_64, aarch64 and armhf.  I'd appreciate help
running relevant tests on the remaining architectures, i.e. arc, mips,
altivec and c6x to ensure I've not inadvertently broken anything for
those back-ends.

Victor Do Nascimento (10):
  optabs: Make all `*dot_prod_optab's modeled as conversions
  autovectorizer: Add basic support for convert optabs
  aarch64: Fix aarch64 backend-use of (u|s|us)dot_prod patterns
  arm: Fix arm backend-use of (u|s|us)dot_prod patterns
  i386: Fix dot_prod backend patterns for mmx and sse targets
  arc: Adjust dot-product backend patterns
  mips:  Adjust dot-product backend patterns
  rs6000: Adjust altivec dot-product backend patterns
  c6x:  Adjust dot-product backend patterns
  autovectorizer: Test autovectorization of different dot-prod modes.

 gcc/config/aarch64/aarch64-builtins.cc|  7 ++
 gcc/config/aarch64/aarch64-simd-builtins.def  |  6 +-
 gcc/config/aarch64/aarch64-simd.md|  9 +-
 .../aarch64/aarch64-sve-builtins-base.cc  | 13 +--
 gcc/config/aarch64/aarch64-sve-builtins.cc| 17 
 gcc/config/aarch64/aarch64-sve-builtins.h |  3 +
 gcc/config/aarch64/aarch64-sve.md |  6 +-
 gcc/config/aarch64/aarch64-sve2.md|  2 +-
 gcc/config/arc/simdext.md |  8 +-
 gcc/config/arm/arm-builtins.cc| 95 +++
 gcc/config/arm/arm-protos.h   |  3 +
 gcc/config/arm/arm.cc |  1 +
 gcc/config/arm/arm_neon_builtins.def  |  3 -
 gcc/config/arm/neon.md|  6 +-
 gcc/config/c6x/c6x.md |  2 +-
 gcc/config/i386/mmx.md| 30 +++---
 gcc/config/i386/sse.md| 38 
 gcc/config/mips/loongson-mmi.md   |  2 +-
 gcc/config/rs6000/altivec.md  |  4 +-
 gcc/doc/md.texi   | 46 -
 gcc/gimple-match-exports.cc   | 23 +
 gcc/gimple-match.h|  2 +
 gcc/optabs.cc |  3 +-
 gcc/optabs.def|  6 +-
 .../gcc.dg/vect/vect-dotprod-twoway.c | 39 
 .../aarch64/sme/vect-dotprod-twoway.c | 25 +
 .../gcc.target/aarch64/vect-dotprod-twoway.c  | 65 +
 gcc/testsuite/lib/target-supports.exp |  8 ++
 gcc/tree-vect-loop.cc |  1 +
 gcc/tree-vect-patterns.cc | 33 ++-
 30 files changed, 410 insertions(+), 96 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-dotprod-twoway.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway.c

-- 
2.34.1



[PATCH V3 01/10] optabs: Make all `*dot_prod_optab's modeled as conversions

2024-08-15 Thread Victor Do Nascimento
Given the specification in the GCC internals manual defines the
{u|s}dot_prod standard name as taking "two signed elements of the
same mode, adding them to a third operand of wider mode", there is
currently ambiguity in the relationship between the mode of the first
two arguments and that of the third.

This vagueness means that, in theory, different modes may be
supportable in the third argument.  This flexibility would allow for a
given backend to add to the accumulator a different number of
vectorized products, e.g. a backend may provide instructions for both:

  accum += a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]

and

  accum += a[0] * b[0] + a[1] * b[1],

as is now seen in the SVE2.1 extension to AArch64.  In spite of the
aforementioned flexibility, modeling the dot-product operation as a
direct optab means that we have no way to encode both input and the
accumulator data modes into the backend pattern name, which prevents
us from harnessing this flexibility.

We therefore make all dot_prod optabs conversions, allowing, for
example, for the encoding of both 2-way and 4-way dot product backend
patterns.

gcc/ChangeLog:

* optabs.def (sdot_prod_optab): Convert from OPTAB_D to
OPTAB_CD.
(udot_prod_optab): Likewise.
(usdot_prod_optab): Likewise.
* doc/md.texi (Standard Names): update entries for u,s and us
dot_prod names.
---
 gcc/doc/md.texi | 46 +-
 gcc/optabs.def  |  6 +++---
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5dc0d55edd6..aa1181a3320 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5760,15 +5760,14 @@ for (i = 0; i < LEN + BIAS; i++)
 operand0 += operand2[i];
 @end smallexample
 
-@cindex @code{sdot_prod@var{m}} instruction pattern
-@item @samp{sdot_prod@var{m}}
-
-Compute the sum of the products of two signed elements.
-Operand 1 and operand 2 are of the same mode. Their
-product, which is of a wider mode, is computed and added to operand 3.
-Operand 3 is of a mode equal or wider than the mode of the product. The
-result is placed in operand 0, which is of the same mode as operand 3.
-@var{m} is the mode of operand 1 and operand 2.
+@cindex @code{sdot_prod@var{m}@var{n}} instruction pattern
+@item @samp{sdot_prod@var{m}@var{n}}
+
+Multiply operand 1 by operand 2 without loss of precision, given that
+both operands contain signed elements.  Add each product to the overlapping
+element of operand 3 and store the result in operand 0.  Operands 0 and 3
+have mode @var{m} and operands 1 and 2 have mode @var{n}, with @var{n}
+having narrower elements than @var{m}.
 
 Semantically the expressions perform the multiplication in the following signs
 
@@ -5778,15 +5777,14 @@ sdot ==
 @dots{}
 @end smallexample
 
-@cindex @code{udot_prod@var{m}} instruction pattern
-@item @samp{udot_prod@var{m}}
+@cindex @code{udot_prod@var{m}@var{n}} instruction pattern
+@item @samp{udot_prod@var{m}@var{n}}
 
-Compute the sum of the products of two unsigned elements.
-Operand 1 and operand 2 are of the same mode. Their
-product, which is of a wider mode, is computed and added to operand 3.
-Operand 3 is of a mode equal or wider than the mode of the product. The
-result is placed in operand 0, which is of the same mode as operand 3.
-@var{m} is the mode of operand 1 and operand 2.
+Multiply operand 1 by operand 2 without loss of precision, given that
+both operands contain unsigned elements.  Add each product to the overlapping
+element of operand 3 and store the result in operand 0.  Operands 0 and 3
+have mode @var{m} and operands 1 and 2 have mode @var{n}, with @var{n}
+having narrower elements than @var{m}.
 
 Semantically the expressions perform the multiplication in the following signs
 
@@ -5796,14 +5794,12 @@ udot ==
 @dots{}
 @end smallexample
 
-@cindex @code{usdot_prod@var{m}} instruction pattern
-@item @samp{usdot_prod@var{m}}
-Compute the sum of the products of elements of different signs.
-Operand 1 must be unsigned and operand 2 signed. Their
-product, which is of a wider mode, is computed and added to operand 3.
-Operand 3 is of a mode equal or wider than the mode of the product. The
-result is placed in operand 0, which is of the same mode as operand 3.
-@var{m} is the mode of operand 1 and operand 2.
+@cindex @code{usdot_prod@var{m}@var{n}} instruction pattern
+@item @samp{usdot_prod@var{m}@var{n}}
+Multiply operand 1 by operand 2.  Add each product to the overlapping
+element of operand 3 and store the result in operand 0.  Operands 0 and 3
+have mode @var{m} and operands 1 and 2 have mode @var{n}, with @var{n}
+having narrower elements than @var{m}.
 
 Semantically the expressions perform the multiplication in the following signs
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 58a939442bd..ba860144d8b 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -110,6 +110,9 @@ OPTAB_CD(mask_scatter_store_optab, "mask_scatter_store

[PATCH V3 05/10] i386: Fix dot_prod backend patterns for mmx and sse targets

2024-08-15 Thread Victor Do Nascimento
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

* config/i386/mmx.md (usdot_prodv8qi): Renamed to...
(usdot_prodv2siv8qi): ...this.
(sdot_prodv8qi): Renamed to...
(sdot_prodv2siv8qi): ...this.
(udot_prodv8qi): Renamed to...
(udot_prodv2siv8qi): ...this.
(usdot_prodv4hi): Renamed to...
(usdot_prodv2siv4hi): ...this.
(udot_prodv4hi): Renamed to...
(udot_prodv2siv4hi): ...this.
(sdot_prodv4hi): Renamed to...
(sdot_prodv2siv4hi): ...this.
* config/i386/sse.md (sdot_prod): Renamed to...
(sdot_prod): ...this.
(sdot_prodv4si): Renamed to...
(sdot_prodv2div4si): ...this.
(usdot_prod): Renamed to...
(usdot_prod): ...this.
(sdot_prod): Renamed to...
(sdot_prod): ...this.
(sdot_prodv64qi): Renamed to...
(sdot_prodv16siv64qi): ...this.
(udot_prod): Renamed to...
(udot_prod): ...this.
(udot_prodv64qi): Renamed to...
(udot_prodv16siv64qi): ...this.
(usdot_prod): Renamed to...
(usdot_prod): ...this.
(udot_prod): Renamed to...
(udot_prod): ...this.
---
 gcc/config/i386/mmx.md | 30 +++---
 gcc/config/i386/sse.md | 38 +++---
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 94d3a6e5692..d78739b033d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
   DONE;
 })
 
-(define_expand "usdot_prodv8qi"
+(define_expand "usdot_prodv2siv8qi"
   [(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi"
   rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
   rtx op0 = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
+  emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
   emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
  }
else
@@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
   emit_move_insn (op3, CONST0_RTX (V4SImode));
   emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
   emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
-  emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
 
   /* vec_perm (op0, 2, 3, 0, 1);  */
   emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
 DONE;
 })
 
-(define_expand "sdot_prodv8qi"
+(define_expand "sdot_prodv2siv8qi"
   [(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi"
   rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
   rtx op0 = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
+  emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
   emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
 }
   else
@@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
   emit_move_insn (op3, CONST0_RTX (V4SImode));
   emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
   emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
-  emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
 
   /* vec_perm (op0, 2, 3, 0, 1);  */
   emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
 
 })
 
-(define_expand "udot_prodv8qi"
+(define_expand "udot_prodv2siv8qi"
   [(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi"
   rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
   rtx op0 = gen_reg_rtx (V4SImode);
 
-  emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
+  emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
   emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
 }
   else
@@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
   emit_move_insn (op3, CONST0_RTX (V4SImode));
   emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
   emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
-  emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+  emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
 
   /* vec_perm (op0, 2, 3, 0, 1);  */
   emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (

[PATCH V3 04/10] arm: Fix arm backend-use of (u|s|us)dot_prod patterns

2024-08-15 Thread Victor Do Nascimento
gcc/ChangeLog:

* config/arm/arm-builtins.cc (enum arm_builtins): Add new
ARM_BUILTIN_* enum values: SDOTV8QI, SDOTV16QI, UDOTV8QI,
UDOTV16QI, USDOTV8QI, USDOTV16QI.
(arm_init_dotprod_builtins): New.
(arm_init_builtins): Add call to `arm_init_dotprod_builtins'.
(arm_general_gimple_fold_builtin): New.
* config/arm/arm-protos.h (arm_general_gimple_fold_builtin):
New prototype.
* config/arm/arm.cc (arm_gimple_fold_builtin): Add call to
`arm_general_gimple_fold_builtin'.
* config/arm/neon.md (dot_prod): Renamed to...
(dot_prod): ...this.
(neon_usdot): Renamed to...
(neon_usdot): ...this.
---
 gcc/config/arm/arm-builtins.cc   | 95 
 gcc/config/arm/arm-protos.h  |  3 +
 gcc/config/arm/arm.cc|  1 +
 gcc/config/arm/arm_neon_builtins.def |  3 -
 gcc/config/arm/neon.md   |  6 +-
 5 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c9d50bf8fbb..b23b6caa063 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -45,6 +45,8 @@
 #include "arm-builtins.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "basic-block.h"
+#include "gimple.h"
 
 #define SIMD_MAX_BUILTIN_ARGS 7
 
@@ -1298,6 +1300,13 @@ enum arm_builtins
 #define VAR1(T, N, X) \
   ARM_BUILTIN_##N,
 
+  ARM_BUILTIN_NEON_SDOTV8QI,
+  ARM_BUILTIN_NEON_SDOTV16QI,
+  ARM_BUILTIN_NEON_UDOTV8QI,
+  ARM_BUILTIN_NEON_UDOTV16QI,
+  ARM_BUILTIN_NEON_USDOTV8QI,
+  ARM_BUILTIN_NEON_USDOTV16QI,
+
   ARM_BUILTIN_ACLE_BASE,
   ARM_BUILTIN_SAT_IMM_CHECK = ARM_BUILTIN_ACLE_BASE,
 
@@ -2648,6 +2657,60 @@ arm_init_fp16_builtins (void)
   "__fp16");
 }
 
+static void
+arm_init_dotprod_builtins (void)
+{
+  tree fndecl = NULL;
+  tree ftype = NULL;
+
+  tree uv8qi = arm_simd_builtin_type (V8QImode, qualifier_unsigned);
+  tree sv8qi = arm_simd_builtin_type (V8QImode, qualifier_none);
+  tree uv16qi = arm_simd_builtin_type (V16QImode, qualifier_unsigned);
+  tree sv16qi = arm_simd_builtin_type (V16QImode, qualifier_none);
+  tree uv2si = arm_simd_builtin_type (V2SImode, qualifier_unsigned);
+  tree sv2si = arm_simd_builtin_type (V2SImode, qualifier_none);
+  tree uv4si = arm_simd_builtin_type (V4SImode, qualifier_unsigned);
+  tree sv4si = arm_simd_builtin_type (V4SImode, qualifier_none);
+
+  struct builtin_decls_data
+  {
+tree out_type_node;
+tree in_type1_node;
+tree in_type2_node;
+const char *builtin_name;
+int function_code;
+  };
+
+#define NAME(A) "__builtin_neon_" #A
+#define ENUM(B) ARM_BUILTIN_NEON_##B
+
+  builtin_decls_data bdda[] =
+  {
+{ sv2si, sv8qi,  sv8qi,  NAME (sdotv8qi),  ENUM (SDOTV8QI)   },
+{ uv2si, uv8qi,  uv8qi,  NAME (udotv8qi_),  ENUM (UDOTV8QI)   },
+{ sv2si, uv8qi,  sv8qi,  NAME (usdotv8qi_ssus), ENUM (USDOTV8QI)  },
+{ sv4si, sv16qi, sv16qi, NAME (sdotv16qi), ENUM (SDOTV16QI)  },
+{ uv4si, uv16qi, uv16qi, NAME (udotv16qi_),  ENUM (UDOTV16QI)  },
+{ sv4si, uv16qi, sv16qi, NAME (usdotv16qi_ssus), ENUM (USDOTV16QI) },
+  };
+
+#undef NAME
+#undef ENUM
+
+  builtin_decls_data *bdd = bdda;
+  builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
+
+  for (; bdd < bdd_end; bdd++)
+  {
+ftype = build_function_type_list (bdd->out_type_node, bdd->out_type_node,
+ bdd->in_type1_node, bdd->in_type2_node,
+ NULL_TREE);
+fndecl = arm_general_add_builtin_function (bdd->builtin_name,
+  ftype, bdd->function_code);
+arm_builtin_decls[bdd->function_code] = fndecl;
+  }
+}
+
 void
 arm_init_builtins (void)
 {
@@ -2676,6 +2739,7 @@ arm_init_builtins (void)
arm_init_neon_builtins ();
   arm_init_vfp_builtins ();
   arm_init_crypto_builtins ();
+  arm_init_dotprod_builtins ();
 }
 
   if (TARGET_CDE)
@@ -2738,6 +2802,37 @@ arm_builtin_decl (unsigned code, bool initialize_p 
ATTRIBUTE_UNUSED)
 }
 }
 
+/* Try to fold STMT, given that it's a call to the built-in function with
+   subcode FCODE.  Return the new statement on success and null on
+   failure.  */
+gimple *
+arm_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
+{
+  gimple *new_stmt = NULL;
+  unsigned nargs = gimple_call_num_args (stmt);
+  tree *args = (nargs > 0
+   ? gimple_call_arg_ptr (stmt, 0)
+   : &error_mark_node);
+
+  switch (fcode)
+{
+case ARM_BUILTIN_NEON_SDOTV8QI:
+case ARM_BUILTIN_NEON_SDOTV16QI:
+case ARM_BUILTIN_NEON_UDOTV8QI:
+case ARM_BUILTIN_NEON_UDOTV16QI:
+case ARM_BUILTIN_NEON_USDOTV8QI:
+case ARM_BUILTIN_NEON_USDOTV16QI:
+  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ 

[PATCH V3 10/10] autovectorizer: Test autovectorization of different dot-prod modes.

2024-08-15 Thread Victor Do Nascimento
From: Victor Do Nascimento 

Given the novel treatment of the dot product optab as a conversion, we
are now able to target different relationships between output modes and
input modes.

This is made clearer by way of example. Previously, on AArch64, the
following loop was vectorizable:

uint32_t udot4(int n, uint8_t* data) {
  uint32_t sum = 0;
  for (int i=0; i
+
+uint32_t udot4(int n, uint8_t* data) {
+  uint32_t sum = 0;
+  for (int i=0; i
+#include 
+
+uint32_t
+udot2 (int n, uint16_t* data)  __arm_streaming
+{
+  uint32_t sum = 0;
+  for (int i=0; i

[PATCH V3 02/10] autovectorizer: Add basic support for convert optabs

2024-08-15 Thread Victor Do Nascimento
Given the shift from modeling dot products as direct optabs to
treating them as conversion optabs, we make necessary changes to the
autovectorizer code to ensure that given the relevant tree code,
together with the input and output data modes, we can retrieve the
relevant optab and subsequently the insn_code for it.

gcc/ChangeLog:

* gimple-match-exports.cc (directly_supported_p): Add overload
for conversion-type optabs.
* gimple-match.h (directly_supported_p): Add new function
prototype.
* optabs.cc (expand_widen_pattern_expr): Make the
DOT_PROD_EXPR tree code use `find_widening_optab_handler' to
retrieve icode.
* tree-vect-loop.cc (vect_is_emulated_mixed_dot_prod): make it
call conversion-type overloaded `directly_supported_p'.
* tree-vect-patterns.cc (vect_supportable_conv_optab_p): New.
(vect_recog_dot_prod_pattern): s/direct/conv/ in call to
`vect_supportable_direct_optab_p'.
---
 gcc/gimple-match-exports.cc | 23 +++
 gcc/gimple-match.h  |  2 ++
 gcc/optabs.cc   |  3 ++-
 gcc/tree-vect-loop.cc   |  1 +
 gcc/tree-vect-patterns.cc   | 33 +++--
 5 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
index aacf3ff0414..d18497e7c83 100644
--- a/gcc/gimple-match-exports.cc
+++ b/gcc/gimple-match-exports.cc
@@ -1381,6 +1381,29 @@ directly_supported_p (code_helper code, tree type, 
optab_subtype query_type)
  && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
 }
 
+/* As above, overloading the function for conversion-type optabs.  */
+bool
+directly_supported_p (code_helper code, tree type_out, tree type_in,
+ optab_subtype query_type)
+{
+  if (code.is_tree_code ())
+{
+  convert_optab optab = optab_for_tree_code (tree_code (code), type_in,
+   query_type);
+  return (optab != unknown_optab
+ && convert_optab_handler (optab, TYPE_MODE (type_out),
+   TYPE_MODE (type_in)) != 
CODE_FOR_nothing);
+}
+  gcc_assert (query_type == optab_default
+ || (query_type == optab_vector && VECTOR_TYPE_P (type_in))
+ || (query_type == optab_scalar && !VECTOR_TYPE_P (type_in)));
+  internal_fn ifn = associated_internal_fn (combined_fn (code), type_in);
+  return (direct_internal_fn_p (ifn)
+ && direct_internal_fn_supported_p (ifn, tree_pair (type_out, type_in),
+OPTIMIZE_FOR_SPEED));
+}
+
+
 /* A wrapper around the internal-fn.cc versions of get_conditional_internal_fn
for a code_helper CODE operating on type TYPE.  */
 
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index d710fcbace2..0333a5db00a 100644
--- a/gcc/gimple-match.h
+++ b/gcc/gimple-match.h
@@ -419,6 +419,8 @@ code_helper canonicalize_code (code_helper, tree);
 
 #ifdef GCC_OPTABS_TREE_H
 bool directly_supported_p (code_helper, tree, optab_subtype = optab_default);
+bool directly_supported_p (code_helper, tree, tree,
+  optab_subtype = optab_default);
 #endif
 
 internal_fn get_conditional_internal_fn (code_helper, tree);
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 185c5b1a705..32737fb80e8 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -317,7 +317,8 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0, rtx 
op1, rtx wide_op,
 widen_pattern_optab
   = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
   if (ops->code == WIDEN_MULT_PLUS_EXPR
-  || ops->code == WIDEN_MULT_MINUS_EXPR)
+  || ops->code == WIDEN_MULT_MINUS_EXPR
+  || ops->code == DOT_PROD_EXPR)
 icode = find_widening_optab_handler (widen_pattern_optab,
 TYPE_MODE (TREE_TYPE (ops->op2)),
 tmode0);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6456220cdc9..5f3de7b72a8 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5289,6 +5289,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info stmt_info)
 
   gcc_assert (STMT_VINFO_REDUC_VECTYPE_IN (stmt_info));
   return !directly_supported_p (DOT_PROD_EXPR,
+   STMT_VINFO_VECTYPE (stmt_info),
STMT_VINFO_REDUC_VECTYPE_IN (stmt_info),
optab_vector_mixed_sign);
 }
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index f52de2b6972..b5af2f001b1 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -250,6 +250,35 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree 
otype, tree_code code,
   return true;
 }
 
+/* Return true if the target supports a vector version of CODE,
+   where CODE is known to map to a conversion optab with the given SUBTYPE.
+   ITYPE specifies the type of (som

[PATCH V3 03/10] aarch64: Fix aarch64 backend-use of (u|s|us)dot_prod patterns

2024-08-15 Thread Victor Do Nascimento
Given recent changes to the dot_prod standard pattern name, this patch
fixes the aarch64 back-end by implementing the following changes:

1. Add 2nd mode to all (u|s|us)dot_prod patterns in .md files.
2. Rewrite initialization and function expansion mechanism for simd
builtins.
3. Fix all direct calls to back-end `dot_prod' patterns in SVE
builtins.

Finally, given that it is now possible for the compiler to
differentiate between the two- and four-way dot product, we add a test
to ensure that autovectorization picks up on dot-product patterns
where the result is twice the width of the operands.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(dot_prod): Renamed to...
(dot_prod): ...this.
(usdot_prod): Renamed to...
(usdot_prod): ...this.
(sadv16qi): Adjust call to gen_udot_prod to take second mode.
(popcount): fix use of `udot_prod_optab'.
* config/aarch64/aarch64-sve.md
(dot_prod): Renamed to...
(dot_prod): ...this.
(@dot_prod): Renamed to...
(@dot_prod): ...this.
(sad): Adjust call to gen_udot_prod to take second mode.
* config/aarch64/aarch64-sve2.md
(@aarch64_sve_dotvnx4sivnx8hi): Renamed to...
(dot_prodvnx4sivnx8hi): ...this.
* config/aarch64/aarch64-simd-builtins.def: Modify macro
expansion-based initialization and expansion
of (u|s|us)dot_prod builtins.
* config/aarch64/aarch64-sve-builtins-base.cc
(svdot_impl::expand): s/direct/convert/ in
`convert_optab_handler_for_sign' function call.
(svusdot_impl::expand): add second mode argument in call to
`code_for_dot_prod'.
* config/aarch64/aarch64-sve-builtins.cc
(function_expander::convert_optab_handler_for_sign): New class
method.
* config/aarch64/aarch64-sve-builtins.h
(class function_expander): Add prototype for new
`convert_optab_handler_for_sign' method.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sme/vect-dotprod-twoway.c (udot2): New.
---
 gcc/config/aarch64/aarch64-builtins.cc|  7 ++
 gcc/config/aarch64/aarch64-simd-builtins.def  |  6 ++---
 gcc/config/aarch64/aarch64-simd.md|  9 ---
 .../aarch64/aarch64-sve-builtins-base.cc  | 13 +-
 gcc/config/aarch64/aarch64-sve-builtins.cc| 17 +
 gcc/config/aarch64/aarch64-sve-builtins.h |  3 +++
 gcc/config/aarch64/aarch64-sve.md |  6 ++---
 gcc/config/aarch64/aarch64-sve2.md|  2 +-
 .../aarch64/sme/vect-dotprod-twoway.c | 25 +++
 9 files changed, 71 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 30669f8aa18..8af646ab066 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -458,6 +458,13 @@ 
aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   qualifier_poly, qualifier_struct_load_store_lane_index };
 #define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
 
+constexpr insn_code CODE_FOR_aarch64_sdot_prodv8qi = 
CODE_FOR_sdot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_udot_prodv8qi = 
CODE_FOR_udot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_usdot_prodv8qi = 
CODE_FOR_usdot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_sdot_prodv16qi = 
CODE_FOR_sdot_prodv4siv16qi;
+constexpr insn_code CODE_FOR_aarch64_udot_prodv16qi = 
CODE_FOR_udot_prodv4siv16qi;
+constexpr insn_code CODE_FOR_aarch64_usdot_prodv16qi = 
CODE_FOR_usdot_prodv4siv16qi;
+
 #define CF0(N, X) CODE_FOR_aarch64_##N##X
 #define CF1(N, X) CODE_FOR_##N##X##1
 #define CF2(N, X) CODE_FOR_##N##X##2
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index e65f73d7ba2..0814f8ba14f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -418,9 +418,9 @@
   BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
 
   /* Implemented by _prod.  */
-  BUILTIN_VB (TERNOP, sdot_prod, 10, NONE)
-  BUILTIN_VB (TERNOPU, udot_prod, 10, NONE)
-  BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE)
+  BUILTIN_VB (TERNOP, sdot_prod, 0, NONE)
+  BUILTIN_VB (TERNOPU, udot_prod, 0, NONE)
+  BUILTIN_VB (TERNOP_SUSS, usdot_prod, 0, NONE)
   /* Implemented by aarch64__lane{q}.  */
   BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
   BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index cc612ec2ca0..e15e547b000 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -568,7 +568,7 @@ (define_expand "cmul3"
 ;; ...
 ;;
 ;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "dot_prod"
+(define_insn "dot

[PATCH V3 09/10] c6x: Adjust dot-product backend patterns

2024-08-15 Thread Victor Do Nascimento
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

* config/c6x/c6x.md (sdot_prodv2hi): Renamed to...
(sdot_prodsiv2hi): ...this.
---
 gcc/config/c6x/c6x.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/c6x/c6x.md b/gcc/config/c6x/c6x.md
index 5964dd69d0d..ea9ffe8b4e1 100644
--- a/gcc/config/c6x/c6x.md
+++ b/gcc/config/c6x/c6x.md
@@ -3082,7 +3082,7 @@ (define_insn "v2hi3"
 ;; Widening vector multiply and dot product.
 ;; See c6x-mult.md.in for the define_insn patterns
 
-(define_expand "sdot_prodv2hi"
+(define_expand "sdot_prodsiv2hi"
   [(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
-- 
2.34.1



[PATCH] late-combine: Preserve INSN_CODE when modifying notes [PR116343]

2024-08-15 Thread Richard Sandiford
When it removes a definition, late-combine tries to update all
uses in notes.  It does this using the same insn_propagation class
that it uses for patterns.

However, insn_propagation uses validate_change, which in turn
resets the INSN_CODE.  This is inefficient in the best case,
since it forces the pattern to be rerecognised even though
changing a note can't affect the INSN_CODE.  But in the PR
it's a correctness problem: resetting INSN_CODE means we lose
the NOOP_MOVE_INSN_CODE, which in turn means that rtl-ssa doesn't
queue it for deletion.

This patch adds a routine specifically for propagating into notes.
A belt-and-braces fix would be to rerecognise noop moves in
function_info::change_insns, but I can't think of a good reason
why that would be necessary, and it could paper over latent bugs.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR testsuite/116343
* recog.h (insn_propagation::apply_to_note): Declare.
* recog.cc (insn_propagation::apply_to_note): New function.
* late-combine.cc (insn_combination::substitute_note): Use
apply_to_note instead of apply_to_rvalue.
* rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Improve
dumping of costs for noop moves.

gcc/testsuite/
PR testsuite/116343
* gcc.dg/torture/pr116343.c: New test.
---
 gcc/late-combine.cc |  2 +-
 gcc/recog.cc| 13 +
 gcc/recog.h |  1 +
 gcc/rtl-ssa/changes.cc  |  5 -
 gcc/testsuite/gcc.dg/torture/pr116343.c | 18 ++
 5 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr116343.c

diff --git a/gcc/late-combine.cc b/gcc/late-combine.cc
index 2b62e2956ed..1d81b386c3d 100644
--- a/gcc/late-combine.cc
+++ b/gcc/late-combine.cc
@@ -338,7 +338,7 @@ insn_combination::substitute_note (insn_info *use_insn, rtx 
note,
   || REG_NOTE_KIND (note) == REG_EQUIV)
 {
   insn_propagation prop (use_insn->rtl (), m_dest, m_src);
-  return (prop.apply_to_rvalue (&XEXP (note, 0))
+  return (prop.apply_to_note (&XEXP (note, 0))
  && (can_propagate || prop.num_replacements == 0));
 }
   return true;
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 23e4820180f..615aaabc551 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -1469,6 +1469,19 @@ insn_propagation::apply_to_rvalue (rtx *loc)
   return res;
 }
 
+/* Like apply_to_rvalue, but specifically for the case where *LOC is in
+   a note.  This never changes the INSN_CODE.  */
+
+bool
+insn_propagation::apply_to_note (rtx *loc)
+{
+  auto old_code = INSN_CODE (insn);
+  bool res = apply_to_rvalue (loc);
+  if (INSN_CODE (insn) != old_code)
+INSN_CODE (insn) = old_code;
+  return res;
+}
+
 /* Check whether INSN matches a specific alternative of an .md pattern.  */
 
 bool
diff --git a/gcc/recog.h b/gcc/recog.h
index 87a5803dec0..1dccce78ba4 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -121,6 +121,7 @@ public:
   insn_propagation (rtx_insn *, rtx, rtx, bool = true);
   bool apply_to_pattern (rtx *);
   bool apply_to_rvalue (rtx *);
+  bool apply_to_note (rtx *);
 
   /* Return true if we should accept a substitution into the address of
  memory expression MEM.  Undoing changes OLD_NUM_CHANGES and up restores
diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index a30f000191e..0476296607b 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -228,7 +228,10 @@ rtl_ssa::changes_are_worthwhile (array_slice changes,
   for (const insn_change *change : changes)
if (!change->is_deletion ())
  {
-   fprintf (dump_file, " %c %d", sep, change->new_cost);
+   if (INSN_CODE (change->rtl ()) == NOOP_MOVE_INSN_CODE)
+ fprintf (dump_file, " %c nop", sep);
+   else
+ fprintf (dump_file, " %c %d", sep, change->new_cost);
sep = '+';
  }
   if (weighted_new_cost != 0)
diff --git a/gcc/testsuite/gcc.dg/torture/pr116343.c 
b/gcc/testsuite/gcc.dg/torture/pr116343.c
new file mode 100644
index 000..ad13f0fc21c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr116343.c
@@ -0,0 +1,18 @@
+// { dg-additional-options "-fschedule-insns -fno-thread-jumps -fno-dce" }
+
+int a, b, c;
+volatile int d;
+int e(int f, int g) { return g > 1 ? 1 : f >> g; }
+int main() {
+  int *i = &a;
+  long j[1];
+  if (a)
+while (1) {
+  a ^= 1;
+  if (*i)
+while (1)
+  ;
+  b = c && e((d, 1) >= 1, j[0]);
+}
+  return 0;
+}
-- 
2.25.1



[PATCH V3 07/10] mips: Adjust dot-product backend patterns

2024-08-15 Thread Victor Do Nascimento
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

* config/mips/loongson-mmi.md (sdot_prodv4hi): Renamed to...
(sdot_prodv2siv4hi): ...this.
---
 gcc/config/mips/loongson-mmi.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/mips/loongson-mmi.md b/gcc/config/mips/loongson-mmi.md
index dd166bfa4c9..4d958730139 100644
--- a/gcc/config/mips/loongson-mmi.md
+++ b/gcc/config/mips/loongson-mmi.md
@@ -394,7 +394,7 @@ (define_insn "loongson_pmaddhw"
   "pmaddhw\t%0,%1,%2"
   [(set_attr "type" "fmul")])
 
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
   [(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
-- 
2.34.1



[PATCH V3 06/10] arc: Adjust dot-product backend patterns

2024-08-15 Thread Victor Do Nascimento
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

* config/arc/simdext.md (sdot_prodv2hi): Renamed to...
(sdot_prodsiv2hi): ...this.
(udot_prodv2hi): Renamed to...
(udot_prodsiv2hi): ...this.
(sdot_prodv4hi): Renamed to...
(sdot_prodv2siv4hi): ...this.
(udot_prodv4hi): Renamed to...
(udot_prodv2siv4hi): ...this.
---
 gcc/config/arc/simdext.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 4e51a237c3a..0696f0abb70 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1643,7 +1643,7 @@ (define_insn "dmpyh"
 
 ;; We can use dmac as well here.  To be investigated which version
 ;; brings more.
-(define_expand "sdot_prodv2hi"
+(define_expand "sdot_prodsiv2hi"
   [(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
@@ -1656,7 +1656,7 @@ (define_expand "sdot_prodv2hi"
  DONE;
 })
 
-(define_expand "udot_prodv2hi"
+(define_expand "udot_prodsiv2hi"
   [(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
@@ -1669,7 +1669,7 @@ (define_expand "udot_prodv2hi"
  DONE;
 })
 
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
   [(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
@@ -1688,7 +1688,7 @@ (define_expand "sdot_prodv4hi"
  DONE;
 })
 
-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
   [(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
-- 
2.34.1



[PATCH V3 08/10] rs6000: Adjust altivec dot-product backend patterns

2024-08-15 Thread Victor Do Nascimento
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.

gcc/ChangeLog:

* config/rs6000/altivec.md (udot_prod): Renamed to...
(udot_prodv4si): ...this.
(sdot_prodv8hi): Renamed to...
(sdot_prodv4siv8hi): ...this.
---
 gcc/config/rs6000/altivec.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f..0911c1792a8 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -3698,7 +3698,7 @@ (define_expand "neg2"
 }
 })
 
-(define_expand "udot_prod"
+(define_expand "udot_prodv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
 (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
(unspec:V4SI [(match_operand:VIshort 1 "register_operand" 
"v")  
@@ -3710,7 +3710,7 @@ (define_expand "udot_prod"
   DONE;
 })
 
-(define_expand "sdot_prodv8hi"
+(define_expand "sdot_prodv4siv8hi"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
 (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
-- 
2.34.1



[PATCH] Tweak base/index disambiguation in decompose_normal_address [PR116236]

2024-08-15 Thread Richard Sandiford
The PR points out that, for an address like:

  (plus (zero_extend X) Y)

decompose_normal_address doesn't establish a strong preference
between treating X as the base or Y as the base.  As the comment
in the patch says, zero_extend isn't enough on its own to assume
an index, at least not on POINTERS_EXTEND_UNSIGNED targets.
But in a construct like the one above, X and Y have different modes,
and it seems reasonable to assume that the one with the expected
address mode is the base.

This matters on targets like m68k that support index extension
and that require different classes for bases and indices.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  Andreas also confirms
that it fixes the m68k LRA problem.  OK to install?

Richard


gcc/
PR middle-end/116236
* rtlanal.cc (decompose_normal_address): Try to distinguish
bases and indices based on mode, before resorting to "baseness".
---
 gcc/rtlanal.cc | 40 
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc
index 4158a531bdd..71207ee4f41 100644
--- a/gcc/rtlanal.cc
+++ b/gcc/rtlanal.cc
@@ -6724,20 +6724,36 @@ decompose_normal_address (struct address_info *info)
 }
   else if (out == 2)
 {
+  auto address_mode = targetm.addr_space.address_mode (info->as);
+  rtx inner_op0 = *inner_ops[0];
+  rtx inner_op1 = *inner_ops[1];
+  int base;
+  /* If one inner operand has the expected mode for a base and the other
+doesn't, assume that the other one is the index.  This is useful
+for addresses such as:
+
+  (plus (zero_extend X) Y)
+
+zero_extend is not in itself enough to assume an index, since bases
+can be zero-extended on POINTERS_EXTEND_UNSIGNED targets.  But if
+Y has address mode and X doesn't, there should be little doubt that
+Y is the base.  */
+  if (GET_MODE (inner_op0) == address_mode
+ && GET_MODE (inner_op1) != address_mode)
+   base = 0;
+  else if (GET_MODE (inner_op1) == address_mode
+  && GET_MODE (inner_op0) != address_mode)
+   base = 1;
   /* In the event of a tie, assume the base comes first.  */
-  if (baseness (*inner_ops[0], info->mode, info->as, PLUS,
-   GET_CODE (*ops[1]))
- >= baseness (*inner_ops[1], info->mode, info->as, PLUS,
-  GET_CODE (*ops[0])))
-   {
- set_address_base (info, ops[0], inner_ops[0]);
- set_address_index (info, ops[1], inner_ops[1]);
-   }
+  else if (baseness (inner_op0, info->mode, info->as, PLUS,
+GET_CODE (*ops[1]))
+  >= baseness (inner_op1, info->mode, info->as, PLUS,
+   GET_CODE (*ops[0])))
+   base = 0;
   else
-   {
- set_address_base (info, ops[1], inner_ops[1]);
- set_address_index (info, ops[0], inner_ops[0]);
-   }
+   base = 1;
+  set_address_base (info, ops[base], inner_ops[base]);
+  set_address_index (info, ops[1 - base], inner_ops[1 - base]);
 }
   else
 gcc_assert (out == 0);
-- 
2.25.1



Re: Re: [PATCH v2 1/1] RISC-V: Support BF16 interfaces in libgcc

2024-08-15 Thread Xiao Zeng
2024-08-13 15:53  Jakub Jelinek  wrote:
>
>On Tue, Aug 13, 2024 at 11:14:47AM +0800, Xiao Zeng wrote:
>> Thank you very much for the in-depth discussion between Jakub Jelinek and 
>> jeff.
>> My knowledge is narrow, and I am not familiar with architectures other than 
>> RISCV.
>> At the same time, my understanding of libraries such as libc and libm is 
>> also shallow.
>>
>> I spent some time sorting out my thoughts, which resulted in slow email 
>> replies. I am very sorry.
>
>The important thing is that the current state of BF16 support on other
>architectures is what we want there, not more.  So any changes done for
>RISCV shouldn't affect the other architectures, that wasn't the case of
>the patch you've posted.
>E.g. on x86_64, for FP16 we have:
>__divhc3@@GCC_12.0.0
>__eqhf2@@GCC_12.0.0
>__extendhfdf2@@GCC_12.0.0
>__extendhfsf2@@GCC_12.0.0
>__extendhftf2@@GCC_12.0.0
>__extendhfxf2@@GCC_12.0.0
>__fixhfti@@GCC_12.0.0
>__fixunshfti@@GCC_12.0.0
>__floatbitinthf@@GCC_14.0.0
>__floattihf@@GCC_12.0.0
>__floatuntihf@@GCC_12.0.0
>__mulhc3@@GCC_12.0.0
>__nehf2@@GCC_12.0.0
>__truncdfhf2@@GCC_12.0.0
>__trunchfbf2@@GCC_13.0.0
>__truncsfhf2@@GCC_12.0.0
>__trunctfhf2@@GCC_12.0.0
>__truncxfhf2@@GCC_12.0.0
>exported from libgcc, while for BF16 just:
>__extendbfsf2@@GCC_13.0.0
>__floatbitintbf@@GCC_14.0.0
>__floattibf@@GCC_13.0.0
>__floatuntibf@@GCC_13.0.0
>__truncdfbf2@@GCC_13.0.0
>__trunchfbf2@@GCC_13.0.0
>__truncsfbf2@@GCC_13.0.0
>__trunctfbf2@@GCC_13.0.0
>__truncxfbf2@@GCC_13.0.0
>More attention has been paid to what we actually need there, which is
>primarily conversions to/from other types (but even not to all of them), with
>some changes on the RTL expression lowering side to make sure we use the
>SFmode arithmetics as much as possible and only have the really required
>stuff on the libgcc side.
>We don't want to change that, if you really need __mulbc3/__divbc3 on RISCV,
>then it should be added for that arch only.  And similarly, the choice
>of the builtins on the compiler side, the two builtins we have right now is
>all we want on the other arches.  So, further builtins would be either a
>matter of RISCV specific builtins, or in generic code but guarded by some
>target hook so that they aren't enabled on arches which don't want them.
>On the libstdc++ side, the current headers provide for std::bfloat16_t and
>std::float16_t an implementation which uses SFmode calculations where
>possible, so stuff like:
>  constexpr _Float16
>  acos(_Float16 __x)
>  { return _Float16(__builtin_acosf(__x)); }
>or
>  constexpr __gnu_cxx::__bfloat16_t
>  acos(__gnu_cxx::__bfloat16_t __x)
>  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
>And for printing, note there is
>_ZSt20__to_chars_float16_tPcS_fSt12chars_format@@GLIBCXX_3.4.31
>_ZSt21__to_chars_bfloat16_tPcS_fSt12chars_format@@GLIBCXX_3.4.31
>_ZSt22__from_chars_float16_tPKcS0_RfSt12chars_format@@GLIBCXX_3.4.31
>_ZSt23__from_chars_bfloat16_tPKcS0_RfSt12chars_format@@GLIBCXX_3.4.31
>which input and output _Float16 and __bf16, but in the parameter passing
>they expect those types to be promoted to float, so that the ABIs aren't
>dependent on when a particular arch enables those types.
>
>For RISCV, the things to consider are, what is the _Float16 and __bf16
>function argument passing/returning ABI?  Is the type enabled on all
>variants of RISCV, or just some (e.g. regarding _Float16 and __bf16
>on i686-linux, there is support for it only if the SSE2 ISA is available,
>so e.g. the *[hb][fc]* functions in libgcc need to be compiled with
>-msse2 extra flag)?  If it can be passed/returned the same in all ABIs,
>what excess precision mode do you want to use on them?  I mean e.g. the
>TARGET_C_EXCESS_PRECISION target hook.  On e.g. x86_64, the default
>is to promote all _Float16 and __bf16 calculations to float, so if you have
>__bf16 a, b, c, d, e;
>...
>a = b * c + d - e + c * d;
>all variables are converted to SFmode temporaries and all the arithmetics
>is done in SFmode and only then at the end finally converted to HFmode
>or BFmode.  One can request a different mode, -fexcess-precision=16
>in which such promotion isn't done, but as there is no hw support for
>most of the operations, the actual multiplication, addition or subtraction
>is still done in SFmode, just there is a conversion to BFmode after each
>operation (so slower, but more precise).
>If you still want to export __divbc3 and __mulbc3, do you want to export
>those just on some RISCV ABI variants or all of them?  Depending on that,
>arrange for those to be compiled just for those; and, if it is exported
>from libgcc_s.so.1, you also need to add a symbol version for those, likely
>GCC_15.0.0.
>
>For enabling just those 2 functions, I don't think you need any changes on
>the builtins.def etc. side, those aren't builtins but libcalls.
>
>If you need other libgcc calls, similar questions to above apply, but please
>don't add them just because you can, but only if you really need them (they
>can't be handled 

[pushed] aarch64: Rename svpext to svpext_lane [PR116371]

2024-08-15 Thread Richard Sandiford
When implementing the SME2 ACLE, I somehow missed off the _lane
suffix on svpext.

Tested on aarch64-linux-gnu & pushed.  (I wondered about posting and
waiting for comments, but the patch seems kind-of obvious, once the
mistake is pointed out.)

I'll backport to GCC 14 if there is no fallout.

Richard


gcc/
PR target/116371
* config/aarch64/aarch64-sve-builtins-sve2.h (svpext): Rename to...
(svpext_lane): ...this.
* config/aarch64/aarch64-sve-builtins-sve2.cc (svpext_impl): Rename
to...
(svpext_lane_impl): ...this and update instantiation accordingly.
* config/aarch64/aarch64-sve-builtins-sve2.def (svpext): Rename to...
(svpext_lane): ...this.

gcc/testsuite/
PR target/116371
* gcc.target/aarch64/sme2/acle-asm/pext_c16.c,
gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c32.c,
gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c64.c,
gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_c8.c,
gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c: Replace with...
* gcc.target/aarch64/sme2/acle-asm/pext_lane_c16.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c16_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c32.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c32_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c64.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c64_x2.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c8.c,
gcc.target/aarch64/sme2/acle-asm/pext_lane_c8_x2.c: ...these new tests,
testing for svpext_lane instead of svpext.
---
 .../aarch64/aarch64-sve-builtins-sve2.cc  |  4 +-
 .../aarch64/aarch64-sve-builtins-sve2.def |  2 +-
 .../aarch64/aarch64-sve-builtins-sve2.h   |  2 +-
 .../aarch64/sme2/acle-asm/pext_c16.c  | 50 -
 .../aarch64/sme2/acle-asm/pext_c16_x2.c   | 54 ---
 .../aarch64/sme2/acle-asm/pext_c32.c  | 50 -
 .../aarch64/sme2/acle-asm/pext_c32_x2.c   | 54 ---
 .../aarch64/sme2/acle-asm/pext_c64.c  | 50 -
 .../aarch64/sme2/acle-asm/pext_c64_x2.c   | 54 ---
 .../aarch64/sme2/acle-asm/pext_c8.c   | 50 -
 .../aarch64/sme2/acle-asm/pext_c8_x2.c| 54 ---
 .../aarch64/sme2/acle-asm/pext_lane_c16.c | 50 +
 .../aarch64/sme2/acle-asm/pext_lane_c16_x2.c  | 54 +++
 .../aarch64/sme2/acle-asm/pext_lane_c32.c | 50 +
 .../aarch64/sme2/acle-asm/pext_lane_c32_x2.c  | 54 +++
 .../aarch64/sme2/acle-asm/pext_lane_c64.c | 50 +
 .../aarch64/sme2/acle-asm/pext_lane_c64_x2.c  | 54 +++
 .../aarch64/sme2/acle-asm/pext_lane_c8.c  | 50 +
 .../aarch64/sme2/acle-asm/pext_lane_c8_x2.c   | 54 +++
 19 files changed, 420 insertions(+), 420 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c16_x2.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c32_x2.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c64_x2.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8.c
 delete mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_c8_x2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c16_x2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c32.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c32_x2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c64.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c64_x2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/pext_lane_c8_x2.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index dc591551682..146a5459930 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -221,7 +221,7 @@ public:
   }
 };
 
-class svpext_impl : public function_base
+class svpext_lane_impl : public function_base
 {
 public:
   rtx
@@ -619,7 +619,7 @@ FUNCTION (svmullt_lane, unspec_based_lane_function, 
(UNSPEC_SMULLT,
 UNSPEC_UMULLT, -1)

[x86 PATCH] Improve split of *extendv2di2_highpart_stv_noavx512vl.

2024-08-15 Thread Roger Sayle

This patch follows up on the previous patch to fix PR target/116275 by
improving the code STV (ultimately) generates for highpart sign extensions
like (x<<8)>>8.  The arithmetic right shift is able to take advantage of
the available common subexpressions from the preceding left shift.

Hence previously with -O2 -m32 -mavx -mno-avx512vl we'd generate:

vpsllq  $8, %xmm0, %xmm0
vpsrad  $8, %xmm0, %xmm1
vpsrlq  $8, %xmm0, %xmm0
vpblendw$51, %xmm0, %xmm1, %xmm0

But with improved splitting, we now generate three instructions:

vpslld  $8, %xmm1, %xmm0
vpsrad  $8, %xmm0, %xmm0
vpblendw$51, %xmm1, %xmm0, %xmm0

This patch also implements Uros' suggestion that the pre-reload
splitter could introduce a new pseudo to hold the intermediate
to potentially help reload with register allocation, which applies
when not performing the above optimization, i.e. on TARGET_XOP.


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-08-15  Roger Sayle  
Uros Bizjak  

gcc/ChangeLog
* config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): Split
to an improved implementation on !TARGET_XOP.  On TARGET_XOP, use
a new pseudo for the intermediate to simplify register allocation.

gcc/testsuite/ChangeLog
* g++.target/i386/pr116275-2.C: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1a6188f..6bd8e766 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17407,10 +17407,38 @@
&& ix86_pre_reload_split ()"
   "#"
   "&& 1"
-  [(set (match_dup 0)
+  [(set (match_dup 4)
(ashift:V2DI (match_dup 1) (match_dup 2)))
(set (match_dup 0)
-   (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
+   (ashiftrt:V2DI (match_dup 4) (match_dup 2)))]
+{
+  if (!TARGET_XOP)
+{
+  rtx op0 = operands[0];
+  rtx op2 = operands[2];
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_reg_rtx (V4SImode);
+  rtx tmp3 = gen_reg_rtx (V4SImode);
+  rtx tmp4 = gen_reg_rtx (V4SImode);
+  emit_move_insn (tmp1, lowpart_subreg (V4SImode, operands[1], V2DImode));
+  emit_insn (gen_ashlv4si3 (tmp2, tmp1, op2));
+  emit_insn (gen_ashrv4si3 (tmp3, tmp2, op2));
+  vec_perm_builder sel (4, 4, 1);
+  sel.quick_grow (4);
+  sel[0] = 0;
+  sel[1] = 5;
+  sel[2] = 2;
+  sel[3] = 7;
+  vec_perm_indices indices(sel, 2, 4);
+  bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, tmp4,
+ tmp1, tmp3, indices);
+  gcc_assert (ok);
+  emit_move_insn (op0, lowpart_subreg (V2DImode, tmp4, V4SImode));
+  DONE;
+}
+  else
+operands[4] = gen_reg_rtx (V2DImode);
+})
 
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/g++.target/i386/pr116275-2.C 
b/gcc/testsuite/g++.target/i386/pr116275-2.C
new file mode 100644
index 000..98d3c19
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr116275-2.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mavx -mno-avx512vl -std=c++11" } */
+
+struct SymbolDesc push_back(SymbolDesc);
+struct SymbolDesc {
+  long long ELFLocalSymIdx;
+};
+struct Expected {
+  long long &operator*();
+};
+void SymbolizableObjectFileaddSymbol() {
+  Expected SymbolAddressOrErr;
+  long long SymbolAddress = *SymbolAddressOrErr << 8 >> 8;
+  push_back({SymbolAddress});
+}
+
+/* { dg-final { scan-assembler "vpslld" } } */
+/* { dg-final { scan-assembler-not "vpsllq" } } */
+/* { dg-final { scan-assembler-not "vpsrlq" } } */


RE: [PATCH V3 02/10] autovectorizer: Add basic support for convert optabs

2024-08-15 Thread Tamar Christina
Hi Victor,

> -Original Message-
> From: Victor Do Nascimento 
> Sent: Thursday, August 15, 2024 9:44 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Tamar Christina ; claz...@gmail.com;
> hongtao@intel.com; s...@gcc.gnu.org; bernds_...@t-online.de;
> al...@redhat.com; Victor Do Nascimento 
> Subject: [PATCH V3 02/10] autovectorizer: Add basic support for convert optabs
> 
> Given the shift from modeling dot products as direct optabs to
> treating them as conversion optabs, we make necessary changes to the
> autovectorizer code to ensure that given the relevant tree code,
> together with the input and output data modes, we can retrieve the
> relevant optab and subsequently the insn_code for it.
> 
> gcc/ChangeLog:
> 
>   * gimple-match-exports.cc (directly_supported_p): Add overload
>   for conversion-type optabs.
>   * gimple-match.h (directly_supported_p): Add new function
>   prototype.
>   * optabs.cc (expand_widen_pattern_expr): Make the
>   DOT_PROD_EXPR tree code use `find_widening_optab_handler' to
>   retrieve icode.
>   * tree-vect-loop.cc (vect_is_emulated_mixed_dot_prod): make it
>   call conversion-type overloaded `directly_supported_p'.
>   * tree-vect-patterns.cc (vect_supportable_conv_optab_p): New.
>   (vect_recog_dot_prod_pattern): s/direct/conv/ in call to
>   `vect_supportable_direct_optab_p'.

Thanks! This version looks good to me but can't approve.

Cheers,
Tamar
> ---
>  gcc/gimple-match-exports.cc | 23 +++
>  gcc/gimple-match.h  |  2 ++
>  gcc/optabs.cc   |  3 ++-
>  gcc/tree-vect-loop.cc   |  1 +
>  gcc/tree-vect-patterns.cc   | 33 +++--
>  5 files changed, 59 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
> index aacf3ff0414..d18497e7c83 100644
> --- a/gcc/gimple-match-exports.cc
> +++ b/gcc/gimple-match-exports.cc
> @@ -1381,6 +1381,29 @@ directly_supported_p (code_helper code, tree type,
> optab_subtype query_type)
> && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED));
>  }
> 
> +/* As above, overloading the function for conversion-type optabs.  */
> +bool
> +directly_supported_p (code_helper code, tree type_out, tree type_in,
> +   optab_subtype query_type)
> +{
> +  if (code.is_tree_code ())
> +{
> +  convert_optab optab = optab_for_tree_code (tree_code (code), type_in,
> + query_type);
> +  return (optab != unknown_optab
> +   && convert_optab_handler (optab, TYPE_MODE (type_out),
> + TYPE_MODE (type_in)) !=
> CODE_FOR_nothing);
> +}
> +  gcc_assert (query_type == optab_default
> +   || (query_type == optab_vector && VECTOR_TYPE_P (type_in))
> +   || (query_type == optab_scalar && !VECTOR_TYPE_P (type_in)));
> +  internal_fn ifn = associated_internal_fn (combined_fn (code), type_in);
> +  return (direct_internal_fn_p (ifn)
> +   && direct_internal_fn_supported_p (ifn, tree_pair (type_out, type_in),
> +  OPTIMIZE_FOR_SPEED));
> +}
> +
> +
>  /* A wrapper around the internal-fn.cc versions of 
> get_conditional_internal_fn
> for a code_helper CODE operating on type TYPE.  */
> 
> diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
> index d710fcbace2..0333a5db00a 100644
> --- a/gcc/gimple-match.h
> +++ b/gcc/gimple-match.h
> @@ -419,6 +419,8 @@ code_helper canonicalize_code (code_helper, tree);
> 
>  #ifdef GCC_OPTABS_TREE_H
>  bool directly_supported_p (code_helper, tree, optab_subtype = optab_default);
> +bool directly_supported_p (code_helper, tree, tree,
> +optab_subtype = optab_default);
>  #endif
> 
>  internal_fn get_conditional_internal_fn (code_helper, tree);
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 185c5b1a705..32737fb80e8 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -317,7 +317,8 @@ expand_widen_pattern_expr (const_sepops ops, rtx op0,
> rtx op1, rtx wide_op,
>  widen_pattern_optab
>= optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
>if (ops->code == WIDEN_MULT_PLUS_EXPR
> -  || ops->code == WIDEN_MULT_MINUS_EXPR)
> +  || ops->code == WIDEN_MULT_MINUS_EXPR
> +  || ops->code == DOT_PROD_EXPR)
>  icode = find_widening_optab_handler (widen_pattern_optab,
>TYPE_MODE (TREE_TYPE (ops->op2)),
>tmode0);
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 6456220cdc9..5f3de7b72a8 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -5289,6 +5289,7 @@ vect_is_emulated_mixed_dot_prod (stmt_vec_info
> stmt_info)
> 
>gcc_assert (STMT_VINFO_REDUC_VECTYPE_IN (stmt_info));
>return !directly_supported_p (DOT_PROD_EXPR,
> + STMT_VINFO_VECTYPE (stmt_info),
>

[x86_64 PATCH] Support wide immediate constants in STV.

2024-08-15 Thread Roger Sayle

As requested this patch is split out from my earlier submission.
This patch provides more accurate costs/gains for (wide) immediate
constants in STV, suitably adjusting the costs/gains when the highpart
and lowpart words are the same.  One minor complication is that the
middle-end assumes (when generating memset) that SSE constants will
be shared/amortized across multiple consecutive writes.  Hence to
avoid testsuite regressions, we add a heuristic that considers an immediate
constant to be very cheap, if that same immediate value occurs in the
previous instruction or in the following instruction.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?


2024-08-15  Roger Sayle  

gcc/ChangeLog
* config/i386/i386-features.cc (timode_immed_const_gain): New
function to determine the gain/cost on a CONST_WIDE_INT.
(local_duplicate_constant_p): Helper function to see if the
same immediate constant appears in the previous or next insn.
(timode_scalar_chain::compute_convert_gain): Fix whitespace.
<case CONST_WIDE_INT>: Provide more accurate estimates using
timode_immed_const_gain and local_duplicate_constant_p.
<case IOR>: Handle CONST_SCALAR_INT_P (src).


Thanks again,
Roger
--

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c36d181..78564df 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1503,6 +1503,53 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
   df_insn_rescan (insn);
 }
 
+/* Helper function to compute gain for loading an immediate constant.
+   Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
+   with numerous special cases.  */
+
+static int
+timode_immed_const_gain (rtx cst)
+{
+  /* movabsq vs. movabsq+vmovq+vunpacklqdq.  */
+  if (CONST_WIDE_INT_P (cst)
+  && CONST_WIDE_INT_NUNITS (cst) == 2
+  && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
+return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+  : -COSTS_N_INSNS (2);
+  /* 2x movabsq ~ vmovdqa.  */
+  return 0;
+}
+
+/* Return true if the insn immediately after or before INSN is a single set
+   whose destination has mode MODE and whose source is the constant CST.  */
+
+static bool
+local_duplicate_constant_p (rtx_insn *insn, machine_mode mode, rtx cst)
+{
+  rtx set;
+  /* Examine the following insn; INSN itself trivially contains CST.  */
+  rtx_insn *next = NEXT_INSN (insn);
+  if (next)
+{
+  set = single_set (next);
+  if (set
+ && GET_MODE (SET_DEST (set)) == mode
+ && rtx_equal_p (SET_SRC (set), cst))
+   return true;
+}
+  /* Likewise examine the preceding insn.  */
+  rtx_insn *prev = PREV_INSN (insn);
+  if (prev)
+{
+  set = single_set (prev);
+  if (set
+ && GET_MODE (SET_DEST (set)) == mode
+ && rtx_equal_p (SET_SRC (set), cst))
+   return true;
+}
+  return false;
+}
+
 /* Compute a gain for chain conversion.  */
 
 int
@@ -1549,7 +1596,17 @@ timode_scalar_chain::compute_convert_gain ()
case CONST_INT:
  if (MEM_P (dst)
  && standard_sse_constant_p (src, V1TImode))
-   igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (11) : 1;
+   igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ break;
+
+   case CONST_WIDE_INT:
+ igain = local_duplicate_constant_p (insn, TImode, src)
+ ? 0
+ : timode_immed_const_gain (src);
+ /* 2 x mov vs. vmovdqa.  */
+ if (MEM_P (dst))
+   igain += optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
+: COSTS_N_INSNS (1);
  break;
 
case NOT:
@@ -1562,6 +1619,8 @@ timode_scalar_chain::compute_convert_gain ()
case IOR:
  if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
+ if (CONST_SCALAR_INT_P (XEXP (src, 1)))
+   igain += timode_immed_const_gain (XEXP (src, 1));
  break;
 
case ASHIFT:


Re: [PATCH V2 03/10] aarch64: Fix aarch64 backend-use of (u|s|us)dot_prod patterns

2024-08-15 Thread Victor Do Nascimento

On 8/15/24 09:26, Richard Sandiford wrote:

Victor Do Nascimento  writes:

Given recent changes to the dot_prod standard pattern name, this patch
fixes the aarch64 back-end by implementing the following changes:

1. Add 2nd mode to all (u|s|us)dot_prod patterns in .md files.
2. Rewrite initialization and function expansion mechanism for simd
builtins.
3. Fix all direct calls to back-end `dot_prod' patterns in SVE
builtins.

Finally, given that it is now possible for the compiler to
differentiate between the two- and four-way dot product, we add a test
to ensure that autovectorization picks up on dot-product patterns
where the result is twice the width of the operands.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(dot_prod): Renamed to...
(dot_prod): ...this.
(usdot_prod): Renamed to...
(usdot_prod): ...this.
(sadv16qi): Adjust call to gen_udot_prod take second mode.
(popcount): fix use of `udot_prod_optab'.
* gcc/config/aarch64/aarch64-sve.md
(dot_prod): Renamed to...
(dot_prod): ...this.
(@dot_prod): Renamed to...
(@dot_prod): ...this.
(sad): Adjust call to gen_udot_prod take second mode.
* gcc/config/aarch64/aarch64-sve2.md
(@aarch64_sve_dotvnx4sivnx8hi): Renamed to...
(dot_prodvnx4sivnx8hi): ...this.
* config/aarch64/aarch64-simd-builtins.def: Modify macro
expansion-based initialization and expansion
of (u|s|us)dot_prod builtins.
* config/aarch64/aarch64-sve-builtins-base.cc
(svdot_impl::expand): s/direct/convert/ in
`convert_optab_handler_for_sign' function call.
(svusdot_impl::expand): add second mode argument in call to
`code_for_dot_prod'.
* config/aarch64/aarch64-sve-builtins.cc
(function_expander::convert_optab_handler_for_sign): New class
method.
* config/aarch64/aarch64-sve-builtins.h
(class function_expander): Add prototype for new
`convert_optab_handler_for_sign' method.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sme/vect-dotprod-twoway.c (udot2): New.


Could you run the patch through contrib/check_GNU_style.py to catch
the long lines?


---
  gcc/config/aarch64/aarch64-builtins.cc|  7 ++
  gcc/config/aarch64/aarch64-simd-builtins.def  |  6 ++---
  gcc/config/aarch64/aarch64-simd.md|  9 ---
  .../aarch64/aarch64-sve-builtins-base.cc  | 13 +-
  gcc/config/aarch64/aarch64-sve-builtins.cc| 17 +
  gcc/config/aarch64/aarch64-sve-builtins.h |  3 +++
  gcc/config/aarch64/aarch64-sve.md |  6 ++---
  gcc/config/aarch64/aarch64-sve2.md|  2 +-
  .../aarch64/sme/vect-dotprod-twoway.c | 25 +++
  9 files changed, 71 insertions(+), 17 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/vect-dotprod-twoway.c
[...]
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 0a560eaedca..975eca0bbd6 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3745,6 +3745,23 @@ function_expander::direct_optab_handler_for_sign (optab 
signed_op,
return ::direct_optab_handler (op, mode);
  }
  
+/* Choose between signed and unsigned convert optabs SIGNED_OP and

+   UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
+   pick the appropriate optab handler for the mode.  Use MODE as the
+   mode if given, otherwise use the mode of type suffix SUFFIX_I.  */


The last sentence needs to be adapted for this function.  Also, because
there is no longer a single mode, I don't think it makes sense to allow
a default.  So how about:

/* Choose between signed and unsigned convert optabs SIGNED_OP and
UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
pick the appropriate optab handler for "converting" from FROM_MODE
to TO_MODE.  */


+insn_code
+function_expander::convert_optab_handler_for_sign (optab signed_op,
+  optab unsigned_op,
+  unsigned int suffix_i,
+  machine_mode to_mode,
+  machine_mode from_mode)
+{
+  if (from_mode == VOIDmode)
+from_mode = vector_mode (suffix_i);


This code would then be removed.


+  optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op;
+  return ::convert_optab_handler (op, to_mode, from_mode);
+}
+
  /* Return true if X overlaps any input.  */
  bool
  function_expander::overlaps_input_p (rtx x)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h 
b/gcc/config/aarch64/aarch64-sve-builtins.h
index 9ab6f202c30..7534a58c3d7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -659,6 +659,9 @@ public:
ins

Re: [PATCHv3, expand] Add const0 move checking for CLEAR_BY_PIECES optabs

2024-08-15 Thread Richard Sandiford
HAO CHEN GUI  writes:
> Hi,
>   This patch adds const0 move checking for CLEAR_BY_PIECES. The original
> vec_duplicate handles duplicates of non-constant inputs. But 0 is a
> constant. So even a platform doesn't support vec_duplicate, it could
> still do clear by pieces if it supports const0 move by that mode.
>
>   Compared to the previous version, the main changes are to create a
> new class for clear by pieces and add an additional argument to
> indicate if the object is constant in pieces_addr.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-July/658337.html

Rather than add the additional argument, could we instead provide a
constfn that always returns zero?  ISTM that, under the current pieces_addr
framework, clear by pieces is essentially a memcpy from arbitrarily many
zeros.  E.g.:

  clear_by_pieces_d (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
: op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true,
  read_zero, NULL, len, align, false, CLEAR_BY_PIECES)

where read_zero is something like:

static rtx
read_zero (void *, void *, HOST_WIDE_INT, fixed_size_mode mode)
{
  return CONST0_RTX (mode);
}

(completely untested).

The changes to by_pieces_mode_supported_p look good.

Thanks,
Richard

>   I didn't convert const0 move predicate check to an assertion as it
> caused ICEs on i386. On i386, some modes (V8QI V4HI V2SI V1DI) have
> move expand defined but their predicates don't include const0.
>
>   Bootstrapped and tested on powerpc64-linux BE and LE with no
> regressions.
>
>   On i386, it got several regressions. One issue is the predicate of
> V16QI move expand doesn't include const0. Thus V16QI mode can't be used
> for clear by pieces with the patch. The second issue is the const0 is
> passed directly to the move expand with the patch. Originally it is
> forced to a pseudo and i386 can leverage the previous data to do
> optimization.
>
>   The patch also raises several regressions on aarch64. The V2x8QImode
> replaces TImode to do 16-byte clear by pieces as V2x8QImode move expand
> supports const0 and vector mode is preferable. I drafted a patch to
> address the issue. It will be sent for review in a separate email.
> Another problem is V8QImode replaces DImode to do 8-byte clear by pieces.
> It seems cause different sequences of instructions but the actually
> instructions are the same.
>
>
> ChangeLog
> expand: Add const0 move checking for CLEAR_BY_PIECES optabs
>
> vec_duplicate handles duplicates of non-constant inputs.  The 0 is a
> constant.  So even a platform doesn't support vec_duplicate, it could
> still do clear by pieces if it supports const0 move.  This patch adds
> the checking.
>
> gcc/
>   * expr.cc (by_pieces_mode_supported_p): Add const0 move checking
>   for CLEAR_BY_PIECES.
>   (pieces_addr::pieces_addr): Add fifth argument is_const to
>   indicate if object is a constant.  Do not set m_addr_inc if object
>   is a constant.
>   (op_by_pieces_d::op_by_pieces_d): Initialize m_from by setting
>   is_const to true for CLEAR_BY_PIECES.
>   (class clear_by_pieces_d): New.
>   (clear_by_pieces_d::prepare_mode): New.
>   (clear_by_pieces_d::generate): New.
>   (clear_by_pieces): Replace store_by_pieces_d with clear_by_pieces_d.
>
> patch.diff
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index 9f66d479445..abf69c8d698 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -1014,14 +1014,20 @@ can_use_qi_vectors (by_pieces_operation op)
>  static bool
>  by_pieces_mode_supported_p (fixed_size_mode mode, by_pieces_operation op)
>  {
> -  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
> +  enum insn_code icode = optab_handler (mov_optab, mode);
> +  if (icode == CODE_FOR_nothing)
>  return false;
>
> -  if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES)
> +  if (op == SET_BY_PIECES
>&& VECTOR_MODE_P (mode)
>&& optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing)
>  return false;
>
> +  if (op == CLEAR_BY_PIECES
> +  && VECTOR_MODE_P (mode)
> +  && !insn_operand_matches (icode, 1, CONST0_RTX (mode)))
> +   return false;
> +
>if (op == COMPARE_BY_PIECES
>&& !can_compare_p (EQ, mode, ccp_jump))
>  return false;
> @@ -1184,7 +1190,7 @@ class pieces_addr
>by_pieces_constfn m_constfn;
>void *m_cfndata;
>  public:
> -  pieces_addr (rtx, bool, by_pieces_constfn, void *);
> +  pieces_addr (rtx, bool, by_pieces_constfn, void *, bool = false);
>rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
>void increment_address (HOST_WIDE_INT);
>void maybe_predec (HOST_WIDE_INT);
> @@ -1204,7 +1210,7 @@ public:
> memory load.  */
>
>  pieces_addr::pieces_addr (rtx obj, bool is_load, by_pieces_constfn constfn,
> -   void *cfndata)
> +   void *cfndata, bool is_const)
>: m_obj (obj), m_is_load (is_load), m_constfn (constfn), m_cfndata 
> (cfndata)
>  {
>m_addr_inc = 0;

Re: [x86 PATCH] Improve split of *extendv2di2_highpart_stv_noavx512vl.

2024-08-15 Thread Uros Bizjak
On Thu, Aug 15, 2024 at 11:14 AM Roger Sayle  wrote:
>
>
> This patch follows up on the previous patch to fix PR target/116275 by
> improving the code STV (ultimately) generates for highpart sign extensions
> like (x<<8)>>8.  The arithmetic right shift is able to take advantage of
> the available common subexpressions from the preceding left shift.
>
> Hence previously with -O2 -m32 -mavx -mno-avx512vl we'd generate:
>
> vpsllq  $8, %xmm0, %xmm0
> vpsrad  $8, %xmm0, %xmm1
> vpsrlq  $8, %xmm0, %xmm0
> vpblendw$51, %xmm0, %xmm1, %xmm0
>
> But with improved splitting, we now generate three instructions:
>
> vpslld  $8, %xmm1, %xmm0
> vpsrad  $8, %xmm0, %xmm0
> vpblendw$51, %xmm1, %xmm0, %xmm0
>
> This patch also implements Uros' suggestion that the pre-reload
> splitter could introduced a new pseudo to hold the intermediate
> to potentially help reload with register allocation, which applies
> when not performing the above optimization, i.e. on TARGET_XOP.
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2024-08-15  Roger Sayle  
> Uros Bizjak  
>
> gcc/ChangeLog
> * config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): Split
> to an improved implementation on !TARGET_XOP.  On TARGET_XOP, use
> a new pseudo for the intermediate to simplify register allocation.
>
> gcc/testsuite/ChangeLog
> * g++.target/i386/pr116275-2.C: New test case.

LGTM.

Thanks,
Uros.


[PATCH] libcpp, c-family, v3: Add (dumb) C23 N3017 #embed support [PR105863]

2024-08-15 Thread Jakub Jelinek
On Wed, Aug 14, 2024 at 09:58:04PM +, Joseph Myers wrote:
> On Tue, 18 Jun 2024, Jakub Jelinek wrote:
> 
> > The following patch implements the C23 N3017 "#embed - a scannable,
> > tooling-friendly binary resource inclusion mechanism" paper.
> 
> Some initial comments, not yet reviewed the whole patch.

Thanks.  Here is an updated patch (and will post the following two which
needed updates as well).

> Having that alias seems reasonable and consistent with various other 
> options.

Done (both --embed-directory and --embed-directory= to match
--include-directory and --include-directory=).

> -Wmissing-include-dirs should probably warn about missing embed 
> directories (I don't think a separate -Wmissing-embed-dirs is needed).  I 
> suspect that already works with this patch, but a testcase should be 
> included.

Yes, it works, tests added.

> > +/* Skip over balanced preprocessing tokens until END is found.
> > +   If SAVE is non-NULL, remember the parsed tokens in it.  */
> 
> Document the parameter NESTED.

Done.

> > +/* Parse parameters of #embed directive or __has_embed expression.  */
> > +
> > +bool
> > +_cpp_parse_embed_params (cpp_reader *pfile, struct cpp_embed_params 
> > *params)
> 
> Document the meaning of the return value.

Done.

> Comparing param_kind against constants 0, 1, 2, 3 isn't very readable (and 
> it only gets worse in subsequent patches in the series comparing with 4 
> and 5 as well).  I think there should be actual defined constants (whether 
> enumeration constants or macros) indexing the table(s) of embed 
> parameters, rather than hardcoding 0 through 5.  (Or some system with 
> function pointers for embed parameter handlers in the table, so that such 
> constants aren't needed.)

Indeed, that makes it more readable.  Because of this change I'll repost the
gnu::offset and gnu::base64 patches because they needed some follow-up
tweaks, rest should remain the same.

2024-08-15  Jakub Jelinek  

PR c/105863
libcpp/
* include/cpplib.h: Implement C23 N3017 #embed - a scannable,
tooling-friendly binary resource inclusion mechanism paper.
(enum cpp_builtin_type): Add BT_HAS_EMBED.
(cpp_set_include_chains): Add another cpp_dir * argument to
the declaration.
* internal.h (enum include_type): Add IT_EMBED.
(struct cpp_reader): Add embed_include member.
(struct cpp_embed_params_tokens): New type.
(struct cpp_embed_params): New type.
(_cpp_get_token_no_padding): Declare.
(enum _cpp_find_file_kind): Add _cpp_FFK_EMBED and _cpp_FFK_HAS_EMBED.
(_cpp_stack_embed): Declare.
(_cpp_parse_expr): Change return type to cpp_num_part instead of
bool, change second argument from bool to const char * and add third
argument.
(_cpp_parse_embed_params): Declare.
* directives.cc (DIRECTIVE_TABLE): Add embed entry.
(end_directive): Don't call skip_rest_of_line for T_EMBED directive.
(_cpp_handle_directive): Return 2 rather than 1 for T_EMBED in
directives-only mode.
(parse_include): Don't call check_eol for T_EMBED directive.
(skip_balanced_token_seq): New function.
(EMBED_PARAMS): Define.
(enum embed_param_kind): New type.
(embed_params): New variable.
(_cpp_parse_embed_params): New function.
(do_embed): New function.
(do_if): Adjust _cpp_parse_expr caller.
(do_elif): Likewise.
* expr.cc (parse_defined): Diagnose defined in #embed or __has_embed
parameters.
(_cpp_parse_expr): Change return type to cpp_num_part instead of
bool, change second argument from bool to const char * and add third
argument.  Adjust function comment.  For #embed/__has_embed parameters
add an artificial CPP_OPEN_PAREN.  Use the second argument DIR
directly instead of string literals conditional on IS_IF.
For #embed/__has_embed parameter, stop on reaching CPP_CLOSE_PAREN
matching the artificial one.  Diagnose negative or too large embed
parameter operands.
(num_binary_op): Use #embed instead of #if for diagnostics if inside
#embed/__has_embed parameter.
(num_div_op): Likewise.
* files.cc (struct _cpp_file): Add limit member and embed bitfield.
(search_cache): Add IS_EMBED argument, formatting fix.  Skip over
files with different file->embed from the argument.
(find_file_in_dir): Don't call pch_open_file if file->embed.
(_cpp_find_file): Handle _cpp_FFK_EMBED and _cpp_FFK_HAS_EMBED.
(read_file_guts): Formatting fix.
(has_unique_contents): Ignore file->embed files.
(search_path_head): Handle IT_EMBED type.
(_cpp_stack_embed): New function.
(_cpp_get_file_stat): Formatting fix.
(cpp_set_include_chains): Add embed argument, save it to
pfile->embed_include and compute lens for the chain.
* in

[PATCH] libcpp, v2: Add support for gnu::offset #embed/__has_embed parameter

2024-08-15 Thread Jakub Jelinek
Hi!

Here is a new version of the gnu::offset parameter support with the
use of EMBED_PARAMS macro for the registry of parameters.

2024-08-15  Jakub Jelinek  

libcpp/
* internal.h (struct cpp_embed_params): Add offset member.
* directives.cc (EMBED_PARAMS): Add gnu::offset entry.
(enum embed_param_kind): Add NUM_EMBED_STD_PARAMS.
(_cpp_parse_embed_params): Use NUM_EMBED_STD_PARAMS rather than
NUM_EMBED_PARAMS when parsing standard parameters.  Parse gnu::offset
parameter.
* files.cc (struct _cpp_file): Add offset member.
(_cpp_stack_embed): Handle params->offset.
gcc/
* doc/cpp.texi (Binary Resource Inclusion): Document gnu::offset
#embed parameter.
gcc/testsuite/
* c-c++-common/cpp/embed-15.c: New test.
* c-c++-common/cpp/embed-16.c: New test.
* gcc.dg/cpp/embed-5.c: New test.

--- libcpp/internal.h.jj2024-08-15 10:29:44.422065173 +0200
+++ libcpp/internal.h   2024-08-15 11:26:00.726026264 +0200
@@ -630,7 +630,7 @@ struct cpp_embed_params
 {
   location_t loc;
   bool has_embed;
-  cpp_num_part limit;
+  cpp_num_part limit, offset;
   cpp_embed_params_tokens prefix, suffix, if_empty;
 };
 
--- libcpp/directives.cc.jj 2024-08-15 11:02:56.653279609 +0200
+++ libcpp/directives.cc2024-08-15 11:39:49.476685559 +0200
@@ -1014,13 +1014,15 @@ skip_balanced_token_seq (cpp_reader *pfi
   EMBED_PARAM (LIMIT, "limit") \
   EMBED_PARAM (PREFIX, "prefix")   \
   EMBED_PARAM (SUFFIX, "suffix")   \
-  EMBED_PARAM (IF_EMPTY, "if_empty")
+  EMBED_PARAM (IF_EMPTY, "if_empty")   \
+  EMBED_PARAM (GNU_OFFSET, "offset")
 
 enum embed_param_kind {
 #define EMBED_PARAM(c, s) EMBED_PARAM_##c,
   EMBED_PARAMS
 #undef EMBED_PARAM
-  NUM_EMBED_PARAMS
+  NUM_EMBED_PARAMS,
+  NUM_EMBED_STD_PARAMS = EMBED_PARAM_IF_EMPTY + 1
 };
 
 static struct { int len; const char *name; } embed_params[NUM_EMBED_PARAMS] = {
@@ -1120,7 +1122,18 @@ _cpp_parse_embed_params (cpp_reader *pfi
   size_t param_kind = -1;
   if (param_prefix == NULL)
{
- for (size_t i = 0; i < NUM_EMBED_PARAMS; ++i)
+ for (size_t i = 0; i < NUM_EMBED_STD_PARAMS; ++i)
+   if (param_name_len == embed_params[i].len
+   && memcmp (param_name, embed_params[i].name,
+  param_name_len) == 0)
+ {
+   param_kind = i;
+   break;
+ }
+   }
+  else if (param_prefix_len == 3 && memcmp (param_prefix, "gnu", 3) == 0)
+   {
+ for (size_t i = NUM_EMBED_STD_PARAMS; i < NUM_EMBED_PARAMS; ++i)
if (param_name_len == embed_params[i].len
&& memcmp (param_name, embed_params[i].name,
   param_name_len) == 0)
@@ -1157,12 +1170,23 @@ _cpp_parse_embed_params (cpp_reader *pfi
   if (param_kind != (size_t) -1 && token->type != CPP_OPEN_PAREN)
cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0,
 "expected '('");
-  else if (param_kind == EMBED_PARAM_LIMIT)
+  else if (param_kind == EMBED_PARAM_LIMIT
+  || param_kind == EMBED_PARAM_GNU_OFFSET)
{
- if (params->has_embed && pfile->op_stack == NULL)
-   _cpp_expand_op_stack (pfile);
- params->limit = _cpp_parse_expr (pfile, "#embed", token);
- token = _cpp_get_token_no_padding (pfile);
+ if (params->has_embed && pfile->op_stack == NULL)
+   _cpp_expand_op_stack (pfile);
+ cpp_num_part res = _cpp_parse_expr (pfile, "#embed", token);
+ if (param_kind == EMBED_PARAM_LIMIT)
+   params->limit = res;
+ else
+   {
+ if (res > INTTYPE_MAXIMUM (off_t))
+   cpp_error_with_line (pfile, CPP_DL_ERROR, loc, 0,
+"too large 'gnu::offset' argument");
+ else
+   params->offset = res;
+   }
+ token = _cpp_get_token_no_padding (pfile);
}
   else if (token->type == CPP_OPEN_PAREN)
{
--- libcpp/files.cc.jj  2024-08-15 10:29:44.479064462 +0200
+++ libcpp/files.cc 2024-08-15 11:26:00.727026251 +0200
@@ -90,6 +90,9 @@ struct _cpp_file
   /* Size for #embed, perhaps smaller than st.st_size.  */
   size_t limit;
 
+  /* Offset for #embed.  */
+  off_t offset;
+
   /* File descriptor.  Invalid if -1, otherwise open.  */
   int fd;
 
@@ -1243,8 +1246,11 @@ _cpp_stack_embed (cpp_reader *pfile, con
   _cpp_file *orig_file = file;
   if (file->buffer_valid
   && (!S_ISREG (file->st.st_mode)
- || (file->limit < file->st.st_size + (size_t) 0
- && file->limit < params->limit)))
+ || file->offset + (cpp_num_part) 0 > params->offset
+ || (file->limit < file->st.st_size - file->offset + (size_t) 0
+ && (params->offset - file->offset > (cpp_num_part) file->limit
+ || file->limit - (params->offset
+  

[PATCH] libcpp, v3: Add support for gnu::base64 #embed parameter

2024-08-15 Thread Jakub Jelinek
Hi!

Here is a new version of the gnu::base64 parameter support, the only changes
are in using the EMBED_PARAMS registry of parameters.

2024-08-15  Jakub Jelinek  

libcpp/
* internal.h (struct cpp_embed_params): Add base64 member.
(_cpp_free_embed_params_tokens): Declare.
* directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED.
(save_token_for_embed, _cpp_free_embed_params_tokens): New functions.
(EMBED_PARAMS): Add gnu::base64 entry.
(_cpp_parse_embed_params): Parse gnu::base64 parameter.  If
-fpreprocessed without -fdirectives-only, require #embed to have
gnu::base64 parameter.  Diagnose conflict between gnu::base64 and
limit or gnu::offset parameters.
(do_embed): Use _cpp_free_embed_params_tokens.
* files.cc (finish_embed, base64_dec_fn): New functions.
(base64_dec): New array.
(B64D0, B64D1, B64D2, B64D3): Define.
(finish_base64_embed): New function.
(_cpp_stack_embed): Use finish_embed.  Handle params->base64
using finish_base64_embed.
* macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens.
gcc/
* doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
parameter.
gcc/testsuite/
* c-c++-common/cpp/embed-17.c: New test.
* c-c++-common/cpp/embed-18.c: New test.
* c-c++-common/cpp/embed-19.c: New test.
* gcc.dg/cpp/embed-6.c: New test.
* gcc.dg/cpp/embed-7.c: New test.

--- libcpp/internal.h.jj2024-08-15 11:26:00.726026264 +0200
+++ libcpp/internal.h   2024-08-15 11:35:50.559664877 +0200
@@ -631,7 +631,7 @@ struct cpp_embed_params
   location_t loc;
   bool has_embed;
   cpp_num_part limit, offset;
-  cpp_embed_params_tokens prefix, suffix, if_empty;
+  cpp_embed_params_tokens prefix, suffix, if_empty, base64;
 };
 
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
@@ -805,6 +805,7 @@ extern void _cpp_restore_pragma_names (c
 extern int _cpp_do__Pragma (cpp_reader *, location_t);
 extern void _cpp_init_directives (cpp_reader *);
 extern void _cpp_init_internal_pragmas (cpp_reader *);
+extern void _cpp_free_embed_params_tokens (cpp_embed_params_tokens *);
 extern bool _cpp_parse_embed_params (cpp_reader *, struct cpp_embed_params *);
 extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
 linenum_type, unsigned int);
--- libcpp/directives.cc.jj 2024-08-15 11:39:49.476685559 +0200
+++ libcpp/directives.cc2024-08-15 11:49:30.107446324 +0200
@@ -159,7 +159,7 @@ static void cpp_pop_definition (cpp_read
   D(error, T_ERROR,STDC89,0)   \
   D(pragma,T_PRAGMA,   STDC89,IN_I)\
   D(warning,   T_WARNING,  STDC23,0)   \
-  D(embed, T_EMBED,STDC23,INCL | EXPAND)   \
+  D(embed, T_EMBED,STDC23,IN_I | INCL | EXPAND)\
   D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND)\
   D(ident, T_IDENT,EXTENSION, IN_I)\
   D(import,T_IMPORT,   EXTENSION, INCL | EXPAND)  /* ObjC */   \
@@ -932,6 +932,50 @@ do_include_next (cpp_reader *pfile)
   do_include_common (pfile, type);
 }
 
+/* Helper function for skip_balanced_token_seq and _cpp_parse_embed_params.
+   Save one token *TOKEN into *SAVE.  */
+
+static void
+save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token)
+{
+  if (save->count == 0)
+{
+  _cpp_init_tokenrun (&save->base_run, 4);
+  save->cur_run = &save->base_run;
+  save->cur_token = save->base_run.base;
+}
+  else if (save->cur_token == save->cur_run->limit)
+{
+  save->cur_run->next = XNEW (tokenrun);
+  save->cur_run->next->prev = save->cur_run;
+  _cpp_init_tokenrun (save->cur_run->next, 4);
+  save->cur_run = save->cur_run->next;
+  save->cur_token = save->cur_run->base;
+}
+  *save->cur_token = *token;
+  save->cur_token->flags |= NO_EXPAND;
+  save->cur_token++;
+  save->count++;
+}
+
+/* Free memory associated with saved tokens in *SAVE.  */
+
+void
+_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save)
+{
+  if (save->count == 0)
+return;
+  tokenrun *n;
+  for (tokenrun *t = &save->base_run; t; t = n)
+{
+  n = t->next;
+  XDELETEVEC (t->base);
+  if (t != &save->base_run)
+   XDELETE (t);
+}
+  save->count = 0;
+}
+
 /* Skip over balanced preprocessing tokens until END is found.
If SAVE is non-NULL, remember the parsed tokens in it.  NESTED is
false in the outermost invocation of the function and true
@@ -961,26 +1005,7 @@ skip_balanced_token_seq (cpp_reader *pfi
   if (save
  && (token->type != CPP_PADDING || save->count)
  && (token->type != end || nested))
-   {
- if (save->count == 0)
-   {
- _cpp_init_tokenru

Re: [x86_64 PATCH] Support wide immediate constants in STV.

2024-08-15 Thread Uros Bizjak
On Thu, Aug 15, 2024 at 11:34 AM Roger Sayle  wrote:
>
>
> As requested this patch is split out from my earlier submission.
> This patch provides more accurate costs/gains for (wide) immediate
> constants in STV, suitably adjusting the costs/gains when the highpart
> and lowpart words are the same.  One minor complication is that the
> middle-end assumes (when generating memset) that SSE constants will
> be shared/amortized across multiple consecutive writes.  Hence to
> avoid testsuite regressions, we add a heuristic that considers an immediate
> constant to be very cheap, if that same immediate value occurs in the
> previous instruction or in the following instruction.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}
> with no new failures.  Ok for mainline?
>
>
> 2024-08-15  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386-features.cc (timode_immed_const_gain): New
> function to determine the gain/cost on a CONST_WIDE_INT.
> (local_duplicate_constant_p): Helper function to see if the
> same immediate constant appears in the previous or next insn.
> (timode_scalar_chain::compute_convert_gain): Fix whitespace.
> : Provide more accurate estimates using
> timode_immed_const_gain and local_duplicate_constant_p.
> : Handle CONSTANT_SCALAR_INT_P (src).

LGTM.

Thanks,
Uros.


Re: [Patch, rs6000, middle-end] v8: Add implementation for different targets for pair mem fusion

2024-08-15 Thread Richard Sandiford
Ajit Agarwal  writes:
> +static void
> +update_change (set_info *set)
> +{
> +  if (!set->has_any_uses ())
> +return;
> +
> +  auto *use = *set->all_uses ().begin ();
> +  do
> +{
> +  auto *next_use = use->next_use ();
> +  if (use->is_in_debug_insn ())
> + substitute_debug_use (use);
> +  else if (use->is_in_phi ())
> + {
> +   update_change (use->phi ());
> + }
> +  else
> + {
> +   crtl->ssa->remove_use (use);
> + }
> +  use = next_use;
> +}
> +  while (use);
> +}

This still contains direct modifications to the rtl-ssa ir (remove_use).
Which case is it handling?  REG_EQUAL notes?

The patch shouldn't rely on:

> diff --git a/gcc/rtl-ssa/functions.h b/gcc/rtl-ssa/functions.h
> index 567701c7995..3003251e62c 100644
> --- a/gcc/rtl-ssa/functions.h
> +++ b/gcc/rtl-ssa/functions.h
> @@ -222,6 +222,13 @@ public:
>template
>T *change_alloc (obstack_watermark &wm, Ts... args);
>  
> +  auto_vec &get_m_temp_defs () { return m_temp_defs; }
> +
> +  template
> +  T *allocate (Ts... args);
> +
> +  void remove_use (use_info *);
> +
>  private:
>class bb_phi_info;
>class build_info;

Those aren't things that rs6000 code should be doing directly.

For:

> +// Set subreg with use of INSN given SRC rtx instruction.
> +static void
> +set_load_subreg (insn_info *i1, rtx src)
> +{
> +  rtx set = single_set (i1->rtl());
> +  rtx old_dest = SET_DEST (set);
> +
> +  for (auto def : i1->defs ())
> +{
> +  auto set = dyn_cast (def);
> +  for (auto use : set->nondebug_insn_uses ())
> + {
> +   insn_info *info = use->insn ();
> +   if (!info || !info->rtl ())
> + continue;

I think this should check use->is_artificial () instead.  If that's false,
then use->insn () and use->insn ()->rtl () should both be nonnull, and there
should be no need to check.

> +
> +   rtx_insn *rtl_insn = info->rtl ();
> +   insn_propagation prop (rtl_insn, old_dest, src);
> +   if (!prop.apply_to_pattern (&PATTERN (rtl_insn)))
> + gcc_assert (0);
> + }
> +}
> +}

Could you combine this with the code that creates the insn_change
for the insn, rather than doing that in a separate function?
IMO it's better to keep the creation of insn_changes together
with the changes that they describe.

Thanks,
Richard


Re: [PATCH v2] [x86] Movement between GENERAL_REGS and SSE_REGS for TImode doesn't need secondary reload.

2024-08-15 Thread Hongtao Liu
On Thu, Aug 15, 2024 at 3:27 PM liuhongt  wrote:
>
> It results in 2 failures for x86_64-pc-linux-gnu{\
> -march=cascadelake};
>
> gcc: gcc.target/i386/extendditi3-1.c scan-assembler cqt?o
> gcc: gcc.target/i386/pr113560.c scan-assembler-times \tmulq 1
>
> For pr113560.c, now GCC generates mulx instead of mulq with
> -march=cascadelake, which should be optimal, so adjust testcase for
> that.
> For gcc.target/i386/extendditi2-1.c, RA happens to choose another
> register instead of rax and result in
>
> movq	%rdi, %rbp
> movq	%rdi, %rax
> sarq	$63, %rbp
> movq	%rbp, %rdx
>
> The patch adds a new define_peephole2 for that.
>
> gcc/ChangeLog:
>
> PR target/116274
> * config/i386/i386-expand.cc (ix86_expand_vector_move):
> Restrict special case TImode to 128-bit vector conversions via
> V2DI under ix86_pre_reload_split ().
> * config/i386/i386.cc (inline_secondary_memory_needed):
> Movement between GENERAL_REGS and SSE_REGS for TImode doesn't
> need secondary reload.
> * config/i386/i386.md (*extendsidi2_rex64): Add a
> define_peephole2 after it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr116274.c: New test.
> * gcc.target/i386/pr113560.c: Scan either mulq or mulx.
> ---
>  gcc/config/i386/i386-expand.cc   |  2 +-
>  gcc/config/i386/i386.cc  | 18 --
>  gcc/config/i386/i386.md  | 19 +++
>  gcc/testsuite/gcc.target/i386/pr113560.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr116274.c | 12 
>  5 files changed, 45 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr116274.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index bdbc1423267..ed546eeed6b 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -751,7 +751,7 @@ ix86_expand_vector_move (machine_mode mode, rtx 
> operands[])
>&& SUBREG_P (op1)
>&& GET_MODE (SUBREG_REG (op1)) == TImode
>&& TARGET_64BIT && TARGET_SSE
> -  && can_create_pseudo_p ())
> +  && ix86_pre_reload_split ())
>  {
>rtx tmp = gen_reg_rtx (V2DImode);
>rtx lo = gen_reg_rtx (DImode);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index f044826269c..4821892d1e0 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -20292,6 +20292,18 @@ inline_secondary_memory_needed (machine_mode mode, 
> reg_class_t class1,
>if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
> return true;
>
> +  /* If the target says that inter-unit moves are more expensive
> +than moving through memory, then don't generate them.  */
> +  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
> + || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
> +   return true;
> +
> +  /* Under SSE4.1, *movti_internal supports movement between
> +SSE_REGS and GENERAL_REGS with pinsrq and pextrq.  */
> +  if (TARGET_SSE4_1
> + && (TARGET_64BIT ? mode == TImode : mode == DImode))
> +   return false;
> +
>int msize = GET_MODE_SIZE (mode);
>
>/* Between SSE and general, we have moves no larger than word size.  */
> @@ -20304,12 +20316,6 @@ inline_secondary_memory_needed (machine_mode mode, 
> reg_class_t class1,
>
>if (msize < minsize)
> return true;
> -
> -  /* If the target says that inter-unit moves are more expensive
> -than moving through memory, then don't generate them.  */
> -  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
> - || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
> -   return true;
>  }
>
>return false;
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index db7789c17d2..1962a7ba5c9 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -5041,6 +5041,25 @@ (define_split
>DONE;
>  })
>
> +(define_peephole2
> +  [(set (match_operand:DI 0 "general_reg_operand")
> +   (match_operand:DI 1 "general_reg_operand"))
> +   (parallel [(set (match_dup 0)
> +  (ashiftrt:DI (match_dup 0)
> +   (const_int 63)))
> +  (clobber (reg:CC FLAGS_REG))])
> +   (set (match_operand:DI 2 "general_reg_operand") (match_dup 1))
> +   (set (match_operand:DI 3 "general_reg_operand") (match_dup 0))]
> +  "(optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
> +   && REGNO (operands[2]) == AX_REG
> +   && REGNO (operands[3]) == DX_REG
> +   && peep2_reg_dead_p (4, operands[0])
> +   && !reg_mentioned_p (operands[0], operands[1])
> +   && !reg_mentioned_p (operands[2], operands[0])"
> +  [(set (match_dup 2) (match_dup 1))
> +   (parallel [(set (match_dup 3) (ashiftrt:DI (match_dup 2) (const_int 63)))
> + (clobber (reg:CC FLAGS

Re: [PATCH v2] [x86] Movement between GENERAL_REGS and SSE_REGS for TImode doesn't need secondary reload.

2024-08-15 Thread Uros Bizjak
On Thu, Aug 15, 2024 at 9:27 AM liuhongt  wrote:
>
> It results in 2 failures for x86_64-pc-linux-gnu{\
> -march=cascadelake};
>
> gcc: gcc.target/i386/extendditi3-1.c scan-assembler cqt?o
> gcc: gcc.target/i386/pr113560.c scan-assembler-times \tmulq 1
>
> For pr113560.c, now GCC generates mulx instead of mulq with
> -march=cascadelake, which should be optimal, so adjust testcase for
> that.
> For gcc.target/i386/extendditi2-1.c, RA happens to choose another
> register instead of rax and result in
>
> movq	%rdi, %rbp
> movq	%rdi, %rax
> sarq	$63, %rbp
> movq	%rbp, %rdx
>
> The patch adds a new define_peephole2 for that.
>
> gcc/ChangeLog:
>
> PR target/116274
> * config/i386/i386-expand.cc (ix86_expand_vector_move):
> Restrict special case TImode to 128-bit vector conversions via
> V2DI under ix86_pre_reload_split ().
> * config/i386/i386.cc (inline_secondary_memory_needed):
> Movement between GENERAL_REGS and SSE_REGS for TImode doesn't
> need secondary reload.
> * config/i386/i386.md (*extendsidi2_rex64): Add a
> define_peephole2 after it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr116274.c: New test.
> * gcc.target/i386/pr113560.c: Scan either mulq or mulx.

OK, with updated comment, as proposed below.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc   |  2 +-
>  gcc/config/i386/i386.cc  | 18 --
>  gcc/config/i386/i386.md  | 19 +++
>  gcc/testsuite/gcc.target/i386/pr113560.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr116274.c | 12 
>  5 files changed, 45 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr116274.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index bdbc1423267..ed546eeed6b 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -751,7 +751,7 @@ ix86_expand_vector_move (machine_mode mode, rtx 
> operands[])
>&& SUBREG_P (op1)
>&& GET_MODE (SUBREG_REG (op1)) == TImode
>&& TARGET_64BIT && TARGET_SSE
> -  && can_create_pseudo_p ())
> +  && ix86_pre_reload_split ())
>  {
>rtx tmp = gen_reg_rtx (V2DImode);
>rtx lo = gen_reg_rtx (DImode);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index f044826269c..4821892d1e0 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -20292,6 +20292,18 @@ inline_secondary_memory_needed (machine_mode mode, 
> reg_class_t class1,
>if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
> return true;
>
> +  /* If the target says that inter-unit moves are more expensive
> +than moving through memory, then don't generate them.  */
> +  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
> + || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
> +   return true;
> +
> +  /* Under SSE4.1, *movti_internal supports movement between
> +SSE_REGS and GENERAL_REGS with pinsrq and pextrq.  */

With SSE4.1, *mov{ti,di}_internal supports moves between
SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.

> +  if (TARGET_SSE4_1
> + && (TARGET_64BIT ? mode == TImode : mode == DImode))
> +   return false;
> +
>int msize = GET_MODE_SIZE (mode);
>
>/* Between SSE and general, we have moves no larger than word size.  */
> @@ -20304,12 +20316,6 @@ inline_secondary_memory_needed (machine_mode mode, 
> reg_class_t class1,
>
>if (msize < minsize)
> return true;
> -
> -  /* If the target says that inter-unit moves are more expensive
> -than moving through memory, then don't generate them.  */
> -  if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
> - || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
> -   return true;
>  }
>
>return false;
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index db7789c17d2..1962a7ba5c9 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -5041,6 +5041,25 @@ (define_split
>DONE;
>  })
>
> +(define_peephole2
> +  [(set (match_operand:DI 0 "general_reg_operand")
> +   (match_operand:DI 1 "general_reg_operand"))
> +   (parallel [(set (match_dup 0)
> +  (ashiftrt:DI (match_dup 0)
> +   (const_int 63)))
> +  (clobber (reg:CC FLAGS_REG))])
> +   (set (match_operand:DI 2 "general_reg_operand") (match_dup 1))
> +   (set (match_operand:DI 3 "general_reg_operand") (match_dup 0))]
> +  "(optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
> +   && REGNO (operands[2]) == AX_REG
> +   && REGNO (operands[3]) == DX_REG
> +   && peep2_reg_dead_p (4, operands[0])
> +   && !reg_mentioned_p (operands[0], operands[1])
> +   && !reg_mentioned_p (operan

Re: [Patch, rs6000, middle-end] v8: Add implementation for different targets for pair mem fusion

2024-08-15 Thread Ajit Agarwal
Hello Richard:

On 15/08/24 3:45 pm, Richard Sandiford wrote:
> Ajit Agarwal  writes:
>> +static void
>> +update_change (set_info *set)
>> +{
>> +  if (!set->has_any_uses ())
>> +return;
>> +
>> +  auto *use = *set->all_uses ().begin ();
>> +  do
>> +{
>> +  auto *next_use = use->next_use ();
>> +  if (use->is_in_debug_insn ())
>> +substitute_debug_use (use);
>> +  else if (use->is_in_phi ())
>> +{
>> +  update_change (use->phi ());
>> +}
>> +  else
>> +{
>> +  crtl->ssa->remove_use (use);
>> +}
>> +  use = next_use;
>> +}
>> +  while (use);
>> +}
> 
> This still contains direct modifications to the rtl-ssa ir (remove_use).
> Which case is it handling?  REG_EQUAL notes?
> 

This is done to address your comments below:

Thanks.  It looks like you're updating just the definitions,
and then later updating the uses.  That isn't the way that rtl-ssa
is supposed to be used.  Each change set -- in other words, each call
to function_info::change_insns -- must go from a valid state to a valid
state.  That is, the RTL must be self-consistent before every individual
call to function_info::change_insns and must be self-consistent after
every individual call to function_info::change_insns.

This is what I meant before about:

  ... if we're removing a definition, all uses in "real"
  debug and non-debug insns must be removed either earlier than the
  definition or at the same time as the definition.  No such uses
  should remain.

Since you want to update all uses of register 178, you need to include
those updates in the same change set as the change to insns 130 and 131,
rather than doing them later.

> The patch shouldn't rely on:
> 
>> diff --git a/gcc/rtl-ssa/functions.h b/gcc/rtl-ssa/functions.h
>> index 567701c7995..3003251e62c 100644
>> --- a/gcc/rtl-ssa/functions.h
>> +++ b/gcc/rtl-ssa/functions.h
>> @@ -222,6 +222,13 @@ public:
>>template
>>T *change_alloc (obstack_watermark &wm, Ts... args);
>>  
>> +  auto_vec &get_m_temp_defs () { return m_temp_defs; }
>> +
>> +  template
>> +  T *allocate (Ts... args);
>> +
>> +  void remove_use (use_info *);
>> +
>>  private:
>>class bb_phi_info;
>>class build_info;
> 
> Those aren't things that rs6000 code should be doing directly.
> 
Would you mind explaining where to handle this?

> For:
> 
>> +// Set subreg with use of INSN given SRC rtx instruction.
>> +static void
>> +set_load_subreg (insn_info *i1, rtx src)
>> +{
>> +  rtx set = single_set (i1->rtl());
>> +  rtx old_dest = SET_DEST (set);
>> +
>> +  for (auto def : i1->defs ())
>> +{
>> +  auto set = dyn_cast (def);
>> +  for (auto use : set->nondebug_insn_uses ())
>> +{
>> +  insn_info *info = use->insn ();
>> +  if (!info || !info->rtl ())
>> +continue;
> 
> I think this should check use->is_artificial () instead.  If that's false,
> then use->insn () and use->insn ()->rtl () should both be nonnull, and there
> should be no need to check.
> 

Sure I will do that.

>> +
>> +  rtx_insn *rtl_insn = info->rtl ();
>> +  insn_propagation prop (rtl_insn, old_dest, src);
>> +  if (!prop.apply_to_pattern (&PATTERN (rtl_insn)))
>> +gcc_assert (0);
>> +}
>> +}
>> +}
> 
> Could you combine this with the code that creates the insn_change
> for the insn, rather than doing that in a separate function?
> IMO it's better to keep the creation of insn_changes together
> with the changes that they describe.
>

Sorry, I didn't get this.  Would you mind explaining where to make
the above change?

 
> Thanks,
> Richard

Thanks & Regards
Ajit


Re: [Patch, rs6000, middle-end] v8: Add implementation for different targets for pair mem fusion

2024-08-15 Thread Richard Sandiford
Ajit Agarwal  writes:
> Hello Richard:
>
> On 15/08/24 3:45 pm, Richard Sandiford wrote:
>> Ajit Agarwal  writes:
>>> +static void
>>> +update_change (set_info *set)
>>> +{
>>> +  if (!set->has_any_uses ())
>>> +return;
>>> +
>>> +  auto *use = *set->all_uses ().begin ();
>>> +  do
>>> +{
>>> +  auto *next_use = use->next_use ();
>>> +  if (use->is_in_debug_insn ())
>>> +   substitute_debug_use (use);
>>> +  else if (use->is_in_phi ())
>>> +   {
>>> + update_change (use->phi ());
>>> +   }
>>> +  else
>>> +   {
>>> + crtl->ssa->remove_use (use);
>>> +   }
>>> +  use = next_use;
>>> +}
>>> +  while (use);
>>> +}
>> 
>> This still contains direct modifications to the rtl-ssa ir (remove_use).
>> Which case is it handling?  REG_EQUAL notes?
>> 
>
> This is done to handle your below comments:
>
> Thanks.  It looks like you're updating just the definitions,
> and then later updating the uses.  That isn't the way that rtl-ssa
> is supposed to be used.  Each change set -- in other words, each call
> to function_info::change_insns -- must go from a valid state to a valid
> state.  That is, the RTL must be self-consistent before every individual
> call to function_info::change_insns and must be self-consistent after
> every individual call to function_info::change_insns.
>
> This is what I meant before about:
>
>   ... if we're removing a definition, all uses in "real"
>   debug and non-debug insns must be removed either earlier than the
>   definition or at the same time as the definition.  No such uses
>   should remain.
>
> Since you want to update all uses of register 178, you need to include
> those updates in the same change set as the change to insns 130 and 131,
> rather than doing them later.

The patch isn't using function_info::change_insns though.  It's calling
the private remove_use function directly.

>> The patch shouldn't rely on:
>> 
>>> diff --git a/gcc/rtl-ssa/functions.h b/gcc/rtl-ssa/functions.h
>>> index 567701c7995..3003251e62c 100644
>>> --- a/gcc/rtl-ssa/functions.h
>>> +++ b/gcc/rtl-ssa/functions.h
>>> @@ -222,6 +222,13 @@ public:
>>>template
>>>T *change_alloc (obstack_watermark &wm, Ts... args);
>>>  
>>> +  auto_vec &get_m_temp_defs () { return m_temp_defs; }
>>> +
>>> +  template
>>> +  T *allocate (Ts... args);
>>> +
>>> +  void remove_use (use_info *);
>>> +
>>>  private:
>>>class bb_phi_info;
>>>class build_info;
>> 
>> Those aren't things that rs6000 code should be doing directly.
>> 
> Would you mind explaining where to handle this.

Each change to an instruction should be done via function_info::change_insns.
The idea is that a change should be structured as follows:

(A) Call crtl->ssa->new_change_attempt ()

(B) For each instruction that you want to change:

(1) Create an insn_change.

(2) In that insn_change, describe the new uses and defs, and the
range of possible instruction positions (if the instruction is
allowed to move).

(3) Use validate_change/insn_propagation/etc. to change the RTL of
the instruction.

[(2) and (3) could be done in the opposite order]

At this stage, nothing is committed.  No insn_infos have changed and
all insn patterns can be reset to their original state by cancel_changes
(which happens automatically when an uncommitted change attempt goes out
of scope).

Next:

(C) Check whether the set of changes is valid and self-consistent.
Also check whether they are a win.  If no to either, abort the change.

(D) Call function_info::change_insns to commit and finalise the changes.

All the logic for updating "permanent" rtl-ssa structures should go in
function_info::change_insns.  It shouldn't be done by pair-fusion itself,
or by target code.

pair-fusion already works like this.  I think the rs6000 pass just needs
to hook into (B), so that it can update the uses fed by loads and the
definitions that feed stores.

>>> +
>>> + rtx_insn *rtl_insn = info->rtl ();
>>> + insn_propagation prop (rtl_insn, old_dest, src);
>>> + if (!prop.apply_to_pattern (&PATTERN (rtl_insn)))
>>> +   gcc_assert (0);
>>> +   }
>>> +}
>>> +}
>> 
>> Could you combine this with the code that creates the insn_change
>> for the insn, rather than doing that in a separate function?
>> IMO it's better to keep the creation of insn_changes together
>> with the changes that they describe.
>>
>
> Sorry I didn't get this. Would you mind explaining where to make
> the above change.

The above answer is supposed to cover this too.  Each change to a use
insn should involve:

- creating an insn_change

- using insn_propagation to change the rtl

- working out the new uses for the insn, and storing them in the
  insn_change (not the insn itself)

- adding the insn_change to the list of changes that pair-fusion performs
  for the load fusion.

Then, the loads and their uses will be updated in one go, by one call
to function_info::change_insns.

Please let me know if the explanation doe

[PATCH] c++/modules: Fix type lookup in DECL_TEMPLATE_INSTANTIATIONS [PR116364]

2024-08-15 Thread Nathaniel Shead
Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

We need to use the DECL_TEMPLATE_INSTANTIATIONS property to find
reachable specialisations from a template to ensure that any GM
specialisations are properly marked as reachable.

Currently the modules code uses the decl when rebuilding this property,
but this is not always correct; it appears that for type specialisations
we need to use the TREE_TYPE of the decl instead so that the
specialisation is correctly found.  This patch makes the required
adjustments.

PR c++/116364

gcc/cp/ChangeLog:

* cp-tree.h (get_mergeable_specialization_flags): Adjust
signature.
* module.cc (trees_out::decl_value): Indicate whether this is a
type or decl specialisation.
* pt.cc (get_mergeable_specialization_flags): Match against the
type of a non-decl specialisation.
(add_mergeable_specialization): Use the already calculated spec
instead of always adding decl to DECL_TEMPLATE_INSTANTIATIONS.

gcc/testsuite/ChangeLog:

* g++.dg/modules/tpl-spec-9_a.C: New test.
* g++.dg/modules/tpl-spec-9_b.C: New test.
* g++.dg/modules/tpl-spec-9_c.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/cp-tree.h|  3 ++-
 gcc/cp/module.cc|  3 ++-
 gcc/cp/pt.cc|  8 +---
 gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C | 12 
 gcc/testsuite/g++.dg/modules/tpl-spec-9_b.C |  5 +
 gcc/testsuite/g++.dg/modules/tpl-spec-9_c.C |  5 +
 6 files changed, 31 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C
 create mode 100644 gcc/testsuite/g++.dg/modules/tpl-spec-9_b.C
 create mode 100644 gcc/testsuite/g++.dg/modules/tpl-spec-9_c.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index a53fbcb43ec..039c70710a2 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7672,7 +7672,8 @@ extern void walk_specializations  (bool,
  void *),
 void *);
 extern tree match_mergeable_specialization (bool is_decl, spec_entry *);
-extern unsigned get_mergeable_specialization_flags (tree tmpl, tree spec);
+extern unsigned get_mergeable_specialization_flags (bool is_decl, tree tmpl,
+   tree spec);
 extern void add_mergeable_specialization(bool is_decl, spec_entry *,
 tree outer, unsigned);
 extern tree add_to_template_args   (tree, tree);
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index f4d137b13a1..c3218bd5caf 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -7981,7 +7981,8 @@ trees_out::decl_value (tree decl, depset *dep)
  auto *entry = reinterpret_cast  (dep->deps[0]);
 
  if (streaming_p ())
-   u (get_mergeable_specialization_flags (entry->tmpl, decl));
+   u (get_mergeable_specialization_flags (mk & MK_tmpl_decl_mask,
+  entry->tmpl, decl));
  tree_node (entry->tmpl);
  tree_node (entry->args);
}
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 8725a5eeb3f..1c531f456be 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -31559,13 +31559,14 @@ match_mergeable_specialization (bool decl_p, 
spec_entry *elt)
specialization lists of TMPL.  */
 
 unsigned
-get_mergeable_specialization_flags (tree tmpl, tree decl)
+get_mergeable_specialization_flags (bool decl_p, tree tmpl, tree decl)
 {
   unsigned flags = 0;
 
+  tree spec = decl_p ? decl : TREE_TYPE (decl);
   for (tree inst = DECL_TEMPLATE_INSTANTIATIONS (tmpl);
inst; inst = TREE_CHAIN (inst))
-if (TREE_VALUE (inst) == decl)
+if (TREE_VALUE (inst) == spec)
   {
flags |= 1;
break;
@@ -31623,7 +31624,8 @@ add_mergeable_specialization (bool decl_p, spec_entry 
*elt, tree decl,
 
   if (flags & 1)
 DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl)
-  = tree_cons (elt->args, decl, DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl));
+  = tree_cons (elt->args, elt->spec,
+  DECL_TEMPLATE_INSTANTIATIONS (elt->tmpl));
 
   if (flags & 2)
 {
diff --git a/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C 
b/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C
new file mode 100644
index 000..d7c02bb279d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/tpl-spec-9_a.C
@@ -0,0 +1,12 @@
+// PR c++/116364
+// { dg-additional-options "-fmodules-ts -Wno-global-module" }
+// { dg-module-cmi foo:part }
+
+module;
+template  struct S {};
+template <> struct S
+  { static constexpr bool value = true; };
+export module foo:part;
+
+export template 
+  constexpr bool result = S::value;
diff --git a/gcc/testsuite/g++.dg/modules/tpl-spec-9_b.C 
b/gcc/testsuite/g++.dg/modules/tpl-spec-9_b.C
new file mode 

Re: [PATCH] LoongArch: Implement scalar isinf, isnormal, and isfinite via fclass

2024-08-15 Thread Xi Ruoyao
On Mon, 2024-07-15 at 15:53 +0800, Lulu Cheng wrote:
> g++.dg/opt/pr107569.C and range-sincos.c vrp-float-abs-1.c is the
> same 
> issue, right?
> 
> And I have no objection to code modifications. But I think it's better
> to wait until this builtin
> 
> function is fixed.

Pushed r15-2931.  The ranger is already fixed at r15-2924.  The
redundant andi is fixed at r15-2426.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[Patch, Fortran, 77871, v1] Allow for class typed coarray parameter as dummy [PR77871]

2024-08-15 Thread Andre Vehreschild
Hi all,

The attached patch fixes another regression in coarrays, this time for
class-typed coarrays as dummy arguments.

Regtested ok on x86_64-pc-linux-gnu / Fedora 39. Ok for mainline?

Regards,
Andre
--
Andre Vehreschild * Email: vehre ad gmx dot de
From d16ef6fe8e792063064d930f1b3ffd31c74594e1 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild 
Date: Thu, 15 Aug 2024 13:49:49 +0200
Subject: [PATCH] [Fortran] Allow for class type coarray parameters. [PR77871]

gcc/fortran/ChangeLog:

	PR fortran/77871

	* trans-expr.cc (gfc_conv_derived_to_class): Assign token when
	converting a coarray to class.
	(gfc_get_tree_for_caf_expr): For classes get the caf decl from
	the saved descriptor.
	(gfc_get_caf_token_offset): Assert that coarray=lib is set and
	cover more cases where the tree having the coarray token can be.
	* trans-intrinsic.cc (gfc_conv_intrinsic_caf_get): Use unified
	test for pointers.

gcc/testsuite/ChangeLog:

	* gfortran.dg/coarray/dummy_3.f90: New test.
---
 gcc/fortran/trans-expr.cc | 36 ---
 gcc/fortran/trans-intrinsic.cc|  2 +-
 gcc/testsuite/gfortran.dg/coarray/dummy_3.f90 | 33 +
 3 files changed, 58 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/coarray/dummy_3.f90

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 8801a15c3a8..4681a131139 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -810,6 +810,16 @@ gfc_conv_derived_to_class (gfc_se *parmse, gfc_expr *e, gfc_symbol *fsym,
   /* Now set the data field.  */
   ctree = gfc_class_data_get (var);

+  if (flag_coarray == GFC_FCOARRAY_LIB && CLASS_DATA (fsym)->attr.codimension)
+{
+  tree token;
+  tmp = gfc_get_tree_for_caf_expr (e);
+  if (POINTER_TYPE_P (TREE_TYPE (tmp)))
+	tmp = build_fold_indirect_ref (tmp);
+  gfc_get_caf_token_offset (parmse, &token, nullptr, tmp, NULL_TREE, e);
+  gfc_add_modify (&parmse->pre, gfc_conv_descriptor_token (ctree), token);
+}
+
   if (optional)
 cond_optional = gfc_conv_expr_present (e->symtree->n.sym);

@@ -2368,6 +2378,10 @@ gfc_get_tree_for_caf_expr (gfc_expr *expr)

   if (expr->symtree->n.sym->ts.type == BT_CLASS)
 {
+  if (DECL_P (caf_decl) && DECL_LANG_SPECIFIC (caf_decl)
+	  && GFC_DECL_SAVED_DESCRIPTOR (caf_decl))
+	caf_decl = GFC_DECL_SAVED_DESCRIPTOR (caf_decl);
+
   if (expr->ref && expr->ref->type == REF_ARRAY)
 	{
 	  caf_decl = gfc_class_data_get (caf_decl);
@@ -2432,16 +2446,12 @@ gfc_get_caf_token_offset (gfc_se *se, tree *token, tree *offset, tree caf_decl,
 {
   tree tmp;

+  gcc_assert (flag_coarray == GFC_FCOARRAY_LIB);
+
   /* Coarray token.  */
   if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (caf_decl)))
-{
-  gcc_assert (GFC_TYPE_ARRAY_AKIND (TREE_TYPE (caf_decl))
-		== GFC_ARRAY_ALLOCATABLE
-		  || expr->symtree->n.sym->attr.select_type_temporary
-		  || expr->symtree->n.sym->assoc);
   *token = gfc_conv_descriptor_token (caf_decl);
-}
-  else if (DECL_LANG_SPECIFIC (caf_decl)
+  else if (DECL_P (caf_decl) && DECL_LANG_SPECIFIC (caf_decl)
 	   && GFC_DECL_TOKEN (caf_decl) != NULL_TREE)
 *token = GFC_DECL_TOKEN (caf_decl);
   else
@@ -2459,7 +2469,7 @@ gfc_get_caf_token_offset (gfc_se *se, tree *token, tree *offset, tree caf_decl,
   && (GFC_TYPE_ARRAY_AKIND (TREE_TYPE (caf_decl)) == GFC_ARRAY_ALLOCATABLE
 	  || GFC_TYPE_ARRAY_AKIND (TREE_TYPE (caf_decl)) == GFC_ARRAY_POINTER))
 *offset = build_int_cst (gfc_array_index_type, 0);
-  else if (DECL_LANG_SPECIFIC (caf_decl)
+  else if (DECL_P (caf_decl) && DECL_LANG_SPECIFIC (caf_decl)
 	   && GFC_DECL_CAF_OFFSET (caf_decl) != NULL_TREE)
 *offset = GFC_DECL_CAF_OFFSET (caf_decl);
   else if (GFC_TYPE_ARRAY_CAF_OFFSET (TREE_TYPE (caf_decl)) != NULL_TREE)
@@ -2526,11 +2536,13 @@ gfc_get_caf_token_offset (gfc_se *se, tree *token, tree *offset, tree caf_decl,
 }
   else if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (caf_decl)))
 tmp = gfc_conv_descriptor_data_get (caf_decl);
+  else if (INDIRECT_REF_P (caf_decl))
+tmp = TREE_OPERAND (caf_decl, 0);
   else
-   {
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (caf_decl)));
- tmp = caf_decl;
-   }
+{
+  gcc_assert (POINTER_TYPE_P (TREE_TYPE (caf_decl)));
+  tmp = caf_decl;
+}

   *offset = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
 			fold_convert (gfc_array_index_type, *offset),
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 0ecb0439778..586fc65f21d 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -1900,7 +1900,7 @@ gfc_conv_intrinsic_caf_get (gfc_se *se, gfc_expr *expr, tree lhs, tree lhs_kind,
   gfc_add_block_to_block (&se->post, &argse.post);

   caf_decl = gfc_get_tree_for_caf_expr (array_expr);
-  if (TREE_CODE (TREE_TYPE (caf_decl)) == REFERENCE_TYPE)
+  if (POINTER_TYPE_P (TREE_TYPE (caf_decl)))
 caf_decl = build_fold_indirect_ref_loc (input_location,

[PATCH] testsuite: Reduce cut-&-paste in scanltranstree.exp

2024-08-15 Thread Richard Sandiford
scanltranstree.exp defines some LTO wrappers around standard
non-LTO scanners.  Four of them are cut-&-paste variants of
one another, so this patch generates them from a single template.
It also does the same for scan-ltrans-tree-dump-times, so that
other *-times scanners can be added easily in future.

The scanners seem to be lightly used.  gcc.dg/ipa/ipa-icf-38.c uses
scan-ltrans-tree-dump{,-not} and libgomp.c/declare-variant-1.c
uses scan-ltrans-tree-dump-{not,times}.  Nothing currently seems
to use scan-ltrans-tree-dump-dem*.

Tested on the files above so far.  Surprisingly, it worked first time,
but I tested that deliberately introduced mistakes were flagged.
(That's my story anyway.)  OK if it passes full testing on
aarch64-linux-gnu & x86_64-linux-gnu?

Richard


gcc/testsuite/
* lib/scanltranstree.exp: Redefine the routines using two
templates.
---
 gcc/testsuite/lib/scanltranstree.exp | 186 +--
 1 file changed, 62 insertions(+), 124 deletions(-)

diff --git a/gcc/testsuite/lib/scanltranstree.exp 
b/gcc/testsuite/lib/scanltranstree.exp
index 79f05f0ffed..bc6e02dc369 100644
--- a/gcc/testsuite/lib/scanltranstree.exp
+++ b/gcc/testsuite/lib/scanltranstree.exp
@@ -19,130 +19,68 @@
 
 load_lib scandump.exp
 
-# Utility for scanning compiler result, invoked via dg-final.
-# Call pass if pattern is present, otherwise fail.
-#
-# Argument 0 is the regexp to match
-# Argument 1 is the name of the dumped tree pass
-# Argument 2 handles expected failures and the like
-proc scan-ltrans-tree-dump { args } {
-
-if { [llength $args] < 2 } {
-   error "scan-ltrans-tree-dump: too few arguments"
-   return
-}
-if { [llength $args] > 3 } {
-   error "scan-ltrans-tree-dump: too many arguments"
-   return
-}
-if { [llength $args] >= 3 } {
-   scan-dump "ltrans-tree" [lindex $args 0] \
- "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 1]" ".ltrans0.ltrans" \
- [lindex $args 2]
-} else {
-   scan-dump "ltrans-tree" [lindex $args 0] \
- "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 1]" ".ltrans0.ltrans"
-}
-}
-
-# Call pass if pattern is present given number of times, otherwise fail.
-# Argument 0 is the regexp to match
-# Argument 1 is number of times the regexp must be found
-# Argument 2 is the name of the dumped tree pass
-# Argument 3 handles expected failures and the like
-proc scan-ltrans-tree-dump-times { args } {
-
-if { [llength $args] < 3 } {
-   error "scan-ltrans-tree-dump-times: too few arguments"
-   return
-}
-if { [llength $args] > 4 } {
-   error "scan-ltrans-tree-dump-times: too many arguments"
-   return
-}
-if { [llength $args] >= 4 } {
-   scan-dump-times "ltrans-tree" [lindex $args 0] [lindex $args 1] \
-   "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 2]" \
-   ".ltrans0.ltrans" [lindex $args 3]
-} else {
-   scan-dump-times "ltrans-tree" [lindex $args 0] [lindex $args 1] \
-   "\[0-9\]\[0-9\]\[0-9\]t.[lindex $args 2]" 
".ltrans0.ltrans"
-}
+# The first item in the list is an LTO equivalent of the second item
+# in the list; see the documentation of the second item for details.
+foreach { name scan type suffix } {
+scan-ltrans-tree-dump scan-dump ltrans-tree t
+scan-ltrans-tree-dump-not scan-dump-not ltrans-tree t
+scan-ltrans-tree-dump-dem scan-dump-dem ltrans-tree t
+scan-ltrans-tree-dump-dem-not scan-dump-dem-not ltrans-tree t
+} {
+eval [string map [list @NAME@ $name \
+  @SCAN@ $scan \
+  @TYPE@ $type \
+  @SUFFIX@ $suffix] {
+proc @NAME@ { args } {
+   if { [llength $args] < 2 } {
+   error "@NAME@: too few arguments"
+   return
+   }
+   if { [llength $args] > 3 } {
+   error "@NAME@: too many arguments"
+   return
+   }
+   if { [llength $args] >= 3 } {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans" \
+   [lindex $args 2]
+   } else {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans"
+   }
+}
+}]
 }
 
-# Call pass if pattern is not present, otherwise fail.
-#
-# Argument 0 is the regexp to match
-# Argument 1 is the name of the dumped tree pass
-# Argument 2 handles expected failures and the like
-proc scan-ltrans-tree-dump-not { args } {
-
-if { [llength $args] < 2 } {
-   error "scan-ltrans-tree-dump-not: too few arguments"
-   return
-}
-if { [llength $args] > 3 } {
-   error "scan-ltrans-tree-dump-not: too many arguments"
-   return
-}
-if { [llength $args] >= 3 } {
-   scan-dump-no

Re: [PATCH] match: Fix A || B not optimized to true when !B implies A [PR114326]

2024-08-15 Thread Sam James
Konstantinos Eleftheriou  writes:

> From: kelefth 
>
> In expressions like (a != b || ((a ^ b) & CST0) == CST1) and
> (a != b || (a ^ b) == CST), (a ^ b) is folded to false.
> In the equivalent expressions (((a ^ b) & CST0) == CST1 || a != b) and
> ((a ^ b) == CST || a != b) this is not happening.
>
> This patch adds the following simplifications in match.pd:
> ((a ^ b) & CST0) == CST1 || a != b --> 0 == (CST1 || a != b)
> (a ^ b) == CST || a != b --> 0 == CST || (a != b)
>
>   PR tree-optimization/114326
>
> gcc/ChangeLog:
>
>   * match.pd: Add two patterns to fold a ^ b to 0, when a == b.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.dg/tree-ssa/fold-xor-and-or-1.c: New test.
>   * gcc.dg/tree-ssa/fold-xor-and-or-2.c: New test.
>   * gcc.dg/tree-ssa/fold-xor-or-1.c: New test.
>   * gcc.dg/tree-ssa/fold-xor-or-2.c: New test.
>
> Reviewed-by: Christoph Müllner 
> Signed-off-by: Philipp Tomsich 
> Signed-off-by: Konstantinos Eleftheriou 
> ---
>  gcc/match.pd  | 30 +++
>  .../gcc.dg/tree-ssa/fold-xor-and-or-1.c   | 17 +++
>  .../gcc.dg/tree-ssa/fold-xor-and-or-2.c   | 19 
>  gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or-1.c | 17 +++
>  gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or-2.c | 19 
>  5 files changed, 102 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/fold-xor-or-2.c
>
> [...]
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/fold-xor-and-or-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do-compile } */

/* { dg-do compile } */

Please fix each instance of that. Thanks!

> [...]

sam


signature.asc
Description: PGP signature


Re: [PATCH 2/5] testsuite: Add scan-ltrans-rtl for use in dg-final [PR116140]

2024-08-15 Thread Richard Sandiford
Alex Coplan  writes:
> This extends the scan-ltrans-tree* helpers to create RTL variants.  This
> is needed to check the behaviour of an RTL pass under LTO.
>
> In particular it's used by a later patch in the series to check that
> RTL unrolling is applied under LTO.
>
> Tested as a series on aarch64-linux-gnu, OK for trunk?
>
> gcc/ChangeLog:
>
>   PR libstdc++/116140
>   * doc/sourcebuild.texi: Document ltrans-rtl value of kind for
>   scan-<kind>-dump*.
>
> gcc/testsuite/ChangeLog:
>
>   PR libstdc++/116140
>   * lib/scanltranstree.exp (scan-ltrans-rtl-dump): New.
>   (scan-ltrans-rtl-dump-times): New.
>   (scan-ltrans-rtl-dump-not): New.
>   (scan-ltrans-rtl-dump-dem): New.
>   (scan-ltrans-rtl-dump-dem-not): New.
> ---
>  gcc/doc/sourcebuild.texi |   4 +-
>  gcc/testsuite/lib/scanltranstree.exp | 123 +++
>  2 files changed, 125 insertions(+), 2 deletions(-)

FTR: I've just posted:

  https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660523.html

in an attempt to make this patch simpler.  (In the end, that seemed easier
than trying to review whether all the manual updates were correct. :))

Thanks,
Richard


Re: [PATCH 3/5] testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]

2024-08-15 Thread Richard Sandiford
Alex Coplan  writes:
> I noticed while working on a test that uses LTO and requests a dump
> file, that we are failing to cleanup ltrans dump files in the testsuite.
>
> E.g. the test I was working on compiles with -flto
> -fdump-rtl-loop2_unroll, and we end up with the following file:
>
> ./gcc/testsuite/g++/pr116140.ltrans0.ltrans.287r.loop2_unroll
>
> being left behind by the testsuite.  This is problematic not just from a
> "missing cleanup" POV, but also because it can cause the test to pass
> spuriously when the test is re-run with an unpatched compiler (without
> the bug fix).  In the broken case, loop2_unroll isn't run at all, so we
> end up scanning the old dumpfile (from the previous test run) and making
> the dumpfile scan pass.
>
> Running with `-v -v` in RUNTESTFLAGS we can see the following cleanup
> attempt is made:
>
> remove-build-file 
> `pr116140.{C,exe}.{ltrans[0-9]*.,}[0-9][0-9][0-9]{l,i,r,t}.*'
>
> looking again at the ltrans dump file above we can see this will fail for two
> reasons:
>
>  - The actual dump file has no {C,exe} extension between the basename and
>ltrans0.
>  - The actual dump file has an additional `.ltrans` component after 
> `.ltrans0`.
>
> This patch therefore relaxes the pattern constructed for cleaning up such
> dumpfiles to also match dumpfiles with the above form.
>
> Running the testsuite before/after this patch shows the number of files in
> gcc/testsuite (in the build dir) with "ltrans" in the name goes from 1416 to 
> 62
> on aarch64.
>
> No regressions on aarch64-linux-gnu, OK for trunk?
>
> Thanks,
> Alex
>
> gcc/testsuite/ChangeLog:
>
>   PR libstdc++/116140
>   * lib/gcc-dg.exp (schedule-cleanups): Relax ltrans dumpfile
>   cleanup pattern to handle missing cases.
> ---
>  gcc/testsuite/lib/gcc-dg.exp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
> index 992062103c1..cdb677d7873 100644
> --- a/gcc/testsuite/lib/gcc-dg.exp
> +++ b/gcc/testsuite/lib/gcc-dg.exp
> @@ -190,7 +190,7 @@ proc schedule-cleanups { opts } {
>  # Handle ltrans files around -flto
>  if [regexp -- {(^|\s+)-flto(\s+|$)} $opts] {
>   verbose "Cleanup -flto seen" 4
> - set ltrans "{ltrans\[0-9\]*.,}"
> + set ltrans "{ltrans\[0-9\]*{.ltrans,}.,}"
>  } else {
>   set ltrans ""
>  }
> @@ -206,7 +206,7 @@ proc schedule-cleanups { opts } {
>   if {$basename_ext != ""} {
>   regsub -- {^.*\.} $basename_ext {} basename_ext
>   }
> - lappend tfiles "$stem.{$basename_ext,exe}"
> + lappend tfiles "$stem{.$basename_ext,.exe,}"
>   unset basename_ext
>   } else {
>   lappend tfiles $basename

Hmm, hadn't realised that we rely on shell expansion of braces for the
cleanup (if I've understood correctly).  That seems like a bashism and
wouldn't work for testing via dash, for instance.  But that's obviously
entirely pre-existing.

The patch LGTM.  OK if no-one objects by the time the other patches
are approved.

Thanks for noticing and for cleaning this up.

Richard


Re: [PATCH] Tweak base/index disambiguation in decompose_normal_address [PR116236]

2024-08-15 Thread Jeff Law




On 8/15/24 2:50 AM, Richard Sandiford wrote:

The PR points out that, for an address like:

   (plus (zero_extend X) Y)

decompose_normal_address doesn't establish a strong preference
between treating X as the base or Y as the base.  As the comment
in the patch says, zero_extend isn't enough on its own to assume
an index, at least not on POINTERS_EXTEND_UNSIGNED targets.
But in a construct like the one above, X and Y have different modes,
and it seems reasonable to assume that the one with the expected
address mode is the base.

This matters on targets like m68k that support index extension
and that require different classes for bases and indices.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  Andreas also confirms
that it fixes the m68k LRA problem.  OK to install?

Richard


gcc/
PR middle-end/116236
* rtlanal.cc (decompose_normal_address): Try to distinguish
bases and indices based on mode, before resorting to "baseness".
OK.  Thanks to everyone for chasing this down.  No idea where we sit 
with the conversion of m68k to LRA but this looks like it'd be helpful 
irrespective of that effort.


jeff



Re: [PATCH] late-combine: Preserve INSN_CODE when modifying notes [PR116343]

2024-08-15 Thread Jeff Law




On 8/15/24 2:45 AM, Richard Sandiford wrote:

When it removes a definition, late-combine tries to update all
uses in notes.  It does this using the same insn_propagation class
that it uses for patterns.

However, insn_propagation uses validate_change, which in turn
resets the INSN_CODE.  This is inefficient in the best case,
since it forces the pattern to be rerecognised even though
changing a note can't affect the INSN_CODE.  But in the PR
it's a correctness problem: resetting INSN_CODE means we lose
the NOOP_INSN_MOVE_CODE, which in turn means that rtl-ssa doesn't
queue it for deletion.

This patch adds a routine specifically for propagating into notes.
A belt-and-braces fix would be to rerecognise noop moves in
function_info::change_insns, but I can't think of a good reason
why that would be necessary, and it could paper over latent bugs.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR testsuite/116343
* recog.h (insn_propagation::apply_to_note): Declare.
* recog.cc (insn_propagation::apply_to_note): New function.
* late-combine.cc (insn_combination::substitute_note): Use
apply_to_note instead of apply_to_rvalue.
* rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Improve
dumping of costs for noop moves.
Having fought problems with changing INSN_CODEs and rerecognition, I've 
wondered if we really should have an API for that to always put things 
back the way they were.  Though I guess in this case and the one I 
looked at likely wouldn't want to use the same API.  So perhaps not a 
great idea.



OK

jeff


Re: [PATCH] Tweak base/index disambiguation in decompose_normal_address [PR116236]

2024-08-15 Thread Andreas Schwab
On Aug 15 2024, Jeff Law wrote:

> On 8/15/24 2:50 AM, Richard Sandiford wrote:
>> The PR points out that, for an address like:
>>(plus (zero_extend X) Y)
>> decompose_normal_address doesn't establish a strong preference
>> between treating X as the base or Y as the base.  As the comment
>> in the patch says, zero_extend isn't enough on its own to assume
>> an index, at least not on POINTERS_EXTEND_UNSIGNED targets.
>> But in a construct like the one above, X and Y have different modes,
>> and it seems reasonable to assume that the one with the expected
>> address mode is the base.
>> This matters on targets like m68k that support index extension
>> and that require different classes for bases and indices.
>> Tested on aarch64-linux-gnu & x86_64-linux-gnu.  Andreas also confirms
>> that it fixes the m68k LRA problem.  OK to install?
>> Richard
>> gcc/
>>  PR middle-end/116236
>>  * rtlanal.cc (decompose_normal_address): Try to distinguish
>>  bases and indices based on mode, before resorting to "baseness".
> OK.  Thanks to everyone for chasing this down.  No idea where we sit with
> the conversion of m68k to LRA but this looks like it'd be helpful
> irrespective of that effort.

With PR116236 and PR116374 the situation is looking quite well.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] libstdc++-v3: testsuite: Prune uncapitalized "in function" linker warning

2024-08-15 Thread Hans-Peter Nilsson
> Date: Wed, 14 Aug 2024 22:12:23 -0500
> From: Jacob Bachmeyer 

> Done and pushed to Savannah as commit 
> ed301dbd6a3d769670503ccfda1ea31b58d02547.  Please confirm that this 
> solves the problem.

Confirmed*...

> (Also note that you can now run DejaGnu from a Git checkout, simply use 
> the "runtest" in the Git working directory.  Any problems with this are 
> bugs and will be fixed.)

...using this setup.  Really nice!

(* Confirmed that the commit fixes the problem, also as a
sanity-check, verified that the combination with my patch
does not mess things up.)

As an aside for the gcc project only: by dejagnu now
properly handling this issue, it also showed that there's a
pruning-flaw with the g++ part of the testsuite (to wit, not
intended to be covered by my patch which was only for
libstdc++).  This means that either
gcc/testsuite/lib/prune.exp:prune_gcc_output has a flaw or
is not applied for e.g. g++.dg/modules/global-3,
g++.dg/modules/hello-1, g++.dg/modules/hello-2 and
g++.dg/modules/iostream-1.  I'll look into that.

brgds, H-P


Re: [Fortran, Patch, PR110033, v1] Fix associate for coarrays

2024-08-15 Thread Andre Vehreschild
Hi Harald, hi Paul,

thanks for the ok and the suggestions/recommendations on the testcase. I added
that and committed as: gcc-15-2935-gdbf4c574b92

@Paul: At the moment I am taking a look at 46371. The patch makes that proceed
a bit more, but still ICEing. I will address it and then check 56496.

Thanks again,
Andre

On Wed, 14 Aug 2024 21:21:17 +0200
Harald Anlauf  wrote:

> Hi Andre,
>
> Am 12.08.24 um 14:11 schrieb Andre Vehreschild:
> > Hi all,
> >
> > the attached two patches fix ASSOCIATE for coarrays, i.e. that a coarray
> > associated to a variable is also a coarray in the block of the ASSOCIATE
> > command. The patch has two parts:
> >
> > 1. pr110033p1_1.patch: Adds a corank member to the gfc_expr structure. I
> > decided to add it here and keep track of the corank of an expression,
> > because calling gfc_get_corank was getting too expensive with the associate
> > patch. This patch also improves the usage of coarrays in select type/rank
> > constructs.
> >
> > 2. pr110033p2_1.patch: The changes and testcase for PR 110033. In essence
> > the coarray is not detected correctly on the expression to associate to and
> > therefore not propagated correctly into the block of the ASSOCIATE command.
> > The patch adds correct treatment for propagating the coarray token into the
> > block, too.
> >
> > The costs of tracking the corank alongside the rank of an expression are
> > about 30 seconds real user time (i.e. time's "real" row) on a rather old
> > Intel i7-5775C@3.3GHz  with 24G RAM that was used for work during the test.
> > If need be I can tune that more.
> >
> > Regtests ok on x86_64-pc-linux-gnu / Fedora 39. Ok for mainline?
>
> Paul already gave a basic OK, and I won't object.
>
> However, the testcase should be fixed.  It is only correct for
> single-image runs!  (Verified with Intel ifx).
>
> You have:
>
>associate (y => x)
>  y = -1
>  y[1] = 35
>end associate
>
> and check:
>
>if (x /= 35) stop 1
>
> This should rather be
>
>if (x[1] /= 35) stop 1
>
> or for number of images > 1:
>
>if (this_image() == 1) then
>   if (x /= 35) stop 1
>else
>   if (x /= -1) stop 99
>end if
>
> and similarly
>
>if (.NOT. c%l) stop 3
>
> needs to be adjusted accordingly.
>
> Thanks,
> Harald
>
> > Regards,
> > Andre
> > --
> > Andre Vehreschild * Email: vehre ad gmx dot de
>


--
Andre Vehreschild * Email: vehre ad gmx dot de


[PATCH v3 0/5] aarch64: Fix intrinsic availability [PR112108]

2024-08-15 Thread Andrew Carlotti
This series of patches fixes issues with some intrinsics being incorrectly
gated by global target options, instead of just using function-specific target
options.  These issues have been present since the +tme, +memtag and +ls64
intrinsics were introduced.

Compared to the previous version, this series no longer adds feature checks to
the intrinsic expanders, and fixes various formatting issues pointed out by
Richard Sandiford.

Additionally, the series now refactors the checking of TARGET_GENERAL_REGS_ONLY
in check_required_extensions.  This refactor is included as a new patch (1/5)
to make the diffs more readable.


Bootstrapped and regression tested on aarch64.  Ok to merge?

Also, ok for backports to affected versions (with regression tests)?


[PATCH v3 1/5] aarch64: Refactor check_required_extensions

2024-08-15 Thread Andrew Carlotti
Replace TARGET_GENERAL_REGS_ONLY check with an explicit check that
aarch64_isa_flags enables all required extensions.  This will be more
flexible when repurposing this function for non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc
(check_required_registers): Remove target check and rename to...
(report_missing_registers): ...this.
(check_required_extensions): Refactor.


diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 
0a560eaedca14832bfacef3225bd467691e16e99..1fe380dd1efb953466fd902f86eef8938059a261
 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1094,27 +1094,19 @@ report_missing_extension (location_t location, tree 
fndecl,
   reported_missing_extension_p = true;
 }
 
-/* Check whether the registers required by SVE function fndecl are available.
-   Report an error against LOCATION and return false if not.  */
-static bool
-check_required_registers (location_t location, tree fndecl)
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+report_missing_registers (location_t location, tree fndecl)
 {
   /* Avoid reporting a slew of messages for a single oversight.  */
   if (reported_missing_registers_p)
-return false;
-
-  if (TARGET_GENERAL_REGS_ONLY)
-{
-  /* SVE registers are not usable when -mgeneral-regs-only option
-is specified.  */
-  error_at (location,
-   "ACLE function %qD is incompatible with the use of %qs",
-   fndecl, "-mgeneral-regs-only");
-  reported_missing_registers_p = true;
-  return false;
-}
+return;
 
-  return true;
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
 }
 
 /* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
@@ -1124,9 +1116,19 @@ static bool
 check_required_extensions (location_t location, tree fndecl,
   aarch64_feature_flags required_extensions)
 {
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
   auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
   if (missing_extensions == 0)
-return check_required_registers (location, fndecl);
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  report_missing_registers (location, fndecl);
+  return false;
+}
+
 
   if (missing_extensions & AARCH64_FL_SM_OFF)
 {


[PATCH v3 3/5] aarch64: Fix tme intrinsic availability

2024-08-15 Thread Andrew Carlotti
The availability of tme intrinsics was previously gated at both
initialisation time (using global target options) and usage time
(accounting for function-specific target options).  This patch removes
the check at initialisation time, and also moves the intrinsics out of
the header file to allow for better error messages (matching the
existing error messages for SVE intrinsics).

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_tme_builtins):
(aarch64_general_init_builtins): Move tme initialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check tme intrinsics.
* config/aarch64/arm_acle.h (__tstart, __tcommit, __tcancel)
(__ttest): Remove.
(_TMFAILURE_*): Define unconditionally.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/tme_guard-1.c: New test.
* gcc.target/aarch64/acle/tme_guard-2.c: New test.
* gcc.target/aarch64/acle/tme_guard-3.c: New test.
* gcc.target/aarch64/acle/tme_guard-4.c: New test.


diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
a07adcee6e266c947855041ed7432085f6448836..60e4c217921bc1144bfa436a168a4a1dc194f44e
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1791,21 +1791,17 @@ aarch64_init_tme_builtins (void)
 = build_function_type_list (void_type_node, uint64_type_node, NULL);
 
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
-= aarch64_general_add_builtin ("__builtin_aarch64_tstart",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TSTART);
+= aarch64_general_simulate_builtin ("__tstart", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TSTART);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
-= aarch64_general_add_builtin ("__builtin_aarch64_ttest",
-  ftype_uint64_void,
-  AARCH64_TME_BUILTIN_TTEST);
+= aarch64_general_simulate_builtin ("__ttest", ftype_uint64_void,
+   AARCH64_TME_BUILTIN_TTEST);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
-  ftype_void_void,
-  AARCH64_TME_BUILTIN_TCOMMIT);
+= aarch64_general_simulate_builtin ("__tcommit", ftype_void_void,
+   AARCH64_TME_BUILTIN_TCOMMIT);
   aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
-= aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
-  ftype_void_uint64,
-  AARCH64_TME_BUILTIN_TCANCEL);
+= aarch64_general_simulate_builtin ("__tcancel", ftype_void_uint64,
+   AARCH64_TME_BUILTIN_TCANCEL);
 }
 
 /* Add builtins for Random Number instructions.  */
@@ -2068,6 +2064,7 @@ handle_arm_acle_h (void)
 {
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
+  aarch64_init_tme_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2160,9 +2157,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_TME)
-aarch64_init_tme_builtins ();
-
   if (TARGET_MEMTAG)
 aarch64_init_memtag_builtins ();
 
@@ -2285,6 +2279,7 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
unsigned int code, tree fndecl,
unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
 {
+  tree decl = aarch64_builtin_decls[code];
   switch (code)
 {
 case AARCH64_RSR:
@@ -2297,15 +2292,29 @@ aarch64_general_check_builtin_call (location_t 
location, vec,
 case AARCH64_WSR64:
 case AARCH64_WSRF:
 case AARCH64_WSRF64:
-  tree addr = STRIP_NOPS (args[0]);
-  if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
- || TREE_CODE (addr) != ADDR_EXPR
- || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
-   {
- error_at (location, "first argument to %qD must be a string literal",
-   fndecl);
- return false;
-   }
+  {
+   tree addr = STRIP_NOPS (args[0]);
+   if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
+   || TREE_CODE (addr) != ADDR_EXPR
+   || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
+ {
+   error_at (location,
+ "first argument to %qD must be a string literal",
+ fndecl);
+   return false;
+ }
+   break;
+  }
+
+case AARCH64_TME_BUILTIN_TSTART:
+case AARCH64_TME_BUILTIN_TCOMMIT:
+case AARCH64_TME_BUILTIN_TTEST:
+case AARCH64_TME_BUILTIN_TCANCEL:
+  return aarch64_check_requir

[PATCH v3 2/5] aarch64: Move check_required_extensions

2024-08-15 Thread Andrew Carlotti
Move SVE extension checking functionality to aarch64-builtins.cc, so
that it can be shared by non-SVE intrinsics.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc (check_builtin_call)
(expand_builtin): Update calls to the below.
(report_missing_extension, report_missing_registers)
(check_required_extensions): Move out of aarch64_sve namespace,
rename, and move into...
* config/aarch64/aarch64-builtins.cc (aarch64_report_missing_extension)
(aarch64_report_missing_registers)
(aarch64_check_required_extensions) ...here.
* config/aarch64/aarch64-protos.h (aarch64_check_required_extensions):
Add prototype.


diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
30669f8aa1823b64689c67e306d38e234bd31698..a07adcee6e266c947855041ed7432085f6448836
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2180,6 +2180,106 @@ aarch64_general_builtin_decl (unsigned code, bool)
   return aarch64_builtin_decls[code];
 }
 
+/* True if we've already complained about attempts to use functions
+   when the required extension is disabled.  */
+static bool reported_missing_extension_p;
+
+/* True if we've already complained about attempts to use functions
+   which require registers that are missing.  */
+static bool reported_missing_registers_p;
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when extension EXTENSION is disabled.  */
+static void
+aarch64_report_missing_extension (location_t location, tree fndecl,
+ const char *extension)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_extension_p)
+return;
+
+  error_at (location, "ACLE function %qD requires ISA extension %qs",
+   fndecl, extension);
+  inform (location, "you can enable %qs using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma", extension);
+  reported_missing_extension_p = true;
+}
+
+/* Report an error against LOCATION that the user has tried to use
+   function FNDECL when non-general registers are disabled.  */
+static void
+aarch64_report_missing_registers (location_t location, tree fndecl)
+{
+  /* Avoid reporting a slew of messages for a single oversight.  */
+  if (reported_missing_registers_p)
+return;
+
+  error_at (location,
+   "ACLE function %qD is incompatible with the use of %qs",
+   fndecl, "-mgeneral-regs-only");
+  reported_missing_registers_p = true;
+}
+
+/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
+   enabled, given that those extensions are required for function FNDECL.
+   Report an error against LOCATION if not.  */
+bool
+aarch64_check_required_extensions (location_t location, tree fndecl,
+  aarch64_feature_flags required_extensions)
+{
+  if ((required_extensions & ~aarch64_isa_flags) == 0)
+return true;
+
+  auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+
+  if (missing_extensions == 0)
+{
+  /* All required extensions are enabled in aarch64_asm_isa_flags, so the
+error must be the use of general-regs-only.  */
+  aarch64_report_missing_registers (location, fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_OFF)
+{
+  error_at (location, "ACLE function %qD cannot be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_SM_ON)
+{
+  error_at (location, "ACLE function %qD can only be called when"
+   " SME streaming mode is enabled", fndecl);
+  return false;
+}
+
+  if (missing_extensions & AARCH64_FL_ZA_ON)
+{
+  error_at (location, "ACLE function %qD can only be called from"
+   " a function that has %qs state", fndecl, "za");
+  return false;
+}
+
+  static const struct {
+aarch64_feature_flags flag;
+const char *name;
+  } extensions[] = {
+#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
+{ AARCH64_FL_##IDENT, EXT_NAME },
+#include "aarch64-option-extensions.def"
+  };
+
+  for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
+if (missing_extensions & extensions[i].flag)
+  {
+   aarch64_report_missing_extension (location, fndecl, extensions[i].name);
+   return false;
+  }
+  gcc_unreachable ();
+}
+
 bool
 aarch64_general_check_builtin_call (location_t location, vec,
unsigned int code, tree fndecl,
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
f64afe2889018e1c4735a1677e6bf5febc4a7665..28613c425188cd270d9a2deeb91ae61b29aa1f07
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1008,6 +1008,8 

[PATCH v3 4/5] aarch64: Fix memtag intrinsic availability

2024-08-15 Thread Andrew Carlotti
The availability of memtag intrinsics and data types was determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. It also removes the macro
indirection from the header file - this simplifies the header, and
allows the missing extension error reporting to find the user-facing
intrinsic names.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (aarch64_init_memtag_builtins):
Replace internal builtin names with intrinsic names.
(aarch64_general_init_builtins): Move memtag initialisation...
(handle_arm_acle_h): ...to here, and remove feature check.
(aarch64_general_check_builtin_call): Check memtag intrinsics.
* config/aarch64/arm_acle.h (__arm_mte_create_random_tag)
(__arm_mte_exclude_tag, __arm_mte_ptrdiff)
(__arm_mte_increment_tag, __arm_mte_set_tag, __arm_mte_get_tag):
Remove.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/memtag_guard-1.c: New test.
* gcc.target/aarch64/acle/memtag_guard-2.c: New test.
* gcc.target/aarch64/acle/memtag_guard-3.c: New test.
* gcc.target/aarch64/acle/memtag_guard-4.c: New test.


diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
60e4c217921bc1144bfa436a168a4a1dc194f44e..9c6d9ec7537e7c473dc42a27a7737d80aab9cddb
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1932,27 +1932,27 @@ aarch64_init_memtag_builtins (void)
 
 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
   aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
-= aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
-  T, AARCH64_MEMTAG_BUILTIN_##F); \
+= aarch64_general_simulate_builtin ("__arm_mte_"#N, T, \
+   AARCH64_MEMTAG_BUILTIN_##F); \
   aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
  AARCH64_MEMTAG_BUILTIN_START - 1] = \
{T, CODE_FOR_##I};
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, create_random_tag, irg, fntype);
 
   fntype = build_function_type_list (uint64_type_node, ptr_type_node,
 uint64_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, exclude_tag, gmi, fntype);
 
   fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
 ptr_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, ptrdiff, subp, fntype);
 
   fntype = build_function_type_list (ptr_type_node, ptr_type_node,
 unsigned_type_node, NULL);
-  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
+  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, increment_tag, addg, fntype);
 
   fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
   AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
@@ -2065,6 +2065,7 @@ handle_arm_acle_h (void)
   if (TARGET_LS64)
 aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
+  aarch64_init_memtag_builtins ();
 }
 
 /* Initialize fpsr fpcr getters and setters.  */
@@ -2157,9 +2158,6 @@ aarch64_general_init_builtins (void)
   if (!TARGET_ILP32)
 aarch64_init_pauth_hint_builtins ();
 
-  if (TARGET_MEMTAG)
-aarch64_init_memtag_builtins ();
-
   if (in_lto_p)
 handle_arm_acle_h ();
 }
@@ -2316,7 +2314,12 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
 default:
   break;
 }
-  /* Default behavior.  */
+
+  if (code >= AARCH64_MEMTAG_BUILTIN_START
+  && code <= AARCH64_MEMTAG_BUILTIN_END)
+return aarch64_check_required_extensions (location, decl,
+ AARCH64_FL_MEMTAG);
+
   return true;
 }
 
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 
2d84ab1bd3f3241196727d7a632a155014708081..ab04326791309796125860ce64e63fe858a4a733
 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -287,29 +287,6 @@ __rndrrs (uint64_t *__res)
 
 #pragma GCC pop_options
 
-#pragma GCC push_options
-#pragma GCC target ("+nothing+memtag")
-
-#define __arm_mte_create_random_tag(__ptr, __u64_mask) \
-  __builtin_aarch64_memtag_irg(__ptr, __u64_mask)
-
-#define __arm_mte_exclude_tag(__ptr, __u64_excluded) \
-  __builtin_aarch64_memtag_gmi(__ptr, __u64_excluded)
-

[PATCH v3 5/5] aarch64: Fix ls64 intrinsic availability

2024-08-15 Thread Andrew Carlotti
The availability of ls64 intrinsics and data types was determined
solely by the globally specified architecture features, which did not
reflect any changes specified in target pragmas or attributes.

This patch removes the initialisation-time guards for the intrinsics,
and replaces them with checks at use time. We also get better error
messages when ls64 is not available (matching the existing error
messages for SVE intrinsics).

The data512_t type is made always available; this is consistent with the
present behaviour for Neon fp16/bf16 types.

gcc/ChangeLog:

PR target/112108
* config/aarch64/aarch64-builtins.cc (handle_arm_acle_h): Remove
feature check at initialisation.
(aarch64_general_check_builtin_call): Check ls64 intrinsics.
* config/aarch64/arm_acle.h: (data512_t) Make always available.

gcc/testsuite/ChangeLog:

PR target/112108
* gcc.target/aarch64/acle/ls64_guard-1.c: New test.
* gcc.target/aarch64/acle/ls64_guard-2.c: New test.
* gcc.target/aarch64/acle/ls64_guard-3.c: New test.
* gcc.target/aarch64/acle/ls64_guard-4.c: New test.


diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
9c6d9ec7537e7c473dc42a27a7737d80aab9cddb..eb878b933fe5ba4ee35a371d7149cd14ef161c2c
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2062,8 +2062,7 @@ aarch64_init_data_intrinsics (void)
 void
 handle_arm_acle_h (void)
 {
-  if (TARGET_LS64)
-aarch64_init_ls64_builtins ();
+  aarch64_init_ls64_builtins ();
   aarch64_init_tme_builtins ();
   aarch64_init_memtag_builtins ();
 }
@@ -2311,6 +2310,13 @@ aarch64_general_check_builtin_call (location_t location, 
vec,
   return aarch64_check_required_extensions (location, decl,
AARCH64_FL_TME);
 
+case AARCH64_LS64_BUILTIN_LD64B:
+case AARCH64_LS64_BUILTIN_ST64B:
+case AARCH64_LS64_BUILTIN_ST64BV:
+case AARCH64_LS64_BUILTIN_ST64BV0:
+  return aarch64_check_required_extensions (location, decl,
+   AARCH64_FL_LS64);
+
 default:
   break;
 }
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index 
ab04326791309796125860ce64e63fe858a4a733..ab4e7e60e046a9e9c81237de2ca5463c3d4f96ca
 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -265,9 +265,7 @@ __crc32d (uint32_t __a, uint64_t __b)
 #define _TMFAILURE_INT0x0080u
 #define _TMFAILURE_TRIVIAL0x0100u
 
-#ifdef __ARM_FEATURE_LS64
 typedef __arm_data512_t data512_t;
-#endif
 
 #pragma GCC push_options
 #pragma GCC target ("+nothing+rng")
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
new file mode 100644
index 
..7dfc193a2934c994220280990316027c07e75ac4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include <arm_acle.h>
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p); /* { dg-error {ACLE function '__arm_ld64b' requires 
ISA extension 'ls64'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
new file mode 100644
index 
..3ede05a81f026f8606ee2c9cd56f15ce45caa1c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.6-a" } */
+
+#include <arm_acle.h>
+
+#pragma GCC target("arch=armv8-a+ls64")
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
new file mode 100644
index 
..e0fccdad7bec4aa522fb709d010289fd02f91d05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+ls64 -mgeneral-regs-only" } */
+
+#include <arm_acle.h>
+
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c 
b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c
new file mode 100644
index 
..af1d9a4241fd0047c52735a8103eeaa45525ffc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/acle/ls64_guard-4.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8-a+ls64" } */
+
+#include <arm_acle.h>
+
+#pragma GCC target("arch=armv8.6-a")
+data512_t foo (void * p)
+{
+  return __arm_ld64b (p); /* { dg-error {ACLE function '__arm_ld64b' requires 
ISA extension 'ls64'} } */
+}


Re: [PATCH] ltmain.sh: allow more flags at link-time

2024-08-15 Thread Sam James
Sam James  writes:

> Eric Gallager  writes:
>
>> On Wed, Aug 14, 2024 at 8:50 AM Sam James  wrote:
>>>
>>> libtool defaults to filtering flags passed at link-time.
>>>
>>> This brings the filtering in GCC's 'fork' of libtool into sync with
>>> upstream libtool commit 22a7e547e9857fc94fe5bc7c921d9a4b49c09f8e.
>>
>> I think it'd be worthwhile to link to the upstream commit in the
>> ChangeLog / commit message, too. Also, are you sure that's the right
>> one? It looks just like a version revbump commit to me:
>> https://git.savannah.gnu.org/cgit/libtool.git/commit/?id=22a7e547e9857fc94fe5bc7c921d9a4b49c09f8e
>
> 'as of' meaning "this is the state of the repository when I
> checked", so if you want to check my work, you should checkout
> libtool.git at that commit and compare the product.
>
> There is no single commit which does this, it was done over
> many commits over many years. It's not worth trying to dig those many
> commits up, IMO.

... and of course, I made an error. While the explanation above is
correct, I actually missed 40b73c116e4f1c94b8f6c4ab60c7ec2036611fc6. But
the flags it adds aren't that interesting for us anyway. If approved,
I'll commit it with that variant if it's fine (which adds -fcilkplus and
-static-*).

Ultimately, this patch should overall be low-risk given it's not
*adding* flags, and if a flag is problematic, we should filter it
long before libtool's link stage anyway.

It's mostly about correctness for LTO option merging.


signature.asc
Description: PGP signature


[PATCH v2] Update LDPT_REGISTER_CLAIM_FILE_HOOK_V2 linker plugin hook

2024-08-15 Thread H.J. Lu
The new hook allows the linker plugin to distinguish calls to
claim_file_handler that know the object is being used by the linker
(from ldmain.c:add_archive_element), from calls that don't know it's
being used by the linker (from elf_link_is_defined_archive_symbol); in
the latter case, the plugin should avoid including the unused LTO archive
members in linker output.  To get the proper support for archives with
LTO common symbols, the linker fix for

https://sourceware.org/bugzilla/show_bug.cgi?id=32083

is required.

PR lto/116361
* lto-plugin.c (claim_file_handler_v2): Include the LTO object
only if it is known to be used for link output.

Signed-off-by: H.J. Lu 
---
 lto-plugin/lto-plugin.c | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/lto-plugin/lto-plugin.c b/lto-plugin/lto-plugin.c
index 152648338b9..453561daece 100644
--- a/lto-plugin/lto-plugin.c
+++ b/lto-plugin/lto-plugin.c
@@ -1286,13 +1286,17 @@ claim_file_handler_v2 (const struct 
ld_plugin_input_file *file, int *claimed,
  lto_file.symtab.syms);
   check (status == LDPS_OK, LDPL_FATAL, "could not add symbols");
 
-  LOCK_SECTION;
-  num_claimed_files++;
-  claimed_files =
-   xrealloc (claimed_files,
- num_claimed_files * sizeof (struct plugin_file_info));
-  claimed_files[num_claimed_files - 1] = lto_file;
-  UNLOCK_SECTION;
+  /* Include it only if it is known to be used for link output.  */
+  if (known_used)
+   {
+ LOCK_SECTION;
+ num_claimed_files++;
+ claimed_files =
+   xrealloc (claimed_files,
+ num_claimed_files * sizeof (struct plugin_file_info));
+ claimed_files[num_claimed_files - 1] = lto_file;
+ UNLOCK_SECTION;
+   }
 
   *claimed = 1;
 }
-- 
2.46.0



Re: [PATCH v3 0/5] aarch64: Fix intrinsic availability [PR112108]

2024-08-15 Thread Richard Sandiford
Andrew Carlotti  writes:
> This series of patches fixes issues with some intrinsics being incorrectly
> gated by global target options, instead of just using function-specific target
> options.  These issues have been present since the +tme, +memtag and +ls64
> intrinsics were introduced.
>
> Compared to the previous version, this series no longer adds feature checks to
> the intrinsic expanders, and fixes various formatting issues pointed out by
> Richard Sandiford.
>
> Additionally, the series now refactors the checking of 
> TARGET_GENERAL_REGS_ONLY
> in check_required_extensions.  This refactor is included as a new patch (1/5)
> to make the diffs more readable.
>
>
> Bootstrapped and regression tested on aarch64.  Ok to merge?

LGTM, thanks.  OK if there are no other comments before the weekend.

> Also, ok for backports to affected versions (with regression tests)?

Hmm, it seems a bit invasive.  And if the GCC 11 tag in the PR is
anything to go by, it sounds like this is already unfixable behaviour
in at least one release series.

Let's see if anyone else has any opinions.

Richard


[GCC 14] aarch64: Add another use of force_subreg [PR115464]

2024-08-15 Thread Richard Sandiford
I'd like to backport r15-1244 to GCC 14 and then apply the patch below.

===

This patch includes the testcase from r15-1399 plus a minimal
fix for it, without the other proactive uses of force_subreg.
We can backport other force_subreg calls later if they're shown
to be needed.

Bootstrapped & regression-tested on aarch64-linux-gnu.  I'll leave
a bit of time for comments and then push on Monday if no-one has any
objections before then.

Richard


gcc/
PR target/115464
* config/aarch64/aarch64-sve-builtins-base.cc
(svset_neonq_impl::expand): Use force_subreg instead of
lowpart_subreg.

gcc/testsuite/
PR target/115464
* gcc.target/aarch64/sve/acle/general/pr115464_2.c: New test.
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc   |  4 +++-
 .../gcc.target/aarch64/sve/acle/general/pr115464_2.c  | 11 +++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index c9182594bc1..241a249503f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1185,7 +1185,9 @@ public:
 if (BYTES_BIG_ENDIAN)
   return e.use_exact_insn (code_for_aarch64_sve_set_neonq (mode));
 insn_code icode = code_for_vcond_mask (mode, mode);
-e.args[1] = lowpart_subreg (mode, e.args[1], GET_MODE (e.args[1]));
+e.args[1] = force_subreg (mode, e.args[1], GET_MODE (e.args[1]),
+ subreg_lowpart_offset (mode,
+GET_MODE (e.args[1])));
 e.add_output_operand (icode);
 e.add_input_operand (icode, e.args[1]);
 e.add_input_operand (icode, e.args[0]);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c
new file mode 100644
index 000..f561c34f732
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr115464_2.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include <arm_neon_sve_bridge.h>
+
+svuint16_t
+convolve4_4_x (uint16x8x2_t permute_tbl, svuint16_t a)
+{
+return svset_neonq_u16 (a, permute_tbl.val[1]);
+}
-- 
2.25.1



[RESEND PATCH v5] RISC-V: use fclass insns to implement isfinite, isnormal and isinf builtins

2024-08-15 Thread Vineet Gupta
This was approved but resending so CI can retest after the ranges fixes.
---

Currently these builtins use float compare instructions which require
FP flags to be saved/restored which could be costly in uarch.
RV Base ISA already has FCLASS.{d,s,h} instruction to compare/identify FP
values w/o disturbing FP exception flags.

Now that upstream supports the corresponding optabs, wire them up in the
backend.

gcc/ChangeLog:
* config/riscv/riscv-protos.h (riscv_emit_fp_classify): New
function declaration.
* config/riscv/riscv.cc (riscv_emit_fp_classify): New helper for
the expanders.
* config/riscv/riscv.md: Add UNSPEC_FCLASS.
define_insn for fclass insn.
define_expand for isfinite, isnormal, isinf.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/fclass.c: New tests.

Signed-off-by: Vineet Gupta 
---
 gcc/config/riscv/riscv.md   | 63 +
 gcc/testsuite/gcc.target/riscv/fclass.c | 38 +++
 2 files changed, 101 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/fclass.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5e3ef789e42e..f8d8162c0f91 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -68,6 +68,7 @@
   UNSPEC_FMAX
   UNSPEC_FMINM
   UNSPEC_FMAXM
+  UNSPEC_FCLASS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -3478,6 +3479,68 @@
(set_attr "mode" "")
(set (attr "length") (const_int 16))])
 
+;; fclass instruction output bitmap
+;;   0 negative infinity
+;;   1 negative normal number.
+;;   2 negative subnormal number.
+;;   3 -0
+;;   4 +0
+;;   5 positive subnormal number.
+;;   6 positive normal number.
+;;   7 positive infinity
+;;   8 signaling NaN.
+;;   9 quiet NaN
+
+(define_insn "fclass"
+  [(set (match_operand:X0 "register_operand" "=r")
+   (unspec [(match_operand:ANYF 1 "register_operand" " f")]
+  UNSPEC_FCLASS))]
+  "TARGET_HARD_FLOAT"
+  "fclass.\t%0,%1";
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "")])
+
+;; Implements optab for isfinite, isnormal, isinf
+
+(define_int_iterator FCLASS_MASK [126 66 129])
+(define_int_attr fclass_optab
+  [(126"isfinite")
+   (66 "isnormal")
+   (129"isinf")])
+
+(define_expand "2"
+  [(match_operand  0 "register_operand" "=r")
+   (match_operand:ANYF 1 "register_operand" " f")
+   (const_int FCLASS_MASK)]
+  "TARGET_HARD_FLOAT"
+{
+  if (GET_MODE (operands[0]) != SImode
+  && GET_MODE (operands[0]) != word_mode)
+FAIL;
+
+  rtx t = gen_reg_rtx (word_mode);
+  rtx t_op0 = gen_reg_rtx (word_mode);
+
+  if (TARGET_64BIT)
+emit_insn (gen_fclassdi (t, operands[1]));
+  else
+emit_insn (gen_fclasssi (t, operands[1]));
+
+  riscv_emit_binary (AND, t, t, GEN_INT ());
+  rtx cmp = gen_rtx_NE (word_mode, t, const0_rtx);
+  emit_insn (gen_cstore4 (t_op0, cmp, t, const0_rtx));
+
+  if (TARGET_64BIT)
+{
+  t_op0 = gen_lowpart (SImode, t_op0);
+  SUBREG_PROMOTED_VAR_P (t_op0) = 1;
+  SUBREG_PROMOTED_SET (t_op0, SRP_SIGNED);
+}
+
+  emit_move_insn (operands[0], t_op0);
+  DONE;
+})
+
 (define_insn "*seq_zero_"
   [(set (match_operand:GPR   0 "register_operand" "=r")
(eq:GPR (match_operand:X 1 "register_operand" " r")
diff --git a/gcc/testsuite/gcc.target/riscv/fclass.c 
b/gcc/testsuite/gcc.target/riscv/fclass.c
new file mode 100644
index ..ea0f173ecf4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fclass.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-march=rv64gc -mabi=lp64d  -ftrapping-math" { target { rv64 } 
} } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -ftrapping-math" { target { rv32 } 
} } */
+
+int d_isfinite(double a)
+{
+  return __builtin_isfinite(a);
+}
+
+int d_isnormal(double a)
+{
+  return __builtin_isnormal(a);
+}
+
+int d_isinf(double a)
+{
+  return __builtin_isinf(a);
+}
+
+int f_isfinite(float a)
+{
+  return __builtin_isfinite(a);
+}
+
+int f_isnormal(float a)
+{
+  return __builtin_isnormal(a);
+}
+
+int f_isinf(float a)
+{
+  return __builtin_isinf(a);
+}
+
+/* { dg-final { scan-assembler-not   {\mfrflags}  } } */
+/* { dg-final { scan-assembler-not   {\mfsflags}  } } */
+/* { dg-final { scan-assembler-times {\tfclass} 6 } } */
-- 
2.43.0



Re: [PATCH v3 0/5] aarch64: Fix intrinsic availability [PR112108]

2024-08-15 Thread Andrew Carlotti
On Thu, Aug 15, 2024 at 05:15:03PM +0100, Richard Sandiford wrote:
> Andrew Carlotti  writes:
> > This series of patches fixes issues with some intrinsics being incorrectly
> > gated by global target options, instead of just using function-specific 
> > target
> > options.  These issues have been present since the +tme, +memtag and +ls64
> > intrinsics were introduced.
> >
> > Compared to the previous version, this series no longer adds feature checks 
> > to
> > the intrinsic expanders, and fixes various formatting issues pointed out by
> > Richard Sandiford.
> >
> > Additionally, the series now refactors the checking of 
> > TARGET_GENERAL_REGS_ONLY
> > in check_required_extensions.  This refactor is included as a new patch 
> > (1/5)
> > to make the diffs more readable.
> >
> >
> > Bootstrapped and regression tested on aarch64.  Ok to merge?
> 
> LGTM, thanks.  OK if there are no other comments before the weekend.
> 
> > Also, ok for backports to affected versions (with regression tests)?
> 
> Hmm, it seems a bit invasive.  And if the GCC 11 tag in the PR is
> anything to go by, it sounds like this is already unfixable behaviour
> in at least one release series.

I think the impact is minimal prior to FMV support, so backporting is less
important for older versions.  The series should backport cleanly to GCC 14,
but would have conflicts in earlier versions, so I think it would be sensible to
backport to GCC 14 and not further.

> Let's see if anyone else has any opinions.
> 
> Richard


[PATCH] gnat: fix lto-type-mismatch between C_Version_String and gnat_version_string [PR115917]

2024-08-15 Thread Arsen Arsenović
Reg-tested on x86_64-pc-linux-gnu with all languages and yes,rtl,extra
checking.

OK for trunk?

TIA, have a lovely day.
-- >8 --

gcc/ada/ChangeLog:

PR ada/115917
* gnatvsn.ads: Add note about the duplication of this value in
version.c.
* version.c (VER_LEN_MAX): Define to the same value as
Gnatvsn.Ver_Len_Max.
(gnat_version_string): Use VER_LEN_MAX as bound.
---
 gcc/ada/gnatvsn.ads | 3 ++-
 gcc/ada/version.c   | 5 -
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gnatvsn.ads b/gcc/ada/gnatvsn.ads
index 29238362cc04..f2082ece0965 100644
--- a/gcc/ada/gnatvsn.ads
+++ b/gcc/ada/gnatvsn.ads
@@ -83,7 +83,8 @@ package Gnatvsn is
--  space to store any possible version string value for checks. This
--  value should never be decreased in the future, but it would be
--  OK to increase it if absolutely necessary. If it is increased,
-   --  be sure to increase GNAT.Compiler.Version.Ver_Len_Max as well.
+   --  be sure to increase GNAT.Compiler.Version.Ver_Len_Max, and to update
+   --  the VER_LEN_MAX define in version.c as well.
 
Ver_Prefix : constant String := "GNAT Version: ";
--  Prefix generated by binder. If it is changed, be sure to change
diff --git a/gcc/ada/version.c b/gcc/ada/version.c
index 5e64edd0b17d..2fa9b8c2c859 100644
--- a/gcc/ada/version.c
+++ b/gcc/ada/version.c
@@ -31,4 +31,7 @@
 
 #include "version.h"
 
-char gnat_version_string[] = version_string;
+/* Logically a reference to Gnatvsn.Ver_Len_Max.  Please keep in sync.  */
+#define VER_LEN_MAX 256
+
+char gnat_version_string[VER_LEN_MAX] = version_string;
-- 
2.44.0



Re: [Fortran, Patch, PR110033, v1] Fix associate for coarrays

2024-08-15 Thread Harald Anlauf

Hi Andre,

Am 15.08.24 um 17:35 schrieb Andre Vehreschild:

Hi Harald, hi Paul,

thanks for the ok and the suggestions/recommendations on the testcase. I added
that and committed it as: gcc-15-2935-gdbf4c574b92


I didn't notice this while skimming over the patch, but
gcc-testresults has:

../../src-master/gcc/fortran/resolve.cc: In function ‘bool
resolve_operator(gfc_expr*)’:
../../src-master/gcc/fortran/resolve.cc:4649:22: error: too many
arguments for format [-Werror=format-extra-args]
 4649 |   gfc_error ("Inconsistent coranks for operator at %%L
and %%L",
  |
^~


The format strings should have contained %L's, not %%L.

A follow-up fix is pre-approved.

Cheers,
Harald



[PATCH 0/7] v3 of libdiagnostics

2024-08-15 Thread David Malcolm
Here's v3 of my patch kit for "libdiagnostics", which makes GCC's
diagnostics subsystem available as a shared library; see:
  https://gcc.gnu.org/wiki/libdiagnostics

New in v3:
* it bootstraps and passes regression tests
* I added an opt-in configure flag: --enable-libdiagnostics, which
  must be enabled to build it (along with --enable-host-shared)
* a new "sarif-replay" command-line tool that takes .sarif files
  and replays the diagnostics within them as if they were GCC
  diagnostics, in GCC's textual format (i.e. GCC as a SARIF *consumer*,
  as well as producer).  This is implemented on top of libdiagnostics
  hence I've been "eating my own dogfood"
* support for execution paths in libdiagnostics API
* lots of fixes

Patch 1 has libdiagnostics.h, the public header file
Patch 2 has the implementation
Patch 3 has the C++ wrapper API I added in v2
Patch 4 has a refactoring of gcc-dg.exp I needed for patch 5.
Patch 5 has the testsuite for libdiagnostics itself
Patch 6 implements JSON parsing support
Patch 7 implements the sarif-replay command-line tool, and its
  testsuite, exercising various valid, invalid, and malformed
  input files.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu,
both with and without --enable-libdiagnostics.
With --enable-libdiagnostics the patch kit has this effect:

  # of .sum files: 20->22 (+2)
  FAIL: 110
  PASS: 617481->617679 (+198) 100.03%
  XFAIL: 4512
  XPASS: 13
  UNTESTED: 136
  UNSUPPORTED: 8058

where the two new .sum files are:

  BUILD/gcc/testsuite/libdiagnostics/libdiagnostics.sum: 
PASS: 132 tests

  BUILD/gcc/testsuite/sarif-replay/sarif-replay.sum:
PASS: 66 tests

OK for trunk?

David Malcolm (7):
  libdiagnostics v3: header
  libdiagnostics v3: implementation
  libdiagnostics v3: add C++ wrapper API
  testsuite: move dg-test cleanup code from gcc-dg.exp to its own file
  libdiagnostics v3: test suite
  json: add json parsing support
  libdiagnostics: add a "sarif-replay" command-line tool [PR96032]

 configure |   42 +
 configure.ac  |   35 +
 contrib/regenerate-sarif-spec-index.py|   60 +
 gcc/Makefile.in   |  191 +-
 gcc/configure |   26 +-
 gcc/configure.ac  |   16 +
 gcc/diagnostic-event-id.h |6 +
 gcc/doc/install.texi  |   13 +
 gcc/json-parsing.cc   | 2394 +
 gcc/json-parsing.h|  113 +
 gcc/json.cc   |2 +-
 gcc/json.h|  122 +-
 gcc/libdiagnostics++.h|  595 
 gcc/libdiagnostics.cc | 1652 
 gcc/libdiagnostics.h  |  691 +
 gcc/libdiagnostics.map|   72 +
 gcc/libsarifreplay.cc | 1747 
 gcc/libsarifreplay.h  |   59 +
 gcc/sarif-replay.cc   |  239 ++
 gcc/sarif-spec-urls.def   |  496 
 gcc/selftest-run-tests.cc |1 +
 gcc/selftest.h|1 +
 gcc/testsuite/lib/dg-test-cleanup.exp |  116 +
 gcc/testsuite/lib/gcc-dg.exp  |  106 +-
 gcc/testsuite/lib/sarif-replay-dg.exp |   90 +
 gcc/testsuite/lib/sarif-replay.exp|  204 ++
 .../libdiagnostics.dg/libdiagnostics.exp  |  296 ++
 gcc/testsuite/libdiagnostics.dg/sarif.py  |   23 +
 gcc/testsuite/libdiagnostics.dg/test-dump.c   |   69 +
 .../libdiagnostics.dg/test-error-c.py |   54 +
 .../test-error-with-note-c.py |   50 +
 .../libdiagnostics.dg/test-error-with-note.c  |   74 +
 .../libdiagnostics.dg/test-error-with-note.cc |   55 +
 gcc/testsuite/libdiagnostics.dg/test-error.c  |   59 +
 gcc/testsuite/libdiagnostics.dg/test-error.cc |   47 +
 .../libdiagnostics.dg/test-fix-it-hint-c.py   |   46 +
 .../libdiagnostics.dg/test-fix-it-hint.c  |   81 +
 .../libdiagnostics.dg/test-fix-it-hint.cc |   74 +
 .../libdiagnostics.dg/test-helpers++.h|   28 +
 .../libdiagnostics.dg/test-helpers.h  |   72 +
 .../libdiagnostics.dg/test-labelled-ranges.c  |   69 +
 .../libdiagnostics.dg/test-labelled-ranges.cc |   64 +
 .../libdiagnostics.dg/test-labelled-ranges.py |   48 +
 .../test-logical-location-c.py|   37 +
 .../libdiagnostics.dg/test-logical-location.c |   79 +
 .../libdiagnostics.dg/test-metadata-c.py  |   45 +
 .../libdiagnostics.dg/test-metadata.c |   61 +
 .../test-multiple-lines-c.py  |   83 +
 .../libdiagnostics.dg/test-multiple-lines.c   |   76 +
 .../libdiagnostics.dg/test-no-column-c.py |   35 +
 .../libdiagnostics.dg/test-no-column.c|   52 +
 .../test-no-diagnostics-c.py  |   42 +
 .../libdiagnostics.dg/test-no-

[PATCH 1/7] libdiagnostics v3: header

2024-08-15 Thread David Malcolm
Changed in v3:
* Added support for execution paths
* Moved the test cases to another patch
* diagnostic_manager_add_sarif_sink: add param "main_input_file"
* Added diagnostic_text_sink_set_colorize
* Added DIAGNOSTIC_LEVEL_SORRY
* Updated copyright year

Changed in v2:
* Changed from diagnostic_location_t -> const diagnostic_physical_location *
* Add entrypoint: diagnostic_finish_va
* add new type diagnostic_text_sink, and new entrypoints for
  enabling/disabling options on it
* add new debugging entrypoints for dumping objects to a FILE *
* new test cases

Blurb from v1:

Here's a work-in-progress patch for GCC that adds a libdiagnostics.h
header describing the public interface, along with various testcases
that show usage examples for the API.  Various aspects of this need
work; posting now for early feedback on overall direction.

How does the interface look?

gcc/ChangeLog:
* libdiagnostics.h: New file.
---
 gcc/libdiagnostics.h | 691 +++
 1 file changed, 691 insertions(+)
 create mode 100644 gcc/libdiagnostics.h

diff --git a/gcc/libdiagnostics.h b/gcc/libdiagnostics.h
new file mode 100644
index ..43138a911123
--- /dev/null
+++ b/gcc/libdiagnostics.h
@@ -0,0 +1,691 @@
+/* A pure C API for emitting diagnostics.
+   Copyright (C) 2023-2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBDIAGNOSTICS_H
+#define LIBDIAGNOSTICS_H
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ Compatibility macros.
+ **/
+
+/* This macro simplifies testing whether we are using gcc, and if it
+   is of a particular minimum version. (Both major & minor numbers are
+   significant.)  This macro will evaluate to 0 if we are not using
+   gcc at all.  */
+#define LIBDIAGNOSTICS_GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__)
+
+/**
+ Macros for attributes.
+ **/
+
+# if (LIBDIAGNOSTICS_GCC_VERSION >= 3003)
+#  define LIBDIAGNOSTICS_PARAM_MUST_BE_NON_NULL(ARG_NUM) __attribute__ ((__nonnull__ (ARG_NUM)))
+# else
+#  define LIBDIAGNOSTICS_PARAM_MUST_BE_NON_NULL(ARG_NUM)
+# endif /* GNUC >= 3.3 */
+
+#define LIBDIAGNOSTICS_PARAM_CAN_BE_NULL(ARG_NUM)
+  /* empty; for the human reader */
+
+#define LIBDIAGNOSTICS_PARAM_GCC_FORMAT_STRING(FMT_ARG_NUM, ARGS_ARG_NUM) \
+  LIBDIAGNOSTICS_PARAM_MUST_BE_NON_NULL (FMT_ARG_NUM)
+  /* In theory we'd also add
+   __attribute__ ((__format__ (__gcc_diag__, FMT_ARG_NUM, ARGS_ARG_NUM)))
+ if LIBDIAGNOSTICS_GCC_VERSION >= 4001
+ However, doing so leads to warnings from -Wformat-diag, which is part
+ of -Wall but undocumented, and much fussier than I'd want to inflict
+ on users of libdiagnostics.  */
+
+/**
+ Data structures and types.
+ All structs within the API are opaque.
+ **/
+
+/* An opaque bundle of state for a client of the library.
+   Has zero or more "sinks" to which diagnostics are emitted.
+   Responsibilities:
+   - location-management
+   - caching of source file content
+   - patch generation.  */
+typedef struct diagnostic_manager diagnostic_manager;
+
+/* Types relating to diagnostic output sinks.  */
+
+typedef struct diagnostic_text_sink diagnostic_text_sink;
+
+/* An enum for determining if we should colorize a text output sink.  */
+enum diagnostic_colorize
+{
+  DIAGNOSTIC_COLORIZE_IF_TTY,
+  DIAGNOSTIC_COLORIZE_NO,
+  DIAGNOSTIC_COLORIZE_YES
+};
+
+/* An enum for choosing the SARIF version for a SARIF output sink.
+   Eventually the SARIF output may support multiple SARIF versions.  */
+
+enum diagnostic_sarif_version
+{
+  DIAGNOSTIC_SARIF_VERSION_2_1_0
+};
+
+/* Types relating to "physical" source locations i.e. locations within
+   specific files expressed via line/column.  */
+
+/* Opaque type describing a particular input file.  */
+typedef struct diagnostic_file diagnostic_file;
+
+/* Opaque type representing a key into a database of source locations within
+   a diagnostic_manager.  Locations are created by various API 

[PATCH 3/7] libdiagnostics v3: add C++ wrapper API

2024-08-15 Thread David Malcolm
Changed in v3:
* Moved the testsuite to a separate patch
* Updated copyright year
* class text_sink: New.
* class file: Add default ctor, copy ctor, move ctor; make m_inner
  non-const
* class physical_location: Add default ctor
* class logical_location: Make m_inner non-const
* class execution_path: New
* class diagnostic: Add member functions: add_rule, take_execution_path,
  finish_va
* class manager: Add alternate ctor; add m_owned bool and use in dtor;
  delete copy ctor; add move ctor; add member functions set_tool_name,
  set_full_name, set_version_string, set_version_url,
  new_execution_path.  Add param "main_input_file" to add_sarif_sink.

Blurb from v2:

This is new in v2: a C++ wrapper API that provides some syntactic sugar for
calling into libdiagnostics.{h,so}.

I've been "eating my own dogfood" with this by using it to write a simple
client that reads a SARIF file and dumps it using the text sink:
  https://github.com/davidmalcolm/libdiagnostics-sarif-dump

gcc/ChangeLog:
* libdiagnostics++.h: New file.
---
 gcc/libdiagnostics++.h | 595 +
 1 file changed, 595 insertions(+)
 create mode 100644 gcc/libdiagnostics++.h

diff --git a/gcc/libdiagnostics++.h b/gcc/libdiagnostics++.h
new file mode 100644
index ..14c84934a446
--- /dev/null
+++ b/gcc/libdiagnostics++.h
@@ -0,0 +1,595 @@
+/* A C++ wrapper API around libdiagnostics.h for emitting diagnostics.
+   Copyright (C) 2023-2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBDIAGNOSTICSPP_H
+#define LIBDIAGNOSTICSPP_H
+
+#include "libdiagnostics.h"
+
+namespace libdiagnostics {
+
+typedef diagnostic_line_num_t line_num_t;
+typedef diagnostic_column_num_t column_num_t;
+
+class file;
+class physical_location;
+class logical_location;
+class execution_path;
+class group;
+class manager;
+class diagnostic;
+
+/* Wrapper around a borrowed diagnostic_text_sink *.  */
+
+class text_sink
+{
+public:
+  text_sink (diagnostic_text_sink *inner)
+  : m_inner (inner)
+  {
+  }
+
+  void
+  set_source_printing_enabled (int value)
+  {
+diagnostic_text_sink_set_source_printing_enabled (m_inner, value);
+  }
+
+  void
+  set_colorize (enum diagnostic_colorize colorize)
+  {
+diagnostic_text_sink_set_colorize (m_inner, colorize);
+  }
+
+  void
+  set_labelled_source_colorization_enabled (int value)
+  {
+diagnostic_text_sink_set_labelled_source_colorization_enabled (m_inner,
+  value);
+  }
+
+  diagnostic_text_sink *m_inner;
+};
+
+/* Wrapper around a const diagnostic_file *.  */
+
+class file
+{
+public:
+  file () : m_inner (nullptr) {}
+  file (const diagnostic_file *file) : m_inner (file) {}
+  file (const file &other) : m_inner (other.m_inner) {}
+  file &operator= (const file &other) { m_inner = other.m_inner; return *this; }
+
+  const diagnostic_file * m_inner;
+};
+
+/* Wrapper around a const diagnostic_physical_location *.  */
+
+class physical_location
+{
+public:
+  physical_location () : m_inner (nullptr) {}
+
+  physical_location (const diagnostic_physical_location *location)
+  : m_inner (location)
+  {}
+
+  const diagnostic_physical_location *m_inner;
+};
+
+/* Wrapper around a const diagnostic_logical_location *.  */
+
+class logical_location
+{
+public:
+  logical_location () : m_inner (nullptr) {}
+
+  logical_location (const diagnostic_logical_location *logical_loc)
+  : m_inner (logical_loc)
+  {}
+
+  const diagnostic_logical_location *m_inner;
+};
+
+/* RAII class around a diagnostic_execution_path *.  */
+
+class execution_path
+{
+public:
+  execution_path () : m_inner (nullptr), m_owned (false) {}
+
+  execution_path (diagnostic_execution_path *path)
+  : m_inner (path), m_owned (true)
+  {}
+
+  execution_path (const diagnostic_execution_path *path)
+  : m_inner (const_cast<diagnostic_execution_path *> (path)),
+m_owned (false)
+  {}
+
+  execution_path (const execution_path &other) = delete;
+  execution_path &operator= (const execution_path &other) = delete;
+
+  execution_path (execution_path &&other)
+  : m_inner (other.m_inner),
+m_owned (other.m_owned)
+  {
+other.m_inner = nullptr;
+other.m_owned = false;
+  }
+
+  execution_path &operator= (execution_path &&other)
+  {
+m_inner = other.m_inner;
+m_owned = other.m_owned;
+other.m_inner = nullptr;
+other.m_owned 

[PATCH 4/7] testsuite: move dg-test cleanup code from gcc-dg.exp to its own file

2024-08-15 Thread David Malcolm
I need to use this cleanup logic for the testsuite for libdiagnostics
where it's too awkward to directly use gcc-dg.exp itself.

No functional change intended.

gcc/testsuite/ChangeLog:
* lib/dg-test-cleanup.exp: New file, from material moved from
lib/gcc-dg.exp.
* lib/gcc-dg.exp: Add load_lib of dg-test-cleanup.exp.
(cleanup-after-saved-dg-test): Move to lib/dg-test-cleanup.exp.
(dg-test): Likewise for override.
(initialize_prune_notes): Likewise.

libatomic/ChangeLog:
* testsuite/lib/libatomic.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

libgomp/ChangeLog:
* testsuite/lib/libgomp.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

libitm/ChangeLog:
* testsuite/lib/libitm.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

libphobos/ChangeLog:
* testsuite/lib/libphobos-dg.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

libstdc++-v3/ChangeLog:
* testsuite/lib/libstdc++.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

libvtv/ChangeLog:
* testsuite/lib/libvtv.exp: Add
"load_gcc_lib dg-test-cleanup.exp".

Signed-off-by: David Malcolm 
---
 gcc/testsuite/lib/dg-test-cleanup.exp| 116 +++
 gcc/testsuite/lib/gcc-dg.exp | 102 +---
 libatomic/testsuite/lib/libatomic.exp|   1 +
 libgomp/testsuite/lib/libgomp.exp|   1 +
 libitm/testsuite/lib/libitm.exp  |   1 +
 libphobos/testsuite/lib/libphobos-dg.exp |   1 +
 libstdc++-v3/testsuite/lib/libstdc++.exp |   1 +
 libvtv/testsuite/lib/libvtv.exp  |   1 +
 8 files changed, 123 insertions(+), 101 deletions(-)
 create mode 100644 gcc/testsuite/lib/dg-test-cleanup.exp

diff --git a/gcc/testsuite/lib/dg-test-cleanup.exp b/gcc/testsuite/lib/dg-test-cleanup.exp
new file mode 100644
index ..b2b8507a0320
--- /dev/null
+++ b/gcc/testsuite/lib/dg-test-cleanup.exp
@@ -0,0 +1,116 @@
+#   Copyright (C) 1997-2024 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# We need to make sure that additional_* are cleared out after every
+# test.  It is not enough to clear them out *before* the next test run
+# because gcc-target-compile gets run directly from some .exp files
+# (outside of any test).  (Those uses should eventually be eliminated.)
+
+# Because the DG framework doesn't provide a hook that is run at the
+# end of a test, we must replace dg-test with a wrapper.
+
+if { [info procs saved-dg-test] == [list] } {
+rename dg-test saved-dg-test
+
+# Helper function for cleanups that should happen after the call
+# to the real dg-test, whether or not it returns normally, or
+# fails with an error.
+proc cleanup-after-saved-dg-test { } {
+   global additional_files
+   global additional_sources
+   global additional_sources_used
+   global additional_prunes
+   global compiler_conditional_xfail_data
+   global shouldfail
+   global expect_ice
+   global testname_with_flags
+   global set_target_env_var
+   global set_compiler_env_var
+   global saved_compiler_env_var
+   global keep_saved_temps_suffixes
+   global nn_line_numbers_enabled
+   global multiline_expected_outputs
+   global freeform_regexps
+   global save_linenr_varnames
+
+   set additional_files ""
+   set additional_sources ""
+   set additional_sources_used ""
+   set additional_prunes ""
+   set shouldfail 0
+   set expect_ice 0
+   if [info exists set_target_env_var] {
+   unset set_target_env_var
+   }
+   if [info exists set_compiler_env_var] {
+   restore-compiler-env-var
+   unset set_compiler_env_var
+   unset saved_compiler_env_var
+   }
+   if [info exists keep_saved_temps_suffixes] {
+   unset keep_saved_temps_suffixes
+   }
+   unset_timeout_vars
+   if [info exists compiler_conditional_xfail_data] {
+   unset compiler_conditional_xfail_data
+   }
+   if [info exists testname_with_flags] {
+   unset testname_with_flags
+   }
+   set nn_line_numbers_enabled 0
+   set multiline_expected_outputs []
+   set freeform_regexps []
+
+   if { [info exists save_linenr_varnames] } {
+   foreach varname $save_linenr_varnames {
+   

[PATCH 5/7] libdiagnostics v3: test suite

2024-08-15 Thread David Malcolm
Changed in v3:
* split out the C and C++ API tests into this patch
* heavily rewritten libdiagnostics.exp; added support for Python tests
* tests updated for API changes, rewritten and extended

gcc/testsuite/ChangeLog:
* libdiagnostics.dg/libdiagnostics.exp: New, adapted from jit.exp.
* libdiagnostics.dg/sarif.py: New.
* libdiagnostics.dg/test-dump.c: New test.
* libdiagnostics.dg/test-error-c.py: New test.
* libdiagnostics.dg/test-error-with-note-c.py: New test.
* libdiagnostics.dg/test-error-with-note.c: New test.
* libdiagnostics.dg/test-error-with-note.cc: New test.
* libdiagnostics.dg/test-error.c: New test.
* libdiagnostics.dg/test-error.cc: New test.
* libdiagnostics.dg/test-fix-it-hint-c.py: New test.
* libdiagnostics.dg/test-fix-it-hint.c: New test.
* libdiagnostics.dg/test-fix-it-hint.cc: New test.
* libdiagnostics.dg/test-helpers++.h: New test.
* libdiagnostics.dg/test-helpers.h: New test.
* libdiagnostics.dg/test-labelled-ranges.c: New test.
* libdiagnostics.dg/test-labelled-ranges.cc: New test.
* libdiagnostics.dg/test-labelled-ranges.py: New test.
* libdiagnostics.dg/test-logical-location-c.py: New test.
* libdiagnostics.dg/test-logical-location.c: New test.
* libdiagnostics.dg/test-metadata-c.py: New test.
* libdiagnostics.dg/test-metadata.c: New test.
* libdiagnostics.dg/test-multiple-lines-c.py: New test.
* libdiagnostics.dg/test-multiple-lines.c: New test.
* libdiagnostics.dg/test-no-column-c.py: New test.
* libdiagnostics.dg/test-no-column.c: New test.
* libdiagnostics.dg/test-no-diagnostics-c.py: New test.
* libdiagnostics.dg/test-no-diagnostics.c: New test.
* libdiagnostics.dg/test-note-with-fix-it-hint-c.py: New test.
* libdiagnostics.dg/test-note-with-fix-it-hint.c: New test.
* libdiagnostics.dg/test-text-sink-options.c: New test.
* libdiagnostics.dg/test-warning-c.py: New test.
* libdiagnostics.dg/test-warning-with-path-c.py: New test.
* libdiagnostics.dg/test-warning-with-path.c: New test.
* libdiagnostics.dg/test-warning.c: New test.
* libdiagnostics.dg/test-write-sarif-to-file-c.py: New test.
* libdiagnostics.dg/test-write-sarif-to-file.c: New test.
* libdiagnostics.dg/test-write-text-to-file.c: New test.

Signed-off-by: David Malcolm 
---
 .../libdiagnostics.dg/libdiagnostics.exp  | 296 ++
 gcc/testsuite/libdiagnostics.dg/sarif.py  |  23 ++
 gcc/testsuite/libdiagnostics.dg/test-dump.c   |  69 
 .../libdiagnostics.dg/test-error-c.py |  54 
 .../test-error-with-note-c.py |  50 +++
 .../libdiagnostics.dg/test-error-with-note.c  |  74 +
 .../libdiagnostics.dg/test-error-with-note.cc |  55 
 gcc/testsuite/libdiagnostics.dg/test-error.c  |  59 
 gcc/testsuite/libdiagnostics.dg/test-error.cc |  47 +++
 .../libdiagnostics.dg/test-fix-it-hint-c.py   |  46 +++
 .../libdiagnostics.dg/test-fix-it-hint.c  |  81 +
 .../libdiagnostics.dg/test-fix-it-hint.cc |  74 +
 .../libdiagnostics.dg/test-helpers++.h|  28 ++
 .../libdiagnostics.dg/test-helpers.h  |  72 +
 .../libdiagnostics.dg/test-labelled-ranges.c  |  69 
 .../libdiagnostics.dg/test-labelled-ranges.cc |  64 
 .../libdiagnostics.dg/test-labelled-ranges.py |  48 +++
 .../test-logical-location-c.py|  37 +++
 .../libdiagnostics.dg/test-logical-location.c |  79 +
 .../libdiagnostics.dg/test-metadata-c.py  |  45 +++
 .../libdiagnostics.dg/test-metadata.c |  61 
 .../test-multiple-lines-c.py  |  83 +
 .../libdiagnostics.dg/test-multiple-lines.c   |  76 +
 .../libdiagnostics.dg/test-no-column-c.py |  35 +++
 .../libdiagnostics.dg/test-no-column.c|  52 +++
 .../test-no-diagnostics-c.py  |  42 +++
 .../libdiagnostics.dg/test-no-diagnostics.c   |  25 ++
 .../test-note-with-fix-it-hint-c.py   |  54 
 .../test-note-with-fix-it-hint.c  |  69 
 .../test-text-sink-options.c  |  59 
 .../libdiagnostics.dg/test-warning-c.py   |  54 
 .../test-warning-with-path-c.py   | 108 +++
 .../test-warning-with-path.c  | 125 
 .../libdiagnostics.dg/test-warning.c  |  65 
 .../test-write-sarif-to-file-c.py |  55 
 .../test-write-sarif-to-file.c|  55 
 .../test-write-text-to-file.c |  47 +++
 37 files changed, 2435 insertions(+)
 create mode 100644 gcc/testsuite/libdiagnostics.dg/libdiagnostics.exp
 create mode 100644 gcc/testsuite/libdiagnostics.dg/sarif.py
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-dump.c
 create mode 100644 gcc/testsuite/libdiagnostics.dg/test-error-c.py
 create mode 100644 gcc/testsuite/lib

[PATCH 2/7] libdiagnostics v3: implementation

2024-08-15 Thread David Malcolm
Changed in v3:
* Added a --enable-libdiagnostics to configure.ac.  It is disabled
  by default, and requires --enable-host-shared.
* Split out gcc/testsuite/libdiagnostics.dg/libdiagnostics.exp into
  another patch
* Update copyright year
* class diagnostic_logical_location: Add get_name_for_path_output.
* as_diagnostic_event_id: New function.
* on_begin_text_diagnostic: Make "info" const.
* diagnostic_manager::~diagnostic_manager: Free
  m_line_table.m_location_adhoc_data_map.data and
  m_line_table.info_ordinary.maps.
* diagnostic_manager::new_location_from_file_and_line: Move code into
  ensure_linemap_for_file_and_line.
* diagnostic_manager::new_location_from_file_line_column: Use
  ensure_linemap_for_file_and_line rather than always using LC_ENTER.
* class libdiagnostics_path_event: New.
* class libdiagnostics_path_thread: New.
* struct diagnostic_execution_path: New.
* diagnostic::diagnostic: Initialize m_path.
* diagnostic::add_execution_path: New.
* diagnostic::take_execution_path: New.
* diagnostic::m_path: New field.
* diagnostic_text_sink::diagnostic_text_sink: Initialize
  m_current_logical_loc.  Call diagnostic_urls_init, set_show_cwe,
  set_show_rules, and set m_show_column.
* diagnostic_text_sink::on_begin_text_diagnostic: Various fixes.
* sarif_sink::sarif_sink: Add "main_input_file" param.
* diagnostic_manager::write_patch: Update for change to pretty_printer.
* diagnostic_manager::new_execution_path:  New.
* diagnostic_manager_add_sarif_sink: Add "main_input_file" param.
* diagnostic_manager_debug_dump_location: Call diagnostic_finish.
* diagnostic_set_logical_location: Allow logical_loc to be NULL.
* diagnostic_add_execution_path: New.
* diagnostic_manager_new_execution_path: New.
* diagnostic_take_execution_path: New.
* diagnostic_execution_path_release: New.
* diagnostic_execution_path_add_event: New.
* diagnostic_execution_path_add_event_va: New.

Changed in v2:
* Changed diagnostic_location_t -> const diagnostic_physical_location *
* new entrypoint: diagnostic_finish_va
* new debugging entrypoints for dumping to a FILE *
* Makefile.in: dropped FULL_DRIVER_NAME from libdiagnostics
* fix up for my recent changes to gcc/diagnostic.h

Blurb from v1:

Here's a work-in-progress patch for GCC that adds the implementation
of libdiagnostics.  Various aspects of this need work; posting now
for early feedback on overall direction.

ChangeLog:
* configure: Regenerate.
* configure.ac: Add --enable-libdiagnostics.

gcc/ChangeLog:
* Makefile.in (enable_libdiagnostics): New.
(lang_checks): If libdiagnostics is enabled, add
check-libdiagnostics.
(ALL_HOST_OBJS): If libdiagnostics is enabled, add
$(libdiagnostics_OBJS).
(start.encap): Add LIBDIAGNOSTICS.
(libdiagnostics_OBJS): New.
(LIBDIAGNOSTICS_VERSION_NUM): New, adapted from code in
jit/Make-lang.in.
(LIBDIAGNOSTICS_MINOR_NUM): Likewise.
(LIBDIAGNOSTICS_RELEASE_NUM): Likewise.
(LIBDIAGNOSTICS_FILENAME): Likewise.
(LIBDIAGNOSTICS_IMPORT_LIB): Likewise.
(libdiagnostics): Likewise.
(LIBDIAGNOSTICS_AGE): Likewise.
(LIBDIAGNOSTICS_BASENAME): Likewise.
(LIBDIAGNOSTICS_SONAME): Likewise.
(LIBDIAGNOSTICS_LINKER_NAME): Likewise.
(LIBDIAGNOSTICS_COMMA): Likewise.
(LIBDIAGNOSTICS_VERSION_SCRIPT_OPTION): Likewise.
(LIBDIAGNOSTICS_SONAME_OPTION): Likewise.
(LIBDIAGNOSTICS_SONAME_SYMLINK): Likewise.
(LIBDIAGNOSTICS_LINKER_NAME_SYMLINK): Likewise.
(LIBDIAGNOSTICS_FILENAME): Likewise.
(libdiagnostics.serial): Likewise.
(LIBDIAGNOSTICS_EXTRA_OPTS): Likewise.
(install): If libdiagnostics is enabled, add
install-libdiagnostics.
(libdiagnostics.install-headers): New.
(libdiagnostics.install-common): New, adapted from code in
jit/Make-lang.in.
(install-libdiagnostics): New.
* configure: Regenerate.
* configure.ac (check_languages): Add check-libdiagnostics.
(--enable-libdiagnostics): New.
* diagnostic-event-id.h (diagnostic_event_id_t::zero_based): New.
* doc/install.texi (--enable-libdiagnostics): New.
* libdiagnostics.cc: New file.
* libdiagnostics.map: New file.

Signed-off-by: David Malcolm 
---
 configure |   42 +
 configure.ac  |   35 +
 gcc/Makefile.in   |  179 +++-
 gcc/configure |   26 +-
 gcc/configure.ac  |   16 +
 gcc/diagnostic-event-id.h |6 +
 gcc/doc/install.texi  |7 +
 gcc/libdiagnostics.cc | 1652 +
 gcc/libdiagnostics.map|   72 ++
 9 files changed, 2032 insertions(+), 3 deletions(-)
 create mode 100644 gcc/libdiagnostics.cc
 create mode 100644 gcc/libdiagnostics.map

diff --git a/configure b/configure
index 51bf1d1add18..be6e41439990 100755
--- a/configure
+++ b/configure
@@ -691,6 +691,7 @@ extra_host_libiberty_configu

[PATCH 6/7] json: add json parsing support

2024-08-15 Thread David Malcolm
This patch implements JSON parsing support.

It's based on the parsing parts of the patch I posted here:
https://gcc.gnu.org/legacy-ml/gcc-patches/2017-08/msg00417.html
with the parsing moved to a separate source file and header, heavily
rewritten to capture source location information for JSON values, and
to capture errors via a result template.

I also added optional support for C and C++ style comments, which is
extremely useful in DejaGnu tests.

gcc/ChangeLog:
* Makefile.in (OBJS-libcommon): Add json-parsing.o.
* json-parsing.cc: New file.
* json-parsing.h: New file.
* json.cc (selftest::assert_print_eq): Remove "static".
* json.h (json::array::begin): New.
(json::array::end): New.
(json::array::length): New.
(json::array::get): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(is_a_helper ::test): New.
(selftest::assert_print_eq): New decl.
* selftest-run-tests.cc (selftest::run_tests): Call
selftest::json_parser_cc_tests.
* selftest.h (selftest::json_parser_cc_tests): New decl.

Signed-off-by: David Malcolm 
---
 gcc/Makefile.in   |2 +-
 gcc/json-parsing.cc   | 2394 +
 gcc/json-parsing.h|  113 ++
 gcc/json.cc   |2 +-
 gcc/json.h|  122 +-
 gcc/selftest-run-tests.cc |1 +
 gcc/selftest.h|1 +
 7 files changed, 2631 insertions(+), 4 deletions(-)
 create mode 100644 gcc/json-parsing.cc
 create mode 100644 gcc/json-parsing.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 3e4c7bd645f9..64dcaddfdfbe 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1832,7 +1832,7 @@ OBJS-libcommon = diagnostic-spec.o diagnostic.o diagnostic-color.o \
diagnostic-show-locus.o \
edit-context.o \
pretty-print.o intl.o \
-   json.o \
+   json.o json-parsing.o \
sbitmap.o \
vec.o input.o hash-table.o ggc-none.o memory-block.o \
selftest.o selftest-diagnostic.o sort.o \
diff --git a/gcc/json-parsing.cc b/gcc/json-parsing.cc
new file mode 100644
index ..78188c4fef9c
--- /dev/null
+++ b/gcc/json-parsing.cc
@@ -0,0 +1,2394 @@
+/* JSON parsing
+   Copyright (C) 2017-2024 Free Software Foundation, Inc.
+   Contributed by David Malcolm .
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#define INCLUDE_MEMORY
+#include "system.h"
+#include "coretypes.h"
+#include "json-parsing.h"
+#include "pretty-print.h"
+#include "math.h"
+#include "make-unique.h"
+#include "selftest.h"
+
+using namespace json;
+
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_FLOAT_NUMBER,
+  TOK_INTEGER_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id.  */
+
+static const char *token_id_name[] = {
+  "error",
+  "EOF",
+  "'['",
+  "'{'",
+  "']'",
+  "'}'",
+  "':'",
+  "','",
+  "'true'",
+  "'false'",
+  "'null'",
+  "string",
+  "number",
+  "number"
+};
+
+/* Tokens within the JSON lexer.  */
+
+struct token
+{
+  /* The kind of token.  */
+  enum token_id id;
+
+  /* The location of this token within the unicode
+ character stream.  */
+  location_map::range range;
+
+  union
+  {
+/* Value for TOK_ERROR and TOK_STRING.  */
+char *string;
+
+/* Value for TOK_FLOAT_NUMBER.  */
+double float_number;
+
+/* Value for TOK_INTEGER_NUMBER.  */
+long integer_number;
+  } u;
+};
+
+/* A class for lexing JSON.  */
+
+class lexer
+{
+ public:
+  lexer (bool support_comments);
+  ~lexer ();
+
+  std::unique_ptr add_utf8 (size_t le

Re: [Fortran, Patch, PR110033, v1] Fix associate for coarrays

2024-08-15 Thread Andre Vehreschild
Hi Harald,

whoopsie, I am sorry for that.

What I don't get is, why this has not been reported during my bootstrap. I am
doing this to bootstrap:

LANG=C "${SRCPATH}/configure" \
 --disable-multilib\
 --enable-languages=c,fortran,c++\
 --prefix="${INSTALLPATH}"
LANG=C make -j ${NOPARALLEL} bootstrap

What is wrong with that?

Er, Jakub, do you do the patch, as you have assigned yourself?

- Andre

On Thu, 15 Aug 2024 19:39:54 +0200
Harald Anlauf  wrote:

> Hi Andre,
> 
> Am 15.08.24 um 17:35 schrieb Andre Vehreschild:
> > Hi Harald, hi Paul,
> >
> > thanks for the ok and the suggestions/recommendations on the testcase. I
> > added that and commit as: gcc-15-2935-gdbf4c574b92  
> 
> I didn't notice this while skimming over the patch, but
> gcc-testresults has:
> 
> ../../src-master/gcc/fortran/resolve.cc: In function ‘bool
> resolve_operator(gfc_expr*)’:
> ../../src-master/gcc/fortran/resolve.cc:4649:22: error: too many
> arguments for format [-Werror=format-extra-args]
>   4649 |   gfc_error ("Inconsistent coranks for operator at %%L
> and %%L",
>|
> ^~
> 
> 
> The format strings should have contained %L's, not %%L.
> 
> A follow-up fix is pre-approved.
> 
> Cheers,
> Harald
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


[pushed] c++: fix up cpp23/consteval-if13.C test [PR115583]

2024-08-15 Thread Patrick Palka
Compiling with optimizations is needed to trigger the bug fixed
by r15-2369.

PR c++/115583

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/consteval-if13.C: Compile with -O.
---
 gcc/testsuite/g++.dg/cpp23/consteval-if13.C | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/g++.dg/cpp23/consteval-if13.C b/gcc/testsuite/g++.dg/cpp23/consteval-if13.C
index b98bbc33d13..b10ec18b3c6 100644
--- a/gcc/testsuite/g++.dg/cpp23/consteval-if13.C
+++ b/gcc/testsuite/g++.dg/cpp23/consteval-if13.C
@@ -1,5 +1,6 @@
 // PR c++/115583
 // { dg-do compile { target c++23 } }
+// { dg-additional-options -O }
 
 consteval int f(int i) {
   return i;
-- 
2.46.0.164.g477ce5ccd6



Re: [PATCH v1] Provide new GCC builtin __builtin_get_counted_by [PR116016]

2024-08-15 Thread Kees Cook
On Tue, Aug 13, 2024 at 03:33:26PM +, Qing Zhao wrote:
> With the addition of the 'counted_by' attribute and its wide roll-out
> within the Linux kernel, a use case has been found that would be very
> nice to have for object allocators: being able to set the counted_by
> counter variable without knowing its name.

This tests great for me; thanks! My prototype allocator example I used
for testing is here:
https://github.com/kees/kernel-tools/blob/trunk/fortify/get_counted_by.c

-- 
Kees Cook


Re: [Fortran, Patch, PR110033, v1] Fix associate for coarrays

2024-08-15 Thread Jakub Jelinek
On Thu, Aug 15, 2024 at 08:30:12PM +0200, Andre Vehreschild wrote:
> Hi Harald,
> 
> whoopsie, I am sorry for that.
> 
> What I don't get is, why this has not been reported during my bootstrap. I am
> doing this to bootstrap:
> 
> LANG=C "${SRCPATH}/configure" \
>  --disable-multilib\
>  --enable-languages=c,fortran,c++\
>  --prefix="${INSTALLPATH}"
> LANG=C make -j ${NOPARALLEL} bootstrap
> 
> What is wrong with that?

That should just work and catch it IMHO.

> Er, Jakub, do you do the patch, as you have assigned yourself?

I'm just 40 minutes into bootstrapping/regtesting that patch
on x86_64-linux and i686-linux, usually bootstrap takes ~ 50 minutes
and regtest ~ 65 minutes on the latter and ~ 85 minutes + ~ 70 minutes
on the former, so if you can get it tested faster than that, go ahead and
commit it, if not, I'll commit it when I'm done with testing.
It certainly got past the point of the failed bootstraps already.

Jakub



Re: [Fortran, Patch, PR110033, v1] Fix associate for coarrays

2024-08-15 Thread Andre Vehreschild
Hi Jakub,

I will not be faster by far. I have just started and am still in stage 1. So
please you go ahead.

And thank you very much for the help.

- Andre

On Thu, 15 Aug 2024 20:50:38 +0200
Jakub Jelinek  wrote:

> On Thu, Aug 15, 2024 at 08:30:12PM +0200, Andre Vehreschild wrote:
> > Hi Harald,
> >
> > whoopsie, I am sorry for that.
> >
> > What I don't get is, why this has not been reported during my bootstrap. I
> > am doing this to bootstrap:
> >
> > LANG=C "${SRCPATH}/configure" \
> >  --disable-multilib\
> >  --enable-languages=c,fortran,c++\
> >  --prefix="${INSTALLPATH}"
> > LANG=C make -j ${NOPARALLEL} bootstrap
> >
> > What is wrong with that?
>
> That should just work and catch it IMHO.
>
> > Er, Jakub, do you do the patch, as you have assigned yourself?
>
> I'm just 40 minutes into bootstrapping/regtesting that patch
> on x86_64-linux and i686-linux, usually bootstrap takes ~ 50 minutes
> and regtest ~ 65 minutes on the latter and ~ 85 minutes + ~ 70 minutes
> on the former, so if you can get it tested faster than that, go ahead and
> commit it, if not, I'll commit it when I'm done with testing.
> It certainly got past the point of the failed bootstraps already.
>
>   Jakub
>


--
Andre Vehreschild * Email: vehre ad gmx dot de


[pushed] wwwdocs: news: Avoid (obsolete) link for Blackfin

2024-08-15 Thread Gerald Pfeifer
This has started to redirect to an enormous URL, and it has been end of life
for half a decade now...

Pushed.

Gerald
---
 htdocs/news.html | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/htdocs/news.html b/htdocs/news.html
index c8271e0a..cef5ee80 100644
--- a/htdocs/news.html
+++ b/htdocs/news.html
@@ -923,10 +923,8 @@ pass.
 
 April 5, 2005
 
-Analog Devices has contributed a port for the
-Blackfin processor.  See the http://blackfin.uclinux.org/gf/";>Blackfin projects
-page for more information and ports of binutils and gdb.
+Analog Devices has contributed a port for the Blackfin processor
+alongside ports of binutils and gdb.
 
 
 February 06, 2005
-- 
2.46.0


[PATCH] Fortran: fix documentation of intrinsic RANDOM_INIT [PR114146]

2024-08-15 Thread Harald Anlauf
Dear all,

here's a documentation bugfix.  The previous wording was in conflict
with the standard, while the runtime behavior is apparently fine.

Checked with make dvi pdf .

OK for mainline?

Thanks,
Harald

From 4515018fd858fb6ae98b54d507596ef123d7580e Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Thu, 15 Aug 2024 22:31:11 +0200
Subject: [PATCH] Fortran: fix documentation of intrinsic RANDOM_INIT [PR114146]

gcc/fortran/ChangeLog:

	* intrinsic.texi: Fix documentation of arguments of RANDOM_INIT,
	which is conforming to the F2018 standard.
---
 gcc/fortran/intrinsic.texi | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi
index 3d3b9edf8e6..10683e1185d 100644
--- a/gcc/fortran/intrinsic.texi
+++ b/gcc/fortran/intrinsic.texi
@@ -11928,15 +11928,15 @@ Subroutine
 and it is @code{INTENT(IN)}.  If it is @code{.true.}, the seed is set to
 a processor-dependent value that is the same each time @code{RANDOM_INIT}
 is called from the same image.  The term ``same image'' means a single
-instance of program execution.  The sequence of random numbers is different
-for repeated execution of the program.  If it is @code{.false.}, the seed
-is set to a processor-dependent value.
+instance of program execution.  The sequence of random numbers is the same
+for repeated execution of the program with the same execution environment.
+If it is @code{.false.}, the seed is set to a processor-dependent value.
 @item @var{IMAGE_DISTINCT} @tab Shall be a scalar with a
 @code{LOGICAL} type, and it is @code{INTENT(IN)}.  If it is @code{.true.},
-the seed is set to a processor-dependent value that is distinct from th
+the seed is set to a processor-dependent value that is distinct from the
 seed set by a call to @code{RANDOM_INIT} in another image.  If it is
-@code{.false.}, the seed is set to a value that does depend which image called
-@code{RANDOM_INIT}.
+@code{.false.}, the seed is set to a value that is the same on every image
+calling @code{RANDOM_INIT}.
 @end multitable

 @item @emph{Example}:
--
2.35.3



[committed] fortran: Fix bootstrap in resolve.cc [PR116387]

2024-08-15 Thread Jakub Jelinek
Hi!

On Thu, Aug 15, 2024 at 08:50:38PM +0200, Jakub Jelinek wrote:
> > whoopsie, I am sorry for that.
> > 
> > What I don't get is, why this has not been reported during my bootstrap. I 
> > am
> > doing this to bootstrap:
> > 
> > LANG=C "${SRCPATH}/configure" \
> >  --disable-multilib\
> >  --enable-languages=c,fortran,c++\
> >  --prefix="${INSTALLPATH}"
> > LANG=C make -j ${NOPARALLEL} bootstrap
> > 
> > What is wrong with that?
> 
> That should just work and catch it IMHO.
> 
> > Er, Jakub, do you do the patch, as you have assigned yourself?
> 
> I'm just 40 minutes into bootstrapping/regtesting that patch
> on x86_64-linux and i686-linux, usually bootstrap takes ~ 50 minutes
> and regtest ~ 65 minutes on the latter and ~ 85 minutes + ~ 70 minutes
> on the former, so if you can get it tested faster than that, go ahead and
> commit it, if not, I'll commit it when I'm done with testing.
> It certainly got past the point of the failed bootstraps already.

Here is what I've committed after successful x86_64-linux and i686-linux
bootstraps and regtests:

2024-08-15  Jakub Jelinek  

PR bootstrap/116387
* resolve.cc (resolve_operator): Use %L rather than %%L in format
string.

--- gcc/fortran/resolve.cc.jj   2024-08-15 19:14:25.700837372 +0200
+++ gcc/fortran/resolve.cc  2024-08-15 19:58:04.512851806 +0200
@@ -4646,7 +4646,7 @@ resolve_operator (gfc_expr *e)
}
   else
{
- gfc_error ("Inconsistent coranks for operator at %%L and %%L",
+ gfc_error ("Inconsistent coranks for operator at %L and %L",
 &op1->where, &op2->where);
  return false;
}


Jakub



[PATCH] c++: Appertain standard attributes after array closing square bracket to array type rather than declarator [PR110345]

2024-08-15 Thread Jakub Jelinek
Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

This is the second issue I found.  The comment already correctly says that
attributes after closing ] appertain to the array type, but we were
appending them to returned_attrs, so effectively applying them to the
declarator (as if they appeared right after declarator-id).

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2024-08-15  Jakub Jelinek  

PR c++/110345
* decl.cc (grokdeclarator): Apply declarator->std_attributes
for cdk_array to type, rather than chaining it to returned_attrs.

* g++.dg/cpp0x/gen-attrs-82.C: New test.
* g++.dg/gomp/attrs-3.C (foo): Expect different diagnostics for
omp::directive attribute after closing square bracket of an automatic
declaration and add a test with the attribute after array's
declarator-id.

--- gcc/cp/decl.cc.jj   2024-08-14 18:19:28.0 +0200
+++ gcc/cp/decl.cc  2024-08-15 17:06:22.936426690 +0200
@@ -13317,9 +13317,8 @@ grokdeclarator (const cp_declarator *dec
/* [dcl.array]/1:
 
   The optional attribute-specifier-seq appertains to the
-  array.  */
-   returned_attrs = attr_chainon (returned_attrs,
-  declarator->std_attributes);
+  array type.  */
+   decl_attributes (&type, declarator->std_attributes, 0);
  break;
 
case cdk_function:
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C.jj2024-08-15 
17:15:47.982435384 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-82.C   2024-08-15 17:16:17.112074964 
+0200
@@ -0,0 +1,4 @@
+// { dg-do compile { target c++11 } }
+
+int a [[gnu::common]] [2];
+int b[2] [[gnu::common]];  // { dg-warning "'common' attribute does not 
apply to types" }
--- gcc/testsuite/g++.dg/gomp/attrs-3.C.jj  2023-09-18 10:37:49.710048827 
+0200
+++ gcc/testsuite/g++.dg/gomp/attrs-3.C 2024-08-15 22:33:44.570617103 +0200
@@ -35,6 +35,7 @@ foo ()
   int *[[omp::directive (threadprivate (t3))]] c;  // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
   int &[[omp::directive (threadprivate (t4))]] d = b;  // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
   typedef int T [[omp::directive (threadprivate (t5))]];   // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
-  int e[10] [[omp::directive (threadprivate (t6))]];   // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
+  int e [[omp::directive (threadprivate (t6))]] [10];  // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
+  int f[10] [[omp::directive (threadprivate (t6))]];   // { dg-warning 
"'omp::directive' scoped attribute directive ignored" }
   struct [[omp::directive (threadprivate (t7))]] S {}; // { dg-error 
"'omp::directive' not allowed to be specified in this context" }
 }


Jakub



[PATCH] c++: Parse and ignore attributes on base specifiers [PR110345]

2024-08-15 Thread Jakub Jelinek
Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

This is the third issue I found.

https://eel.is/c++draft/class.derived#general-1 has attribute-specifier-seq
at the start of base-specifier.  The following patch parses it there and
warns about those.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-08-15  Jakub Jelinek  

PR c++/110345
* parser.cc (cp_parser_base_specifier): Parse standard attributes
at the start and emit a warning if there are any non-ignored ones.

* g++.dg/cpp0x/gen-attrs-83.C: New test.

--- gcc/cp/parser.cc.jj 2024-08-15 17:41:44.554159692 +0200
+++ gcc/cp/parser.cc2024-08-15 18:00:33.202211372 +0200
@@ -28987,11 +28987,12 @@ cp_parser_base_clause (cp_parser* parser
 /* Parse a base-specifier.
 
base-specifier:
- :: [opt] nested-name-specifier [opt] class-name
- virtual access-specifier [opt] :: [opt] nested-name-specifier
-   [opt] class-name
- access-specifier virtual [opt] :: [opt] nested-name-specifier
-   [opt] class-name
+ attribute-specifier-seq [opt] :: [opt] nested-name-specifier [opt]
+   class-name
+ attribute-specifier-seq [opt] virtual access-specifier [opt] :: [opt]
+   nested-name-specifier [opt] class-name
+ attribute-specifier-seq [opt] access-specifier virtual [opt] :: [opt]
+   nested-name-specifier [opt] class-name
 
Returns a TREE_LIST.  The TREE_PURPOSE will be one of
ACCESS_{DEFAULT,PUBLIC,PROTECTED,PRIVATE}_[VIRTUAL]_NODE to
@@ -29009,6 +29010,12 @@ cp_parser_base_specifier (cp_parser* par
   bool class_scope_p, template_p;
   tree access = access_default_node;
   tree type;
+  location_t attrs_loc = cp_lexer_peek_token (parser->lexer)->location;
+  tree std_attrs = cp_parser_std_attribute_spec_seq (parser);
+
+  if (std_attrs != NULL_TREE && any_nonignored_attribute_p (std_attrs))
+warning_at (attrs_loc, OPT_Wattributes,
+"attributes on base specifiers are ignored");
 
   /* Process the optional `virtual' and `access-specifier'.  */
   while (!done)
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C.jj2024-08-15 
17:58:52.093456428 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-83.C   2024-08-15 18:27:23.864431390 
+0200
@@ -0,0 +1,10 @@
+// { dg-do compile { target c++11 } }
+
+struct A {};
+struct B {};
+struct C {};
+struct D : [[]] [[]] A,
+  [[]] virtual public B, [[]] [[]] [[]] public virtual C {};
+struct E : [[gnu::deprecated]] A,  // { dg-warning 
"attributes on base specifiers are ignored" }
+  [[gnu::deprecated]] virtual public B,// { dg-warning 
"attributes on base specifiers are ignored" }
+  [[gnu::deprecated]] public virtual C {}; // { dg-warning 
"attributes on base specifiers are ignored" }

Jakub



[PATCH] c++: Allow standard attributes after closing square bracket in new-type-id [PR110345]

2024-08-15 Thread Jakub Jelinek
Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

The first thing I found is that we aren't parsing standard attributes in
noptr-new-declarator - https://eel.is/c++draft/expr.new#1

The following patch parses it there, for the non-outermost arrays
applies normally the attributes to the array type, for the outermost
where we just set *nelts and don't really build an array type just
warns that we ignore those attributes (or, do you think we should
just build an array type in that case and just take its element type?).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-08-15  Jakub Jelinek  

PR c++/110345
* parser.cc (make_array_declarator): Add STD_ATTRS argument, set
declarator->std_attributes to it.
(cp_parser_new_type_id): Warn on non-ignored std_attributes on the
array declarator which is being omitted.
(cp_parser_direct_new_declarator): Parse standard attributes after
closing square bracket, pass it to make_array_declarator.
(cp_parser_direct_declarator): Pass std_attrs to make_array_declarator
instead of setting declarator->std_attributes manually.

* g++.dg/cpp0x/gen-attrs-80.C: New test.
* g++.dg/cpp0x/gen-attrs-81.C: New test.

--- gcc/cp/parser.cc.jj 2024-08-15 14:56:29.007757215 +0200
+++ gcc/cp/parser.cc2024-08-15 16:28:18.607692746 +0200
@@ -1689,7 +1689,7 @@ static cp_declarator *make_call_declarat
   (cp_declarator *, tree, cp_cv_quals, cp_virt_specifiers, cp_ref_qualifier,
tree, tree, tree, tree, tree, location_t);
 static cp_declarator *make_array_declarator
-  (cp_declarator *, tree);
+  (cp_declarator *, tree, tree);
 static cp_declarator *make_pointer_declarator
   (cp_cv_quals, cp_declarator *, tree);
 static cp_declarator *make_reference_declarator
@@ -1904,10 +1904,11 @@ make_call_declarator (cp_declarator *tar
 }
 
 /* Make a declarator for an array of BOUNDS elements, each of which is
-   defined by ELEMENT.  */
+   defined by ELEMENT.  STD_ATTRS contains attributes that appertain to
+   the array type.  */
 
 cp_declarator *
-make_array_declarator (cp_declarator *element, tree bounds)
+make_array_declarator (cp_declarator *element, tree bounds, tree std_attrs)
 {
   cp_declarator *declarator;
 
@@ -1923,6 +1924,8 @@ make_array_declarator (cp_declarator *el
   else
 declarator->parameter_pack_p = false;
 
+  declarator->std_attributes = std_attrs;
+
   return declarator;
 }
 
@@ -9784,6 +9787,12 @@ cp_parser_new_type_id (cp_parser* parser
   if (*nelts == error_mark_node)
*nelts = integer_one_node;
 
+  if (*nelts
+ && declarator->std_attributes
+ && any_nonignored_attribute_p (declarator->std_attributes))
+   warning (OPT_Wattributes, "attributes ignored on outermost array "
+ "type in new expression");
+
   if (*nelts == NULL_TREE)
/* Leave [] in the declarator.  */;
   else if (outer_declarator)
@@ -9838,8 +9847,8 @@ cp_parser_new_declarator_opt (cp_parser*
 /* Parse a direct-new-declarator.
 
direct-new-declarator:
- [ expression ]
- direct-new-declarator [constant-expression]
+ [ expression ] attribute-specifier-seq [opt]
+ direct-new-declarator [constant-expression] attribute-specifier-seq [opt]
 
*/
 
@@ -9886,8 +9895,9 @@ cp_parser_direct_new_declarator (cp_pars
   /* Look for the closing `]'.  */
   cp_parser_require (parser, CPP_CLOSE_SQUARE, RT_CLOSE_SQUARE);
 
+  tree attrs = cp_parser_std_attribute_spec_seq (parser);
   /* Add this bound to the declarator.  */
-  declarator = make_array_declarator (declarator, expression);
+  declarator = make_array_declarator (declarator, expression, attrs);
 
   /* If the next token is not a `[', then there are no more
 bounds.  */
@@ -24330,8 +24340,7 @@ cp_parser_direct_declarator (cp_parser*
}
 
  attrs = cp_parser_std_attribute_spec_seq (parser);
- declarator = make_array_declarator (declarator, bounds);
- declarator->std_attributes = attrs;
+ declarator = make_array_declarator (declarator, bounds, attrs);
}
   else if (first && dcl_kind != CP_PARSER_DECLARATOR_ABSTRACT)
{
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-80.C.jj2024-08-15 
15:27:00.297061501 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-80.C   2024-08-15 15:27:19.610822213 
+0200
@@ -0,0 +1,10 @@
+// { dg-do compile { target c++11 } }
+
+void
+foo (int n)
+{
+  auto a = new int [n] [[]];
+  auto b = new int [n] [[]] [42] [[]] [1] [[]];
+  delete[] b;
+  delete[] a;
+}
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-81.C.jj2024-08-15 
17:14:43.912228130 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-81.C   2024-08-15 17:19:01.738038053 
+0200
@@ -0,0 +1,11 @@
+// { dg-do co

[PATCH] c++: Pedwarn on [[]]; at class scope [PR110345]

2024-08-15 Thread Jakub Jelinek
Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

The fourth issue is that we just emit (when enabled) -Wextra-semi warning
not just for lone semicolon at class scope (correct), but also for
[[]]; or [[whatever]]; there too.
While a lone semicolon is valid in C++11 and newer
(https://eel.is/c++draft/class.mem#nt:member-declaration
allows empty-declaration), unlike at namespace scope or block scope,
something like an attribute-declaration or an empty statement with attributes
applied to it isn't supported there.
While syntactically it matches
attribute-specifier-seq [opt] decl-specifier-seq [opt] member-declarator-list 
[opt] ;
with the latter two omitted, there is
https://eel.is/c++draft/class.mem#general-3
which says that is not valid.

So, the following patch emits a pedwarn in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-08-15  Jakub Jelinek  

PR c++/110345
* parser.cc (cp_parser_member_declaration): Call maybe_warn_extra_semi
only if it is empty-declaration, if there are some tokens like
attribute, pedwarn that the declaration doesn't declare anything.

* g++.dg/cpp0x/gen-attrs-84.C: New test.

--- gcc/cp/parser.cc.jj 2024-08-15 18:00:33.202211372 +0200
+++ gcc/cp/parser.cc2024-08-15 18:56:12.254139651 +0200
@@ -28268,7 +28268,11 @@ cp_parser_member_declaration (cp_parser*
   if (!decl_specifiers.any_specifiers_p)
{
  cp_token *token = cp_lexer_peek_token (parser->lexer);
- maybe_warn_extra_semi (token->location, extra_semi_kind::member);
+ if (decl_spec_token_start == token)
+   maybe_warn_extra_semi (token->location, extra_semi_kind::member);
+ else
+   pedwarn (decl_spec_token_start->location, OPT_Wpedantic,
+"declaration does not declare anything");
}
   else
{
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-84.C.jj2024-08-15 
19:00:02.540314537 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-84.C   2024-08-15 19:05:36.602231553 
+0200
@@ -0,0 +1,8 @@
+// { dg-do compile { target c++11 } }
+
+struct A {
+  [[]];// { dg-error "declaration does not declare 
anything" }
+};
+struct B {
+  [[gnu::deprecated]]; // { dg-error "declaration does not declare anything" }
+};


Jakub



[COMMITTED] RISC-V: use fclass insns to implement isfinite, isnormal and isinf builtins

2024-08-15 Thread Vineet Gupta
Currently these builtins use float compare instructions which require
FP flags to be saved/restored which could be costly in uarch.
RV Base ISA already has FCLASS.{d,s,h} instruction to compare/identify FP
values w/o disturbing FP exception flags.

Now that upstream supports the corresponding optabs, wire them up in the
backend.

gcc/ChangeLog:
* config/riscv/riscv.md: define_insn for fclass insn.
define_expand for isfinite, isnormal, isinf.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/fclass.c: New tests.

Tested-by: Edwin Lu  # pre-commit-CI #2060
Signed-off-by: Vineet Gupta 
---
 gcc/config/riscv/riscv.md   | 63 +
 gcc/testsuite/gcc.target/riscv/fclass.c | 38 +++
 2 files changed, 101 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/fclass.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5e3ef789e42e..f8d8162c0f91 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -68,6 +68,7 @@
   UNSPEC_FMAX
   UNSPEC_FMINM
   UNSPEC_FMAXM
+  UNSPEC_FCLASS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -3478,6 +3479,68 @@
(set_attr "mode" "")
(set (attr "length") (const_int 16))])
 
+;; fclass instruction output bitmap
+;;   0 negative infinity
+;;   1 negative normal number.
+;;   2 negative subnormal number.
+;;   3 -0
+;;   4 +0
+;;   5 positive subnormal number.
+;;   6 positive normal number.
+;;   7 positive infinity
+;;   8 signaling NaN.
+;;   9 quiet NaN
+
+(define_insn "fclass"
+  [(set (match_operand:X0 "register_operand" "=r")
+   (unspec [(match_operand:ANYF 1 "register_operand" " f")]
+  UNSPEC_FCLASS))]
+  "TARGET_HARD_FLOAT"
+  "fclass.\t%0,%1";
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "")])
+
+;; Implements optab for isfinite, isnormal, isinf
+
+(define_int_iterator FCLASS_MASK [126 66 129])
+(define_int_attr fclass_optab
+  [(126"isfinite")
+   (66 "isnormal")
+   (129"isinf")])
+
+(define_expand "2"
+  [(match_operand  0 "register_operand" "=r")
+   (match_operand:ANYF 1 "register_operand" " f")
+   (const_int FCLASS_MASK)]
+  "TARGET_HARD_FLOAT"
+{
+  if (GET_MODE (operands[0]) != SImode
+  && GET_MODE (operands[0]) != word_mode)
+FAIL;
+
+  rtx t = gen_reg_rtx (word_mode);
+  rtx t_op0 = gen_reg_rtx (word_mode);
+
+  if (TARGET_64BIT)
+emit_insn (gen_fclassdi (t, operands[1]));
+  else
+emit_insn (gen_fclasssi (t, operands[1]));
+
+  riscv_emit_binary (AND, t, t, GEN_INT ());
+  rtx cmp = gen_rtx_NE (word_mode, t, const0_rtx);
+  emit_insn (gen_cstore4 (t_op0, cmp, t, const0_rtx));
+
+  if (TARGET_64BIT)
+{
+  t_op0 = gen_lowpart (SImode, t_op0);
+  SUBREG_PROMOTED_VAR_P (t_op0) = 1;
+  SUBREG_PROMOTED_SET (t_op0, SRP_SIGNED);
+}
+
+  emit_move_insn (operands[0], t_op0);
+  DONE;
+})
+
 (define_insn "*seq_zero_"
   [(set (match_operand:GPR   0 "register_operand" "=r")
(eq:GPR (match_operand:X 1 "register_operand" " r")
diff --git a/gcc/testsuite/gcc.target/riscv/fclass.c 
b/gcc/testsuite/gcc.target/riscv/fclass.c
new file mode 100644
index ..ea0f173ecf4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fclass.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-march=rv64gc -mabi=lp64d  -ftrapping-math" { target { rv64 } 
} } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -ftrapping-math" { target { rv32 } 
} } */
+
+int d_isfinite(double a)
+{
+  return __builtin_isfinite(a);
+}
+
+int d_isnormal(double a)
+{
+  return __builtin_isnormal(a);
+}
+
+int d_isinf(double a)
+{
+  return __builtin_isinf(a);
+}
+
+int f_isfinite(float a)
+{
+  return __builtin_isfinite(a);
+}
+
+int f_isnormal(float a)
+{
+  return __builtin_isnormal(a);
+}
+
+int f_isinf(float a)
+{
+  return __builtin_isinf(a);
+}
+
+/* { dg-final { scan-assembler-not   {\mfrflags}  } } */
+/* { dg-final { scan-assembler-not   {\mfsflags}  } } */
+/* { dg-final { scan-assembler-times {\tfclass} 6 } } */
-- 
2.43.0



[RFC PATCH] c++: Disallow [[deprecated]] on types other than class/enum definitions [PR110345]

2024-08-15 Thread Jakub Jelinek
Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

The patch below contains that testcase.  One needed change for this
particular attribute was that currently we handle [[deprecated]]
exactly the same as [[gnu::deprecated]], but for the latter unlike C++14
or later we allow it also on almost all types, while the standard
is strict and allows it only on
https://eel.is/c++draft/dcl.attr#deprecated-2
The attribute may be applied to the declaration of a class, a typedef-name,
a variable, a non-static data member, a function, a namespace,
an enumeration, an enumerator, a concept, or a template specialization.

The following patch just adds a pedwarn for the cases that gnu::deprecated
allows but C++14 disallows, so integral/floating/boolean types,
pointers/references, array types, function types etc.
Basically, for TYPE_P, if the attribute is applied in place (which means
the struct/union/class/enum definition), it is allowed, otherwise pedwarned.

The testcase still contains some FIXMEs I'd like to discuss.

I've tried to compile it also with latest clang and there is agreement in
most of the diagnostics, just at block scope (inside of foo) it doesn't
diagnose
  auto e = new int [n] [[deprecated]];
  auto e2 = new int [n] [[deprecated]] [42];
  [[deprecated]] lab:;
and at namespace scope
[[deprecated]];
I think all of that feels like a clang++ bug.
On the other side, clang++ diagnoses
enum B { B0 } [[deprecated]];
but GCC with all the patches I've posted today doesn't, is that a GCC bug?
We diagnose struct A { } [[deprecated]]; ...

The FIXMEs are where there is agreement with clang++, but I'm not sure.
One thing is I'm not sure if "a variable" above is meant to include function
parameters, and/or unused function parameters without a specified name,
function parameters inside of a function declaration rather than definition
and/or static data members.

Also unsure about
  [[deprecated]] int : 0;
at class scope, that isn't a non-static data member...

Thoughts on all of these FIXMEs?

I guess to mark the paper as implemented (or what has been already voted
into C++23 earlier) we'll need to add similar testcase for all the other
standard attributes and make sure we check what the attributes can appertain
to and what they can't.

Bootstrapped/regtested on x86_64-linux and i686-linux.

2024-08-15  Jakub Jelinek  

PR c++/110345
* parser.cc (cp_parser_std_attribute): Don't transform
[[deprecated]] into [[gnu::deprecated]].
* tree.cc (handle_std_deprecated_attribute): New function.
(std_attributes): Add deprecated entry.

* g++.dg/cpp0x/attr-deprecated1.C: New test.

--- gcc/cp/parser.cc.jj 2024-08-15 18:56:12.254139651 +0200
+++ gcc/cp/parser.cc2024-08-15 19:07:28.821875374 +0200
@@ -30340,12 +30340,11 @@ cp_parser_std_attribute (cp_parser *pars
 
   /* We used to treat C++11 noreturn attribute as equivalent to GNU's,
 but no longer: we have to be able to tell [[noreturn]] and
-__attribute__((noreturn)) apart.  */
-  /* C++14 deprecated attribute is equivalent to GNU's.  */
-  if (is_attribute_p ("deprecated", attr_id))
-   TREE_PURPOSE (TREE_PURPOSE (attribute)) = gnu_identifier;
+__attribute__((noreturn)) apart.
+Similarly for C++14 deprecated attribute, we need to emit extra
+diagnostics for [[deprecated]] compared to [[gnu::deprecated]].  */
   /* C++17 fallthrough attribute is equivalent to GNU's.  */
-  else if (is_attribute_p ("fallthrough", attr_id))
+  if (is_attribute_p ("fallthrough", attr_id))
TREE_PURPOSE (TREE_PURPOSE (attribute)) = gnu_identifier;
   /* C++23 assume attribute is equivalent to GNU's.  */
   else if (is_attribute_p ("assume", attr_id))
--- gcc/cp/tree.cc.jj   2024-08-15 17:36:40.109928397 +0200
+++ gcc/cp/tree.cc  2024-08-15 19:07:28.815875447 +0200
@@ -5087,6 +5087,22 @@ handle_likeliness_attribute (tree *node,
 return error_mark_node;
 }
 
+/* The C++14 [[deprecated]] attribute mostly maps to the GNU deprecated
+   attribute.  */
+
+static tree
+handle_std_deprecated_attribute (tree *node, tree name, tree args, int flags,
+bool *no_add_attrs)
+{
+  tree t = *node;
+  tree ret = handle_deprecated_attribute (node, name, args, flags,
+ no_add_attrs);
+  if (TYPE_P (*node) && t != *node)
+pedwarn (input_location, OPT_Wattributes,
+"%qE on a type other than class or enumeration definition", name);
+  return ret;
+}
+
 /* Table of valid C++ attributes.  */
 static const attribute_spec cxx_gnu_attributes[] =
 {
@@ -5110,6 +5126,8 @@ static const attribute_spec std_attribut
 {
   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
affects_type_identity, handler, exclude } */
+  { "deprecate

Re: [PATCH] libcpp, c-family, v3: Add (dumb) C23 N3017 #embed support [PR105863]

2024-08-15 Thread Joseph Myers
On Thu, 15 Aug 2024, Jakub Jelinek wrote:

> +  D(embed,   T_EMBED,STDC23,INCL | EXPAND)   \

I'd like to query the macro expansion handling.  My understanding is that 
a #embed directive only gets macro-expanded if it fails to match either of 
the forms

  # embed < h-char-sequence > embed-parameter-sequenceopt new-line
  # embed " q-char-sequence " embed-parameter-sequenceopt new-line

and while the argument of a limit parameter gets processed (including 
macro expansion) like in a #if condition, that processing doesn't apply to 
the other standard parameters.  (Maybe it *should* apply to prefix / 
suffix / if_empty so the pp-tokens in their arguments can include macros 
without needing to e.g. use a macro for the name of the file to embed as 
well, but that doesn't seem to be what the current wording says.  There 
was some discussion of this on the reflector last month, ending with 
.)

Apart from any consequences for arguments of prefix/suffix/if_empty (where 
there is a plausible argument that the argument should get expanded at 
some point and that the current wording is undesirable for usability), 
this would also mean that e.g.

#define LIMIT limit
#embed "file" LIMIT(1)

isn't valid because LIMIT doesn't get expanded (the syntax for 
non-expanded #embed is met, with an unknown parameter LIMIT), while

#define limit !
#embed "file" limit(1)

*is* valid, because limit doesn't get expanded (which may be convenient 
for usability - it means headers don't need to use __limit__ if using 
#embed, even if files including the header might have defined limit as a 
macro).

> +  if (CPP_PEDANTIC (pfile))
> +{
> +  if (CPP_OPTION (pfile, cplusplus))
> + cpp_error (pfile, CPP_DL_PEDWARN,
> +"#%s is a GCC extension", "embed");
> +  else if (!CPP_OPTION (pfile, warning_directive))
> + cpp_error (pfile, CPP_DL_PEDWARN,
> +"#%s before C23 is a GCC extension", "embed");

I don't think warning_directive should be used here as the 
condition for diagnosing #embed as an extension; adding a separate 
embed_directive would be better.  (Especially if a future C++ version ends 
up adding #embed; you could then use embed_directive as a condition for 
the pedwarn for both C and C++, whereas warning_directive wouldn't work as 
a condition for C++ since #warning is already in C++23.)

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH] libcpp, c-family, v3: Add (dumb) C23 N3017 #embed support [PR105863]

2024-08-15 Thread Jakub Jelinek
On Thu, Aug 15, 2024 at 10:07:00PM +, Joseph Myers wrote:
> On Thu, 15 Aug 2024, Jakub Jelinek wrote:
> 
> > +  D(embed, T_EMBED,STDC23,INCL | EXPAND)   \
> 
> I'd like to query the macro expansion handling.  My understanding is that 
> a #embed directive only gets macro-expanded if it fails to match either of 
> the forms
> 
>   # embed < h-char-sequence > embed-parameter-sequenceopt new-line
>   # embed " q-char-sequence " embed-parameter-sequenceopt new-line
> 
> and while the argument of a limit parameter gets processed (including 
> macro expansion) like in a #if condition, that processing doesn't apply to 
> the other standard parameters.  (Maybe it *should* apply to prefix / 
> suffix / if_empty so the pp-tokens in their arguments can include macros 
> without needing to e.g. use a macro for the name of the file to embed as 
> well, but that doesn't seem to be what the current wording says.  There 
> was some discussion of this on the reflector last month, ending with 
> .)
> 
> Apart from any consequences for arguments of prefix/suffix/if_empty (where 
> there is a plausible argument that the argument should get expanded at 
> some point and that the current wording is undesirable for usability), 
> this would also mean that e.g.
> 
> #define LIMIT limit
> #embed "file" LIMIT(1)
> 
> isn't valid because LIMIT doesn't get expanded (the syntax for 
> non-expanded #embed is met, with an unknown parameter LIMIT), while
> 
> #define limit !
> #embed "file" limit(1)
> 
> *is* valid, because limit doesn't get expanded (which may be convenient 
> for usability - it means headers don't need to use __limit__ if using 
> #embed, even if files including the header might have defined limit as a 
> macro).

Is there an agreement on that?
Because at least checking on godbolt, none of the existing implementations
work that way.

#define FILE "/etc/passwd"
#define LIMIT limit(1)
#define THIS , 1, 2, 3
#define PRE prefix (42,
ONE
#embed FILE LIMIT suffix(THIS) PRE )
TWO
#embed "/etc/passwd" LIMIT suffix(THIS) PRE )
THREE
#define limit prefix
#embed "/etc/passwd" limit (4) suffix (THIS)

is expanded to something like
ONE 42,110 , 1, 2, 3 TWO 42,110 , 1, 2, 3 THREE 4 47, ... 104 , 1, 2, 3
(... just not to show all of godbolt's /etc/passwd)
by JeanHeyd's branch which was used as a prior art for the paper
and on the clang trunk.

My reading of it wasn't that whether it is
# embed < h-char-sequence > embed-parameter-sequence[opt] new-line
or
# embed " q-char-sequence " embed-parameter-sequence[opt] new-line
or
# embed pp-tokens new-line
depends solely on the filename part in there, but also whether
embed-parameter-sequence is syntactically valid (if specified).

Though, guess I'm still not implementing it like that,
(and none of the existing implementations do), because
the last #embed in that case is treated by all 3 as
prefix (4) suffix (, 1, 2, 3) even when it would be valid
as limit (4) suffix (THIS) too.
In that reading, it would be pretty hard to implement, one would need to
check twice, first with disabled expansion read the tokens and see if
it matches that way the syntax of embed-parameter-sequence (but guess
LIMIT suffix(THIS)
would be e.g. valid but the PRE ) in there is not) and if invalid repeat
with the expansion enabled.

Also, in either case (if it is determined solely based on the header
tokens or on valid embed-parameter-sequence), what should happen if
the tokens e.g. in limit clause argument is without expansion valid
balanced token sequence but with expansion is not and it e.g. provides
) in there?
I mean
#define ARG 0) prefix (
#embed "file" limit (ARG)

Anyway, if there is no preprocessing of the embed-parameter-sequence tokens,
then it would make less sense that there are the limit vs. __limit__ etc.
variants (though sure, if it is the pp-tokens case it would still matter).

In any case, I'd hope that prefix/suffix/if_empty argument is macro expanded
at some point, because otherwise it will be a serious usability problem
for users.

> > +  if (CPP_PEDANTIC (pfile))
> > +{
> > +  if (CPP_OPTION (pfile, cplusplus))
> > +   cpp_error (pfile, CPP_DL_PEDWARN,
> > +  "#%s is a GCC extension", "embed");
> > +  else if (!CPP_OPTION (pfile, warning_directive))
> > +   cpp_error (pfile, CPP_DL_PEDWARN,
> > +  "#%s before C23 is a GCC extension", "embed");
> 
> I don't think warning_directive directive should be used here as the 
> condition for diagnosing #embed as an extension; adding a separate 
> embed_directive would be better.  (Especially if a future C++ version ends 
> up adding #embed; you could then use embed_directive as a condition for 
> the pedwarn for both C and C++, whereas warning_directive wouldn't work as 
> a condition for C++ since #warning is already in C++23.)

Ok, will change that (there are really too many features and the table
already needs 147 columns before this change,

Re: [PATCH v2] c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions [PR116140]

2024-08-15 Thread Jason Merrill

On 8/12/24 1:55 PM, Alex Coplan wrote:

Hi!

This is a v2 patch of:
https://gcc.gnu.org/pipermail/gcc-patches/2024-August/659968.html
that addresses Jakub's feedback.

FWIW, I tried to contrive a testcase where convert_from_reference kicks
in and we get called with an ANNOTATE_EXPR in maybe_convert_cond, but to
no avail.


Yes, the convert_from_reference shouldn't have any effect here, that 
should have happened already when processing the condition expression.



However, I did see cases (both in hand-written testcases and
in the testsuite, e.g. g++.dg/ext/pr114409-2.C) where the subsequent
call to condition_conversion would change the type (e.g. from int to
bool), which shows the need for updating the types in the ANNOTATE_EXPR
chain -- thanks for pointing that out, Jakub!

Personally, I feel the handling of the flags (in this patch, as per
Jakub's suggestion) is a bit of a premature optimization.  It seems
cleaner (and safer) to me just to re-build the annotations if needed
(i.e. in the case that the type changed).  You could even have a nice
abstraction that encapsulates the stripping and re-building of
ANNOTATE_EXPRs, so that it doesn't clutter the caller quite so much.


I'm sympathetic that the optimization is not very significant, but 
neither is updating the flags.  You could also factor it out for the 
same less clutter in the caller?



+  /* If the type of *CONDP changed (e.g. due to convert_from_reference) 
then


As discussed, this is much more likely to be from condition_conversion.


+the flags may have changed too.  The logic in the loop below relies on
+the flags only being changed in the following directions (if at all):
+  TREE_SIDE_EFFECTS : 0 -> 1
+  TREE_READONLY : 1 -> 0
+thus avoiding re-computing the flags from scratch (e.g. via build3), so
+let's verify that this precondition holds.  */


Is there any case where an ANNOTATE_EXPR can have different 
READONLY/SIDE_EFFECTS flags from its operand?  It would be simpler to 
just copy the flags and not bother with the checking.



+#define CHECK_FLAG_CHANGE(prop, value)\
+  gcc_checking_assert (prop (orig_inner) == prop (*condp) || prop (*condp) 
== value)
+  CHECK_FLAG_CHANGE (TREE_SIDE_EFFECTS, 1);
+  CHECK_FLAG_CHANGE (TREE_READONLY, 0);
+#undef CHECK_FLAG_CHANGE
+  for (tree c = cond; c != *condp; c = TREE_OPERAND (c, 0))
+   {
+ gcc_checking_assert (TREE_CODE (c) == ANNOTATE_EXPR);
+ TREE_TYPE (c) = TREE_TYPE (*condp);
+ TREE_SIDE_EFFECTS (c) |= TREE_SIDE_EFFECTS (*condp);
+ TREE_READONLY (c) &= TREE_READONLY (*condp);
+   }





Re: [PATCH] c++: Pedwarn on [[]]; at class scope [PR110345]

2024-08-15 Thread Jason Merrill

On 8/15/24 4:58 PM, Jakub Jelinek wrote:

Hi!

For C++ 26 P2552R3 I went through all the spots (except modules) where
attribute-specifier-seq appears in the grammar and tried to construct
a testcase in all those spots, for now for [[deprecated]] attribute.

The fourth issue is that we emit (when enabled) the -Wextra-semi warning
not just for a lone semicolon at class scope (correct), but also for
[[]]; or [[whatever]]; there too.
While just a semicolon is valid in C++11 and newer, since
https://eel.is/c++draft/class.mem#nt:member-declaration
allows empty-declaration, unlike at namespace scope or block scope,
something like attribute-declaration or an empty statement with attributes
applied to it isn't supported.
While syntactically it matches
attribute-specifier-seq [opt] decl-specifier-seq [opt] member-declarator-list 
[opt] ;
with the latter two omitted, there is
https://eel.is/c++draft/class.mem#general-3
which says that is not valid.

So, the following patch emits a pedwarn in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK


2024-08-15  Jakub Jelinek  

PR c++/110345
* parser.cc (cp_parser_member_declaration): Call maybe_warn_extra_semi
only if it is empty-declaration, if there are some tokens like
attribute, pedwarn that the declaration doesn't declare anything.

* g++.dg/cpp0x/gen-attrs-84.C: New test.

--- gcc/cp/parser.cc.jj 2024-08-15 18:00:33.202211372 +0200
+++ gcc/cp/parser.cc2024-08-15 18:56:12.254139651 +0200
@@ -28268,7 +28268,11 @@ cp_parser_member_declaration (cp_parser*
if (!decl_specifiers.any_specifiers_p)
{
  cp_token *token = cp_lexer_peek_token (parser->lexer);
- maybe_warn_extra_semi (token->location, extra_semi_kind::member);
+ if (decl_spec_token_start == token)
+   maybe_warn_extra_semi (token->location, extra_semi_kind::member);
+ else
+   pedwarn (decl_spec_token_start->location, OPT_Wpedantic,
+"declaration does not declare anything");
}
else
{
--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-84.C.jj2024-08-15 
19:00:02.540314537 +0200
+++ gcc/testsuite/g++.dg/cpp0x/gen-attrs-84.C   2024-08-15 19:05:36.602231553 
+0200
@@ -0,0 +1,8 @@
+// { dg-do compile { target c++11 } }
+
+struct A {
+  [[]];// { dg-error "declaration does not declare 
anything" }
+};
+struct B {
+  [[gnu::deprecated]]; // { dg-error "declaration does not declare anything" }
+};


Jakub





[PUSHED] PHIOPT: Fix comment before factor_out_conditional_operation

2024-08-15 Thread Andrew Pinski
From: Andrew Pinski 

I didn't update the comment before factor_out_conditional_operation
correctly. This updates it to be correct and mentions unary operations
rather than just conversions.

Pushed as obvious.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (factor_out_conditional_operation): Update
comment.
---
 gcc/tree-ssa-phiopt.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index f05ca727503..aa414f6 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -212,7 +212,7 @@ replace_phi_edge_with_variable (basic_block cond_block,
 }
 
 /* PR66726: Factor operations out of COND_EXPR.  If the arguments of the PHI
-   stmt are CONVERT_STMT, factor out the conversion and perform the conversion
+   stmt are Unary operator, factor out the operation and perform the operation
to the result of PHI stmt.  COND_STMT is the controlling predicate.
Return the newly-created PHI, if any.  */
 
-- 
2.43.0



Re: [PATCHv3, expand] Add const0 move checking for CLEAR_BY_PIECES optabs

2024-08-15 Thread HAO CHEN GUI
Hi Richard,
  It's a good point. I will test it. Thanks a lot.

Thanks
Gui Haochen

在 2024/8/15 17:50, Richard Sandiford 写道:
> HAO CHEN GUI  writes:
>> Hi,
>>   This patch adds const0 move checking for CLEAR_BY_PIECES. The original
>> vec_duplicate handles duplicates of non-constant inputs. But 0 is a
>> constant. So even if a platform doesn't support vec_duplicate, it could
>> still do clear by pieces if it supports const0 move by that mode.
>>
>>   Compared to the previous version, the main changes are to create a
>> new class for clear by pieces and add an additional argument to
>> indicate if the object is constant in pieces_addr.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-July/658337.html
> 
> Rather than add the additional argument, could we instead provide a
> constfn that always returns zero?  ISTM that, under the current pieces_addr
> framework, clear by pieces is essentially a memcpy from arbitrarily many
> zeros.  E.g.:
> 
>   clear_by_pieces_d (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
> : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true,
> read_zero, NULL, len, align, false, CLEAR_BY_PIECES)
> 
> where read_zero is something like:
> 
> static rtx
> read_zero (void *, void *, HOST_WIDE_INT, fixed_size_mode mode)
> {
>   return CONST0_RTX (mode);
> }
> 
> (completely untested).
> 
> The changes to by_pieces_mode_supported_p look good.
> 
> Thanks,
> Richard
> 
>>   I didn't convert const0 move predicate check to an assertion as it
>> caused ICEs on i386. On i386, some modes (V8QI V4HI V2SI V1DI) have
>> move expand defined but their predicates don't include const0.
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no
>> regressions.
>>
>>   On i386, it got several regressions. One issue is the predicate of
>> V16QI move expand doesn't include const0. Thus V16QI mode can't be used
>> for clear by pieces with the patch. The second issue is the const0 is
>> passed directly to the move expand with the patch. Originally it is
>> forced to a pseudo and i386 can leverage the previous data to do
>> optimization.
>>
>>   The patch also raises several regressions on aarch64. The V2x8QImode
>> replaces TImode to do 16-byte clear by pieces as V2x8QImode move expand
>> supports const0 and vector mode is preferable. I drafted a patch to
>> address the issue. It will be sent for review in a separate email.
>> Another problem is V8QImode replaces DImode to do 8-byte clear by pieces.
>> It seems to cause different sequences of instructions but the actual
>> instructions are the same.
>>
>>
>> ChangeLog
>> expand: Add const0 move checking for CLEAR_BY_PIECES optabs
>>
>> vec_duplicate handles duplicates of non-constant inputs.  The 0 is a
>> constant.  So even if a platform doesn't support vec_duplicate, it could
>> still do clear by pieces if it supports const0 move.  This patch adds
>> the checking.
>>
>> gcc/
>>  * expr.cc (by_pieces_mode_supported_p): Add const0 move checking
>>  for CLEAR_BY_PIECES.
>>  (pieces_addr::pieces_addr): Add fifth argument is_const to
>>  indicate if object is a constant.  Do not set m_addr_inc if object
>>  is a constant.
>>  (op_by_pieces_d::op_by_pieces_d): Initialize m_from by setting
>>  is_const to true for CLEAR_BY_PIECES.
>>  (class clear_by_pieces_d): New.
>>  (clear_by_pieces_d::prepare_mode): New.
>>  (clear_by_pieces_d::generate): New.
>>  (clear_by_pieces): Replace store_by_pieces_d with clear_by_pieces_d.
>>
>> patch.diff
>> diff --git a/gcc/expr.cc b/gcc/expr.cc
>> index 9f66d479445..abf69c8d698 100644
>> --- a/gcc/expr.cc
>> +++ b/gcc/expr.cc
>> @@ -1014,14 +1014,20 @@ can_use_qi_vectors (by_pieces_operation op)
>>  static bool
>>  by_pieces_mode_supported_p (fixed_size_mode mode, by_pieces_operation op)
>>  {
>> -  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
>> +  enum insn_code icode = optab_handler (mov_optab, mode);
>> +  if (icode == CODE_FOR_nothing)
>>  return false;
>>
>> -  if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES)
>> +  if (op == SET_BY_PIECES
>>&& VECTOR_MODE_P (mode)
>>&& optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing)
>>  return false;
>>
>> +  if (op == CLEAR_BY_PIECES
>> +  && VECTOR_MODE_P (mode)
>> +  && !insn_operand_matches (icode, 1, CONST0_RTX (mode)))
>> +   return false;
>> +
>>if (op == COMPARE_BY_PIECES
>>&& !can_compare_p (EQ, mode, ccp_jump))
>>  return false;
>> @@ -1184,7 +1190,7 @@ class pieces_addr
>>by_pieces_constfn m_constfn;
>>void *m_cfndata;
>>  public:
>> -  pieces_addr (rtx, bool, by_pieces_constfn, void *);
>> +  pieces_addr (rtx, bool, by_pieces_constfn, void *, bool = false);
>>rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
>>void increment_address (HOST_WIDE_INT);
>>void maybe_predec (HOST_WIDE_INT);
>> @@ -1204,7 +1210,7 @@ public:
>> memory load.  */
>>
>>  pieces_addr::pieces_addr 

[PATCHv2, aarch64] Implement 16-byte vector mode const0 store by TImode

2024-08-15 Thread HAO CHEN GUI
Hi,
  I submitted a patch to change the mode checking for
CLEAR_BY_PIECES.
https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660344.html

  It causes some regressions on aarch64. With the patch,
V2x8QImode is used to do clear by pieces instead of TImode as
vector mode is preferable and V2x8QImode supports const0 store.
Thus the efficient "stp" instructions can't be generated.

  I drafted the following patch to fix the problem. It can fix
regressions found in memset-corner-cases.c, memset-q-reg.c,
auto-init-padding-11.c and auto-init-padding-5.c.

  Compared to the previous one, the main changes are
1. Support all 16-byte vector modes
2. Check memory address when pseudo can't be created.
https://gcc.gnu.org/pipermail/gcc-patches/2024-August/660349.html

  I send the patch in order to call auto CI to test the patch. The
cfarm server is too slow to finish regression test overnight.

  I will check in the patch if there are no regressions and no one
objects to it.

Thanks
Gui Haochen

ChangeLog
aarch64: Implement 16-byte vector mode const0 store by TImode

gcc/
* config/aarch64/aarch64-simd.md (mov for VSTRUCT_QD):
Expand 16-byte vector mode const0 store by TImode.

patch.diff
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 01b084d8ccb..acf86e191c7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7766,7 +7766,16 @@ (define_expand "mov"
(match_operand:VSTRUCT_QD 1 "general_operand"))]
   "TARGET_FLOAT"
 {
-  if (can_create_pseudo_p ())
+  if (known_eq (GET_MODE_SIZE (mode), 16)
+  && operands[1] == CONST0_RTX (mode)
+  && MEM_P (operands[0])
+  && (can_create_pseudo_p ()
+ || memory_address_p (TImode, XEXP (operands[0], 0))))
+{
+  operands[0] = adjust_address (operands[0], TImode, 0);
+  operands[1] = CONST0_RTX (TImode);
+}
+  else if (can_create_pseudo_p ())
 {
   if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (mode, operands[1]);


Re: [PATCH v2] [testsuite] add linkonly to dg-additional-sources [PR115295]

2024-08-15 Thread Sam James
Pinging this one on behalf of Alexandre. It fixes a bunch of D test
failures for us (I'm slowly working my way through packaging issues).

Iain mentioned it's blocking him upgrading the D stdlib too.


signature.asc
Description: PGP signature


[PATCH] Dump aliases in -fcallgraph-info

2024-08-15 Thread Alexandre Oliva


Dump ICF-unified decls, thunks, aliases and whatnot along with their
ultimate targets, with edges from the alias to the target.

Add support for dropping the source file's suffix when forming from
dump-base, so that auxiliary files can be scanned, such as the .ci
files generated by -fcallgraph-info, as in the testcase.

Regstrapped on x86_64-linux-gnu.  Ok to install?


for  gcc/ChangeLog

* toplev.cc (dump_final_alias_vcg): New.
(dump_final_node_vcg): Dump aliases along with node.

for  gcc/testsuite/ChangeLog

* lib/scandump.exp (dump-base): Support {} in dump base suffix
to drop it.
* gcc.dg/callgraph-info-1.c: New.
---
 gcc/testsuite/gcc.dg/callgraph-info-1.c |7 ++
 gcc/testsuite/lib/scandump.exp  |4 +++
 gcc/toplev.cc   |   37 +++
 3 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/callgraph-info-1.c

diff --git a/gcc/testsuite/gcc.dg/callgraph-info-1.c 
b/gcc/testsuite/gcc.dg/callgraph-info-1.c
new file mode 100644
index 0..853ff9554eeb0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/callgraph-info-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-fcallgraph-info" } */
+
+void f() {}
+void g() __attribute__ ((__alias__ ("f")));
+
+/* { dg-final { scan-dump-times "ci" "triangle" 1 "ci" {{}} } } */
diff --git a/gcc/testsuite/lib/scandump.exp b/gcc/testsuite/lib/scandump.exp
index 14536ae7379b6..adf9886b61c96 100644
--- a/gcc/testsuite/lib/scandump.exp
+++ b/gcc/testsuite/lib/scandump.exp
@@ -37,6 +37,10 @@ proc dump-base { args } {
 # gcc-defs to base compilation dumps only on the source basename.
 set dumpbase $src
 if { [string length $dumpbase_suf] != 0 } {
+   # Accept {} as dump base suffix to drop the source suffix entirely.
+   if { "$dumpbase_suf" == "{}" } {
+   set dumpbase_suf ""
+   }
regsub {[.][^.]*$} $src $dumpbase_suf dumpbase
 }
 return $dumpbase
diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index eee4805b504a5..f308fb151083e 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -914,6 +914,37 @@ dump_final_callee_vcg (FILE *f, location_t location, tree 
callee)
   fputs ("\" }\n", f);
 }
 
+/* Callback for cgraph_node::call_for_symbol_thunks_and_aliases to dump to F_ a
+   node and an edge from ALIAS->DECL to CURRENT_FUNCTION_DECL.  */
+
+static bool
+dump_final_alias_vcg (cgraph_node *alias, void *f_)
+{
+  FILE *f = (FILE *)f_;
+
+  if (alias->decl == current_function_decl)
+return false;
+
+  dump_final_node_vcg_start (f, alias->decl);
+  fputs ("\" shape : triangle }\n", f);
+
+  fputs ("edge: { sourcename: \"", f);
+  print_decl_identifier (f, alias->decl, PRINT_DECL_UNIQUE_NAME);
+  fputs ("\" targetname: \"", f);
+  print_decl_identifier (f, current_function_decl, PRINT_DECL_UNIQUE_NAME);
+  location_t location = DECL_SOURCE_LOCATION (alias->decl);
+  if (LOCATION_LOCUS (location) != UNKNOWN_LOCATION)
+{
+  expanded_location loc;
+  fputs ("\" label: \"", f);
+  loc = expand_location (location);
+  fprintf (f, "%s:%d:%d", loc.file, loc.line, loc.column);
+}
+  fputs ("\" }\n", f);
+
+  return false;
+}
+
 /* Dump final cgraph node in VCG format.  */
 
 static void
@@ -950,6 +981,12 @@ dump_final_node_vcg (FILE *f)
 dump_final_callee_vcg (f, c->location, c->decl);
   vec_free (cfun->su->callees);
   cfun->su->callees = NULL;
+
+  cgraph_node *node = cgraph_node::get (current_function_decl);
+  if (!node)
+return;
+  node->call_for_symbol_thunks_and_aliases (dump_final_alias_vcg, f,
+   true, false);
 }
 
 /* Output stack usage and callgraph info, as requested.  */

-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


Re: [PATCH v2] [libstdc++] [testsuite] avoid async.cc loss of precision [PR91486]

2024-08-15 Thread Alexandre Oliva
On Aug  7, 2024, Alexandre Oliva  wrote:

> On Aug  1, 2024, Alexandre Oliva  wrote:
>> Each iteration calls float_steady_clock::now() [...] an extra iteration
>> will reach 5 and cause the test to fail.

>> (Do we really want to use floats, that even with this tweak have
>> borderline precision for sub-µs vs 1s deltas?  Do we want to make sure
>> the wait time computation ensures we'll get past the deadline when the
>> time is converted back to the given clock?)

> Ping?
> https://gcc.gnu.org/pipermail/gcc-patches/2024-August/658977.html

Ping?

>> for  libstdc++-v3/ChangeLog

>> PR libstdc++/91486
>> * testsuite/30_threads/async/async.cc
>> (test_pr91486_wait_for): Mark status as unused.
>> (test_pr91486_wait_until): Likewise.  Initialize epoch later.

-- 
Alexandre Oliva, happy hackerhttps://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
More tolerance and less prejudice are key for inclusion and diversity
Excluding neuro-others for not behaving ""normal"" is *not* inclusive


[PATCH v2] Optimize initialization of small padded objects

2024-08-15 Thread Alexandre Oliva
On Aug 15, 2024, Alexandre Oliva  wrote:

> I can't quite envision what to check for in a target-independent test.

Got it.  Also dropped some occurrences of CONST_CAST_TREE that I added,
then changed function signatures but failed to remove them.

Retested on x86_64-linux-gnu.  Ok to install?


When small objects containing padding bits (or bytes) are fully
initialized, we will often store them in registers, and setting
bitfields and other small fields will attempt to preserve the
uninitialized padding bits, which tends to be expensive.
Zero-initializing registers, OTOH, tends to be cheap.

So, if we're optimizing, zero-initialize such small padded objects
even if that's not needed for correctness.  We can't zero-initialize
all such padding objects, though: if there's no padding whatsoever,
and all fields are initialized with nonzero, the zero initialization
would be flagged as dead.  That's why we introduce machinery to detect
whether objects have padding bits.  I considered distinguishing
between bitfields, units and larger padding elements, but I didn't
pursue that distinction.

Since the object's zero-initialization subsumes fields'
zero-initialization, the empty string test in builtin-snprintf-6.c's
test_assign_aggregate would regress without the addition of
native_encode_constructor.


for  gcc/ChangeLog

* expr.cc (categorize_ctor_elements_1): Change p_complete to
int, to distinguish complete initialization in presence or
absence of uninitialized padding bits.
(categorize_ctor_elements): Likewise.  Adjust all callers...
* expr.h (categorize_ctor_elements): ... and declaration.
(type_has_padding_at_level_p): New.
* gimple-fold.cc (type_has_padding_at_level_p): New.
* fold-const.cc (native_encode_constructor): New.
(native_encode_expr): Call it.
* gimplify.cc (gimplify_init_constructor): Clear small
non-addressable non-volatile objects with padding or
other uninitialized fields as an optimization.

for  gcc/testsuite/ChangeLog

* gcc.dg/init-pad-1.c: New.
---
 gcc/expr.cc   |   20 ++-
 gcc/expr.h|3 +-
 gcc/fold-const.cc |   33 
 gcc/gimple-fold.cc|   50 +
 gcc/gimplify.cc   |   14 ++
 gcc/testsuite/gcc.dg/init-pad-1.c |   18 +
 6 files changed, 129 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/init-pad-1.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 2089c2b86a98a..a701c67b3485d 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p)
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
HOST_WIDE_INT *p_unique_nz_elts,
-   HOST_WIDE_INT *p_init_elts, bool *p_complete)
+   HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
   HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
@@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 
   if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
num_fields, elt_type))
-*p_complete = false;
+*p_complete = 0;
+  else if (*p_complete > 0
+  && type_has_padding_at_level_p (TREE_TYPE (ctor)))
+*p_complete = -1;
 
   *p_nz_elts += nz_elts;
   *p_unique_nz_elts += unique_nz_elts;
@@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
  and place it in *P_ELT_COUNT.
* whether the constructor is complete -- in the sense that every
  meaningful byte is explicitly given a value --
- and place it in *P_COMPLETE.
+ and place it in *P_COMPLETE:
+ -  0 if any field is missing
+ -  1 if all fields are initialized, and there's no padding
+ - -1 if all fields are initialized, but there's padding
 
Return whether or not CTOR is a valid static constant initializer, the same
as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
@@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
  HOST_WIDE_INT *p_unique_nz_elts,
- HOST_WIDE_INT *p_init_elts, bool *p_complete)
+ HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   *p_nz_elts = 0;
   *p_unique_nz_elts = 0;
   *p_init_elts = 0;
-  *p_complete = true;
+  *p_complete = 1;
 
   return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
 p_init_elts, p_complete);
@@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
 {
   HOST_WID

  1   2   >