date:20240913

[gcc(refs/users/meissner/heads/work178-tar)] Remove SPR alternatives for move insns.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:ec14dfbb3ca51cd59db9f27f3ab001cc856e5ec3

commit ec14dfbb3ca51cd59db9f27f3ab001cc856e5ec3
Author: Michael Meissner 
Date:   Fri Sep 13 03:43:30 2024 -0400

Remove SPR alternatives for move insns.

2024-09-13  Michael Meissner  

* config/rs6000/rs6000.md (mov<mode>_internal): Remove alternatives for
moving values to/from SPR registers.
(movcc_<mode>): Likewise.
(movsf_hardfloat): Likewise.
(movsd_hardfloat): Likewise.
(mov<mode>_softfloat): Likewise.
(mov<mode>_hardfloat64): Likewise.
(mov<mode>_softfloat64): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.md | 114 +---
 1 file changed, 44 insertions(+), 70 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2c932061b93a..16f3cd1ba6b8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8099,16 +8099,16 @@
 
 ;; MR  LHZ/LBZLXSI*ZXSTH/STBSTXSI*XLI
 ;; XXLOR   load 0 load -1VSPLTI*#  MFVSRWZ
-;; MTVSRWZ MF%1   MT%1   NOP
+;; MTVSRWZ
 (define_insn "*mov_internal"
   [(set (match_operand:QHI 0 "nonimmediate_operand"
"=r,r, wa,m, ?Z,r,
 wa,wa,wa,v, ?v,r,
-wa,r, *c*l,  *h")
+wa")
(match_operand:QHI 1 "input_operand"
"r, m, ?Z,r, wa,i,
 wa,O, wM,wB,wS,wa,
-r, *h,r, 0"))]
+r"))]
   "gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode)"
   "@
@@ -8124,22 +8124,19 @@
vspltis %0,%1
#
mfvsrwz %0,%x1
-   mtvsrwz %x0,%1
-   mf%1 %0
-   mt%0 %1
-   nop"
+   mtvsrwz %x0,%1"
   [(set_attr "type"
"*, load,  fpload,store, fpstore,   *,
 vecsimple, vecperm,   vecperm,   vecperm,   vecperm,   mfvsr,
-mtvsr, mfjmpr,mtjmpr,*")
+mtvsr")
(set_attr "length"
"*, *, *, *, *, *,
 *, *, *, *, 8, *,
-*, *, *, *")
+*")
(set_attr "isa"
"*, *, p9v,   *, p9v,   *,
 p9v,   p9v,   p9v,   p9v,   p9v,   p9v,
-p9v,   *, *, *")])
+p9v")])
 
 
 ;; Here is how to move condition codes around.  When we store CC data in
@@ -8155,9 +8152,9 @@
 
 (define_insn "*movcc_"
   [(set (match_operand:CC_any 0 "nonimmediate_operand"
-   "=y,x,?y,y,r,r,r,r, r,*c*l,r,m")
+   "=y,x,?y,y,r,r,r,r,r,m")
(match_operand:CC_any 1 "general_operand"
-   " y,r, r,O,x,y,r,I,*h,   r,m,r"))]
+   " y,r, r,O,x,y,r,I,m,r"))]
   "register_operand (operands[0], mode)
|| register_operand (operands[1], mode)"
   "@
@@ -8169,8 +8166,6 @@
mfcr %0%Q1\;rlwinm %0,%0,%f1,0xf000
mr %0,%1
li %0,%1
-   mf%1 %0
-   mt%0 %1
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0"
   [(set_attr_alternative "type"
@@ -8184,11 +8179,9 @@
(const_string "mfcrf") (const_string "mfcr"))
   (const_string "integer")
   (const_string "integer")
-  (const_string "mfjmpr")
-  (const_string "mtjmpr")
   (const_string "load")
   (const_string "store")])
-   (set_attr "length" "*,*,12,*,*,8,*,*,*,*,*,*")])
+   (set_attr "length" "*,*,12,*,*,8,*,*,*,*")])
 
 ;; For floating-point, we normally deal with the floating-point registers
 ;; unless -msoft-float is used.  The sole exception is that parameter passing
@@ -8239,17 +8232,17 @@
 ;;
 ;; LWZ  LFSLXSSP   LXSSPX STFS   STXSSP
 ;; STXSSPX  STWXXLXOR  LI FMRXSCPSGNDP
-;; MR   MT  MF   NOPXXSPLTIDP
+;; MR   XXSPLTIDP
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 "=!r,   f, v,  wa,m, wY,
  Z, m, wa, !r,f, wa,
- !r,*c*l,  !r, *h,wa")
+ !r,wa")
(match_operand:SF 1 "input_operand"
 "m, m, wY, Z, f, v,
  wa,r, j,  j, f, wa,
- r, r, *h, 0, eP"))]
+ r, eP"))]
   "(register_operand (operands[0], SFmode)
|| register_operand (

[gcc(refs/users/meissner/heads/work178-tar)] Add support for the TAR register.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:c70a26f1539068db09f2a456df2784ad1429a181

commit c70a26f1539068db09f2a456df2784ad1429a181
Author: Michael Meissner 
Date:   Fri Sep 13 03:42:34 2024 -0400

Add support for the TAR register.

2024-09-13  Michael Meissner  

gcc/

* config/rs6000/constraints.md (h constraint): Add TAR register to 
the
documentation.
(wt constraint): New constraint.
* config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
(POWERPC_MASKS): Likewise.
* config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register 
support.
(alt_reg_names): Likewise.
(rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
hold scalar integer modes of an appropriate size.  Add TAR register
support.
(rs6000_debug_reg_global): Print the register class that wt maps to.
(rs6000_init_hard_regno_mode_ok): Add TAR register support.
(rs6000_conditional_register_usage): Add TAR register support.
(print_operand): Likewise.
(rs6000_debugger_regno): Likewise.
(rs6000_opt_masks): Add support for -mtar.
* config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
support.
(FIXED_REGISTERS): Likewise.
(CALL_REALLY_USED_REGISTERS): Likewise.
(REG_ALLOC_ORDER): Likewise.
(enum reg_class): Likewise.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.
(enum r6000_reg_class_enum): Add support for the wt constraint.
* config/rs6000/rs6000.md (TAR_REGNO): New constant.
(call_indirect_nonlocal_sysv): Add TAR register support.
(call_value_indirect_nonlocal_sysv): Likewise.
(call_indirect_aix): Likewise.
(call_value_indirect_aix): Likewise.
(call_indirect_elfv2): Likewise.
(call_indirect_pcrel): Likewise.
(call_value_indirect_elfv2): Likewise.
(call_value_indirect_pcrel): Likewise.
(*sibcall_indirect_nonlocal_sysv): Likewise.
(sibcall_value_indirect_nonlocal_sysv): Likewise.
(indirect_jump): Likewise.
(@indirect_jump_nospec): Likewise.
(@tablejump_insn_normal): Likewise.
(@tablejump_insn_nospec): Likewise.
* config/rs6000/rs6000.opt (-mtar): New option.

gcc/testsuite/

* gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR 
register.
* gcc.target/powerpc/pr51513.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
* gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
* gcc.target/powerpc/tar-register.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md   |  5 +-
 gcc/config/rs6000/rs6000-cpus.def  |  4 +-
 gcc/config/rs6000/rs6000.cc| 58 +++---
 gcc/config/rs6000/rs6000.h | 31 +++-
 gcc/config/rs6000/rs6000.md| 33 ++--
 gcc/config/rs6000/rs6000.opt   |  4 ++
 gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c|  4 +-
 gcc/testsuite/gcc.target/powerpc/pr51513.c |  4 +-
 .../gcc.target/powerpc/safe-indirect-jump-2.c  |  2 +-
 .../gcc.target/powerpc/safe-indirect-jump-3.c  |  2 +-
 gcc/testsuite/gcc.target/powerpc/tar-register.c| 34 +
 11 files changed, 138 insertions(+), 43 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 369a7b75042d..14f0465d7ae5 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -57,7 +57,7 @@
   "@internal A compatibility alias for @code{wa}.")
 
 (define_register_constraint "h" "SPECIAL_REGS"
-  "@internal A special register (@code{vrsave}, @code{ctr}, or @code{lr}).")
+  "@internal A special register (@code{vrsave}, @code{ctr}, @code{lr} or 
@code{tar}).")
 
 (define_register_constraint "c" "CTR_REGS"
   "The count register, @code{ctr}.")
@@ -91,6 +91,9 @@
   "@internal Like @code{r}, if @option{-mpowerpc64} is used; otherwise,
@code{NO_REGS}.")
 
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
+  "The tar register, @code{tar}.")
+
 (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
   "@internal Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise,
@code{NO_REGS}.")
diff --git a/gcc/config/rs6000/rs6000-cpus.def 
b/gcc/config/rs6000/rs6000-cpus.def
index e73d9ef51f8d..a7ecd38f8eef 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -64,7 +64,8 @@
  | OPTION_MASK_MODULO  \
  | OPTION_MASK_P9_MINMAX   \

[gcc(refs/users/meissner/heads/work178-tar)] Update ChangeLog.*

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:43d1a19ad340b65944fcc18cdee51fc844ea638e

commit 43d1a19ad340b65944fcc18cdee51fc844ea638e
Author: Michael Meissner 
Date:   Fri Sep 13 03:45:33 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 83 ++-
 1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index 0e7ea4d69602..b091c795c423 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,6 +1,87 @@
+ Branch work178-tar, patch #301 
+
+Remove SPR alternatives for move insns.
+
+2024-09-04  Michael Meissner  
+
+   * config/rs6000/rs6000.md (mov_internal): Remove alternatives for
+   moving values to/from SPR registers.
+   (movcc_): Likewise.
+   (movsf_hardfloat): Likewise.
+   (movsd_hardfloat): Likewise.
+   (mov_softfloat): Likewise.
+   (mov_hardfloat64): Likewise.
+   (mov_softfloat64): Likewise.
+
+ Branch work178-tar, patch #300 
+
+Add support for the TAR register.
+
+2024-09-04  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/constraints.md (h constraint): Add TAR register to the
+   documentation.
+   (wt constraint): New constraint.
+   * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar.
+   (POWERPC_MASKS): Likewise.
+   * config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support.
+   (alt_reg_names): Likewise.
+   (rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only
+   hold scalar integer modes of an appropriate size.  Add TAR register
+   support.
+   (rs6000_debug_reg_global): Print the register class that wt maps too.
+   (rs6000_init_hard_regno_mode_ok): Add TAR register support.
+   (rs6000_conditional_register_usage): Add TAR register support.
+   (print_operand): Likewise.
+   (rs6000_debugger_regno): Likewise.
+   (rs6000_opt_masks): Add support for -mtar.
+   * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register
+   support.
+   (FIXED_REGISTERS): Likewise.
+   (CALL_REALLY_USED_REGISTERS): Likewise.
+   (REG_ALLOC_ORDER): Likewise.
+   (enum reg_class): Likewise.
+   (REG_CLASS_NAMES): Likewise.
+   (REG_CLASS_CONTENTS): Likewise.
+   (enum r6000_reg_class_enum): Add support for the wt constraint.
+   * config/rs6000/rs6000.md (TAR_REGNO): New constant.
+   (call_indirect_nonlocal_sysv): Likewise.
+   (call_value_indirect_nonlocal_sysv): Likewise.
+   (call_indirect_aix): Likewise.
+   (call_value_indirect_aix): Likewise.
+   (call_indirect_elfv2): Likewise.
+   (call_indirect_pcrel): Likewise.
+   (call_value_indirect_elfv2): Likewise.
+   (call_value_indirect_pcrel): Likewise.
+   (*sibcall_indirect_nonlocal_sysv): Likewise.
+   (sibcall_value_indirect_nonlocal_sysv): Likewise.
+   (indirect_jump): Likewise.
+   (@indirect_jump_nospec): Likewise.
+   (@tablejump_insn_normal): Likewise.
+   (@tablejump_insn_nospec): Likewise.
+   * config/rs6000/rs6000.opt (-mtar): New option.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register.
+   * gcc.target/powerpc/pr51513.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-2.c: Likewise.
+   * gcc.target/powerpc/safe-indirect-jump-3.c: Likewise.
+   * gcc.target/powerpc/tar-register.c: New test.
+
  Branch work178-tar, baseline 
 
+Add ChangeLog.tar and update REVISION.
+
+2024-09-03  Michael Meissner  
+
+gcc/
+
+   * ChangeLog.tar: New file for branch.
+   * REVISION: Update.
+
 2024-09-12   Michael Meissner  
 
Clone branch
-

[gcc(refs/users/meissner/heads/work178-libs)] Do not build IEEE 128-bit libgfortran support if VSX is not available.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:1b46906df0da209dc8f06e0d089b410eb15b61c4

commit 1b46906df0da209dc8f06e0d089b410eb15b61c4
Author: Michael Meissner 
Date:   Fri Sep 13 03:47:43 2024 -0400

Do not build IEEE 128-bit libgfortran support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then we
cannot build the IEEE 128-bit libraries.  This patch fixes the libgfortran
library so if the GCC compiler does not support IEEE 128-bit floating point, the
IEEE 128-bit floating point libraries are not built.  A companion patch will fix
the libstdc++-v3 library.

I have built these patches on a little endian system, doing both normal builds,
and making a build with a power5 default.  There was no regression in the normal
builds.  I have also built a big endian GCC compiler and there was no regression
there.  Can I check this patch into the trunk?

2024-09-13  Michael Meissner  

libgfortran/

PR target/115800
* configure.ac (powerpc64le*-linux*): Check to see that the compiler
uses VSX before enabling IEEE 128-bit support.
* configure: Regenerate.
* kinds-override.h (GFC_REAL_17): Add check for __VSX__.
* libgfortran.h (POWER_IEEE128): Likewise.

Diff:
---
 libgfortran/configure| 7 +--
 libgfortran/configure.ac | 3 +++
 libgfortran/kinds-override.h | 2 +-
 libgfortran/libgfortran.h| 2 +-
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index 11a1bc5f0708..2708e5c7eca4 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -5981,6 +5981,9 @@ if test "x$GCC" = "xyes"; then
 #if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
   #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
+  #endif
 int
 main ()
 {
@@ -12847,7 +12850,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12850 "configure"
+#line 12853 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12953,7 +12956,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12956 "configure"
+#line 12959 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index cca1ea0ea970..cfaeb9717ab8 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -148,6 +148,9 @@ if test "x$GCC" = "xyes"; then
   AC_PREPROC_IFELSE(
 [AC_LANG_PROGRAM([[#if __SIZEOF_LONG_DOUBLE__ != 16
   #error long double is double
+  #endif
+  #if !defined(__VSX__)
+  #error VSX is not available
   #endif]],
  [[(void) 0;]])],
 [AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble -mno-gnu-attribute";
diff --git a/libgfortran/kinds-override.h b/libgfortran/kinds-override.h
index f6b4956c5caa..51f440e53232 100644
--- a/libgfortran/kinds-override.h
+++ b/libgfortran/kinds-override.h
@@ -30,7 +30,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #endif
 
 /* Keep these conditions on one line so grep can filter it out.  */
-#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16
+#if defined(__powerpc64__)  && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  && 
__SIZEOF_LONG_DOUBLE__ == 16 && defined(__VSX__)
 typedef _Float128 GFC_REAL_17;
 typedef _Complex _Float128 GFC_COMPLEX_17;
 #define HAVE_GFC_REAL_17
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index faf57a33358c..673e5314d276 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -104,7 +104,7 @@ typedef off_t gfc_offset;
 #endif
 
 #if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \
-&& defined __GLIBC_PREREQ
+&& defined __GLIBC_PREREQ && defined(__VSX__)
 #if __GLIBC_PREREQ (2, 32)
 #define POWER_IEEE128 1
 #endif

[gcc(refs/users/meissner/heads/work178-libs)] Do not build IEEE 128-bit libstdc++ support if VSX is not available.

2024-09-13 Thread Michael Meissner via Libstdc++-cvs

https://gcc.gnu.org/g:deb0fb74a3906885414c339a9e8f0bb527cbe432

commit deb0fb74a3906885414c339a9e8f0bb527cbe432
Author: Michael Meissner 
Date:   Fri Sep 13 03:48:20 2024 -0400

Do not build IEEE 128-bit libstdc++ support if VSX is not available.

If you build a little endian compiler and select a default CPU of power5
(i.e. --with-cpu=power5), GCC cannot be built.  The reason is that both the
libgfortran and libstdc++-v3 libraries assume that all little endian powerpc
builds support IEEE 128-bit floating point.

However, if the default cpu does not support the VSX instruction set, then we
cannot build the IEEE 128-bit libraries.  This patch fixes the libstdc++-v3
library so if the GCC compiler does not support IEEE 128-bit floating point, the
IEEE 128-bit floating point libraries are not built.  A companion patch will fix
the libgfortran library.

I have built these patches on a little endian system, doing both normal builds,
and making a build with a power5 default.  There was no regression in the normal
builds.  I have also built a big endian GCC compiler and there was no regression
there.  Can I check this patch into the trunk?

2024-09-13  Michael Meissner  

libstdc++-v3/

PR target/115800
* configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on 
PowerPC
systems without VSX.
* configure: Regenerate.
* include/ext/numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems
without VSX.

Diff:
---
 libstdc++-v3/configure| 68 ++-
 libstdc++-v3/configure.ac | 58 --
 libstdc++-v3/include/ext/numeric_traits.h |  2 +-
 3 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 005c4a29fd09..ae7944beb782 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -51379,8 +51379,31 @@ $as_echo "#define _GLIBCXX_LONG_DOUBLE_COMPAT 1" 
>>confdefs.h
 case "$target" in
   powerpc*-*-linux*)
LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute"
-# Check for IEEE128 support in libm:
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
+   # Eliminate little endian systems without VSX
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+ #ifndef __VSX__
+ #error "IEEE 128-bit needs VSX"
+ #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_ieee128_possible=yes
+else
+  ac_ieee128_possible=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   if test $ac_ieee128_possible = yes; then
+  # Check for IEEE128 support in libm:
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 
in -lm" >&5
 $as_echo_n "checking for __frexpieee128 in -lm... " >&6; }
 if ${ac_cv_lib_m___frexpieee128+:} false; then :
   $as_echo_n "(cached) " >&6
@@ -51425,18 +51448,18 @@ else
   ac_ldbl_ieee128_in_libc=no
 fi
 
-if test $ac_ldbl_ieee128_in_libc = yes; then
-  # Determine which long double format is the compiler's default:
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  if test $ac_ldbl_ieee128_in_libc = yes; then
+# Determine which long double format is the compiler's default:
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 int
 main ()
 {
 
-#ifndef __LONG_DOUBLE_IEEE128__
-#error compiler defaults to ibm128
-#endif
+  #ifndef __LONG_DOUBLE_IEEE128__
+  #error compiler defaults to ibm128
+  #endif
 
   ;
   return 0;
@@ -51448,21 +51471,28 @@ else
   ac_ldbl_ieee128_default=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  # Library objects should use default long double format.
-  if test "$ac_ldbl_ieee128_default" = yes; then
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-# Except for the ones that explicitly use these flags:
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
-  else
-LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
-LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ieeelongdouble 
-mno-gnu-attribute -Wno-psabi"
-  fi
+# Library objects should use default long double format.
+if test "$ac_ldbl_ieee128_default" = yes; then
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  # Except for the ones that explicitly use these flags:
+  LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble 
-mno-gnu-attribute -Wno-psabi"
+else
+  LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute"
+  LONG_DOUBLE_ALT128_COMPAT

[gcc(refs/users/meissner/heads/work178-libs)] Do not add -mvsx when building libgcc float128 support.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:4b2c6c4e13c05faddd07f300370490e728d8

commit 4b2c6c4e13c05faddd07f300370490e728d8
Author: Michael Meissner 
Date:   Fri Sep 13 03:51:57 2024 -0400

Do not add -mvsx when building libgcc float128 support.

Currently, we add -mvsx when building the float128 support in libgcc.  This
allows us to build the float128 support on a big endian system where the
default cpu is power4.  The libgcc support can be built, even though there is
no glibc support for float128 available.

In the past, we would add -mvsx when building the float128 support in 
libgcc.
This allowed us to build the float128 support on a big endian system where 
the
default cpu is power4.  The libgcc support could be built, even though there is no
glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450", you
cannot use VSX instructions.

With these patches, the float128 libgcc support is only built if the default
compiler has VSX support.  If somebody wanted to enable the glibc support 
for
big endian, they would need to set the base cpu to power8 to enable 
building the
libgcc float128 libraries.

I built little endian compilers and there were no regressions.

I built big endian compilers with the --with-cpu=power5 configure option, 
and I
verified that none of the float128 support functions are built.

I also built big endian compilers on a power9 with the --with-cpu=native
configure option, and I verified that the float128 support functions were
built, since the default compiler used the VSX instruction set.

I verified that on both sets of big endian builds, that all of the float128
tests were skipped, since there is no support for float128 in glibc and the 
GCC
compiler does not enable float128 on those systems.

Can I check these patches into the trunk assuming the original bugzilla 
author
says they fix the problem?

2024-09-13 Michael Meissner  

libgcc/

PR target/115800
PR target/113652
* config.host (powerpc*-*-linux*): Do not add t-float128-hw or
t-float128-p10-hw if the default compiler does not support float128.
* config/rs6000/t-float128 (FP128_CFLAGS_SW): Do not add -mvsx when
building the basic float128 support.
* config/rs6000/t-float128-hw (FP128_CFLAGS_HW): Likewise.
* config/rs6000/t-float128-p10-hw (FP128_3_1_CFLAGS_HW): Likewise.
* configure.ac (powerpc*-*-linux*): Do not add -mvsx when testing
whether to build the float128 support.
* configure: Regenerate.

Diff:
---
 libgcc/config.host | 12 ++--
 libgcc/config/rs6000/t-float128|  8 +++-
 libgcc/config/rs6000/t-float128-hw |  3 +--
 libgcc/config/rs6000/t-float128-p10-hw |  3 +--
 libgcc/configure   |  8 +++-
 libgcc/configure.ac|  8 +++-
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 9fae51d4ce7d..261b08859a4d 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1292,14 +1292,14 @@ powerpc*-*-linux*)
 
if test $libgcc_cv_powerpc_float128 = yes; then
tmake_file="${tmake_file} rs6000/t-float128"
-   fi
 
-   if test $libgcc_cv_powerpc_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-hw"
-   fi
+   if test $libgcc_cv_powerpc_float128_hw = yes; then
+   tmake_file="${tmake_file} rs6000/t-float128-hw"
 
-   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
-   tmake_file="${tmake_file} rs6000/t-float128-p10-hw"
+   if test $libgcc_cv_powerpc_3_1_float128_hw = yes; then
+   tmake_file="${tmake_file} 
rs6000/t-float128-p10-hw"
+   fi
+   fi
fi
 
extra_parts="$extra_parts ecrti.o ecrtn.o ncrti.o ncrtn.o"
diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128
index b09b5664af0e..93e78adcd624 100644
--- a/libgcc/config/rs6000/t-float128
+++ b/libgcc/config/rs6000/t-float128
@@ -74,7 +74,13 @@ fp128_includes   = $(srcdir)/soft-fp/double.h \
  $(srcdir)/soft-fp/soft-fp.h
 
 # Build the emulator without ISA 3.0 hardware support.
-FP128_CFLAGS_SW = -Wno-type-limits -mvsx -mfloat128 \
+#
+# In the past we added -mvsx to build the float128 specific libraries with the
+# VSX instruction set.  This allowed the big endian GCC on server platforms to
+# build the float128 support.  However, is causes probl

[gcc(refs/users/meissner/heads/work178-libs)] Do not add -mvsx when testing the float128 support.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:b5f934bf571d51be98b4a4c91292132df22a00a5

commit b5f934bf571d51be98b4a4c91292132df22a00a5
Author: Michael Meissner 
Date:   Fri Sep 13 03:50:23 2024 -0400

Do not add -mvsx when testing the float128 support.

Currently, we add -mvsx when building the float128 support in libgcc.  This
allows us to build the float128 support on a big endian system where the
default cpu is power4.  The libgcc support can be built, even though there is
no glibc support for float128 available.

However, adding -mvsx and building the libgcc float128 support causes problems
if you set the default cpu to something like a 7450, which does not have VSX
support.  The assembler complains that when the code does a ".machine 7450", you
cannot use VSX instructions.

After patching libgcc to not build the float128 support unless the host can
support float128 normally, this patch changes the GCC tests so that it will 
only
do the IEEE 128-bit tests if the default compiler enables the VSX 
instruction
set by default.  Otherwise all of the float128 tests will fail because the
libgcc support is not available.

In addition to not doing the float128 tests when the compiler does not natively
support float128, this patch also stops adding -mvsx, -mfloat128, and
-mfloat128-hardware to enable the support if the compiler did not natively enable
it.

I built little endian compilers and there were no regressions.

I built big endian compilers with the --with-cpu=power5 configure option, 
and I
verified that none of the float128 support functions are built.

I also built big endian compilers on a power9 with the --with-cpu=native
configure option, and I verified that the float128 support functions were
built, since the default compiler used the VSX instruction set.

I verified that on both sets of big endian builds, that all of the float128
tests were skipped, since there is no support for float128 in glibc and the 
GCC
compiler does not enable float128 on those systems.

Can I check these patches into the trunk assuming the original bugzilla 
author
says they fix the problem?

2024-09-13 Michael Meissner  

gcc/testsuite/

PR target/115800
PR target/113652
* gcc.target/powerpc/abs128-1.c: Stop adding the -mvsx, -mfloat128, and
-mfloat128-hardware options to the float128 test.  Add explicit checks for
the float128 support, rather than just using VSX as a stand-in, or
assuming we can silently enable VSX if the default is power4.  For
pr99708.c, also use the correct spelling to disable the float128 tests.
* gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
* gcc.target/powerpc/copysign128-1.c: Likewise.
* gcc.target/powerpc/divkc3-1.c: Likewise.
* gcc.target/powerpc/float128-3.c: Likewise.
* gcc.target/powerpc/float128-5.c: Likewise.
* gcc.target/powerpc/float128-complex-2.c: Likewise.
* gcc.target/powerpc/float128-math.c: Likewise.
* gcc.target/powerpc/inf128-1.c: Likewise.
* gcc.target/powerpc/mulkc3-1.c: Likewise.
* gcc.target/powerpc/nan128-1.c: Likewise.
* gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
* gcc.target/powerpc/pr104253.c: Likewise.
* gcc.target/powerpc/pr70669.c: Likewise.
* gcc.target/powerpc/pr79004.c: Likewise.
* gcc.target/powerpc/pr79038-1.c: Likewise.
* gcc.target/powerpc/pr81959.c: Likewise.
* gcc.target/powerpc/pr85657-1.c: Likewise.
* gcc.target/powerpc/pr85657-2.c: Likewise.
* gcc.target/powerpc/pr99708.c: Likewise.
* gcc.target/powerpc/signbit-1.c: Likewise.
* gcc.target/powerpc/signbit-2.c: Likewise.
* lib/target-supports.exp (check_ppc_float128_sw_available): 
Likewise.
(check_ppc_float128_hw_available): Likewise.
(check_effective_target_ppc_ieee128_ok): Likewise.
(add_options_for___float128): Likewise.
(check_effective_target___float128): Likewise.
(check_effective_target_base_quadfloat_support): Likewise.
(check_effective_target_powerpc_float128_sw_ok): Likewise.
(check_effective_target_powerpc_float128_hw_ok): Likewise.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/abs128-1.c|  3 ++-
 .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c  |  1 +
 gcc/testsuite/gcc.target/powerpc/copysign128-1.c   |  3 ++-
 gcc/testsuite/gcc.target/powerpc/divkc3-1.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-3.c  |  3 ++-
 gcc/testsuite/gcc.target/powerpc/float128-5.c  |  3 ++-
 .../gcc.target/powerpc/float128-complex-2.c|  2 +-
 gcc/testsuite/gcc.target/powerpc/float128-math.c   |  2 +-

[gcc(refs/users/meissner/heads/work178-libs)] Update ChangeLog.*

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:77e47d18a1607c8a55eafa9b027f317889c3f720

commit 77e47d18a1607c8a55eafa9b027f317889c3f720
Author: Michael Meissner 
Date:   Fri Sep 13 03:54:57 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.libs | 210 -
 1 file changed, 209 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.libs b/gcc/ChangeLog.libs
index cf5c7cc035f2..0c376016f287 100644
--- a/gcc/ChangeLog.libs
+++ b/gcc/ChangeLog.libs
@@ -1,6 +1,214 @@
+ Branch work178-libs, patch #503 
+
+Do not add -mvsx when testing the float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, it is of little
+use, given that there is no glibc support for float128 available.
+
+However, adding -mvsx and building the libgcc float128 support causes problems
+if you set the default cpu to something like a 7540, which does not have VSX
+support.  The assembler complains that when the code does a ".machine 7450", 
you
+cannot use VSX instructions.
+
+After patching libgcc to not build the float128 support unless the host can
+support float128 normally, this patch changes the GCC tests so that it will only
+do the IEEE 128-bit tests if the default compiler enables the VSX instruction
+set by default.  Otherwise all of the float128 tests will fail because the
+libgcc support is not available.
+
+In addition to not doing the float128 tests when the compiler does not natively
+support float128, this patch also removes adding -mvsx, -mfloat128, and
+-mfloat128-hardware to enable the support if the compiler did not natively
+enable it.
+
+I built little endian compilers and there were no regressions.
+
+I built big endian compilers with the --with-cpu=power5 configure option, and I
+verified that none of the float128 support functions are built.
+
+I also built big endian compilers on a power9 with the --with-cpu=native
+configure option, and I verified that the float128 support functions were
+built, since the default compiler used the VSX instruction set.
+
+I verified that on both sets of big endian builds, that all of the float128
+tests were skipped, since there is no support for float128 in glibc and the GCC
+compiler does not enable float128 on those systems.
+
+Can I check these patches into the trunk assuming the original bugzilla author
+says they fix the problem?
+
+2024-09-13 Michael Meissner  
+
+gcc/testsuite/
+
+   PR target/115800
+   PR target/113652
+   * gcc.target/powerpc/abs128-1.c: Remove adding -mvsx, -mfloat128, and
+   -mfloat128-hardware options to float128 test.  Add explicit checks for
+   the float128 support, rather than just using VSX as a stand in, or
+   assuming we can silently enable VSX if the default is power4.  For
+   pr99708.c, also use the correct spelling to disable the float128 tests.
+   * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: Likewise.
+   * gcc.target/powerpc/copysign128-1.c: Likewise.
+   * gcc.target/powerpc/divkc3-1.c: Likewise.
+   * gcc.target/powerpc/float128-3.c: Likewise.
+   * gcc.target/powerpc/float128-5.c: Likewise.
+   * gcc.target/powerpc/float128-complex-2.c: Likewise.
+   * gcc.target/powerpc/float128-math.c: Likewise.
+   * gcc.target/powerpc/inf128-1.c: Likewise.
+   * gcc.target/powerpc/mulkc3-1.c: Likewise.
+   * gcc.target/powerpc/nan128-1.c: Likewise.
+   * gcc.target/powerpc/p9-lxvx-stxvx-3.c: Likewise.
+   * gcc.target/powerpc/pr104253.c: Likewise.
+   * gcc.target/powerpc/pr70669.c: Likewise.
+   * gcc.target/powerpc/pr79004.c: Likewise.
+   * gcc.target/powerpc/pr79038-1.c: Likewise.
+   * gcc.target/powerpc/pr81959.c: Likewise.
+   * gcc.target/powerpc/pr85657-1.c: Likewise.
+   * gcc.target/powerpc/pr85657-2.c: Likewise.
+   * gcc.target/powerpc/pr99708.c: Likewise.
+   * gcc.target/powerpc/signbit-1.c: Likewise.
+   * gcc.target/powerpc/signbit-2.c: Likewise.
+   * lib/target-supports.exp (check_ppc_float128_sw_available): Likewise.
+   (check_ppc_float128_hw_available): Likewise.
+   (check_effective_target_ppc_ieee128_ok): Likewise.
+   (add_options_for___float128): Likewise.
+   (check_effective_target___float128): Likewise.
+   (check_effective_target_base_quadfloat_support): Likewise.
+   (check_effective_target_powerpc_float128_sw_ok): Likewise.
+   (check_effective_target_powerpc_float128_hw_ok): Likewise.
+
+ Branch work178-libs, patch #502 
+
+Do not add -mvsx when building libgcc float128 support.
+
+Currently, we add -mvsx when building the float128 support in libgcc.  This
+allows us to build the float128 support on a big endian system where the
+default cpu is power4.  While the libgcc support can be built, given there is
+no glib

[gcc(refs/users/meissner/heads/work178-vpair)] Add support for vector pair unary and binary operations.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:bf64847bff6f4a9c935a5fa2ee9e6f7861b73267

commit bf64847bff6f4a9c935a5fa2ee9e6f7861b73267
Author: Michael Meissner 
Date:   Fri Sep 13 04:03:32 2024 -0400

Add support for vector pair unary and binary operations.

2024-09-13  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
built-in functions for vector pair support.
* config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
enumeration.
(vpair_split_unary): New declaration.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.cc (print_operand): Add 'S' output modifier.
(vpair_split_unary): New function to split vector pair operations.
(vpair_split_binary): Likewise.
* config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
* config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
* config/rs6000/vector-pair.md: New file.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
documentation for the new vector pair built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-1.c: New test.
* gcc.target/powerpc/vector-pair-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  62 +
 gcc/config/rs6000/rs6000-protos.h|  12 ++
 gcc/config/rs6000/rs6000.cc  | 113 
 gcc/config/rs6000/rs6000.md  |   1 +
 gcc/config/rs6000/t-rs6000   |   1 +
 gcc/config/rs6000/vector-pair.md | 164 +++
 gcc/doc/extend.texi  |  51 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-1.c |  87 
 gcc/testsuite/gcc.target/powerpc/vector-pair-2.c |  86 
 9 files changed, 577 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0e9dc05dbcff..cf22389542d8 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3933,3 +3933,65 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
+
+;; Vector pair built-in functions with float elements
+  v256 __builtin_vpair_f32_abs (v256);
+VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_add (v256, v256);
+VPAIR_F32_ADD vpair_add_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_div (v256, v256);
+VPAIR_F32_DIV vpair_div_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_max (v256, v256);
+VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_min (v256, v256);
+VPAIR_F32_MIN vpair_smin_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_mul (v256, v256);
+VPAIR_F32_MUL vpair_mul_v8sf3 {mma}
+
+  v256 __builtin_vpair_f32_nabs (v256);
+VPAIR_F32_NABS vpair_nabs_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_neg (v256);
+VPAIR_F32_NEG vpair_neg_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_sqrt (v256);
+VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma}
+
+  v256 __builtin_vpair_f32_sub (v256, v256);
+VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
+
+;; Vector pair built-in functions with double elements
+  v256 __builtin_vpair_f64_abs (v256);
+VPAIR_F64_ABS vpair_abs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_add (v256, v256);
+VPAIR_F64_ADD vpair_add_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_div (v256, v256);
+VPAIR_F64_DIV vpair_div_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_max (v256, v256);
+VPAIR_F64_MAX vpair_smax_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_min (v256, v256);
+VPAIR_F64_MIN vpair_smin_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_mul (v256, v256);
+VPAIR_F64_MUL vpair_mul_v4df3 {mma}
+
+  v256 __builtin_vpair_f64_nabs (v256);
+VPAIR_F64_NABS vpair_nabs_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_neg (v256);
+VPAIR_F64_NEG vpair_neg_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_sqrt (v256);
+VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma}
+
+  v256 __builtin_vpair_f64_sub (v256, v256);
+VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index da658cd5ab2e..7b8b3b0c2377 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -161,6 +161,18 @@ extern bool rs6000_pcrel_p (void);
 extern bool rs6000_fndecl_pcrel_p (const_tree);
 extern void rs6000_output_addr_vec_elt (FILE *, int);
 
+/* If we are splitting a vector pair unary operator into two separate vector
+   operations, we need to generate a NEG if this is NABS.  */
+
+enum vpair_split_unary {
+  VPAIR_SPLIT_NORMAL,  /* No extra processing is needed.  */
+  VPAIR_SPLIT_NEGATE   /* Wrap operation with a NEG.  */
+};
+
+extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code,
+  enum vpair_split_unary);
+extern void vpair_split_binary

[gcc(refs/users/meissner/heads/work178-vpair)] Add vector pair init and splat.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:91602f8ce833a0925123104461b7151695c5f082

commit 91602f8ce833a0925123104461b7151695c5f082
Author: Michael Meissner 
Date:   Fri Sep 13 04:06:58 2024 -0400

Add vector pair init and splat.

2024-09-13  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
built-in function.
(__builtin_vpair_f32_splat): Likewise.
(__builtin_vpair_f64_splat): Likewise.
* config/rs6000/vector-pair.h: Update power10 splat patterns.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
(UNSPEC_VPAIR_SPLAT): Likewise.
(VPAIR_SPLAT_VMODE): New mode iterator.
(VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
(vpair_splat_name): Likewise.
(vpair_zero): New insn.
(vpair_splat_): New define_expand.
(vpair_splat__internal): New insns.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-5.c: New test.
* gcc.target/powerpc/vector-pair-6.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def|  10 +++
 gcc/config/rs6000/vector-pair.md | 102 ++-
 gcc/doc/extend.texi  |   9 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-5.c |  54 
 gcc/testsuite/gcc.target/powerpc/vector-pair-6.c |  56 +
 5 files changed, 230 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 2bac0e58971d..e0b1c744f7c6 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3934,6 +3934,10 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
 STXVP nothing {mma,pair}
 
+;; Vector pair built-in functions.
+  v256 __builtin_vpair_zero ();
+VPAIR_ZERO vpair_zero {mma}
+
 ;; Vector pair built-in functions with float elements
   v256 __builtin_vpair_f32_abs (v256);
 VPAIR_F32_ABS vpair_abs_v8sf2 {mma}
@@ -3974,6 +3978,9 @@
   v256 __builtin_vpair_f32_nfms (v256, v256, v256);
 VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
 
+  v256 __builtin_vpair_f32_splat (float);
+VPAIR_F32_SPLAT vpair_splat_v8sf {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -4017,5 +4024,8 @@
   v256 __builtin_vpair_f64_nfms (v256, v256, v256);
 VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
 
+  v256 __builtin_vpair_f64_splat (double);
+VPAIR_F64_SPLAT vpair_splat_v4df {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index fe8004b75d54..6fbc90cf528a 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -39,7 +39,9 @@
UNSPEC_VPAIR_PLUS
UNSPEC_VPAIR_SMAX
UNSPEC_VPAIR_SMIN
-   UNSPEC_VPAIR_SQRT])
+   UNSPEC_VPAIR_SPLAT
+   UNSPEC_VPAIR_SQRT
+   UNSPEC_VPAIR_ZERO])
 
 ;; Vector pair element ID that defines the scaler element within the vector 
pair.
 (define_c_enum "vpair_element"
@@ -102,6 +104,104 @@
 ;; Map the scalar element ID into the appropriate insn type for divide.
 (define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT  "vecfdiv")
(VPAIR_ELEMENT_DOUBLE "vecdiv")])
+
+;; Mode iterator for the vector modes that we provide splat operations for.
+(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF])
+
+;; Map element mode to 128-bit vector mode for splat operations
+(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF")
+   (DF "V2DF")])
+
+;; Map either element mode or vector mode into the name for the splat insn.
+(define_mode_attr vpair_splat_name [(SF   "v8sf")
+   (DF   "v4df")
+   (V4SF "v8sf")
+   (V2DF "v4df")])
+
+;; Initialize a vector pair to 0
+(define_insn_and_split "vpair_zero"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+   (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 1) (match_dup 3))
+   (set (match_dup 2) (match_dup 3))]
+{
+  rtx op0 = operands[0];
+
+  operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0);
+  operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16);
+  operands[3] = CONST0_RTX (V2DFmode);
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecperm")])
+
+;; Create a vector pair with a value splat'ed (duplicated) to all of the
+;; elements.
+(define_expand "vpair_splat_"
+  [(use (match_operand:OO 0 "vsx_register_operand"))
+   (use (match_operand:SFDF 1 "input_operand"))]
+  "TARGET_MMA"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  machine_mode element_mode = mode;
+
+  if (op1 == CONST0_RTX (element_mode))
+{
+  emit_insn (g

[gcc(refs/users/meissner/heads/work178-vpair)] Add vector pair optimizations.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:c77cec96f85a9947746af7998494a5ea8fec8a7a

commit c77cec96f85a9947746af7998494a5ea8fec8a7a
Author: Michael Meissner 
Date:   Fri Sep 13 04:09:36 2024 -0400

Add vector pair optimizations.

2024-09-13  Michael Meissner  

gcc/

* config/rs6000/vector-pair.md (vpair_add_neg_3): New
combiner insn to convert vector plus/neg into a minus operation.
(vpair_fma__merge): Optimize multiply, add/subtract, and
negation into fma operations if the user specifies to create fmas.
(vpair_fma__merge): Likewise.
(vpair_fma__merge2): Likewise.
(vpair_nfma__merge): Likewise.
(vpair_nfms__merge): Likewise.
(vpair_nfms__merge2): Likewise.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-7.c: New test.
* gcc.target/powerpc/vector-pair-8.c: Likewise.
* gcc.target/powerpc/vector-pair-9.c: Likewise.
* gcc.target/powerpc/vector-pair-10.c: Likewise.
* gcc.target/powerpc/vector-pair-11.c: Likewise.
* gcc.target/powerpc/vector-pair-12.c: Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.md  | 224 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-10.c |  61 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-11.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-12.c |  65 +++
 gcc/testsuite/gcc.target/powerpc/vector-pair-7.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-8.c  |  18 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-9.c  |  61 ++
 7 files changed, 512 insertions(+)

diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 6fbc90cf528a..01d32e460f6e 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -265,6 +265,31 @@
(set (attr "type") (if_then_else (match_test " == DIV")
(const_string "")
(const_string "")))])
+
+;; Optimize vector pair add of a negative value into a subtract.
+(define_insn_and_split "*vpair_add_neg_3"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+   (unspec:OO
+[(match_operand:OO 1 "vsx_register_operand" "wa")
+ (unspec:OO
+  [(match_operand:OO 2 "vsx_register_operand" "wa")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+VPAIR_FP_BINARY))]
+  "TARGET_MMA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
 
 ;; Vector pair fused-multiply (FMA) operations.  The last argument in the
 ;; UNSPEC is a CONST_INT which identifies what the scalar element is.
@@ -358,3 +383,202 @@
 }
   [(set_attr "length" "8")
(set_attr "type" "")])
+
+;; Optimize vector pair multiply and vector pair add into vector pair fma,
+;; providing the compiler would do this optimization for scalar and vectors.
+;; Unlike most of the define_insn_and_splits, this can be done before register
+;; allocation.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_PLUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "")])
+
+;; Merge multiply and subtract.
+(define_insn_and_split "*vpair_fma__merge"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+   (unspec:OO
+[(unspec:OO
+  [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+   (match_operand:OO 2 "vsx_register_operand" "wa,0")
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_MULT)
+ (match_operand:OO 3 "vsx_register_operand" "0,wa")
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_MINUS))]
+  "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (unspec:OO
+[(match_dup 1)
+ (match_dup 2)
+ (unspec:OO
+  [(match_dup 3)
+   (const_int VPAIR_FP_ELEMENT)]
+  UNSPEC_VPAIR_NEG)
+ (const_int VPAIR_FP_ELEMENT)]
+UNSPEC_VPAIR_FMA))]
+{
+}
+  [(set_attr "length" "8")
+   (set_attr "type"

[gcc(refs/users/meissner/heads/work178-vpair)] Add support for vector pair fma operations.

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:95f23327c13428eb0267d6b04b08efb0b378b75c

commit 95f23327c13428eb0267d6b04b08efb0b378b75c
Author: Michael Meissner 
Date:   Fri Sep 13 04:05:35 2024 -0400

Add support for vector pair fma operations.

2024-09-13  Michael Meissner  

gcc/

* config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
built-in.
(__builtin_vpair_f32_fms): Likewise.
(__builtin_vpair_f32_nfma): Likewise.
(__builtin_vpair_f32_nfms): Likewise.
(__builtin_vpair_f64_fma): Likewise.
(__builtin_vpair_f64_fms): Likewise.
(__builtin_vpair_f64_nfma): Likewise.
(__builtin_vpair_f64_nfms): Likewise.
* config/rs6000/rs6000-protos.h (enum vpair_split_fma): New
enumeration.
(vpair_split_fma): New declaration.
* config/rs6000/rs6000.cc (vpair_split_fma): New function to split
vector pair FMA operations.
* config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
(vpair_stdname): Add UNSPEC_VPAIR_FMA.
(VPAIR_OP): Likewise.
(vpair_fma_4): New insns.
(vpair_fms_4): Likewise.
(vpair_nfma_4): Likewise.
(vpair_nfms_4): Likewise.
* doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new
vector pair fma built-in functions.

gcc/testsuite/

* gcc.target/powerpc/vector-pair-3.c: New test.
* gcc.target/powerpc/vector-pair-4.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def| 24 ++
 gcc/config/rs6000/rs6000-protos.h| 13 
 gcc/config/rs6000/rs6000.cc  | 71 ++
 gcc/config/rs6000/vector-pair.md | 96 
 gcc/doc/extend.texi  | 25 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-3.c | 57 ++
 gcc/testsuite/gcc.target/powerpc/vector-pair-4.c | 57 ++
 7 files changed, 343 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index cf22389542d8..2bac0e58971d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3944,6 +3944,12 @@
   v256 __builtin_vpair_f32_div (v256, v256);
 VPAIR_F32_DIV vpair_div_v8sf3 {mma}
 
+  v256 __builtin_vpair_f32_fma (v256, v256, v256);
+VPAIR_F32_FMA vpair_fma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_fms (v256, v256, v256);
+VPAIR_F32_FMS vpair_fms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_max (v256, v256);
 VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
 
@@ -3962,6 +3968,12 @@
   v256 __builtin_vpair_f32_sqrt (v256);
 VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma}
 
+  v256 __builtin_vpair_f32_nfma (v256, v256, v256);
+VPAIR_F32_NFMA vpair_nfma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_nfms (v256, v256, v256);
+VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
 VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -3975,6 +3987,12 @@
   v256 __builtin_vpair_f64_div (v256, v256);
 VPAIR_F64_DIV vpair_div_v4df3 {mma}
 
+  v256 __builtin_vpair_f64_fma (v256, v256, v256);
+VPAIR_F64_FMA vpair_fma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_fms (v256, v256, v256);
+VPAIR_F64_FMS vpair_fms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_max (v256, v256);
 VPAIR_F64_MAX vpair_smax_v4df3 {mma}
 
@@ -3993,5 +4011,11 @@
   v256 __builtin_vpair_f64_sqrt (v256);
 VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma}
 
+  v256 __builtin_vpair_f64_nfma (v256, v256, v256);
+VPAIR_F64_NFMA vpair_nfma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_nfms (v256, v256, v256);
+VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
 VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 7b8b3b0c2377..bab5fb437c27 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -173,6 +173,19 @@ extern void vpair_split_unary (rtx [], machine_mode, enum 
rtx_code,
   enum vpair_split_unary);
 extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code);
 
+/* When we are splitting a vector pair FMA operation into two vector operations, we
+   may need to modify the code generated.  This enumeration encodes the
+   different choices.  */
+
+enum vpair_split_fma {
+  VPAIR_SPLIT_FMA, /* Fused multiply-add.  */
+  VPAIR_SPLIT_FMS, /* Fused multiply-subtract.  */
+  VPAIR_SPLIT_NFMA,/* Fused negate multiply-add.  */
+  VPAIR_SPLIT_NFMS /* Fused negate multiply-subtract.  */
+};
+
+extern void vpair_split_fma (rtx [], machine_mode, enum vpair_split_fma);
+
 /* Different PowerPC instruction formats that are used by GCC.  There are
various other instruction formats used by the PowerPC hardware, but these
f

[gcc(refs/users/meissner/heads/work178-vpair)] Update ChangeLog.*

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:932bf24ef1a84caad94b6f6f27fba4f696e5e254

commit 932bf24ef1a84caad94b6f6f27fba4f696e5e254
Author: Michael Meissner 
Date:   Fri Sep 13 04:15:08 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.vpair | 128 +++-
 1 file changed, 127 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair
index 4350f0a50f5f..10842a2a9b86 100644
--- a/gcc/ChangeLog.vpair
+++ b/gcc/ChangeLog.vpair
@@ -1,6 +1,132 @@
+ Branch work178-vpair, patch #403 
+
+Add vector pair optimizations.
+
+2024-09-13  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/vector-pair.md (vpair_add_neg_3): New
+   combiner insn to convert vector plus/neg into a minus operation.
+   (vpair_fma__merge): Optimize multiply, add/subtract, and
+   negation into fma operations if the user specifies to create fmas.
+   (vpair_fma__merge): Likewise.
+   (vpair_fma__merge2): Likewise.
+   (vpair_nfma__merge): Likewise.
+   (vpair_nfms__merge): Likewise.
+   (vpair_nfms__merge2): Likewise.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-7.c: New test.
+   * gcc.target/powerpc/vector-pair-8.c: Likewise.
+   * gcc.target/powerpc/vector-pair-9.c: Likewise.
+   * gcc.target/powerpc/vector-pair-10.c: Likewise.
+   * gcc.target/powerpc/vector-pair-11.c: Likewise.
+   * gcc.target/powerpc/vector-pair-12.c: Likewise.
+
+ Branch work178-vpair, patch #402 
+
+Add vector pair init and splat.
+
+2024-09-13  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New
+   built-in function.
+   (__builtin_vpair_f32_splat): Likewise.
+   (__builtin_vpair_f64_splat): Likewise.
+   * config/rs6000/vector-pair.h: Update power10 splat patterns.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec.
+   (UNSPEC_VPAIR_SPLAT): Likewise.
+   (VPAIR_SPLAT_VMODE): New mode iterator.
+   (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute.
+   (vpair_splat_name): Likewise.
+   (vpair_zero): New insn.
+   (vpair_splat_): New define_expand.
+   (vpair_splat__internal): New insns.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-5.c: New test.
+   * gcc.target/powerpc/vector-pair-6.c: Likewise.
+
+ Branch work178-vpair, patch #401 
+
+Add support for vector pair fma operations.
+
+2024-09-13  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
+   built-in.
+   (__builtin_vpair_f32_fms): Likewise.
+   (__builtin_vpair_f32_nfma): Likewise.
+   (__builtin_vpair_f32_nfms): Likewise.
+   (__builtin_vpair_f64_fma): Likewise.
+   (__builtin_vpair_f64_fms): Likewise.
+   (__builtin_vpair_f64_nfma): Likewise.
+   * config/rs6000/rs6000-protos.h (enum vpair_split_fma): New
+   enumeration.
+   (vpair_split_fma): New declaration.
+   * config/rs6000/rs6000.cc (vpair_split_fma): New function to split
+   vector pair FMA operations.
+   * config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
+   (vpair_stdname): Add UNSPEC_VPAIR_FMA.
+   (VPAIR_OP): Likewise.
+   (vpair_fma_4): New insns.
+   (vpair_fms_4): Likewise.
+   (vpair_nfma_4): Likewise.
+   (vpair_nfms_4): Likewise.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new
+   vector pair fma built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-3.c: New test.
+   * gcc.target/powerpc/vector-pair-4.c: Likewise.
+
+ Branch work178-vpair, patch #400 
+
+Add support for vector pair unary and binary operations.
+
+2024-09-13  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/rs6000-builtins.def (__builtin_vpair_*): Add new
+   built-in functions for vector pair support.
+   * config/rs6000/rs6000-protos.h (enum vpair_split_unary): New
+   enumeration.
+   (vpair_split_unary): New declaration.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.cc (print_operand): Add 'S' output modifier.
+   (vpair_split_unary): New function to split vector pair operations.
+   (vpair_split_binary): Likewise.
+   * config/rs6000/rs6000.md (toplevel): Include vector-pair.md.
+   * config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
+   * config/rs6000/vector-pair.md: New file.
+   * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Add
+   documentation for the new vector pair built-in functions.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/vector-pair-1.c: New test.
+   * gcc.target/powerpc/vector-pair-2.c: Likewise.
+
  Branch work178-vpair, baseline 
 
+Add ChangeLog.vpair and update REVISION.

[gcc(refs/users/meissner/heads/work178-vpair)] Initial vector-pair.h support

2024-09-13 Thread Michael Meissner via Gcc-cvs

https://gcc.gnu.org/g:87003006531164e1c59b6e5c4751ce38e396

commit 87003006531164e1c59b6e5c4751ce38e396
Author: Michael Meissner 
Date:   Fri Sep 13 04:12:28 2024 -0400

Initial vector-pair.h support

2024-09-13  Michael Meissner  

gcc/

* config.gcc (powerpc*-*-*): Add vector-pair.h to extra headers.
* config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): On systems
with vector pair support, define __VPAIR__.
* config/rs6000/vector-pair.h: New file.

Diff:
---
 gcc/config.gcc  |   2 +-
 gcc/config/rs6000/rs6000-c.cc   |   8 +-
 gcc/config/rs6000/vector-pair.h | 612 
 3 files changed, 619 insertions(+), 3 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0b794e977f6a..3627bed8b863 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -537,7 +537,7 @@ powerpc*-*-*)
extra_headers="${extra_headers} pmmintrin.h tmmintrin.h smmintrin.h"
extra_headers="${extra_headers} nmmintrin.h immintrin.h x86gprintrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h 
si2vmx.h"
-   extra_headers="${extra_headers} amo.h"
+   extra_headers="${extra_headers} amo.h vector-pair.h"
case x$with_cpu in

xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower1[01]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500|xfuture)
cpu_is_64bit=yes
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 82826f96a8e7..2d674f9b2369 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -590,9 +590,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
   if (rs6000_cpu == PROCESSOR_CELL)
 rs6000_define_or_undefine_macro (define_p, "__PPU__");
 
-  /* Tell the user if we support the MMA instructions.  */
+  /* Tell the user if we support the MMA instructions.  Also say that we
+ support the vector pair built-in functions.  */
   if ((flags & OPTION_MASK_MMA) != 0)
-rs6000_define_or_undefine_macro (define_p, "__MMA__");
+{
+  rs6000_define_or_undefine_macro (define_p, "__MMA__");
+  rs6000_define_or_undefine_macro (define_p, "__VPAIR__");
+}
   /* Whether pc-relative code is being generated.  */
   if ((flags & OPTION_MASK_PCREL) != 0)
 rs6000_define_or_undefine_macro (define_p, "__PCREL__");
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
new file mode 100644
index ..ebfaaa1e8a0c
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.h
@@ -0,0 +1,612 @@
+/* PowerPC vector pair include file.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   Contributed by Aldy Hernandez (al...@redhat.com).
+   Rewritten by Paolo Bonzini (bonz...@gnu.org).
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Provide support for vector pairs, even on systems that do not have native
+   support for loading and storing pairs of vectors.  */
+
+#ifndef _VECTOR_PAIR_H
+#define _VECTOR_PAIR_H 1
+
+/* During testing, allow vector-pair.h to be included multiple times.  */
+#undef  vector_pair_t
+#undef  vector_pair_f64_t
+#undef  vector_pair_f32_t
+
+#undef  vpair_f64_abs
+#undef  vpair_f64_add
+#undef  vpair_f64_div
+#undef  vpair_f64_fma
+#undef  vpair_f64_fms
+#undef  vpair_f64_max
+#undef  vpair_f64_min
+#undef  vpair_f64_mul
+#undef  vpair_f64_nabs
+#undef  vpair_f64_neg
+#undef  vpair_f64_nfma
+#undef  vpair_f64_nfms
+#undef  vpair_f64_splat
+#undef  vpair_f64_sqrt
+#undef  vpair_f64_sub
+
+#undef  vpair_f32_abs
+#undef  vpair_f32_add
+#undef  vpair_f32_div
+#undef  vpair_f32_fma
+#undef  vpair_f32_fms
+#undef  vpair_f32_max
+#undef  vpair_f32_min
+#undef  vpair_f32_mul
+#undef  vpair_f32_nabs
+#undef  vpair_f32_neg
+#undef  vpair_f32_nfma
+#undef  vpair_f32_nfms
+#undef  vpair_f32_splat
+#undef  vpair_f32_sqrt
+#undef  vpair_f32_sub
+
+#if !__VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__
+#if __MMA__ && __VPAIR__
+#define __VPAIR_BUILTIN__  1
+
+#elif __MMA__
+#

[gcc r15-3620] Match: Remove unnecessary types_match for case 1 of signed SAT_ADD

2024-09-13 Thread Pan Li via Gcc-cvs

https://gcc.gnu.org/g:45e7cc9caf327bfddd75b3093eb855b8b64acae8

commit r15-3620-g45e7cc9caf327bfddd75b3093eb855b8b64acae8
Author: Pan Li 
Date:   Fri Sep 13 11:36:40 2024 +0800

Match: Remove unnecessary types_match for case 1 of signed SAT_ADD

Given that all commutative binary operators require matching types
for both operands, remove the types_match check for case 1 of
the signed SAT_ADD, because we have (bit_xor @0 @1), which ensures
the operands have the correct tree type.

The test suites below passed for this patch.
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

* match.pd: Remove the types_match check for signed SAT_ADD
case 1.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 4cef965c9c7a..5566c0e4c41c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3204,8 +3204,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
integer_zerop)
(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
@2)
- (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
-  && types_match (type, @0, @1
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type
 
 /* Signed saturation add, case 2:
T sum = (T)((UT)X + (UT)Y)

[gcc r15-3621] AVR: avr.cc - Reorder functions to require less forward decls.

2024-09-13 Thread Georg-Johann Lay via Gcc-cvs

https://gcc.gnu.org/g:be59aaf13cea06a6dd01736d2c31d1c3bc2a60ee

commit r15-3621-gbe59aaf13cea06a6dd01736d2c31d1c3bc2a60ee
Author: Georg-Johann Lay 
Date:   Tue Sep 10 18:28:36 2024 +0200

AVR: avr.cc - Reorder functions to require less forward decls.

gcc/
* config/avr/avr.cc (avr_init_machine_status): Move code to...
(avr_option_override) : ...lambda.
(avr_insn_has_reg_unused_note_p): Move up.
(_reg_unused_after, reg_unused_after): Move up.
(output_reload_in_const): Move up.
(avr_c_mode_for_floating_type): Move down.

Diff:
---
 gcc/config/avr/avr.cc | 891 +-
 1 file changed, 439 insertions(+), 452 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 1f809d8e1e3b..f743261c6adf 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -153,16 +153,6 @@ static const char *out_movqi_mr_r (rtx_insn *, rtx[], int 
*);
 static const char *out_movhi_mr_r (rtx_insn *, rtx[], int *);
 static const char *out_movsi_mr_r (rtx_insn *, rtx[], int *);
 
-static int get_sequence_length (rtx_insn *insns);
-static int sequent_regs_live (void);
-static const char *ptrreg_to_str (int);
-static int avr_num_arg_regs (machine_mode, const_tree);
-static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code,
-int, bool);
-static void output_reload_in_const (rtx *, rtx, int *, bool);
-static struct machine_function *avr_init_machine_status (void);
-static bool _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn);
-
 
 /* Prototypes for hook implementors if needed before their implementation.  */
 
@@ -456,7 +446,10 @@ avr_option_override (void)
   avr_addr.sp_l = 0x3D + avr_arch->sfr_offset;
   avr_addr.sp_h = avr_addr.sp_l + 1;
 
-  init_machine_status = avr_init_machine_status;
+  init_machine_status = []()
+  {
+return ggc_cleared_alloc ();
+  };
 
   avr_log_set_avr_log();
 
@@ -473,14 +466,6 @@ avr_option_override (void)
   }
 }
 
-/* Function to set up the backend function structure.  */
-
-static struct machine_function *
-avr_init_machine_status (void)
-{
-  return ggc_cleared_alloc ();
-}
-
 
 /* Implement `INIT_EXPANDERS'.  */
 /* The function works like a singleton.  */
@@ -1179,7 +1164,7 @@ sequent_regs_live (void)
 
 /* Obtain the length sequence of insns.  */
 
-int
+static int
 get_sequence_length (rtx_insn *insns)
 {
   int length = 0;
@@ -2933,6 +2918,7 @@ avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree 
fntype, rtx libname,
   cfun->machine->sibcall_fails = 0;
 }
 
+
 /* Returns the number of registers to allocate for a function argument.  */
 
 static int
@@ -3099,6 +3085,152 @@ avr_xload_libgcc_p (machine_mode mode)
 }
 
 
+/* Return true when INSN has a REG_UNUSED note for hard reg REG.
+   rtlanal.cc::find_reg_note() uses == to compare XEXP (link, 0)
+   therefore use a custom function.  */
+
+static bool
+avr_insn_has_reg_unused_note_p (rtx_insn *insn, rtx reg)
+{
+  for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1))
+if (REG_NOTE_KIND (link) == REG_UNUSED
+   && REG_P (XEXP (link, 0))
+   && REGNO (reg) >= REGNO (XEXP (link, 0))
+   && END_REGNO (reg) <= END_REGNO (XEXP (link, 0)))
+  return true;
+
+  return false;
+}
+
+
+/* A helper for the next function.
+   Return nonzero if REG is not used after INSN.
+   We assume REG is a reload reg, and therefore does
+   not live past labels.  It may live past calls or jumps though.  */
+
+static bool
+_reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn)
+{
+  if (look_at_insn)
+{
+  /* If the reg is set by this instruction, then it is safe for our
+case.  Disregard the case where this is a store to memory, since
+we are checking a register used in the store address.  */
+  rtx set = single_set (insn);
+  if (set && !MEM_P (SET_DEST (set))
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+   return 1;
+
+  /* This case occurs when fuse-add introduced a POST_INC addressing,
+but the address register is unused after.  */
+  if (set)
+   {
+ rtx mem = MEM_P (SET_SRC (set)) ? SET_SRC (set) : SET_DEST (set);
+ if (MEM_P (mem)
+ && reg_overlap_mentioned_p (reg, XEXP (mem, 0))
+ && avr_insn_has_reg_unused_note_p (insn, reg))
+   return 1;
+   }
+}
+
+  while ((insn = NEXT_INSN (insn)))
+{
+  rtx set;
+  enum rtx_code code = GET_CODE (insn);
+
+#if 0
+  /* If this is a label that existed before reload, then the register
+if dead here.  However, if this is a label added by reorg, then
+the register may still be live here.  We can't tell the difference,
+so we just ignore labels completely.  */
+  if (code == CODE_LABEL)
+   return 1;
+  /* else */
+#endif
+
+  if (!INSN_P (insn))
+   continue;
+
+  if (code == JUMP_INSN)
+   r

[gcc r15-3622] AVR: Tweak 32-bit EQ and NE comparisons.

2024-09-13 Thread Georg-Johann Lay via Gcc-cvs

https://gcc.gnu.org/g:1ec16778312a902592822cbda626241da68ea643

commit r15-3622-g1ec16778312a902592822cbda626241da68ea643
Author: Georg-Johann Lay 
Date:   Tue Sep 10 17:51:58 2024 +0200

AVR: Tweak 32-bit EQ and NE comparisons.

The order in which multi-byte EQ and NE comparisons perform
the byte comparisons does not matter, and there are situations where
using SBIW on the high word can save an instruction.

gcc/
* config/avr/avr.cc (avr_out_compare): Tweak 32-bit EQ and NE
comparisons that can use SBIW for the hi16 part.

Diff:
---
 gcc/config/avr/avr.cc | 25 +
 1 file changed, 25 insertions(+)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index f743261c6adf..25220c3bc0c7 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -5990,6 +5990,31 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
}
 }
 
+  /* Comparisons == and != may change the order in which the sub-bytes are
+ being compared.  Start with the high 16 bits so we can use SBIW.  */
+
+  if (n_bytes == 4
+  && compare_eq_p (insn)
+  && AVR_HAVE_ADIW
+  && REGNO (xreg) >= REG_22)
+{
+  if (xval == const0_rtx)
+   return avr_asm_len ("sbiw %C0,0"   CR_TAB
+   "cpc %B0,__zero_reg__" CR_TAB
+   "cpc %A0,__zero_reg__", xop, plen, 3);
+
+  rtx xhi16 = simplify_gen_subreg (HImode, xval, mode, 2);
+  if (IN_RANGE (UINTVAL (xhi16) & GET_MODE_MASK (HImode), 0, 63)
+ && reg_unused_after (insn, xreg))
+   {
+ xop[1] = xhi16;
+ avr_asm_len ("sbiw %C0,%1", xop, plen, 1);
+ xop[1] = xval;
+ return avr_asm_len ("sbci %B0,hi8(%1)" CR_TAB
+ "sbci %A0,lo8(%1)", xop, plen, 2);
+   }
+}
+
   for (int i = 0; i < n_bytes; i++)
 {
   /* We compare byte-wise.  */

[gcc r15-3623] AVR: Rework avr_out_compare.

2024-09-13 Thread Georg-Johann Lay via Gcc-cvs

https://gcc.gnu.org/g:494d3c3faaee0dbde696ea334f8e242ae85ae2b5

commit r15-3623-g494d3c3faaee0dbde696ea334f8e242ae85ae2b5
Author: Georg-Johann Lay 
Date:   Thu Sep 12 14:24:53 2024 +0200

AVR: Rework avr_out_compare.

16-bit comparisons like R25:24 == -1 are currently performed like
cpi R24, -1
cpc R25, R24
The same is possible for wider modes.  ADIW can be used like SBIW when
the compare code is EQ or NE because such comparisons are just about
(propagating) the Z flag.  The patch adds convenient helper functions
like avr_byte() and uses them in avr_out_compare(); they may be useful
in other output functions, too.

For example, with the patch

R24:SI == -1 (unused after)
adiw r26,1
sbci r25,hi8(-1)
sbci r24,lo8(-1)

R18:SI == -1
cpi r18,-1
cpc r19,r18
cpc r20,r18
cpc r21,r18

Without the patch, we had:

R24:SI == -1 (unused after)
cpi r24,-1
sbci r25,-1
sbci r26,-1
sbci r27,-1

R18:SI == -1
cpi r18,-1
ldi r24,-1
cpc r19,r24
cpc r20,r24
cpc r21,r24

gcc/
* config/avr/avr.cc (avr_chunk, avr_byte, avr_word)
(avr_int8, avr_uint8, avr_int16): New helper functions.
(avr_out_compare): Overhaul.

Diff:
---
 gcc/config/avr/avr.cc | 175 ++
 1 file changed, 118 insertions(+), 57 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 25220c3bc0c7..b26716551fcf 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -247,6 +247,68 @@ avr_tolower (char *lo, const char *up)
 }
 
 
+/* Return chunk of mode MODE of X as an rtx.  N specifies the subreg
+   byte at which the chunk starts.  N must be an integral multiple
+   of the mode size.  */
+
+static rtx
+avr_chunk (machine_mode mode, rtx x, int n)
+{
+  gcc_assert (n % GET_MODE_SIZE (mode) == 0);
+  machine_mode xmode = GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x);
+  return simplify_gen_subreg (mode, x, xmode, n);
+}
+
+
+/* Return the N-th byte of X as an rtx.  */
+
+static rtx
+avr_byte (rtx x, int n)
+{
+  return avr_chunk (QImode, x, n);
+}
+
+
+/* Return the sub-word of X starting at byte number N.  */
+
+static rtx
+avr_word (rtx x, int n)
+{
+  return avr_chunk (HImode, x, n);
+}
+
+
+/* Return the N-th byte of compile-time constant X as an int8_t.  */
+
+static int8_t
+avr_int8 (rtx x, int n)
+{
+  gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
+
+  return (int8_t) trunc_int_for_mode (INTVAL (avr_byte (x, n)), QImode);
+}
+
+/* Return the N-th byte of compile-time constant X as an uint8_t.  */
+
+static uint8_t
+avr_uint8 (rtx x, int n)
+{
+  return (uint8_t) avr_int8 (x, n);
+}
+
+
+/* Return the sub-word of compile-time constant X that starts
+   at byte N as an int16_t.  */
+
+static int16_t
+avr_int16 (rtx x, int n)
+{
+  gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
+
+  return (int16_t) trunc_int_for_mode (INTVAL (avr_word (x, n)), HImode);
+}
+
+
 /* Constraint helper function.  XVAL is a CONST_INT or a CONST_DOUBLE.
Return true if the least significant N_BYTES bytes of XVAL all have a
popcount in POP_MASK and false, otherwise.  POP_MASK represents a subset
@@ -5917,9 +5979,6 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
   xval = avr_to_int_mode (xop[1]);
 }
 
-  /* MODE of the comparison.  */
-  machine_mode mode = GET_MODE (xreg);
-
   gcc_assert (REG_P (xreg));
   gcc_assert ((CONST_INT_P (xval) && n_bytes <= 4)
  || (const_double_operand (xval, VOIDmode) && n_bytes == 8));
@@ -5927,13 +5986,15 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
   if (plen)
 *plen = 0;
 
+  const bool eqne_p = compare_eq_p (insn);
+
   /* Comparisons == +/-1 and != +/-1 can be done similar to camparing
  against 0 by ORing the bytes.  This is one instruction shorter.
  Notice that 64-bit comparisons are always against reg:ALL8 18 (ACC_A)
  and therefore don't use this.  */
 
-  if (!test_hard_reg_class (LD_REGS, xreg)
-  && compare_eq_p (insn)
+  if (eqne_p
+  && ! test_hard_reg_class (LD_REGS, xreg)
   && reg_unused_after (insn, xreg))
 {
   if (xval == const1_rtx)
@@ -5962,39 +6023,11 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
}
 }
 
-  /* Comparisons == -1 and != -1 of a d-register that's used after the
- comparison.  (If it's unused after we use CPI / SBCI or ADIW sequence
- from below.)  Instead of  CPI Rlo,-1 / LDI Rx,-1 / CPC Rhi,Rx  we can
- use  CPI Rlo,-1 / CPC Rhi,Rlo  which is 1 instruction shorter:
- If CPI is true then Rlo contains -1 and we can use Rlo instead of Rx
- when CPC'ing the high part.  If CPI is false then CPC cannot render
- the result to true.  This also wor

[gcc r15-3624] libstdc++: Do not use memmove for 1-element ranges [PR108846, PR116471]

2024-09-13 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:5938e0681c3907b2771ce6717988416b0ddd2f54

commit r15-3624-g5938e0681c3907b2771ce6717988416b0ddd2f54
Author: Giuseppe D'Angelo 
Date:   Fri Aug 23 15:05:54 2024 +0200

libstdc++: Do not use memmove for 1-element ranges [PR108846,PR116471]

This commit ports the fixes already applied by r13-6372-g822a11a1e642e0
to the range-based versions of copy/move algorithms.

When doing so, a further bug (PR116471) was discovered in the
implementation of the range-based algorithms: although the algorithms
are already constrained by the indirectly_copyable/movable concepts,
there was a failing static_assert in the memmove path.

This static_assert checked that iterator's value type was assignable by
using the is_copy_assignable (move) type traits. However, this is a
problem, because the traits are too strict when checking for constness;
a type like

  struct S { S& operator=(S &) = default; };

is trivially copyable (and thus could benefit from the memmove path),
but it does not satisfy is_copy_assignable because the operator takes
its argument by non-const reference.

Now, the reason the check was there is that a type with
a deleted assignment operator like

  struct E { E& operator=(const E&) = delete; };

is still trivially copyable, but not assignable. We don't want
algorithms like std::ranges::copy to compile because they end up
selecting the memmove path, "ignoring" the fact that E isn't even
copy assignable.

But the static_assert isn't needed here any longer: as noted before,
the ranges algorithms already have the appropriate constraints; and
even if they didn't, there's now a non-discarded codepath to deal with
ranges of length 1 where there is an explicit assignment operation.

Therefore, this commit removes it. (In fact, r13-6372-g822a11a1e642e0
removed the same static_assert from the non-ranges algorithms.)

libstdc++-v3/ChangeLog:

PR libstdc++/108846
PR libstdc++/116471
* include/bits/ranges_algobase.h (__assign_one): New helper
function.
(__copy_or_move): Remove a spurious static_assert; use
__assign_one for memcpyable ranges of length 1.
(__copy_or_move_backward): Likewise.
* testsuite/25_algorithms/copy/108846.cc: Extend to range-based
algorithms, and cover both memcpyable and non-memcpyable
cases.
* testsuite/25_algorithms/copy_backward/108846.cc: Likewise.
* testsuite/25_algorithms/copy_n/108846.cc: Likewise.
* testsuite/25_algorithms/move/108846.cc: Likewise.
* testsuite/25_algorithms/move_backward/108846.cc: Likewise.

Signed-off-by: Giuseppe D'Angelo 

Diff:
---
 libstdc++-v3/include/bits/ranges_algobase.h| 46 ++
 .../testsuite/25_algorithms/copy/108846.cc | 38 ++
 .../25_algorithms/copy_backward/108846.cc  | 38 ++
 .../testsuite/25_algorithms/copy_n/108846.cc   | 38 +-
 .../testsuite/25_algorithms/move/108846.cc | 40 ++-
 .../25_algorithms/move_backward/108846.cc  | 38 +-
 6 files changed, 209 insertions(+), 29 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algobase.h 
b/libstdc++-v3/include/bits/ranges_algobase.h
index fd35b8ba14cb..9b45cbc5ef41 100644
--- a/libstdc++-v3/include/bits/ranges_algobase.h
+++ b/libstdc++-v3/include/bits/ranges_algobase.h
@@ -225,6 +225,16 @@ namespace ranges
  copy_backward_result<_Iter, _Out>>
 __copy_or_move_backward(_Iter __first, _Sent __last, _Out __result);
 
+  template
+constexpr void
+__assign_one(_Iter& __iter, _Out& __result)
+{
+  if constexpr (_IsMove)
+ *__result = std::move(*__iter);
+  else
+ *__result = *__iter;
+}
+
   template _Sent,
   weakly_incrementable _Out>
@@ -279,23 +289,19 @@ namespace ranges
  if constexpr (__memcpyable<_Iter, _Out>::__value)
{
  using _ValueTypeI = iter_value_t<_Iter>;
- static_assert(_IsMove
- ? is_move_assignable_v<_ValueTypeI>
- : is_copy_assignable_v<_ValueTypeI>);
  auto __num = __last - __first;
- if (__num)
+ if (__num > 1) [[likely]]
__builtin_memmove(__result, __first,
-   sizeof(_ValueTypeI) * __num);
+ sizeof(_ValueTypeI) * __num);
+ else if (__num == 1)
+   ranges::__assign_one<_IsMove>(__first, __result);
  return {__first + __num, __result + __num};
}
}
 
  for (auto __n = __last - __first; __n > 0; --__n)
{

[gcc r15-3625] s390: Fix AQ and AR constraints

2024-09-13 Thread Stefan Schulze Frielinghaus via Gcc-cvs

https://gcc.gnu.org/g:1a71ff3b89aadc7fa0af0bca269d74bb23c1a957

commit r15-3625-g1a71ff3b89aadc7fa0af0bca269d74bb23c1a957
Author: Stefan Schulze Frielinghaus 
Date:   Fri Sep 13 15:05:33 2024 +0200

s390: Fix AQ and AR constraints

Ensure for AQ and AR constraints that the resulting displacement after
adding any positive offset less than the size of the object being
referenced is still valid.

gcc/ChangeLog:

* config/s390/s390.cc (s390_mem_constraint): Check displacement
for AQ and AR constraints.

Diff:
---
 gcc/config/s390/s390.cc | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index c1649ca49bd1..926987113dd4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3689,6 +3689,18 @@ s390_mem_constraint (const char *str, rtx op)
   if ((reload_completed || reload_in_progress)
  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
return 0;
+  /* offsettable_memref_p ensures only that any positive offset added to
+the address forms a valid general address.  For AQ and AR constraints
+we also have to verify that the resulting displacement after adding
+any positive offset less than the size of the object being referenced
+is still valid.  */
+  if (str[1] == 'Q' || str[1] == 'R')
+   {
+ int o = GET_MODE_SIZE (GET_MODE (op)) - 1;
+ rtx tmp = adjust_address (op, QImode, o);
+ if (!s390_check_qrst_address (str[1], XEXP (tmp, 0), true))
+   return 0;
+   }
   return s390_check_qrst_address (str[1], XEXP (op, 0), true);
 case 'B':
   /* Check for non-literal-pool variants of memory constraints.  */

[gcc r15-3626] s390: Fix TF to FPRX2 conversion [PR115860]

2024-09-13 Thread Stefan Schulze Frielinghaus via Gcc-cvs

https://gcc.gnu.org/g:46c2538435dfc50dd5c67c4e03ce387d1f6ebe9b

commit r15-3626-g46c2538435dfc50dd5c67c4e03ce387d1f6ebe9b
Author: Stefan Schulze Frielinghaus 
Date:   Fri Sep 13 15:09:55 2024 +0200

s390: Fix TF to FPRX2 conversion [PR115860]

Currently subregs originating from *tf_to_fprx2_0 and *tf_to_fprx2_1
survive register allocation.  This in turn leads to wrong register
renaming.  Keeping the current approach would mean we need two insns for
*tf_to_fprx2_0 and *tf_to_fprx2_1, respectively.  Something along the
lines of:

(define_insn "*tf_to_fprx2_0"
  [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "=f") 0)
(unspec:DF [(match_operand:TF 1 "general_operand" "v")]
   UNSPEC_TF_TO_FPRX2_0))]
  "TARGET_VXE"
  "#")

(define_insn "*tf_to_fprx2_0"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=f")
(unspec:DF [(match_operand:TF 1 "general_operand" "v")]
   UNSPEC_TF_TO_FPRX2_0))]
  "TARGET_VXE"
  "vpdi\t%v0,%v1,%v0,1"
  [(set_attr "op_type" "VRR")])

and similar for *tf_to_fprx2_1.  Note, pre register allocation operand 0
has mode FPRX2 and afterwards DF once subregs have been eliminated.

Since we always copy a whole vector register into a floating-point
register pair, another way to fix this is to merge *tf_to_fprx2_0 and
*tf_to_fprx2_1 into a single insn which means we don't have to use
subregs at all.  The downside of this is that the assembler template
contains two instructions, now.  The upside is that we don't have to
come up with some artificial insn before RA which might be more
readable/maintainable.  That is implemented by this patch.

In commit r11-4872-ge627cda5686592, the output operand specifier %V was
introduced which is used in tf_to_fprx2 only, now.  Instead of coming up
with its counterpart %F for floating-point registers, which would also
only be used in tf_to_fprx2, I print the operands directly.  This
renders %V unused which is why it is removed by this patch.

gcc/ChangeLog:

PR target/115860
* config/s390/s390.cc (print_operand): Remove operand specifier
%V.
* config/s390/s390.md (UNSPEC_TF_TO_FPRX2): New.
* config/s390/vector.md (*tf_to_fprx2_0): Remove.
(*tf_to_fprx2_1): Remove.
(tf_to_fprx2): New.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/long-double-asm-abi.c: Adapt
scan-assembler directive.
* gcc.target/s390/vector/long-double-to-i64.c: Adapt
scan-assembler directive.
* gcc.target/s390/pr115860-1.c: New test.

Diff:
---
 gcc/config/s390/s390.cc|  5 +-
 gcc/config/s390/s390.md|  2 +
 gcc/config/s390/vector.md  | 75 --
 gcc/testsuite/gcc.target/s390/pr115860-1.c | 26 
 .../gcc.target/s390/vector/long-double-asm-abi.c   |  2 +-
 .../gcc.target/s390/vector/long-double-to-i64.c|  2 -
 6 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 926987113dd4..c9172d1153ac 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8600,7 +8600,6 @@ print_operand_address (FILE *file, rtx addr)
 't': CONST_INT: "start" of contiguous bitmask X in SImode.
 'x': print integer X as if it's an unsigned halfword.
 'v': print register number as vector register (v1 instead of f1).
-'V': print the second word of a TFmode operand as vector register.
 */
 
 void
@@ -8854,13 +8853,13 @@ print_operand (FILE *file, rtx x, int code)
 case REG:
   /* Print FP regs as fx instead of vx when they are accessed
 through non-vector mode.  */
-  if ((code == 'v' || code == 'V')
+  if (code == 'v'
  || VECTOR_NOFP_REG_P (x)
  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
  || (VECTOR_REG_P (x)
  && (GET_MODE_SIZE (GET_MODE (x)) /
  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
-   fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
+   fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
   else
fprintf (file, "%s", reg_names[REGNO (x)]);
   break;
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 592cf62d9623..4a225ae24f33 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -241,6 +241,8 @@
UNSPEC_VEC_VFMIN
UNSPEC_VEC_VFMAX
 
+   UNSPEC_TF_TO_FPRX2
+
UNSPEC_NNPA_VCLFNHS_V8HI
UNSPEC_NNPA_VCLFNLS_V8HI
UNSPEC_NNPA_VCRNFS_V8HI
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index a75b7cb58257..e6f83d07de27 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -907,36 +907,45 @@
   "vmrlg\t%0,%1,%2";
   [

[gcc r13-9021] testsuite: Fix up pr116034.c test for big/pdp endian [PR116061]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:e5a9c15266ba70b3a4cbc0f8e6bc8537c9b1c12d

commit r13-9021-ge5a9c15266ba70b3a4cbc0f8e6bc8537c9b1c12d
Author: Jakub Jelinek 
Date:   Wed Jul 24 18:00:05 2024 +0200

testsuite: Fix up pr116034.c test for big/pdp endian [PR116061]

Didn't notice the memmove is into an int variable, so the test
was still failing on big endian.

2024-07-24  Jakub Jelinek  

PR tree-optimization/116034
PR testsuite/116061
* gcc.dg/pr116034.c (g): Change type from int to unsigned short.
(foo): Guard memmove call on __SIZEOF_SHORT__ == 2.

(cherry picked from commit 69e69847e21a8d951ab5f09fd3421449564dba31)

Diff:
---
 gcc/testsuite/gcc.dg/pr116034.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr116034.c b/gcc/testsuite/gcc.dg/pr116034.c
index 9a31de034246..955b4c9e86b8 100644
--- a/gcc/testsuite/gcc.dg/pr116034.c
+++ b/gcc/testsuite/gcc.dg/pr116034.c
@@ -2,12 +2,13 @@
 /* { dg-do run } */
 /* { dg-options "-O1 -fno-strict-aliasing" } */
 
-int g;
+unsigned short int g;
 
 static inline int
 foo (_Complex unsigned short c)
 {
-  __builtin_memmove (&g, 1 + (char *) &c, 2);
+  if (__SIZEOF_SHORT__ == 2)
+__builtin_memmove (&g, 1 + (char *) &c, 2);
   return g;
 }

[gcc r13-9022] i386: Fix up __builtin_ia32_b{extr{, i}_u{32, 64}, zhi_{s, d}i} folding [PR116287]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:e5839cad7886c0277c111d96cc99c400f6f36b9d

commit r13-9022-ge5839cad7886c0277c111d96cc99c400f6f36b9d
Author: Jakub Jelinek 
Date:   Fri Aug 9 14:32:51 2024 +0200

i386: Fix up __builtin_ia32_b{extr{,i}_u{32,64},zhi_{s,d}i} folding 
[PR116287]

The GENERIC folding of these builtins has cases where it folds to a
constant regardless of the value of the first operand.  If so, we need
to use omit_one_operand to avoid throwing away side-effects in the first
operand if any.  The cases which verify the first argument is INTEGER_CST
don't need that, because INTEGER_CST doesn't have side-effects.

2024-08-09  Jakub Jelinek  

PR target/116287
* config/i386/i386.cc (ix86_fold_builtin) :
When folding into zero without checking whether first argument is
constant, use omit_one_operand.
(ix86_fold_builtin) : Likewise.

* gcc.target/i386/bmi-pr116287.c: New test.
* gcc.target/i386/bmi2-pr116287.c: New test.
* gcc.target/i386/tbm-pr116287.c: New test.

(cherry picked from commit 6e7088dbe3bf87108a89558ffb7df36df3469206)

Diff:
---
 gcc/config/i386/i386.cc   | 12 +++
 gcc/testsuite/gcc.target/i386/bmi-pr116287.c  | 28 ++
 gcc/testsuite/gcc.target/i386/bmi2-pr116287.c | 24 ++
 gcc/testsuite/gcc.target/i386/tbm-pr116287.c  | 29 +++
 4 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a90351ca9c2c..85aa68175aa3 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18054,9 +18054,11 @@ ix86_fold_builtin (tree fndecl, int n_args,
  unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
  unsigned int start = tree_to_uhwi (args[1]);
  unsigned int len = (start & 0xff00) >> 8;
+ tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
  start &= 0xff;
  if (start >= prec || len == 0)
-   res = 0;
+   return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
+args[0]);
  else if (!tree_fits_uhwi_p (args[0]))
break;
  else
@@ -18065,7 +18067,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
len = prec;
  if (len < HOST_BITS_PER_WIDE_INT)
res &= (HOST_WIDE_INT_1U << len) - 1;
- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
+ return build_int_cstu (lhs_type, res);
}
  break;
 
@@ -18075,15 +18077,17 @@ ix86_fold_builtin (tree fndecl, int n_args,
  if (tree_fits_uhwi_p (args[1]))
{
  unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
+ tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
  if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
return args[0];
  if (idx == 0)
-   return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
+   return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
+args[0]);
  if (!tree_fits_uhwi_p (args[0]))
break;
  unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
  res &= ~(HOST_WIDE_INT_M1U << idx);
- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
+ return build_int_cstu (lhs_type, res);
}
  break;
 
diff --git a/gcc/testsuite/gcc.target/i386/bmi-pr116287.c 
b/gcc/testsuite/gcc.target/i386/bmi-pr116287.c
new file mode 100644
index ..2212cb458d26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi-pr116287.c
@@ -0,0 +1,28 @@
+/* PR target/116287 */
+/* { dg-do run { target bmi } } */
+/* { dg-options "-O2 -mbmi" } */
+
+#include 
+
+#include "bmi-check.h"
+
+static void
+bmi_test ()
+{
+  unsigned int a = 0;
+  if (__builtin_ia32_bextr_u32 (a++, 0) != 0)
+abort ();
+  if (__builtin_ia32_bextr_u32 (a++, 0x120) != 0)
+abort ();
+  if (a != 2)
+abort ();
+#ifdef __x86_64__
+  unsigned long long b = 0;
+  if (__builtin_ia32_bextr_u64 (b++, 0) != 0)
+abort ();
+  if (__builtin_ia32_bextr_u64 (b++, 0x140) != 0)
+abort ();
+  if (b != 2)
+abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pr116287.c 
b/gcc/testsuite/gcc.target/i386/bmi2-pr116287.c
new file mode 100644
index ..51c939c39f62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pr116287.c
@@ -0,0 +1,24 @@
+/* PR target/116287 */
+/* { dg-do run { target bmi2 } } */
+/* { dg-options "-O2 -mbmi2" } */
+
+#include 
+
+#include "bmi2-check.h"
+
+static void
+bmi2_test ()
+{
+  unsigned int a = 0;
+  if (__builtin_ia32_bzhi_si (a++, 0) != 0)
+abort ();
+  if (a != 1)
+abort ();
+#ifdef __x86_64__
+  unsigned long lon

[gcc r13-9023] libiberty: Fix up > 64K section handling in simple_object_elf_copy_lto_debug_section [PR116614]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:9b4a7d907d90ba7b7787433ab66eaf6112c33ffb

commit r13-9023-g9b4a7d907d90ba7b7787433ab66eaf6112c33ffb
Author: Jakub Jelinek 
Date:   Sat Sep 7 09:36:53 2024 +0200

libiberty: Fix up > 64K section handling in 
simple_object_elf_copy_lto_debug_section [PR116614]

cat abc.C
  #define A(n) struct T##n {} t##n;
  #define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) 
A(n##7) A(n##8) A(n##9)
  #define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) 
B(n##7) B(n##8) B(n##9)
  #define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) 
C(n##7) C(n##8) C(n##9)
  #define E(n) D(n##0) D(n##1) D(n##2) D(n##3) D(n##4) D(n##5) D(n##6) 
D(n##7) D(n##8) D(n##9)
  E(1) E(2) E(3)
  int main () { return 0; }
./xg++ -B ./ -o abc{.o,.C} -flto -flto-partition=1to1 -O2 -g 
-fdebug-types-section -c
./xgcc -B ./ -o abc{,.o} -flto -flto-partition=1to1 -O2
(not included in testsuite as it takes a while to compile) FAILs with
lto-wrapper: fatal error: Too many copied sections: Operation not supported
compilation terminated.
/usr/bin/ld: error: lto-wrapper failed
collect2: error: ld returned 1 exit status

The following patch fixes that.  Most of the 64K+ section support for
reading and writing was already there years ago (and especially reading
has been used quite often already), and a further bug in it was fixed in
the PR104617 fix.
Yet, the fix isn't solely about removing the
  if (new_i - 1 >= SHN_LORESERVE)
{
  *err = ENOTSUP;
  return "Too many copied sections";
}
5 lines, the missing part was that the function only handled reading of
the .symtab_shndx section but not copying/updating of it.
If the result has less than 64K-epsilon sections, that actually wasn't
needed, but e.g. with -fdebug-types-section one can exceed that pretty
easily (reported to us on WebKitGtk build on ppc64le).
Updating the section is slightly more complicated, because it basically
needs to be done in lock step with updating the .symtab section: if one
doesn't need to use SHN_XINDEX in there, the section should (or should be
updated to) contain an SHN_UNDEF entry, otherwise it needs to have whatever
would otherwise be stored but couldn't fit.  But repeating, because of that,
all the symtab decisions about what to discard and how to rewrite it would
be ugly.

So, the patch instead emits the .symtab_shndx section (or sections) last
and prepares the content during the .symtab processing; in a second
pass, which goes just through the .symtab_shndx sections, it simply uses
the saved content.

2024-09-07  Jakub Jelinek  

PR lto/116614
* simple-object-elf.c (SHN_COMMON): Align comment with neighbouring
comments.
(SHN_HIRESERVE): Use uppercase hex digits instead of lowercase for
consistency.
(simple_object_elf_find_sections): Formatting fixes.
(simple_object_elf_fetch_attributes): Likewise.
(simple_object_elf_attributes_merge): Likewise.
(simple_object_elf_start_write): Likewise.
(simple_object_elf_write_ehdr): Likewise.
(simple_object_elf_write_shdr): Likewise.
(simple_object_elf_write_to_file): Likewise.
(simple_object_elf_copy_lto_debug_section): Likewise.  Don't fail 
for
new_i - 1 >= SHN_LORESERVE, instead arrange in that case to copy
over .symtab_shndx sections, though emit those last and compute 
their
section content when processing associated .symtab sections.  Handle
simple_object_internal_read failure even in the .symtab_shndx 
reading
case.

(cherry picked from commit bb8dd0980b39cfd601f88703fd356055727ef24d)

Diff:
---
 libiberty/simple-object-elf.c | 210 --
 1 file changed, 143 insertions(+), 67 deletions(-)

diff --git a/libiberty/simple-object-elf.c b/libiberty/simple-object-elf.c
index eee07039984d..501b5ba62aac 100644
--- a/libiberty/simple-object-elf.c
+++ b/libiberty/simple-object-elf.c
@@ -128,9 +128,9 @@ typedef struct {
 
 #define SHN_UNDEF  0   /* Undefined section */
 #define SHN_LORESERVE  0xFF00  /* Begin range of reserved indices */
-#define SHN_COMMON 0xFFF2  /* Associated symbol is in common */
+#define SHN_COMMON 0xFFF2  /* Associated symbol is in common */
 #define SHN_XINDEX 0x  /* Section index is held elsewhere */
-#define SHN_HIRESERVE  0x  /* End of reserved indices */
+#define SHN_HIRESERVE  0x  /* End of reserved indices */
 
 
 /* 32-bit ELF program header.  */
@@ -569,8 +569,8 @@ simple_object_elf_find_sections (simple_object_read *sobj,
 void *data,
 int *err)
 {
-  struct simple_object_elf_read *eor =

[gcc r13-9019] gimple-fold: Fix up __builtin_clear_padding lowering [PR115527]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:1880ff0dbd814cf1e7dd53dd810f372a94d66d39

commit r13-9019-g1880ff0dbd814cf1e7dd53dd810f372a94d66d39
Author: Jakub Jelinek 
Date:   Wed Jul 17 11:38:33 2024 +0200

gimple-fold: Fix up __builtin_clear_padding lowering [PR115527]

The builtin-clear-padding-6.c testcase fails as clear_padding_type
doesn't correctly recompute the buf->size and buf->off members after
expanding clearing of an array using a runtime loop.
buf->size should be in that case the offset after which it should continue
with next members or padding before them modulo UNITS_PER_WORD and
buf->off that offset minus buf->size.  That is what the code was doing,
but with off being the start of the loop cleared array, not its end.
So, the last hunk in gimple-fold.cc fixes that.
When adding the testcase, I've noticed that the
c-c++-common/torture/builtin-clear-padding-* tests, although clearly
written as runtime tests to test the builtins at runtime, didn't have
{ dg-do run } directive and were just compile tests because of that.
When adding that to the tests, builtin-clear-padding-1.c was already
failing without that clear_padding_type hunk too, but
builtin-clear-padding-5.c was still failing even after the change.
That is due to a bug in clear_padding_flush which the patch fixes as
well - when clear_padding_flush is called with full=true (that happens
at the end of the whole __builtin_clear_padding or on those array
padding clears done by a runtime loop), it wants to flush all the pending
padding clearings rather than just some.  If it is at the end of the whole
object, it decreases wordsize when needed to make sure the code never writes
including RMW cycles to something outside of the object:
  if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize)
  > (unsigned HOST_WIDE_INT) buf->sz)
{
  gcc_assert (wordsize > 1);
  wordsize /= 2;
  i -= wordsize;
  continue;
}
but if it is full==true flush in the middle, this doesn't happen, but we
still process just the buffer bytes before the current end.  If that end
is not on a wordsize boundary, e.g. on the builtin-clear-padding-5.c test
the last chunk is 2 bytes, '\0', '\xff', i is 16 and end is 18,
nonzero_last might be equal to the end - i, i.e. 2 here, but still all_ones
might be true, so in some spots we just didn't emit any clearing in that
last chunk.

2024-07-17  Jakub Jelinek  

PR middle-end/115527
* gimple-fold.cc (clear_padding_flush): Introduce endsize
variable and use it instead of wordsize when comparing it against
nonzero_last.
(clear_padding_type): Increment off by sz.

* c-c++-common/torture/builtin-clear-padding-1.c: Add dg-do run
directive.
* c-c++-common/torture/builtin-clear-padding-2.c: Likewise.
* c-c++-common/torture/builtin-clear-padding-3.c: Likewise.
* c-c++-common/torture/builtin-clear-padding-4.c: Likewise.
* c-c++-common/torture/builtin-clear-padding-5.c: Likewise.
* c-c++-common/torture/builtin-clear-padding-6.c: New test.

(cherry picked from commit 8b5919bae11754f4b65a17e63663d3143f9615ac)

Diff:
---
 gcc/gimple-fold.cc | 12 ++
 .../c-c++-common/torture/builtin-clear-padding-1.c |  1 +
 .../c-c++-common/torture/builtin-clear-padding-2.c |  1 +
 .../c-c++-common/torture/builtin-clear-padding-3.c |  1 +
 .../c-c++-common/torture/builtin-clear-padding-4.c |  4 ++--
 .../c-c++-common/torture/builtin-clear-padding-5.c |  1 +
 .../c-c++-common/torture/builtin-clear-padding-6.c | 28 ++
 7 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index a61bfcee4e7a..5bdd1d08a265 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -4235,7 +4235,8 @@ clear_padding_flush (clear_padding_struct *buf, bool full)
  i -= wordsize;
  continue;
}
-  for (size_t j = i; j < i + wordsize && j < end; j++)
+  size_t endsize = end - i > wordsize ? wordsize : end - i;
+  for (size_t j = i; j < i + endsize; j++)
{
  if (buf->buf[j])
{
@@ -4264,12 +4265,12 @@ clear_padding_flush (clear_padding_struct *buf, bool full)
   if (padding_bytes)
{
  if (nonzero_first == 0
- && nonzero_last == wordsize
+ && nonzero_last == endsize
  && all_ones)
{
  /* All bits are padding and we had some padding
 before too.  Just extend it.  */
- padding_bytes += wordsize;
+ padding_bytes += endsize;
  continue;
}
  if (all_ones && nonzero_first == 0)
@@ -4309,7 +4310,7 @@ clear_padding_flush (clea

[gcc r13-9020] ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:aaa82d63fed5978a0bc7136a3922d280576ce257

commit r13-9020-gaaa82d63fed5978a0bc7136a3922d280576ce257
Author: Jakub Jelinek 
Date:   Tue Jul 23 10:50:29 2024 +0200

ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034]

The folding into REALPART_EXPR is correct, used only when the mem_offset
is zero, but for IMAGPART_EXPR it didn't check the exact offset value (just
that it is not 0).
The following patch fixes that by using IMAGPART_EXPR only if the offset
is right and using BITFIELD_REF or whatever else otherwise.

2024-07-23  Jakub Jelinek  
Andrew Pinski  

PR tree-optimization/116034
* tree-ssa.cc (maybe_rewrite_mem_ref_base): Only use IMAGPART_EXPR
if MEM_REF offset is equal to element type size.

* gcc.dg/pr116034.c: New test.

(cherry picked from commit b9cefd67a2a464a3c9413e6b3f28e7dc7a9ef162)

Diff:
---
 gcc/testsuite/gcc.dg/pr116034.c | 22 ++
 gcc/tree-ssa.cc |  5 -
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr116034.c b/gcc/testsuite/gcc.dg/pr116034.c
new file mode 100644
index 000000000000..9a31de034246
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116034.c
@@ -0,0 +1,22 @@
+/* PR tree-optimization/116034 */
+/* { dg-do run } */
+/* { dg-options "-O1 -fno-strict-aliasing" } */
+
+int g;
+
+static inline int
+foo (_Complex unsigned short c)
+{
+  __builtin_memmove (&g, 1 + (char *) &c, 2);
+  return g;
+}
+
+int
+main ()
+{
+  if (__SIZEOF_SHORT__ == 2
+  && __CHAR_BIT__ == 8
+  && (foo (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 0x100 : 1)
+ != (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 1 : 0x100)))
+__builtin_abort ();
+}
diff --git a/gcc/tree-ssa.cc b/gcc/tree-ssa.cc
index b0b3895af1c9..74ede9cc9cf8 100644
--- a/gcc/tree-ssa.cc
+++ b/gcc/tree-ssa.cc
@@ -1529,7 +1529,10 @@ maybe_rewrite_mem_ref_base (tree *tp, bitmap suitable_for_renaming)
}
   else if (TREE_CODE (TREE_TYPE (sym)) == COMPLEX_TYPE
   && useless_type_conversion_p (TREE_TYPE (*tp),
-TREE_TYPE (TREE_TYPE (sym))))
+TREE_TYPE (TREE_TYPE (sym)))
+  && (integer_zerop (TREE_OPERAND (*tp, 1))
+  || tree_int_cst_equal (TREE_OPERAND (*tp, 1),
+ TYPE_SIZE_UNIT (TREE_TYPE (*tp)))))
{
  *tp = build1 (integer_zerop (TREE_OPERAND (*tp, 1))
? REALPART_EXPR : IMAGPART_EXPR,

[gcc r13-9024] c++: Fix get_member_function_from_ptrfunc with -fsanitize=bounds [PR116449]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:973c6ea242cea7d95c2888ec6dde39b5cbb9dbb3

commit r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb9dbb3
Author: Jakub Jelinek 
Date:   Tue Sep 10 18:32:58 2024 +0200

c++: Fix get_member_function_from_ptrfunc with -fsanitize=bounds [PR116449]

The following testcase is miscompiled, because
get_member_function_from_ptrfunc
emits something like
(((FUNCTION.__pfn & 1) != 0)
 ? ptr + FUNCTION.__delta + FUNCTION.__pfn - 1
 : FUNCTION.__pfn) (ptr + FUNCTION.__delta, ...)
or so, so FUNCTION tree is used there 5 times.  There is
if (TREE_SIDE_EFFECTS (function)) function = save_expr (function);
but in this case function doesn't have side-effects, just nested ARRAY_REFs.
Now, if all the FUNCTION trees would be shared, it would work fine,
FUNCTION is evaluated in the first operand of COND_EXPR; but unfortunately
that isn't the case, both the BIT_AND_EXPR shortening and conversion to
bool done for build_conditional_expr actually unshare_expr that first
expression, but none of the other 4 are unshared.  With -fsanitize=bounds,
.UBSAN_BOUNDS calls are added to the ARRAY_REFs and use save_expr to avoid
evaluating the argument multiple times, but because that FUNCTION tree is
first used in the second argument of COND_EXPR (i.e. conditionally), the
SAVE_EXPR initialization is done just there and then the third argument
of COND_EXPR just uses the uninitialized temporary and so does the first
argument computation as well.

The following patch fixes that by doing save_expr even if !TREE_SIDE_EFFECTS,
but to avoid doing that too often only if !nonvirtual and if the expression
isn't a simple decl.

2024-09-10  Jakub Jelinek  

PR c++/116449
* typeck.cc (get_member_function_from_ptrfunc): Use save_expr
on instance_ptr and function even if it doesn't have side-effects,
as long as it isn't a decl.

* g++.dg/ubsan/pr116449.C: New test.

(cherry picked from commit 0008050b9d6046ba4e811a03b406fb5d98707cae)

Diff:
---
 gcc/cp/typeck.cc  | 19 ---
 gcc/testsuite/g++.dg/ubsan/pr116449.C | 14 ++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 470bb2ee5f71..2b29bdc4d70a 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -4178,10 +4178,23 @@ get_member_function_from_ptrfunc (tree *instance_ptrptr, tree function,
   if (!nonvirtual && is_dummy_object (instance_ptr))
nonvirtual = true;
 
-  if (TREE_SIDE_EFFECTS (instance_ptr))
-   instance_ptr = instance_save_expr = save_expr (instance_ptr);
+  /* Use save_expr even when instance_ptr doesn't have side-effects,
+unless it is a simple decl (save_expr won't do anything on
+constants), so that we don't ubsan instrument the expression
+multiple times.  See PR116449.  */
+  if (TREE_SIDE_EFFECTS (instance_ptr)
+ || (!nonvirtual && !DECL_P (instance_ptr)))
+   {
+ instance_save_expr = save_expr (instance_ptr);
+ if (instance_save_expr == instance_ptr)
+   instance_save_expr = NULL_TREE;
+ else
+   instance_ptr = instance_save_expr;
+   }
 
-  if (TREE_SIDE_EFFECTS (function))
+  /* See above comment.  */
+  if (TREE_SIDE_EFFECTS (function)
+ || (!nonvirtual && !DECL_P (function)))
function = save_expr (function);
 
   /* Start by extracting all the information from the PMF itself.  */
diff --git a/gcc/testsuite/g++.dg/ubsan/pr116449.C b/gcc/testsuite/g++.dg/ubsan/pr116449.C
new file mode 100644
index 000000000000..f13368a51b00
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/pr116449.C
@@ -0,0 +1,14 @@
+// PR c++/116449
+// { dg-do compile }
+// { dg-options "-O2 -Wall -fsanitize=undefined" }
+
+struct C { void foo (int); void bar (); int c[16]; };
+typedef void (C::*P) ();
+struct D { P d; };
+static D e[1] = { { &C::bar } };
+
+void
+C::foo (int x)
+{
+  (this->*e[c[x]].d) ();
+}

[gcc r15-3627] libcpp: Fix up UB in finish_embed

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:4963eb76918295a08a7c216bea986ab8e65c1cf8

commit r15-3627-g4963eb76918295a08a7c216bea986ab8e65c1cf8
Author: Jakub Jelinek 
Date:   Fri Sep 13 16:11:05 2024 +0200

libcpp: Fix up UB in finish_embed

Jonathan reported on IRC that certain unnamed proprietary static analyzer
is unhappy about the new finish_embed function and it is actually right.
On a testcase like:
 #embed __FILE__ limit (0) if_empty (0)
params->if_empty.count is 1, limit is 0, so count is 0 (we need just
a single token and one fits into pfile->directive_result).  Because
count is 0, we don't allocate toks, so it stays NULL, and then in
1301  if (prefix->count)
1302{
1303  *tok = *prefix->base_run.base;
1304  tok = toks;
1305  tokenrun *cur_run = &prefix->base_run;
1306  while (cur_run)
1307{
1308  size_t cnt = (cur_run->next ? cur_run->limit
1309: prefix->cur_token) - cur_run->base;
1310  cpp_token *t = cur_run->base;
1311  if (cur_run == &prefix->base_run)
1312{
1313  t++;
1314  cnt--;
1315}
1316  memcpy (tok, t, cnt * sizeof (cpp_token));
1317  tok += cnt;
1318  cur_run = cur_run->next;
1319}
1320}
the *tok = *prefix->base_run.base; assignment will copy the only
token.  cur_run is still non-NULL, cnt will be initially 1 and
then decremented to 0, but we invoke UB because we do
memcpy (NULL, cur_run->base + 1, 0 * sizeof (cpp_token));
and then the loop stops because cur_run->next must be NULL.

As we don't really copy anything, toks can be anything non-NULL,
so the following patch fixes that by initializing toks also to
&pfile->directive_result (just something known to be non-NULL).
This should be harmless even for the
 #embed __FILE__ limit (1)
case (no non-empty prefix/suffix) where toks isn't allocated
either, but in that case prefix->count will be 0 and in the
1321  for (size_t i = 0; i < limit; ++i)
1322{
1323  tok->src_loc = params->loc;
1324  tok->type = CPP_NUMBER;
1325  tok->flags = NO_EXPAND;
1326  if (i == 0)
1327tok->flags |= PREV_WHITE;
1328  tok->val.str.text = s;
1329  tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
1330  s += tok->val.str.len + 1;
1331  if (tok == &pfile->directive_result)
1332tok = toks;
1333  else
1334tok++;
1335  if (i < limit - 1)
1336{
1337  tok->src_loc = params->loc;
1338  tok->type = CPP_COMMA;
1339  tok->flags = NO_EXPAND;
1340  tok++;
1341}
1342}
loop limit will be 1, so tok is initially &pfile->directive_result,
that is filled in, then tok = toks; (previously setting tok to NULL,
now to &pfile->directive_result again) and because 0 < 1 - 1 is
false, nothing further will happen and the loop will finish (and as
params->suffix.count will be 0, nothing further will use tok).

2024-09-13  Jakub Jelinek  

* files.cc (finish_embed): Initialize toks to tok rather
than NULL.

Diff:
---
 libcpp/files.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcpp/files.cc b/libcpp/files.cc
index 8aff0149cbee..031169978e72 100644
--- a/libcpp/files.cc
+++ b/libcpp/files.cc
@@ -1284,7 +1284,7 @@ finish_embed (cpp_reader *pfile, _cpp_file *file,
 }
   uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
   _cpp_buff *tok_buff = NULL;
-  cpp_token *toks = NULL, *tok = &pfile->directive_result;
+  cpp_token *tok = &pfile->directive_result, *toks = tok;
   size_t count = 0;
   if (limit)
 count = (params->prefix.count + limit * 2 - 1

[gcc r15-3628] c++: Don't emit deprecated/unavailable attribute diagnostics when creating cdtor thunks [PR116678]

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:b7b67732e20217196f2a13a10fc3df4605b2b2ab

commit r15-3628-gb7b67732e20217196f2a13a10fc3df4605b2b2ab
Author: Jakub Jelinek 
Date:   Fri Sep 13 16:13:01 2024 +0200

c++: Don't emit deprecated/unavailable attribute diagnostics when creating cdtor thunks [PR116678]

Another spot where we mark_used a function (in this case ctor or dtor)
even when it is just artificially used inside of thunks (emitted on mingw
with -Os for the testcase).

2024-09-13  Jakub Jelinek  

PR c++/116678
* optimize.cc: Include decl.h.
(maybe_thunk_body): Temporarily change deprecated_state to
UNAVAILABLE_DEPRECATED_SUPPRESS.

* g++.dg/warn/deprecated-20.C: New test.

Diff:
---
 gcc/cp/optimize.cc|  6 ++
 gcc/testsuite/g++.dg/warn/deprecated-20.C | 16 
 2 files changed, 22 insertions(+)

diff --git a/gcc/cp/optimize.cc b/gcc/cp/optimize.cc
index b8791d8a9635..8429d856728f 100644
--- a/gcc/cp/optimize.cc
+++ b/gcc/cp/optimize.cc
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "target.h"
 #include "cp-tree.h"
+#include "decl.h"
 #include "stringpool.h"
 #include "cgraph.h"
 #include "debug.h"
@@ -287,6 +288,11 @@ maybe_thunk_body (tree fn, bool force)
   if (ctor_omit_inherited_parms (fns[0]))
 return 0;
 
+  /* Don't diagnose deprecated or unavailable cdtors just because they
+ have thunks emitted for them.  */
+  auto du = make_temp_override (deprecated_state,
+   UNAVAILABLE_DEPRECATED_SUPPRESS);
+
   DECL_ABSTRACT_P (fn) = false;
   if (!DECL_WEAK (fn))
 {
diff --git a/gcc/testsuite/g++.dg/warn/deprecated-20.C b/gcc/testsuite/g++.dg/warn/deprecated-20.C
new file mode 100644
index ..1911aeff4e37
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/deprecated-20.C
@@ -0,0 +1,16 @@
+// PR c++/116678
+// { dg-do compile }
+// { dg-options "-Os -pedantic" }
+
+struct S
+{
+  [[deprecated]] S () { s = 1; }   // { dg-bogus "'S::S\\\(\\\)' is deprecated" }
+  S (int x) { s = x; } // { dg-warning "C\\\+\\\+11 attributes only available with" "" { target c++98_only } .-1 }
+  ~S () {}
+  int s;
+};
+
+int
+main ()
+{
+}

[gcc r15-3629] gcn/mkoffload.cc: Use #embed for including the generated ELF file

2024-09-13 Thread Tobias Burnus via Gcc-cvs

https://gcc.gnu.org/g:508ef585243d4674d06b0737bfe8769fc18f824f

commit r15-3629-g508ef585243d4674d06b0737bfe8769fc18f824f
Author: Tobias Burnus 
Date:   Fri Sep 13 16:18:46 2024 +0200

gcn/mkoffload.cc: Use #embed for including the generated ELF file

gcc/ChangeLog:

* config/gcn/mkoffload.cc (read_file): Remove.
(process_asm): Do not add '#include' to generated C file.
(process_obj): Generate C file that uses #embed and use
__SIZE_TYPE__ and __UINTPTR_TYPE__ instead of the #include-defined
size_t and uintptr_t.
(main): Update call to it; remove no longer needed file I/O.

Diff:
---
 gcc/config/gcn/mkoffload.cc | 79 +++--
 1 file changed, 12 insertions(+), 67 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 345bbf7709c4..1f6337719e9d 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -182,44 +182,6 @@ xputenv (const char *string)
   putenv (CONST_CAST (char *, string));
 }
 
-/* Read the whole input file.  It will be NUL terminated (but
-   remember, there could be a NUL in the file itself.  */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
-  size_t alloc = 16384;
-  size_t base = 0;
-  char *buffer;
-
-  if (!fseek (stream, 0, SEEK_END))
-{
-  /* Get the file size.  */
-  long s = ftell (stream);
-  if (s >= 0)
-   alloc = s + 100;
-  fseek (stream, 0, SEEK_SET);
-}
-  buffer = XNEWVEC (char, alloc);
-
-  for (;;)
-{
-  size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
-  if (!n)
-   break;
-  base += n;
-  if (base + 1 == alloc)
-   {
- alloc *= 2;
- buffer = XRESIZEVEC (char, buffer, alloc);
-   }
-}
-  buffer[base] = 0;
-  *plen = base;
-  return buffer;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'.  */
 
@@ -651,10 +613,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
   struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
   struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *);
 
-  fprintf (cfile, "#include <stdlib.h>\n");
-  fprintf (cfile, "#include <stdint.h>\n");
-  fprintf (cfile, "#include <stdbool.h>\n\n");
-
   fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
   fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
 
@@ -719,35 +677,28 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
 /* Embed an object file into a C source file.  */
 
 static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
 {
-  size_t len = 0;
-  const char *input = read_file (in, &len);
-
   /* Dump out an array containing the binary.
- FIXME: do this with objcopy.  */
-  fprintf (cfile, "static unsigned char gcn_code[] = {");
-  for (size_t i = 0; i < len; i += 17)
-{
-  fprintf (cfile, "\n\t");
-  for (size_t j = i; j < i + 17 && j < len; j++)
-   fprintf (cfile, "%3u,", (unsigned char) input[j]);
-}
-  fprintf (cfile, "\n};\n\n");
+ If the file is empty, a parse error is shown as the argument to is_empty
+ is an undeclared identifier.  */
+  fprintf (cfile,
+  "static unsigned char gcn_code[] = {\n"
+  "#embed \"%s\" if_empty (error_file_is_empty)\n"
+  "};\n\n", fname_in);
 
   fprintf (cfile,
   "static const struct gcn_image {\n"
-  "  size_t size;\n"
+  "  __SIZE_TYPE__ size;\n"
   "  void *image;\n"
   "} gcn_image = {\n"
-  "  %zu,\n"
+  "  sizeof(gcn_code),\n"
   "  gcn_code\n"
-  "};\n\n",
-  len);
+  "};\n\n");
 
   fprintf (cfile,
   "static const struct gcn_data {\n"
-  "  uintptr_t omp_requires_mask;\n"
+  "  __UINTPTR_TYPE__ omp_requires_mask;\n"
   "  const struct gcn_image *gcn_image;\n"
   "  unsigned kernel_count;\n"
   "  const struct hsa_kernel_description *kernel_infos;\n"
@@ -1305,13 +1256,7 @@ main (int argc, char **argv)
   fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
   obstack_free (&ld_argv_obstack, NULL);
 
-  in = fopen (gcn_o_name, "r");
-  if (!in)
-   fatal_error (input_location, "cannot open intermediate gcn obj file");
-
-  process_obj (in, cfile, omp_requires);
-
-  fclose (in);
+  process_obj (gcn_o_name, cfile, omp_requires);
 
   xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
   xputenv (concat ("COMPILER_PATH=", cpath, NULL));

[gcc(refs/vendors/redhat/heads/gcc-13-branch)] Merge commit 'r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb9dbb3' into redhat/gcc-13-branch

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:2fcca36eb1d34066437939cf1b54810a75dd553f

commit 2fcca36eb1d34066437939cf1b54810a75dd553f
Merge: 03b1a31f9807 973c6ea242ce
Author: Jakub Jelinek 
Date:   Fri Sep 13 16:15:02 2024 +0200

Merge commit 'r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb9dbb3' into redhat/gcc-13-branch

Diff:

 gcc/ChangeLog  | 627 +
 gcc/DATESTAMP  |   2 +-
 gcc/c-family/ChangeLog |  10 +
 gcc/c-family/c-warn.cc |  13 +-
 gcc/c/ChangeLog|  10 +
 gcc/common/config/aarch64/aarch64-common.cc|  35 +-
 gcc/config/aarch64/aarch64-c.cc|   7 +-
 gcc/config/aarch64/aarch64-cores.def   |   2 +
 gcc/config/aarch64/aarch64-sve-builtins-base.cc|  25 +-
 gcc/config/aarch64/aarch64-sve.md  |  20 +-
 gcc/config/aarch64/aarch64-tune.md |   2 +-
 gcc/config/aarch64/aarch64.cc  |  52 +-
 gcc/config/aarch64/aarch64.h   |  10 +-
 gcc/config/aarch64/aarch64.md  |   4 +-
 gcc/config/alpha/alpha.md  |  10 +-
 gcc/config/arm/arm.cc  |  87 ++-
 gcc/config/arm/mve.md  |   2 +-
 gcc/config/avr/avr-dimode.md   |  26 +-
 gcc/config/avr/avr-protos.h|   2 +-
 gcc/config/avr/avr.cc  |  46 +-
 gcc/config/avr/avr.md  |  42 +-
 gcc/config/i386/avx512dqintrin.h   |  16 +-
 gcc/config/i386/avx512fp16intrin.h |   4 +-
 gcc/config/i386/avx512vlbwintrin.h |   4 +-
 gcc/config/i386/avx512vlintrin.h   |   2 +-
 gcc/config/i386/constraints.md |   2 +-
 gcc/config/i386/i386-options.cc|  77 ++-
 gcc/config/i386/i386.cc|  48 +-
 gcc/config/i386/i386.md|   2 +-
 gcc/config/i386/prfchiintrin.h |   9 +
 gcc/config/i386/x86-tune-costs.h   |   4 +-
 gcc/config/loongarch/loongarch.cc  |   2 +-
 gcc/config/loongarch/loongarch.h   |   7 -
 gcc/config/pa/pa.cc|   1 +
 gcc/config/pa/pa.md|  18 -
 gcc/config/riscv/riscv.cc  |   5 +-
 gcc/config/rs6000/altivec.md   | 222 ++--
 gcc/config/rs6000/rs6000-logue.cc  |  54 +-
 gcc/config/rs6000/rs6000.cc|  41 +-
 gcc/config/rs6000/rs6000.md|  21 +-
 gcc/config/rs6000/vsx.md   |  28 +-
 gcc/config/s390/3931.md|   7 -
 gcc/config/s390/s390.md|   5 +-
 gcc/config/s390/vector.md  |   6 +-
 gcc/config/sh/sh.cc|  12 +-
 gcc/cp/ChangeLog   |  17 +
 gcc/cp/method.cc   |   1 +
 gcc/cp/typeck.cc   |  22 +-
 gcc/cse.cc |   4 +-
 gcc/doc/invoke.texi|  18 +-
 gcc/expmed.cc  |   4 +-
 gcc/fortran/ChangeLog  | 100 
 gcc/fortran/dependency.cc  |  32 ++
 gcc/fortran/expr.cc|   5 +
 gcc/fortran/gfortran.h |   4 +
 gcc/fortran/iresolve.cc|   4 +
 gcc/fortran/trans-array.cc |  52 +-
 gcc/fortran/trans-expr.cc  |  49 +-
 gcc/fortran/trans-intrinsic.cc |  80 ++-
 gcc/fortran/trans-stmt.cc  |  43 +-
 gcc/fortran/trans-types.cc |   4 +-
 gcc/gimple-fold.cc |  12 +-
 gcc/ipa-icf-gimple.cc  |   4 +
 gcc/ipa-modref.cc  |   4 +-
 gcc/jit/ChangeLog  |   9 +
 gcc/jit/jit-recording.cc   |   1 +
 gcc/opt-suggestions.cc |   2 +-
 gcc/opts-common.cc |   6 +-
 gcc/testsuite/ChangeLog| 421 ++
 gcc/testsuite/c-c++-common/Warray-compare-3.c  |  13 +
 .../c-c++-common/torture/builtin-clear-padding-1.c |   1 +
 .../c-c++-common/torture/builtin-clear-padding-2.c |   1 +
 .../c-c++-common/torture/builtin-clear-padding-3.c |   1 +
 .../c-c++-common/torture/builtin-clear-padding-4.c |   4 +-
 .../c-c++-common/torture/builtin-clear-padding-5.c |   1 +
 .../c-c++-common/torture/builtin-clear-padding-6.c |  28 +
 gcc/testsuite/c-c++-common/to

[gcc/redhat/heads/gcc-13-branch] (187 commits) Merge commit 'r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

The branch 'redhat/heads/gcc-13-branch' was updated to point to:

 2fcca36eb1d3... Merge commit 'r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb

It previously pointed to:

 03b1a31f9807... Merge commit 'r13-8838-g7813d94393f60ac641265cb3fc3a446f9f3

Diff:

Summary of changes (added commits):
---

  2fcca36... Merge commit 'r13-9024-g973c6ea242cea7d95c2888ec6dde39b5cbb
  973c6ea... c++: Fix get_member_function_from_ptrfunc with -fsanitize=b (*)
  9b4a7d9... libiberty: Fix up > 64K section handling in simple_object_e (*)
  e5839ca... i386: Fix up __builtin_ia32_b{extr{,i}_u{32,64},zhi_{s,d}i} (*)
  e5a9c15... testsuite: Fix up pr116034.c test for big/pdp endian [PR116 (*)
  aaa82d6... ssa: Fix up maybe_rewrite_mem_ref_base complex type handlin (*)
  1880ff0... gimple-fold: Fix up __builtin_clear_padding lowering [PR115 (*)
  ff84211... Daily bump. (*)
  934245a... Daily bump. (*)
  2d7b4df... Daily bump. (*)
  5ceea2a... libstdc++: Fix std::chrono::tzdb to work with vanguard form (*)
  e9b2f1f... libstdc++: Support link chains in std::chrono::tzdb::locate (*)
  2913d33... [libstdc++] define zoneinfo_dir_override on vxworks (*)
  04a8e50... Daily bump. (*)
  0a16b1b... doc: Enhance Intel CPU documentation (*)
  61fd9b0... Daily bump. (*)
  fbcc672... Daily bump. (*)
  750bb0c... Daily bump. (*)
  8ad345a... Daily bump. (*)
  e83df98... ipa: Don't disable function parameter analysis for fat LTO (*)
  c56dc83... Arm: Fix incorrect tailcall-generation for indirect calls [ (*)
  5a081da... Daily bump. (*)
  cc2c50b... Daily bump. (*)
  e152aee... i386: Fix vfpclassph non-optimizied intrin (*)
  f364a43... RISC-V: fix TARGET_PROMOTE_FUNCTION_MODE hook for libcalls (*)
  032b6e3... Daily bump. (*)
  5e049ad... Check avx upper register for parallel. (*)
  d9decdc... Daily bump. (*)
  85f323c... Daily bump. (*)
  9b9e33e... Daily bump. (*)
  d473609... Daily bump. (*)
  2c88e24... Daily bump. (*)
  154639f... Daily bump. (*)
  d4e36c7... Daily bump. (*)
  4f26b4f... Daily bump. (*)
  891a312... Daily bump. (*)
  bdb1cb6... Daily bump. (*)
  ea9c508... Fix testcase failure. (*)
  aea3742... Align ix86_{move_max,store_max} with vectorizer. (*)
  39d5de3... Daily bump. (*)
  3e5cf9f... [testsuite] [arm] [vect] adjust mve-vshr test [PR113281] (*)
  95c2bc2... Daily bump. (*)
  9f54144... Daily bump. (*)
  e469654... Compare loop bounds in ipa-icf (*)
  49bcfb7... Daily bump. (*)
  58c8882... AVR: target/116407 - Fix linker error "relocation truncated (*)
  b8fe699... Daily bump. (*)
  2466e10... Daily bump. (*)
  959d652... aarch64: Fix bogus cnot optimisation [PR114603] (*)
  22c6a11... aarch64: Fix expansion of svsudot [PR114607] (*)
  73d22be... Daily bump. (*)
  8796e33... Daily bump. (*)
  a79d7cc... Daily bump. (*)
  7b0e478... Daily bump. (*)
  617562e... Refine constraint "Bk" to define_special_memory_constraint. (*)
  3689565... Daily bump. (*)
  3008807... Daily bump. (*)
  419c533... Daily bump. (*)
  12ba140... c++: local class memfn synth from uneval context [PR113063] (*)
  7830d92... Daily bump. (*)
  617bbae... Daily bump. (*)
  9d36882... Daily bump. (*)
  f6624ad... hppa: Fix (plus (plus (mult (a) (mem_shadd_constant)) (b))  (*)
  73064a2... sh: Don't call make_insn_raw in sh_recog_treg_set_expr [PR1 (*)
  87cb011... Daily bump. (*)
  bf0673e... libgomp: Remove bogus warnings from privatized-ref-2.f90. (*)
  7195144... Fortran: Suppress bogus used uninitialized warnings [PR1088 (*)
  dcc9a85... Daily bump. (*)
  991acbd... Daily bump. (*)
  4e0846d... Daily bump. (*)
  65b8906... Daily bump. (*)
  7928ec5... Daily bump. (*)
  fa6c24e... Daily bump. (*)
  d80abba... i386: Add non-optimize prefetchi intrins (*)
  320a9c5... Daily bump. (*)
  b2ab34b... i386: Use _mm_setzero_ps/d instead of _mm_avx512_setzero_ps (*)
  bb15c4c... i386: Fix AVX512 intrin macro typo (*)
  69272e4... Daily bump. (*)
  920adcb... Daily bump. (*)
  4e03c89... Daily bump. (*)
  f280772... Daily bump. (*)
  58b3e55... Daily bump. (*)
  46d68bc... libstdc++: Fix std::vector for -std=gnu++14 -fconcept (*)
  9a4603d... rs6000: Catch unsupported ABI errors when using -mrop-prote (*)
  63b1b3e... rs6000: Error on CPUs and ABIs that don't support the ROP p (*)
  77fd352... rs6000: ROP - Emit hashst and hashchk insns on Power8 and l (*)
  bc51e5a... rs6000: Compute rop_hash_save_offset for non-Altivec compil (*)
  9bbdec4... rs6000: Update ELFv2 stack frame comment showing the correc (*)
  0575d3b... Daily bump. (*)
  b352766... Fixup unaligned load/store cost for znver4 (*)
  dec571e... i386: Change prefetchi output template (*)
  e504184... [powerpc] [testsuite] reorder dg directives [PR106069] (*)
  8a470d7... Daily bump. (*)
  4ce7c81... [PR115565] cse: Don't use a valid regno for non-register in (*)
  9778ad5... Daily bump. (*)
  ae6d5dc... Fortran: character array constructor with >= 4 constant ele (*)
  44e07e4... Daily bump. (*)
  a23deb1... Avoid undefined behaviour in build_option_suggestions (*)

[gcc r15-3630] Fortran: Fixes to OpenMP 'interop' directive parsing support

2024-09-13 Thread Tobias Burnus via Gcc-cvs

https://gcc.gnu.org/g:99988464fc86354f0359c0fd91eee444fb5bd8a2

commit r15-3630-g99988464fc86354f0359c0fd91eee444fb5bd8a2
Author: Tobias Burnus 
Date:   Fri Sep 13 16:48:57 2024 +0200

Fortran: Fixes to OpenMP 'interop' directive parsing support

Handle lists as argument to 'fr' and 'attr'; fix parsing corner cases.
Additionally, 'fr' values are now internally stored as integer, permitting
the diagnoses (warning) for values not defined in the OpenMP additional
definitions document.

PR fortran/116661

gcc/fortran/ChangeLog:

* gfortran.h (gfc_omp_namelist): Rename 'init' members for clarity.
* match.cc (gfc_free_omp_namelist): Handle renaming.
* dump-parse-tree.cc (show_omp_namelist): Update for new format
and features.
* openmp.cc (gfc_match_omp_prefer_type): Parse list to 'fr' and 'attr';
store 'fr' values as integer.
(gfc_match_omp_init): Rename variable names.

gcc/ChangeLog:

* omp-api.h (omp_get_fr_id_from_name, omp_get_name_from_fr_id): New
prototypes.
* omp-general.cc (omp_get_fr_id_from_name, omp_get_name_from_fr_id):
New.

include/ChangeLog:

* gomp-constants.h (GOMP_INTEROP_IFR_LAST,
GOMP_INTEROP_IFR_SEPARATOR, GOMP_INTEROP_IFR_NONE): New.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/interop-1.f90: Extend, update dg-*.
* gfortran.dg/gomp/interop-2.f90: Update dg-error.
* gfortran.dg/gomp/interop-3.f90: Add dg-warning.

Diff:
---
 gcc/fortran/dump-parse-tree.cc   |  84 +---
 gcc/fortran/gfortran.h   |   4 +-
 gcc/fortran/match.cc |  10 +-
 gcc/fortran/openmp.cc| 305 ---
 gcc/omp-api.h|   3 +
 gcc/omp-general.cc   |  29 +++
 gcc/testsuite/gfortran.dg/gomp/interop-1.f90 |  32 ++-
 gcc/testsuite/gfortran.dg/gomp/interop-2.f90 |   2 +-
 gcc/testsuite/gfortran.dg/gomp/interop-3.f90 |   2 +-
 include/gomp-constants.h |   5 +
 10 files changed, 314 insertions(+), 162 deletions(-)

diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc
index 8fc6141611c4..3547d7f8aca3 100644
--- a/gcc/fortran/dump-parse-tree.cc
+++ b/gcc/fortran/dump-parse-tree.cc
@@ -37,6 +37,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "constructor.h"
 #include "version.h"
 #include "parse.h"  /* For gfc_ascii_statement.  */
+#include "omp-api.h"  /* For omp_get_name_from_fr_id.  */
+#include "gomp-constants.h"  /* For GOMP_INTEROP_IFR_SEPARATOR.  */
 
 /* Keep track of indentation for symbol tree dumps.  */
 static int show_level = 0;
@@ -1537,35 +1539,69 @@ show_omp_namelist (int list_type, gfc_omp_namelist *n)
}
   else if (list_type == OMP_LIST_INIT)
{
- int i = 0;
  if (n->u.init.target)
fputs ("target,", dumpfile);
  if (n->u.init.targetsync)
fputs ("targetsync,", dumpfile);
- char *prefer_type = n->u.init.str;
- if (n->u.init.len)
-   fputs ("prefer_type(", dumpfile);
- if (n->u.init.len)
-   while (*prefer_type)
- {
-   fputc ('{', dumpfile);
-   if (n->u2.interop_int && n->u2.interop_int[i] != 0)
- fprintf (dumpfile, "fr(%d),", n->u2.interop_int[i]);
-   else if (prefer_type[0] != ' ' || prefer_type[1] != '\0')
- fprintf (dumpfile, "fr(\"%s\"),", prefer_type);
-   prefer_type += 1 + strlen (prefer_type);
-
-   while (*prefer_type)
- {
-   fprintf (dumpfile, "attr(\"%s\"),", prefer_type);
-   prefer_type += 1 + strlen (prefer_type);
- }
-   fputc ('}', dumpfile);
-   ++prefer_type;
-   ++i;
+ if (n->u2.init_interop_fr)
+   {
+ char *attr_str = n->u.init.attr;
+ int idx = 0;
+ int fr_id;
+ fputs ("prefer_type(", dumpfile);
+ do
+   {
+ fr_id = n->u2.init_interop_fr[idx];
+ fputc ('{', dumpfile);
+ if (fr_id != GOMP_INTEROP_IFR_NONE)
+   {
+ fputs ("fr(", dumpfile);
+ do
+   {
+ const char *fr_str = omp_get_name_from_fr_id (fr_id);
+ if (fr_str)
+   fprintf (dumpfile, "\"%s\"", fr_str);
+ else
+   fprintf (dumpfile, "%d", fr_id);
+ fr_id = n->u2.init_interop_fr[++idx];
+ if (fr_id != GOMP_INTEROP_IFR_SEPARATOR)
+   fputc (',', d

[gcc r15-3631] c++: -fimplicit-constexpr diagnostic improvement [PR116696]

2024-09-13 Thread Jason Merrill via Gcc-cvs

https://gcc.gnu.org/g:4ee692337c4ec18fe9be3df34f3607ea3de5ef93

commit r15-3631-g4ee692337c4ec18fe9be3df34f3607ea3de5ef93
Author: Jason Merrill 
Date:   Thu Sep 12 16:22:02 2024 -0400

c++: -fimplicit-constexpr diagnostic improvement [PR116696]

PR116696 expressed surprise that explicit 'constexpr' was needed on one
function; this was because the function isn't 'inline', and
-fimplicit-constexpr doesn't try to promote non-inline functions.  Let's be
more helpful in that situation, and also help trace through functions that
were promoted.

PR c++/116696

gcc/cp/ChangeLog:

* constexpr.cc (explain_invalid_constexpr_fn): When
-fimplicit-constexpr, also explain inline functions, and point out
non-inline functions.

gcc/testsuite/ChangeLog:

* g++.dg/DRs/dr2478.C: Prune extra diagnostic.
* g++.dg/ext/fimplicit-constexpr1.C: New test.

Diff:
---
 gcc/cp/constexpr.cc | 7 +++
 gcc/testsuite/g++.dg/DRs/dr2478.C   | 2 +-
 gcc/testsuite/g++.dg/ext/fimplicit-constexpr1.C | 8 
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index db2a9c1543e1..d0f617481413 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1057,9 +1057,16 @@ explain_invalid_constexpr_fn (tree fun)
   /* Only diagnose defaulted functions, lambdas, or instantiations.  */
   else if (!DECL_DEFAULTED_FN (fun)
   && !LAMBDA_TYPE_P (CP_DECL_CONTEXT (fun))
+  && !(flag_implicit_constexpr
+   && !DECL_DECLARED_CONSTEXPR_P (fun)
+   && DECL_DECLARED_INLINE_P (fun))
   && !is_instantiation_of_constexpr (fun))
 {
   inform (DECL_SOURCE_LOCATION (fun), "%qD declared here", fun);
+  if (flag_implicit_constexpr && !maybe_constexpr_fn (fun)
+ && decl_defined_p (fun))
+   inform (DECL_SOURCE_LOCATION (fun),
+   "%<-fimplicit-constexpr%> only affects %<inline%> functions");
   return;
 }
   if (diagnosed == NULL)
diff --git a/gcc/testsuite/g++.dg/DRs/dr2478.C 
b/gcc/testsuite/g++.dg/DRs/dr2478.C
index 7f581cabb7b0..b2292561381a 100644
--- a/gcc/testsuite/g++.dg/DRs/dr2478.C
+++ b/gcc/testsuite/g++.dg/DRs/dr2478.C
@@ -2,7 +2,7 @@
 // { dg-do compile { target c++20 } }
 
 // Defeat -fimplicit-constexpr
-int ii;
+int ii; // { dg-prune-output "value of 'ii' is not usable in a constant expr" }
 
 template 
 struct S {
diff --git a/gcc/testsuite/g++.dg/ext/fimplicit-constexpr1.C 
b/gcc/testsuite/g++.dg/ext/fimplicit-constexpr1.C
new file mode 100644
index ..fc4b2829b657
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/fimplicit-constexpr1.C
@@ -0,0 +1,8 @@
+// { dg-additional-options -fimplicit-constexpr }
+// { dg-do compile { target c++14 } }
+
+void f() { } // { dg-message "'-fimplicit-constexpr' only affects 'inline' functions" }
+
+inline int g() { f(); return 42; } // { dg-error {non-'constexpr' function 'void f\(\)'} }
+
+constexpr int i = g(); // { dg-error {'int g\(\)' called in a constant expression} }

[gcc r15-3632] AVR: Use avr_byte instead of simplify_gen_subreg (QImode, ...

2024-09-13 Thread Georg-Johann Lay via Gcc-cvs

https://gcc.gnu.org/g:b55f5e344c9b2f3667ef176e090c168e0add41f1

commit r15-3632-gb55f5e344c9b2f3667ef176e090c168e0add41f1
Author: Georg-Johann Lay 
Date:   Fri Sep 13 12:50:31 2024 +0200

AVR: Use avr_byte instead of simplify_gen_subreg (QImode, ...

There are many places where asm output functions have to look at
the constituent bytes and words of the operands.  The function of
choice was simplify_gen_subreg (mode, ...) which can be replaced
by the more handy avr_byte (rtx, byte_num).

gcc/
* config/avr/avr.cc: Use functions like avr_byte,
avr_word, avr_[u]int8/16 if convenient.
(avr_uint16): New function.

Diff:
---
 gcc/config/avr/avr.cc | 109 ++
 1 file changed, 47 insertions(+), 62 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b26716551fcf..48d1d0e76465 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -308,6 +308,15 @@ avr_int16 (rtx x, int n)
   return (int16_t) trunc_int_for_mode (INTVAL (avr_word (x, n)), HImode);
 }
 
+/* Return the sub-word of compile-time constant X that starts
+   at byte N as an uint16_t.  */
+
+static uint16_t
+avr_uint16 (rtx x, int n)
+{
+  return (uint16_t) avr_int16 (x, n);
+}
+
 
 /* Constraint helper function.  XVAL is a CONST_INT or a CONST_DOUBLE.
Return true if the least significant N_BYTES bytes of XVAL all have a
@@ -317,15 +326,9 @@ avr_int16 (rtx x, int n)
 bool
 avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask)
 {
-  machine_mode mode = GET_MODE (xval);
-
-  if (VOIDmode == mode)
-mode = SImode;
-
   for (int i = 0; i < n_bytes; i++)
 {
-  rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
-  unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
+  unsigned int val8 = avr_uint8 (xval, i);
 
   if ((pop_mask & (1 << popcount_hwi (val8))) == 0)
return false;
@@ -342,15 +345,9 @@ avr_popcount_each_byte (rtx xval, int n_bytes, int 
pop_mask)
 bool
 avr_xor_noclobber_dconst (rtx xval, int n_bytes)
 {
-  machine_mode mode = GET_MODE (xval);
-
-  if (VOIDmode == mode)
-mode = SImode;
-
   for (int i = 0; i < n_bytes; ++i)
 {
-  rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
-  unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
+  unsigned int val8 = avr_uint8 (xval, i);
 
   if (val8 != 0 && val8 != 0xff && val8 != 0x80)
return false;
@@ -3414,7 +3411,7 @@ avr_out_lpm_no_lpmx (rtx_insn *insn, rtx *xop, int *plen)
 
   for (int i = 0; i < n_bytes; ++i)
{
- rtx reg = simplify_gen_subreg (QImode, dest, GET_MODE (dest), i);
+ rtx reg = avr_byte (dest, i);
 
  if (i > 0)
avr_asm_len ("adiw %2,1", xop, plen, 1);
@@ -3676,9 +3673,9 @@ output_reload_in_const (rtx *op, rtx clobber_reg, int 
*len, bool clear_p)
  but has some subregs that are in LD_REGS.  Use the MSB (REG:QI 17).  */
 
   if (REGNO (dest) < REG_16
-  && REGNO (dest) + GET_MODE_SIZE (mode) > REG_16)
+  && END_REGNO (dest) > REG_16)
 {
-  clobber_reg = all_regs_rtx[REGNO (dest) + n_bytes - 1];
+  clobber_reg = all_regs_rtx[END_REGNO (dest) - 1];
 }
 
   /* We might need a clobber reg but don't have one.  Look at the value to
@@ -3708,7 +3705,7 @@ output_reload_in_const (rtx *op, rtx clobber_reg, int 
*len, bool clear_p)
 
   /* Crop the n-th destination byte.  */
 
-  xdest[n] = simplify_gen_subreg (QImode, dest, mode, n);
+  xdest[n] = avr_byte (dest, n);
   int ldreg_p = test_hard_reg_class (LD_REGS, xdest[n]);
 
   if (!CONST_INT_P (src)
@@ -3734,7 +3731,7 @@ output_reload_in_const (rtx *op, rtx clobber_reg, int 
*len, bool clear_p)
 
   /* Crop the n-th source byte.  */
 
-  xval = simplify_gen_subreg (QImode, src, mode, n);
+  xval = avr_byte (src, n);
   ival[n] = INTVAL (xval);
 
   /* Look if we can reuse the low word by means of MOVW.  */
@@ -3743,12 +3740,12 @@ output_reload_in_const (rtx *op, rtx clobber_reg, int 
*len, bool clear_p)
  && n_bytes >= 4
  && AVR_HAVE_MOVW)
{
- rtx lo16 = simplify_gen_subreg (HImode, src, mode, 0);
- rtx hi16 = simplify_gen_subreg (HImode, src, mode, 2);
+ int lo16 = avr_int16 (src, 0);
+ int hi16 = avr_int16 (src, 2);
 
- if (INTVAL (lo16) == INTVAL (hi16))
+ if (lo16 == hi16)
{
- if (INTVAL (lo16) != 0 || !clear_p)
+ if (lo16 != 0 || ! clear_p)
avr_asm_len ("movw %C0,%A0", &op[0], len, 1);
 
  break;
@@ -6275,7 +6272,7 @@ avr_out_cmp_ext (rtx xop[], enum rtx_code code, int *plen)
 {
   // Sign-extend the high-byte of zreg to tmp_reg.
   int zmsb = GET_MODE_SIZE (zmode) - 1;
-  rtx xzmsb = simplify_gen_subreg (QImode, zreg, zmode, zmsb);
+  rtx xzmsb = avr_byte (zreg, zmsb);
 
   avr_asm_len ("mov __tmp_reg__,%0" CR_TAB

[gcc r15-3633] Fix factor_out_conditional_operation heuristics for constants

2024-09-13 Thread Andrew Pinski via Gcc-cvs

https://gcc.gnu.org/g:8ed8c342fba682286159f56d0e53a05db95762a0

commit r15-3633-g8ed8c342fba682286159f56d0e53a05db95762a0
Author: Andrew Pinski 
Date:   Wed Sep 11 22:10:53 2024 -0700

Fix factor_out_conditional_operation heuristics for constants

While working on a different patch, I noticed the heuristics were not
doing the right thing if there were statements before the NOP/PREDICTs.
(LABELS don't have other statements before them).

This fixes that oversight which was added in r15-3334-gceda727dafba6e.

Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

* tree-ssa-phiopt.cc (factor_out_conditional_operation): Instead
of just ignoring a NOP/PREDICT, skip over them before checking
the heuristics.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/tree-ssa-phiopt.cc | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 5710bc32e611..e5413e405722 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -332,15 +332,17 @@ factor_out_conditional_operation (edge e0, edge e1, gphi 
*phi,
{
  gsi = gsi_for_stmt (arg0_def_stmt);
  gsi_prev_nondebug (&gsi);
+ /* Ignore nops, predicates and labels. */
+ while (!gsi_end_p (gsi)
+ && (gimple_code (gsi_stmt (gsi)) == GIMPLE_NOP
+ || gimple_code (gsi_stmt (gsi)) == GIMPLE_PREDICT
+ || gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL))
+   gsi_prev_nondebug (&gsi);
+
  if (!gsi_end_p (gsi))
{
  gimple *stmt = gsi_stmt (gsi);
- /* Ignore nops, predicates and labels. */
- if (gimple_code (stmt) == GIMPLE_NOP
- || gimple_code (stmt) == GIMPLE_PREDICT
- || gimple_code (stmt) == GIMPLE_LABEL)
-   ;
- else if (gassign *assign = dyn_cast <gassign *> (stmt))
+ if (gassign *assign = dyn_cast <gassign *> (stmt))
{
  tree lhs = gimple_assign_lhs (assign);
  enum tree_code ass_code

[gcc r15-3634] AVR: Detect more skip opportunities.

2024-09-13 Thread Georg-Johann Lay via Gcc-cvs

https://gcc.gnu.org/g:4ffca9966a9c43cedafe56d3ef8033182290f25b

commit r15-3634-g4ffca9966a9c43cedafe56d3ef8033182290f25b
Author: Georg-Johann Lay 
Date:   Fri Sep 13 18:06:57 2024 +0200

AVR: Detect more skip opportunities.

The transparent call insns like "*parityhi2.libgcc" output a single
[R]CALL instruction that can be skipped by the skip instructions.
Such insns have attribute "type" of "xcall" and can therefore
be easily recognized.  Same applies when "adjust_len" is "call".

gcc/
* config/avr/avr.cc (avr_2word_insn_p): Return true for
transparent calls: When insn attribute "type" is "xcall"
or when "adjust_len" is "call".

Diff:
---
 gcc/config/avr/avr.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 48d1d0e76465..4cb51ea68dc7 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12972,7 +12972,7 @@ test_hard_reg_class (enum reg_class rclass, rtx x)
 }
 
 
-/* Helper for jump_over_one_insn_p:  Test if INSN is a 2-word instruction
+/* Helper for `jump_over_one_insn_p':  Test if INSN is a 2-word instruction
and thus is suitable to be skipped by CPSE, SBRC, etc.  */
 
 static bool
@@ -12986,7 +12986,10 @@ avr_2word_insn_p (rtx_insn *insn)
   switch (INSN_CODE (insn))
 {
 default:
-  return false;
+  return (recog_memoized (insn) >= 0
+ // Transparent calls may be skipped.
+ && (get_attr_type (insn) == TYPE_XCALL
+ || get_attr_adjust_len (insn) == ADJUST_LEN_CALL));
 
 case CODE_FOR_movqi_insn:
 case CODE_FOR_movuqq_insn:

[gcc r13-9025] testsuite: Fix up builtin-clear-padding-3.c for -funsigned-char

2024-09-13 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:504c8e4dc26501fb68f0ae784b45dba0f68b4f4a

commit r13-9025-g504c8e4dc26501fb68f0ae784b45dba0f68b4f4a
Author: Jakub Jelinek 
Date:   Thu Jul 18 09:22:10 2024 +0200

testsuite: Fix up builtin-clear-padding-3.c for -funsigned-char

As reported on gcc-regression, this test FAILs on aarch64, but my
r15-2090 change didn't change anything on the generated assembly,
just added the forgotten dg-do run directive to the test, so the
test has been failing forever, just we didn't know it.

I can actually reproduce it on x86_64 with -funsigned-char too,
s2.b.a has int type and -1 is stored to it, so we should compare
it against -1 rather than (char) -1; the latter is appropriate for
testing char fields into which we've stored -1.

2024-07-18  Jakub Jelinek  

* c-c++-common/torture/builtin-clear-padding-3.c (main): Compare
s2.b.a against -1 rather than (char) -1.

(cherry picked from commit 958ee138748fae4371e453eb9b357f576abbe83e)

Diff:
---
 gcc/testsuite/c-c++-common/torture/builtin-clear-padding-3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/torture/builtin-clear-padding-3.c 
b/gcc/testsuite/c-c++-common/torture/builtin-clear-padding-3.c
index 27bf8f6dd734..2c673169e134 100644
--- a/gcc/testsuite/c-c++-common/torture/builtin-clear-padding-3.c
+++ b/gcc/testsuite/c-c++-common/torture/builtin-clear-padding-3.c
@@ -34,7 +34,7 @@ main ()
   foo (&s1, 0);
   foo (&s2, 0);
   __builtin_clear_padding (&s2);
-  if (s2.b.a != (char) -1)
+  if (s2.b.a != -1)
 __builtin_abort ();
   __builtin_clear_padding (&s2.b.a);
   __builtin_memset (&s2.b.a + 1, 0, sizeof (union U) - sizeof (s2.b.a));

[gcc r15-3636] testsuite: a few more hostedlib adjustments

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:b56bd542942ba7bd2020d5824e57d819974bc071

commit r15-3636-gb56bd542942ba7bd2020d5824e57d819974bc071
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:42:41 2024 -0300

testsuite: a few more hostedlib adjustments

This adjusts some recently-added tests that won't compile without a
hostedlib libstdc++, missed in the patch that just went in, and also
an old test that I'd missed because it also failed in my baseline.


for  gcc/testsuite/ChangeLog

* g++.dg/coroutines/pr108620.C: Skip if !hostedlib because of
unavailable headers.
* g++.dg/other/profile1.C: Likewise.
* g++.dg/ext/pragma-unroll-lambda-lto.C: Skip if !hostedlib
because of unavailable declarations.

Diff:
---
 gcc/testsuite/g++.dg/coroutines/pr108620.C  | 2 ++
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C | 1 +
 gcc/testsuite/g++.dg/other/profile1.C   | 1 +
 3 files changed, 4 insertions(+)

diff --git a/gcc/testsuite/g++.dg/coroutines/pr108620.C 
b/gcc/testsuite/g++.dg/coroutines/pr108620.C
index e8016b9f8a23..22bf0c18bac4 100644
--- a/gcc/testsuite/g++.dg/coroutines/pr108620.C
+++ b/gcc/testsuite/g++.dg/coroutines/pr108620.C
@@ -1,3 +1,5 @@
+// { dg-skip-if "requires hosted libstdc++ for iostream" { ! hostedlib } }
+
 // https://gcc.gnu.org/PR108620
 #include 
 #include 
diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
index 144c4c326924..64cdf90f34d3 100644
--- a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -1,5 +1,6 @@
 // { dg-do link { target c++11 } }
 // { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" }
+// { dg-skip-if "requires hosted libstdc++ for cstdlib rand" { ! hostedlib } }
 
 #include <cstdlib>
 
diff --git a/gcc/testsuite/g++.dg/other/profile1.C 
b/gcc/testsuite/g++.dg/other/profile1.C
index a4bf6b3d0fea..99844373189e 100644
--- a/gcc/testsuite/g++.dg/other/profile1.C
+++ b/gcc/testsuite/g++.dg/other/profile1.C
@@ -2,6 +2,7 @@
 // { dg-do run }
 // { dg-require-profiling "" }
 // { dg-options "-fnon-call-exceptions -fprofile-arcs" }
+// { dg-skip-if "requires hosted libstdc++ for string" { ! hostedlib } }
 
 #include <string>

[gcc/aoliva/heads/testbase] (19 commits) testsuite: a few more hostedlib adjustments

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

The branch 'aoliva/heads/testbase' was updated to point to:

 b56bd542942b... testsuite: a few more hostedlib adjustments

It previously pointed to:

 4308c343b8ea... testsuite: introduce hostedlib effective target

Diff:

Summary of changes (added commits):
---

  b56bd54... testsuite: a few more hostedlib adjustments (*)
  d53c5bc... Daily bump. (*)
  4ffca99... AVR: Detect more skip opportunities. (*)
  8ed8c34... Fix factor_out_conditional_operation heuristics for constan (*)
  b55f5e3... AVR: Use avr_byte instead of simplify_gen_subreg (QImode, . (*)
  4ee6923... c++: -fimplicit-constexpr diagnostic improvement [PR116696] (*)
  9998846... Fortran: Fixes to OpenMP 'interop' directive parsing suppor (*)
  508ef58... gcn/mkoffload.cc: Use #embed for including the generated EL (*)
  b7b6773... c++: Don't emit deprecated/unavailable attribute diagnostic (*)
  4963eb7... libcpp: Fix up UB in finish_embed (*)
  46c2538... s390: Fix TF to FPRX2 conversion [PR115860] (*)
  1a71ff3... s390: Fix AQ and AR constraints (*)
  5938e06... libstdc++: Do not use use memmove for 1-element ranges [PR1 (*)
  494d3c3... AVR: Rework avr_out_compare. (*)
  1ec1677... AVR: Tweak 32-bit EQ and NE comparisons. (*)
  be59aaf... AVR: avr.cc - Reorder functions to require less forward dec (*)
  45e7cc9... Match: Remove unnecessary types_match for case 1 of signed  (*)
  5d9486c... Fix endianness issue on unsigned_21.f90. (*)
  3d021a0... Daily bump. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testbase' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc/aoliva/heads/testme] (24 commits) relax ifcombine to accept vuses

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

The branch 'aoliva/heads/testme' was updated to point to:

 7b7dfff4b174... relax ifcombine to accept vuses

It previously pointed to:

 0d90ad11fb42... relax ifcombine to accept vuses

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  0d90ad1... relax ifcombine to accept vuses
  1494b67... fold truth-and only in ifcombine
  163a769... check for mergeable loads, choose insertion points accordin
  e4f6196... rework truth_andor folding into tree-ssa-ifcombine
  6621257... assorted improvements for fold_truth_andor_1
  90eb457... testsuite: a few more hostedlib adjustments


Summary of changes (added commits):
---

  7b7dfff... relax ifcombine to accept vuses
  e731ae8... fold truth-and only in ifcombine
  fbf1f80... check for mergeable loads, choose insertion points accordin
  b4b872b... rework truth_andor folding into tree-ssa-ifcombine
  8aa412b... assorted improvements for fold_truth_andor_1
  b56bd54... testsuite: a few more hostedlib adjustments (*)
  d53c5bc... Daily bump. (*)
  4ffca99... AVR: Detect more skip opportunities. (*)
  8ed8c34... Fix factor_out_conditional_operation heuristics for constan (*)
  b55f5e3... AVR: Use avr_byte instead of simplify_gen_subreg (QImode, . (*)
  4ee6923... c++: -fimplicit-constexpr diagnostic improvement [PR116696] (*)
  9998846... Fortran: Fixes to OpenMP 'interop' directive parsing suppor (*)
  508ef58... gcn/mkoffload.cc: Use #embed for including the generated EL (*)
  b7b6773... c++: Don't emit deprecated/unavailable attribute diagnostic (*)
  4963eb7... libcpp: Fix up UB in finish_embed (*)
  46c2538... s390: Fix TF to FPRX2 conversion [PR115860] (*)
  1a71ff3... s390: Fix AQ and AR constraints (*)
  5938e06... libstdc++: Do not use use memmove for 1-element ranges [PR1 (*)
  494d3c3... AVR: Rework avr_out_compare. (*)
  1ec1677... AVR: Tweak 32-bit EQ and NE comparisons. (*)
  be59aaf... AVR: avr.cc - Reorder functions to require less forward dec (*)
  45e7cc9... Match: Remove unnecessary types_match for case 1 of signed  (*)
  5d9486c... Fix endianness issue on unsigned_21.f90. (*)
  3d021a0... Daily bump. (*)

(*) This commit already exists in another branch.
Because the reference `refs/users/aoliva/heads/testme' matches
your hooks.email-new-commits-only configuration,
no separate email is sent for this commit.

[gcc(refs/users/aoliva/heads/testme)] assorted improvements for fold_truth_andor_1

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:8aa412b62ba7275d5d0f861ef8e0306d5023b028

commit 8aa412b62ba7275d5d0f861ef8e0306d5023b028
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:42:56 2024 -0300

assorted improvements for fold_truth_andor_1

This patch introduces various improvements to the logic that merges
field compares.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.

Other merging opportunities were missed because of association.  The
existing logic would only succeed in merging a pair of consecutive
compares, or e.g. B with C in (A ANDOR B) ANDOR C, not even trying
e.g. C and D in (A ANDOR (B ANDOR C)) ANDOR D.  I've generalized the
handling of the rightmost compare in the left-hand operand, going for
the leftmost compare in the right-hand operand, and then onto trying
to merge compares pairwise, one from each operand, even if they are
not consecutive, taking care to avoid merging operations with
intervening side effects, including volatile accesses.

When it is the second of a non-consecutive pair of compares that first
accesses a word, we may merge the first compare with part of the
second compare that refers to the same word, keeping the compare of
the remaining bits at the spot where the second compare used to be.

Handling compares with non-constant fields was somewhat generalized,
now handling non-adjacent fields.  When a field of one object crosses
an alignment boundary but the other doesn't, we issue the same load in
both compares; gimple optimizers will later turn it into a single
load, without our having to handle SAVE_EXPRs at this point.

The logic for issuing split loads and compares, and ordering them, is
now shared between all cases of compares with constants and with
another object.

The -Wno-error for toplev.o on rs6000 is because of toplev.c's:

  if ((flag_sanitize & SANITIZE_ADDRESS)
  && !FRAME_GROWS_DOWNWARD)

and rs6000.h's:

#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0   \
  || (flag_sanitize & SANITIZE_ADDRESS) != 0)

The mutually exclusive conditions involving flag_sanitize are now
noticed and reported by fold-const.c's:

  warning (0,
   "%<and%> of mutually exclusive equal-tests"
   " is always 0");

This patch enables over 12k compare-merging opportunities that we used
to miss in a GCC bootstrap.


for  gcc/ChangeLog

* fold-const.cc (prepare_xor): New.
(decode_field_reference): Handle xor, shift, and narrowing
conversions.
(all_ones_mask_p): Remove.
(compute_split_boundary_from_align): New.
(build_split_load, reuse_split_load): New.
(fold_truth_andor_1): Add recursion to combine pairs of
non-neighboring compares.  Handle xor compared with zero.
Handle fields straddling across alignment boundaries.
Generalize handling of non-constant rhs.
(fold_truth_andor): Leave sub-expression handling to the
recursion above.
* config/rs6000/t-rs6000 (toplev.o-warn): Disable errors.

for  gcc/testsuite/ChangeLog

* gcc.dg/field-merge-1.c: New.
* gcc.dg/field-merge-2.c: New.
* gcc.dg/field-merge-3.c: New.
* gcc.dg/field-merge-4.c: New.
* gcc.dg/field-merge-5.c: New.

Diff:
---
 gcc/config/rs6000/t-rs6000   |   4 +
 gcc/fold-const.cc| 818 ---
 gcc/testsuite/gcc.dg/field-merge-1.c |  64 +++
 gcc/testsuite/gcc.dg/field-merge-2.c |  31 ++
 gcc/testsuite/gcc.dg/field-merge-3.c |  36 ++
 gcc/testsuite/gcc.dg/field-merge-4.c |  40 ++
 gcc/testsuite/gcc.dg/field-merge-5.c |  40 ++
 7 files changed, 881 insertions(+), 152 deletions(-)

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 155788de40a3..a83968d663a6

[gcc(refs/users/aoliva/heads/testme)] rework truth_andor folding into tree-ssa-ifcombine

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:b4b872b2195b448b3e1bcd28c3e28d59618580a2

commit b4b872b2195b448b3e1bcd28c3e28d59618580a2
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:43:00 2024 -0300

rework truth_andor folding into tree-ssa-ifcombine

Diff:
---
 gcc/fold-const.cc | 1048 +
 gcc/gimple-fold.cc| 1149 +
 gcc/tree-ssa-ifcombine.cc |7 +-
 3 files changed, 1170 insertions(+), 1034 deletions(-)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 81814de5b04b..19824e6a477f 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -137,7 +137,6 @@ static tree range_successor (tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code,
tree, tree, tree, tree);
-static tree unextend (tree, int, int, tree);
 static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *);
 static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *);
 static tree fold_binary_op_with_conditional_arg (location_t,
@@ -4695,7 +4694,7 @@ invert_truthvalue_loc (location_t loc, tree arg)
is the original memory reference used to preserve the alias set of
the access.  */
 
-static tree
+tree
 make_bit_field_ref (location_t loc, tree inner, tree orig_inner, tree type,
HOST_WIDE_INT bitsize, poly_int64 bitpos,
int unsignedp, int reversep)
@@ -4945,212 +4944,6 @@ optimize_bit_field_compare (location_t loc, enum 
tree_code code,
   return lhs;
 }
 
-/* If *R_ARG is a constant zero, and L_ARG is a possibly masked
-   BIT_XOR_EXPR, return 1 and set *r_arg to l_arg.
-   Otherwise, return 0.
-
-   The returned value should be passed to decode_field_reference for it
-   to handle l_arg, and then doubled for r_arg.  */
-static int
-prepare_xor (tree l_arg, tree *r_arg)
-{
-  int ret = 0;
-
-  if (!integer_zerop (*r_arg))
-return ret;
-
-  tree exp = l_arg;
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  tree and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return ret;
-}
-
-  if (TREE_CODE (exp) == BIT_XOR_EXPR)
-{
-  *r_arg = l_arg;
-  return 1;
-}
-
-  return ret;
-}
-
-/* Subroutine for fold_truth_andor_1: decode a field reference.
-
-   If EXP is a comparison reference, we return the innermost reference.
-
-   *PBITSIZE is set to the number of bits in the reference, *PBITPOS is
-   set to the starting bit number.
-
-   If the innermost field can be completely contained in a mode-sized
-   unit, *PMODE is set to that mode.  Otherwise, it is set to VOIDmode.
-
-   *PVOLATILEP is set to 1 if the any expression encountered is volatile;
-   otherwise it is not changed.
-
-   *PUNSIGNEDP is set to the signedness of the field.
-
-   *PREVERSEP is set to the storage order of the field.
-
-   *PMASK is set to the mask used.  This is either contained in a
-   BIT_AND_EXPR or derived from the width of the field.
-
-   *PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any.
-
-   XOR_WHICH is 1 or 2 if EXP was found to be a (possibly masked)
-   BIT_XOR_EXPR compared with zero.  We're to take the first or second
-   operand thereof if so.  It should be zero otherwise.
-
-   Return 0 if this is not a component reference or is one that we can't
-   do anything with.  */
-
-static tree
-decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
-   HOST_WIDE_INT *pbitpos, machine_mode *pmode,
-   int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
-{
-  tree exp = *exp_;
-  tree outer_type = 0;
-  tree and_mask = 0;
-  tree mask, inner, offset;
-  tree unsigned_type;
-  unsigned int precision;
-  HOST_WIDE_INT shiftrt = 0;
-
-  /* All the optimizations using this function assume integer fields.
- There are problems with FP fields since the type_for_size call
- below can fail for, e.g., XFmode.  */
-  if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return NULL_TREE;
-
-  /* We are interested in the bare arrangement of bits, so strip everything
- that doesn't affect the machine mode.  However, record the type of the
- outermost expression if it may matter below.  */
-  if (CONVERT_EXPR_P (exp)
-  || TREE_CODE (exp) == NON_LVALUE_EXPR)
-outer_type = TREE_TYPE (exp);
-  STRIP_NOPS (exp);
-
-  if (TREE_CODE (exp) == BIT_AND_EXPR)
-{
-  and_mask = TREE_OPERAND (exp, 1);
-  exp = TREE_OPERAND (exp, 0);
-  STRIP_NOPS (exp); STRIP_NOPS (and_mask);
-  if (TREE_CODE (and_mask) != INTEGER_CST)
-   return NULL_TREE;
-}
-
-  if (xor_which)
-{
-  gcc_checking_assert (TREE_CODE (exp) == BIT_XOR_EXPR);
-

[gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:fbf1f8007325adf2d4b70f3b5a26d5c666c815e3

commit fbf1f8007325adf2d4b70f3b5a26d5c666c815e3
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:43:06 2024 -0300

check for mergeable loads, choose insertion points accordingly

Diff:
---
 gcc/gimple-fold.cc | 253 ++---
 1 file changed, 219 insertions(+), 34 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 64426bd76977..85a0ec028030 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "internal-fn.h"
 #include "gimple-range.h"
+#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p
 
 /* ??? Move this to some header, it's defined in fold-const.c.  */
 extern tree
@@ -7395,7 +7396,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum 
tree_code code,
Same as ssa_is_replaceable_p, except that we don't insist it has a
single use.  */
 
-bool
+static bool
 ssa_is_substitutable_p (gimple *stmt)
 {
 #if 0
@@ -7476,9 +7477,10 @@ is_cast_p (tree *name)
   if (gimple_num_ops (def) != 2)
break;
 
-  if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+  if (gimple_assign_single_p (def))
{
+ if (gimple_assign_load_p (def))
+   break;
  *name = gimple_assign_rhs1 (def);
  continue;
}
@@ -7515,8 +7517,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 2:
- if (get_gimple_rhs_class (gimple_expr_code (def))
- == GIMPLE_SINGLE_RHS)
+ if (gimple_assign_single_p (def) && !gimple_assign_load_p (def))
{
  *name = gimple_assign_rhs1 (def);
  continue;
@@ -7524,7 +7525,7 @@ is_binop_p (enum tree_code code, tree *name)
  return 0;
 
case 3:
- ;
+ break;
}
 
   if (gimple_assign_rhs_code (def) != code)
@@ -7569,6 +7570,26 @@ prepare_xor (tree l_arg, tree *r_arg)
   return ret;
 }
 
+/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and
+   return its RHS, otherwise return EXP.  */
+
+static tree
+follow_load (tree exp, gimple **load)
+{
+  if (TREE_CODE (exp) == SSA_NAME
+  && !SSA_NAME_IS_DEFAULT_DEF (exp))
+{
+  gimple *def = SSA_NAME_DEF_STMT (exp);
+  if (gimple_assign_load_p (def))
+   {
+ *load = def;
+ exp = gimple_assign_rhs1 (def);
+   }
+}
+
+  return exp;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
If EXP is a comparison reference, we return the innermost reference.
@@ -7595,6 +7616,9 @@ prepare_xor (tree l_arg, tree *r_arg)
BIT_XOR_EXPR compared with zero.  We're to take the first or second
operand thereof if so.  It should be zero otherwise.
 
+   *LOAD is set to the load stmt of the innermost reference, if any,
+   and NULL otherwise.
+
Return 0 if this is not a component reference or is one that we can't
do anything with.  */
 
@@ -7602,7 +7626,8 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
HOST_WIDE_INT *pbitpos, machine_mode *pmode,
int *punsignedp, int *preversep, int *pvolatilep,
-   tree *pmask, tree *pand_mask, int xor_which)
+   tree *pmask, tree *pand_mask, int xor_which,
+   gimple **load)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -7612,11 +7637,13 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   unsigned int precision;
   HOST_WIDE_INT shiftrt = 0;
 
+  *load = NULL;
+
   /* All the optimizations using this function assume integer fields.
  There are problems with FP fields since the type_for_size call
  below can fail for, e.g., XFmode.  */
   if (! INTEGRAL_TYPE_P (TREE_TYPE (exp)))
-return 0;
+return NULL_TREE;
 
   /* We are interested in the bare arrangement of bits, so strip everything
  that doesn't affect the machine mode.  However, record the type of the
@@ -7626,7 +7653,7 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp)))
 {
   if (TREE_CODE (and_mask) != INTEGER_CST)
-   return 0;
+   return NULL_TREE;
 }
 
   if (xor_which)
@@ -7644,16 +7671,18 @@ decode_field_reference (location_t loc, tree *exp_, 
HOST_WIDE_INT *pbitsize,
   if (tree shift = is_binop_p (RSHIFT_EXPR, &exp))
 {
   if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift))
-   return 0;
+   return NULL_TREE;
   shiftrt = tree_to_shwi (shift);
   if (shiftrt <= 0)
-   return 0;
+   return NULL_TREE;
 }
 
   if (tree t = is_cast_p (&exp))
 if (!outer_type)
   outer_type = t;
 
+  exp = follow_load (exp, load);
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = ge

[gcc(refs/users/aoliva/heads/testme)] fold truth-and only in ifcombine

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:e731ae8c98953fb898a938ee0a0be19e2ea906d7

commit e731ae8c98953fb898a938ee0a0be19e2ea906d7
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:43:10 2024 -0300

fold truth-and only in ifcombine

Diff:
---
 gcc/gimple-fold.cc|  2 ++
 gcc/tree-ssa-ifcombine.cc | 24 +---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 85a0ec028030..5b7d83edbea9 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -8738,12 +8738,14 @@ maybe_fold_and_comparisons (tree type,
 op2b, outer_cond_bb))
 return t;
 
+#if 0
   if (tree t = fold_truth_andor_maybe_separate (UNKNOWN_LOCATION,
TRUTH_ANDIF_EXPR, type,
code2, op2a, op2b,
code1, op1a, op1b,
NULL))
 return t;
+#endif
 
   return NULL_TREE;
 }
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 79a4bdd363b9..61480e5fa894 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -399,6 +399,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
   outer2->probability = profile_probability::never ();
 }
 
+/* FIXME: move to a header file.  */
+extern tree
+fold_truth_andor_maybe_separate (location_t loc,
+enum tree_code code, tree truth_type,
+enum tree_code lcode, tree ll_arg, tree lr_arg,
+enum tree_code rcode, tree rl_arg, tree rr_arg,
+tree *separatep);
+
 /* If-convert on a and pattern with a common else block.  The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv and result_inv indicate whether the conditions
@@ -576,7 +584,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
   else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison
   && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison)
 {
-  tree t;
+  tree t, ts = NULL_TREE;
   enum tree_code inner_cond_code = gimple_cond_code (inner_cond);
   enum tree_code outer_cond_code = gimple_cond_code (outer_cond);
 
@@ -599,7 +607,17 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
outer_cond_code,
gimple_cond_lhs (outer_cond),
gimple_cond_rhs (outer_cond),
-   gimple_bb (outer_cond))))
+   gimple_bb (outer_cond)))
+ && !(t = ts = (fold_truth_andor_maybe_separate
+(UNKNOWN_LOCATION, TRUTH_ANDIF_EXPR,
+ boolean_type_node,
+ outer_cond_code,
+ gimple_cond_lhs (outer_cond),
+ gimple_cond_rhs (outer_cond),
+ inner_cond_code,
+ gimple_cond_lhs (inner_cond),
+ gimple_cond_rhs (inner_cond),
+ NULL))))
{
  {
  tree t1, t2;
@@ -636,7 +654,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
inner_inv,
  NULL, true, GSI_SAME_STMT);
 }
   /* ??? Fold should avoid this.  */
-  else if (!is_gimple_condexpr_for_cond (t))
+  else if (ts && !is_gimple_condexpr_for_cond (t))
goto gimplify_after_fold;
   if (result_inv)
t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t);

[gcc(refs/users/aoliva/heads/testme)] relax ifcombine to accept vuses

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:7b7dfff4b174765248fcf275dc8fde9c78352ad8

commit 7b7dfff4b174765248fcf275dc8fde9c78352ad8
Author: Alexandre Oliva 
Date:   Fri Sep 13 21:43:15 2024 -0300

relax ifcombine to accept vuses

Diff:
---
 gcc/config/i386/t-i386   |  2 ++
 gcc/testsuite/gcc.dg/field-merge-6.c | 26 ++
 gcc/tree-ssa-ifcombine.cc|  2 +-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386
index bf4ae109af98..1b904787ec62 100644
--- a/gcc/config/i386/t-i386
+++ b/gcc/config/i386/t-i386
@@ -79,3 +79,5 @@ s-i386-bt: $(srcdir)/config/i386/i386-builtin-types.awk \
$(AWK) -f $^ > tmp-bt.inc
$(SHELL) $(srcdir)/../move-if-change tmp-bt.inc i386-builtin-types.inc
$(STAMP) $@
+
+insn-attrtab.o-warn = -Wno-error
diff --git a/gcc/testsuite/gcc.dg/field-merge-6.c b/gcc/testsuite/gcc.dg/field-merge-6.c
new file mode 100644
index 000000000000..7fd48a138d14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-6.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+/* { dg-shouldfail } */
+
+/* Check that the third compare won't be pulled ahead of the second one,
+   which would prevent the NULL pointer dereference that should cause
+   the execution to fail.  */
+
+struct s {
+  char a, b;
+  int *p;
+};
+
+struct s a = { 0, 1, 0 };
+struct s b = { 0, 0, 0 };
+
+int f () {
+  return (a.a != b.a
+ || *b.p != *a.p
+ || a.b != b.b);
+}
+
+int main() {
+  f ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 61480e5fa894..7678c87e0170 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb)
   enum tree_code rhs_code;
   if (gimple_has_side_effects (stmt)
  || gimple_could_trap_p (stmt)
- || gimple_vuse (stmt)
+ /* || gimple_vuse (stmt) */
  /* We need to rewrite stmts with undefined overflow to use
 unsigned arithmetic but cannot do so for signed division.  */
  || ((ass = dyn_cast <gassign *> (stmt))

[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:4f6753de737fb45d78634c35c4c50a546357f70d

commit 4f6753de737fb45d78634c35c4c50a546357f70d
Author: Alexandre Oliva 
Date:   Sat Sep 14 03:40:26 2024 -0300

support noncontiguous ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index 7678c87e0170..eb4317bebdfb 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -798,10 +798,10 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
if (a && b)
 ;
  This requires a single predecessor of the inner cond_bb.  */
-  if (single_pred_p (inner_cond_bb)
-  && bb_no_side_effects_p (inner_cond_bb))
+  for (basic_block bb = inner_cond_bb;
+   single_pred_p (bb) && bb_no_side_effects_p (bb); )
 {
-  basic_block outer_cond_bb = single_pred (inner_cond_bb);
+  basic_block outer_cond_bb = bb = single_pred (bb);
 
   if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb,
   then_bb, else_bb, inner_cond_bb))

[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine

2024-09-13 Thread Alexandre Oliva via Gcc-cvs

https://gcc.gnu.org/g:8a7e9581280c41f3c18cba7fafe110b4108a07a7

commit 8a7e9581280c41f3c18cba7fafe110b4108a07a7
Author: Alexandre Oliva 
Date:   Sat Sep 14 03:40:26 2024 -0300

support noncontiguous ifcombine

Diff:
---
 gcc/tree-ssa-ifcombine.cc | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index eb4317bebdfb..b52d343feb91 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -782,13 +782,13 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, 
basic_block outer_cond_bb,
if-conversion helper.  We start with BB as the innermost
worker basic-block.  Returns true if a transformation was done.  */
 
-static bool
+static basic_block
 tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 {
   basic_block then_bb = NULL, else_bb = NULL;
 
   if (!recognize_if_then_else (inner_cond_bb, &then_bb, &else_bb))
-return false;
+return NULL;
 
   /* Recognize && and || of two conditions with a common
  then/else block which entry edges we can merge.  That is:
@@ -805,7 +805,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 
   if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb,
   then_bb, else_bb, inner_cond_bb))
-   return true;
+   return bb;
 
   if (forwarder_block_to (else_bb, then_bb))
{
@@ -817,7 +817,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 edge from outer_cond_bb and the forwarder block.  */
  if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb,
   then_bb, else_bb))
-   return true;
+   return bb;
}
   else if (forwarder_block_to (then_bb, else_bb))
{
@@ -829,11 +829,11 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb)
 edge from outer_cond_bb and the forwarder block.  */
  if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb,
   then_bb, then_bb))
-   return true;
+   return bb;
}
 }
 
-  return false;
+  return NULL;
 }
 
 /* Main entry for the tree if-conversion pass.  */
@@ -884,12 +884,14 @@ pass_tree_ifcombine::execute (function *fun)
  inner ones, and also that we do not try to visit a removed
  block.  This is opposite of PHI-OPT, because we cascade the
  combining rather than cascading PHIs. */
+  basic_block seen = NULL;
+  bool changed = false;
   for (i = n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS - 1; i >= 0; i--)
 {
   basic_block bb = bbs[i];
 
   if (safe_is_a <gcond *> (*gsi_last_bb (bb)))
-   if (tree_ssa_ifcombine_bb (bb))
+   if (basic_block outer_bb = tree_ssa_ifcombine_bb (bb))
  {
/* Clear range info from all stmts in BB which is now executed
   conditional on a always true/false condition.  */
@@ -908,7 +910,24 @@ pass_tree_ifcombine::execute (function *fun)
  rewrite_to_defined_overflow (&gsi);
  }
cfg_changed |= true;
+   if (seen)
+ changed |= true;
+   else
+ seen = bb;
+   /* Go back and check whether the modified outer_bb can be further
+  optimized.  ??? How could it?  */
+   do
+ i++;
+   while (bbs[i] != outer_bb);
+   continue;
  }
+
+  if (bb == seen)
+   {
+ gcc_assert (!changed);
+ seen = NULL;
+ changed = false;
+   }
 }
 
   free (bbs);

48 matches

Mail list logo