date:20241031

[gcc r15-4796] expand: Fix up expansion of VIEW_CONVERT_EXPR to BITINT_TYPE [PR117354]

2024-10-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:b39f62ff739e9ffea0e6485667f15b985f8cd63d

commit r15-4796-gb39f62ff739e9ffea0e6485667f15b985f8cd63d
Author: Jakub Jelinek 
Date:   Thu Oct 31 10:52:56 2024 +0100

expand: Fix up expansion of VIEW_CONVERT_EXPR to BITINT_TYPE [PR117354]

The following testcase ICEs, because when trying to expand the
VIEW_CONVERT_EXPR operand which is SSA_NAME defined to
V32QI or V4DI MEM_REF which is aligned just to 8 bytes we force
it as unaligned into a register, but then try to call extract_bit_field
from the V32QI or V4DI register to BLKmode.  extract_bit_field obviously
doesn't support BLKmode extraction and so ICEs.

The second hunk fixes the ICE by not calling extract_bit_field when
it can't handle it; the last if will handle it properly by storing
it to memory and using BLKmode access to the copy.

The first hunk is an optimization: if mode is BLKmode, then by setting the
inner_reference_p argument to expand_expr_real we avoid the
expand_misaligned_mem_ref calls which load it from memory into a register.

2024-10-31  Jakub Jelinek  

PR middle-end/117354
* expr.cc (expand_expr_real_1) <case VIEW_CONVERT_EXPR>: Pass
true as inner_reference_p argument to expand_expr_real if
mode is BLKmode.  Don't call extract_bit_field if mode is BLKmode.

* gcc.dg/bitint-113.c: New test.

Diff:
---
 gcc/expr.cc   |  4 ++--
 gcc/testsuite/gcc.dg/bitint-113.c | 40 +++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 348ac3c777a3..caa1a72ba0be 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12468,7 +12468,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
 
   if (!op0)
op0 = expand_expr_real (treeop0, NULL_RTX, VOIDmode, modifier,
-   NULL, inner_reference_p);
+   NULL, inner_reference_p || mode == BLKmode);
 
   /* If the input and output modes are both the same, we are done.  */
   if (mode == GET_MODE (op0))
@@ -12505,7 +12505,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
tmode,
op0 = convert_modes (mode, GET_MODE (op0), op0,
 TYPE_UNSIGNED (TREE_TYPE (treeop0)));
   /* If the output type is a bit-field type, do an extraction.  */
-  else if (reduce_bit_field)
+  else if (reduce_bit_field && mode != BLKmode)
return extract_bit_field (op0, TYPE_PRECISION (type), 0,
  TYPE_UNSIGNED (type), NULL_RTX,
  mode, mode, false, NULL);
diff --git a/gcc/testsuite/gcc.dg/bitint-113.c 
b/gcc/testsuite/gcc.dg/bitint-113.c
new file mode 100644
index 000000000000..3c934a62fb47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bitint-113.c
@@ -0,0 +1,40 @@
+/* PR middle-end/117354 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */
+
+#if __BITINT_MAXWIDTH__ >= 256
+#define N 256
+#else
+#define N 64
+#endif
+
+struct S {
+  unsigned char y;
+  _BitInt(N) x;
+} s;
+
+__attribute__((noipa)) static void
+foo (const char *, _BitInt(N))
+{
+}
+
+__attribute__((noipa)) static void
+bar (_BitInt(N))
+{
+}
+
+static void
+baz (void *p)
+{
+  foo ("bazbazbazb", s.x);
+  __builtin_memcpy (p, &s.x, sizeof s.x);
+}
+
+int
+main ()
+{
+  void *ptr = &s.x;
+  baz (&s.x);
+  bar (*(_BitInt(N) *) ptr);
+}

[gcc r15-4799] OpenMP/C++: Fix declare variant with reference-returning functions

2024-10-31 Thread Tobias Burnus via Gcc-cvs

https://gcc.gnu.org/g:f011f8908182fd05ddd9a34881507b8584c44fb2

commit r15-4799-gf011f8908182fd05ddd9a34881507b8584c44fb2
Author: Tobias Burnus 
Date:   Thu Oct 31 11:28:57 2024 +0100

OpenMP/C++: Fix declare variant with reference-returning functions

gcc/cp/ChangeLog:

* decl.cc (omp_declare_variant_finalize_one): Strip indirect ref
around variant-function call when processing a variant.

gcc/testsuite/ChangeLog:

* g++.dg/gomp/declare-variant-9.C: New test.

Diff:
---
 gcc/cp/decl.cc|  3 +++
 gcc/testsuite/g++.dg/gomp/declare-variant-9.C | 29 +++
 2 files changed, 32 insertions(+)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 0bc320a2b396..b638f3af294d 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -8375,6 +8375,9 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
   if (variant == error_mark_node && !processing_template_decl)
 return true;
 
+  if (TREE_CODE (variant) == INDIRECT_REF)
+variant = TREE_OPERAND (variant, 0);
+
   variant = cp_get_callee_fndecl_nofold (variant);
   input_location = save_loc;
 
diff --git a/gcc/testsuite/g++.dg/gomp/declare-variant-9.C 
b/gcc/testsuite/g++.dg/gomp/declare-variant-9.C
new file mode 100644
index 000000000000..7e26d8b11aee
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/declare-variant-9.C
@@ -0,0 +1,29 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+int &variant_fn();
+
+#pragma omp declare variant(variant_fn) match(user={condition(1)})
+int &bar();
+
+void sub(int &a)
+{
+  bar();
+  a = bar(); 
+}
+
+template
+T &templ_var_fn(T x);
+
+#pragma omp declare variant(templ_var_fn) match(user={condition(1)})
+template
+T &templ_base_fn(T x);
+
+void run(int &b)
+{
+  templ_base_fn(5);
+  b = templ_base_fn(7); 
+}
+
+/* { dg-final { scan-tree-dump "  variant_fn \\(\\);" "gimple" } } */
+/* { dg-final { scan-tree-dump "  _1 = variant_fn \\(\\);" "gimple" } } */
+/* { dg-final { scan-tree-dump "  templ_var_fn \\(5\\);" "gimple" } } */
+/* { dg-final { scan-tree-dump "  _1 = templ_var_fn \\(7\\);" "gimple" } 
} */

[gcc r15-4811] testsuite: Fix prototype in gcc.dg/pr114115.c

2024-10-31 Thread Joseph Myers via Gcc-cvs

https://gcc.gnu.org/g:241d419c46f381f9351b1957d7d34f177e0303ba

commit r15-4811-g241d419c46f381f9351b1957d7d34f177e0303ba
Author: Joseph Myers 
Date:   Thu Oct 31 17:56:07 2024 +0000

testsuite: Fix prototype in gcc.dg/pr114115.c

One test failing with a -std=gnu23 default that I wanted to
investigate further is gcc.dg/pr114115.c.  Building with -std=gnu23
produces a warning:

pr114115.c:18:8: warning: 'ifunc' resolver for 'foo_ifunc2' should return 
'void * (*)(void)' [-Wattribute-alias=]

It turns out that this warning (from cgraphunit.cc) is disabled for
unprototyped functions.  Fix the return type for foo_ifunc2 so the
test builds without warnings both with and without -std=gnu23.

Tested for x86_64.

* gcc.dg/pr114115.c (foo_ifunc2): Return void.

Diff:
---
 gcc/testsuite/gcc.dg/pr114115.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
index 2629f5918779..5e3ef5793cde 100644
--- a/gcc/testsuite/gcc.dg/pr114115.c
+++ b/gcc/testsuite/gcc.dg/pr114115.c
@@ -3,7 +3,7 @@
 /* { dg-require-profiling "-fprofile-generate" } */
 /* { dg-require-ifunc "" } */
 
-void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
+void foo_ifunc2() __attribute__((ifunc("foo_resolver")));
 
 void bar(void)
 {

[gcc r15-4810] Add autoconf check for clock_gettime

2024-10-31 Thread Andi Kleen via Gcc-cvs

https://gcc.gnu.org/g:b23de8ec7694883b1c203e1f12e3ea6d249f23f8

commit r15-4810-gb23de8ec7694883b1c203e1f12e3ea6d249f23f8
Author: Andi Kleen 
Date:   Thu Oct 31 10:03:08 2024 -0700

Add autoconf check for clock_gettime

Reported by Andrew Stubbs

gcc/ChangeLog:

* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Check for HAVE_CLOCK_GETTIME.
* timevar.cc (get_time): Use HAVE_CLOCK_GETTIME.

Diff:
---
 gcc/config.in|  6 ++
 gcc/configure| 55 ---
 gcc/configure.ac | 12 +++-
 gcc/timevar.cc   |  2 +-
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index 3fc4666d60b5..0a506d1783a4 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -883,6 +883,12 @@
 #endif
 
 
+/* Define to 1 if you have the `clock_gettime' function. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_CLOCK_GETTIME
+#endif
+
+
 /* Define if <time.h> defines clock_t. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_CLOCK_T
diff --git a/gcc/configure b/gcc/configure
index 47c58036530f..150ab6164142 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -10613,7 +10613,8 @@ fi
 for ac_func in times clock kill getrlimit setrlimit atoq \
popen sysconf strsignal getrusage nl_langinfo \
gettimeofday mbstowcs wcswidth mmap posix_fallocate setlocale \
-   clearerr_unlocked feof_unlocked   ferror_unlocked fflush_unlocked 
fgetc_unlocked fgets_unlocked   fileno_unlocked fprintf_unlocked fputc_unlocked 
fputs_unlocked   fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked  
 putchar_unlocked putc_unlocked madvise mallinfo mallinfo2 fstatat getauxval
+   clearerr_unlocked feof_unlocked   ferror_unlocked fflush_unlocked 
fgetc_unlocked fgets_unlocked   fileno_unlocked fprintf_unlocked fputc_unlocked 
fputs_unlocked   fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked  
 putchar_unlocked putc_unlocked madvise mallinfo mallinfo2 fstatat getauxval \
+   clock_gettime
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_cxx_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -10626,6 +10627,54 @@ fi
 done
 
 
+# At least for glibc, clock_gettime is in librt.  But don't pull that
+# in if it still doesn't give us the function we want.
+if test $ac_cv_func_clock_gettime = no; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" 
>&5
+$as_echo_n "checking for clock_gettime in -lrt... " >&6; }
+if ${ac_cv_lib_rt_clock_gettime+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrt  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"; then :
+  ac_cv_lib_rt_clock_gettime=yes
+else
+  ac_cv_lib_rt_clock_gettime=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_clock_gettime" 
>&5
+$as_echo "$ac_cv_lib_rt_clock_gettime" >&6; }
+if test "x$ac_cv_lib_rt_clock_gettime" = xyes; then :
+  LIBS="-lrt $LIBS"
+
+$as_echo "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h
+
+fi
+
+fi
+
 if test x$ac_cv_func_mbstowcs = xyes; then
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mbstowcs works" >&5
 $as_echo_n "checking whether mbstowcs works... " >&6; }
@@ -21405,7 +21454,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 21408 "configure"
+#line 21457 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -21511,7 +21560,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 21514 "configure"
+#line 21563 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index dc8346a7b823..bdb22d53e2ca 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -1557,7 +1557,17 @@ define(gcc_UNLOCKED_FUNCS, clearerr_unlocked 
feof_unlocked dnl
 AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoq \
popen sysconf strsignal getrusage nl_langinfo \
gettimeofday mbstowcs wcswidth mmap posix_fallocate setlocale \
-   gcc_UNLOCKED_FUNCS madvise mallinfo mallinfo2 fstatat getauxval)
+   gcc_UNLOCKED_FUNCS madvise mallinfo mallinfo2 fstatat getauxval \
+   clock_gettime)
+
+# At least for glibc, clock_gettime is in librt.  But don't pull that
+# in if it still doesn't give us the function we want.
+if test $ac_cv_func_clock_gettime

[gcc r15-4812] c: detect variably-modified types [PR117145, PR117245, PR100420]

2024-10-31 Thread Martin Uecker via Gcc-cvs

https://gcc.gnu.org/g:9eae9268e41463927c9383004e58708048ec379f

commit r15-4812-g9eae9268e41463927c9383004e58708048ec379f
Author: Martin Uecker 
Date:   Tue Oct 22 23:25:00 2024 +0200

c: detect variably-modified types [PR117145,PR117245,PR100420]

This fixes two cases where variably-modified types were not recognized as
such.  The first is when building composite types and the other when a type
is reconstructed for the 'vector' attribute.  Construction of types in
the C FE is reorganized to use c_build_* functions which are responsible for
setting C_TYPE_VARIABLE_SIZE, C_TYPE_VARIABLY_MODIFIED and 
TYPE_TYPELESS_STORAGE
based on the properties of the type itself and these replace all other logic
elsewhere (e.g. in grokdeclarator).  A new 'c_reconstruct_complex_type' 
based
on these functions is introduced which is called via a language hook when 
the
'vector' attribute is processed (as for C++).

One problem is arrays of unspecified size 'T[*]', which were represented
identically to zero-sized arrays but with C_TYPE_VARIABLE_SIZE set.  To 
avoid
having to create distinct type copies for this, the representation was 
changed
to make it a natural VLA by giving it an upper bound of '(0, 0)'.  This also
then allows fixing of PR100420 where such arrays were printed as 'T[0]'.

Finally, a new function 'c_verify_type' checks consistency of properties
specific to C FE and is called when checking is on.

PR c/117145
PR c/117245
PR c/100420

gcc/c/ChangeLog:
* c-decl.cc (c_build_pointer_type): Move to c-typeck.cc
(grokdeclarator): Simplify logic.
(match_builtin_function_types): Adapt.
(push_decl): Adapt.
(implicitly_declare): Adapt.
(c_update_type_canonical): Adapt.
(c_make_fname_decl): Adapt.
(start_function): Adapt.
* c-objc-common.h: Add LANG_HOOKS_RECONSTRUCT_COMPLEX_TYPE.
* c-tree.h: Add prototypes.
* c-typeck.cc (c_verify_type): New function.
(c_set_type_bits): New function.
(c_build_pointer_type): Moved from c-decl.cc.
(c_build_pointer_type_for_mode): New function.
(c_build_function_type): New function.
(c_build_array_type): New function.
(c_build_type_attribute_variant): New function.
(c_reconstruct_complex_type): New function.
(c_build_functype_attribute_variant): Renamed.
(array_to_pointer_conversion): Simplify logic.
(composite_type_internal): Simplify logic.
(build_unary_op): Simplify logic.
(comptypes_verify): Add checking assertions.
(c_build_qualified_type): Add checking assertions.
(c_build_function_call_vec): Adapt.
(qualify_type): Adapt.
(build_functype_attribute_variant): Adapt.
(common_pointer_type): Adapt.
(c_common_type): Adapt.
(convert_for_assignment): Adapt.
(type_or_builtin_type): Adapt.
(build_access_with_size_for_counted_by): Adapt.
(build_conditional_expr): Adapt.
(build_modify_expr): Adapt.
(build_binary_op): Adapt.
(build_omp_array_section): Adapt.
(handle_omp_array_sections): Adapt.
(c_finish_omp_clauses): Adapt.
* c-parser.cc (c_parser_typeof_specifier): Adapt.
(c_parser_generic_selection): Adapt.

gcc/c-family/ChangeLog:
* c-pretty-print.cc (c_pretty_printer::direct_abstract_declarator):
Detect arrays of unspecified size.

gcc/testsuite/ChangeLog:
* gcc.dg/c23-tag-composite-11.c: New test.
* gcc.dg/Warray-parameter-4.c: Resolve xfails.
* gcc.dg/Wvla-parameter-2.c: Resolve xfails.
* gcc.dg/Wvla-parameter-3.c: Resolve xfails.
* gcc.dg/pr117145-1.c: New test.
* gcc.dg/pr117145-2.c: New test.
* gcc.dg/pr117245.c: New test.

Diff:
---
 gcc/c-family/c-pretty-print.cc  |   6 +-
 gcc/c/c-decl.cc | 130 ---
 gcc/c/c-objc-common.h   |   2 +
 gcc/c/c-parser.cc   |   4 +-
 gcc/c/c-tree.h  |   7 +
 gcc/c/c-typeck.cc   | 331 ++--
 gcc/testsuite/gcc.dg/Warray-parameter-4.c   |   6 +-
 gcc/testsuite/gcc.dg/Wvla-parameter-2.c |  15 +-
 gcc/testsuite/gcc.dg/Wvla-parameter-3.c |  11 +-
 gcc/testsuite/gcc.dg/c23-tag-composite-11.c |  27 +++
 gcc/testsuite/gcc.dg/pr117145-1.c   |  14 ++
 gcc/testsuite/gcc.dg/pr117145-2.c   |  10 +
 gcc/testsuite/gcc.dg/pr117245.c |  17 ++
 13 files changed, 393 insertions(+), 187 deletions(-)

diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-fa

[gcc r15-4809] testsuite: Use noinline in gcc.dg/simulate-thread/simulate-thread.h

2024-10-31 Thread Joseph Myers via Gcc-cvs

https://gcc.gnu.org/g:1504073ad89f4dff7243dea608f385d3fa8cc89a

commit r15-4809-g1504073ad89f4dff7243dea608f385d3fa8cc89a
Author: Joseph Myers 
Date:   Thu Oct 31 17:01:09 2024 +0000

testsuite: Use noinline in gcc.dg/simulate-thread/simulate-thread.h

Among the changes of test results with a -std=gnu23 default were two
tests changing from PASS to UNSUPPORTED:

UNSUPPORTED: gcc.dg/simulate-thread/speculative-store.c   -O2 -g  thread 
simulation test
UNSUPPORTED: gcc.dg/simulate-thread/speculative-store.c   -O3 -g  thread 
simulation test

It appears that functions defined with () becoming prototyped affects
inlining, and changing the code to use (void) allows UNSUPPORTED
results to be reproduced with -std=gnu17.  Add __attribute__
((noinline)) on one more function to avoid the UNSUPPORTED results;
some of the tests in this directory already have such an attribute on
some functions.

Tested for x86_64-pc-linux-gnu.

* gcc.dg/simulate-thread/simulate-thread.h
(simulate_thread_wrapper_final_verify): Mark noinline.

Diff:
---
 gcc/testsuite/gcc.dg/simulate-thread/simulate-thread.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/simulate-thread/simulate-thread.h 
b/gcc/testsuite/gcc.dg/simulate-thread/simulate-thread.h
index 22c05084ee74..b582220694e5 100644
--- a/gcc/testsuite/gcc.dg/simulate-thread/simulate-thread.h
+++ b/gcc/testsuite/gcc.dg/simulate-thread/simulate-thread.h
@@ -116,7 +116,7 @@ simulate_thread_wrapper_other_threads()
 
 /* If the test case defines HOSTILE_PAUSE_ERROR, then the test case
will fail execution if it had a hostile pause.  */
-int
+__attribute__ ((noinline)) int
 simulate_thread_wrapper_final_verify ()
 {
   int ret = simulate_thread_final_verify ();

[gcc r15-4813] testsuite: fix c23-constexpr-2a.c test to use dg-do run

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:2a4ee57b04398e54284e3d6b5ed4f8842ee26a5c

commit r15-4813-g2a4ee57b04398e54284e3d6b5ed4f8842ee26a5c
Author: Sam James 
Date:   Thu Oct 31 18:37:30 2024 +0000

testsuite: fix c23-constexpr-2a.c test to use dg-do run

The comment at the top of the test indicates it should be an execution test,
but it was only using 'dg-do link'. Correct that.

The only change in test results is as expected:
```
+PASS: gcc.dg/c23-constexpr-2a.c execution test
```

gcc/testsuite/ChangeLog:
PR testsuite/117183

* gcc.dg/c23-constexpr-2a.c: Use dg-do run.

Diff:
---
 gcc/testsuite/gcc.dg/c23-constexpr-2a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/c23-constexpr-2a.c 
b/gcc/testsuite/gcc.dg/c23-constexpr-2a.c
index a08097a126bd..3fe3756f379a 100644
--- a/gcc/testsuite/gcc.dg/c23-constexpr-2a.c
+++ b/gcc/testsuite/gcc.dg/c23-constexpr-2a.c
@@ -1,5 +1,5 @@
 /* Test C23 constexpr.  Valid code, execution test.  */
-/* { dg-do link } */
+/* { dg-do run } */
 /* { dg-options "-std=c23 -pedantic-errors" } */
 /* { dg-additional-sources "c23-constexpr-2b.c" } */

[gcc r14-10859] aarch64: Forbid F64MM permutes in streaming mode

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:8ac2f3ce09c23fda8cc4657805648a07a210a885

commit r14-10859-g8ac2f3ce09c23fda8cc4657805648a07a210a885
Author: Richard Sandiford 
Date:   Thu Oct 31 19:52:46 2024 +0000

aarch64: Forbid F64MM permutes in streaming mode

The current code was based on an early version of the SME spec,
which allowed the .Q forms of TRN1, TRN2, UZP1, UZP2, ZIP1, and ZIP2
to be used in streaming mode.  We should now forbid them instead;
see 
https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/TRN1--TRN2--vectors---Interleave-even-or-odd-elements-from-two-vectors-?lang=en
and the corresponding entries for the others.

gcc/
* config/aarch64/aarch64-sve-builtins-base.def (svtrn1q, svtrn2q)
(svuzp1q, svuzp2q, svzip1q, svzip2q): Require SM_OFF.

gcc/testsuite/
* g++.target/aarch64/sve/aarch64-ssve.exp: Add tests for trn[12]q,
uzp[12]q, and zip[12]q.
* gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c: Skip for
STREAMING_COMPATIBLE.
* gcc.target/aarch64/sve/acle/asm/trn1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_u32.c: Likewise.

[gcc r15-4816] aarch64: Record separate streaming and non-streaming ISA requirements

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:ede97598e2cdb6310e05b271513f6009e84fc0b5

commit r15-4816-gede97598e2cdb6310e05b271513f6009e84fc0b5
Author: Richard Sandiford 
Date:   Thu Oct 31 19:52:07 2024 +0000

aarch64: Record separate streaming and non-streaming ISA requirements

For some upcoming extensions, we need to add intrinsics whose
ISA requirements differ between streaming mode and non-streaming mode.
This patch tries to generalise the infrastructure to support that:

- Rather than have a single set of feature flags, the patch uses a
  separate set for sm_off (non-streaming, PSTATE.SM==0) and sm_on
  (streaming, PSTATE.SM==1).

- The sm_off set is zero if the intrinsic is streaming-only.
  Otherwise it is AARCH64_FL_SM_OFF | <requirements>.

- Similarly, the sm_on set is zero if the intrinsic is non-streaming-only.
  Otherwise it is AARCH64_FL_SM_ON | <requirements>.  AARCH64_FL_SME is
  taken as given in streaming mode.

- Streaming-compatible code must satisfy both sets of requirements.

There should be no functional change.

gcc/
* config.gcc (aarch64*-*-*): Add aarch64-protos.h to target_gtfiles.
* config/aarch64/aarch64-protos.h
(aarch64_required_extensions): New structure.
(aarch64_check_required_extensions): Change the type of the
required_extensions parameter from aarch64_feature_flags to
aarch64_required_extensions.
* config/aarch64/aarch64-sve-builtins.h
(function_builder::add_unique_function): Likewise.
(function_builder::add_overloaded_function): Likewise.
(function_builder::get_attributes): Likewise.
(function_builder::add_function): Likewise.
(function_group_info): Change the type of required_extensions
in the same way.
* config/aarch64/aarch64-builtins.cc
(aarch64_pragma_builtins_data::required_extensions): Change the type
from aarch64_feature_flags to aarch64_required_extensions.
(aarch64_check_required_extensions): Likewise change the type
of the required_extensions parameter.  Separate the requirements
for non-streaming mode and streaming mode, ORing them together
for streaming-compatible mode.
(aarch64_general_required_extensions): New function.
(aarch64_general_check_builtin_call): Use it.
* config/aarch64/aarch64-sve-builtins.cc
(registered_function::required_extensions): Change the type
from aarch64_feature_flags to aarch64_required_extensions.
(DEF_NEON_SVE_FUNCTION, DEF_SME_ZA_FUNCTION_GS): Update accordingly.
(function_builder::get_attributes): Change the type of the
required_extensions parameter from aarch64_feature_flags to
aarch64_required_extensions.
(function_builder::add_function): Likewise.
(function_builder::add_unique_function): Likewise.
(function_builder::add_overloaded_function): Likewise.
* config/aarch64/aarch64-simd-pragma-builtins.def: Update
REQUIRED_EXTENSIONS definitions to use aarch64_required_extensions.
* config/aarch64/aarch64-sve-builtins-base.def: Likewise.
* config/aarch64/aarch64-sve-builtins-sme.def: Likewise.
* config/aarch64/aarch64-sve-builtins-sve2.def: Likewise.

Diff:
---
 gcc/config.gcc |   2 +-
 gcc/config/aarch64/aarch64-builtins.cc | 122 -
 gcc/config/aarch64/aarch64-protos.h|  87 ++-
 .../aarch64/aarch64-simd-pragma-builtins.def   |   2 +-
 gcc/config/aarch64/aarch64-sve-builtins-base.def   |  26 ++---
 gcc/config/aarch64/aarch64-sve-builtins-sme.def|  30 ++---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |  41 ++-
 gcc/config/aarch64/aarch64-sve-builtins.cc |  51 +
 gcc/config/aarch64/aarch64-sve-builtins.h  |  13 +--
 9 files changed, 226 insertions(+), 148 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index e2ed3b309cc2..c3531e56c9d6 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,7 @@ aarch64*-*-*)
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o 
aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o 
cortex-a57-fma-steering.o aarch64-speculation.o 
falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o 
aarch64-early-ra.o aarch64-ldp-fusion.o"
-   target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.h 
\$(srcdir)/config/aarch64/aarch64-builtins.cc 
\$(srcdir)/config/aarch64/aarch64-sve-builtins.h 
\$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
+   target_gtfiles="\$(srcdir)/config/aarch64/aarch64-p

[gcc r15-4814] aarch64: Forbid F64MM permutes in streaming mode

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:94b9ffbdd9a3a2d3c71afb67e0fd6b7ce1d51391

commit r15-4814-g94b9ffbdd9a3a2d3c71afb67e0fd6b7ce1d51391
Author: Richard Sandiford 
Date:   Thu Oct 31 19:52:06 2024 +0000

aarch64: Forbid F64MM permutes in streaming mode

The current code was based on an early version of the SME spec,
which allowed the .Q forms of TRN1, TRN2, UZP1, UZP2, ZIP1, and ZIP2
to be used in streaming mode.  We should now forbid them instead;
see 
https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/TRN1--TRN2--vectors---Interleave-even-or-odd-elements-from-two-vectors-?lang=en
and the corresponding entries for the others.

gcc/
* config/aarch64/aarch64-sve-builtins-base.def (svtrn1q, svtrn2q)
(svuzp1q, svuzp2q, svzip1q, svzip2q): Require SM_OFF.

gcc/testsuite/
* g++.target/aarch64/sve/aarch64-ssve.exp: Add tests for trn[12]q,
uzp[12]q, and zip[12]q.
* gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c: Skip for
STREAMING_COMPATIBLE.
* gcc.target/aarch64/sve/acle/asm/trn1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn1q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/trn2q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_f64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s32.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s64.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_s8.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_u16.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/zip1q_u32.c: Likewise.

[gcc r15-4817] aarch64: Require SVE2 and/or SME2 for SVE FAMINMAX intrinsics

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:a00a0e34b8a7312bcf357bd0238d6d9711ca96b2

commit r15-4817-ga00a0e34b8a7312bcf357bd0238d6d9711ca96b2
Author: Richard Sandiford 
Date:   Thu Oct 31 19:52:07 2024 +0000

aarch64: Require SVE2 and/or SME2 for SVE FAMINMAX intrinsics

After the previous patch, we can now accurately model the ISA
requirements for the SVE FAMINMAX intrinsics.  They can be used
in non-streaming mode if TARGET_SVE2 and in streaming mode if
TARGET_SME2 (with both cases also requiring TARGET_FAMINMAX).
They can be used in streaming-compatible mode if TARGET_SVE2
&& TARGET_SME2.

Also, Kyrill pointed out in the original review of the FAMINMAX
support that it would be more consistent to define the rtl patterns
in aarch64-sve2.md rather than aarch64-sve.md, so the pushed patch
did that.  This patch moves the definitions of the intrinsics to
the sve2 files too, for consistency.

gcc/
* config/aarch64/aarch64-sve-builtins-base.cc (svamax, svamin): Move
definitions to...
* config/aarch64/aarch64-sve-builtins-sve2.cc: ...here.
* config/aarch64/aarch64-sve-builtins-base.def (svamax, svamin): Move
definitions to...
* config/aarch64/aarch64-sve-builtins-sve2.def: ...here.  Require
SME2 in streaming mode.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/amin_1.c: New test.
* gcc.target/aarch64/sve2/acle/asm/amax_f16.c: Enable sve2 and
(for streaming mode) sme2.
* gcc.target/aarch64/sve2/acle/asm/amax_f32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/amax_f64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/amin_f16.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/amin_f32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/amin_f64.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc| 4 
 gcc/config/aarch64/aarch64-sve-builtins-base.def   | 5 -
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc| 4 
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   | 7 +++
 gcc/testsuite/gcc.target/aarch64/sve/acle/general/amin_1.c | 9 +
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f16.c  | 5 -
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f32.c  | 5 -
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amax_f64.c  | 5 -
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f16.c  | 5 -
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f32.c  | 5 -
 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/amin_f64.c  | 5 -
 11 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index fe16d93adcd1..1c9f515a52cb 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -3184,10 +3184,6 @@ FUNCTION (svadrb, svadr_bhwd_impl, (0))
 FUNCTION (svadrd, svadr_bhwd_impl, (3))
 FUNCTION (svadrh, svadr_bhwd_impl, (1))
 FUNCTION (svadrw, svadr_bhwd_impl, (2))
-FUNCTION (svamax, cond_or_uncond_unspec_function,
- (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
-FUNCTION (svamin, cond_or_uncond_unspec_function,
- (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
 FUNCTION (svand, rtx_code_function, (AND, AND))
 FUNCTION (svandv, reduction, (UNSPEC_ANDV))
 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def 
b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index edfe2574507e..da2a0e41aa5d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -368,8 +368,3 @@ DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none)
 DEF_SVE_FUNCTION (svzip1q, binary, all_data, none)
 DEF_SVE_FUNCTION (svzip2q, binary, all_data, none)
 #undef REQUIRED_EXTENSIONS
-
-#define REQUIRED_EXTENSIONS ssve (AARCH64_FL_FAMINMAX)
-DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
-DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
-#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index d29c2209fdfe..64f86035c30e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -591,6 +591,10 @@ FUNCTION (svaesd, fixed_insn_function, 
(CODE_FOR_aarch64_sve2_aesd))
 FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese))
 FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc))
 FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))
+FUNCTION (svamax, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
+FUNCTION (svamin, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
 FUNCTION (svbcax, CO

[gcc r14-10858] Fix function multiversioning dispatcher link error with LTO

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:4e174ac8ee96a484be97313a90d2e831e978e27e

commit r14-10858-g4e174ac8ee96a484be97313a90d2e831e978e27e
Author: Yangyu Chen 
Date:   Thu Oct 31 19:52:45 2024 +

Fix function multiversioning dispatcher link error with LTO

We forgot to apply DECL_EXTERNAL to __init_cpu_features_resolver decl. When
building with LTO, the linker cannot find the
__init_cpu_features_resolver.lto_priv* symbol, causing the link error.

This patch fixes this by setting DECL_EXTERNAL on the decl. To avoid a
"used but never defined" warning for this symbol, we also set TREE_PUBLIC
on the decl and give it hidden visibility. The attributes of the
__aarch64_cpu_features identifier are fixed in the same way.

Minimal steps to reproduce the bug:

echo '__attribute__((target_clones("default", "aes"))) void func1() { }' > 1.c
echo '__attribute__((target_clones("default", "aes"))) void func2() { }' > 2.c
echo 'void func1();void func2();int main(){func1();func2();return 0;}' > main.c
gcc -flto -c 1.c 2.c
gcc -flto main.c 1.o 2.o

Fixes: 0cfde688e213 ("[aarch64] Add function multiversioning support")
Signed-off-by: Yangyu Chen 

gcc/ChangeLog:

* config/aarch64/aarch64.cc (dispatch_function_versions): Add
DECL_EXTERNAL, TREE_PUBLIC and hidden DECL_VISIBILITY to
__init_cpu_features_resolver and __aarch64_cpu_features.

(cherry picked from commit 875279ff3ee3b4135401286b8378087a24fd0f8d)

Diff:
---
 gcc/config/aarch64/aarch64.cc | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e9bca8bbd08b..33a46c9eabec 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -20283,6 +20283,10 @@ dispatch_function_versions (tree dispatch_decl,
   tree init_fn_id = get_identifier ("__init_cpu_features_resolver");
   tree init_fn_decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
  init_fn_id, init_fn_type);
+  DECL_EXTERNAL (init_fn_decl) = 1;
+  TREE_PUBLIC (init_fn_decl) = 1;
+  DECL_VISIBILITY (init_fn_decl) = VISIBILITY_HIDDEN;
+  DECL_VISIBILITY_SPECIFIED (init_fn_decl) = 1;
   tree arg1 = DECL_ARGUMENTS (dispatch_decl);
   tree arg2 = TREE_CHAIN (arg1);
   ifunc_cpu_init_stmt = gimple_build_call (init_fn_decl, 2, arg1, arg2);
@@ -20302,6 +20306,9 @@ dispatch_function_versions (tree dispatch_decl,
get_identifier ("__aarch64_cpu_features"),
global_type);
   DECL_EXTERNAL (global_var) = 1;
+  TREE_PUBLIC (global_var) = 1;
+  DECL_VISIBILITY (global_var) = VISIBILITY_HIDDEN;
+  DECL_VISIBILITY_SPECIFIED (global_var) = 1;
   tree mask_var = create_tmp_var (long_long_unsigned_type_node);
 
   tree component_expr = build3 (COMPONENT_REF, long_long_unsigned_type_node,

[gcc r15-4806] diagnostics: use std::move in output_factory::handler ctor

2024-10-31 Thread David Malcolm via Gcc-cvs

https://gcc.gnu.org/g:a91569d47d68cfe2b53b97b743c8b936d58b1658

commit r15-4806-ga91569d47d68cfe2b53b97b743c8b936d58b1658
Author: David Malcolm 
Date:   Thu Oct 31 12:25:07 2024 -0400

diagnostics: use std::move in output_factory::handler ctor

gcc/ChangeLog:
* opts-diagnostic.cc (output_factory::handler::handler): Use
std::move on name.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/opts-diagnostic.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/opts-diagnostic.cc b/gcc/opts-diagnostic.cc
index 8dd4234ab0f4..97ff5777ef0a 100644
--- a/gcc/opts-diagnostic.cc
+++ b/gcc/opts-diagnostic.cc
@@ -107,7 +107,7 @@ public:
   class handler
   {
   public:
-handler (std::string name) : m_name (name) {}
+handler (std::string name) : m_name (std::move (name)) {}
 virtual ~handler () {}
 
 const std::string &get_name () const { return m_name; }

[gcc r15-4805] diagnostics: fix memory leak of m_option_mgr

2024-10-31 Thread David Malcolm via Gcc-cvs

https://gcc.gnu.org/g:d1d3c17258a564ade216ec908ac99ab6c78f8e3e

commit r15-4805-gd1d3c17258a564ade216ec908ac99ab6c78f8e3e
Author: David Malcolm 
Date:   Thu Oct 31 12:24:55 2024 -0400

diagnostics: fix memory leak of m_option_mgr

Fix some noise seen in "make selftest-valgrind".

gcc/ChangeLog:
* diagnostic.cc (diagnostic_context::finish): Delete and reset
m_option_mgr.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/diagnostic.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc
index 477214c15f2b..ecc4bd94eba1 100644
--- a/gcc/diagnostic.cc
+++ b/gcc/diagnostic.cc
@@ -420,6 +420,9 @@ diagnostic_context::finish ()
   m_client_data_hooks = nullptr;
 }
 
+  delete m_option_mgr;
+  m_option_mgr = nullptr;
+
   delete m_urlifier;
   m_urlifier = nullptr;

[gcc r15-4807] diagnostics: add class lazy_diagnostic_path

2024-10-31 Thread David Malcolm via Gcc-cvs

https://gcc.gnu.org/g:da9772be0ceee9b9a3f6f8ff20df939ce8063660

commit r15-4807-gda9772be0ceee9b9a3f6f8ff20df939ce8063660
Author: David Malcolm 
Date:   Thu Oct 31 12:25:36 2024 -0400

diagnostics: add class lazy_diagnostic_path

This patch adds a new class lazy_diagnostic_path for
use when creating rich_location instances, to allow deferring
expensive computations until the path is actually used (when
a diagnostic using the rich_location is emitted).

gcc/ChangeLog:
* Makefile.in (OBJS): Add lazy-diagnostic-path.o.
* lazy-diagnostic-path.cc: New file.
* lazy-diagnostic-path.h: New file.
* selftest-diagnostic.cc: Include "diagnostic-format.h".
(test_diagnostic_context::test_diagnostic_context): Turn off
flushing for the output format's printer.
* selftest-run-tests.cc (selftest::run_tests): Call
selftest::lazy_diagnostic_path_cc_tests.
* selftest.h (selftest::lazy_diagnostic_path_cc_tests): New decl.

Signed-off-by: David Malcolm 

Diff:
---
 gcc/Makefile.in |   1 +
 gcc/lazy-diagnostic-path.cc | 233 
 gcc/lazy-diagnostic-path.h  |  58 +++
 gcc/selftest-diagnostic.cc  |   2 +
 gcc/selftest-run-tests.cc   |   1 +
 gcc/selftest.h  |   1 +
 6 files changed, 296 insertions(+)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 798d4302fa78..b4d34cc0b42e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1572,6 +1572,7 @@ OBJS = \
jump.o \
langhooks.o \
late-combine.o \
+   lazy-diagnostic-path.o \
lcm.o \
lists.o \
loop-doloop.o \
diff --git a/gcc/lazy-diagnostic-path.cc b/gcc/lazy-diagnostic-path.cc
new file mode 100644
index ..06500652374d
--- /dev/null
+++ b/gcc/lazy-diagnostic-path.cc
@@ -0,0 +1,233 @@
+/* Helper class for deferring path creation until a diagnostic is emitted.
+   Copyright (C) 2019-2024 Free Software Foundation, Inc.
+   Contributed by David Malcolm 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "config.h"
+#define INCLUDE_MEMORY
+#define INCLUDE_VECTOR
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "version.h"
+#include "intl.h"
+#include "diagnostic.h"
+#include "lazy-diagnostic-path.h"
+#include "make-unique.h"
+#include "selftest.h"
+#include "selftest-diagnostic.h"
+#include "simple-diagnostic-path.h"
+#include "gcc-rich-location.h"
+#include "diagnostic-format-text.h"
+
+/* class lazy_diagnostic_path : public diagnostic_path.  */
+
+/* Implementation of diagnostic_path vfuncs in terms of a lazily-generated
+   path.  */
+
+unsigned
+lazy_diagnostic_path::num_events () const
+{
+  lazily_generate_path ();
+  return m_inner_path->num_events ();
+}
+
+const diagnostic_event &
+lazy_diagnostic_path::get_event (int idx) const
+{
+  lazily_generate_path ();
+  return m_inner_path->get_event (idx);
+}
+
+unsigned
+lazy_diagnostic_path::num_threads () const
+{
+  lazily_generate_path ();
+  return m_inner_path->num_threads ();
+}
+
+const diagnostic_thread &
+lazy_diagnostic_path::get_thread (diagnostic_thread_id_t idx) const
+{
+  lazily_generate_path ();
+  return m_inner_path->get_thread (idx);
+}
+
+bool
+lazy_diagnostic_path::same_function_p (int event_idx_a,
+  int event_idx_b) const
+{
+  lazily_generate_path ();
+  return m_inner_path->same_function_p (event_idx_a, event_idx_b);
+}
+
+void
+lazy_diagnostic_path::lazily_generate_path () const
+{
+  if (!m_inner_path)
+m_inner_path = make_inner_path ();
+  gcc_assert (m_inner_path != nullptr);
+}
+
+#if CHECKING_P
+
+namespace selftest {
+
+class test_lazy_path : public lazy_diagnostic_path
+{
+public:
+  test_lazy_path (pretty_printer &pp)
+  : m_pp (pp)
+  {
+  }
+  std::unique_ptr<diagnostic_path> make_inner_path () const final override
+  {
+tree fntype_void_void
+  = build_function_type_array (void_type_node, 0, NULL);
+tree fndecl_foo = build_fn_decl ("foo", fntype_void_void);
+auto path = ::make_unique<simple_diagnostic_path> (&m_pp);
+path->add_event (UNKNOWN_LOCATION, fndecl_foo, 0, "first %qs", "free");
+path->add_event (UNKNOWN_LOCATION, fndecl_foo, 0, "double %qs", "free");
+return path;
+  }
+private:
+  pretty_printer &m_pp;
+};
+
+static void
+test_intraprocedural_path (pretty_p

[gcc r15-4808] RISC-V: fix const interleaved stepped vector with a scalar pattern

2024-10-31 Thread Vineet Gupta via Gcc-cvs

https://gcc.gnu.org/g:1905b59fdc58ce67e508b99dff105afebaaa9bb1

commit r15-4808-g1905b59fdc58ce67e508b99dff105afebaaa9bb1
Author: Vineet Gupta 
Date:   Thu Oct 24 15:15:40 2024 -0700

RISC-V: fix const interleaved stepped vector with a scalar pattern

When bisecting for the ICE in PR/117353, commit 771256bcb9dd ("RISC-V: Emit
costs for bool and stepped const vectors") uncovered yet another latent
issue (first noted in [1]).

  [1] https://github.com/patrick-rivos/gcc-postcommit-ci/issues/1625

This patch fixes some of the fortran regressions from that report.

Fixes 71a5ac6703d1 ("RISC-V: Support interleave vector with different step sequence")

rv64imafdcv_zvl256b_zba_zbb_zbs_zicond/lp64d/medlow
| # of unexpected case / # of unique unexpected case
|     gcc     |     g++     |  gfortran   |
|  392 /  108 |    7 /    3 |   91 /   24 |
|  392 /  108 |    7 /    3 |   67 /   12 |

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Use IOR op.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/slp-interleave-5.c: New test.

Tested-by: Edwin Lu  # Pre-commit CI #2503
Signed-off-by: Vineet Gupta 

Diff:
---
 gcc/config/riscv/riscv-v.cc|  6 ++--
 .../riscv/rvv/autovec/slp-interleave-5.c   | 35 ++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 209b7ee88f18..5e728f04cf51 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1501,9 +1501,9 @@ expand_const_vector (rtx target, rtx src)
gen_int_mode (builder.inner_bits_size (), new_smode),
NULL_RTX, false, OPTAB_DIRECT);
  rtx tmp2 = gen_reg_rtx (new_mode);
- rtx and_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (AND, new_mode),
-  BINARY_OP, and_ops);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
+  BINARY_OP, ior_ops);
  emit_move_insn (result, gen_lowpart (mode, tmp2));
}
  else
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c
new file mode 100644
index ..32cfe8a8688c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-5.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl1024b -mabi=lp64d -O3 
-fdump-tree-optimized-details" } */
+
+struct S { int a, b; } s[8];
+
+void
+foo ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+{
+  s[i].b = 1;
+  s[i].a = i+1;
+}
+}
+
+/* { dg-final { scan-tree-dump-times "\{ 1, 1, 2, 1, 3, 1, 4, 1 \}" 1 
"optimized" } } */
+/* { dg-final { scan-assembler {vid\.v} } } */
+/* { dg-final { scan-assembler {vadd\.v} } } */
+/* { dg-final { scan-assembler {vor\.v} } } */
+
+void
+foo2 ()
+{
+  int i;
+  for (i = 0; i < 8; i++)
+{
+  s[i].b = 0;
+  s[i].a = i+1;
+}
+}
+
+/* { dg-final { scan-tree-dump-times "\{ 1, 0, 2, 0, 3, 0, 4, 0 \}" 1 
"optimized" } } */
+/* { dg-final { scan-assembler {vid\.v} } } */
+/* { dg-final { scan-assembler {vadd\.v} } } */

[gcc r15-4801] OpenMP/C++: Use STRIP_REFERENCE_REF to fix declare variant with reference-returning functions

2024-10-31 Thread Tobias Burnus via Gcc-cvs

https://gcc.gnu.org/g:f7ae087ef0132b01c3a2c04932058b996ebda18d

commit r15-4801-gf7ae087ef0132b01c3a2c04932058b996ebda18d
Author: Tobias Burnus 
Date:   Thu Oct 31 12:37:46 2024 +0100

OpenMP/C++: Use STRIP_REFERENCE_REF to fix declare variant with reference-returning functions

As Jakub suggested, use STRIP_REFERENCE_REF instead of doing it manually
as r15-4800-geb828a1e380e7b did.

gcc/cp/ChangeLog:

* decl.cc (omp_declare_variant_finalize_one): Use STRIP_REFERENCE_REF
instead of doing it manually.

Diff:
---
 gcc/cp/decl.cc | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index b638f3af294d..0e4533c6faba 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -8375,10 +8375,7 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
   if (variant == error_mark_node && !processing_template_decl)
 return true;
 
-  if (TREE_CODE (variant) == INDIRECT_REF)
-variant = TREE_OPERAND (variant, 0);
-
-  variant = cp_get_callee_fndecl_nofold (variant);
+  variant = cp_get_callee_fndecl_nofold (STRIP_REFERENCE_REF (variant));
   input_location = save_loc;
 
   if (variant)

[gcc r15-4793] Fortran: Fix problem with substring selectors in ASSOCIATE [PR115700]

2024-10-31 Thread Paul Thomas via Gcc-cvs

https://gcc.gnu.org/g:159fb203231c503418e7ab9f45282957e40cb195

commit r15-4793-g159fb203231c503418e7ab9f45282957e40cb195
Author: Paul Thomas 
Date:   Thu Oct 31 07:22:36 2024 +

Fortran: Fix problem with substring selectors in ASSOCIATE [PR115700]

2024-10-31  Paul Thomas  

gcc/fortran
PR fortran/115700
* resolve.cc (resolve_variable): The typespec of an expression,
which is not a substring, can be shared with a deferred length
associate name.
(resolve_assoc_var): Extract a substring reference with non-
constant start or end. Use it to flag up the need for array
associate name to be a pointer.
(resolve_block_construct): Change comment from past to future
tense.

gcc/testsuite/
PR fortran/115700
* gfortran.dg/associate_70.f90: New test.

Diff:
---
 gcc/fortran/resolve.cc | 33 
 gcc/testsuite/gfortran.dg/associate_70.f90 | 40 ++
 2 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 565d4aa5fe9a..8045deddd8ad 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -6153,6 +6153,15 @@ resolve_variable (gfc_expr *e)
  e->ref = newref;
}
 }
+  else if (sym->assoc && sym->ts.type == BT_CHARACTER && sym->ts.deferred)
+{
+  gfc_ref *ref;
+  for (ref = e->ref; ref; ref = ref->next)
+   if (ref->type == REF_SUBSTRING)
+ break;
+  if (ref == NULL)
+   e->ts = sym->ts;
+}
 
   if (e->ref && !gfc_resolve_ref (e))
 return false;
@@ -9871,6 +9880,15 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
   /* Fix up the type-spec for CHARACTER types.  */
   if (sym->ts.type == BT_CHARACTER && !sym->attr.select_type_temporary)
 {
+  gfc_ref *ref;
+  for (ref = target->ref; ref; ref = ref->next)
+   if (ref->type == REF_SUBSTRING
+   && ((ref->u.ss.start
+&& ref->u.ss.start->expr_type != EXPR_CONSTANT)
+   || (ref->u.ss.end
+   && ref->u.ss.end->expr_type != EXPR_CONSTANT)))
+ break;
+
   if (!sym->ts.u.cl)
sym->ts.u.cl = target->ts.u.cl;
 
@@ -9889,9 +9907,10 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
gfc_get_int_expr (gfc_charlen_int_kind, NULL,
  target->value.character.length);
}
-  else if ((!sym->ts.u.cl->length
-   || sym->ts.u.cl->length->expr_type != EXPR_CONSTANT)
+  else if (((!sym->ts.u.cl->length
+|| sym->ts.u.cl->length->expr_type != EXPR_CONSTANT)
&& target->expr_type != EXPR_VARIABLE)
+  || ref)
{
  if (!sym->ts.deferred)
{
@@ -9901,7 +9920,10 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
 
  /* This is reset in trans-stmt.cc after the assignment
 of the target expression to the associate name.  */
- sym->attr.allocatable = 1;
+ if (ref && sym->as)
+   sym->attr.pointer = 1;
+ else
+   sym->attr.allocatable = 1;
}
 }
 
@@ -11508,8 +11530,9 @@ resolve_block_construct (gfc_code* code)
 {
   gfc_namespace *ns = code->ext.block.ns;
 
-  /* For an ASSOCIATE block, the associations (and their targets) are already
- resolved during resolve_symbol. Resolve the BLOCK's namespace.  */
+  /* For an ASSOCIATE block, the associations (and their targets) will be
+ resolved by gfc_resolve_symbol, during resolution of the BLOCK's
+ namespace.  */
   gfc_resolve (ns);
 }
 
diff --git a/gcc/testsuite/gfortran.dg/associate_70.f90 
b/gcc/testsuite/gfortran.dg/associate_70.f90
new file mode 100644
index ..b8916f4c70fd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/associate_70.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+! ( dg-options "-Wuninitialized" )
+!
+! Test fix for PR115700 comment 5, in which ‘.tmp1’ is used uninitialized and
+! both normal and scalarized array references did not work correctly.
+!
+! Contributed by Harald Anlauf  
+!
+  character(4), dimension(3) :: chr = ['abcd', 'efgh', 'ijkl']
+  call mvce (chr)
+  if (any (chr /= ['ABcd', 'EFgh', 'IJkl'])) stop 1
+contains
+  subroutine mvce(x)
+implicit none
+character(len=*), dimension(:), intent(inOUT), target :: x
+integer :: i
+i = len(x)
+
+! This was broken
+associate (tmp1 => x(:)(1:i/2))
+  if (len (tmp1) /= i/2) stop 2
+  if (tmp1(2) /= 'ef') stop 3
+  if (any (tmp1 /= ['ab', 'ef', 'ij'])) stop 4
+  tmp1 = ['AB','EF','IJ']
+end associate
+
+! Retest things that worked previously.
+associate (tmp2 => x(:)(1:2))
+  if (len (tmp2) /= i/2) stop 5
+  if (tmp2(2) /= 'EF') stop 6
+  if (any (tmp2 /= ['AB','EF','IJ'])) stop 7
+end associate
+
+associate (tmp3 => x(3)(1:i/2))
+

[gcc r15-4797] libstdc++: Add align_alloc attribute to aligned operator new

2024-10-31 Thread Jonathan Wakely via Libstdc++-cvs

https://gcc.gnu.org/g:646b24efaa50b149c12d0ae432999cb5a0cd12f2

commit r15-4797-g646b24efaa50b149c12d0ae432999cb5a0cd12f2
Author: Jonathan Wakely 
Date:   Tue Jan 9 13:16:11 2024 +

libstdc++: Add align_alloc attribute to aligned operator new

The aligned versions of operator new should use the alloc_align
attribute to help the compiler.

PR c++/86878 requests that the compiler use the attribute to warn
about invalid alignment values, so an XFAILed test is added for that.

libstdc++-v3/ChangeLog:

* libsupc++/new (operator new): Add attribute alloc_align(2) to
overloads taking a std::align_val_t argument.
* testsuite/18_support/new_aligned_warn.cc: New test.

Reviewed-by: Jakub Jelinek 

Diff:
---
 libstdc++-v3/libsupc++/new|  6 +++---
 libstdc++-v3/testsuite/18_support/new_aligned_warn.cc | 13 +
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/libsupc++/new b/libstdc++-v3/libsupc++/new
index e9a3d9b49a3e..4345030071bb 100644
--- a/libstdc++-v3/libsupc++/new
+++ b/libstdc++-v3/libsupc++/new
@@ -167,7 +167,7 @@ void operator delete[](void*, const std::nothrow_t&)
 #if __cpp_aligned_new
 _GLIBCXX_NODISCARD void* operator new(std::size_t, std::align_val_t)
   _GLIBCXX_TXN_SAFE
-  __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__));
+  __attribute__((__externally_visible__, __alloc_size__ (1), __alloc_align__ 
(2),  __malloc__));
 _GLIBCXX_NODISCARD void* operator new(std::size_t, std::align_val_t, const 
std::nothrow_t&)
_GLIBCXX_TXN_SAFE
   _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ 
(1), __malloc__));
@@ -178,10 +178,10 @@ void operator delete(void*, std::align_val_t, const 
std::nothrow_t&)
   _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__));
 _GLIBCXX_NODISCARD void* operator new[](std::size_t, std::align_val_t)
   _GLIBCXX_TXN_SAFE
-  __attribute__((__externally_visible__, __alloc_size__ (1), __malloc__));
+  __attribute__((__externally_visible__, __alloc_size__ (1), __alloc_align__ 
(2), __malloc__));
 _GLIBCXX_NODISCARD void* operator new[](std::size_t, std::align_val_t, const 
std::nothrow_t&)
   _GLIBCXX_TXN_SAFE
-  _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ 
(1), __malloc__));
+  _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__, __alloc_size__ 
(1), __alloc_align__ (2), __malloc__));
 void operator delete[](void*, std::align_val_t) _GLIBCXX_TXN_SAFE
   _GLIBCXX_USE_NOEXCEPT __attribute__((__externally_visible__));
 void operator delete[](void*, std::align_val_t, const std::nothrow_t&)
diff --git a/libstdc++-v3/testsuite/18_support/new_aligned_warn.cc 
b/libstdc++-v3/testsuite/18_support/new_aligned_warn.cc
new file mode 100644
index ..e9d374abe317
--- /dev/null
+++ b/libstdc++-v3/testsuite/18_support/new_aligned_warn.cc
@@ -0,0 +1,13 @@
+// { dg-options "-Wattributes" }
+// { dg-do compile { target c++17 } }
+
+#include <new>
+
+int main()
+{
+  // PR c++/86878 has a patch to make these warn.
+  (void) operator new(1, std::align_val_t(3)); // { dg-warning "power of two" 
"" { xfail *-*-* } }
+  (void) operator new[](1, std::align_val_t(10)); // { dg-warning "power of 
two" "" { xfail *-*-* } }
+  (void) operator new(1, std::align_val_t(0), std::nothrow_t()); // { 
dg-warning "power of two" "" { xfail *-*-* } }
+  (void) operator new[](1, std::align_val_t(-1), std::nothrow_t()); // { 
dg-warning "power of two" "" { xfail *-*-* } }
+}

[gcc r15-4798] RISC-V: Split riscv_process_target_attr with const char *args argument

2024-10-31 Thread Kito Cheng via Gcc-cvs

https://gcc.gnu.org/g:a57c16e50d478cc413e3e530db21de693e4eb2ae

commit r15-4798-ga57c16e50d478cc413e3e530db21de693e4eb2ae
Author: Yangyu Chen 
Date:   Thu Oct 24 15:10:57 2024 +0800

RISC-V: Split riscv_process_target_attr with const char *args argument

This patch splits static bool riscv_process_target_attr
(tree args, location_t loc) into two functions:

- bool riscv_process_target_attr (const char *args, location_t loc)
- static bool riscv_process_target_attr (tree args, location_t loc)

Thus, we can call `riscv_process_target_attr` with a `const char *`
argument.  This is useful for implementation of `target_version`
attribute.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_process_target_attr): New.
* config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
Split into two functions with const char *args argument

Diff:
---
 gcc/config/riscv/riscv-protos.h   |  2 ++
 gcc/config/riscv/riscv-target-attr.cc | 65 ---
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0a6b43f0c767..4ed04321d32c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -805,6 +805,8 @@ extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 extern bool
 riscv_option_valid_attribute_p (tree, tree, tree, int);
+extern bool
+riscv_process_target_attr (const char *, location_t);
 extern void
 riscv_override_options_internal (struct gcc_options *);
 extern void riscv_option_override (void);
diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index bf14ade5ce08..8ce9607b3c9b 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -304,35 +304,13 @@ num_occurrences_in_str (char c, char *str)
   return res;
 }
 
-/* Parse the tree in ARGS that contains the target attribute information
+/* Parse the string in ARGS that contains the target attribute information
and update the global target options space.  */
 
-static bool
-riscv_process_target_attr (tree args, location_t loc)
+bool
+riscv_process_target_attr (const char *args, location_t loc)
 {
-  if (TREE_CODE (args) == TREE_LIST)
-{
-  do
-   {
- tree head = TREE_VALUE (args);
- if (head)
-   {
- if (!riscv_process_target_attr (head, loc))
-   return false;
-   }
- args = TREE_CHAIN (args);
-  } while (args);
-
-  return true;
-}
-
-  if (TREE_CODE (args) != STRING_CST)
-{
-  error_at (loc, "attribute %<target%> argument not a string");
-  return false;
-}
-
-  size_t len = strlen (TREE_STRING_POINTER (args));
+  size_t len = strlen (args);
 
   /* No need to emit warning or error on empty string here, generic code 
already
  handle this case.  */
@@ -343,7 +321,7 @@ riscv_process_target_attr (tree args, location_t loc)
 
   std::unique_ptr<char[]> buf (new char[len+1]);
   char *str_to_check = buf.get ();
-  strcpy (str_to_check, TREE_STRING_POINTER (args));
+  strcpy (str_to_check, args);
 
   /* Used to catch empty spaces between semi-colons i.e.
  attribute ((target ("attr1;;attr2"))).  */
@@ -366,7 +344,7 @@ riscv_process_target_attr (tree args, location_t loc)
   if (num_attrs != num_semicolons + 1)
 {
   error_at (loc, "malformed %<target(\"%s\")%> attribute",
-   TREE_STRING_POINTER (args));
+   args);
   return false;
 }
 
@@ -376,6 +354,37 @@ riscv_process_target_attr (tree args, location_t loc)
   return true;
 }
 
+/* Parse the tree in ARGS that contains the target attribute information
+   and update the global target options space.  */
+
+static bool
+riscv_process_target_attr (tree args, location_t loc)
+{
+  if (TREE_CODE (args) == TREE_LIST)
+{
+  do
+   {
+ tree head = TREE_VALUE (args);
+ if (head)
+   {
+ if (!riscv_process_target_attr (head, loc))
+   return false;
+   }
+ args = TREE_CHAIN (args);
+  } while (args);
+
+  return true;
+}
+
+  if (TREE_CODE (args) != STRING_CST)
+{
+  error_at (loc, "attribute %<target%> argument not a string");
+  return false;
+}
+
+  return riscv_process_target_attr (TREE_STRING_POINTER (args), loc);
+}
+
 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P.
This is used to process attribute ((target ("..."))).
Note, that riscv_set_current_function() has not been called before,

[gcc r15-4800] RISC-V: Do not inline when callee is versioned but caller is not

2024-10-31 Thread Kito Cheng via Gcc-cvs

https://gcc.gnu.org/g:eb828a1e380e7bb5a708c899081541ee9130ff87

commit r15-4800-geb828a1e380e7bb5a708c899081541ee9130ff87
Author: Yangyu Chen 
Date:   Thu Oct 24 15:12:45 2024 +0800

RISC-V: Do not inline when callee is versioned but caller is not

When the callee is versioned but the caller is not, we should not inline
the callee into the caller, to prevent the default version of the callee
from being inlined into a not versioned caller.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_can_inline_p): Refuse to inline
when callee is versioned but caller is not.

Diff:
---
 gcc/config/riscv/riscv.cc | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8443b29fb8f5..0b3b2c4cba91 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7693,6 +7693,10 @@ riscv_compute_frame_info (void)
 static bool
 riscv_can_inline_p (tree caller, tree callee)
 {
+  /* Do not inline when callee is versioned but caller is not.  */
+  if (DECL_FUNCTION_VERSIONED (callee) && ! DECL_FUNCTION_VERSIONED (caller))
+return false;
+
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);

[gcc r15-4803] testsuite: add testcase for fixed PR106073

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:df09173e355f30089b97090b19c095907242b35e

commit r15-4803-gdf09173e355f30089b97090b19c095907242b35e
Author: Sam James 
Date:   Thu Oct 31 03:36:23 2024 +

testsuite: add testcase for fixed PR106073

This was fixed by r12-8835-ge8d5f3a1b5a583 which surely made it latent
but richi points out it was likely an instance of PR90348. -fstack-reuse
continues to be a menace, so let's add the testcase.

gcc/testsuite/ChangeLog:
PR middle-end/90348
PR tree-optimization/106073

* gcc.dg/pr106073.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/pr106073.c | 123 
 1 file changed, 123 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/pr106073.c b/gcc/testsuite/gcc.dg/pr106073.c
new file mode 100644
index ..570e68868b2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr106073.c
@@ -0,0 +1,123 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+int a, f = 1, h, l, m = 1, o, r = 4, q, s, x, e, aa, ab, ac, *ad, ae = 5, **y, 
**af, ag, ah, ai, aj;
+static int c[6], d, g[6][5], n, *v = &s, ak;
+volatile int p;
+const volatile int al;
+static volatile int t, u, w = 3, z, am, an;
+static int ao();
+void ap();
+static void aq() {
+  int ar[4] = {6, 6, 6, 6}, as[1], i, j;
+  as[0] = 0;
+  if (m) {
+int at[11] = {4, 4, 6, 5, 7, 0, 7, 6, 7, 6, 6}, *au, *av[7], k;
+au = (int*) &au;
+for (i = 0; i < 1; i++)
+  for (j = 0; j < 1; j++)
+for (k = 0; k < 7; k++) {
+  (t || n) && u;
+  av[k] = 0;
+}
+y = av;
+while (o) {
+  int *b[2] = {as, ar};
+  *af = at;
+}
+m = 0;
+  }
+}
+inline void ap() {
+  for (; l <= 4; l++) {
+*v = 0;
+aq();
+if (a)
+  break;
+for (; q; q++)
+  ;
+  }
+}
+int ao() {
+  int be = 0, j;
+  if (n)
+aa = d = 0;
+  l = 0;
+  for (; be < 2; be++) {
+int bf[7][2];
+for (ai = 0; ai < 7; ai++)
+  for (j = 0; j < 2; j++)
+bf[ai][j] = 5;
+if (be) {
+  for (; h >= 0; h--) {
+while (z >= w) {
+  ap();
+  *ad = 0;
+}
+ap();
+  }
+  return bf[3][0];
+}
+if (bf[3][0])
+  continue;
+while (1)
+  ;
+  }
+  return 0;
+}
+static void aw() {
+  for (; ah; ah++) {
+p = 0;
+p = 0;
+  }
+  int ax = ~e;
+ L1:
+  e = a = 0;
+ L2:
+  if (!r)
+goto L3;
+  if (!ax)
+goto L2;
+  if (d)
+goto L1;
+  if (!ae)
+goto L1;
+  if (w && x <= 808 && f)
+ag = ao();
+  g[0][4] = ag;
+  if (a) {
+int bd;
+n++;
+while (n)
+  for (bd = 0; bd < 7; bd++) {
+am;
+am;
+am;
+am;
+d = c[d ^ am];
+  }
+  } else {
+  L3:
+an;
+for (; ak; ak++) {
+  int bc = 7;
+  for (; bc >= 0; bc--) {
+al;
+al;
+d = f && an;
+an;
+  }
+}
+  }
+}
+int main() {
+  int k;
+  for (; aj < 6; aj++)
+c[0] = aj;
+  aw();
+  for (aj = 0; aj < 6; aj++)
+for (k = 0; k < 5; k++)
+  d = c[d ^ g[aj][k]];
+  if (d != 5)
+__builtin_abort();
+  return 0;
+}

[gcc r15-4815] aarch64: Move ENTRY_VHSDF to aarch64-simd-pragma-builtins.def

2024-10-31 Thread Richard Sandiford via Gcc-cvs

https://gcc.gnu.org/g:e1b17a0cfd3dfb80303d466cc28684e74db6634a

commit r15-4815-ge1b17a0cfd3dfb80303d466cc28684e74db6634a
Author: Richard Sandiford 
Date:   Thu Oct 31 19:52:06 2024 +

aarch64: Move ENTRY_VHSDF to aarch64-simd-pragma-builtins.def

It's more convenient for later patches if we only define ENTRY_VHSDF
once, in the .def file.  Then the only macro that needs to be defined
before including the file is ENTRY itself.

The patch also moves the architecture requirements out of the
individual ENTRY invocations into a block-level definition of
REQUIRED_EXTENSIONS.  This reduces cut-&-paste a little and makes
things more consistent with aarch64-sve-builtins*.def.

gcc/
* config/aarch64/aarch64-builtins.cc (ENTRY): Remove the features
argument and get the features from REQUIRED_EXTENSIONS instead.
(ENTRY_VHSDF): Move definition to...
* config/aarch64/aarch64-simd-pragma-builtins.def: ...here.
Move the architecture requirements to REQUIRED_EXTENSIONS.

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc | 22 +++---
 .../aarch64/aarch64-simd-pragma-builtins.def   | 14 --
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 86d96e47f01c..480ac223d86a 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,17 +780,9 @@ typedef struct
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
 
 #undef ENTRY
-#define ENTRY(N, S, M, U, F) \
+#define ENTRY(N, S, M, U) \
   AARCH64_##N,
 
-#undef ENTRY_VHSDF
-#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \
-  AARCH64_##NAME##_f16, \
-  AARCH64_##NAME##q_f16, \
-  AARCH64_##NAME##_f32, \
-  AARCH64_##NAME##q_f32, \
-  AARCH64_##NAME##q_f64,
-
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
@@ -1602,16 +1594,8 @@ enum class aarch64_builtin_signatures
 };
 
 #undef ENTRY
-#define ENTRY(N, S, M, U, F) \
-  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, F},
-
-#undef ENTRY_VHSDF
-#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \
-  ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC, EXTENSIONS) \
-  ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC, EXTENSIONS) \
-  ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC, EXTENSIONS) \
-  ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC, EXTENSIONS) \
-  ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC, EXTENSIONS)
+#define ENTRY(N, S, M, U) \
+  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, REQUIRED_EXTENSIONS},
 
 /* Initialize pragma builtins.  */
 
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def 
b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index f432185be466..9d530fc45d4b 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -18,6 +18,16 @@
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
 
+#undef ENTRY_VHSDF
+#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
+  ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \
+  ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \
+  ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \
+  ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \
+  ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC)
+
 // faminmax
-ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX)
-ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX)
+#define REQUIRED_EXTENSIONS AARCH64_FL_FAMINMAX
+ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
+ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
+#undef REQUIRED_EXTENSIONS

[gcc r15-4818] libiberty: Fix comment typos

2024-10-31 Thread Mark Wielaard via Gcc-cvs

https://gcc.gnu.org/g:aa84020b2edbab18c98e4e1faa1dfec7bad9d179

commit r15-4818-gaa84020b2edbab18c98e4e1faa1dfec7bad9d179
Author: Mark Wielaard 
Date:   Thu Oct 31 17:57:45 2024 +0100

libiberty: Fix comment typos

These comment typos were found in the valgrind fork of libiberty
demangle code.

libiberty/ChangeLog:

* cplus-dem.c: Change preceeded to preceded.

include/ChangeLog:

* safe-ctype.h: Change accidently to accidentally.

Diff:
---
 include/safe-ctype.h  | 2 +-
 libiberty/cplus-dem.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/safe-ctype.h b/include/safe-ctype.h
index 93785da3f24d..eec3d940cf60 100644
--- a/include/safe-ctype.h
+++ b/include/safe-ctype.h
@@ -112,7 +112,7 @@ extern const unsigned char  _sch_tolower[256];
 #define TOUPPER(c) _sch_toupper[(c) & 0xff]
 #define TOLOWER(c) _sch_tolower[(c) & 0xff]
 
-/* Prevent the users of safe-ctype.h from accidently using the routines
+/* Prevent the users of safe-ctype.h from accidentally using the routines
from ctype.h.  Initially, the approach was to produce an error when
detecting that ctype.h has been included.  But this was causing
trouble as ctype.h might get indirectly included as a result of
diff --git a/libiberty/cplus-dem.c b/libiberty/cplus-dem.c
index ee9e84f5d6b1..e67ae9300490 100644
--- a/libiberty/cplus-dem.c
+++ b/libiberty/cplus-dem.c
@@ -215,7 +215,7 @@ ada_demangle (const char *mangled, int option 
ATTRIBUTE_UNUSED)
 goto unknown;
 
   /* Most of the demangling will trivially remove chars.  Operator names
- may add one char but because they are always preceeded by '__' which is
+ may add one char but because they are always preceded by '__' which is
  replaced by '.', they eventually never expand the size.
  A few special names such as '___elabs' add a few chars (at most 7), but
  they occur only once.  */

[gcc r14-10862] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue

2024-10-31 Thread Hongyu Wang via Gcc-cvs

https://gcc.gnu.org/g:df542909224a7ff88b204534ad035a0b216a98bf

commit r14-10862-gdf542909224a7ff88b204534ad035a0b216a98bf
Author: Hongyu Wang 
Date:   Wed Feb 7 14:42:58 2024 +0800

[APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue

According to the APX spec, pushp/popp pairs should be matched;
otherwise the PPX hint cannot take effect, which causes performance loss.

In ix86_expand_epilogue, there are several optimizations that may
cause the epilogue to use mov to restore the regs. Check whether PPX
was applied and, if so, prevent the use of mov/leave in the epilogue.
Also do not use PPX for eh_return.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_emit_save_regs): Emit ppx only when
TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_expand_epilogue): Don't restore reg using mov when
apx_ppx_used flag is true.
* config/i386/i386.h (struct machine_frame_state):
Add apx_ppx_used flag.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ppx-2.c: New test.
* gcc.target/i386/apx-ppx-3.c: Likewise.

(cherry picked from commit 8e72b1bb3896f6e8d4f4679cbcfbc2a8212d04f9)

Diff:
---
 gcc/config/i386/i386.cc   | 13 +
 gcc/config/i386/i386.h|  4 
 gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++
 gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 48921d422cf8..dde4ba5ca19d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7435,6 +7435,7 @@ ix86_emit_save_regs (void)
 {
   int regno;
   rtx_insn *insn;
+  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
 
   if (!TARGET_APX_PUSH2POP2
   || !ix86_can_use_push2pop2 ()
@@ -7444,7 +7445,7 @@ ix86_emit_save_regs (void)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
  {
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-   TARGET_APX_PPX));
+   use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
  }
 }
@@ -7475,7 +7476,7 @@ ix86_emit_save_regs (void)
  regno_list[0]),
 gen_rtx_REG (word_mode,
  regno_list[1]),
-TARGET_APX_PPX));
+use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
 
@@ -7508,7 +7509,7 @@ ix86_emit_save_regs (void)
else
  {
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-   TARGET_APX_PPX));
+   use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
aligned = true;
  }
@@ -7517,7 +7518,7 @@ ix86_emit_save_regs (void)
{
  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
   regno_list[0]),
- TARGET_APX_PPX));
+ use_ppx));
  RTX_FRAME_RELATED_P (insn) = 1;
}
 }
@@ -8991,6 +8992,7 @@ ix86_expand_prologue (void)
   if (!frame.save_regs_using_mov)
{
  ix86_emit_save_regs ();
+ m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
  int_registers_saved = true;
  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
}
@@ -9876,6 +9878,9 @@ ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
 restore_regs_via_mov = false;
+  /* If we already save reg with pushp, don't use move at epilogue.  */
+  else if (m->fs.apx_ppx_used)
+restore_regs_via_mov = false;
   /* If we're only restoring one register and sp cannot be used then
  using a move instruction to restore the register since it's
  less work than reloading sp and popping the register.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 1c6e323d6551..1c456c3422fc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2727,6 +2727,10 @@ struct GTY(()) machine_frame_state
  The flags realigned and sp_realigned are mutually exclusive.  */
   BOOL_BITFIELD sp_realigned : 1;
 
+  /* When APX_PPX used in prologue, force epilogue to emit
+  popp instead of move and leave.  */
+  BOOL_BITFIELD apx_ppx_used : 1;
+
   /* If sp_realigne

[gcc r15-4822] testsuite: g++.dg: rename pr42965 test

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:6e58db990113ef756970a83de16754563076e701

commit r15-4822-g6e58db990113ef756970a83de16754563076e701
Author: Sam James 
Date:   Fri Oct 25 23:18:12 2024 +0100

testsuite: g++.dg: rename pr42965 test

.c is used for C and .C is for C++. The test was being ignored before.

gcc/testsuite/ChangeLog:
PR other/42965

* g++.dg/warn/unused-result1-Werror.c: Move to...
* g++.dg/warn/unused-result1-Werror.C: ...here.

Diff:
---
 .../g++.dg/warn/{unused-result1-Werror.c => unused-result1-Werror.C}  | 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/warn/unused-result1-Werror.c 
b/gcc/testsuite/g++.dg/warn/unused-result1-Werror.C
similarity index 100%
rename from gcc/testsuite/g++.dg/warn/unused-result1-Werror.c
rename to gcc/testsuite/g++.dg/warn/unused-result1-Werror.C

[gcc r15-4821] testsuite: g++.dg: rename pr105820 test

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:31ec0b0360e2cae5cf927924713757ad7a7c669c

commit r15-4821-g31ec0b0360e2cae5cf927924713757ad7a7c669c
Author: Sam James 
Date:   Fri Oct 25 23:15:02 2024 +0100

testsuite: g++.dg: rename pr105820 test

.c is used for C and .C is for C++. The test was being ignored before.

gcc/testsuite/ChangeLog:
PR tree-optimization/105820

* g++.dg/tree-ssa/pr105820.c: Move to...
* g++.dg/tree-ssa/pr105820.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/tree-ssa/{pr105820.c => pr105820.C} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr105820.c 
b/gcc/testsuite/g++.dg/tree-ssa/pr105820.C
similarity index 100%
rename from gcc/testsuite/g++.dg/tree-ssa/pr105820.c
rename to gcc/testsuite/g++.dg/tree-ssa/pr105820.C

[gcc r15-4823] testsuite: g++.dg: rename pr66655 test

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:14e2f3233bf0ef27e3cbf28478c63c0335cf514c

commit r15-4823-g14e2f3233bf0ef27e3cbf28478c63c0335cf514c
Author: Sam James 
Date:   Tue Oct 29 05:32:39 2024 +

testsuite: g++.dg: rename pr66655 test

The test was being ignored because dg.exp looks for .C in g++.dg/.

gcc/testsuite/ChangeLog:
PR target/66655

* g++.dg/pr66655_1.cc: Move to...
* g++.dg/pr66655_1.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/{pr66655_1.cc => pr66655_1.C} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/pr66655_1.cc 
b/gcc/testsuite/g++.dg/pr66655_1.C
similarity index 100%
rename from gcc/testsuite/g++.dg/pr66655_1.cc
rename to gcc/testsuite/g++.dg/pr66655_1.C

[gcc r15-4824] testsuite: fixup pr66655.C

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:1ef0239da9dc5e3326ee5e928f2c5fe1bc2f4ffd

commit r15-4824-g1ef0239da9dc5e3326ee5e928f2c5fe1bc2f4ffd
Author: Sam James 
Date:   Fri Nov 1 00:03:34 2024 +

testsuite: fixup pr66655.C

In r15-4823-g14e2f3233bf0ef, I renamed pr66655_1.cc but neglected
to update a dg-additional-sources reference.

gcc/testsuite/ChangeLog:
PR target/66655

* g++.dg/pr66655.C: Adjust filename in dg-additional-sources.

Diff:
---
 gcc/testsuite/g++.dg/pr66655.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/pr66655.C b/gcc/testsuite/g++.dg/pr66655.C
index e4a40b02912d..2ae4c4e22f88 100644
--- a/gcc/testsuite/g++.dg/pr66655.C
+++ b/gcc/testsuite/g++.dg/pr66655.C
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-additional-sources "pr66655_1.cc" } */
+/* { dg-additional-sources "pr66655_1.C" } */
 
 #include "pr66655.h"

[gcc r15-4825] testsuite: g++.dg: fix PR90313 filename

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:6df5c49452a1fcf66cc1d15ab50d9de83d01cf1c

commit r15-4825-g6df5c49452a1fcf66cc1d15ab50d9de83d01cf1c
Author: Sam James 
Date:   Tue Oct 29 05:58:27 2024 +

testsuite: g++.dg: fix PR90313 filename

gcc/testsuite/ChangeLog:
PR c++/90313

* g++.dg/torture/pr90313.cc: Move to...
* g++.dg/torture/pr90313.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/torture/{pr90313.cc => pr90313.C} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/torture/pr90313.cc 
b/gcc/testsuite/g++.dg/torture/pr90313.C
similarity index 100%
rename from gcc/testsuite/g++.dg/torture/pr90313.cc
rename to gcc/testsuite/g++.dg/torture/pr90313.C

[gcc r15-4826] testsuite: g++.dg: torture: fix PR111520 filename

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:57d4ec18f577bad60e7f8bc9c0e4b14eb6d03fd0

commit r15-4826-g57d4ec18f577bad60e7f8bc9c0e4b14eb6d03fd0
Author: Sam James 
Date:   Tue Oct 29 05:58:49 2024 +

testsuite: g++.dg: torture: fix PR111520 filename

gcc/testsuite/ChangeLog:
PR tree-optimization/111520

* g++.dg/torture/harden-comp-pr111520.cc: Move to...
* g++.dg/torture/harden-comp-pr111520.C: ...here.

Diff:
---
 .../g++.dg/torture/{harden-comp-pr111520.cc => harden-comp-pr111520.C}| 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/torture/harden-comp-pr111520.cc 
b/gcc/testsuite/g++.dg/torture/harden-comp-pr111520.C
similarity index 100%
rename from gcc/testsuite/g++.dg/torture/harden-comp-pr111520.cc
rename to gcc/testsuite/g++.dg/torture/harden-comp-pr111520.C

[gcc r15-4827] testsuite: g++.dg: debug: fix test filenames

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:a287ff2697f0788856a21c99098611a5ae0a4749

commit r15-4827-ga287ff2697f0788856a21c99098611a5ae0a4749
Author: Sam James 
Date:   Tue Oct 29 06:00:08 2024 +

testsuite: g++.dg: debug: fix test filenames

gcc/testsuite/ChangeLog:
PR debug/15736
PR debug/46240

* g++.dg/debug/pr15736.cc: Move to...
* g++.dg/debug/pr15736.C: ...here.
* g++.dg/debug/pr46240.cc: Move to...
* g++.dg/debug/pr46240.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/debug/{pr15736.cc => pr15736.C} | 0
 gcc/testsuite/g++.dg/debug/{pr46240.cc => pr46240.C} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/testsuite/g++.dg/debug/pr15736.cc 
b/gcc/testsuite/g++.dg/debug/pr15736.C
similarity index 100%
rename from gcc/testsuite/g++.dg/debug/pr15736.cc
rename to gcc/testsuite/g++.dg/debug/pr15736.C
diff --git a/gcc/testsuite/g++.dg/debug/pr46240.cc 
b/gcc/testsuite/g++.dg/debug/pr46240.C
similarity index 100%
rename from gcc/testsuite/g++.dg/debug/pr46240.cc
rename to gcc/testsuite/g++.dg/debug/pr46240.C

[gcc r15-4830] Support Intel AMX-TF32

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:8cc38abf575381905eb3a869b0874bdaddb608bb

commit r15-4830-g8cc38abf575381905eb3a869b0874bdaddb608bb
Author: Haochen Jiang 
Date:   Fri Nov 1 10:04:36 2024 +0800

Support Intel AMX-TF32

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect AMX-TF32.
* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_TF32_SET,
OPTION_MASK_ISA2_AMX_TF32_UNSET): New.
(ix86_handle_option): Handle -mamx-tf32.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_TF32.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-tf32.
* config.gcc: Add amxtf32intrin.h
* config/i386/cpuid.h (bit_AMX_TF32): New.
* config/i386/i386-c.cc (ix86_target_macros_internal): Handle 
amx-tf32.
* config/i386/i386-isa.def (AMX_TF32): Add DEF_PTA(AMX_TF32).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle amx-tf32.
* config/i386/i386.opt: Add option -mamx-tf32.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include amxtf32intrin.h.
* doc/extend.texi: Document amx-tf32.
* doc/invoke.texi: Document -mamx-tf32.
* doc/sourcebuild.texi: Document target amx-tf32.
* config/i386/amxtf32intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-tf32.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Add cpu check for AMX-TF32.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-tf32.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add amx-tf32.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_tf32): New.
* gcc.target/i386/amx-helper.h: New file for tf32 support.
* gcc.target/i386/amxtf32-asmatt-1.c: New test.
* gcc.target/i386/amxtf32-asmintel-1.c: Ditto.
* gcc.target/i386/amxtf32-mmultf32ps-2.c: Ditto.

Diff:
---
 gcc/common/config/i386/cpuinfo.h   |  2 +
 gcc/common/config/i386/i386-common.cc  | 19 -
 gcc/common/config/i386/i386-cpuinfo.h  |  1 +
 gcc/common/config/i386/i386-isas.h |  1 +
 gcc/config.gcc |  2 +-
 gcc/config/i386/amxtf32intrin.h| 47 +
 gcc/config/i386/cpuid.h|  1 +
 gcc/config/i386/i386-c.cc  |  2 +
 gcc/config/i386/i386-isa.def   |  1 +
 gcc/config/i386/i386-options.cc|  4 +-
 gcc/config/i386/i386.opt   |  4 ++
 gcc/config/i386/i386.opt.urls  |  3 ++
 gcc/config/i386/immintrin.h|  2 +
 gcc/doc/extend.texi|  5 +++
 gcc/doc/invoke.texi| 11 +++--
 gcc/doc/sourcebuild.texi   |  3 ++
 gcc/testsuite/g++.dg/other/i386-2.C|  2 +-
 gcc/testsuite/g++.dg/other/i386-3.C|  2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h  |  3 ++
 gcc/testsuite/gcc.target/i386/amx-helper.h | 21 ++
 gcc/testsuite/gcc.target/i386/amxtf32-asmatt-1.c   | 13 ++
 gcc/testsuite/gcc.target/i386/amxtf32-asmintel-1.c | 10 +
 .../gcc.target/i386/amxtf32-mmultf32ps-2.c | 48 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc  |  2 +
 gcc/testsuite/gcc.target/i386/sse-12.c |  2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c |  2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c |  2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c |  4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 11 +
 30 files changed, 217 insertions(+), 15 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 67724c308458..5d0a6443d992 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -1003,6 +1003,8 @@ get_available_features (struct __processor_model 
*cpu_model,
{
  if (eax & bit_AMX_AVX512)
set_feature (FEATURE_AMX_AVX512);
+ if (eax & bit_AMX_TF32)
+   set_feature (FEATURE_AMX_TF32);
}
 }
 
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index e8e3eb198783..74ad03926b66 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -130,6 +130,8 @@ along wit

[gcc r15-4832] Support Intel AMX-FP8

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:dd859e93a16704448e70b5941711ecd626e098ba

commit r15-4832-gdd859e93a16704448e70b5941711ecd626e098ba
Author: Liwei Xu 
Date:   Fri Nov 1 10:04:39 2024 +0800

Support Intel AMX-FP8

gcc/ChangeLog:

* common/config/i386/cpuinfo.h
(get_available_features): Detect amx-fp8.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AMX_FP8_SET): New macros.
(OPTION_MASK_ISA2_AMX_FP8_UNSET): Ditto.
(ix86_handle_option): Handle -mamx-fp8.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_FP8.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for 
amx-fp8.
* config.gcc: Add amxfp8intrin.h.
* config/i386/cpuid.h (bit_AMX_FP8): New.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Define __AMX_FP8__.
* config/i386/i386-isa.def (AMX_FP8): Add DEF_PTA for AMX_FP8.
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Add new ATTR.
* config/i386/i386.opt: Add -mamx-fp8.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include amxfp8intrin.h.
* doc/extend.texi: Document -mamx-fp8.
* doc/invoke.texi: Document -mamx-fp8.
* doc/sourcebuild.texi: Document -mamx-fp8.
* config/i386/amxfp8intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-fp8.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Check for amx-fp8.
* gcc.target/i386/amx-helper.h: Ditto.
* gcc.target/i386/fp8-helper.h: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-fp8.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp: New proc.
* gcc.target/i386/amxfp8-asmatt-1.c: New test.
* gcc.target/i386/amxfp8-asmintel-1.c: Ditto.
* gcc.target/i386/amxfp8-dpbf8ps-2.c: Ditto.
* gcc.target/i386/amxfp8-dpbhf8ps-2.c: Ditto.
* gcc.target/i386/amxfp8-dphbf8ps-2.c: Ditto.
* gcc.target/i386/amxfp8-dphf8ps-2.c: Ditto.
* gcc.target/i386/fp-emulation.h: Emulates NaN behaviour.

Co-authored-by: Hu, Lin1 

Diff:
---
 gcc/common/config/i386/cpuinfo.h  |   2 +
 gcc/common/config/i386/i386-common.cc |  19 ++-
 gcc/common/config/i386/i386-cpuinfo.h |   1 +
 gcc/common/config/i386/i386-isas.h|   1 +
 gcc/config.gcc|   2 +-
 gcc/config/i386/amxfp8intrin.h|  67 +++
 gcc/config/i386/cpuid.h   |   1 +
 gcc/config/i386/i386-c.cc |   2 +
 gcc/config/i386/i386-isa.def  |   1 +
 gcc/config/i386/i386-options.cc   |   4 +-
 gcc/config/i386/i386.opt  |   4 +
 gcc/config/i386/i386.opt.urls |   3 +
 gcc/config/i386/immintrin.h   |   2 +
 gcc/doc/extend.texi   |   5 +
 gcc/doc/invoke.texi   |  11 +-
 gcc/doc/sourcebuild.texi  |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C   |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C   |   2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h |   3 +
 gcc/testsuite/gcc.target/i386/amx-helper.h|  58 +
 gcc/testsuite/gcc.target/i386/amxfp8-asmatt-1.c   |  20 
 gcc/testsuite/gcc.target/i386/amxfp8-asmintel-1.c |  21 
 gcc/testsuite/gcc.target/i386/amxfp8-dpbf8ps-2.c  | 120 +++
 gcc/testsuite/gcc.target/i386/amxfp8-dpbhf8ps-2.c | 120 +++
 gcc/testsuite/gcc.target/i386/amxfp8-dphbf8ps-2.c | 120 +++
 gcc/testsuite/gcc.target/i386/amxfp8-dphf8ps-2.c  | 120 +++
 gcc/testsuite/gcc.target/i386/fp-emulation.h  | 112 ++
 gcc/testsuite/gcc.target/i386/fp8-helper.h| 137 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c|   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c|   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c|   2 +-
 gcc/testsuite/lib/target-supports.exp |  11 ++
 35 files changed, 973 insertions(+), 15 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 5a6aed001d1b..b128c3187d5a

[gcc r15-4829] Support Intel AMX-AVX512

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:343f8113385d00e9ffac53150bca4f78be30e19c

commit r15-4829-g343f8113385d00e9ffac53150bca4f78be30e19c
Author: Haochen Jiang 
Date:   Fri Nov 1 10:04:34 2024 +0800

Support Intel AMX-AVX512

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect AMX-AVX512.
* common/config/i386/i386-common.cc 
(OPTION_MASK_ISA2_AMX_AVX512_SET,
OPTION_MASK_ISA2_AMX_AVX512_UNSET): New.
(ix86_handle_option): Handle -mamx-avx512.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_AVX512.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-avx512.
* config.gcc: Add amxavx512intrin.h
* config/i386/cpuid.h (bit_AMX_AVX512): New.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Handle amx-avx512.
* config/i386/i386-isa.def (AMX_AVX512): Add DEF_PTA(AMX_AVX512).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle amx-avx512.
* config/i386/i386.opt: Add option -mamx-avx512.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include amxavx512intrin.h
* doc/extend.texi: Document amx-avx512.
* doc/invoke.texi: Document -mamx-avx512.
* doc/sourcebuild.texi: Document target amx-avx512.
* config/i386/amxavx512intrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-avx512.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Add cpu check for AMX-AVX512.
* gcc.target/i386/amx-helper.h: Support amx-avx512.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-avx512.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add amx-avx512.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_avx512): New.
* gcc.target/i386/amxavx512-asmatt-1.c: New test.
* gcc.target/i386/amxavx512-asmintel-1.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowd2ps-2.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Ditto.
* gcc.target/i386/amxavx512-cvtrowps2ph-2.c: Ditto.
* gcc.target/i386/amxavx512-movrow-2.c: Ditto.

Co-authored-by: Yu, Bing 

Diff:
---
 gcc/common/config/i386/cpuinfo.h   |  11 ++
 gcc/common/config/i386/i386-common.cc  |  24 ++-
 gcc/common/config/i386/i386-cpuinfo.h  |   1 +
 gcc/common/config/i386/i386-isas.h |   2 +
 gcc/config.gcc |   2 +-
 gcc/config/i386/amxavx512intrin.h  | 189 +
 gcc/config/i386/cpuid.h|   4 +
 gcc/config/i386/i386-c.cc  |   2 +
 gcc/config/i386/i386-isa.def   |   1 +
 gcc/config/i386/i386-options.cc|   4 +-
 gcc/config/i386/i386.opt   |   5 +
 gcc/config/i386/i386.opt.urls  |   3 +
 gcc/config/i386/immintrin.h|   2 +
 gcc/doc/extend.texi|   5 +
 gcc/doc/invoke.texi|  11 +-
 gcc/doc/sourcebuild.texi   |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C|   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C|   2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h  |   3 +
 gcc/testsuite/gcc.target/i386/amx-helper.h | 105 +++-
 gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c |  31 
 .../gcc.target/i386/amxavx512-asmintel-1.c |  30 
 .../gcc.target/i386/amxavx512-cvtrowd2ps-2.c   |  62 +++
 .../gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c   |  82 +
 .../gcc.target/i386/amxavx512-cvtrowps2ph-2.c  |  82 +
 gcc/testsuite/gcc.target/i386/amxavx512-movrow-2.c |  59 +++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc  |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c |   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c |   2 +-
 gcc/testsuite/lib/target-supports.exp  |  11 ++
 33 files changed, 733 insertions(+), 19 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index e3eb6e9d2503..67724c308458 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -995,6 +995,17 @@ get_available_featu

[gcc r15-4833] Support Intel MOVRS

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:e9ab41b79933d42410126f0eb7b29f820745276c

commit r15-4833-ge9ab41b79933d42410126f0eb7b29f820745276c
Author: Hu, Lin1 
Date:   Fri Nov 1 10:04:40 2024 +0800

Support Intel MOVRS

gcc/ChangeLog:

* builtins.cc (expand_builtin_prefetch): Expand for
prefetchrst2.
* common/config/i386/cpuinfo.h (get_available_features): Detect 
movrs.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_MOVRS_SET): New.
(OPTION_MASK_ISA2_MOVRS_UNSET): Ditto.
(ix86_handle_option): Handle -mmovrs.
* common/config/i386/i386-cpuinfo.h
(enum processor_features): Add FEATURE_MOVRS.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for 
movrs.
* config.gcc: Add movrsintrin.h
* config/i386/cpuid.h (bit_MOVRS): New.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (CHAR, PCCHAR), (SHORT, PCSHORT), (INT, 
PCINT),
(INT64, PCINT64).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-c.cc (ix86_target_macros_internal): Add
__MOVRS__.
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin): 
Define
__MOVRS__.
* config/i386/i386-isa.def (MOVRS): Add DEF_PTA(MOVRS)
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle movrs.
* config/i386/i386.md (movrs): New.
* config/i386/i386.opt: Add option -mmovrs.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include movrsintrin.h
* config/i386/sse.md (unspecv): Add UNSPEC_VMOVRS.
(VI1248_AVX10_2): New.
(avx10_2_movrs_vmovrs): New 
define_insn.
* config/i386/xmmintrin.h: Add prefetchrst2.
* doc/extend.texi: Document movrs.
* doc/invoke.texi: Document -mmovrs.
* doc/rtl.texi: Document extension of prefetchrst2.
* doc/sourcebuild.texi: Document target movrs.
* config/i386/movrsintrin.h: New.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mmovrs.
* g++.dg/other/i386-3.C: Ditto.
* gcc.c-torture/execute/builtin-prefetch-1.c: Expand rws.
* gcc.dg/builtin-prefetch-1.c: Ditto.
* gcc.target/i386/avx-1.c: Ditto.
* gcc.target/i386/avx-2.c: Ditto.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mmovrs.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add movrs.
* gcc.target/i386/sse-23.c: Ditto
* gcc.target/i386/avx10_2-512-movrs-1.c: New test.
* gcc.target/i386/avx10_2-movrs-1.c: Ditto.
* gcc.target/i386/movrs-1.c: Ditto.

Co-authored-by: Haochen Jiang 

Diff:
---
 gcc/builtins.cc|   4 +-
 gcc/common/config/i386/cpuinfo.h   |   2 +
 gcc/common/config/i386/i386-common.cc  |  16 +
 gcc/common/config/i386/i386-cpuinfo.h  |   1 +
 gcc/common/config/i386/i386-isas.h |   1 +
 gcc/config.gcc |   2 +-
 gcc/config/i386/cpuid.h|   1 +
 gcc/config/i386/i386-builtin-types.def |   6 +
 gcc/config/i386/i386-builtin.def   |  18 +
 gcc/config/i386/i386-c.cc  |   2 +
 gcc/config/i386/i386-expand.cc |   4 +
 gcc/config/i386/i386-isa.def   |   1 +
 gcc/config/i386/i386-options.cc|   4 +-
 gcc/config/i386/i386.md|  49 ++-
 gcc/config/i386/i386.opt   |   4 +
 gcc/config/i386/i386.opt.urls  |   3 +
 gcc/config/i386/immintrin.h|   2 +
 gcc/config/i386/movrsintrin.h  | 453 +
 gcc/config/i386/sse.md |  21 +
 gcc/config/i386/xmmintrin.h|   5 +-
 gcc/doc/extend.texi|   5 +
 gcc/doc/invoke.texi|   7 +-
 gcc/doc/rtl.texi   |   9 +-
 gcc/doc/sourcebuild.texi   |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C|   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C|   2 +-
 .../gcc.c-torture/execute/builtin-prefetch-1.c |   3 +-
 gcc/testsuite/gcc.dg/builtin-prefetch-1.c  |   4 +-
 gcc/testsuite/gcc.target/i386/avx-1.c  |   2 +-
 gcc/testsuite/gcc.target/i386/avx-2.c  |   2 +-
 .../gcc.target/i386/avx10_2-512-movrs-1.c  |  40 ++
 gcc/testsuite/gcc.target/i386/avx1

[gcc r15-4834] Support Intel AMX-MOVRS

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:f8ae2cce23fc6c36dc553cc90e0091cdbc8dda22

commit r15-4834-gf8ae2cce23fc6c36dc553cc90e0091cdbc8dda22
Author: Hu, Lin1 
Date:   Fri Nov 1 10:04:43 2024 +0800

Support Intel AMX-MOVRS

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_available_features):
Detect AMX-MOVRS.
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AMX_MOVRS_SET): New.
(OPTION_MASK_ISA2_AMX_MOVRS_UNSET): Ditto.
(ix86_handle_option): Handle -mamx-movrs.
* common/config/i386/i386-cpuinfo.h (enum processor_features):
Add FEATURE_AMX_MOVRS.
* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
amx-movrs.
* config.gcc: Add amxmovrsintrin.h.
* config/i386/cpuid.h (bit_AMX_MOVRS): New.
* config/i386/i386-c.cc (ix86_target_macros_internal):
Define __AMX_MOVRS__.
* config/i386/i386-isa.def (AMX_MOVRS): Add DEF_PTA(AMX_MOVRS).
* config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p):
Handle amx-movrs.
* config/i386/i386.opt: Add option -mamx-movrs.
* config/i386/i386.opt.urls: Regenerated.
* config/i386/immintrin.h: Include amxmovrsintrin.h
* doc/extend.texi: Document amx-movrs.
* doc/invoke.texi: Document -mamx-movrs.
* doc/sourcebuild.texi: Document target amx-movrs.
* config/i386/amxmovrsintrin.h: New file.

gcc/testsuite/ChangeLog:

* g++.dg/other/i386-2.C: Add -mamx-movrs.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/amx-check.h: Add new check for amx-movrs.
* gcc.target/i386/funcspec-56.inc: Add new target attribute.
* gcc.target/i386/sse-12.c: Add -mamx-movrs.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Add amx-movrs.
* gcc.target/i386/sse-23.c: Ditto.
* lib/target-supports.exp (check_effective_target_amx_movrs): New.
* gcc.target/i386/amxmovrs-asmatt-1.c: New test.
* gcc.target/i386/amxmovrs-asmintel-1.c: Ditto.
* gcc.target/i386/amxmovrs-t2rpntlvw-2.c: Ditto.
* gcc.target/i386/amxmovrs-tileloaddrs-2.c: Ditto.

Diff:
---
 gcc/common/config/i386/cpuinfo.h   |   2 +
 gcc/common/config/i386/i386-common.cc  |  19 +++-
 gcc/common/config/i386/i386-cpuinfo.h  |   1 +
 gcc/common/config/i386/i386-isas.h |   1 +
 gcc/config.gcc |   3 +-
 gcc/config/i386/amxmovrsintrin.h   | 111 +
 gcc/config/i386/cpuid.h|   1 +
 gcc/config/i386/i386-c.cc  |   2 +
 gcc/config/i386/i386-isa.def   |   1 +
 gcc/config/i386/i386-options.cc|   4 +-
 gcc/config/i386/i386.opt   |   4 +
 gcc/config/i386/i386.opt.urls  |   3 +
 gcc/config/i386/immintrin.h|   1 +
 gcc/doc/extend.texi|   5 +
 gcc/doc/invoke.texi|   9 +-
 gcc/doc/sourcebuild.texi   |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C|   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C|   2 +-
 gcc/testsuite/gcc.target/i386/amx-check.h  |   3 +
 gcc/testsuite/gcc.target/i386/amxmovrs-asmatt-1.c  |  27 +
 .../gcc.target/i386/amxmovrs-asmintel-1.c  |  28 ++
 .../gcc.target/i386/amxmovrs-t2rpntlvw-2.c |  58 +++
 .../gcc.target/i386/amxmovrs-tileloaddrs-2.c   |  33 ++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc  |   2 +
 gcc/testsuite/gcc.target/i386/sse-12.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-13.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-14.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse-22.c |   4 +-
 gcc/testsuite/gcc.target/i386/sse-23.c |   2 +-
 gcc/testsuite/lib/target-supports.exp  |  11 ++
 30 files changed, 334 insertions(+), 14 deletions(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 14af8bf22949..0dcdaafeca5d 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -1011,6 +1011,8 @@ get_available_features (struct __processor_model 
*cpu_model,
set_feature (FEATURE_AMX_TRANSPOSE);
  if (eax & bit_AMX_FP8)
set_feature (FEATURE_AMX_FP8);
+ if (eax & bit_AMX_MOVRS)
+   set_feature (FEATURE_AMX_MOVRS);
}
 }
 
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 86cacf5322b4..b2c2f0bdc11a 100644
--- a/gcc/common/config/i386/i386-c

[gcc r15-4828] Support Intel SM4 EVEX instructions

2024-10-31 Thread Haochen Jiang via Gcc-cvs

https://gcc.gnu.org/g:8ee5cd4b84489bee0f72153e96a9afe9493e170d

commit r15-4828-g8ee5cd4b84489bee0f72153e96a9afe9493e170d
Author: Haochen Jiang 
Date:   Fri Nov 1 10:04:27 2024 +0800

Support Intel SM4 EVEX instructions

gcc/ChangeLog:

* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI).
* config/i386/i386-builtin.def (BDESC): Add new builtins.
* config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle
V16SI_FTYPE_V16SI_V16SI.
* config/i386/sm4intrin.h: Add zmm insns.
* config/i386/sse.md (vsm4key4_): Add EVEX pattern.
(vsm4rnds4_): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sm4-check.h: Add zmm test.
* gcc.target/i386/sm4-avx10_2-1.c: New test.
* gcc.target/i386/sm4-avx10_2-512-1.c: Ditto.
* gcc.target/i386/sm4key4-avx10_2-512-2.c: Ditto.
* gcc.target/i386/sm4rnds4-avx10_2-512-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386-builtin-types.def |  3 ++
 gcc/config/i386/i386-builtin.def   |  2 +
 gcc/config/i386/i386-expand.cc |  1 +
 gcc/config/i386/sm4intrin.h| 25 ++
 gcc/config/i386/sse.md | 20 +---
 gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c  | 58 ++
 gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c  | 15 ++
 gcc/testsuite/gcc.target/i386/sm4-check.h  | 36 +-
 .../gcc.target/i386/sm4key4-avx10_2-512-2.c| 18 +++
 .../gcc.target/i386/sm4rnds4-avx10_2-512-2.c   | 18 +++
 10 files changed, 186 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386-builtin-types.def 
b/gcc/config/i386/i386-builtin-types.def
index 290f6e649a90..c6034238ac41 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1507,3 +1507,6 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
 DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
+
+# SM4 builtins
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 1eb631db7109..39580272fa87 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1668,8 +1668,10 @@ BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, 
CODE_FOR_vsm3rnds2, "__builtin
 /* SM4 */
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, 
"__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) 
V4SI_FTYPE_V4SI_V4SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, 
"__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) 
V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, 
CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", 
IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, 
"__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) 
V4SI_FTYPE_V4SI_V4SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, 
"__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) 
V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, 
CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", 
IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
 
 /* SHA512 */
 BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, 
"__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) 
V4DI_FTYPE_V4DI_V2DI)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0de0e8427314..768987c08b8e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11415,6 +11415,7 @@ ix86_expand_args_builtin (const struct 
builtin_description *d,
 case V16QI_FTYPE_V8HI_V8HI:
 case V16HF_FTYPE_V16HF_V16HF:
 case V16SF_FTYPE_V16SF_V16SF:
+case V16SI_FTYPE_V16SI_V16SI:
 case V8QI_FTYPE_V8QI_V8QI:
 case V8QI_FTYPE_V4HI_V4HI:
 case V8HI_FTYPE_V8HI_V8HI:
diff --git a/gcc/config/i386/sm4intrin.h b/gcc/config/i386/sm4intrin.h
index 4c212ccb566a..e2d78f01e6ef 100644
--- a/gcc/config/i386/sm4intrin.h
+++ b/gcc/config/i386/sm4intrin.h
@@ -67,4 +67,29 @@ _mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B)
 #pragma GCC pop_options
 #endif /* __DISABLE_SM4__ */
 
+#if !defined (__SM4__) || !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("sm4,avx10.2-512")
+#define __DISABLE_SM4_512__
+#endif /* __SM4_512__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4key4_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B);
+

[gcc r15-4804] [PATCH v2] RISC-V: Fix gcc.target/riscv/rvv/base/cpymem-1.c f3

2024-10-31 Thread Jeff Law via Gcc-cvs

https://gcc.gnu.org/g:d6868b284379ecb7deb65d60f6f17fd6c34c7d6f

commit r15-4804-gd6868b284379ecb7deb65d60f6f17fd6c34c7d6f
Author: Craig Blackmore 
Date:   Thu Oct 31 09:12:10 2024 -0600

[PATCH v2] RISC-V: Fix gcc.target/riscv/rvv/base/cpymem-1.c f3

The function body checks for f3 only ran with -mcmodel explicitly set
which meant I missed a regression in my local testing of:

  commit b039d06c9a810a3fab4c5eb9d50b0c7aff94b2d8
  Author: Craig Blackmore 
  Date:   Fri Oct 18 09:17:21 2024 -0600

  [PATCH 3/7] RISC-V: Fix vector memcpy smaller LMUL generation

The failure showed up in the rivos CI and it is due to f3 now using
LMUL m1 instead of m8.

I have reworked the test to make it more robust and maintainable.  This
allowed most of the special casing of command line arguments to be
removed.  It also fixes an issue where some targets would enable
multiple versions of the function body check e.g. `-march=rv32gcv
-mcmodel=medany`.

Changes since v1: Added missing ChangeLog.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/cpymem-1.c: Fix and rework f3.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c | 107 +
 1 file changed, 48 insertions(+), 59 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
index 6edb4c9253a4..81d14d836334 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cpymem-1.c
@@ -9,6 +9,8 @@
 extern void *memcpy(void *__restrict dest, const void *__restrict src, 
__SIZE_TYPE__ n);
 #endif
 
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
 /* memcpy should be implemented using the cpymem pattern.
 ** f1:
 XX \.L\d+: # local label is ignored
@@ -50,70 +52,57 @@ void f2 (__INT32_TYPE__* a, __INT32_TYPE__* b, int l)
Use extern here so that we get a known alignment, lest
DATA_ALIGNMENT force us to make the scan pattern accomodate
code for different alignments depending on word size.
-** f3: { target { { any-opts "-mcmodel=medlow" } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl1024b" "-mrvv-max-lmul=dynamic" 
"-mrvv-max-lmul=m2" "-mrvv-max-lmul=m4" "-mrvv-max-lmul=m8" 
"-mrvv-vector-bits=zvl" } } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vsetivli\s+zero,16,e32,m8,ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medlow -mrvv-vector-bits=zvl" 
"-mcmodel=medlow -march=rv64gcv_zvl512b -mrvv-vector-bits=zvl" } && { no-opts 
"-march=rv64gcv_zvl1024b" } } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vl(1|4|2)re32\.v\s+v\d+,0\([ta][0-7]\)
-**vs(1|4|2)r\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medlow -march=rv64gcv_zvl1024b" 
"-mcmodel=medlow -march=rv64gcv_zvl512b" } && { no-opts "-mrvv-vector-bits=zvl" 
} } }
-**lui\s+[ta][0-7],%hi\(a_a\)
-**lui\s+[ta][0-7],%hi\(a_b\)
-**addi\s+a4,[ta][0-7],%lo\(a_b\)
-**vsetivli\s+zero,16,e32,(m1|m4|mf2),ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**addi\s+[ta][0-7],[ta][0-7],%lo\(a_a\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medany" } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl256b" "-march=rv64gcv_zvl1024b" 
"-mrvv-max-lmul=dynamic" "-mrvv-max-lmul=m8" "-mrvv-max-lmul=m4" 
"-mrvv-vector-bits=zvl" } } }
-**lla\s+[ta][0-7],a_a
-**lla\s+[ta][0-7],a_b
-**vsetivli\s+zero,16,e32,m8,ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
-*/
-
-/*
-** f3: { target { { any-opts "-mcmodel=medany"  } && { no-opts 
"-march=rv64gcv_zvl512b" "-march=rv64gcv_zvl256b" "-march=rv64gcv" 
"-march=rv64gc_zve64d" "-march=rv64gc_zve32f" } } }
-**lla\s+[ta][0-7],a_b
-**vsetivli\s+zero,16,e32,m(f2|1|4),ta,ma
-**vle32.v\s+v\d+,0\([ta][0-7]\)
-**lla\s+[ta][0-7],a_a
-**vse32\.v\s+v\d+,0\([ta][0-7]\)
-**ret
+** f3: { target { no-opts "-mrvv-vector-bits=zvl" } }
+**  (
+**  lui\s+[ta][0-7],%hi\(a_a\)
+**  lui\s+[ta][0-7],%hi\(a_b\)
+**  addi\s+[ta][0-7],[ta][0-7],%lo\(a_b\)
+**  vsetivli\s+zero,4,e32,m1,ta,ma
+**  |
+**  lui\s+[ta][0-7],%hi\(a_a\)
+**  lui\s+[ta][0-7],%hi\(a_b\)
+**  li\s+[ta][0-7],\d+
+**  addi\s+[ta][0-7],[ta][0-7],%lo\(a_b\)
+**  vsetvli\s+zero,[ta][0-7],e32,m1,ta,ma
+**  |
+**  lla\s+[ta][0-7],a_b
+**  vsetivli\s+zero,4,e32,m1,ta,ma
+**  |
+**  li\s+[ta][0-7],\d+
+**  lla\s+[ta][0-7],a_b
+*

[gcc r15-4795] RISC-V: allow -fno-plt to disable PLT

2024-10-31 Thread Kito Cheng via Gcc-cvs

https://gcc.gnu.org/g:1f7b1c555c66cf55f9032ea14135f29d27d34811

commit r15-4795-g1f7b1c555c66cf55f9032ea14135f29d27d34811
Author: Yangyu Chen 
Date:   Thu Oct 31 16:31:24 2024 +0800

RISC-V: allow -fno-plt to disable PLT

Currently, the RISC-V target uses the target specific mplt option to
control PLT generation. This patch deprecates the target specific mplt
option and uses the common fplt option instead. This allows users to
use the same option for most targets.

Co-Developed-by: Liao Shihua 
Signed-off-by: Yangyu Chen 

gcc/ChangeLog:

* config/riscv/predicates.md: Use flag_plt instead of TARGET_PLT.
* config/riscv/riscv.opt: Make mplt an alias of the common option fplt.

Diff:
---
 gcc/config/riscv/predicates.md | 2 +-
 gcc/config/riscv/riscv.opt | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 9971fabc5873..55bcfa4fa4f1 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -329,7 +329,7 @@
 {
   enum riscv_symbol_type type;
   return (riscv_symbolic_constant_p (op, &type)
- && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && TARGET_PLT);
+ && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && flag_plt);
 })
 
 (define_predicate "call_insn_operand"
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 6360ed3984d0..5bc5d3002934 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -34,8 +34,8 @@ Target RejectNegative Joined UInteger Var(riscv_branch_cost)
 -mbranch-cost=NSet the cost of branches to roughly N instructions.
 
 mplt
-Target Var(TARGET_PLT) Init(1)
-When generating -fpic code, allow the use of PLTs. Ignored for fno-pic.
+Target Alias(fplt)
+This option is deprecated; use -fplt or -fno-plt instead.
 
 mabi=
 Target RejectNegative Joined Enum(abi_type) Var(riscv_abi) Init(ABI_ILP32) 
Negative(mabi=)

[gcc r15-4794] tree: Fix up comment wording in valid_new_delete_pair_p

2024-10-31 Thread Jakub Jelinek via Gcc-cvs

https://gcc.gnu.org/g:1de156eb2bb445cd0e0a582944dcd75d085f30c9

commit r15-4794-g1de156eb2bb445cd0e0a582944dcd75d085f30c9
Author: Jakub Jelinek 
Date:   Thu Oct 31 08:46:24 2024 +0100

tree: Fix up comment wording in valid_new_delete_pair_p

I've noticed a duplicated word in a comment, fixed thusly.

2024-10-31  Jakub Jelinek  

* tree.cc (valid_new_delete_pair_p): Fix up duplicate "or  or"
in comment.

Diff:
---
 gcc/tree.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree.cc b/gcc/tree.cc
index 45f0474f9e38..b4c059d3b0db 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -15081,7 +15081,7 @@ valid_new_delete_pair_p (tree new_asm, tree delete_asm,
   && !memcmp (new_name + 4, "St11align_val_tRKSt9nothrow_t", 29)))
 {
   /* _ZnXYSt11align_val_t or _ZnXYSt11align_val_tRKSt9nothrow_t matches
-_ZdXPvSt11align_val_t or _ZdXPvYSt11align_val_t or  or
+_ZdXPvSt11align_val_t or _ZdXPvYSt11align_val_t or
 _ZdXPvSt11align_val_tRKSt9nothrow_t.  */
   if (delete_len == 20 && !memcmp (delete_name + 5, "St11align_val_t", 15))
return true;

[gcc r15-4802] middle-end: Lower all gconds during vector pattern matching [PR117176]

2024-10-31 Thread Tamar Christina via Gcc-cvs

https://gcc.gnu.org/g:d2f9159cfe7ea904e6476cabefea0c6ac9532e29

commit r15-4802-gd2f9159cfe7ea904e6476cabefea0c6ac9532e29
Author: Tamar Christina 
Date:   Thu Oct 31 12:50:23 2024 +

middle-end: Lower all gconds during vector pattern matching [PR117176]

I have been taking a look at boolean handling once more in the vectorizer.

There are two situations to consider:

  1. when the booleans being created are created from comparing data inputs 
then
 for the resulting vector boolean we need to know the vector type and 
the
 precision.  In this case, when we have an operation such as NOT on the 
data
 element, this has to be lowered to XOR because the truncation to the 
vector
 precision needs to be explicit.
  2. when the boolean being created comes from another boolean operation, 
then
 we don't need to lower NOT, as the precision doesn't change.  We don't 
do
 any lowering for these (as denoted in check_bool_pattern) and instead 
the
 precision is copied from the element feeding the boolean statement 
during
 VF analysis.

For early break gcond lowering in order to correctly handle the second 
scenario
above we punted the lowering of VECT_SCALAR_BOOLEAN_TYPE_P comparisons that 
were
already in the right shape.  e.g. e != 0 where e is a boolean does not need 
any
lowering.

The issue however is that the statement feeding e may need to be lowered in 
the
case where it's a data expression.

This patch changes a bit how we do the lowering.  We now always emit an
additional compare. e.g. if the input is;

  if (e != 0)

where e is a boolean we would punt on this before, but now we generate

  f = e != 0
  if (f != 0)

We then use the same infrastructure as recog_bool to ask it to lower f, and 
in
doing so handle any boolean conversions that need to be lowered.

Because we now guarantee that f is an internal def we can also simplify the
SLP building code.

When e is a boolean, the precision we build for f needs to reflect the 
precision
of the operation feeding e.  To get this value we use integer_type_for_mask 
the
same way recog_bool does, and if it's defined (e.g. we have a data 
conversion
somewhere) we pass that precision on instead.  This gets us the correct VF
on the newly lowered boolean expressions.

gcc/ChangeLog:

PR tree-optimization/117176
* tree-vect-patterns.cc (vect_recog_gcond_pattern): Lower all 
gconds.
* tree-vect-slp.cc (vect_analyze_slp): No longer check for in vect 
def.

gcc/testsuite/ChangeLog:

PR tree-optimization/117176
* gcc.dg/vect/vect-early-break_130-pr117176.c: New test.

Diff:
---
 .../gcc.dg/vect/vect-early-break_130-pr117176.c| 21 
 gcc/tree-vect-patterns.cc  | 19 ++-
 gcc/tree-vect-slp.cc   | 39 +-
 3 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_130-pr117176.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_130-pr117176.c
new file mode 100644
index ..841dcce284dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_130-pr117176.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+struct ColorSpace {
+  int componentCt;
+};
+
+struct Psnr {
+  double psnr[3];
+};
+
+int f(struct Psnr psnr, struct ColorSpace colorSpace) {
+  int i, hitsTarget = 1;
+
+  for (i = 1; i < colorSpace.componentCt && hitsTarget; ++i)
+hitsTarget = !(psnr.psnr[i] < 1);
+
+  return hitsTarget;
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 945e7d2dc45d..a708234304fe 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -5426,17 +5426,19 @@ vect_recog_gcond_pattern (vec_info *vinfo,
   if (VECTOR_TYPE_P (scalar_type))
 return NULL;
 
-  if (code == NE_EXPR
-  && zerop (rhs)
-  && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
-return NULL;
+  /* If the input is a boolean then try to figure out the precision that the
+ vector type should use.  We cannot use the scalar precision as this would
+ later mismatch.  This is similar to what recog_bool does.  */
+  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
+{
+  if (tree stype = integer_type_for_mask (lhs, vinfo))
+   scalar_type = stype;
+}
 
-  tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
-  if (vecitype == NULL_TREE)
+  tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
+  if (vectype == NULL_TREE)
 return NULL;
 
-  tree vectype = truth_type_for (vecitype);
-
   tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL

[gcc r15-4819] testsuite: move single-file LTO pr95677 test to torture

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:778863eab72fdef226b8ac999b13a6ee71f397c0

commit r15-4819-g778863eab72fdef226b8ac999b13a6ee71f397c0
Author: Sam James 
Date:   Fri Oct 25 22:57:50 2024 +0100

testsuite: move single-file LTO pr95677 test to torture

This only started being used recently in r15-4681-g96110c14cf61a1 and
pinskia pointed out we may as well make it a proper torture test
instead as it's a single file LTO test.

gcc/testsuite/ChangeLog:
PR c++/95677

* g++.dg/lto/pr95677_0.C: Move to...
* g++.dg/torture/pr95677.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/{lto/pr95677_0.C => torture/pr95677.C} | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/gcc/testsuite/g++.dg/lto/pr95677_0.C 
b/gcc/testsuite/g++.dg/torture/pr95677.C
similarity index 71%
rename from gcc/testsuite/g++.dg/lto/pr95677_0.C
rename to gcc/testsuite/g++.dg/torture/pr95677.C
index d24279de0252..d33f5c8c33a2 100644
--- a/gcc/testsuite/g++.dg/lto/pr95677_0.C
+++ b/gcc/testsuite/g++.dg/torture/pr95677.C
@@ -1,9 +1,5 @@
 // PR c++/95677
-
-// { dg-lto-do link }
-// { dg-lto-options { -flto } }
-
-
+// { dg-do link }
 
 namespace {
   void foo() {

[gcc r14-10860] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-10-31 Thread Peter Bergner via Gcc-cvs

https://gcc.gnu.org/g:b4d4b86aebe1314a145732150d59a7b9ba066032

commit r14-10860-gb4d4b86aebe1314a145732150d59a7b9ba066032
Author: Peter Bergner 
Date:   Tue Jun 18 17:42:45 2024 -0500

rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

Only disable shrink-wrapping when using -mrop-protect when we know we
will be emitting the ROP-protect hash instructions (ie, non-leaf functions).

2024-06-17  Peter Bergner  

gcc/
PR target/114759
* config/rs6000/rs6000.cc (rs6000_override_options_after_change): 
Move
the disabling of shrink-wrapping from here...
* config/rs6000/rs6000-logue.cc (rs6000_emit_prologue): ...to here.

gcc/testsuite/
PR target/114759
* gcc.target/powerpc/pr114759-1.c: New test.

(cherry picked from commit 0451bc503da9c858e9f1ddfb8faec367c2e032c8)

Diff:
---
 gcc/config/rs6000/rs6000-logue.cc |  5 +
 gcc/config/rs6000/rs6000.cc   |  4 
 gcc/testsuite/gcc.target/powerpc/pr114759-1.c | 16 
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index edc0d6c8f520..fdb6414f486f 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -3012,6 +3012,11 @@ rs6000_emit_prologue (void)
&& (lookup_attribute ("no_split_stack",
  DECL_ATTRIBUTES (cfun->decl))
== NULL));
+  /* If we are inserting ROP-protect hash instructions, disable shrink-wrap
+ until the bug where the hashst insn is emitted in the wrong location
+ is fixed.  See PR101324 for details.  */
+  if (info->rop_hash_size)
+flag_shrink_wrap = 0;
 
   frame_pointer_needed_indeed
 = frame_pointer_needed && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 5fd31bf8242c..80f03809ff68 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3424,10 +3424,6 @@ rs6000_override_options_after_change (void)
 }
   else if (!OPTION_SET_P (flag_cunroll_grow_size))
 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
-
-  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
-  if (rs6000_rop_protect)
-flag_shrink_wrap = 0;
 }
 
 #ifdef TARGET_USES_LINUX64_OPT
diff --git a/gcc/testsuite/gcc.target/powerpc/pr114759-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
new file mode 100644
index ..579e08e920f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect 
-fdump-rtl-pro_and_epilogue" } */
+/* { dg-require-effective-target rop_ok } Only enable on supported ABIs. */
+
+/* Verify we still attempt shrink-wrapping when using -mrop-protect
+   and there are no function calls.  */
+
+long
+foo (long arg)
+{
+  if (arg)
+asm ("" ::: "r20");
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "Performing shrink-wrapping" 1 
"pro_and_epilogue" } } */

[gcc r15-4820] testsuite: move single-file LTO pr47333 test to torture

2024-10-31 Thread Sam James via Gcc-cvs

https://gcc.gnu.org/g:bedd308c25ac00466bc4921a99279659cff8df51

commit r15-4820-gbedd308c25ac00466bc4921a99279659cff8df51
Author: Sam James 
Date:   Fri Oct 25 22:59:13 2024 +0100

testsuite: move single-file LTO pr47333 test to torture

This only started being used recently in r15-4683-g04e0fbbc34e101 and
pinskia pointed out we may as well make it a proper torture test
instead as it's a single file LTO test.

gcc/testsuite/ChangeLog:
PR target/47333

* g++.dg/lto/pr47333_0.C: Move to...
* g++.dg/torture/pr47333.C: ...here.

Diff:
---
 gcc/testsuite/g++.dg/{lto/pr47333_0.C => torture/pr47333.C} | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/lto/pr47333_0.C 
b/gcc/testsuite/g++.dg/torture/pr47333.C
similarity index 99%
rename from gcc/testsuite/g++.dg/lto/pr47333_0.C
rename to gcc/testsuite/g++.dg/torture/pr47333.C
index 254a378f8dfd..06610425ebba 100644
--- a/gcc/testsuite/g++.dg/lto/pr47333_0.C
+++ b/gcc/testsuite/g++.dg/torture/pr47333.C
@@ -1,6 +1,5 @@
 /* { dg-require-effective-target sync_int_long } */
-/* "WARNING: lto.exp does not support dg-additional-options" */
-#pragma GCC diagnostic ignored "-Wtemplate-body"
+// { dg-additional-options "-Wno-template-body" }
 
 namespace std
 {

45 matches

Mail list logo