[gcc r15-7102] vect: Force alignment peeling to vectorize more early break loops [PR118211]: update 'gcc.dg/vect/vect-switch-search-line-fast.c' for GCN

2025-01-21 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:da75309c635c54a6010b146514d456d2a4c6ab33

commit r15-7102-gda75309c635c54a6010b146514d456d2a4c6ab33
Author: Thomas Schwinge 
Date:   Tue Jan 21 14:57:37 2025 +0100

vect: Force alignment peeling to vectorize more early break loops 
[PR118211]: update 'gcc.dg/vect/vect-switch-search-line-fast.c' for GCN

PR tree-optimization/118211
PR tree-optimization/116126
gcc/testsuite/
* gcc.dg/vect/vect-switch-search-line-fast.c: Update for GCN.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c 
b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
index 21c77f49ebd7..678512db3197 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-switch-search-line-fast.c
@@ -16,5 +16,5 @@ const unsigned char *search_line_fast2 (const unsigned char 
*s,
   return s;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { 
! ilp32 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
ilp32 || { amdgcn*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { 
! { ilp32 || { amdgcn*-*-* } } } } } } */


[gcc r15-7088] c, c++: Return 1 for __has_builtin(__builtin_va_arg) and __has_builtin(__builtin_c23_va_start)

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:2a6816883107ee4a4aabb43763ce079512f3f0f8

commit r15-7088-g2a6816883107ee4a4aabb43763ce079512f3f0f8
Author: Jakub Jelinek 
Date:   Tue Jan 21 09:14:01 2025 +0100

c, c++: Return 1 for __has_builtin(__builtin_va_arg) and 
__has_builtin(__builtin_c23_va_start)

The Linux kernel uses its own copy of stdarg.h.
Now, before GCC 15, our stdarg.h had
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
#define va_start(v, ...)__builtin_va_start(v, 0)
#else
#define va_start(v,l)   __builtin_va_start(v,l)
#endif
va_start definition but GCC 15 has:
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
#define va_start(...) __builtin_c23_va_start(__VA_ARGS__)
#else
#define va_start(v,l)   __builtin_va_start(v,l)
#endif

I wanted to suggest to the kernel people during their porting to C23
that they'd better use C23 compatible va_start macro definition,
but to make it portable, I think they really want something like
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
#define va_start(v, ...)__builtin_va_start(v, 0)
#ifdef __has_builtin
#if __has_builtin(__builtin_c23_va_start)
#undef va_start
#define va_start(...) __builtin_c23_va_start(__VA_ARGS__)
#endif
#endif
#else
#define va_start(v,l)   __builtin_va_start(v,l)
#endif
or so (or with >= 202311L), as GCC 13-14 and clang don't support
__builtin_c23_va_start (yet?) and one gets better user experience with
that.

Except it seems __has_builtin(__builtin_c23_va_start) doesn't actually work,
it works for most of the stdarg.h __builtin_va_*, doesn't work for
__builtin_va_arg (neither C nor C++) and didn't work for
__builtin_c23_va_start if it was available.

The following patch wires __has_builtin for those.

2025-01-21  Jakub Jelinek  

gcc/c/
* c-decl.cc (names_builtin_p): Return 1 for RID_C23_VA_START and
RID_VA_ARG.
gcc/cp/
* cp-objcp-common.cc (names_builtin_p): Return 1 for RID_VA_ARG.
gcc/testsuite/
* c-c++-common/cpp/has-builtin-4.c: New test.

Diff:
---
 gcc/c/c-decl.cc|  2 ++
 gcc/cp/cp-objcp-common.cc  |  1 +
 gcc/testsuite/c-c++-common/cpp/has-builtin-4.c | 16 
 3 files changed, 19 insertions(+)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 314b118b7c8b..68d331b22503 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -11804,6 +11804,8 @@ names_builtin_p (const char *name)
 case RID_CHOOSE_EXPR:
 case RID_OFFSETOF:
 case RID_TYPES_COMPATIBLE_P:
+case RID_C23_VA_START:
+case RID_VA_ARG:
   return 1;
 default:
   break;
diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc
index fc6c790ce113..8336d0bb8f7c 100644
--- a/gcc/cp/cp-objcp-common.cc
+++ b/gcc/cp/cp-objcp-common.cc
@@ -587,6 +587,7 @@ names_builtin_p (const char *name)
 case RID_BUILTIN_ASSOC_BARRIER:
 case RID_BUILTIN_BIT_CAST:
 case RID_OFFSETOF:
+case RID_VA_ARG:
   return 1;
 case RID_BUILTIN_OPERATOR_NEW:
 case RID_BUILTIN_OPERATOR_DELETE:
diff --git a/gcc/testsuite/c-c++-common/cpp/has-builtin-4.c 
b/gcc/testsuite/c-c++-common/cpp/has-builtin-4.c
new file mode 100644
index ..65d2b188d9aa
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/has-builtin-4.c
@@ -0,0 +1,16 @@
+/* { dg-do preprocess } */
+
+#if __has_builtin (__builtin_va_start) != 1
+#error "No __builtin_va_start"
+#endif
+#if __has_builtin (__builtin_va_end) != 1
+#error "No __builtin_va_end"
+#endif
+#if __has_builtin (__builtin_va_arg) != 1
+#error "no __builtin_va_arg"
+#endif
+#if __STDC_VERSION__ >= 202311L
+#if __has_builtin (__builtin_c23_va_start) != 1
+#error "no __builtin_c23_va_start"
+#endif
+#endif


[gcc r15-7087] c++: Handle RAW_DATA_CST in add_list_candidates [PR118532]

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:843ca8a964bcfeae72d9d48e2dd549fa818120e3

commit r15-7087-g843ca8a964bcfeae72d9d48e2dd549fa818120e3
Author: Jakub Jelinek 
Date:   Tue Jan 21 09:12:21 2025 +0100

c++: Handle RAW_DATA_CST in add_list_candidates [PR118532]

This is the second bug discovered today with the
https://gcc.gnu.org/pipermail/gcc-patches/2025-January/673945.html
hack but then turned into proper testcases where embed-2[23].C FAILed
since introduction of optimized #embed support and the others when
optimizing large C++ initializers using RAW_DATA_CST.

The add_list_candidates problem is the same as with
make_tree_vector_from_ctor, unfortunately it can't call that
function because it can have those additional artificial arguments
that need to be pushed earlier.
When working on the patch, I've also noticed an error where we didn't
know how to dump RAW_DATA_CST, so I've added support for that too.

2025-01-21  Jakub Jelinek  

PR c++/118532
* call.cc (add_list_candidates): Handle RAW_DATA_CST among init_list
elts.
* error.cc (dump_expr_init_vec): Handle RAW_DATA_CST among v elts.

* g++.dg/cpp/embed-22.C: New test.
* g++.dg/cpp/embed-23.C: New test.
* g++.dg/cpp0x/pr118532.C: New test.
* g++.dg/cpp2a/explicit20.C: New test.

Diff:
---
 gcc/cp/call.cc  | 23 +--
 gcc/cp/error.cc | 21 -
 gcc/testsuite/g++.dg/cpp/embed-22.C | 24 
 gcc/testsuite/g++.dg/cpp/embed-23.C | 21 +
 gcc/testsuite/g++.dg/cpp0x/pr118532.C   | 25 +
 gcc/testsuite/g++.dg/cpp2a/explicit20.C | 23 +++
 6 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 9e57261cf17d..80015dfe9da6 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -4258,11 +4258,30 @@ add_list_candidates (tree fns, tree first_arg,
 
   /* Expand the CONSTRUCTOR into a new argument vec.  */
   vec *new_args;
-  vec_alloc (new_args, nart + CONSTRUCTOR_NELTS (init_list));
+  unsigned nelts = nart + CONSTRUCTOR_NELTS (init_list);
+  vec_alloc (new_args, nelts);
   for (unsigned i = 0; i < nart; ++i)
 new_args->quick_push ((*args)[i]);
   for (unsigned i = 0; i < CONSTRUCTOR_NELTS (init_list); ++i)
-new_args->quick_push (CONSTRUCTOR_ELT (init_list, i)->value);
+if (TREE_CODE (CONSTRUCTOR_ELT (init_list, i)->value) == RAW_DATA_CST)
+  {
+   tree raw_data = CONSTRUCTOR_ELT (init_list, i)->value;
+   nelts += RAW_DATA_LENGTH (raw_data) - 1;
+   vec_safe_reserve (new_args, nelts - new_args->length ());
+   if (TYPE_PRECISION (TREE_TYPE (raw_data)) > CHAR_BIT
+   || TYPE_UNSIGNED (TREE_TYPE (raw_data)))
+ for (unsigned j = 0; j < (unsigned) RAW_DATA_LENGTH (raw_data); ++j)
+   new_args->quick_push (build_int_cst (TREE_TYPE (raw_data),
+RAW_DATA_UCHAR_ELT (raw_data,
+j)));
+   else
+ for (unsigned j = 0; j < (unsigned) RAW_DATA_LENGTH (raw_data); ++j)
+   new_args->quick_push (build_int_cst (TREE_TYPE (raw_data),
+RAW_DATA_SCHAR_ELT (raw_data,
+j)));
+  }
+else
+  new_args->quick_push (CONSTRUCTOR_ELT (init_list, i)->value);
 
   /* We aren't looking for list-ctors anymore.  */
   flags &= ~LOOKUP_LIST_ONLY;
diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc
index 615ae0d1b65a..a33afdb3d509 100644
--- a/gcc/cp/error.cc
+++ b/gcc/cp/error.cc
@@ -2289,7 +2289,26 @@ dump_expr_init_vec (cxx_pretty_printer *pp, 
vec *v,
 
   FOR_EACH_CONSTRUCTOR_VALUE (v, idx, value)
 {
-  dump_expr (pp, value, flags | TFF_EXPR_IN_PARENS);
+  if (TREE_CODE (value) == RAW_DATA_CST)
+   for (unsigned i = 0; i < (unsigned) RAW_DATA_LENGTH (value); ++i)
+ {
+   if (TYPE_UNSIGNED (TREE_TYPE (value))
+   || TYPE_PRECISION (TREE_TYPE (value)) > CHAR_BIT)
+ pp_decimal_int (pp, RAW_DATA_UCHAR_ELT (value, i));
+   else
+ pp_decimal_int (pp, RAW_DATA_SCHAR_ELT (value, i));
+   if (i == RAW_DATA_LENGTH (value) - 1U)
+ break;
+   else if (i == 9 && RAW_DATA_LENGTH (value) > 20)
+ {
+   pp_string (pp, ", ..., ");
+   i = RAW_DATA_LENGTH (value) - 11;
+ }
+   else
+ pp_separate_with_comma (pp);
+ }
+  else
+   dump_expr (pp, value, flags | TFF_EXPR_IN_PARENS);
   if (idx != v->length () - 1)
pp_separate_with_comma (pp);
 }
diff --git a/gcc/testsuite/g++.dg/cpp/embed-22.C 
b/gcc/testsuite/g++.dg/cpp/embed-22.C
new file mode 100

[gcc r15-7089] c++: Speed up compilation of large char array initializers when not using #embed

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:f31d49d65412d03d2cf91dd3b8b7281815c6d03b

commit r15-7089-gf31d49d65412d03d2cf91dd3b8b7281815c6d03b
Author: Jakub Jelinek 
Date:   Tue Jan 21 09:15:53 2025 +0100

c++: Speed up compilation of large char array initializers when not using 
#embed

The following patch (again, on top of the #embed patchset)
attempts to optimize compilation of large {{{,un}signed ,}char,std::byte}
array initializers when not using #embed in the source.

Unlike the C patch which is done during the parsing of initializers this
is done when lexing tokens into an array, because C++ lexes all tokens
upfront and so by the time we parse the initializers we already have 16
bytes per token allocated (i.e. 32 extra compile time memory bytes per
one byte in the array).

The drawback is again that it can result in worse locations for diagnostics
(-Wnarrowing, -Wconversion) when initializing signed char arrays with values
128..255.  Not really sure what to do about this though unlike the C case,
the locations would need to be preserved through reshape_init* and perhaps
till template instantiation.
For #embed, there is just a single location_t (could be range of the
directive), for diagnostics perhaps we could extend it to say byte xyz of
the file embedded here or something like that, but the optimization done by
this patch, either we'd need to bump the minimum limit at which to try it,
or say temporarily allocate a location_t array for each byte and then clear
it when we no longer need it or something.
I've been using the same testcases as for C, with #embed of 100'000'000
bytes:
time ./cc1plus -quiet -O2 -o test4a.s2 test4a.c

real0m0.972s
user0m0.578s
sys 0m0.195s
with xxd -i alternative of the same data without this patch it consumed
around 13.2GB of RAM and
time ./cc1plus -quiet -O2 -o test4b.s4 test4b.c

real3m47.968s
user3m41.907s
sys 0m5.015s
and the same with this patch it consumed around 3.7GB of RAM and
time ./cc1plus -quiet -O2 -o test4b.s3 test4b.c

real0m24.772s
user0m23.118s
sys 0m1.495s

2025-01-21  Jakub Jelinek  

* parser.cc (cp_lexer_new_main): Attempt to optimize large sequences
of CPP_NUMBER with int type and values 0-255 separated by CPP_COMMA
into CPP_EMBED with RAW_DATA_CST u.value.

Diff:
---
 gcc/cp/parser.cc | 101 +++
 1 file changed, 101 insertions(+)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index a8ac8af09550..37214dae5b11 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -735,6 +735,12 @@ cp_lexer_new_main (void)
   gcc_assert (!the_parser);
   the_parser = cp_parser_new (lexer);
 
+  unsigned raw_data_tokens = 0;
+  char *raw_data_buf = NULL;
+  const unsigned int raw_data_max_len
+= 131072 - offsetof (struct tree_string, str) - 1;
+  const unsigned int raw_data_min_len = 128;
+
   /* Get the remaining tokens from the preprocessor.  */
   while (tok->type != CPP_EOF)
 {
@@ -743,6 +749,99 @@ cp_lexer_new_main (void)
module_token_lang (tok->type, tok->keyword, tok->u.value,
   tok->location, filter);
 
+  /* Attempt to optimize long lists of 0-255 integers
+separated by commas into CPP_EMBED.
+In particular, when we see
+CPP_NUMBER CPP_COMMA ( CPP_NUMBER CPP_COMMA ){n} CPP_NUMBER
+where n is in [raw_data_min_len, raw_data_max_len - 2]
+and all CPP_NUMBER tokens have int type and value in [0, UCHAR_MAX]
+it is changed into
+CPP_NUMBER CPP_COMMA CPP_EMBED CPP_COMMA CPP_NUMBER.  */
+recheck:
+  if (tok->type == CPP_NUMBER
+ && (raw_data_tokens & 1) == 0
+ && TREE_CODE (tok->u.value) == INTEGER_CST
+ && TREE_TYPE (tok->u.value) == integer_type_node
+ && !wi::neg_p (wi::to_wide (tok->u.value))
+ && wi::to_widest (tok->u.value) <= UCHAR_MAX
+ && raw_data_tokens < raw_data_max_len * 2)
+   {
+ raw_data_tokens++;
+ /* * 2 comes from each byte in the middle represented by 2 tokens,
+CPP_NUMBER and CPP_COMMA, while + 3 stands for the
+CPP_NUMBER CPP_COMMA at the start and CPP_NUMBER at the end.  */
+ if (raw_data_tokens >= raw_data_min_len * 2 + 3)
+   {
+ unsigned int len = lexer->buffer->length ();
+ unsigned int new_len;
+ if (raw_data_tokens == raw_data_min_len * 2 + 3)
+   {
+ if (raw_data_buf == NULL)
+   raw_data_buf = XNEWVEC (char, raw_data_max_len);
+ for (unsigned i = len - raw_data_tokens, j = 0;
+  i < len; i += 2, ++j)
+   raw_data_buf[j]
+ = (char) tree_to_uhwi ((*lexer->buffer)[i].u.value);
+

[gcc r15-7090] Add warning for non-spec compliant FMV in Aarch64

2025-01-21 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:e5798872281de0c4c2e87587cbb562552048ccdb

commit r15-7090-ge5798872281de0c4c2e87587cbb562552048ccdb
Author: Alfie Richards 
Date:   Thu Jan 9 09:45:32 2025 +0000

Add warning for non-spec compliant FMV in Aarch64

This patch adds a warning when FMV is used for Aarch64.

The reasoning for this is the ACLE [1] spec for FMV has diverged
significantly from the current implementation and we want to prevent
potential future compatibility issues.

There is a patch for an ACLE compliant version of target_version and
target_clone in progress but it won't make gcc-15.

This has been bootstrap and regression tested for Aarch64.
Is this okay for master and backport to gcc-14?

[1] 
https://github.com/ARM-software/acle/blob/main/main/acle.md#function-multi-versioning

gcc/ChangeLog:

* config/aarch64/aarch64.cc
(aarch64_process_target_version_attr): Add experimental warning.
* config/aarch64/aarch64.opt: Add command line option to disable
warning.
* doc/invoke.texi: Add documentation for 
-W[no-]experimental-fmv-target.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/mv-1.C: Add CLI flag.
* g++.target/aarch64/mv-symbols1.C: Add CLI flag.
* g++.target/aarch64/mv-symbols2.C: Add CLI flag.
* g++.target/aarch64/mv-symbols3.C: Add CLI flag.
* g++.target/aarch64/mv-symbols4.C: Add CLI flag.
* g++.target/aarch64/mv-symbols5.C: Add CLI flag.
* g++.target/aarch64/mv-warning1.C: New test.
* g++.target/aarch64/mvc-symbols1.C: Add CLI flag.
* g++.target/aarch64/mvc-symbols2.C: Add CLI flag.
* g++.target/aarch64/mvc-symbols3.C: Add CLI flag.
* g++.target/aarch64/mvc-symbols4.C: Add CLI flag.
* g++.target/aarch64/mv-pragma.C: Add CLI flag.
* g++.target/aarch64/mvc-warning1.C: New test.

Diff:
---
 gcc/config/aarch64/aarch64.cc   |  9 +
 gcc/config/aarch64/aarch64.opt  |  4 
 gcc/doc/invoke.texi | 11 ++-
 gcc/testsuite/g++.target/aarch64/mv-1.C |  1 +
 gcc/testsuite/g++.target/aarch64/mv-pragma.C|  1 +
 gcc/testsuite/g++.target/aarch64/mv-symbols1.C  |  1 +
 gcc/testsuite/g++.target/aarch64/mv-symbols2.C  |  1 +
 gcc/testsuite/g++.target/aarch64/mv-symbols3.C  |  1 +
 gcc/testsuite/g++.target/aarch64/mv-symbols4.C  |  1 +
 gcc/testsuite/g++.target/aarch64/mv-symbols5.C  |  1 +
 gcc/testsuite/g++.target/aarch64/mv-warning1.C  |  9 +
 gcc/testsuite/g++.target/aarch64/mvc-symbols1.C |  1 +
 gcc/testsuite/g++.target/aarch64/mvc-symbols2.C |  1 +
 gcc/testsuite/g++.target/aarch64/mvc-symbols3.C |  1 +
 gcc/testsuite/g++.target/aarch64/mvc-symbols4.C |  1 +
 gcc/testsuite/g++.target/aarch64/mvc-warning1.C |  6 ++
 16 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1dbbc9c3cf9b..dba779a8e51e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -20257,6 +20257,15 @@ aarch64_parse_fmv_features (const char *str, 
aarch64_feature_flags *isa_flags,
 static bool
 aarch64_process_target_version_attr (tree args)
 {
+  static bool issued_warning = false;
+  if (!issued_warning)
+{
+  warning (OPT_Wexperimental_fmv_target,
+  "Function Multi Versioning support is experimental, and the "
+  "behavior is likely to change");
+  issued_warning = true;
+}
+
   if (TREE_CODE (args) == TREE_LIST)
 {
   if (TREE_CHAIN (args))
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index da9e0c18d477..7e309d9efe46 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -431,3 +431,7 @@ handling.  One means we try to form pairs involving one or 
more existing
 individual writeback accesses where possible.  A value of two means we
 also try to opportunistically form writeback opportunities by folding in
 trailing destructive updates of the base register used by a pair.
+
+Wexperimental-fmv-target
+Target Var(warn_experimental_fmv) Warning Init(1)
+Warn about usage of experimental Function Multi Versioning.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 72811042700b..75fbe8838f48 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -827,7 +827,8 @@ Objective-C and Objective-C++ Dialects}.
 -moverride=@var{string}  -mverbose-cost-dump
 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg}
 -mstack-protector-guard-offset=@var{offset} -mtrack-speculation
--moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion}
+-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion
+-Wexperimental-fmv-target}
 
 @emph{Adapteva Epiphany Options} (@ref{Adapteva Epiphany Options})
 @gccoptlist{-mhalf-reg-file  -mprefer

[gcc r15-7091] testsuite: Fix test failing with -fimplicit-constexpr [PR118277]

2025-01-21 Thread Simon Martin via Gcc-cvs
https://gcc.gnu.org/g:f3d884da1280e211f48be0619e5d2f1ee787

commit r15-7091-gf3d884da1280e211f48be0619e5d2f1ee787
Author: Simon Martin 
Date:   Tue Jan 21 10:11:12 2025 +0100

testsuite: Fix test failing with -fimplicit-constexpr [PR118277]

While testing an unrelated C++ patch with "make check-c++-all", I
noticed that r15-6760-g38a13ea4117b96 added a test case that fails with
-fimplicit-constexpr.

The problem is that this test unconditionally expects an error stating
that a non-constexpr function is called, but that function is
auto-magically constexpr'd under -fimplicit-constexpr.

As suggested by Jakub, this patch simply passes -fno-implicit-constexpr
in that test.

PR c++/118277

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/constexpr-asm-5.C: Pass -fno-implicit-constexpr.

Diff:
---
 gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C 
b/gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C
index 1c20b9dfec1e..bcecea9d6b50 100644
--- a/gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C
+++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-asm-5.C
@@ -2,7 +2,7 @@
 // { dg-do compile { target c++11 } }
 // { dg-options "" }
 // Override any default-'-fno-exceptions':
-// { dg-additional-options -fexceptions }
+// { dg-additional-options "-fexceptions -fno-implicit-constexpr" }
 
 struct A {};
 struct B { int size; };


[gcc r15-7094] aarch64: Drop ILP32 from default elf multilibs after deprecation

2025-01-21 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:9fd190c70976638eb8ae239f09d9f73da26d3021

commit r15-7094-g9fd190c70976638eb8ae239f09d9f73da26d3021
Author: Tamar Christina 
Date:   Tue Jan 21 10:27:13 2025 +0000

aarch64: Drop ILP32 from default elf multilibs after deprecation

Following the deprecation of ILP32, *-elf builds now fail due to -Werror on 
the
deprecation warning.  This is because on embedded builds ILP32 is part of 
the
default multilib.

This patch removes it from the default target as the build would fail 
anyway.

gcc/ChangeLog:

* config.gcc (aarch64-*-elf): Drop ILP32 from default multilibs.

Diff:
---
 gcc/config.gcc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c0e66a26f953..6f9f7313e132 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1210,7 +1210,7 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*)
esac
aarch64_multilibs="${with_multilib_list}"
if test "$aarch64_multilibs" = "default"; then
-   aarch64_multilibs="lp64,ilp32"
+   aarch64_multilibs="lp64"
fi
aarch64_multilibs=`echo $aarch64_multilibs | sed -e 's/,/ /g'`
for aarch64_multilib in ${aarch64_multilibs}; do


[gcc r15-7097] AArch64: Add LUTI ACLE for SVE2

2025-01-21 Thread Saurabh Jha via Gcc-cvs
https://gcc.gnu.org/g:eb0b551c5570d98dd7cf21fa1bd0240a0c9d875f

commit r15-7097-geb0b551c5570d98dd7cf21fa1bd0240a0c9d875f
Author: Vladimir Miloserdov 
Date:   Fri May 31 16:26:11 2024 +0000

AArch64: Add LUTI ACLE for SVE2

This patch introduces support for LUTI2/LUTI4 ACLE for SVE2.

LUTI instructions are used for efficient table lookups with 2-bit
or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from
the low 128 bits of the table vector using packed 2-bit indices,
while LUTI4 can read from the low 128 or 256 bits of the table
vector or from two table vectors using packed 4-bit indices.
These instructions fill the destination vector by copying elements
indexed by segments of the source vector, selected by the vector
segment index.

The changes include the addition of a new AArch64 option
extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions
for the new LUTI instruction shapes, and implementations of the
svluti2 and svluti4 builtins.

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc
(aarch64_update_cpp_builtins): Add new flag TARGET_LUT.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(struct luti_base): Shape for lut intrinsics.
(SHAPE): Specializations for lut shapes for luti2 and luti4.
* config/aarch64/aarch64-sve-builtins-shapes.h: Declare lut
intrinsics.
* config/aarch64/aarch64-sve-builtins-sve2.cc
(class svluti_lane_impl): Define expand for lut intrinsics.
(FUNCTION): Define expand for lut intrinsics.
* config/aarch64/aarch64-sve-builtins-sve2.def
(REQUIRED_EXTENSIONS): Declare lut intrinsics behind lut flag.
(svluti2_lane): Define intrinsic behind flag.
(svluti4_lane): Define intrinsic behind flag.
* config/aarch64/aarch64-sve-builtins-sve2.h: Declare lut
intrinsics.
* config/aarch64/aarch64-sve-builtins.cc
(TYPES_bh_data): New type for byte and halfword.
(bh_data): Type array for byte and halfword.
(h_data): Type array for halfword.
* config/aarch64/aarch64-sve2.md
(@aarch64_sve_luti): Instruction patterns for
lut intrinsics.
* config/aarch64/iterators.md: Iterators and attributes for lut
intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: New test
macro.
* lib/target-supports.exp: Add lut flag to the for loop.
* gcc.target/aarch64/sve/acle/general-c/lut_1.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_2.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_3.c: New test.
* gcc.target/aarch64/sve/acle/general-c/lut_4.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_s16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_s8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_u16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti2_u8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_f16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_s8.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c: New test.
* gcc.target/aarch64/sve2/acle/asm/luti4_u8.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-c.cc|   2 +
 gcc/config/aarch64/aarch64-sve-builtins-shapes.cc  |  47 
 gcc/config/aarch64/aarch64-sve-builtins-shapes.h   |   2 +
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc|  17 ++
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |   8 +
 gcc/config/aarch64/aarch64-sve-builtins-sve2.h |   2 +
 gcc/config/aarch64/aarch64-sve-builtins.cc |   8 +-
 gcc/config/aarch64/aarch64-sve2.md |  33 +++
 gcc/config/aarch64/iterators.md|   7 +
 .../aarch64/sve/acle/asm/test_sve_acle.h   |  16 ++
 .../gcc.target/aarch64/sve/acle/general-c/lut_1.c  |  34 +++
 .../gcc.target/aarch64/sve/acle/general-c/lut_2.c  |  11 +
 .../gcc.target/aarch64/sve/acle/general-c/lut_3.c  |  92 
 .../gcc.target/aarch64/sve/acle/general-c/lut_4.c  | 262 +
 .../gcc.target/aarch64/sve2/acle/asm/luti2_bf

[gcc r12-10923] Zen5 tuning part 2: disable gather and scatter

2025-01-21 Thread Jan Hubicka via Gcc-cvs
https://gcc.gnu.org/g:e909afe8a8a2924dd6ced6bdf7d8e397f14310b5

commit r12-10923-ge909afe8a8a2924dd6ced6bdf7d8e397f14310b5
Author: Jan Hubicka 
Date:   Tue Sep 3 15:07:41 2024 +0200

Zen5 tuning part 2: disable gather and scatter

We disable gathers for zen4.  It seems that gather has improved a bit 
compared
to zen4 and Zen5 optimization manual suggests "Avoid GATHER instructions 
when
the indices are known ahead of time. Vector loads followed by shuffles 
result
in a higher load bandwidth." however the situation seems to be more
complicated.

gather is 5-10% loss on parest benchmark as well as 30% loss on sparse dot
products in TSVC. Curiously enough breaking these out into microbenchmark
reversed the situation and it turns out that the performance depends on
how indices are distributed.  gather is loss if indices are sequential,
neutral if they are random and win for some strides (4, 8).

This seems to be similar to earlier zens, so I think (especially for
backporting znver5 support) that it makes sense to be consistent and disable
gather unless we work out a good heuristic on when to use it. Since we
typically do not know the indices in advance, I don't see how that can be 
done.

I opened PR116582 with some examples of wins and losses

gcc/ChangeLog:

* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Disable for
ZNVER5.
(X86_TUNE_USE_SCATTER_2PARTS): Disable for ZNVER5.
(X86_TUNE_USE_GATHER_4PARTS): Disable for ZNVER5.
(X86_TUNE_USE_SCATTER_4PARTS): Disable for ZNVER5.
(X86_TUNE_USE_GATHER_8PARTS): Disable for ZNVER5.
(X86_TUNE_USE_SCATTER_8PARTS): Disable for ZNVER5.

(cherry picked from commit d82edbe92eed53a479736fcbbe6d54d0fb42daa4)

Diff:
---
 gcc/config/i386/x86-tune.def | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 561bd17b6e54..3fa7501fc72e 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -471,35 +471,32 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, 
"avoid_4byte_prefixes",
 /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
-   | m_GENERIC | m_GDS))
+ ~(m_ZNVER | m_ALDERLAKE | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
elements.  */
 DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
 
 /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE
-   | m_GENERIC | m_GDS))
+ ~(m_ZNVER | m_ALDERLAKE | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
elements.  */
 DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
 
 /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE
-   | m_GENERIC | m_GDS))
+ ~(m_ZNVER | m_ALDERLAKE | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
elements.  */
 DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
 
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
smaller FMA chain.  */


[gcc r15-7098] tree-optimization/118569 - LC SSA broken after unrolling

2025-01-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:1d25e357c047f48cee50df318e965a0fbf84e2fe

commit r15-7098-g1d25e357c047f48cee50df318e965a0fbf84e2fe
Author: Richard Biener 
Date:   Tue Jan 21 09:45:41 2025 +0100

tree-optimization/118569 - LC SSA broken after unrolling

The following amends the previous fix to mark all of the loop BBs
as needing to be scanned for new LC PHI uses when the loop's nesting
parents changed, noticing one caller of fix_loop_placement was already
doing that.  So the following moves this code into fix_loop_placement,
covering both callers now.

PR tree-optimization/118569
* cfgloopmanip.cc (fix_loop_placement): When the loop's
nesting parents changed, mark all blocks to be scanned
for LC PHI uses.
(fix_bb_placements): Remove code moved into fix_loop_placement.

* gcc.dg/torture/pr118569.c: New testcase.

Diff:
---
 gcc/cfgloopmanip.cc | 22 ++--
 gcc/testsuite/gcc.dg/torture/pr118569.c | 36 +
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index 573146b2e284..2c28437b34d9 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -154,10 +154,17 @@ fix_loop_placement (class loop *loop, bool 
*irred_invalidated,
  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
*irred_invalidated = true;
  rescan_loop_exit (e, false, false);
- /* Any LC SSA PHIs on e->dest might now be on the wrong edge
-if their defs were in a former outer loop.  */
- if (loop_closed_ssa_invalidated)
-   bitmap_set_bit (loop_closed_ssa_invalidated, e->src->index);
+   }
+  /* Any LC SSA PHIs on e->dest might now be on the wrong edge
+if their defs were in a former outer loop.  Also all uses
+in the original inner loop of defs in the outer loop(s) now
+require LC PHI nodes.  */
+  if (loop_closed_ssa_invalidated)
+   {
+ basic_block *bbs = get_loop_body (loop);
+ for (unsigned i = 0; i < loop->num_nodes; ++i)
+   bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
+ free (bbs);
}
 
   ret = true;
@@ -233,13 +240,6 @@ fix_bb_placements (basic_block from,
   loop_closed_ssa_invalidated))
continue;
  target_loop = loop_outer (from->loop_father);
- if (loop_closed_ssa_invalidated)
-   {
- basic_block *bbs = get_loop_body (from->loop_father);
- for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
-   bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
- free (bbs);
-   }
}
   else
{
diff --git a/gcc/testsuite/gcc.dg/torture/pr118569.c 
b/gcc/testsuite/gcc.dg/torture/pr118569.c
new file mode 100644
index ..c5b404aded53
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr118569.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-ch -fno-tree-ccp -fno-tree-fre" } */
+
+volatile int a;
+int b, c, d, e, f, g;
+int main() {
+  int i = 2, j = 1;
+k:
+  if (!e)
+;
+  else {
+short l = 1;
+if (0)
+m:
+  d = g;
+f = 0;
+for (; f < 2; f++) {
+  if (f)
+for (; j < 2; j++)
+  if (i)
+goto m;
+  a;
+  if (l)
+continue;
+  i = 0;
+  while (c)
+l++;
+}
+g = 0;
+  }
+  if (b) {
+i = 1;
+goto k;
+  }
+  return 0;
+}


[gcc r15-7103] c++: fix wrong-code with constexpr prvalue opt [PR118396]

2025-01-21 Thread Marek Polacek via Gcc-cvs
https://gcc.gnu.org/g:f3f02493dfa8858c3fb2bc0da0d6d7320921408a

commit r15-7103-gf3f02493dfa8858c3fb2bc0da0d6d7320921408a
Author: Marek Polacek 
Date:   Thu Jan 16 11:22:59 2025 -0500

c++: fix wrong-code with constexpr prvalue opt [PR118396]

The recent r15-6369 unfortunately caused a bad wrong-code issue.
Here we have

  TARGET_EXPR 

and call cp_fold_r -> maybe_constant_init with object=D.2996.  In
cxx_eval_outermost_constant_expr we now take the type of the object
if present.  An object can't have type 'void' and so we continue to
evaluate the initializer.  That evaluates into a VOID_CST, meaning
we disregard the whole initializer, and terrible things ensue.

For non-simple TARGET_EXPRs, we should return ctx.ctor rather than
the result of cxx_eval_constant_expression.

PR c++/118396
PR c++/118523

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_outermost_constant_expr): For non-simple
TARGET_EXPRs, return ctx.ctor rather than the result of
cxx_eval_constant_expression.  If TYPE and the type of R don't
match, return the original expression.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-prvalue4.C: New test.
* g++.dg/cpp1y/constexpr-prvalue3.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/constexpr.cc |  9 -
 gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue4.C | 33 ++
 gcc/testsuite/g++.dg/cpp1y/constexpr-prvalue3.C | 45 +
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 7ff38f8b5e52..9f950ffed749 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8983,6 +8983,11 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   r = cxx_eval_constant_expression (&ctx, r, vc_prvalue,
&non_constant_p, &overflow_p);
 
+  /* If we got a non-simple TARGET_EXPR, the initializer was a sequence
+ of statements, and the result ought to be stored in ctx.ctor.  */
+  if (r == void_node && !constexpr_dtor && ctx.ctor)
+r = ctx.ctor;
+
   if (!constexpr_dtor)
 verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p);
   else
@@ -9087,7 +9092,9 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
 return r;
   else if (non_constant_p && TREE_CONSTANT (r))
 r = mark_non_constant (r);
-  else if (non_constant_p)
+  else if (non_constant_p
+  /* Check we are not trying to return the wrong type.  */
+  || !same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (r)))
 return t;
 
   if (should_unshare)
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue4.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue4.C
new file mode 100644
index 000000000000..afcee65f8803
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue4.C
@@ -0,0 +1,33 @@
+// PR c++/118396
+// { dg-do run { target c++11 } }
+// { dg-options "-O" }
+
+void *operator new(__SIZE_TYPE__, void *__p) { return __p; }
+
+struct Foo {
+  virtual ~Foo() = default;
+};
+struct Data {
+  int status;
+  Foo data{};
+};
+
+Data *P, *Q;
+
+struct vector {
+  vector (const Data &__value) {
+P = static_cast<Data *>(__builtin_operator_new(0));
+new (P) Data (__value);
+Q = P + 1;
+  }
+  Data *begin() { return P; }
+  Data *end() { return Q; }
+};
+
+int
+main ()
+{
+  vector items_(Data{});
+  for (auto item : items_)
+item.status == 0 ? void() : __builtin_abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-prvalue3.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-prvalue3.C
new file mode 100644
index ..8ea86c60be52
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-prvalue3.C
@@ -0,0 +1,45 @@
+// PR c++/118523
+// { dg-do compile { target c++14 } }
+// { dg-options "-O2 -Wall" }
+
+struct __new_allocator {
+  constexpr __new_allocator() {}
+  __new_allocator(__new_allocator &) {}
+};
+template  using __allocator_base = __new_allocator;
+template  struct allocator_traits;
+template  struct allocator : __allocator_base {};
+template  struct allocator_traits> {
+  using pointer = _Tp *;
+  template  using rebind_alloc = allocator<_Up>;
+  static void deallocate(allocator<_Tp>, pointer, long);
+};
+struct __alloc_traits : allocator_traits> {};
+struct _Vector_impl_data {
+  __alloc_traits::pointer _M_start;
+  __alloc_traits::pointer _M_end_of_storage;
+  constexpr _Vector_impl_data() : _M_start(), _M_end_of_storage() {}
+};
+struct _Vector_impl : __alloc_traits::rebind_alloc, _Vector_impl_data {};
+struct _Vector_base {
+  ~_Vector_base() {
+_M_deallocate(_M_impl._M_start,
+  _M_impl._M_end_of_storage - _M_impl._M_start);
+  }
+  _Vector_impl _M_impl;
+  void _M_deallocate(__alloc_traits::pointer __p, long __n) {
+if (__p)
+  __alloc_traits::dealloc

[gcc r12-10924] d: Fix ICE in build_deref, at d/d-codegen.cc:1650 [PR111650]

2025-01-21 Thread Iain Buclaw via Gcc-cvs
https://gcc.gnu.org/g:4d320a7df4b25c2eb060a2a16fee8b993301be55

commit r12-10924-g4d320a7df4b25c2eb060a2a16fee8b993301be55
Author: Iain Buclaw 
Date:   Fri Apr 19 10:51:12 2024 +0200

d: Fix ICE in build_deref, at d/d-codegen.cc:1650 [PR111650]

PR d/111650

gcc/d/ChangeLog:

* decl.cc (get_fndecl_arguments): Move generation of frame type to 
...
(DeclVisitor::visit (FuncDeclaration *)): ... here, after the call 
to
build_closure.

gcc/testsuite/ChangeLog:

* gdc.dg/pr111650.d: New test.

(cherry picked from commit 4d4929fe0654d51b52a2bf6e6188d7aad0bf17ac)

Diff:
---
 gcc/d/decl.cc   | 20 ++--
 gcc/testsuite/gdc.dg/pr111650.d | 21 +
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index a2dd8b84c59f..6c2705d9864f 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -162,16 +162,6 @@ get_fndecl_arguments (FuncDeclaration *decl)
  tree parm_decl = get_symbol_decl (decl->vthis);
  DECL_ARTIFICIAL (parm_decl) = 1;
  TREE_READONLY (parm_decl) = 1;
-
- if (decl->vthis->type == Type::tvoidptr)
-   {
- /* Replace generic pointer with back-end closure type
-(this wins for gdb).  */
- tree frame_type = FRAMEINFO_TYPE (get_frameinfo (decl));
- gcc_assert (frame_type != NULL_TREE);
- TREE_TYPE (parm_decl) = build_pointer_type (frame_type);
-   }
-
  param_list = chainon (param_list, parm_decl);
}
 
@@ -1047,6 +1037,16 @@ public:
 /* May change cfun->static_chain.  */
 build_closure (d);
 
+/* Replace generic pointer with back-end closure type
+   (this wins for gdb).  */
+if (d->vthis && d->vthis->type == Type::tvoidptr)
+  {
+   tree frame_type = FRAMEINFO_TYPE (get_frameinfo (d));
+   gcc_assert (frame_type != NULL_TREE);
+   tree parm_decl = get_symbol_decl (d->vthis);
+   TREE_TYPE (parm_decl) = build_pointer_type (frame_type);
+  }
+
 if (d->vresult)
   declare_local_var (d->vresult);
 
diff --git a/gcc/testsuite/gdc.dg/pr111650.d b/gcc/testsuite/gdc.dg/pr111650.d
new file mode 100644
index 000000000000..4298a76d38f9
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr111650.d
@@ -0,0 +1,21 @@
+// { dg-do compile }
+ref V require(K, V)(ref V[K] aa, K key, lazy V value);
+
+struct Root
+{
+ulong[3] f;
+}
+
+Root[ulong] roots;
+
+Root getRoot(int fd, ulong rootID)
+{
+return roots.require(rootID,
+{
+Root result;
+inoLookup(fd, () => result);
+return result;
+}());
+}
+
+void inoLookup(int, scope Root delegate()) { }


[gcc r13-9338] d: Fix ICE in build_deref, at d/d-codegen.cc:1650 [PR111650]

2025-01-21 Thread Iain Buclaw via Gcc-cvs
https://gcc.gnu.org/g:24291f6e40e4b37954b368361fc97fc8fb1bf864

commit r13-9338-g24291f6e40e4b37954b368361fc97fc8fb1bf864
Author: Iain Buclaw 
Date:   Fri Apr 19 10:51:12 2024 +0200

d: Fix ICE in build_deref, at d/d-codegen.cc:1650 [PR111650]

PR d/111650

gcc/d/ChangeLog:

* decl.cc (get_fndecl_arguments): Move generation of frame type to 
...
(DeclVisitor::visit (FuncDeclaration *)): ... here, after the call 
to
build_closure.

gcc/testsuite/ChangeLog:

* gdc.dg/pr111650.d: New test.

(cherry picked from commit 4d4929fe0654d51b52a2bf6e6188d7aad0bf17ac)

Diff:
---
 gcc/d/decl.cc   | 20 ++--
 gcc/testsuite/gdc.dg/pr111650.d | 21 +
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index 2a135b516aa6..84274b3f3c31 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -163,16 +163,6 @@ get_fndecl_arguments (FuncDeclaration *decl)
  tree parm_decl = get_symbol_decl (decl->vthis);
  DECL_ARTIFICIAL (parm_decl) = 1;
  TREE_READONLY (parm_decl) = 1;
-
- if (decl->vthis->type == Type::tvoidptr)
-   {
- /* Replace generic pointer with back-end closure type
-(this wins for gdb).  */
- tree frame_type = FRAMEINFO_TYPE (get_frameinfo (decl));
- gcc_assert (frame_type != NULL_TREE);
- TREE_TYPE (parm_decl) = build_pointer_type (frame_type);
-   }
-
  param_list = chainon (param_list, parm_decl);
}
 
@@ -1060,6 +1050,16 @@ public:
 /* May change cfun->static_chain.  */
 build_closure (d);
 
+/* Replace generic pointer with back-end closure type
+   (this wins for gdb).  */
+if (d->vthis && d->vthis->type == Type::tvoidptr)
+  {
+   tree frame_type = FRAMEINFO_TYPE (get_frameinfo (d));
+   gcc_assert (frame_type != NULL_TREE);
+   tree parm_decl = get_symbol_decl (d->vthis);
+   TREE_TYPE (parm_decl) = build_pointer_type (frame_type);
+  }
+
 if (d->vresult)
   declare_local_var (d->vresult);
 
diff --git a/gcc/testsuite/gdc.dg/pr111650.d b/gcc/testsuite/gdc.dg/pr111650.d
new file mode 100644
index 000000000000..4298a76d38f9
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr111650.d
@@ -0,0 +1,21 @@
+// { dg-do compile }
+ref V require(K, V)(ref V[K] aa, K key, lazy V value);
+
+struct Root
+{
+ulong[3] f;
+}
+
+Root[ulong] roots;
+
+Root getRoot(int fd, ulong rootID)
+{
+return roots.require(rootID,
+{
+Root result;
+inoLookup(fd, () => result);
+return result;
+}());
+}
+
+void inoLookup(int, scope Root delegate()) { }


[gcc r15-7113] c++: Don't call fold from cp_fold if one of the operands is an error_mark [PR118525]

2025-01-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:0d25d45c9d3a54b21f9dce43beb0b5ced4db0409

commit r15-7113-g0d25d45c9d3a54b21f9dce43beb0b5ced4db0409
Author: Andrew Pinski 
Date:   Thu Jan 16 12:53:21 2025 -0800

c++: Don't call fold from cp_fold if one of the operands is an error_mark 
[PR118525]

While adding a new match pattern, g++.dg/cpp2a/consteval36.C started to ICE
because we would call fold even if one of the operands of the comparison
was an error_mark_node.
I found a new testcase which also ICEs before this patch, which shows the
issue was latent.

There is code in cp_fold to avoid calling fold when one of the operands
becomes error_mark_node, but with the addition of consteval, an invalid call
is replaced before the call to cp_fold, so there is no way to propagate the
error_mark up.  So this patch changes the current code to check whether the
operands of the expression are error_mark_node before checking whether the
folded operand is different from the previous one.

Bootstrapped and tested on x86_64-linux-gnu.

PR c++/118525

gcc/cp/ChangeLog:

* cp-gimplify.cc (cp_fold): Check operands of unary, binary, 
cond/vec_cond
and array_ref for error_mark before checking if the operands had 
changed.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/consteval38.C: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/cp/cp-gimplify.cc| 99 ++--
 gcc/testsuite/g++.dg/cpp2a/consteval38.C | 11 
 2 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index c7074b00cef2..4ec3de13008c 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -3005,19 +3005,16 @@ cp_fold (tree x, fold_flags_t flags)
   loc = EXPR_LOCATION (x);
   op0 = cp_fold_maybe_rvalue (TREE_OPERAND (x, 0), rval_ops, flags);
 
-  if (code == CONVERT_EXPR
+  if (op0 == error_mark_node)
+   x = error_mark_node;
+  else if (code == CONVERT_EXPR
  && SCALAR_TYPE_P (TREE_TYPE (x))
  && op0 != void_node)
/* During parsing we used convert_to_*_nofold; re-convert now using the
   folding variants, since fold() doesn't do those transformations.  */
x = fold (convert (TREE_TYPE (x), op0));
   else if (op0 != TREE_OPERAND (x, 0))
-   {
- if (op0 == error_mark_node)
-   x = error_mark_node;
- else
-   x = fold_build1_loc (loc, code, TREE_TYPE (x), op0);
-   }
+   x = fold_build1_loc (loc, code, TREE_TYPE (x), op0);
   else
x = fold (x);
 
@@ -3087,20 +3084,17 @@ cp_fold (tree x, fold_flags_t flags)
   op0 = cp_fold_maybe_rvalue (TREE_OPERAND (x, 0), rval_ops, flags);
 
 finish_unary:
-  if (op0 != TREE_OPERAND (x, 0))
+  if (op0 == error_mark_node)
+   x = error_mark_node;
+  else if (op0 != TREE_OPERAND (x, 0))
{
- if (op0 == error_mark_node)
-   x = error_mark_node;
- else
+ x = fold_build1_loc (loc, code, TREE_TYPE (x), op0);
+ if (code == INDIRECT_REF
+ && (INDIRECT_REF_P (x) || TREE_CODE (x) == MEM_REF))
{
- x = fold_build1_loc (loc, code, TREE_TYPE (x), op0);
- if (code == INDIRECT_REF
- && (INDIRECT_REF_P (x) || TREE_CODE (x) == MEM_REF))
-   {
- TREE_READONLY (x) = TREE_READONLY (org_x);
- TREE_SIDE_EFFECTS (x) = TREE_SIDE_EFFECTS (org_x);
- TREE_THIS_VOLATILE (x) = TREE_THIS_VOLATILE (org_x);
-   }
+ TREE_READONLY (x) = TREE_READONLY (org_x);
+ TREE_SIDE_EFFECTS (x) = TREE_SIDE_EFFECTS (org_x);
+ TREE_THIS_VOLATILE (x) = TREE_THIS_VOLATILE (org_x);
}
}
   else
@@ -3190,13 +3184,10 @@ cp_fold (tree x, fold_flags_t flags)
op0, op1);
}
 
-  if (op0 != TREE_OPERAND (x, 0) || op1 != TREE_OPERAND (x, 1))
-   {
- if (op0 == error_mark_node || op1 == error_mark_node)
-   x = error_mark_node;
- else
-   x = fold_build2_loc (loc, code, TREE_TYPE (x), op0, op1);
-   }
+  if (op0 == error_mark_node || op1 == error_mark_node)
+   x = error_mark_node;
+  else if (op0 != TREE_OPERAND (x, 0) || op1 != TREE_OPERAND (x, 1))
+   x = fold_build2_loc (loc, code, TREE_TYPE (x), op0, op1);
   else
x = fold (x);
 
@@ -3268,17 +3259,14 @@ cp_fold (tree x, fold_flags_t flags)
}
}
 
-  if (op0 != TREE_OPERAND (x, 0)
- || op1 != TREE_OPERAND (x, 1)
- || op2 != TREE_OPERAND (x, 2))
-   {
- if (op0 == error_mark_node
- || op1 == error_mark_node
- || op2 == error_mark_node)
-   x = error_mark_node;
- else
-   x = fold_build3_loc (loc, 

[gcc r15-7114] match: Improve the `x ==/!= ~x` pattern [PR118483]

2025-01-21 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:dae2b6246c00f4389b617ffaa30459bd22d9fe13

commit r15-7114-gdae2b6246c00f4389b617ffaa30459bd22d9fe13
Author: Andrew Pinski 
Date:   Wed Jan 15 20:17:09 2025 -0800

match: Improve the `x ==/!= ~x` pattern [PR118483]

This improves the pattern in 2 ways:
* Allow for an optional convert, similar to how the few other
  `a OP ~a` patterns also allow for an optional convert.
* Use bitwise_inverted_equal_p/maybe_bit_not instead of directly
  matching bit_not. Just like the other patterns do too.

Note pr118483-2.c used to be optimized for aarch64-linux-gnu with GCC 4.9.4
at the RTL level even though the gimple level was missing it.

PR tree-optimization/118483

gcc/ChangeLog:

* match.pd (`x ==/!= ~x`): Allow for an optional convert
and use bitwise_inverted_equal_p/maybe_bit_not instead of
directly matching bit_not.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr118483-1.c: New test.
* gcc.dg/tree-ssa/pr118483-2.c: New test.
* gcc.dg/tree-ssa/pr118483-3.c: New test.
* gcc.dg/tree-ssa/pr118483-4.c: New test.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/match.pd   |  7 +--
 gcc/testsuite/gcc.dg/tree-ssa/pr118483-1.c | 18 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr118483-2.c | 18 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr118483-3.c | 14 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr118483-4.c | 11 +++
 5 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f4359d3a005a..1cdc7e94f1fe 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6959,8 +6959,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* x != ~x -> true */
 (for cmp (eq ne)
  (simplify
-  (cmp:c @0 (bit_not @0))
-  { constant_boolean_node (cmp == NE_EXPR, type); }))
+  (cmp:c (convert? @0) (convert? (maybe_bit_not @1)))
+  (with { bool wascmp; }
+   (if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))
+&& bitwise_inverted_equal_p (@0, @1, wascmp))
+{ constant_boolean_node (cmp == NE_EXPR, type); }
 
 /* Fold ~X op ~Y as Y op X.  */
 (for cmp (simple_comparison)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118483-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-1.c
new file mode 100644
index 000000000000..e31876c940a2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/118483 */
+/* { dg-final { scan-tree-dump-not "abort " "optimized" } } */
+
+
+/* The value of `l == e` is always false as it is
+   `(b == 0) == (b != 0)`. */
+
+int d;
+int f(int b)
+{
+  int e = b == 0;
+  d = e;
+  int l = b != 0;
+  if (l == e)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118483-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-2.c
new file mode 100644
index 000000000000..84867719867d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/118483 */
+/* { dg-final { scan-tree-dump-not "abort " "optimized" } } */
+
+
+/* The value of `l == e` is always false as it is
+   `(b == 0) == (b != 0)`. */
+
+int d;
+int f(int b)
+{
+  int e = b == 0;
+  d = e;
+  int l = !e;
+  if (l == e)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118483-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-3.c
new file mode 100644
index 000000000000..65efaf5c30fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/118483 */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
+
+/* This should optimize down to just `return 0;` */
+/* as `(short)a == ~(short)a` is always false. */
+int f(int a)
+{
+  short b = a;
+  int e = ~a;
+  short c = e;
+  return b == c;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118483-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-4.c
new file mode 100644
index 000000000000..c6e389c46743
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118483-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/118483 */
+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
+
+/* This should optimize down to just `return 0;` */
+/* as `a == 0` and `a != 0` are opposites. */
+int f(int a)
+{
+  return (a == 0) == (a != 0);
+}


[gcc r15-7104] testsuite: Add testcase for already fixed PR [PR118560]

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:5ddcf049376fe77e4895b857b22b3c142930b86b

commit r15-7104-g5ddcf049376fe77e4895b857b22b3c142930b86b
Author: Jakub Jelinek 
Date:   Tue Jan 21 16:53:14 2025 +0100

testsuite: Add testcase for already fixed PR [PR118560]

The fix for this PR has been committed without a testcase.
The following testcase would take at least 15 minutes to compile
on a fast machine (powerpc64-linux both -m32 or -m64), now it takes
100ms.

2025-01-21  Jakub Jelinek  

PR target/118560
* gcc.dg/dfp/pr118560.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/dfp/pr118560.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/dfp/pr118560.c 
b/gcc/testsuite/gcc.dg/dfp/pr118560.c
new file mode 100644
index 000000000000..2409a1a98f03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/dfp/pr118560.c
@@ -0,0 +1,17 @@
+/* PR target/118560 */
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+struct { _Decimal32 a; } b;
+void foo (int, _Decimal32);
+
+#define B(n) \
+void   \
+bar##n (int, _Decimal32 d) \
+{  \
+  foo (n, 1);  \
+  b.a = d; \
+}
+
+#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) 
B(n##8) B(n##9)
+C(1) C(2) C(3) C(4) C(5)


[gcc r15-7109] AVR: Tweak some 16-bit shifts by using MUL.

2025-01-21 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:55d792542d21311e415daee333e2786ac5f150a7

commit r15-7109-g55d792542d21311e415daee333e2786ac5f150a7
Author: Georg-Johann Lay 
Date:   Tue Jan 21 12:47:40 2025 +0100

AVR: Tweak some 16-bit shifts by using MUL.

u16 << 5 and u16 << 6 can be tweaked by using MUL instructions.
Benefit is a better speed ratio with -Os and smaller size with -O2.

gcc/
* config/avr/avr-passes.cc (avr_emit_shift) [ASHIFT,HImode]:
Allow offsets 5 and 6 as 3op provided we have MUL and a scratch.
* config/avr/avr.cc (avr_optimize_size_max_p): New function.
(avr_out_ashlhi3_mul): New function.
(ashlhi3_out) [case 4, 5, 6]: Better speed for -Os.
* config/avr/avr.md (isa) : New attr values.
(*ashlhi3_const): Add alternative for offsets 5 and 6.

Diff:
---
 gcc/config/avr/avr-passes.cc |  4 ++-
 gcc/config/avr/avr.cc| 72 ++--
 gcc/config/avr/avr.md| 19 
 3 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 8bf125f12aaf..e32c46738d81 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4951,7 +4951,9 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int 
off, rtx scratch)
   const bool b8_is_3op = off == 6;
 
   const bool b16_is_3op = select()
-: code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12
+: code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12
+   // The "C05 C06" alternative of *ashlhi3_const.
+   || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6)))
 : code == LSHIFTRT ? satisfies_constraint_C7c (xoff)
 : code == ASHIFTRT ? off == 7
 : bad_case ();
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index ce1a375ce92c..e5a5aa34ec04 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -563,7 +563,8 @@ avr_option_override (void)
 }
 
 
-int avr_optimize_size_level ()
+int
+avr_optimize_size_level ()
 {
   return cfun && cfun->decl
 ? opt_for_fn (cfun->decl, optimize_size)
@@ -571,6 +572,13 @@ int avr_optimize_size_level ()
 }
 
 
+static bool
+avr_optimize_size_max_p ()
+{
+  return avr_optimize_size_level () == OPTIMIZE_SIZE_MAX;
+}
+
+
 /* Implement `INIT_EXPANDERS'.  */
 /* The function works like a singleton.  */
 
@@ -7048,6 +7056,26 @@ ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 }
 
 
+/* Output a 16-bit left shift  XOP[0] = XOP[1] << XOP[2]  using MUL.
+   XOP[3] is an upper 8-bit scratch register.  This function is currently
+   only used for offsets 5 and 6 but works for offsets 1...7 as well.  */
+
+static const char*
+avr_out_ashlhi3_mul (rtx *xop, bool scratch_p, int *plen)
+{
+  gcc_assert (scratch_p && AVR_HAVE_MUL);
+
+  // Takes 7 words and 9 cycles.
+  return avr_asm_len ("ldi %3,1<<%2" CR_TAB
+ "mul %B1,%3"   CR_TAB
+ "mov %B0,r0"   CR_TAB
+ "mul %A1,%3"   CR_TAB
+ "mov %A0,r0"   CR_TAB
+ "or  %B0,r1"   CR_TAB
+ "clr __zero_reg__", xop, plen, -7);
+}
+
+
 /* 16bit shift left ((short)x << i)   */
 
 const char *
@@ -7060,6 +7088,10 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
  && REG_P (operands[3]));
   bool ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
   bool reg1_unused_after = reg_unused_after (insn, operands[1]);
+  int size;
+  int reg0 = REGNO (operands[0]);
+  int reg1 = REGNO (operands[1]);
+  bool use_mul_p = reg1 != reg0 || (scratch && AVR_HAVE_MUL);
 
   if (plen)
*plen = 0;
@@ -7073,7 +7105,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
  return avr_asm_len ("clr %B0" CR_TAB
  "clr %A0", operands, plen, 2);
case 4:
- if (optimize_size && scratch)
+ if (avr_optimize_size_max_p () && scratch)
break;  /* 5 */
  if (ldi_ok)
return avr_asm_len ("swap %A0"  CR_TAB
@@ -7093,6 +7125,23 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *plen)
  break;  /* optimize_size ? 6 : 8 */
 
case 5:
+ size = (scratch ? 5 : 6) + (reg1 != reg0) * (2 - AVR_HAVE_MOVW);
+ if (avr_optimize_size_max_p () && (size < 7 || !use_mul_p))
+   {
+ if (reg0 != reg1)
+   {
+ if (AVR_HAVE_MOVW)
+   avr_asm_len ("movw %0,%1", operands, plen, 1);
+ else
+   avr_asm_len ("mov %A0,%A1" CR_TAB
+"mov %B0,%B1", operands, plen, 2);
+   }
+ break;  // scratch ? 5 : 6
+   }
+
+ if (use_mul_p)
+   return avr_out_ashlhi3_mul (operands, scratch, plen); // 7
+
  if (optimize_size)
break;  /* scratch ? 5 : 6 */

[gcc r15-7110] RISC-V: Unbreak bootstrap.

2025-01-21 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e324619281239bb513840600436b735dfbd32416

commit r15-7110-ge324619281239bb513840600436b735dfbd32416
Author: Robin Dapp 
Date:   Tue Jan 21 18:07:41 2025 +0100

RISC-V: Unbreak bootstrap.

This fixes a wrong format specifier and an unused variable which should
re-enable bootstrap.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_file_end): Fix format string.
(riscv_lshift_subword): Mark MODE as unused.

Diff:
---
 gcc/config/riscv/riscv.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f5e672bb7f50..5a3a05041773 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10375,7 +10375,7 @@ riscv_file_end ()
   fprintf (asm_out_file, "\t.long\t4f - 3f\n");
   fprintf (asm_out_file, "3:\n");
   /* zicfiss, zicfilp.  */
-  fprintf (asm_out_file, "\t.long\t%x\n", feature_1_and);
+  fprintf (asm_out_file, "\t.long\t%lx\n", feature_1_and);
   fprintf (asm_out_file, "4:\n");
   fprintf (asm_out_file, "\t.p2align\t%u\n", p2align);
   fprintf (asm_out_file, "5:\n");
@@ -11959,7 +11959,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
*shift, rtx *mask,
 /* Leftshift a subword within an SImode register.  */
 
 void
-riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
+riscv_lshift_subword (machine_mode mode ATTRIBUTE_UNUSED, rtx value, rtx shift,
  rtx *shifted_value)
 {
   rtx value_reg = gen_reg_rtx (SImode);


[gcc r15-7105] Use `known_ge' instead of `compare_sizes_for_sort'.

2025-01-21 Thread Denis Chertykov via Gcc-cvs
https://gcc.gnu.org/g:ef7ed227fc97cd71093bb373a4d6d6368e1cc635

commit r15-7105-gef7ed227fc97cd71093bb373a4d6d6368e1cc635
Author: Denis Chertykov 
Date:   Tue Jan 21 21:36:05 2025 +0400

Use `known_ge' instead of `compare_sizes_for_sort'.

gcc/
* lra-spills.cc (assign_stack_slot_num_and_sort_pseudos): Use 
known_ge
to compare sizes.

Diff:
---
 gcc/lra-spills.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/lra-spills.cc b/gcc/lra-spills.cc
index 93a0c92db9fa..fc912c43ce6e 100644
--- a/gcc/lra-spills.cc
+++ b/gcc/lra-spills.cc
@@ -394,8 +394,7 @@ assign_stack_slot_num_and_sort_pseudos (int *pseudo_regnos, 
int n)
/* A slot with allocated memory can be shared only with equal
   or smaller register with equal or smaller alignment.  */
if (slots[j].align >= spill_slot_alignment (mode)
-   && compare_sizes_for_sort (slots[j].size,
-  GET_MODE_SIZE (mode)) != -1)
+   && known_ge (slots[j].size, GET_MODE_SIZE (mode)))
  break;
  }
}


[gcc r15-7112] testsuite: Require int32plus for test case pr117546.c

2025-01-21 Thread Dimitar Dimitrov via Gcc-cvs
https://gcc.gnu.org/g:16d778239397b2f70a1e0680c0b82ae6ee98fe9e

commit r15-7112-g16d778239397b2f70a1e0680c0b82ae6ee98fe9e
Author: Dimitar Dimitrov 
Date:   Tue Jan 21 21:38:12 2025 +0200

testsuite: Require int32plus for test case pr117546.c

Test case is valid even if size of int is more than 32 bits.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr117546.c: Require effective target int32plus.

Signed-off-by: Dimitar Dimitrov 

Diff:
---
 gcc/testsuite/gcc.dg/torture/pr117546.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr117546.c 
b/gcc/testsuite/gcc.dg/torture/pr117546.c
index b60f877a9063..a837d056451e 100644
--- a/gcc/testsuite/gcc.dg/torture/pr117546.c
+++ b/gcc/testsuite/gcc.dg/torture/pr117546.c
@@ -1,4 +1,4 @@
-/* { dg-do run { target int32 } } */
+/* { dg-do run { target int32plus } } */
 
 typedef struct {
   int a;


[gcc r15-7111] libphobos: Add MIPS64 implementation of fiber_switchContext [PR118584]

2025-01-21 Thread Iain Buclaw via Gcc-cvs
https://gcc.gnu.org/g:79186e392c77c1862197a49421f77644e3b8c05d

commit r15-7111-g79186e392c77c1862197a49421f77644e3b8c05d
Author: Iain Buclaw 
Date:   Tue Jan 21 19:41:05 2025 +0100

libphobos: Add MIPS64 implementation of fiber_switchContext [PR118584]

Replaces the generic implementation.  The `core.thread.fiber' module
already defines version=AsmExternal on mips64el-linux-gnuabi64.

PR d/118584

libphobos/ChangeLog:

* libdruntime/config/mips/switchcontext.S: Add MIPS64 N64 ABI
implementation of fiber_switchContext.

Diff:
---
 libphobos/libdruntime/config/mips/switchcontext.S | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/libphobos/libdruntime/config/mips/switchcontext.S 
b/libphobos/libdruntime/config/mips/switchcontext.S
index d2fed64c78c1..078ad0b3cce3 100644
--- a/libphobos/libdruntime/config/mips/switchcontext.S
+++ b/libphobos/libdruntime/config/mips/switchcontext.S
@@ -99,4 +99,82 @@ fiber_switchContext:
 .end fiber_switchContext
 .size fiber_switchContext,.-fiber_switchContext
 
+#endif /* _MIPS_SIM == _ABIO32 */
+
+#if defined(__mips64) && _MIPS_SIM == _ABI64
+/*
+ * MIPS 64 ASM BITS
+ * $a0 - void** - ptr to old stack pointer
+ * $a1 - void*  - new stack pointer
+ *
+ */
+.text
+.globl fiber_switchContext
+.align 2
+.ent fiber_switchContext,0
+fiber_switchContext:
+.cfi_startproc
+daddiu $sp, $sp, -(10 * 8)
+
+// fp regs and return address are stored below the stack
+// because we don't want the GC to scan them.
+
+#ifdef __mips_hard_float
+#define BELOW (8 * 8 + 8)
+s.d  $f24, (0 * 8 - BELOW)($sp)
+s.d  $f25, (1 * 8 - BELOW)($sp)
+s.d  $f26, (2 * 8 - BELOW)($sp)
+s.d  $f27, (3 * 8 - BELOW)($sp)
+s.d  $f28, (4 * 8 - BELOW)($sp)
+s.d  $f29, (5 * 8 - BELOW)($sp)
+s.d  $f30, (6 * 8 - BELOW)($sp)
+s.d  $f31, (7 * 8 - BELOW)($sp)
+#endif
+sd $ra, -8($sp)
+
+sd  $s0, (0 * 8)($sp)
+sd  $s1, (1 * 8)($sp)
+sd  $s2, (2 * 8)($sp)
+sd  $s3, (3 * 8)($sp)
+sd  $s4, (4 * 8)($sp)
+sd  $s5, (5 * 8)($sp)
+sd  $s6, (6 * 8)($sp)
+sd  $s7, (7 * 8)($sp)
+sd  $gp, (8 * 8)($sp)
+sd  $fp, (9 * 8)($sp)
+
+// swap stack pointer
+sd   $sp, 0($a0)
+move $sp, $a1
+
+#ifdef __mips_hard_float
+l.d  $f24, (0 * 8 - BELOW)($sp)
+l.d  $f25, (1 * 8 - BELOW)($sp)
+l.d  $f26, (2 * 8 - BELOW)($sp)
+l.d  $f27, (3 * 8 - BELOW)($sp)
+l.d  $f28, (4 * 8 - BELOW)($sp)
+l.d  $f29, (5 * 8 - BELOW)($sp)
+l.d  $f30, (6 * 8 - BELOW)($sp)
+l.d  $f31, (7 * 8 - BELOW)($sp)
 #endif
+ld $ra, -8($sp)
+
+ld $s0, (0 * 8)($sp)
+ld $s1, (1 * 8)($sp)
+ld $s2, (2 * 8)($sp)
+ld $s3, (3 * 8)($sp)
+ld $s4, (4 * 8)($sp)
+ld $s5, (5 * 8)($sp)
+ld $s6, (6 * 8)($sp)
+ld $s7, (7 * 8)($sp)
+ld $gp, (8 * 8)($sp)
+ld $fp, (9 * 8)($sp)
+
+daddiu $sp, $sp, (10 * 8)
+
+jr $ra // return
+.cfi_endproc
+.end fiber_switchContext
+.size fiber_switchContext,.-fiber_switchContext
+
+#endif /* defined(__mips64) && _MIPS_SIM == _ABI64 */


[gcc r15-7106] RISC-V: Enable and adjust the testsuite for XTheadVector.

2025-01-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ab24171d237a9138714f0e6d2bb38fd357ccaed9

commit r15-7106-gab24171d237a9138714f0e6d2bb38fd357ccaed9
Author: Jin Ma 
Date:   Tue Jan 21 10:43:47 2025 -0700

RISC-V: Enable and adjust the testsuite for XTheadVector.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/rvv.exp: Enable testsuite of
XTheadVector.
* gcc.target/riscv/rvv/xtheadvector/pr114194.c: Adjust correctly.
* gcc.target/riscv/rvv/xtheadvector/prefix.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlb-vsb.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlbu-vsb.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlh-vsh.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlhu-vsh.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlw-vsw.c: Likewise.
* gcc.target/riscv/rvv/xtheadvector/vlwu-vsw.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp |  2 ++
 .../gcc.target/riscv/rvv/xtheadvector/pr114194.c   | 32 +++---
 .../gcc.target/riscv/rvv/xtheadvector/prefix.c |  2 +-
 .../gcc.target/riscv/rvv/xtheadvector/vlb-vsb.c| 17 +++-
 .../gcc.target/riscv/rvv/xtheadvector/vlbu-vsb.c   | 17 +++-
 .../gcc.target/riscv/rvv/xtheadvector/vlh-vsh.c| 17 +++-
 .../gcc.target/riscv/rvv/xtheadvector/vlhu-vsh.c   | 17 +++-
 .../gcc.target/riscv/rvv/xtheadvector/vlw-vsw.c| 17 +++-
 .../gcc.target/riscv/rvv/xtheadvector/vlwu-vsw.c   | 17 +++-
 9 files changed, 79 insertions(+), 59 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index d82710e9c416..3824997c9082 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -41,6 +41,8 @@ dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/base/*.\[cS\]]] \
"" $CFLAGS
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/xsfvector/*.\[cS\]]] \
"" $CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/xtheadvector/*.\[cS\]]] \
+   "" $CFLAGS
 gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vsetvl/*.\[cS\]]] \
"" $CFLAGS
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/*.\[cS\]]] \
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
index a82e2d3fbfe6..5c9777b071b5 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
@@ -1,11 +1,11 @@
 /* { dg-do compile { target { ! riscv_abi_e } } } */
-/* { dg-options "-march=rv32gc_xtheadvector" { target { rv32 } } } */
-/* { dg-options "-march=rv64gc_xtheadvector" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_xtheadvector -O2" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_xtheadvector -O2" { target { rv64 } } } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 /*
 ** foo0_1:
-** sb\tzero,0([a-x0-9]+)
+** sb\tzero,0\([a-x0-9]+\)
 ** ret
 */
 void foo0_1 (void *p)
@@ -15,13 +15,13 @@ void foo0_1 (void *p)
 
 /*
 ** foo0_7:
-** sb\tzero,0([a-x0-9]+)
-** sb\tzero,1([a-x0-9]+)
-** sb\tzero,2([a-x0-9]+)
-** sb\tzero,3([a-x0-9]+)
-** sb\tzero,4([a-x0-9]+)
-** sb\tzero,5([a-x0-9]+)
-** sb\tzero,6([a-x0-9]+)
+** sb\tzero,0\([a-x0-9]+\)
+** sb\tzero,1\([a-x0-9]+\)
+** sb\tzero,2\([a-x0-9]+\)
+** sb\tzero,3\([a-x0-9]+\)
+** sb\tzero,4\([a-x0-9]+\)
+** sb\tzero,5\([a-x0-9]+\)
+** sb\tzero,6\([a-x0-9]+\)
 ** ret
 */
 void foo0_7 (void *p)
@@ -32,7 +32,7 @@ void foo0_7 (void *p)
 /*
 ** foo1_1:
 ** li\t[a-x0-9]+,1
-** sb\t[a-x0-9]+,0([a-x0-9]+)
+** sb\t[a-x0-9]+,0\([a-x0-9]+\)
 ** ret
 */
 void foo1_1 (void *p)
@@ -43,11 +43,11 @@ void foo1_1 (void *p)
 /*
 ** foo1_5:
 ** li\t[a-x0-9]+,1
-** sb\t[a-x0-9]+,0([a-x0-9]+)
-** sb\t[a-x0-9]+,1([a-x0-9]+)
-** sb\t[a-x0-9]+,2([a-x0-9]+)
-** sb\t[a-x0-9]+,3([a-x0-9]+)
-** sb\t[a-x0-9]+,4([a-x0-9]+)
+** sb\t[a-x0-9]+,0\([a-x0-9]+\)
+** sb\t[a-x0-9]+,1\([a-x0-9]+\)
+** sb\t[a-x0-9]+,2\([a-x0-9]+\)
+** sb\t[a-x0-9]+,3\([a-x0-9]+\)
+** sb\t[a-x0-9]+,4\([a-x0-9]+\)
 ** ret
 */
 void foo1_5 (void *p)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
index eee727ef6b42..0a18e697830c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
@@ -9,4 +9,4 @@ prefix (vint32m1_t vx, vint32m1_t vy, size_t vl)
   return __riscv_vadd_vv_i32m1 (vx, vy, vl);
 }
 
-/* { dg-final { scan-assembler {\mth\.v\M} } } */
+/* { dg-final { scan-assembler {\mth\.vadd\.vv\M} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlb-vsb.c 
b/gcc/testsuite/gcc.target/riscv/

[gcc r15-7108] c++: Handle CPP_EMBED in cp_parser_objc_message_args [PR118586]

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:1911b8cbd78293582b38d938350a7fa6b3c2d5eb

commit r15-7108-g1911b8cbd78293582b38d938350a7fa6b3c2d5eb
Author: Jakub Jelinek 
Date:   Tue Jan 21 18:49:51 2025 +0100

c++: Handle CPP_EMBED in cp_parser_objc_message_args [PR118586]

As the following testcases show, I forgot to handle CPP_EMBED in
cp_parser_objc_message_args, which is another place that can parse
possibly long valid lists of CPP_COMMA separated CPP_NUMBER tokens.

2025-01-21  Jakub Jelinek  

PR objc++/118586
gcc/cp/
* parser.cc (cp_parser_objc_message_args): Handle CPP_EMBED.
gcc/testsuite/
* objc.dg/embed-1.m: New test.
* obj-c++.dg/embed-1.mm: New test.
* obj-c++.dg/va-meth-2.mm: New test.

Diff:
---
 gcc/cp/parser.cc  | 20 +---
 gcc/testsuite/obj-c++.dg/embed-1.mm   | 15 ++
 gcc/testsuite/obj-c++.dg/va-meth-2.mm | 87 +++
 gcc/testsuite/objc.dg/embed-1.m   | 14 ++
 4 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 37214dae5b11..398fd8538e2f 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -36734,14 +36734,22 @@ cp_parser_objc_message_args (cp_parser* parser)
   /* Handle non-selector arguments, if any. */
   while (token->type == CPP_COMMA)
 {
-  tree arg;
-
   cp_lexer_consume_token (parser->lexer);
-  arg = cp_parser_assignment_expression (parser);
 
-  addl_args
-   = chainon (addl_args,
-  build_tree_list (NULL_TREE, arg));
+  if (cp_lexer_next_token_is (parser->lexer, CPP_EMBED))
+   {
+ tree raw_data = cp_lexer_peek_token (parser->lexer)->u.value;
+ cp_lexer_consume_token (parser->lexer);
+ for (tree argument : raw_data_range (raw_data))
+   addl_args = chainon (addl_args,
+build_tree_list (NULL_TREE, argument));
+   }
+  else
+   {
+ tree arg = cp_parser_assignment_expression (parser);
+ addl_args = chainon (addl_args,
+  build_tree_list (NULL_TREE, arg));
+   }
 
   token = cp_lexer_peek_token (parser->lexer);
 }
diff --git a/gcc/testsuite/obj-c++.dg/embed-1.mm 
b/gcc/testsuite/obj-c++.dg/embed-1.mm
new file mode 100644
index ..630a0f813918
--- /dev/null
+++ b/gcc/testsuite/obj-c++.dg/embed-1.mm
@@ -0,0 +1,15 @@
+// PR objc++/118586
+// { dg-do compile }
+// { dg-options "" }
+
+@interface Foo
++ (int) bar: (int) firstNumber, int secondNumber, ...;
+@end
+
+void
+baz (void)
+{
+  [Foo bar: 1, 2,
+#embed __FILE__
+   , -1];
+}
diff --git a/gcc/testsuite/obj-c++.dg/va-meth-2.mm 
b/gcc/testsuite/obj-c++.dg/va-meth-2.mm
new file mode 100644
index ..f5f096aef0b5
--- /dev/null
+++ b/gcc/testsuite/obj-c++.dg/va-meth-2.mm
@@ -0,0 +1,87 @@
+/* PR objc++/118586 */
+/* Based on objc/execute/va_method.m, by Nicola Pero */
+
+/* { dg-do run } */
+/* { dg-xfail-run-if "Needs OBJC2 ABI" { *-*-darwin* && { lp64 && { ! objc2 } 
} } { "-fnext-runtime" } { "" } } */
+#include "../objc-obj-c++-shared/TestsuiteObject.m"
+#include 
+#include 
+
+/* Test methods with "C-style" trailing arguments, with or without ellipsis. */
+
+@interface MathClass: TestsuiteObject
+/* sum positive numbers; -1 ends the list */
++ (int) sum: (int) firstNumber, int secondNumber, ...;
++ (int) prod: (int) firstNumber, int secondNumber, int thirdNumber;
++ (int) minimum: (int) firstNumber, ...;
+@end
+
+extern "C" int some_func(id self, SEL _cmd, int firstN, int secondN, int 
thirdN, ...) {
+  return firstN + secondN + thirdN;
+}
+
+@implementation MathClass
++ (int) sum: (int) firstNumber, int secondNumber, ...
+{
+  va_list ap;
+  int sum = 0, number = 0;
+
+  va_start (ap, secondNumber);
+  number = firstNumber + secondNumber;
+
+  while (number >= 0)
+{
+  sum += number;
+  number = va_arg (ap, int);
+}
+  
+  va_end (ap);
+
+  return sum;
+}
++ (int) prod: (int) firstNumber, int secondNumber, int thirdNumber {
+  return firstNumber * secondNumber * thirdNumber;
+}
++ (int) minimum: (int) firstNumber, ...
+{
+  va_list ap;
+  int minimum = 999, number = 0;
+  
+  va_start (ap, firstNumber);
+  number = firstNumber;
+  
+  while (number >= 0)
+{
+  minimum = (minimum < number ? minimum: number);
+  number = va_arg (ap, int);
+}
+  
+  va_end (ap);
+  
+  return minimum;
+}
+@end
+
+int main (void)
+{
+#define ONETOTEN 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
+  if ([MathClass sum: ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN,
+   ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN,
+   ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN,
+   ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN,
+   ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN, ONETOTEN,
+   ONETOTEN, ONETOTEN, -1] != 1650)
+abort ();
+  if ([MathClass prod: 4, 5, 6] != 120)
+abort ();
+#de

[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Factorisation initialisation subarray_descriptor

2025-01-21 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:b23e6b8c56a791a814a2528db8776a7206a4350f

commit b23e6b8c56a791a814a2528db8776a7206a4350f
Author: Mikael Morin 
Date:   Tue Jan 21 18:44:41 2025 +0100

Factorisation initialisation subarray_descriptor

Diff:
---
 gcc/fortran/trans-expr.cc | 151 --
 1 file changed, 78 insertions(+), 73 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index b7d1e3df0613..65b6cd8a4642 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -9418,17 +9418,90 @@ gfc_trans_subarray_assign (tree dest, gfc_component * 
cm, gfc_expr * expr)
 }
 
 
+static void
+set_subarray_descriptor (stmtblock_t *block, tree desc, tree value,
+gfc_expr *value_expr, gfc_expr *conv_arg)
+{
+  if (value_expr->expr_type != EXPR_VARIABLE)
+gfc_conv_descriptor_data_set (block, value,
+ null_pointer_node);
+
+  /* Obtain the array spec of full array references.  */
+  gfc_array_spec *as;
+  if (conv_arg)
+as = gfc_get_full_arrayspec_from_expr (conv_arg);
+  else
+as = gfc_get_full_arrayspec_from_expr (value_expr);
+
+  /* Shift the lbound and ubound of temporaries to being unity,
+ rather than zero, based. Always calculate the offset.  */
+  tree offset = gfc_conv_descriptor_offset_get (desc);
+  gfc_add_modify (block, offset, gfc_index_zero_node);
+  tree tmp2 = gfc_create_var (gfc_array_index_type, NULL);
+
+  for (int n = 0; n < value_expr->rank; n++)
+{
+  tree span;
+  tree lbound;
+
+  /* Obtain the correct lbound - ISO/IEC TR 15581:2001 page 9.
+TODO It looks as if gfc_conv_expr_descriptor should return
+the correct bounds and that the following should not be
+necessary.  This would simplify gfc_conv_intrinsic_bound
+as well.  */
+  if (as && as->lower[n])
+   {
+ gfc_se lbse;
+ gfc_init_se (&lbse, NULL);
+ gfc_conv_expr (&lbse, as->lower[n]);
+ gfc_add_block_to_block (block, &lbse.pre);
+ lbound = gfc_evaluate_now (lbse.expr, block);
+   }
+  else if (as && conv_arg)
+   {
+ tree tmp = gfc_get_symbol_decl (conv_arg->symtree->n.sym);
+ lbound = gfc_conv_descriptor_lbound_get (tmp,
+   gfc_rank_cst[n]);
+   }
+  else if (as)
+   lbound = gfc_conv_descriptor_lbound_get (desc,
+   gfc_rank_cst[n]);
+  else
+   lbound = gfc_index_one_node;
+
+  lbound = fold_convert (gfc_array_index_type, lbound);
+
+  /* Shift the bounds and set the offset accordingly.  */
+  tree tmp = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[n]);
+  span = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+   tmp, gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[n]));
+  tmp = fold_build2_loc (input_location, PLUS_EXPR, gfc_array_index_type,
+span, lbound);
+  gfc_conv_descriptor_ubound_set (block, desc,
+ gfc_rank_cst[n], tmp);
+  gfc_conv_descriptor_lbound_set (block, desc,
+ gfc_rank_cst[n], lbound);
+
+  tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+gfc_conv_descriptor_lbound_get (desc,
+gfc_rank_cst[n]),
+gfc_conv_descriptor_stride_get (desc,
+gfc_rank_cst[n]));
+  gfc_add_modify (block, tmp2, tmp);
+  tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+offset, tmp2);
+  gfc_conv_descriptor_offset_set (block, desc, tmp);
+}
+}
+
+
 static tree
 gfc_trans_alloc_subarray_assign (tree dest, gfc_component * cm,
 gfc_expr * expr)
 {
   gfc_se se;
   stmtblock_t block;
-  tree offset;
-  int n;
   tree tmp;
-  tree tmp2;
-  gfc_array_spec *as;
   gfc_expr *arg = NULL;
 
   gfc_start_block (&block);
@@ -9489,10 +9562,6 @@ gfc_trans_alloc_subarray_assign (tree dest, 
gfc_component * cm,
   gfc_add_expr_to_block (&block, tmp);
   gfc_add_block_to_block (&block, &se.post);
 
-  if (expr->expr_type != EXPR_VARIABLE)
-gfc_conv_descriptor_data_set (&block, se.expr,
- null_pointer_node);
-
   /* We need to know if the argument of a conversion function is a
  variable, so that the correct lower bound can be used.  */
   if (expr->expr_type == EXPR_FUNCTION
@@ -9502,71 +9571,7 @@ gfc_trans_alloc_subarray_assign (tree dest, 
gfc_component * cm,
&& expr->value.function.actual->expr->expr_type == EXPR_VARIABLE)
 arg = expr->value.function.actual->expr;
 
-  /* Obtain the array spec of full array references.  */
-  if (arg)
-as = gfc_get_full_arrayspec_from_expr (arg);
-  else
-a

[gcc r15-7107] RISC-V: Add a new constraint to ensure that the vl of XTheadVector does not get a non-zero immediate

2025-01-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3024b12f2cde5db3bf52b49b07e32ef3065929fb

commit r15-7107-g3024b12f2cde5db3bf52b49b07e32ef3065929fb
Author: Jin Ma 
Date:   Tue Jan 21 10:46:37 2025 -0700

RISC-V: Add a new constraint to ensure that the vl of XTheadVector does not 
get a non-zero immediate

Although we have handled the vl of XTheadVector correctly in the
expand phase and predicates, the results show that the work is
still insufficient.

In the curr_insn_transform function, the insn is transformed from:
(insn 69 67 225 12 (set (mem:RVVM8SF (reg/f:DI 218 [ _77 ]) [0  S[128, 128] 
A32])
(if_then_else:RVVM8SF (unspec:RVVMF4BI [
(const_vector:RVVMF4BI repeat [
(const_int 1 [0x1])
])
(reg:DI 209)
(const_int 0 [0])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(reg/v:RVVM8SF 143 [ _xx ])
(mem:RVVM8SF (reg/f:DI 218 [ _77 ]) [0  S[128, 128] A32])))
 (expr_list:REG_DEAD (reg/v:RVVM8SF 143 [ _xx ])
(nil)))
to
(insn 69 284 225 11 (set (mem:RVVM8SF (reg/f:DI 18 s2 [orig:218 _77 ] 
[218]) [0  S[128, 128] A32])
(if_then_else:RVVM8SF (unspec:RVVMF4BI [
(const_vector:RVVMF4BI repeat [
(const_int 1 [0x1])
])
(const_int 1 [0x1])
(const_int 0 [0])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(reg/v:RVVM8SF 104 v8 [orig:143 _xx ] [143])
(mem:RVVM8SF (reg/f:DI 18 s2 [orig:218 _77 ] [218]) [0  S[128, 
128] A32])))
 (nil))

The log for the reload pass contains the message "Changing pseudo 209 in
operand 3 of insn 69 on equiv 0x1".
That is, reload converts the vl operand of the insn from the expected
register (reg:DI 209) to the constant 1 (const_int 1 [0x1]).

This conversion occurs because, although the predicate for the vl operand is
restricted by "vector_length_operand" in the pattern, the constraint is 
still
"rK", which allows the transformation.

Changing the constraint of the vl operand in the pattern from "rK" to "rJ"
would prevent this conversion, but unfortunately this would conflict with
RVV (RISC-V Vector Extension).

Based on the review's recommendations, the best solution for now is to 
create
a new constraint to distinguish between RVV and XTheadVector, which is 
exactly
what this patch does.

PR target/116593

gcc/ChangeLog:

* config/riscv/constraints.md (vl): New.
* config/riscv/thead-vector.md: Replace rK with rvl.
* config/riscv/vector.md: Likewise.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/rvv.exp: Enable testsuite of XTheadVector.
* g++.target/riscv/rvv/xtheadvector/pr116593.C: New test.

Diff:
---
 gcc/config/riscv/constraints.md|   6 +
 gcc/config/riscv/thead-vector.md   |  18 +-
 gcc/config/riscv/vector.md | 476 ++---
 gcc/testsuite/g++.target/riscv/rvv/rvv.exp |   3 +
 .../g++.target/riscv/rvv/xtheadvector/pr116593.C   |  47 ++
 5 files changed, 303 insertions(+), 247 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index f25975dc0208..ba3c6e6a4c44 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -209,6 +209,12 @@
   (and (match_code "const_vector")
(match_test "riscv_vector::const_vec_all_same_in_range_p (op, 0, 31)")))
 
+(define_constraint "vl"
+  "A uimm5 for Vector or zero for XTheadVector."
+  (and (match_code "const_int")
+   (ior (match_test "!TARGET_XTHEADVECTOR && satisfies_constraint_K (op)")
+   (match_test "TARGET_XTHEADVECTOR && satisfies_constraint_J (op)"
+
 (define_constraint "Wc0"
   "@internal
  A constraint that matches a vector of immediate all zeros."
diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
index 5fe9ba08c4eb..5a02debdd207 100644
--- a/gcc/config/riscv/thead-vector.md
+++ b/gcc/config/riscv/thead-vector.md
@@ -108,7 +108,7 @@
   [(set (match_operand:V_VLS_VT 0 "reg_or_mem_operand"  "=vr,vr, m")
(unspec:V_VLS_VT
  [(match_operand:V_VLS_VT 1 "reg_or_mem_operand" " vr, m,vr")
-  (match_operand 2 "vector_length_operand"   " rK, rK, rK")
+  (match_operand 2 "vector_length_operand"   "rvl,rvl,rvl")
   (match_operand 3 "const_1_operand" "  i, i, i")
   (reg:SI VL_REGNUM)
   (reg:SI VTYPE_REGNUM)]
@@ -133,7 +133,7 @@
   [(set (match_operand:VB 0 "reg_or_mem

[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Suppression set_subarray_descriptor

2025-01-21 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:02608142b0ba9f84cafdfefc2e944250db84780d

commit 02608142b0ba9f84cafdfefc2e944250db84780d
Author: Mikael Morin 
Date:   Tue Jan 21 18:53:50 2025 +0100

Suppression set_subarray_descriptor

Diff:
---
 gcc/fortran/trans-expr.cc | 79 ---
 1 file changed, 79 deletions(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 65b6cd8a4642..7a384261dc06 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -9418,83 +9418,6 @@ gfc_trans_subarray_assign (tree dest, gfc_component * 
cm, gfc_expr * expr)
 }
 
 
-static void
-set_subarray_descriptor (stmtblock_t *block, tree desc, tree value,
-gfc_expr *value_expr, gfc_expr *conv_arg)
-{
-  if (value_expr->expr_type != EXPR_VARIABLE)
-gfc_conv_descriptor_data_set (block, value,
- null_pointer_node);
-
-  /* Obtain the array spec of full array references.  */
-  gfc_array_spec *as;
-  if (conv_arg)
-as = gfc_get_full_arrayspec_from_expr (conv_arg);
-  else
-as = gfc_get_full_arrayspec_from_expr (value_expr);
-
-  /* Shift the lbound and ubound of temporaries to being unity,
- rather than zero, based. Always calculate the offset.  */
-  tree offset = gfc_conv_descriptor_offset_get (desc);
-  gfc_add_modify (block, offset, gfc_index_zero_node);
-  tree tmp2 = gfc_create_var (gfc_array_index_type, NULL);
-
-  for (int n = 0; n < value_expr->rank; n++)
-{
-  tree span;
-  tree lbound;
-
-  /* Obtain the correct lbound - ISO/IEC TR 15581:2001 page 9.
-TODO It looks as if gfc_conv_expr_descriptor should return
-the correct bounds and that the following should not be
-necessary.  This would simplify gfc_conv_intrinsic_bound
-as well.  */
-  if (as && as->lower[n])
-   {
- gfc_se lbse;
- gfc_init_se (&lbse, NULL);
- gfc_conv_expr (&lbse, as->lower[n]);
- gfc_add_block_to_block (block, &lbse.pre);
- lbound = gfc_evaluate_now (lbse.expr, block);
-   }
-  else if (as && conv_arg)
-   {
- tree tmp = gfc_get_symbol_decl (conv_arg->symtree->n.sym);
- lbound = gfc_conv_descriptor_lbound_get (tmp,
-   gfc_rank_cst[n]);
-   }
-  else if (as)
-   lbound = gfc_conv_descriptor_lbound_get (desc,
-   gfc_rank_cst[n]);
-  else
-   lbound = gfc_index_one_node;
-
-  lbound = fold_convert (gfc_array_index_type, lbound);
-
-  /* Shift the bounds and set the offset accordingly.  */
-  tree tmp = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[n]);
-  span = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
-   tmp, gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[n]));
-  tmp = fold_build2_loc (input_location, PLUS_EXPR, gfc_array_index_type,
-span, lbound);
-  gfc_conv_descriptor_ubound_set (block, desc,
- gfc_rank_cst[n], tmp);
-  gfc_conv_descriptor_lbound_set (block, desc,
- gfc_rank_cst[n], lbound);
-
-  tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
-gfc_conv_descriptor_lbound_get (desc,
-gfc_rank_cst[n]),
-gfc_conv_descriptor_stride_get (desc,
-gfc_rank_cst[n]));
-  gfc_add_modify (block, tmp2, tmp);
-  tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
-offset, tmp2);
-  gfc_conv_descriptor_offset_set (block, desc, tmp);
-}
-}
-
-
 static tree
 gfc_trans_alloc_subarray_assign (tree dest, gfc_component * cm,
 gfc_expr * expr)
@@ -9571,8 +9494,6 @@ gfc_trans_alloc_subarray_assign (tree dest, gfc_component 
* cm,
&& expr->value.function.actual->expr->expr_type == EXPR_VARIABLE)
 arg = expr->value.function.actual->expr;
 
-  set_subarray_descriptor (&block, dest, se.expr, expr, arg);
-
   if (arg)
 {
   /* If a conversion expression has a null data pointer


[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Revert "Suppression set_subarray_descriptor"

2025-01-21 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:cbaf18c25835750f441ece64f011f36aff55f246

commit cbaf18c25835750f441ece64f011f36aff55f246
Author: Mikael Morin 
Date:   Tue Jan 21 20:05:36 2025 +0100

Revert "Suppression set_subarray_descriptor"

This reverts commit 02608142b0ba9f84cafdfefc2e944250db84780d.

Diff:
---
 gcc/fortran/trans-expr.cc | 79 +++
 1 file changed, 79 insertions(+)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 7a384261dc06..65b6cd8a4642 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -9418,6 +9418,83 @@ gfc_trans_subarray_assign (tree dest, gfc_component * 
cm, gfc_expr * expr)
 }
 
 
+static void
+set_subarray_descriptor (stmtblock_t *block, tree desc, tree value,
+gfc_expr *value_expr, gfc_expr *conv_arg)
+{
+  if (value_expr->expr_type != EXPR_VARIABLE)
+gfc_conv_descriptor_data_set (block, value,
+ null_pointer_node);
+
+  /* Obtain the array spec of full array references.  */
+  gfc_array_spec *as;
+  if (conv_arg)
+as = gfc_get_full_arrayspec_from_expr (conv_arg);
+  else
+as = gfc_get_full_arrayspec_from_expr (value_expr);
+
+  /* Shift the lbound and ubound of temporaries to being unity,
+ rather than zero, based. Always calculate the offset.  */
+  tree offset = gfc_conv_descriptor_offset_get (desc);
+  gfc_add_modify (block, offset, gfc_index_zero_node);
+  tree tmp2 = gfc_create_var (gfc_array_index_type, NULL);
+
+  for (int n = 0; n < value_expr->rank; n++)
+{
+  tree span;
+  tree lbound;
+
+  /* Obtain the correct lbound - ISO/IEC TR 15581:2001 page 9.
+TODO It looks as if gfc_conv_expr_descriptor should return
+the correct bounds and that the following should not be
+necessary.  This would simplify gfc_conv_intrinsic_bound
+as well.  */
+  if (as && as->lower[n])
+   {
+ gfc_se lbse;
+ gfc_init_se (&lbse, NULL);
+ gfc_conv_expr (&lbse, as->lower[n]);
+ gfc_add_block_to_block (block, &lbse.pre);
+ lbound = gfc_evaluate_now (lbse.expr, block);
+   }
+  else if (as && conv_arg)
+   {
+ tree tmp = gfc_get_symbol_decl (conv_arg->symtree->n.sym);
+ lbound = gfc_conv_descriptor_lbound_get (tmp,
+   gfc_rank_cst[n]);
+   }
+  else if (as)
+   lbound = gfc_conv_descriptor_lbound_get (desc,
+   gfc_rank_cst[n]);
+  else
+   lbound = gfc_index_one_node;
+
+  lbound = fold_convert (gfc_array_index_type, lbound);
+
+  /* Shift the bounds and set the offset accordingly.  */
+  tree tmp = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[n]);
+  span = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+   tmp, gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[n]));
+  tmp = fold_build2_loc (input_location, PLUS_EXPR, gfc_array_index_type,
+span, lbound);
+  gfc_conv_descriptor_ubound_set (block, desc,
+ gfc_rank_cst[n], tmp);
+  gfc_conv_descriptor_lbound_set (block, desc,
+ gfc_rank_cst[n], lbound);
+
+  tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+gfc_conv_descriptor_lbound_get (desc,
+gfc_rank_cst[n]),
+gfc_conv_descriptor_stride_get (desc,
+gfc_rank_cst[n]));
+  gfc_add_modify (block, tmp2, tmp);
+  tmp = fold_build2_loc (input_location, MINUS_EXPR, gfc_array_index_type,
+offset, tmp2);
+  gfc_conv_descriptor_offset_set (block, desc, tmp);
+}
+}
+
+
 static tree
 gfc_trans_alloc_subarray_assign (tree dest, gfc_component * cm,
 gfc_expr * expr)
@@ -9494,6 +9571,8 @@ gfc_trans_alloc_subarray_assign (tree dest, gfc_component 
* cm,
&& expr->value.function.actual->expr->expr_type == EXPR_VARIABLE)
 arg = expr->value.function.actual->expr;
 
+  set_subarray_descriptor (&block, dest, se.expr, expr, arg);
+
   if (arg)
 {
   /* If a conversion expression has a null data pointer


[gcc(refs/users/mikael/heads/refactor_descriptor_v01)] Factorisation shift descriptor

2025-01-21 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:8fb40fe6608990cc67e827e9b9f2bf043cdce5cc

commit 8fb40fe6608990cc67e827e9b9f2bf043cdce5cc
Author: Mikael Morin 
Date:   Tue Jan 21 22:27:02 2025 +0100

Factorisation shift descriptor

Diff:
---
 gcc/fortran/trans-array.cc | 117 -
 gcc/fortran/trans-array.h  |   1 +
 gcc/fortran/trans-expr.cc  |  82 ++-
 3 files changed, 100 insertions(+), 100 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index b05f69fdd874..7afa29746e08 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -1198,16 +1198,52 @@ conv_shift_descriptor_lbound (stmtblock_t* block, tree 
desc, int dim,
 }
 
 
-class lb_info
+class lb_info_base
 {
 public:
+  virtual tree lower_bound (stmtblock_t *block, int dim) const = 0;
+};
+
+
+class lb_info : public lb_info_base
+{
+public:
+  using lb_info_base::lower_bound;
   virtual gfc_expr *lower_bound (int dim) const = 0;
+  virtual tree lower_bound (stmtblock_t *block, int dim) const;
 };
 
 
+tree
+lb_info::lower_bound (stmtblock_t *block, int dim) const
+{
+  gfc_expr *lb_expr = lower_bound(dim);
+
+  if (lb_expr == nullptr)
+return gfc_index_one_node;
+  else
+{
+  gfc_se lb_se;
+
+  gfc_init_se (&lb_se, nullptr);
+  gfc_conv_expr (&lb_se, lb_expr);
+
+  gfc_add_block_to_block (block, &lb_se.pre);
+  tree lb_var = gfc_create_var (gfc_array_index_type, "lower_bound");
+  gfc_add_modify (block, lb_var,
+ fold_convert (gfc_array_index_type, lb_se.expr));
+  gfc_add_block_to_block (block, &lb_se.post);
+
+  return lb_var;
+}
+}
+
+
+
 class unset_lb : public lb_info
 {
 public:
+  using lb_info::lower_bound;
   virtual gfc_expr *lower_bound (int) const { return nullptr; }
 };
 
@@ -1218,6 +1254,7 @@ class defined_lb : public lb_info
   gfc_expr * const * lower_bounds;
 
 public:
+  using lb_info::lower_bound;
   defined_lb (int arg_rank, gfc_expr * const 
arg_lower_bounds[GFC_MAX_DIMENSIONS])
 : rank(arg_rank), lower_bounds(arg_lower_bounds) { }
   virtual gfc_expr *lower_bound (int dim) const { return lower_bounds[dim]; }
@@ -1226,7 +1263,7 @@ public:
 
 static void
 conv_shift_descriptor (stmtblock_t *block, tree desc, int rank,
-  const lb_info &info)
+  const lb_info_base &info)
 {
   tree tmp = gfc_conv_descriptor_offset_get (desc);
   tree offset_var = gfc_create_var (TREE_TYPE (tmp), "offset");
@@ -1235,26 +1272,7 @@ conv_shift_descriptor (stmtblock_t *block, tree desc, 
int rank,
   /* Apply a shift of the lbound when supplied.  */
   for (int dim = 0; dim < rank; ++dim)
 {
-  gfc_expr *lb_expr = info.lower_bound(dim);
-
-  tree lower_bound;
-  if (lb_expr == nullptr)
-   lower_bound = gfc_index_one_node;
-  else
-   {
- gfc_se lb_se;
-
- gfc_init_se (&lb_se, nullptr);
- gfc_conv_expr (&lb_se, lb_expr);
-
- gfc_add_block_to_block (block, &lb_se.pre);
- tree lb_var = gfc_create_var (TREE_TYPE (lb_se.expr), "lower_bound");
- gfc_add_modify (block, lb_var, lb_se.expr);
- gfc_add_block_to_block (block, &lb_se.post);
-
- lower_bound = lb_var;
-   }
-
+  tree lower_bound = info.lower_bound (block, dim);
   conv_shift_descriptor_lbound (block, desc, dim, lower_bound, offset_var);
 }
 
@@ -1337,6 +1355,61 @@ gfc_conv_shift_descriptor (stmtblock_t *block, tree desc,
 }
 
 
+class dataref_lb : public lb_info_base
+{
+  gfc_array_spec *as;
+  gfc_expr *conv_arg;
+  tree desc;
+
+public:
+  dataref_lb (gfc_array_spec *arg_as, gfc_expr *arg_conv_arg, tree arg_desc)
+: as(arg_as), conv_arg (arg_conv_arg), desc (arg_desc)
+  {}
+  virtual tree lower_bound (stmtblock_t *block, int dim) const;
+};
+
+
+tree
+dataref_lb::lower_bound (stmtblock_t *block, int dim) const
+{
+  tree lbound;
+  if (as && as->lower[dim])
+{
+  gfc_se lbse;
+  gfc_init_se (&lbse, NULL);
+  gfc_conv_expr (&lbse, as->lower[dim]);
+  gfc_add_block_to_block (block, &lbse.pre);
+  lbound = gfc_evaluate_now (lbse.expr, block);
+}
+  else if (as && conv_arg)
+{
+  tree tmp = gfc_get_symbol_decl (conv_arg->symtree->n.sym);
+  lbound = gfc_conv_descriptor_lbound_get (tmp, gfc_rank_cst[dim]);
+}
+  else if (as)
+lbound = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[dim]);
+  else
+lbound = gfc_index_one_node;
+
+  return fold_convert (gfc_array_index_type, lbound);
+}
+
+
+void
+gfc_conv_shift_descriptor_subarray (stmtblock_t *block, tree desc,
+   gfc_expr *value_expr, gfc_expr *conv_arg)
+{
+  /* Obtain the array spec of full array references.  */
+  gfc_array_spec *as;
+  if (conv_arg)
+as = gfc_get_full_arrayspec_from_expr (conv_arg);
+  else
+as = gfc_get_full_arrayspec_from_expr (value_expr);
+
+  conv_shift_descriptor (block, desc, value_expr->rank, dataref_lb (as, 
conv

[gcc r15-7092] LoongArch: Implement target attribute.

2025-01-21 Thread LuluCheng via Gcc-cvs
https://gcc.gnu.org/g:c01ad91886527f685e67037ab4d36a6b0cd07c08

commit r15-7092-gc01ad91886527f685e67037ab4d36a6b0cd07c08
Author: Lulu Cheng 
Date:   Tue Jan 7 11:42:25 2025 +0800

LoongArch: Implement target attribute.

Add function attributes support for LoongArch.

Currently, the following items are supported:

__attribute__ ((target ("{no-}strict-align")))
__attribute__ ((target ("cmodel=")))
__attribute__ ((target ("arch=")))
__attribute__ ((target ("tune=")))
__attribute__ ((target ("{no-}lsx")))
__attribute__ ((target ("{no-}lasx")))

This implementation is derived from AArch64.

gcc/ChangeLog:

* attr-urls.def: Regenerate.
* config.gcc: Add loongarch-target-attr.o to extra_objs.
* config/loongarch/loongarch-protos.h
(loongarch_option_valid_attribute_p): Function declaration.
(loongarch_option_override_internal): Likewise.
* config/loongarch/loongarch.cc
(loongarch_option_override_internal): Delete the modifications
to target_option_default_node and target_option_current_node.
(loongarch_set_current_function): Add annotation information.
(loongarch_option_override): Add assignment operations to
target_option_default_node and target_option_current_node.
(TARGET_OPTION_VALID_ATTRIBUTE_P): Define.
* config/loongarch/t-loongarch: Add compilation of target file
loongarch-target-attr.o.
* doc/extend.texi: Add description information of LoongArch
Function Attributes.
* config/loongarch/loongarch-target-attr.cc: New file.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/arch-func-attr-1.c: New test.
* gcc.target/loongarch/cmodel-func-attr-1.c: New test.
* gcc.target/loongarch/lasx-func-attr-1.c: New test.
* gcc.target/loongarch/lasx-func-attr-2.c: New test.
* gcc.target/loongarch/lsx-func-attr-1.c: New test.
* gcc.target/loongarch/lsx-func-attr-2.c: New test.
* gcc.target/loongarch/strict_align-func-attr-1.c: New test.
* gcc.target/loongarch/strict_align-func-attr-2.c: New test.
* gcc.target/loongarch/vector-func-attr-1.c: New test.
* gcc.target/loongarch/attr-check-error-message.c: New test.

Diff:
---
 gcc/attr-urls.def  |   6 +
 gcc/config.gcc |   2 +-
 gcc/config/loongarch/loongarch-protos.h|   2 +
 gcc/config/loongarch/loongarch-target-attr.cc  | 413 +
 gcc/config/loongarch/loongarch.cc  |  26 +-
 gcc/config/loongarch/t-loongarch   |   6 +
 gcc/doc/extend.texi|  75 
 .../gcc.target/loongarch/arch-func-attr-1.c|  16 +
 .../loongarch/attr-check-error-message.c   |  30 ++
 .../gcc.target/loongarch/cmodel-func-attr-1.c  |  17 +
 .../gcc.target/loongarch/lasx-func-attr-1.c|  15 +
 .../gcc.target/loongarch/lasx-func-attr-2.c|  12 +
 .../gcc.target/loongarch/lsx-func-attr-1.c |  15 +
 .../gcc.target/loongarch/lsx-func-attr-2.c |  12 +
 .../loongarch/strict_align-func-attr-1.c   |  17 +
 .../loongarch/strict_align-func-attr-2.c   |  17 +
 .../gcc.target/loongarch/vector-func-attr-1.c  |  15 +
 17 files changed, 691 insertions(+), 5 deletions(-)

diff --git a/gcc/attr-urls.def b/gcc/attr-urls.def
index e8417cff43c3..0d27400d218a 100644
--- a/gcc/attr-urls.def
+++ b/gcc/attr-urls.def
@@ -18,6 +18,7 @@ const attr_url_entry function_attrs[] = {
  { "amdgpu_hsa_kernel", 
"gcc/AMD-GCN-Function-Attributes.html#index-amdgpu_005fhsa_005fkernel-function-attribute_002c-AMD-GCN",
 "AMD GCN", 17},
  { "arch=", 
"gcc/AArch64-Function-Attributes.html#index-arch_003d-function-attribute_002c-AArch64",
 "AArch64", 5},
  { "arch=", 
"gcc/ARM-Function-Attributes.html#index-arch_003d-function-attribute_002c-ARM", 
"ARM", 5},
+ { "arch=", 
"gcc/LoongArch-Function-Attributes.html#index-arch_003d-function-attribute_002c-LoongArch",
 "LoongArch", 5},
  { "arch=", 
"gcc/RISC-V-Function-Attributes.html#index-arch_003d-function-attribute_002c-RISC-V",
 "RISC-V", 5},
  { "artificial", 
"gcc/Common-Function-Attributes.html#index-artificial-function-attribute", "", 
10},
  { "assume_aligned", 
"gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute",
 "", 14},
@@ -29,6 +30,7 @@ const attr_url_entry function_attrs[] = {
  { "cdecl", 
"gcc/x86-Function-Attributes.html#index-cdecl-function-attribute_002c-x86-32", 
"x86-32", 5},
  { "cf_check", 
"gcc/x86-Function-Attributes.html#index-cf_005fcheck-function-attribute_002c-x86",
 "x86", 8},
  { "cmodel=", 
"gcc/AArch64-Function-Attributes.html#index-cmodel_003d-function-attribute_002c-AAr

[gcc r15-7093] LoongArch: Implement target pragma.

2025-01-21 Thread LuluCheng via Gcc-cvs
https://gcc.gnu.org/g:4d2a1c292611514dfa5de4cbdb8a426eb437c964

commit r15-7093-g4d2a1c292611514dfa5de4cbdb8a426eb437c964
Author: Lulu Cheng 
Date:   Tue Jan 7 12:00:12 2025 +0800

LoongArch: Implement target pragma.

The target pragmas defined correspond to the target function attributes.

This implementation is derived from AArch64.

gcc/ChangeLog:

* config/loongarch/loongarch-protos.h
(loongarch_reset_previous_fndecl):  Add function declaration.
(loongarch_save_restore_target_globals): Likewise.
(loongarch_register_pragmas): Likewise.
* config/loongarch/loongarch-target-attr.cc
(loongarch_option_valid_attribute_p): Optimize the processing
of attributes.
(loongarch_pragma_target_parse): New functions.
(loongarch_register_pragmas): Likewise.
* config/loongarch/loongarch.cc
(loongarch_reset_previous_fndecl): New functions.
(loongarch_set_current_function): When the old_tree is the same
as the new_tree, the rules for using registers, etc.,
are set according to the option values to ensure that the
pragma can be processed correctly.
* config/loongarch/loongarch.h (REGISTER_TARGET_PRAGMAS):
Define macro.
* doc/extend.texi: Supplemental Documentation.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/arch-func-attr-1.c: Add '#pragma'.
* gcc.target/loongarch/cmodel-func-attr-1.c: Likewise.
* gcc.target/loongarch/lasx-func-attr-1.c: Likewise.
* gcc.target/loongarch/lsx-func-attr-1.c: Likewise.
* gcc.target/loongarch/strict_align-func-attr-1.c: Likewise.
* gcc.target/loongarch/strict_align-func-attr-2.c: Likewise.
* gcc.target/loongarch/vector-func-attr-1.c: Likewise.
* gcc.target/loongarch/arch-pragma-attr-1.c: New test.
* gcc.target/loongarch/cmodel-pragma-attr-1.c: New test.
* gcc.target/loongarch/lasx-pragma-attr-1.c: New test.
* gcc.target/loongarch/lasx-pragma-attr-2.c: New test.
* gcc.target/loongarch/lsx-pragma-attr-1.c: New test.
* gcc.target/loongarch/lsx-pragma-attr-2.c: New test.
* gcc.target/loongarch/strict_align-pragma-attr-1.c: New test.
* gcc.target/loongarch/strict_align-pragma-attr-2.c: New test.
* gcc.target/loongarch/vector-pragma-attr-1.c: New test.
* gcc.target/loongarch/pragma-push-pop.c: New test.

Diff:
---
 gcc/config/loongarch/loongarch-protos.h|  3 ++
 gcc/config/loongarch/loongarch-target-attr.cc  | 59 ++
 gcc/config/loongarch/loongarch.cc  | 19 ---
 gcc/config/loongarch/loongarch.h   |  2 +
 gcc/doc/extend.texi| 13 +
 .../gcc.target/loongarch/arch-func-attr-1.c|  6 ++-
 .../gcc.target/loongarch/arch-pragma-attr-1.c  |  7 +++
 .../gcc.target/loongarch/cmodel-func-attr-1.c  |  4 ++
 .../gcc.target/loongarch/cmodel-pragma-attr-1.c|  7 +++
 .../gcc.target/loongarch/lasx-func-attr-1.c|  4 ++
 .../gcc.target/loongarch/lasx-pragma-attr-1.c  |  7 +++
 .../gcc.target/loongarch/lasx-pragma-attr-2.c  | 12 +
 .../gcc.target/loongarch/lsx-func-attr-1.c |  4 ++
 .../gcc.target/loongarch/lsx-pragma-attr-1.c   |  7 +++
 .../gcc.target/loongarch/lsx-pragma-attr-2.c   | 12 +
 .../gcc.target/loongarch/pragma-push-pop.c | 22 
 .../loongarch/strict_align-func-attr-1.c   |  4 ++
 .../loongarch/strict_align-func-attr-2.c   |  4 ++
 .../loongarch/strict_align-pragma-attr-1.c |  7 +++
 .../loongarch/strict_align-pragma-attr-2.c |  7 +++
 .../gcc.target/loongarch/vector-func-attr-1.c  |  4 ++
 .../gcc.target/loongarch/vector-pragma-attr-1.c|  7 +++
 22 files changed, 213 insertions(+), 8 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index dba52beac00a..b99f949a004e 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -216,4 +216,7 @@ extern bool loongarch_explicit_relocs_p (enum 
loongarch_symbol_type);
 extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type);
 extern bool loongarch_option_valid_attribute_p (tree, tree, tree, int);
 extern void loongarch_option_override_internal (struct loongarch_target *, 
struct gcc_options *, struct gcc_options *);
+extern void loongarch_reset_previous_fndecl (void);
+extern void loongarch_save_restore_target_globals (tree new_tree);
+extern void loongarch_register_pragmas (void);
 #endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch-target-attr.cc 
b/gcc/config/loongarch/loongarch-target-attr.cc
index 6bb1e6b753cb..cee7031ca1e7 100644
--- a/gcc/

[gcc r15-7095] middle-end: use ncopies both when registering and reading masks [PR118273]

2025-01-21 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:1dd79f44dfb64b441f3d6c64e7f909d73441bd05

commit r15-7095-g1dd79f44dfb64b441f3d6c64e7f909d73441bd05
Author: Tamar Christina 
Date:   Tue Jan 21 10:29:08 2025 +

middle-end: use ncopies both when registering and reading masks [PR118273]

When registering masks for SIMD clone we end up using nmasks instead of
nvectors where nmasks seems to compute the number of input masks required 
for
the call given the current simdlen.

This is however wrong as vect_record_loop_mask wants to know how many masks 
you
want to create from the given vectype. i.e. which level of rgroups to 
create.

This ends up mismatching with vect_get_loop_mask which uses nvectors and if 
the
return type is narrower than the input types there will be a mismatch which
causes us to try to read from the given rgroup.  It only happens to work if 
the
function had an additional argument that's wider or if all elements and 
return
types are the same size.

This fixes it by using nvectors during registration as well, which has 
already
taken into account SLP and VF.

gcc/ChangeLog:

PR middle-end/118273
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Use nvectors 
when
doing mask registrations.

gcc/testsuite/ChangeLog:

PR middle-end/118273
* gcc.target/aarch64/vect-simd-clone-4.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c | 15 +++
 gcc/tree-vect-stmts.cc   | 11 +++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c 
b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
new file mode 100644
index 000000000000..9b52af703933
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile }  */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+#pragma GCC target ("+sve")
+
+extern char __attribute__ ((simd, const)) fn3 (short);
+void test_fn3 (float *a, float *b, double *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+a[i] = fn3 (c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn3\n} } } */
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 833029fcb001..21fb5cf5bd47 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4561,14 +4561,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_MASK:
  if (loop_vinfo
  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-   {
- unsigned nmasks
-   = exact_div (ncopies * bestn->simdclone->simdlen,
-TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
- vect_record_loop_mask (loop_vinfo,
-&LOOP_VINFO_MASKS (loop_vinfo),
-nmasks, vectype, op);
-   }
+   vect_record_loop_mask (loop_vinfo,
+  &LOOP_VINFO_MASKS (loop_vinfo),
+  ncopies, vectype, op);
 
  break;
}


[gcc r15-7096] c++: Don't ICE in build_class_member_access_expr during error recovery [PR118225]

2025-01-21 Thread Simon Martin via Gcc-cvs
https://gcc.gnu.org/g:4e4c378ac1f923a310fa31be85ed8c0c50e9f5ef

commit r15-7096-g4e4c378ac1f923a310fa31be85ed8c0c50e9f5ef
Author: Simon Martin 
Date:   Tue Jan 21 13:31:41 2025 +0100

c++: Don't ICE in build_class_member_access_expr during error recovery 
[PR118225]

The invalid case in this PR trips on an assertion in
build_class_member_access_expr that build_base_path would never return
an error_mark_node, which is actually incorrect if the object involves a
tree with an error_mark_node DECL_INITIAL, like here.

This patch changes the assert to not fire if an error has been reported.

PR c++/118225

gcc/cp/ChangeLog:

* typeck.cc (build_class_member_access_expr): Let errors that
have been reported go through.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-ice21.C: New test.

Diff:
---
 gcc/cp/typeck.cc |  2 +-
 gcc/testsuite/g++.dg/cpp0x/constexpr-ice21.C | 17 +
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 3e0d71102abd..6b549809243a 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -2980,7 +2980,7 @@ build_class_member_access_expr (cp_expr object, tree 
member,
/*nonnull=*/1, complain);
  /* If we found the base successfully then we should be able
 to convert to it successfully.  */
- gcc_assert (object != error_mark_node);
+ gcc_assert (object != error_mark_node || seen_error ());
}
 
   /* If MEMBER is from an anonymous aggregate, we have converted
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-ice21.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-ice21.C
new file mode 100644
index 000000000000..46273654f240
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-ice21.C
@@ -0,0 +1,17 @@
+// PR c++/118225
+// { dg-do "compile" { target c++11} }
+
+struct NoMut1 { int a, b; };
+struct NoMut3 : virtual NoMut1 {
+  constexpr NoMut3(int a, int b) // { dg-error "virtual base" "" { target 
c++23 } }
+: NoMut1{a, b}
+  {} // { dg-error "virtual base" }
+};
+void mutable_subobjects() {
+  constexpr NoMut3 nm3 = {1, 2}; // { dg-error "call to non" }
+  struct A {
+void f() {
+  static_assert(nm3.a == 1, ""); // { dg-error "local variable" }
+}
+  };
+}


[gcc r15-7100] [RISC-V][PR target/116256] Fix incorrect return value for predicate

2025-01-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:61995d86b66b39698c0dfbbab8d8dca579b42d00

commit r15-7100-g61995d86b66b39698c0dfbbab8d8dca579b42d00
Author: Jeff Law 
Date:   Tue Jan 21 06:56:27 2025 -0700

[RISC-V][PR target/116256] Fix incorrect return value for predicate

Another bug found while chasing paths to fix the remaining issues in 
pr116256.

This case is sometimes benign when the optimizers are enabled.  But could 
show
up in a -O0 compile with some patterns I was playing around with.

Basically we have a predicate that is meant to return true if bits set in 
the
operand are all consecutive.

That predicate would return the wrong value when presented with (const_int 
0)
indicating it had a run of on bits when obviously no bits are on 😉

It's pretty obvious once you look at the implementation.

if (exact_log2 ((val >> ctz_hwi (val)) + 1) < 0)
  return false;
return true;

The right shift is always going to produce 0.  0 + 1 = 1 which is a power 
of 2.
So exact_log2 returns 0 and we get a true result rather than a false result.

The fix is trivial.  "<=".  While inside we might as well fix the 
formatting.

Tested on rv32 and rv64 in my tester.  Waiting on upstream pre-commit 
testing
to render a verdict.

PR target/116256
gcc/
* config/riscv/predicates.md (consecutive_bits_operand): Properly
handle (const_int 0).

Diff:
---
 gcc/config/riscv/predicates.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 1f67d30be9d9..f26bafcc688b 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -423,11 +423,11 @@
 (define_predicate "consecutive_bits_operand"
   (match_code "const_int")
 {
-   unsigned HOST_WIDE_INT val = UINTVAL (op);
-   if (exact_log2 ((val >> ctz_hwi (val)) + 1) < 0)
-   return false;
+  unsigned HOST_WIDE_INT val = UINTVAL (op);
+  if (exact_log2 ((val >> ctz_hwi (val)) + 1) <= 0)
+return false;
 
-   return true;
+  return true;
 })
 
 (define_predicate "const_two_s12"


[gcc r15-7099] Regenerate aarch64.opt.urls

2025-01-21 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:1a3a5f5db6603f9212d421bfc48aa7e2922efd00

commit r15-7099-g1a3a5f5db6603f9212d421bfc48aa7e2922efd00
Author: Alfie Richards 
Date:   Tue Jan 21 13:42:05 2025 +0000

Regenerate aarch64.opt.urls

This updates aarch64.opt.urls after my patch earlier today.

Pushing directly as it's an obvious fix.

gcc/ChangeLog:

* config/aarch64/aarch64.opt.urls: Regenerate.

Diff:
---
 gcc/config/aarch64/aarch64.opt.urls | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.opt.urls 
b/gcc/config/aarch64/aarch64.opt.urls
index 4fa903843784..7ec14a943817 100644
--- a/gcc/config/aarch64/aarch64.opt.urls
+++ b/gcc/config/aarch64/aarch64.opt.urls
@@ -92,3 +92,6 @@ 
UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg)
 mstack-protector-guard-offset=
 UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset)
 
+Wexperimental-fmv-target
+UrlSuffix(gcc/AArch64-Options.html#index-Wexperimental-fmv-target)
+


[gcc r15-7101] MAINTAINERS: add myself to write after approval

2025-01-21 Thread Alfie Richards via Gcc-cvs
https://gcc.gnu.org/g:ddc65177bb5a799acb8b17181ca8275acc462a81

commit r15-7101-gddc65177bb5a799acb8b17181ca8275acc462a81
Author: Alfie Richards 
Date:   Tue Jan 21 13:53:29 2025 +0000

MAINTAINERS: add myself to write after approval

ChangeLog:

* MAINTAINERS: Add myself to write after approval.

Diff:
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 256a03957d59..44367b27b415 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -749,6 +749,7 @@ Fritz Reese foreese 

 Volker Reichelt reichelt
 Joern Rennecke  amylaar 
 Bernhard Reutner-Fischeraldot   
+Alfie Richards  -   
 Torvald Riegel  torvald 
 Tom Rix -   
 Pierre-Marie de Rodat   pmderodat   


[gcc r15-7118] c++: 'this' capture clobbered during recursive inst [PR116756]

2025-01-21 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:303cc73182db5ed367b184df813cd50864c55f83

commit r15-7118-g303cc73182db5ed367b184df813cd50864c55f83
Author: Patrick Palka 
Date:   Tue Jan 21 21:57:02 2025 -0500

c++: 'this' capture clobbered during recursive inst [PR116756]

Here during instantiation of generic lambda's op() [with I = 0] we
substitute into the call self(self, cst<1>{}) which requires recursive
instantiation of the same op() [with I = 1] (which isn't deferred due to
lambda's deduced return type).  During this recursive instantiation, the
DECL_EXPR case of tsubst_stmt clobbers LAMBDA_EXPR_THIS_CAPTURE to point
to the child op()'s specialized capture proxy instead of the parent's,
and the original value is never restored.

So later when substituting into the openSeries call in the parent op()
maybe_resolve_dummy uses the 'this' proxy belonging to the child op(),
which leads to a context mismatch ICE during gimplification of the
proxy.

An earlier version of this patch fixed this by making instantiate_body
save/restore LAMBDA_EXPR_THIS_CAPTURE during a lambda op() instantiation.
But it seems cleaner to avoid overwriting LAMBDA_EXPR_THIS_CAPTURE in the
first place by making it point to the non-specialized capture proxy, and
instead call retrieve_local_specialization as needed, which is what this
patch implements.  It's natural then to not clear LAMBDA_EXPR_THIS_CAPTURE
after parsing/regenerating a lambda.

PR c++/116756

gcc/cp/ChangeLog:

* lambda.cc (lambda_expr_this_capture): Call
retrieve_local_specialization on the result of
LAMBDA_EXPR_THIS_CAPTURE for a generic lambda.
* parser.cc (cp_parser_lambda_expression): Don't clear
LAMBDA_EXPR_THIS_CAPTURE.
* pt.cc (tsubst_stmt) <case DECL_EXPR>: Don't overwrite
LAMBDA_EXPR_THIS_CAPTURE with the specialized capture.
(tsubst_lambda_expr): Don't clear LAMBDA_EXPR_THIS_CAPTURE
afterward.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/constexpr-if-lambda7.C: New test.

Reviewed-by: Jason Merrill 

Diff:
---
 gcc/cp/lambda.cc  |  6 ++
 gcc/cp/parser.cc  |  3 ---
 gcc/cp/pt.cc  | 11 +--
 gcc/testsuite/g++.dg/cpp1z/constexpr-if-lambda7.C | 24 +++
 4 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index be8a0fe01cba..4ee8f6c745da 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -785,6 +785,12 @@ lambda_expr_this_capture (tree lambda, int add_capture_p)
   tree result;
 
   tree this_capture = LAMBDA_EXPR_THIS_CAPTURE (lambda);
+  if (this_capture)
+if (tree spec = retrieve_local_specialization (this_capture))
+  {
+   gcc_checking_assert (generic_lambda_fn_p (lambda_function (lambda)));
+   this_capture = spec;
+  }
 
   /* In unevaluated context this isn't an odr-use, so don't capture.  */
   if (cp_unevaluated_operand)
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 398fd8538e2f..a9eddd1a6da5 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -11825,9 +11825,6 @@ cp_parser_lambda_expression (cp_parser* parser)
 parser->omp_array_section_p = saved_omp_array_section_p;
   }
 
-  /* This field is only used during parsing of the lambda.  */
-  LAMBDA_EXPR_THIS_CAPTURE (lambda_expr) = NULL_TREE;
-
   /* This lambda shouldn't have any proxies left at this point.  */
   gcc_assert (LAMBDA_EXPR_PENDING_PROXIES (lambda_expr) == NULL);
   /* And now that we're done, push proxies for an enclosing lambda.  */
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6a5d65026157..28e05490d06c 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -18940,12 +18940,6 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
else if (is_capture_proxy (DECL_EXPR_DECL (t)))
  {
DECL_CONTEXT (decl) = current_function_decl;
-   if (DECL_NAME (decl) == this_identifier)
- {
-   tree lam = DECL_CONTEXT (current_function_decl);
-   lam = CLASSTYPE_LAMBDA_EXPR (lam);
-   LAMBDA_EXPR_THIS_CAPTURE (lam) = decl;
- }
insert_capture_proxy (decl);
  }
else if (DECL_IMPLICIT_TYPEDEF_P (t))
@@ -20148,8 +20142,7 @@ tsubst_lambda_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
 LAMBDA_EXPR_REGEN_INFO (r)
   = build_template_info (t, preserve_args (args));
 
-  gcc_assert (LAMBDA_EXPR_THIS_CAPTURE (t) == NULL_TREE
- && LAMBDA_EXPR_PENDING_PROXIES (t) == NULL);
+  gcc_assert (LAMBDA_EXPR_PENDING_PROXIES (t) == NULL);
 
   vec<tree,va_gc>* field_packs = NULL;
   unsigned name_independent_cnt = 0;
@@ -20364,8 +20357,6 @@ tsub

[gcc r15-7116] Revert "[PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions"

2025-01-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3f641a8f1d1fafc0c6531aee185d0e74998987d5

commit r15-7116-g3f641a8f1d1fafc0c6531aee185d0e74998987d5
Author: Jeff Law 
Date:   Tue Jan 21 16:21:44 2025 -0700

Revert "[PATCH 1/2] RISC-V:Add intrinsic support for the CMOs extensions"

This reverts commit d2c8548e0ce51dac6bc51d37236c50f98fca82f0.

Diff:
---
 gcc/config.gcc   |  2 +-
 gcc/config/riscv/riscv_cmo.h | 84 
 2 files changed, 1 insertion(+), 85 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 6f9f7313e132..9e167f7f00d5 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -555,7 +555,7 @@ riscv*)
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o 
sifive-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h riscv_cmo.h sifive_vector.h"
+   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
riscv_th_vector.h sifive_vector.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/riscv_cmo.h b/gcc/config/riscv/riscv_cmo.h
deleted file mode 100644
index 5b9b4536a598..000000000000
--- a/gcc/config/riscv/riscv_cmo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* RISC-V CMO Extension intrinsics include file.
-   Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published
-   by the Free Software Foundation; either version 3, or (at your
-   option) any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef __RISCV_CMO_H
-#define __RISCV_CMO_H
-
-#if defined (__riscv_zicbom)
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_clean (void *addr)
-{
-__builtin_riscv_zicbom_cbo_clean (addr);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_flush (void *addr)
-{
-__builtin_riscv_zicbom_cbo_flush (addr);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_inval (void *addr)
-{
-__builtin_riscv_zicbom_cbo_inval (addr);
-}
-
-#endif // __riscv_zicbom
-
-#if defined (__riscv_zicbop)
-
-# define rnum 1
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_prefetch (void *addr, const int vs1, const int vs2)
-{
-__builtin_prefetch (addr,vs1,vs2);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_prefetchi ()
-{
-return __builtin_riscv_zicbop_cbo_prefetchi (rnum);
-}
-
-#endif // __riscv_zicbop
-
-#if defined (__riscv_zicboz)
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__riscv_cmo_zero (void *addr)
-{
-__builtin_riscv_zicboz_cbo_zero (addr);
-}
-
-#endif // __riscv_zicboz
-
-#endif // __RISCV_CMO_H


[gcc r15-7115] Revert "[PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions"

2025-01-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d4a1a63fc4fbfb7ed92862cd8befc7bef2bc602b

commit r15-7115-gd4a1a63fc4fbfb7ed92862cd8befc7bef2bc602b
Author: Jeff Law 
Date:   Tue Jan 21 16:20:16 2025 -0700

Revert "[PATCH 2/2] RISC-V:Add intrinsic cases for the CMOs extensions"

This reverts commit b22d9c8f8216d15773dee4f9677c6b26aff507fd.

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmo-32.c | 58 -
 gcc/testsuite/gcc.target/riscv/cmo-64.c | 58 -
 2 files changed, 116 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-32.c 
b/gcc/testsuite/gcc.target/riscv/cmo-32.c
deleted file mode 100644
index 071586beacc3..000000000000
--- a/gcc/testsuite/gcc.target/riscv/cmo-32.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target rv32} */
-/* { dg-options "-march=rv32gc_zicbom_zicbop_zicboz -mabi=ilp32 -std=gnu17" } 
*/
-
-#include "riscv_cmo.h"
-
-void foo1 (void *addr)
-{
-__riscv_cmo_clean(0);
-__riscv_cmo_clean(addr);
-__riscv_cmo_clean((void*)0x111);
-}
-
-void foo2 (void *addr)
-{
-__riscv_cmo_flush(0);
-__riscv_cmo_flush(addr);
-__riscv_cmo_flush((void*)0x111);
-}
-
-void foo3 (void *addr)
-{
-__riscv_cmo_inval(0);
-__riscv_cmo_inval(addr);
-__riscv_cmo_inval((void*)0x111);
-}
-
-void foo4 (void *addr)
-{
-__riscv_cmo_prefetch(addr,0,0);
-__riscv_cmo_prefetch(addr,0,1);
-__riscv_cmo_prefetch(addr,0,2);
-__riscv_cmo_prefetch(addr,0,3);
-__riscv_cmo_prefetch(addr,1,0);
-__riscv_cmo_prefetch(addr,1,1);
-__riscv_cmo_prefetch(addr,1,2);
-__riscv_cmo_prefetch(addr,1,3);
-}
-
-int foo5 (int num)
-{
-return __riscv_cmo_prefetchi(num);
-}
-
-void foo6 (void *addr)
-{
-__riscv_cmo_zero(0);
-__riscv_cmo_zero(addr);
-__riscv_cmo_zero((void*)0x121);
-}
-
-/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
-/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
-/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-64.c 
b/gcc/testsuite/gcc.target/riscv/cmo-64.c
deleted file mode 100644
index dc9fc97b94bb..000000000000
--- a/gcc/testsuite/gcc.target/riscv/cmo-64.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target rv64 } */
-/* { dg-options "-march=rv64gc_zicbom_zicbop_zicboz -mabi=lp64d -std=gnu17" } 
*/
-
-#include "riscv_cmo.h"
-
-void foo1 (void *addr)
-{
-__riscv_cmo_clean(0);
-__riscv_cmo_clean(addr);
-__riscv_cmo_clean((void*)0x111);
-}
-
-void foo2 (void *addr)
-{
-__riscv_cmo_flush(0);
-__riscv_cmo_flush(addr);
-__riscv_cmo_flush((void*)0x111);
-}
-
-void foo3 (void *addr)
-{
-__riscv_cmo_inval(0);
-__riscv_cmo_inval(addr);
-__riscv_cmo_inval((void*)0x111);
-}
-
-void foo4 (void *addr)
-{
-__riscv_cmo_prefetch(addr,0,0);
-__riscv_cmo_prefetch(addr,0,1);
-__riscv_cmo_prefetch(addr,0,2);
-__riscv_cmo_prefetch(addr,0,3);
-__riscv_cmo_prefetch(addr,1,0);
-__riscv_cmo_prefetch(addr,1,1);
-__riscv_cmo_prefetch(addr,1,2);
-__riscv_cmo_prefetch(addr,1,3);
-}
-
-int foo5 (int num)
-{
-return __riscv_cmo_prefetchi(num);
-}
-
-void foo6 (void *addr)
-{
-__riscv_cmo_zero(0);
-__riscv_cmo_zero(addr);
-__riscv_cmo_zero((void*)0x121);
-}
-
-/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
-/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
-/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */


[gcc r14-11234] c++: Wrap force_target_expr in get_member_function_from_ptrfunc with save_expr [PR118509]

2025-01-21 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:0e4f03c6701f6ef9493c78cf3bbf4aa8e41cf04b

commit r14-11234-g0e4f03c6701f6ef9493c78cf3bbf4aa8e41cf04b
Author: Jakub Jelinek 
Date:   Wed Jan 22 00:18:24 2025 +0100

c++: Wrap force_target_expr in get_member_function_from_ptrfunc with 
save_expr [PR118509]

My October PR117259 fix to get_member_function_from_ptrfunc to use a
TARGET_EXPR rather than SAVE_EXPR unfortunately caused some regressions as
well as the following testcase shows.
What happens is that
get_member_function_from_ptrfunc -> build_base_path calls save_expr,
so since the PR117259 change in many cases it will call save_expr on
a TARGET_EXPR.  And, for some strange reason a TARGET_EXPR is not considered
an invariant, so we get a SAVE_EXPR wrapped around the TARGET_EXPR.
That SAVE_EXPR <TARGET_EXPR <...>> gets initially added only to the second
operand of ?:, so at that point it would still work fine during expansion.
But unfortunately an expression with that subexpression is handed to the
caller also through *instance_ptrptr = instance_ptr; and gets evaluated
once again when computing the first argument to the method.
So, essentially, we end up with
(TARGET_EXPR <D.2907, ...>, (... ? ... SAVE_EXPR <TARGET_EXPR <D.2907, ...>>
 ... : ...)) (... SAVE_EXPR <TARGET_EXPR <D.2907, ...>> ..., ...);
and while D.2907 is initialized during gimplification in the code dominating
everything that uses it, the extra temporary created for the SAVE_EXPR
is initialized only conditionally (if the ?: condition is true) but then
used unconditionally, so we get
pmf-4.C: In function ‘void foo(C, B*)’:
pmf-4.C:12:11: warning: ‘<anonymous>’ may be used uninitialized 
[-Wmaybe-uninitialized]
   12 |   (y->*x) ();
  |   ^~
pmf-4.C:12:11: note: ‘<anonymous>’ was declared here
   12 |   (y->*x) ();
  |   ^~
diagnostic and wrong-code issue too.

As the trunk fix to just treat TARGET_EXPR as invariant seems a little bit 
risky
and I'd like to get it tested on the trunk for a while, for 14.2.1 this 
patch
instead wraps those TARGET_EXPRs into SAVE_EXPRs.  Eventually that can be 
reverted
and the trunk fix backported.

2025-01-21  Jakub Jelinek  

PR c++/118509
* typeck.cc (get_member_function_from_ptrfunc): Wrap 
force_target_expr
with save_expr.

* g++.dg/expr/pmf-4.C: New test.

Diff:
---
 gcc/cp/typeck.cc  |  7 ---
 gcc/testsuite/g++.dg/expr/pmf-4.C | 22 ++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index ebb4c8b7bb9b..e32e4a7b7acd 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -4187,8 +4187,8 @@ get_member_function_from_ptrfunc (tree *instance_ptrptr, 
tree function,
  && !DECL_P (instance_ptr)
  && !TREE_CONSTANT (instance_ptr)))
instance_ptr = instance_save_expr
- = force_target_expr (TREE_TYPE (instance_ptr), instance_ptr,
-  complain);
+ = save_expr (force_target_expr (TREE_TYPE (instance_ptr),
+ instance_ptr, complain));
 
   /* See above comment.  */
   if (TREE_SIDE_EFFECTS (function)
@@ -4196,7 +4196,8 @@ get_member_function_from_ptrfunc (tree *instance_ptrptr, 
tree function,
  && !DECL_P (function)
  && !TREE_CONSTANT (function)))
function
- = force_target_expr (TREE_TYPE (function), function, complain);
+ = save_expr (force_target_expr (TREE_TYPE (function), function,
+ complain));
 
   /* Start by extracting all the information from the PMF itself.  */
   e3 = pfn_from_ptrmemfunc (function);
diff --git a/gcc/testsuite/g++.dg/expr/pmf-4.C 
b/gcc/testsuite/g++.dg/expr/pmf-4.C
new file mode 100644
index 000000000000..87c9be18f70d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/expr/pmf-4.C
@@ -0,0 +1,22 @@
+// PR c++/118509
+// { dg-do run }
+// { dg-options "-Wall -O2" }
+
+struct A { void foo () { a = 1; } int a; A () : a (0) {} };
+struct B : virtual A {};
+typedef void (A::*C) ();
+
+__attribute__((noipa)) void
+foo (C x, B *y)
+{
+  (y->*x) ();
+}
+
+int
+main ()
+{
+  B b;
+  foo (&A::foo, &b);
+  if (b.a != 1)
+__builtin_abort ();
+}