date:20250131

[PATCH] debug/100530 - Revert QUAL_ADDR_SPACE handling from dwarf2out.cc

2025-01-31 Thread Richard Biener

The bug clearly shows that r8-4385-ga297ccb52e0c89 was wrong in
enabling handling of address-space qualification as DWARF type
qualifiers as the code isn't prepared for it actually not being handled
and ends up changing a lesser qualified (without address-space)
type DIE in ways tripping asserts.  The following reverts that
part which then causes the DIE for the same type with address-space
qualifiers removed to be re-used since there's currently no code
to encode address-spaces within dwarf2out.cc or in the DWARF spec.

r8-4385-ga297ccb52e0c89 did not come with a testcase nor a good
description of the bug fixed - I've verified const qualification
mixed with address-spaces creates the expected DWARF.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

OK for trunk?  (I don't plan to backport, the bug points out a
checking ICE only)

PR debug/100530
* dwarf2out.cc (modified_type_die): Do not claim we handle
address-space qualification with dwarf_qual_info[].

* gcc.target/i386/pr100530.c: New testcase.
---
 gcc/dwarf2out.cc | 3 +--
 gcc/testsuite/gcc.target/i386/pr100530.c | 5 +
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr100530.c

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 8085b8d85d8..43884f206c0 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -13678,8 +13678,7 @@ modified_type_die (tree type, int cv_quals, bool 
reverse,
   struct array_descr_info info;
   /* Only these cv-qualifiers are currently handled.  */
   const int cv_qual_mask = (TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE
-   | TYPE_QUAL_RESTRICT | TYPE_QUAL_ATOMIC |
-   ENCODE_QUAL_ADDR_SPACE(~0U));
+   | TYPE_QUAL_RESTRICT | TYPE_QUAL_ATOMIC);
   /* DW_AT_endianity is specified only for base types in the standard.  */
   const bool reverse_type
 = need_endianity_attribute_p (reverse)
diff --git a/gcc/testsuite/gcc.target/i386/pr100530.c 
b/gcc/testsuite/gcc.target/i386/pr100530.c
new file mode 100644
index 000..005c0199129
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100530.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-g" } */
+
+__seg_gs const int var;
+__seg_gs int foo;
-- 
2.43.0

[PATCH] Do not rely on non-SLP analysis for SLP outer loop vectorization

2025-01-31 Thread Richard Biener

We end up relying on non-SLP analysis of the inner loop LC PHI to
set the vectorization method for SLP since vectorizable_reduction
claims responsibility.  The following fixes this.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

* tree-vect-loop.cc (vect_analyze_loop_operations): Only
call vectorizable_lc_phi when not PURE_SLP.
(vectorizable_reduction): Do not claim having handled
the inner loop LC PHI for outer loop vectorization.
---
 gcc/tree-vect-loop.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ce674a71e8a..03426207879 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2171,6 +2171,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
   || (STMT_VINFO_DEF_TYPE (stmt_info)
   == vect_double_reduction_def))
+ && ! PURE_SLP_STMT (stmt_info)
  && !vectorizable_lc_phi (loop_vinfo,
   stmt_info, NULL, NULL))
return opt_result::failure_at (phi, "unsupported phi\n");
@@ -7770,9 +7771,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
{
  /* For SLP we arrive here for both the inner loop LC PHI and
 the outer loop PHI.  The latter is what we want to analyze
-the reduction with.  */
+the reduction with.  The LC PHI is handled by
+vectorizable_lc_phi.  */
  gcc_assert (slp_node);
- return true;
+ return gimple_phi_num_args (as_a  (stmt_info->stmt)) == 2;
}
   use_operand_p use_p;
   gimple *use_stmt;
-- 
2.43.0

[PATCH] force-indirect-call-2.c: Allow indirect branch via GOT

2025-01-31 Thread H.J. Lu

r15-1619-g3b9b8d6cfdf593 changed the codegen from

f2:
.cfi_startproc
pushq   %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movqf1@GOTPCREL(%rip), %rbx
call*%rbx
leaqf3(%rip), %rax
call*%rax
movq%rbx, %rax
popq%rbx
.cfi_def_cfa_offset 8
jmp *%rax
.cfi_endproc

to

f2:
.cfi_startproc
subq$8, %rsp
.cfi_def_cfa_offset 16
call*f1@GOTPCREL(%rip)
leaqf3(%rip), %rax
call*%rax
addq$8, %rsp
.cfi_def_cfa_offset 8
jmp *f1@GOTPCREL(%rip)
.cfi_endproc

Since it is OK to indirect call via memory for -mforce-indirect-call,
allow indirect branch via GOT.

PR target/115673
* gcc.target/i386/force-indirect-call-2.c: Allow indirect branch
via GOT.

Signed-off-by: H.J. Lu 
---
 gcc/testsuite/gcc.target/i386/force-indirect-call-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c 
b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
index 2f702363041..405c97c8000 100644
--- a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
+++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mforce-indirect-call -fPIC" } */
 /* { dg-require-effective-target fpic } */
-/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
+/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*" 3 } } */
 
 #include "force-indirect-call-1.c"
-- 
2.48.1

Re: [PATCH] force-indirect-call-2.c: Allow indirect branch via GOT

2025-01-31 Thread Uros Bizjak

On Fri, Jan 31, 2025 at 11:35 AM H.J. Lu  wrote:

> r15-1619-g3b9b8d6cfdf593 changed the codegen from
>
> f2:
> .cfi_startproc
> pushq   %rbx
> .cfi_def_cfa_offset 16
> .cfi_offset 3, -16
> movqf1@GOTPCREL(%rip), %rbx
> call*%rbx
> leaqf3(%rip), %rax
> call*%rax
> movq%rbx, %rax
> popq%rbx
> .cfi_def_cfa_offset 8
> jmp *%rax
> .cfi_endproc
>
> to
>
> f2:
> .cfi_startproc
> subq$8, %rsp
> .cfi_def_cfa_offset 16
> call*f1@GOTPCREL(%rip)
> leaqf3(%rip), %rax
> call*%rax
> addq$8, %rsp
> .cfi_def_cfa_offset 8
> jmp *f1@GOTPCREL(%rip)
> .cfi_endproc
>
> Since it is OK to indirect call via memory for -mforce-indirect-call,
> allow indirect branch via GOT.
>
> PR target/115673
> * gcc.target/i386/force-indirect-call-2.c: Allow indirect branch
> via GOT.
>

Nice, so at the end it was only a testsuite patch.

OK.

Thanks,
Uros.


>
> Signed-off-by: H.J. Lu 
> ---
>  gcc/testsuite/gcc.target/i386/force-indirect-call-2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
> b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
> index 2f702363041..405c97c8000 100644
> --- a/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
> +++ b/gcc/testsuite/gcc.target/i386/force-indirect-call-2.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mforce-indirect-call -fPIC" } */
>  /* { dg-require-effective-target fpic } */
> -/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*%" 3 } } */
> +/* { dg-final { scan-assembler-times "(?:call|jmp)\[ \\t\]+\\*" 3 } } */
>
>  #include "force-indirect-call-1.c"
> --
> 2.48.1
>
>

[PATCH] OpenMP/Fortran: Add missing pop_state in parse_omp_dispatch

2025-01-31 Thread Paul-Antoine Arras

When the ST_NONE case is taken, the function returns immediately. Not calling
pop_state causes a dangling pointer.

gcc/fortran/ChangeLog:

* parse.cc (parse_omp_dispatch): Add missing pop_state.
---
 gcc/fortran/parse.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index 00cd23d7729..5094d9d3ead 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -6375,7 +6375,10 @@ parse_omp_dispatch (void)
 
   st = next_statement ();
   if (st == ST_NONE)
-return st;
+{
+  pop_state ();
+  return st;
+}
   if (st == ST_CALL || st == ST_ASSIGNMENT)
 accept_statement (st);
   else
-- 
2.47.2

[PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Abhishek Kaushik

>From 4ac7c7e56e23ed2f4dd2dafdfab6cfa110c14260 Mon Sep 17 00:00:00 2001
From: Abhishek Kaushik 
Date: Fri, 31 Jan 2025 01:28:48 -0800
Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce

The current while loop in std::reduce and related functions is hard to
vectorize because the loop control variable is hard to detect.

`while ((__last - __first) >= 4)`

Changing the loop header to a for loop following the OpenMP canonical
form allows easy vectorization, resulting in improved performance.

`for (; __first <= __last - 4; __first += 4)`

This patch modifies the loop header for std::reduce & std::transform_reduce.
---
 libstdc++-v3/include/std/numeric | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index 4d36fcd36d9..9c38ad89e21 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -300,13 +300,12 @@ namespace __detail
   static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, __ref>);
   if constexpr (__is_random_access_iter<_InputIterator>::value)
{
- while ((__last - __first) >= 4)
+ for (; __first <= __last - 4; __first += 4)
{
  _Tp __v1 = __binary_op(__first[0], __first[1]);
  _Tp __v2 = __binary_op(__first[2], __first[3]);
  _Tp __v3 = __binary_op(__v1, __v2);
  __init = __binary_op(__init, __v3);
- __first += 4;
}
}
   for (; __first != __last; ++__first)
@@ -381,7 +380,7 @@ namespace __detail
   if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
__is_random_access_iter<_InputIterator2>>)
{
- while ((__last1 - __first1) >= 4)
+ for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
{
  _Tp __v1 = __binary_op1(__binary_op2(__first1[0], __first2[0]),
  __binary_op2(__first1[1], __first2[1]));
@@ -389,8 +388,6 @@ namespace __detail
  __binary_op2(__first1[3], __first2[3]));
  _Tp __v3 = __binary_op1(__v1, __v2);
  __init = __binary_op1(__init, __v3);
- __first1 += 4;
- __first2 += 4;
}
}
   for (; __first1 != __last1; ++__first1, (void) ++__first2)
@@ -447,7 +444,7 @@ namespace __detail
 {
   if constexpr (__is_random_access_iter<_InputIterator>::value)
{
- while ((__last - __first) >= 4)
+ for (; __first <= __last - 4; __first += 4)
{
  _Tp __v1 = __binary_op(__unary_op(__first[0]),
 __unary_op(__first[1]));
@@ -455,7 +452,6 @@ namespace __detail
 __unary_op(__first[3]));
  _Tp __v3 = __binary_op(__v1, __v2);
  __init = __binary_op(__init, __v3);
- __first += 4;
}
}
   for (; __first != __last; ++__first)
--
2.31.1

Re: [PATCH] debug/100530 - Revert QUAL_ADDR_SPACE handling from dwarf2out.cc

2025-01-31 Thread Jakub Jelinek

On Fri, Jan 31, 2025 at 09:07:52AM +0100, Richard Biener wrote:
> The bug clearly shows that r8-4385-ga297ccb52e0c89 was wrong in
> enabling handling of address-space qualification as DWARF type
> qualifiers as the code isn't prepared to it actually be not handled
> and ends up changing a lesser qualified (without address-space)
> type DIE in ways tripping asserts.  The following reverts that
> part which then causes the DIE for the same type with address-space
> qualifiers removed to be re-used since there's currently no code
> to encode address-spaces within dwarf2out.cc or in the DWARF spec.
> 
> r8-4385-ga297ccb52e0c89 did not come with a testcase nor a good
> description of the bug fixed - I've verified const qualification
> mixed with address-spaces creates the expected DWARF.
> 
> Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.
> 
> OK for trunk?  (I don't plan to backport, the bug points out a
> checking ICE only)
> 
>   PR debug/100530
>   * dwarf2out.cc (modified_type_die): Do not claim we handle
>   address-space qualification with dwarf_qual_info[].
> 
>   * gcc.target/i386/pr100530.c: New testcase.

Ok.

Jakub

[PATCH] x86: Handle -mindirect-branch-register for indirect calls

2025-01-31 Thread H.J. Lu

-mindirect-branch-register requires indirect call and jump via register.
For -mindirect-branch-register, expand indirect calls via register and
update call patterns and peepholes to disable indirect calls via memory.

gcc/

PR target/115673
* config/i386/i386-expand.cc (ix86_expand_call): Force indirect
call via register for -mindirect-branch-register.
* config/i386/i386.md (*call): Disable indirect call via memory
for -mindirect-branch-register.
(*call_got_x32): Likewise.
(*sibcall_GOT_32): Likewise.
(*sibcall): Likewise.
(*sibcall_memory): Likewise.
(*call_pop): Likewise.
(*sibcall_pop): Likewise.
(*sibcall_pop_memory): Likewise.
(*call_value): Likewise.
(*call_value_got_x32): Likewise.
(*sibcall_value_GOT_32): Likewise.
(*sibcall_value): Likewise.
(*sibcall_value_memory): Likewise.
(*call_value_pop): Likewise.
(*sibcall_value_pop): Likewise.
(*sibcall_value_pop_memory): Likewise.
Disable indirect call via memory peepholes for
-mindirect-branch-register.

gcc/testsuite/

PR target/115673
* gcc.target/i386/pr115673-1-x32.c: New test.
* gcc.target/i386/pr115673-1.c: Likewise.
* gcc.target/i386/pr115673-2-x32.c: Likewise.
* gcc.target/i386/pr115673-2.c: Likewise.
* gcc.target/i386/pr115673-3-x32.c: Likewise.
* gcc.target/i386/pr115673-3.c: Likewise.
* gcc.target/i386/pr115673-4-x32.c: Likewise.
* gcc.target/i386/pr115673-4.c: Likewise.
* gcc.target/i386/pr115673-5-x32.c: Likewise.
* gcc.target/i386/pr115673-5.c: Likewise.
* gcc.target/i386/pr115673-6-x32.c: Likewise.
* gcc.target/i386/pr115673-6.c: Likewise.
* gcc.target/i386/pr115673-7-x32.c: Likewise.
* gcc.target/i386/pr115673-7.c: Likewise.
* gcc.target/i386/pr115673-8-x32.c: Likewise.
* gcc.target/i386/pr115673-8.c: Likewise.
* gcc.target/i386/pr115673-9-x32.c: Likewise.
* gcc.target/i386/pr115673-9.c: Likewise.
* gcc.target/i386/pr115673-10-x32.c: Likewise.
* gcc.target/i386/pr115673-10.c: Likewise.
* gcc.target/i386/pr115673-11-x32.c: Likewise.
* gcc.target/i386/pr115673-11.c: Likewise.
* gcc.target/i386/pr115673-12-x32.c: Likewise.
* gcc.target/i386/pr115673-12.c: Likewise.

Co-Authored-By: Uros Bizjak 
Signed-off-by: H.J. Lu 
---
 gcc/config/i386/i386-expand.cc|  20 +--
 gcc/config/i386/i386.md   | 118 --
 .../gcc.target/i386/pr115673-1-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-1.c|  14 +++
 .../gcc.target/i386/pr115673-10-x32.c |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-10.c   |  15 +++
 .../gcc.target/i386/pr115673-11-x32.c |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-11.c   |  14 +++
 .../gcc.target/i386/pr115673-12-x32.c |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-12.c   |  14 +++
 .../gcc.target/i386/pr115673-2-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-2.c|  15 +++
 .../gcc.target/i386/pr115673-3-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-3.c|  14 +++
 .../gcc.target/i386/pr115673-4-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-4.c|  14 +++
 .../gcc.target/i386/pr115673-5-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-5.c|  13 ++
 .../gcc.target/i386/pr115673-6-x32.c  |  15 +++
 gcc/testsuite/gcc.target/i386/pr115673-6.c|  14 +++
 .../gcc.target/i386/pr115673-7-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-7.c|  13 ++
 .../gcc.target/i386/pr115673-8-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-8.c|  13 ++
 .../gcc.target/i386/pr115673-9-x32.c  |   8 ++
 gcc/testsuite/gcc.target/i386/pr115673-9.c|  14 +++
 26 files changed, 365 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-1-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-10-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-11-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-12-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-2-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-3-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-4-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115673-4.c
 create mode 100644 gcc/testsuite/gcc.target

Re: [PATCH 2/2] Add prime path coverage to gcc/gcov

2025-01-31 Thread Jørgen Kvalsvik

Ping. Should I apply these changes and re-submit, or would you like to 
see more changes?


Thanks,
Jørgen

On 1/5/25 22:06, Jørgen Kvalsvik wrote:

On 1/5/25 20:53, Jørgen Kvalsvik wrote:

On 1/5/25 20:25, Jan Hubicka wrote:

ALGORITHM

Since the number of paths grows so fast, we need a good
algorithm. The naive approach of generating all paths and discarding
redundancies (see reference_prime_paths in the diff) simply doesn't
complete for even pretty simple functions with a few ten thousand
paths (granted, the implementation is also poor, but only serves as a
reference). Fazli & Afsharchi in their paper "Time and Space-Efficient
Compositional Method for Prime and Test Paths Generation" describe a
neat algorithm which drastically improves on this for most programs, and
brings complexity down to something manageable. This patch implements
that algorithm with a few minor tweaks.

The algorithm first finds the strongly connected components (SCC) of the
graph and creates a new graph where the vertices are the SCCs of the CFG.
Within these vertices different paths are found - regular prime paths,
paths that start in the SCCs entries, and paths that end in the SCCs exits.
These per-SCC paths are combined with paths through the CFG which greatly
reduces the number of paths needed to be evaluated just to be thrown away.

Using this algorithm we can find the prime paths for somewhat
complicated functions in a reasonable time. Please note that some
programs don't benefit from this at all. We need to find the prime
paths within a SCC, so if a single SCC is very large the function
degenerates to the naive implementation. This can probably be much
improved on, but is an exercise for later.


Interesting I was only aware of the old paper by Ball and Larus
https://ieeexplore.ieee.org/abstract/document/566449


--

OVERALL ARCHITECTURE

Like the other coverages in gcc, this operates on the CFG in the 
profiling

phase, just after branch and condition coverage, in phases:

1. All prime paths are generated, counted, and enumerated from the CFG
2. The paths are evaluated and counter instructions and accumulators are
    emitted
3. gcov reads the CFG and computes the prime paths (same as step 1)
4. gcov prints a report

Simply writing out all the paths in the .gcno file is not really 
viable,

the files would be too big. Additionally, there are limits to the
practicality of measuring (and reporting) on millions of paths, so for
most programs where coverage is feasible, computing paths should be
plenty fast. As a result, path coverage really only adds 1 bit to the
counter, rounded up to nearest 64 ("bucket"), so 64 paths takes up 8
bytes, 65 paths take up 16 bytes.


path coverage can also be used to determine correlated branches (where
outcome of the first if predetermines probability of the second if) which can
be used for optimization: if such paths are detected tail duplication
will likely help propagating some extra invariants.

For this we also need to know actual frequencies of the paths, not only
bit if it was or was not taken.


This would be a relatively simple extension, I think, using adds 
rather than setting bits. I suppose compiled object file size would 
increase by a constant factor, too?




Recording paths is really just massaging large bitsets. Per function,
ceil(paths/64 or 32) buckets (gcov_type) are allocated. Paths are
sorted, so the first path maps to the lowest bit, the second path to 
the

second lowest bit, and so on. On taking an edge and entering a basic
block, a few bitmasks are applied to unset the bits corresponding to 
the
paths outside the block and set the bits of the paths that start in 
that

block. Finally, the right buckets are masked and written to the global
accumulators for the paths that end in the block. Full coverage is
achieved when all bits are set.

gcc does not really inform gcov of abnormal paths, so paths with


Adding abnormal edges is probably not hard, but I am not sure how
realistic it is to cover them all.  Even EH introduces edges that can not
really be taken at runtime.


For coverage, absolutely, but it is a (minor) source of complexity 
because of special rules and how some abnormal edges become fake edges 
when recorded in .gcno.





abnormal paths are ignored. This is probably possible, but requires some
changes to the graph gcc writes to the .gcno file.


If I recall correctly, Ball&Larus simply stores counts into hashtable
assuming that most paths are not taken dynamically.


+@item -e
+@itemx --prime-paths
+Write path coverage to the output file, and write path summary info to
+the standard output.  This option allows you to see how many prime 
paths

+were taken at least once.  For the regular output this option only
+includes the number of paths covered.  For more fine grained 
information

+on paths you can use @option{--prime-paths-lines} or
+@option{--prime-paths-source}.  With @option{--json-format} all path
+details are included in the output.  This requires you to compile the

[PATCH] Record, report basic blocks of conditional exprs

2025-01-31 Thread Jørgen Kvalsvik

Record basic blocks that make up a conditional expression with
-fcondition-coverage and report when using the gcov -w/--verbose flag.
This makes the report more accurate when basic blocks are included as
there may be blocks in-between the actual Boolean expressions, e.g. when
a term is the result of a function call. This helps understanding
the report as gcc uses the CFG, and not source code, to figure out
MC/DC, which is somewhat lost in gcov. While it does not make a
tremendous difference for the gcov report directly, it opens up for more
analysis and clearer reporting.

This change includes deleting the GCOV_TAG_COND_* macros as the .gcno
records are now dynamic in length.

Here is an example comparing two programs:

int main() {
  int a = 1;
  int b = 0;

  if (a && b)
printf ("Success!\n");
  else
printf ("Failure!\n");
}

int f(int);
int g(int);
int main() {
  int a = 1;
  int b = 0;

  if (f (a) && g (b))
printf ("Success!\n");
  else
printf ("Failure!\n");
}

And the corresponding reports:
$ gcov -tagw p1 p2
1:3:int main() {
1:4:  int a = 1;
1:5:  int b = 0;
-:6:
1:7:  if (a && b)
1:7-block 2 (BB 2)
condition outcomes covered 1/4
BBs 2 3
condition  0 not covered (true false)
condition  1 not covered (true)
1:7-block 3 (BB 3)
#:8:printf ("Success!\n");
%:8-block 4 (BB 4)
-:9:  else
1:   10:printf ("Failure!\n");
1:   10-block 5 (BB 5)
-:   11:}

#:6:int main() {
#:7:  int a = 1;
#:8:  int b = 0;
-:9:
#:   10:  if (f (a) && g (b))
%:   10-block 2 (BB 2)
condition outcomes covered 0/4
BBs 3 5
condition  0 not covered (true false)
condition  1 not covered (true false)
%:   10-block 4 (BB 4)
#:   11:printf ("Success!\n");
%:   11-block 6 (BB 6)
-:   12:  else
#:   13:printf ("Failure!\n");
%:   13-block 7 (BB 7)
-:   14:}

gcc/ChangeLog:

* doc/gcov.texi: Add example.
* gcov-dump.cc (tag_conditions): Print basic blocks, not length.
* gcov-io.h (GCOV_TAG_CONDS_LENGTH): Delete.
(GCOV_TAG_CONDS_NUM): Likewise.
* gcov.cc (output_intermediate_json_line): Output basic blocks.
(read_graph_file): Read basic blocks.
(output_conditions): Output basic blocks.
* profile.cc (branch_prob): Write basic blocks for conditions.
---
 gcc/doc/gcov.texi | 32 
 gcc/gcov-dump.cc  | 12 +++-
 gcc/gcov-io.h |  2 --
 gcc/gcov.cc   | 20 +++-
 gcc/profile.cc| 11 +--
 5 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi
index dda279fbff3..268e9e553f3 100644
--- a/gcc/doc/gcov.texi
+++ b/gcc/doc/gcov.texi
@@ -423,6 +423,7 @@ Each @var{condition} has the following form:
   "covered": 2,
   "not_covered_false": [],
   "not_covered_true": [0, 1],
+  "basic_blocks": [2, 3]
 @}
 
 @end smallexample
@@ -989,6 +990,37 @@ condition  1 not covered (true)
 -:   12:@}
 @end smallexample
 
+With @option{-w}, each condition will also print the basic blocks that
+make up the decision.
+
+@smallexample
+$ gcov -t -m -g -a -w tmp
+-:0:Source:tmp.c
+-:0:Graph:tmp.gcno
+-:0:Data:tmp.gcda
+-:0:Runs:1
+-:1:#include 
+-:2:
+1:3:int main()
+-:4:@{
+1:5:  int a = 1;
+1:6:  int b = 0;
+-:7:
+1:7:  if (a && b)
+1:7-block 2 (BB 2)
+condition outcomes covered 1/4
+BBs 2 3
+condition  0 not covered (true false)
+condition  1 not covered (true)
+1:7-block 3 (BB 3)
+#:8:printf ("Success!\n");
+%:8-block 4 (BB 4)
+-:9:  else
+1:   10:printf ("Failure!\n");
+1:   10-block 5 (BB 5)
+-:   12:@}
+@end smallexample
+
 The execution counts are cumulative.  If the example program were
 executed again without removing the @file{.gcda} file, the count for the
 number of times each line in the source was executed would be added to
diff --git a/gcc/gcov-dump.cc b/gcc/gcov-dump.cc
index cc7f8a9ebfb..642e58c22bf 100644
--- a/gcc/gcov-dump.cc
+++ b/gcc/gcov-dump.cc
@@ -396,23 +396,25 @@ tag_arcs (const char *filename ATTRIBUTE_UNUSED,
 
 /* Print number of conditions (not outcomes, i.e. if (x && y) is 2, not 4).  */
 static void
-tag_conditions (const char *filename, unsigned /* tag */, int length,
+tag_conditions (const char *filename, unsigned /* tag */, int /* length */,
unsigned depth)
 {
-  unsigned n_conditions = GCOV_TAG_CONDS_NUM (length);
+  unsigned n_conditions = gcov_read_unsigned ();
 
   printf (" %u conditions", n_conditions);
   if (flag_dump_contents)
 {
   for (unsigned ix = 0; ix != n_conditions; ix++)

[PATCH] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Jakub Jelinek

Hi!

compare_operand uses operand_equal_p under the hood, which e.g. for
INTEGER_CSTs will just match the values regardless of their types.
Now, in many cases comparing the type is redundant, if we have
  x_2 = y_3 + 1;
we've already compared the type for the lhs and also for rhs1, there won't
be any surprises on rhs2.
As noted in the PR, there are cases where the type of the operand is the
sole place of information and we don't want to ICF merge functions if the
types differ.
One case is stdarg functions, arguments passed to ..., it is different
if we pass 1, 1L, 1LL.
Another case are the K&R unprototyped functions (sure, gone in C23).
And yet another case are inline asm operands, "r" (1) is different from "r"
(1L) from "r" (1LL).

So, the following patch determines based on lack of fntype (e.g. for
internal functions), or on !prototype_p, or on stdarg_p (in that case
using number of named arguments) which arguments need to have type checked
and does that, plus compares types on inline asm operands (maybe it would be
enough to do that just for input operands but we have just a routine to
handle both and I didn't feel we need to differentiate).

Furthermore, I've noticed fntype{1,2} isn't actually compared if it is a
direct call (gimple_call_fndecl is non-NULL).  That is wrong too, we could
have
  void (*fn) (int, long long) = (void (*) (int, long long)) foo;
  fn (1, 1LL);
in one case and
  void (*fn) (long long, int) = (void (*) (long long, int)) foo;
  fn (1LL, 1);
in another, both folded into a direct call of foo with different
gimple_call_fntype.  Sure, one of them would be UB at runtime (or both), but
what if we ICF merge it into the one that is UB at runtime
and the program actually calls the correct one only?

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-01-31  Jakub Jelinek  

PR ipa/117432
* ipa-icf-gimple.cc (func_checker::compare_asm_inputs_outputs):
Also return_false if operands have incompatible types.
(func_checker::compare_gimple_call): Also check fntype1 vs. fntype2
compatibility if at least one of the calls has different
gimple_call_fntype from the FUNCTION_TYPE of the called decl.  For
calls to non-prototyped calls or for stdarg_p functions after the
last named argument (if any) check type compatibility of call
arguments.

* gcc.c-torture/execute/pr117432.c: New test.
* gcc.target/i386/pr117432.c: New test.

--- gcc/ipa-icf-gimple.cc.jj2025-01-02 11:23:16.334519404 +0100
+++ gcc/ipa-icf-gimple.cc   2025-01-30 16:21:05.782127011 +0100
@@ -459,7 +459,9 @@ func_checker::compare_asm_inputs_outputs
return false;
 
   if (!compare_operand (TREE_VALUE (t1), TREE_VALUE (t2),
-   get_operand_access_type (map, t1)))
+   get_operand_access_type (map, t1))
+ || !types_compatible_p (TREE_TYPE (TREE_VALUE (t1)),
+ TREE_TYPE (TREE_VALUE (t2
return return_false ();
 
   tree p1 = TREE_PURPOSE (t1);
@@ -718,8 +720,11 @@ func_checker::compare_gimple_call (gcall
 
   /* For direct calls we verify that types are compatible so if we matched
  callees, callers must match, too.  For indirect calls however verify
- function type.  */
-  if (!gimple_call_fndecl (s1))
+ function type.  And also verify it for direct calls with some different
+ fntype.  */
+  if (!gimple_call_fndecl (s1)
+  || TREE_TYPE (TREE_TYPE (t1)) != fntype1
+  || TREE_TYPE (TREE_TYPE (t2)) != fntype2)
 {
   if ((fntype1 && !fntype2)
  || (!fntype1 && fntype2)
@@ -738,6 +743,24 @@ func_checker::compare_gimple_call (gcall
   get_operand_access_type (&map, chain1)))
 return return_false_with_msg ("static call chains are different");
 
+  unsigned check_arg_types_from = gimple_call_num_args (s1);
+  if (!fntype1
+  || !fntype2
+  || !prototype_p (fntype1)
+  || !prototype_p (fntype2))
+check_arg_types_from = 0;
+  else if (stdarg_p (fntype1))
+{
+  check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype1));
+  if (stdarg_p (fntype2))
+   {
+ unsigned n = list_length (TYPE_ARG_TYPES (fntype2));
+ check_arg_types_from = MIN (check_arg_types_from, n);
+   }
+}
+  else if (stdarg_p (fntype2))
+check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype2));
+
   /* Checking of argument.  */
   for (i = 0; i < gimple_call_num_args (s1); ++i)
 {
@@ -746,6 +769,10 @@ func_checker::compare_gimple_call (gcall
 
   if (!compare_operand (t1, t2, get_operand_access_type (&map, t1)))
return return_false_with_msg ("GIMPLE call operands are different");
+  if (i >= check_arg_types_from
+ && !types_compatible_p (TREE_TYPE (t1), TREE_TYPE (t2)))
+   return return_false_with_msg ("GIMPLE call operand types are "
+ "dif

[PATCH] niter: Make build_cltz_expr more robust [PR118689]

2025-01-31 Thread Jakub Jelinek

Hi!

Since my r15-7223 the niter analysis can recognize one loop during bootstrap
as being ctz like.
The patch just turned
@@ -2173,7 +2173,7 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   _T535_44 = &buf[i.40_2]{lb: 1 sz: 4};
   _T536_45 = x_21 & 1;
   *_T535_44 = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   x_48 = _T537_47;
   # DEBUG x => x_48
   if (x_48 != 0)
which is not a big deal for the number_of_iterations_cltz optimization, it
recognizes both right shift by 1 and unsigned division by 2 (and similarly
for clz left shift by 1 or multiplication by 2).
But starting with forwprop1 that change also resulted in
@@ -1875,9 +1875,9 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
   i.40_2 = (INTEGER) _T530_34;
   _T536_45 = x_21 & 1;
   MEM  [(CARDINAL *)&buf][i.40_2]{lb: 1 sz: 4} = _T536_45;
-  _T537_47 = x_21 / 2;
+  _T537_47 = x_21 >> 1;
   # DEBUG x => _T537_47
-  if (x_21 > 1)
+  if (_T537_47 != 0)
 goto ; [INV]
   else
 goto ; [INV]
and apparently it is only the latter form that number_of_iterations_cltz
pattern matches, not the former (after all, that was the exact reason
for r15-7223).
The problem is that build_cltz_expr assumes if IFN_C[LT]Z can't be used it
can use the __builtin_c[lt]z{,l,ll} builtins, and while most of the FEs do
create them, modula 2 does not.

The following patch just lets us punt if the FE doesn't build those builtins.
I've filed a PR against modula2 so that they add the builtins too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-01-31  Jakub Jelinek  

PR tree-optimization/118689
PR modula2/115032
* tree-ssa-loop-niter.cc (build_cltz_expr): Return NULL_TREE if fn is
NULL and use_ifn is false.

--- gcc/tree-ssa-loop-niter.cc.jj   2025-01-17 11:29:34.080683133 +0100
+++ gcc/tree-ssa-loop-niter.cc  2025-01-30 14:51:57.528933620 +0100
@@ -2238,6 +2238,8 @@ build_cltz_expr (tree src, bool leading,
  build_int_cst (integer_type_node, prec));
}
 }
+  else if (fn == NULL_TREE)
+return NULL_TREE;
   else if (prec == 2 * lli_prec)
 {
   tree src1 = fold_convert (long_long_unsigned_type_node,

Jakub

Re: [PATCH] niter: Make build_cltz_expr more robust [PR118689]

2025-01-31 Thread Richard Biener




> Am 31.01.2025 um 10:24 schrieb Jakub Jelinek :
> 
> Hi!
> 
> Since my r15-7223 the niter analysis can recognize one loop during bootstrap
> as being ctz like.
> The patch just turned
> @@ -2173,7 +2173,7 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
>   _T535_44 = &buf[i.40_2]{lb: 1 sz: 4};
>   _T536_45 = x_21 & 1;
>   *_T535_44 = _T536_45;
> -  _T537_47 = x_21 / 2;
> +  _T537_47 = x_21 >> 1;
>   x_48 = _T537_47;
>   # DEBUG x => x_48
>   if (x_48 != 0)
> which is not a big deal for the number_of_iterations_cltz optimization, it
> recognizes both right shift by 1 and unsigned division by 2 (and similarly
> for clz left shift by 1 or multiplication by 2).
> But starting with forwprop1 that change also resulted in
> @@ -1875,9 +1875,9 @@ PROC m2pim_NumberIO_BinToStr (CARDINAL x
>   i.40_2 = (INTEGER) _T530_34;
>   _T536_45 = x_21 & 1;
>   MEM  [(CARDINAL *)&buf][i.40_2]{lb: 1 sz: 4} = _T536_45;
> -  _T537_47 = x_21 / 2;
> +  _T537_47 = x_21 >> 1;
>   # DEBUG x => _T537_47
> -  if (x_21 > 1)
> +  if (_T537_47 != 0)
> goto ; [INV]
>   else
> goto ; [INV]
> and apparently it is only the latter form that number_of_iterations_cltz
> pattern matches, not the former (after all, that was the exact reason
> for r15-7223).
> The problem is that build_cltz_expr assumes if IFN_C[LT]Z can't be used it
> can use the __builtin_c[lt]z{,l,ll} builtins, and while most of the FEs do
> create them, modula 2 does not.
> 
> The following patch just lets us punt if the FE doesn't build those builtins.
> I've filed a PR against modula2 so that they add the builtins too.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

I suppose the middle end could step in for those builtins as well, as we do for 
memcpy and others.

Richard 

> 2025-01-31  Jakub Jelinek  
> 
>PR tree-optimization/118689
>PR modula2/115032
>* tree-ssa-loop-niter.cc (build_cltz_expr): Return NULL_TREE if fn is
>NULL and use_ifn is false.
> 
> --- gcc/tree-ssa-loop-niter.cc.jj2025-01-17 11:29:34.080683133 +0100
> +++ gcc/tree-ssa-loop-niter.cc2025-01-30 14:51:57.528933620 +0100
> @@ -2238,6 +2238,8 @@ build_cltz_expr (tree src, bool leading,
>  build_int_cst (integer_type_node, prec));
>}
> }
> +  else if (fn == NULL_TREE)
> +return NULL_TREE;
>   else if (prec == 2 * lli_prec)
> {
>   tree src1 = fold_convert (long_long_unsigned_type_node,
> 
>Jakub
>

Re: [PATCH] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Jakub Jelinek

On Fri, Jan 31, 2025 at 02:19:28PM +0100, Richard Biener wrote:
> > For internal calls gimple_call_fndecl (s1) will be NULL, so
> > !gimple_call_fndecl (s1) will be true and so the new checks aren't done.
> 
> Yes, but also fntype1/2 will be NULL then.
> 
> > > if (gimple_call_internal_p (s1) (with gimple_call_internal_fn compare
> > > in a conditional if) would be a lot clearer?
> > 
> > What the patch does is just trying to avoid the comparison in the common
> > case (direct calls from the beginning and there what the comment says
> > applies, if there would be a mismatch, we'd already know that).
> > 
> > If you want to compare unconditionally, it would be about just removing the
> >   if (!gimple_call_fndecl (s1))
> > {
> > and
> > }
> > and reindenting + rewriting the comment above it.  Shall I do that?
> 
> That's what I suggested, or rather
> 
>   if (gimple_call_internal_p (s1))
> {
>   if (gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
>  return false;
> }
>   else
> {
>   tree fntype1 = gimple_call_fntype (s1);
>   tree fntype2 = gimple_call_fntype (s2);
> 
>   if ((fntype1 && !fntype2)
>   || (!fntype1 && fntype2)
>   || (fntype1 && !types_compatible_p (fntype1, fntype2)))
> return return_false_with_msg ("call function types are not 
> compatible");
> }
> 
> I think in the else { fntype1 and fntype2 should never be NULL and thus
> this should simplify even more.

This isn't possible because fntype{1,2} are used later on in the function;
sure, that
  if (fntype1 && fntype2 && comp_type_attributes (fntype1, fntype2) != 1)
return return_false_with_msg ("different fntype attributes");
can be moved into the else, but the new checks to determine which args to
check still use that.

Jakub

[PATCH v2] x86: Handle -mindirect-branch-register for -fno-plt

2025-01-31 Thread H.J. Lu

-fno-plt forces external call to indirect call via GOT memory.  But
-mindirect-branch-register requires indirect call and jump via register.
For -mindirect-branch-register, expand indirect call via register and
update call patterns and peepholes to disable indirect call via memory.

gcc/

PR target/118713
* config/i386/i386-expand.cc (ix86_expand_call): Force indirect
call via register for -mindirect-branch-register.
* config/i386/i386.md (*call): Disable indirect call via memory
for -mindirect-branch-register.
(*call_got_x32): Likewise.
(*sibcall_GOT_32): Likewise.
(*sibcall): Likewise.
(*sibcall_memory): Likewise.
(*call_pop): Likewise.
(*sibcall_pop): Likewise.
(*sibcall_pop_memory): Likewise.
(*call_value): Likewise.
(*call_value_got_x32): Likewise.
(*sibcall_value_GOT_32): Likewise.
(*sibcall_value): Likewise.
(*sibcall_value_memory): Likewise.
(*call_value_pop): Likewise.
(*sibcall_value_pop): Likewise.
(*sibcall_value_pop_memory): Likewise.

gcc/testsuite/

PR target/118713
* gcc.target/i386/pr118713-1-x32.c: New test.
* gcc.target/i386/pr118713-1.c: Likewise.
* gcc.target/i386/pr118713-2-x32.c: Likewise.
* gcc.target/i386/pr118713-2.c: Likewise.
* gcc.target/i386/pr118713-3-x32.c: Likewise.
* gcc.target/i386/pr118713-3.c: Likewise.
* gcc.target/i386/pr118713-4-x32.c: Likewise.
* gcc.target/i386/pr118713-4.c: Likewise.
* gcc.target/i386/pr118713-5-x32.c: Likewise.
* gcc.target/i386/pr118713-5.c: Likewise.
* gcc.target/i386/pr118713-6-x32.c: Likewise.
* gcc.target/i386/pr118713-6.c: Likewise.
* gcc.target/i386/pr118713-7-x32.c: Likewise.
* gcc.target/i386/pr118713-7.c: Likewise.
* gcc.target/i386/pr118713-8-x32.c: Likewise.
* gcc.target/i386/pr118713-8.c: Likewise.
* gcc.target/i386/pr118713-9-x32.c: Likewise.
* gcc.target/i386/pr118713-9.c: Likewise.
* gcc.target/i386/pr118713-10-x32.c: Likewise.
* gcc.target/i386/pr118713-10.c: Likewise.
* gcc.target/i386/pr118713-11-x32.c: Likewise.
* gcc.target/i386/pr118713-11.c: Likewise.
* gcc.target/i386/pr118713-12-x32.c: Likewise.
* gcc.target/i386/pr118713-12.c: Likewise.

Co-Authored-By: Uros Bizjak 
Signed-off-by: H.J. Lu 
---
 gcc/config/i386/i386-expand.cc| 20 ++--
 gcc/config/i386/i386.md   | 98 +--
 .../gcc.target/i386/pr118713-1-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-1.c| 14 +++
 .../gcc.target/i386/pr118713-10-x32.c |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-10.c   | 15 +++
 .../gcc.target/i386/pr118713-11-x32.c |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-11.c   | 14 +++
 .../gcc.target/i386/pr118713-12-x32.c |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-12.c   | 14 +++
 .../gcc.target/i386/pr118713-2-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-2.c| 15 +++
 .../gcc.target/i386/pr118713-3-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-3.c| 14 +++
 .../gcc.target/i386/pr118713-4-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-4.c| 14 +++
 .../gcc.target/i386/pr118713-5-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-5.c| 13 +++
 .../gcc.target/i386/pr118713-6-x32.c  | 15 +++
 gcc/testsuite/gcc.target/i386/pr118713-6.c| 14 +++
 .../gcc.target/i386/pr118713-7-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-7.c| 13 +++
 .../gcc.target/i386/pr118713-8-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-8.c| 13 +++
 .../gcc.target/i386/pr118713-9-x32.c  |  8 ++
 gcc/testsuite/gcc.target/i386/pr118713-9.c| 14 +++
 26 files changed, 353 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4-x32.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-5-x32.c
 create mode 100644 g

Re: [PATCH] x86: Handle -mindirect-branch-register for indirect calls

2025-01-31 Thread H.J. Lu

On Fri, Jan 31, 2025 at 8:44 PM Uros Bizjak  wrote:
>
> On Fri, Jan 31, 2025 at 12:09 PM H.J. Lu  wrote:
> >
> > -mindirect-branch-register requires indirect call and jump via register.
> > For -mindirect-branch-register, expanding indirect call via register and
> > update call patterns and peepholes to disable indirect call via memory.
>
> I think the approach is wrong, we already have
> TARGET_INDIRECT_BRANCH_REGISTER that includes
> ix86_indirect_branch_register:
>
> #define TARGET_INDIRECT_BRANCH_REGISTER \
>   (ix86_indirect_branch_register \
>|| cfun->machine->indirect_branch_type != indirect_branch_keep)
>
> and:
>
> (define_constraint "Bs"
>   "@internal Sibcall memory operand."
>   (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
> (not (match_test "TARGET_X32"))
> (match_operand 0 "sibcall_memory_operand"))
>(and (match_test "TARGET_X32")
> (match_test "Pmode == DImode")
> (match_operand 0 "GOT_memory_operand"
>
> (define_constraint "Bw"
>   "@internal Call memory operand."
>   (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER"))
> (not (match_test "TARGET_X32"))
> (match_operand 0 "memory_operand"))
>(and (match_test "TARGET_X32")
> (match_test "Pmode == DImode")
> (match_operand 0 "GOT_memory_operand"
>
> So, the changes in the patch are mostly redundant, the remaining cases
> should use TARGET_INDIRECT_BRANCH_REGISTER instead.
>
> Uros.
>
> >
> > gcc/
> >
> > PR target/115673
> > * config/i386/i386-expand.cc (ix86_expand_call): Force indirect
> > call via register for -mindirect-branch-register.
> > * config/i386/i386.md (*call): Disable indirect call via memory
> > for -mindirect-branch-register.
> > (*call_got_x32): Likewise.
> > (*sibcall_GOT_32): Likewise.
> > (*sibcall): Likewise.
> > (*sibcall_memory): Likewise.
> > (*call_pop): Likewise.
> > (*sibcall_pop): Likewise.
> > (*sibcall_pop_memory): Likewise.
> > (*call_value): Likewise.
> > (*call_value_got_x32): Likewise.
> > (*sibcall_value_GOT_32): Likewise.
> > (*sibcall_value): Likewise.
> > (*sibcall_value_memory): Likewise.
> > (*call_value_pop): Likewise.
> > (*sibcall_value_pop): Likewise.
> > (*sibcall_value_pop_memory): Likewise.
> > Disable indirect call via memory peepholes for
> > -mindirect-branch-register.
> >
> > gcc/testsuite/
> >
> > PR target/115673
> > * gcc.target/i386/pr115673-1-x32.c: New test.
> > * gcc.target/i386/pr115673-1.c: Likewise.
> > * gcc.target/i386/pr115673-2-x32.c: Likewise.
> > * gcc.target/i386/pr115673-2.c: Likewise.
> > * gcc.target/i386/pr115673-3-x32.c: Likewise.
> > * gcc.target/i386/pr115673-3.c: Likewise.
> > * gcc.target/i386/pr115673-4-x32.c: Likewise.
> > * gcc.target/i386/pr115673-4.c: Likewise.
> > * gcc.target/i386/pr115673-5-x32.c: Likewise.
> > * gcc.target/i386/pr115673-5.c: Likewise.
> > * gcc.target/i386/pr115673-6-x32.c: Likewise.
> > * gcc.target/i386/pr115673-6.c: Likewise.
> > * gcc.target/i386/pr115673-7-x32.c: Likewise.
> > * gcc.target/i386/pr115673-7.c: Likewise.
> > * gcc.target/i386/pr115673-8-x32.c: Likewise.
> > * gcc.target/i386/pr115673-8.c: Likewise.
> > * gcc.target/i386/pr115673-9-x32.c: Likewise.
> > * gcc.target/i386/pr115673-9.c: Likewise.
> > * gcc.target/i386/pr115673-10-x32.c: Likewise.
> > * gcc.target/i386/pr115673-10.c: Likewise.
> > * gcc.target/i386/pr115673-11-x32.c: Likewise.
> > * gcc.target/i386/pr115673-11.c: Likewise.
> > * gcc.target/i386/pr115673-12-x32.c: Likewise.
> > * gcc.target/i386/pr115673-12.c: Likewise.
> >
> > Co-Authored-By: Uros Bizjak 
> > Signed-off-by: H.J. Lu 
> > ---
> >  gcc/config/i386/i386-expand.cc|  20 +--
> >  gcc/config/i386/i386.md   | 118 --
> >  .../gcc.target/i386/pr115673-1-x32.c  |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-1.c|  14 +++
> >  .../gcc.target/i386/pr115673-10-x32.c |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-10.c   |  15 +++
> >  .../gcc.target/i386/pr115673-11-x32.c |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-11.c   |  14 +++
> >  .../gcc.target/i386/pr115673-12-x32.c |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-12.c   |  14 +++
> >  .../gcc.target/i386/pr115673-2-x32.c  |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-2.c|  15 +++
> >  .../gcc.target/i386/pr115673-3-x32.c  |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-3.c|  14 +++
> >  .../gcc.target/i386/pr115673-4-x32.c  |   8 ++
> >  gcc/testsuite/gcc.target/i386/pr115673-4.c|  14 +++
> >  .../gcc.targe

[PATCH 1/3] c++: Fix mangling of lambdas in static member template initializers [PR107741]

2025-01-31 Thread Nathaniel Shead

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

My fix for this issue in r15-7147 turns out to not be quite sufficient;
static member templates apparently go down a different code path and
need their own handling.

PR c++/107741

gcc/cp/ChangeLog:

* decl2.cc (start_initialized_static_member): Push the
TEMPLATE_DECL when appropriate.
* parser.cc (cp_parser_init_declarator): Start the member decl
early for static members so that lambda scope is set.
(cp_parser_template_declaration_after_parameters): Don't
register static members here.

gcc/testsuite/ChangeLog:

* g++.dg/abi/lambda-ctx2-19.C: Add tests for template members.
* g++.dg/abi/lambda-ctx2-19vs20.C: Likewise.
* g++.dg/abi/lambda-ctx2-20.C: Likewise.
* g++.dg/abi/lambda-ctx2.h: Likewise.
* g++.dg/cpp0x/static-member-init-1.C: Likewise.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/decl2.cc   | 15 --
 gcc/cp/parser.cc  | 30 +++
 gcc/testsuite/g++.dg/abi/lambda-ctx2-19.C |  3 ++
 gcc/testsuite/g++.dg/abi/lambda-ctx2-19vs20.C |  3 ++
 gcc/testsuite/g++.dg/abi/lambda-ctx2-20.C |  3 ++
 gcc/testsuite/g++.dg/abi/lambda-ctx2.h| 16 ++
 .../g++.dg/cpp0x/static-member-init-1.C   |  5 
 7 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 9e61afd359f..994a459c79c 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -1295,6 +1295,8 @@ start_initialized_static_member (const cp_declarator 
*declarator,
   gcc_checking_assert (VAR_P (value));
 
   DECL_CONTEXT (value) = current_class_type;
+  DECL_INITIALIZED_IN_CLASS_P (value) = true;
+
   if (processing_template_decl)
 {
   value = push_template_decl (value);
@@ -1305,8 +1307,17 @@ start_initialized_static_member (const cp_declarator 
*declarator,
   if (attrlist)
 cplus_decl_attributes (&value, attrlist, 0);
 
-  finish_member_declaration (value);
-  DECL_INITIALIZED_IN_CLASS_P (value) = true;
+  /* When defining a template we need to register the TEMPLATE_DECL.  */
+  tree maybe_template = value;
+  if (template_parm_scope_p ())
+{
+  if (!DECL_TEMPLATE_SPECIALIZATION (value))
+   maybe_template = DECL_TI_TEMPLATE (value);
+  else
+   maybe_template = NULL_TREE;
+}
+  if (maybe_template)
+finish_member_declaration (maybe_template);
 
   return value;
 }
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 7ddb7f119a4..af1c3774f74 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -24179,8 +24179,17 @@ cp_parser_init_declarator (cp_parser* parser,
 here we only handle the latter two.  */
  bool has_lambda_scope = false;
 
+ if (member_p && decl_specifiers->storage_class == sc_static)
+   {
+ gcc_checking_assert (!decl);
+ tree all_attrs = attr_chainon (attributes, prefix_attributes);
+ decl = start_initialized_static_member (declarator,
+ decl_specifiers,
+ all_attrs);
+   }
+
  if (decl != error_mark_node
- && !member_p
+ && (!member_p || decl)
  && (processing_template_decl || DECL_NAMESPACE_SCOPE_P (decl)))
has_lambda_scope = true;
 
@@ -24230,10 +24239,14 @@ cp_parser_init_declarator (cp_parser* parser,
  pop_scope (pushed_scope);
  pushed_scope = NULL_TREE;
}
-  decl = grokfield (declarator, decl_specifiers,
-   initializer, !is_non_constant_init,
-   /*asmspec=*/NULL_TREE,
-   attr_chainon (attributes, prefix_attributes));
+  if (decl)
+   finish_initialized_static_member (decl, initializer,
+ /*asmspec=*/NULL_TREE);
+  else
+   decl = grokfield (declarator, decl_specifiers,
+ initializer, !is_non_constant_init,
+ /*asmspec=*/NULL_TREE,
+ attr_chainon (attributes, prefix_attributes));
   if (decl && TREE_CODE (decl) == FUNCTION_DECL)
cp_parser_save_default_args (parser, decl);
   cp_finalize_omp_declare_simd (parser, decl);
@@ -33739,7 +33752,12 @@ cp_parser_template_declaration_after_parameters 
(cp_parser* parser,
 }
 
   /* Register member declarations.  */
-  if (member_p && !friend_p && decl && !DECL_CLASS_TEMPLATE_P (decl))
+  if (member_p && !friend_p && decl && !DECL_CLASS_TEMPLATE_P (decl)
+  /* But this is not needed for initialised static members, that were
+registered early to be able to be used in their own definition.  */
+  && !(variable_template_p (decl)
+  && DECL_CLASS_SCOPE_P (decl)
+  && DECL_INITIALIZED_IN_CLASS_P (DECL_TEMPLATE_RESULT (decl
 finish_me

[PATCH 2/3] c++: Clear lambda scope for unattached member template lambdas

2025-01-31 Thread Nathaniel Shead

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

-- >8 --

In r15-7202 we made lambdas between a template parameter scope and a
class/function/initializer be considered TU-local, in lieu of working
out how to mangle them to the succeeding declaration.

I neglected to clear any existing mangling on the template declaration
however; this means that such lambdas can occasionally get a lambda
scope, and will in general inherit the lambda scope of their
instantiation context (whatever that might be).

This patch ensures that the scope is cleared on the template declaration
as well.

gcc/cp/ChangeLog:

* lambda.cc (record_lambda_scope): Clear mangling scope for
otherwise unattached lambdas in class member templates.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/lambda-uneval22.C: Add check that the primary
specialisation of the lambda is TU-local.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/lambda.cc | 11 +++
 gcc/testsuite/g++.dg/cpp2a/lambda-uneval22.C |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index 5593636eaf8..73cf816b6e1 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -1575,6 +1575,17 @@ record_lambda_scope (tree lambda)
}
 }
 
+  /* An otherwise unattached class-scope lambda in a member template
+ should not have a mangling scope, as the mangling scope will not
+ correctly inherit on instantiation.  */
+  tree ctx = TYPE_CONTEXT (closure);
+  if (scope
+  && ctx
+  && CLASS_TYPE_P (ctx)
+  && ctx == TREE_TYPE (scope)
+  && current_template_depth > template_class_depth (ctx))
+scope = NULL_TREE;
+
   LAMBDA_EXPR_EXTRA_SCOPE (lambda) = scope;
   if (scope)
 maybe_key_decl (scope, TYPE_NAME (closure));
diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval22.C 
b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval22.C
index 9c0e8128f10..1a25a0255fc 100644
--- a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval22.C
+++ b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval22.C
@@ -5,7 +5,7 @@ struct S {
   using T = decltype([]{ return I; });
 
   template 
-  decltype([]{ return I; }) f() { return {}; }
+  decltype([]{ return I; }) f();  // { dg-error "declared using local type" }
 };
 
 void a(S::T<0>*);  // { dg-error "declared using local type" }
@@ -18,4 +18,5 @@ int main() {
   b(nullptr);
   c(nullptr);
   d(nullptr);
+  S{}.f<2>()();
 }
-- 
2.47.0

Re: [PATCH v2] x86: Handle -mindirect-branch-register for -fno-plt

2025-01-31 Thread Uros Bizjak

On Fri, Jan 31, 2025 at 2:54 PM Uros Bizjak  wrote:
>
> On Fri, Jan 31, 2025 at 2:36 PM H.J. Lu  wrote:
> >
> > -fno-plt forces external call to indirect call via GOT memory.  But
> > -mindirect-branch-register requires indirect call and jump via register.
> > For -mindirect-branch-register, expanding indirect call via register and
> > update call patterns and peepholes to disable indirect call via memory.
> >
> > gcc/
> >
> > PR target/118713
> > * config/i386/i386-expand.cc (ix86_expand_call): Force indirect
> > call via register for -mindirect-branch-register.
> > * config/i386/i386.md (*call): Disable indirect call via memory
> > for -mindirect-branch-register.
> > (*call_got_x32): Likewise.
> > (*sibcall_GOT_32): Likewise.
> > (*sibcall): Likewise.
> > (*sibcall_memory): Likewise.
> > (*call_pop): Likewise.
> > (*sibcall_pop): Likewise.
> > (*sibcall_pop_memory): Likewise.
> > (*call_value): Likewise.
> > (*call_value_got_x32): Likewise.
> > (*sibcall_value_GOT_32): Likewise.
> > (*sibcall_value): Likewise.
> > (*sibcall_value_memory): Likewise.
> > (*call_value_pop): Likewise.
> > (*sibcall_value_pop): Likewise.
> > (*sibcall_value_pop_memory): Likewise.
> >
> > gcc/testsuite/
> >
> > PR target/118713
> > * gcc.target/i386/pr118713-1-x32.c: New test.
> > * gcc.target/i386/pr118713-1.c: Likewise.
> > * gcc.target/i386/pr118713-2-x32.c: Likewise.
> > * gcc.target/i386/pr118713-2.c: Likewise.
> > * gcc.target/i386/pr118713-3-x32.c: Likewise.
> > * gcc.target/i386/pr118713-3.c: Likewise.
> > * gcc.target/i386/pr118713-4-x32.c: Likewise.
> > * gcc.target/i386/pr118713-4.c: Likewise.
> > * gcc.target/i386/pr118713-5-x32.c: Likewise.
> > * gcc.target/i386/pr118713-5.c: Likewise.
> > * gcc.target/i386/pr118713-6-x32.c: Likewise.
> > * gcc.target/i386/pr118713-6.c: Likewise.
> > * gcc.target/i386/pr118713-7-x32.c: Likewise.
> > * gcc.target/i386/pr118713-7.c: Likewise.
> > * gcc.target/i386/pr118713-8-x32.c: Likewise.
> > * gcc.target/i386/pr118713-8.c: Likewise.
> > * gcc.target/i386/pr118713-9-x32.c: Likewise.
> > * gcc.target/i386/pr118713-9.c: Likewise.
> > * gcc.target/i386/pr118713-10-x32.c: Likewise.
> > * gcc.target/i386/pr118713-10.c: Likewise.
> > * gcc.target/i386/pr118713-11-x32.c: Likewise.
> > * gcc.target/i386/pr118713-11.c: Likewise.
> > * gcc.target/i386/pr118713-12-x32.c: Likewise.
> > * gcc.target/i386/pr118713-12.c: Likewise.
> >
> > Co-Authored-By: Uros Bizjak 
> > Signed-off-by: H.J. Lu 
> > ---
> >  gcc/config/i386/i386-expand.cc| 20 ++--
> >  gcc/config/i386/i386.md   | 98 +--
> >  .../gcc.target/i386/pr118713-1-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-1.c| 14 +++
> >  .../gcc.target/i386/pr118713-10-x32.c |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-10.c   | 15 +++
> >  .../gcc.target/i386/pr118713-11-x32.c |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-11.c   | 14 +++
> >  .../gcc.target/i386/pr118713-12-x32.c |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-12.c   | 14 +++
> >  .../gcc.target/i386/pr118713-2-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-2.c| 15 +++
> >  .../gcc.target/i386/pr118713-3-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-3.c| 14 +++
> >  .../gcc.target/i386/pr118713-4-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-4.c| 14 +++
> >  .../gcc.target/i386/pr118713-5-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-5.c| 13 +++
> >  .../gcc.target/i386/pr118713-6-x32.c  | 15 +++
> >  gcc/testsuite/gcc.target/i386/pr118713-6.c| 14 +++
> >  .../gcc.target/i386/pr118713-7-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-7.c| 13 +++
> >  .../gcc.target/i386/pr118713-8-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-8.c| 13 +++
> >  .../gcc.target/i386/pr118713-9-x32.c  |  8 ++
> >  gcc/testsuite/gcc.target/i386/pr118713-9.c| 14 +++
> >  26 files changed, 353 insertions(+), 35 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11-x32.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12-x32.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12.c

[PATCH] aarch64: Fix dupq_* testsuite failures

2025-01-31 Thread Richard Sandiford

This patch fixes the dupq_* testsuite failures.  The tests were
introduced with r15-3669-ga92f54f580c3 (which was a nice improvement)
and Pengxuan originally had a follow-on patch to recognise INDEX
constants during vec_init.

I'd originally wanted to solve this a different way, using wildcards
when building a vector and letting vector_builder::finalize find the
best way of filling them in.  I no longer think that's the best
approach though.  Stepped constants are likely to be more expensive
than unstepped constants, so we should first try finding an unstepped
constant that is valid, even if it has a longer representation than
the stepped version.

This patch therefore uses a variant of Pengxuan's idea.

While there, I noticed that the (old) code for finding an unstepped
constant only tried varying one bit at a time.  So for index 0 in a
16-element constant, the code would try picking a constant from index 8,
4, 2, and then 1.  But since the goal is to create "fewer, larger,
repeating parts", it would be better to iterate over a bit-reversed
increment, so that after trying an XOR with 0 and 8, we try adding 4
to each previous attempt, then 2 to each previous attempt, and so on.
In the previous example this would give 8, 4, 12, 2, 10, 6, 14, ...

The test shows an example of this for 8 shorts.

I'll push this on Monday evening UTC if there are no comments
before then.

Richard

gcc/
* config/aarch64/aarch64.cc (aarch64_choose_vector_init_constant):
New function, split out from...
(aarch64_expand_vector_init_fallback): ...here.  Use a bit-
reversed increment to find a constant index.  Add support for
stepped constants.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/dupq_12.c: New test.
---
 gcc/config/aarch64/aarch64.cc | 110 +-
 .../aarch64/sve/acle/general/dupq_12.c|  13 +++
 2 files changed, 95 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_12.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index be99137b052..16754fa9e7b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24036,6 +24036,85 @@ aarch64_simd_make_constant (rtx vals)
 return NULL_RTX;
 }
 
+/* VALS is a PARALLEL rtx that contains element values for a vector of
+   mode MODE.  Return a constant that contains all the CONST_INT and
+   CONST_DOUBLE elements of VALS, using any convenient values for the
+   other elements.  */
+
+static rtx
+aarch64_choose_vector_init_constant (machine_mode mode, rtx vals)
+{
+  unsigned int n_elts = XVECLEN (vals, 0);
+
+  /* We really don't care what goes into the parts we will overwrite, but we're
+ more likely to be able to load the constant efficiently if it has fewer,
+ larger, repeating parts (see aarch64_simd_valid_imm).  */
+  rtvec copy = shallow_copy_rtvec (XVEC (vals, 0));
+  for (unsigned int i = 0; i < n_elts; ++i)
+{
+  rtx x = RTVEC_ELT (copy, i);
+  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
+   continue;
+  /* This is effectively a bit-reversed increment, e.g.: 8, 4, 12,
+2, 10, 6, 14, ... for n_elts == 16.  The early break makes the
+outer "i" loop O(n_elts * log(n_elts)).  */
+  unsigned int j = 0;
+  for (;;)
+   {
+ for (unsigned int bit = n_elts / 2; bit > 0; bit /= 2)
+   {
+ j ^= bit;
+ if (j & bit)
+   break;
+   }
+ rtx test = XVECEXP (vals, 0, i ^ j);
+ if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
+   {
+ RTVEC_ELT (copy, i) = test;
+ break;
+   }
+ gcc_assert (j != 0);
+   }
+}
+
+  rtx c = gen_rtx_CONST_VECTOR (mode, copy);
+  if (aarch64_simd_valid_mov_imm (c))
+return c;
+
+  /* Try generating a stepped sequence.  */
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+for (unsigned int i = 0; i < n_elts; ++i)
+  if (CONST_INT_P (XVECEXP (vals, 0, i)))
+   {
+ auto base = UINTVAL (XVECEXP (vals, 0, i));
+ for (unsigned int j = i + 1; j < n_elts; ++j)
+   if (CONST_INT_P (XVECEXP (vals, 0, j)))
+ {
+   /* It doesn't matter whether this division is exact.
+  All that matters is whether the constant we produce
+  is valid.  */
+   HOST_WIDE_INT diff = UINTVAL (XVECEXP (vals, 0, j)) - base;
+   unsigned HOST_WIDE_INT step = diff / int (j - i);
+   rtx_vector_builder builder (mode, n_elts, 1);
+   for (unsigned int k = 0; k < n_elts; ++k)
+ {
+   rtx x = XVECEXP (vals, 0, k);
+   if (!CONST_INT_P (x))
+ x = gen_int_mode (int (k - i) * step + base,
+   GET_MODE_INNER (mode));
+   builder.quick_push (x);
+ }
+

[PATCH v11] c++: Fix overeager Woverloaded-virtual with conversion operators [PR109918]

2025-01-31 Thread Simon Martin

Hi Jason,

On 27 Jan 2025, at 16:49, Jason Merrill wrote:

> On 1/27/25 10:41 AM, Simon Martin wrote:
>> Hi Jason,
>>
>> On 17 Jan 2025, at 23:33, Jason Merrill wrote:
>>
>>> On 1/17/25 9:52 AM, Simon Martin wrote:
 Hi Jason,

 On 16 Jan 2025, at 22:49, Jason Merrill wrote:

> On 10/16/24 11:43 AM, Simon Martin wrote:
>> As you know the patch had to be reverted due to PR117114, that
>> highlighted a bunch of issues with comparing DECL_VINDEXes: it
>> might
>> give false positives in case of multiple inheritance (the case in

>> PR117114), but also if there’s single inheritance but the
>>
>> hierarchy
>> has
>> more than two levels (another issue I found while bootstrapping
>> with
>> rust enabled).
>
> Yes, relying on DECL_VINDEX equality was wrong, sorry to mislead
> you.
>
>> The attached updated patch introduces an overrides_p function,
>> based
>> on
>> the existing check_final_overrider, and uses it when the 
>> signatures
>>
>> match.
>
> That seems unnecessary.  It seems like removing that only breaks
> Woverloaded-virt11.C, and making that work again only requires
> bringing back the check that DECL_VINDEX (fndecl) is set (to any
> value).  Or remembering that fndecl was a template, so it can't
> really
> have the same signature as a non-template, whatever 
> same_signature_p
>>
> says.
 That’s right, only Woverloaded-virt11.C fails without the
 check_final_overrider call.

 Thanks for the suggestion to check whether fndecl is a template. 

 This
 is
 what the updated attached patch does, successfully tested on
 x86_64-pc-linux-gnu.

 OK for GCC 15? And if so, thoughts on backporting to release 
 branches
 (technically it’s a regression but it’s “just” an incorrect
 warning fix, so probably not worth the risk)?
>>>
>>> Right, I wouldn't backport.
>>>
 +  if (warn_overloaded_virtual == 1
 +  && overrider_fndecls.elements () == num_fns)
 +/* All the fns override a base virtual.  */
 +continue;
>>>
>>> This looks like the only use of the overrider_fndecls hash_set.  A
>>> hash_set seems a bit overkill for checking whether everything in fns

>>> is an overrider; keeping track of how many times the old 
>>> any_override
>>> was set should work just as well?
>> Yeah you’re right :-/ I’ve changed my latest patch to simply 
>> count
>> overriders.
>>
 +  /* fndecls hides base_fndecls[k].  */
 +  auto_vec &hiders =
 +hidden_base_fndecls.get_or_insert (base_fndecls[k]);
 +  if (!hiders.contains (fndecl))
 +hiders.safe_push (fndecl);
>>>
>>> Hmm, do you think users want a full list of the overloads that don't

>>> override?  I'd think the problem is more the overload that doesn't

>>> exist rather than the ones that do.  The current code ends up in the

>>> OVERLOAD handling of dump_decl that just prints scope::name.
>> Indeed, the full list is probably not super useful... One problem 
>> with
>> the current code is that for conversion operators, it will give a 
>> note
>> such as “note:   by 'operator’”, so I propose to keep track of 
>> at
>> least one of the hiders, and use it to show the note (and get a 
>> proper
>> “by 'virtual B::operator char()'” note for conversion operators).
>>
>> Hence the updated patch, successfully tested on x86_64-pc-linux-gnu. 
>> Ok
>> for trunk?
>
>> +else if (!template_p /* Template methods don't override.  */
>> + && same_signature_p (fndecl, base_fndecls[k]))
>> +  {
>> +overriden_base_fndecls.add (base_fndecls[k]);
>> +++num_overriders;
>> +  }
>
> I'm concerned that this will increment num_overriders multiple times 
> for a single fndecl if it overrides functions in multiple bases.
Such a case is covered by the new Woverloaded-virt11.C and does not 
warn, but it’s true that we don’t take the “if 
(warn_overloaded_virtual == 1 && num_overriders == num_fns)” continue, 
and we should - thanks.

I have updated the patch to only increment num_overriders at the end of 
the loop iterating on base functions if we’ve seen at least one 
overridden base function. Successfully tested on x86_64-pc-linux-gnu. OK 
for trunk?

Thanks, Simon

From 995bd213d82d0428299bf185ffa5f13536cc523a Mon Sep 17 00:00:00 2001
From: Simon Martin 
Date: Fri, 31 Jan 2025 14:53:01 +0100
Subject: [PATCH] c++: Fix overeager Woverloaded-virtual with conversion
 operators [PR109918]

We currently emit an incorrect -Woverloaded-virtual warning upon the
following test case

=== cut here ===
struct A {
  virtual operator int() { return 42; }
  virtual operator char() = 0;
};
struct B : public A {
  operator char() { return 'A'; }
};
=== cut here ===

The problem is that when iterating over ovl_range (fns), warn_hidden
gets confus

[PATCH 0/61] Improve Mips target

2025-01-31 Thread Aleksandar Rakic

This patch series improves the support for the mips64r6 target in GCC,
includes enhancements and general bug fixes, and contains other
MIPS ISA and processor enablement.

These patches are cherry-picked from the mips_rel/11_2_0/master
and mips_rel/9_3_0/master branches from the MIPS' repository:
https://github.com/MIPS/gcc .
Further details on the individual changes are included in the
respective patches.

Re: [PATCH] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Richard Biener

On Fri, 31 Jan 2025, Jakub Jelinek wrote:

> On Fri, Jan 31, 2025 at 01:38:36PM +0100, Richard Biener wrote:
> > > @@ -718,8 +720,11 @@ func_checker::compare_gimple_call (gcall
> > >  
> > >/* For direct calls we verify that types are compatible so if we 
> > > matched
> > >   callees, callers must match, too.  For indirect calls however verify
> > > - function type.  */
> > > -  if (!gimple_call_fndecl (s1))
> > > + function type.  And also verify it for direct calls with some 
> > > different
> > > + fntype.  */
> > > +  if (!gimple_call_fndecl (s1)
> > > +  || TREE_TYPE (TREE_TYPE (t1)) != fntype1
> > > +  || TREE_TYPE (TREE_TYPE (t2)) != fntype2)
> > 
> > I think we want to always compare the ABI relevant fntypes.  It seems
> > we can arrive here with internal function calls where t1/t2 are
> > "something" (NULL?).  I guess doing this as else {} of the
> 
> For internal calls gimple_call_fndecl (s1) will be NULL, so
> !gimple_call_fndecl (s1) will be true and so the new checks aren't done.

Yes, but also fntype1/2 will be NULL then.

> > if (gimple_call_internal_p (s1) (with gimple_call_internal_fn compare
> > in a conditional if) would be a lot clearer?
> 
> What the patch does is just trying to avoid the comparison in the common
> case (direct calls from the beginning and there what the comment says
> applies, if there would be a mismatch, we'd already know that).
> 
> If you want to compare unconditionally, it would be about just removing the
>   if (!gimple_call_fndecl (s1))
> {
> and
> }
> and reindenting + rewriting the comment above it.  Shall I do that?

That's what I suggested, or rather

  if (gimple_call_internal_p (s1))
{
  if (gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
 return false;
}
  else
{
  tree fntype1 = gimple_call_fntype (s1);
  tree fntype2 = gimple_call_fntype (s2);

  if ((fntype1 && !fntype2)
  || (!fntype1 && fntype2)
  || (fntype1 && !types_compatible_p (fntype1, fntype2)))
return return_false_with_msg ("call function types are not 
compatible");
}

I think in the else { fntype1 and fntype2 should never be NULL and thus
this should simplify even more.

Richard.

>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

[PATCH] icf, v2: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Jakub Jelinek

On Fri, Jan 31, 2025 at 02:29:57PM +0100, Jakub Jelinek wrote:
> > }
> >   else
> > {
> >   tree fntype1 = gimple_call_fntype (s1);
> >   tree fntype2 = gimple_call_fntype (s2);
> > 
> >   if ((fntype1 && !fntype2)
> >   || (!fntype1 && fntype2)
> >   || (fntype1 && !types_compatible_p (fntype1, fntype2)))
> > return return_false_with_msg ("call function types are not 
> > compatible");
> > }
> > 
> > I think in the else { fntype1 and fntype2 should never be NULL and thus
> > this should simplify even more.
> 
> This isn't possible because fntype{1,2} are used later on in the function;
> sure, that
>   if (fntype1 && fntype2 && comp_type_attributes (fntype1, fntype2) != 1)
> return return_false_with_msg ("different fntype attributes");
> can be moved into the else, but the new checks to determine which args to
> check still use that.

So like this then (if it passes bootstrap/regtest)?

2025-01-31  Jakub Jelinek  

PR ipa/117432
* ipa-icf-gimple.cc (func_checker::compare_asm_inputs_outputs):
Also return_false if operands have incompatible types.
(func_checker::compare_gimple_call): Check fntype1 vs. fntype2
compatibility for all non-internal calls and assume fntype1 and
fntype2 are non-NULL for those.  For calls to non-prototyped
calls or for stdarg_p functions after the last named argument (if any)
check type compatibility of call arguments.

* gcc.c-torture/execute/pr117432.c: New test.
* gcc.target/i386/pr117432.c: New test.

--- gcc/ipa-icf-gimple.cc.jj2025-01-30 18:29:22.237190471 +0100
+++ gcc/ipa-icf-gimple.cc   2025-01-31 15:25:57.168535197 +0100
@@ -459,7 +459,9 @@ func_checker::compare_asm_inputs_outputs
return false;
 
   if (!compare_operand (TREE_VALUE (t1), TREE_VALUE (t2),
-   get_operand_access_type (map, t1)))
+   get_operand_access_type (map, t1))
+ || !types_compatible_p (TREE_TYPE (TREE_VALUE (t1)),
+ TREE_TYPE (TREE_VALUE (t2))))
return return_false ();
 
   tree p1 = TREE_PURPOSE (t1);
@@ -709,26 +711,37 @@ func_checker::compare_gimple_call (gcall
   || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p 
(s2))
 return false;
 
-  if (gimple_call_internal_p (s1)
-  && gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
-return false;
-
-  tree fntype1 = gimple_call_fntype (s1);
-  tree fntype2 = gimple_call_fntype (s2);
-
-  /* For direct calls we verify that types are compatible so if we matched
- callees, callers must match, too.  For indirect calls however verify
- function type.  */
-  if (!gimple_call_fndecl (s1))
+  unsigned check_arg_types_from = 0;
+  if (gimple_call_internal_p (s1))
 {
-  if ((fntype1 && !fntype2)
- || (!fntype1 && fntype2)
- || (fntype1 && !types_compatible_p (fntype1, fntype2)))
-   return return_false_with_msg ("call function types are not compatible");
+  if (gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
+   return false;
 }
+  else
+{
+  tree fntype1 = gimple_call_fntype (s1);
+  tree fntype2 = gimple_call_fntype (s2);
+  if (!types_compatible_p (fntype1, fntype2))
+   return return_false_with_msg ("call function types are not compatible");
+
+  if (comp_type_attributes (fntype1, fntype2) != 1)
+   return return_false_with_msg ("different fntype attributes");
 
-  if (fntype1 && fntype2 && comp_type_attributes (fntype1, fntype2) != 1)
-return return_false_with_msg ("different fntype attributes");
+  check_arg_types_from = gimple_call_num_args (s1);
+  if (!prototype_p (fntype1) || !prototype_p (fntype2))
+   check_arg_types_from = 0;
+  else if (stdarg_p (fntype1))
+   {
+ check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype1));
+ if (stdarg_p (fntype2))
+   {
+ unsigned n = list_length (TYPE_ARG_TYPES (fntype2));
+ check_arg_types_from = MIN (check_arg_types_from, n);
+   }
+   }
+  else if (stdarg_p (fntype2))
+   check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype2));
+}
 
   tree chain1 = gimple_call_chain (s1);
   tree chain2 = gimple_call_chain (s2);
@@ -746,6 +759,10 @@ func_checker::compare_gimple_call (gcall
 
   if (!compare_operand (t1, t2, get_operand_access_type (&map, t1)))
return return_false_with_msg ("GIMPLE call operands are different");
+  if (i >= check_arg_types_from
+ && !types_compatible_p (TREE_TYPE (t1), TREE_TYPE (t2)))
+   return return_false_with_msg ("GIMPLE call operand types are "
+ "different");
 }
 
   /* Return value checking.  */
--- gcc/testsuite/gcc.c-torture/execute/pr117432.c.jj   2025-01-31 
15:14:54.358852495 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr117432.

Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Abhishek Kaushik

Sorry for the confusion; the change is for the Intel compiler, which is not able
to correctly vectorize the while loop.
I'll change the commit message to show this clearly.

But it looks like the change still might be beneficial to g++: 
https://godbolt.org/z/Mo3PdxbaY



From: Jonathan Wakely 
Sent: Friday, January 31, 2025 7:19 PM
To: Richard Biener 
Cc: Abhishek Kaushik ; libstd...@gcc.gnu.org 
; gcc-patches@gcc.gnu.org 
Subject: Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

On Fri, 31 Jan 2025 at 12:48, Richard Biener  wrote:
>
> On Fri, Jan 31, 2025 at 12:01 PM Abhishek Kaushik
>  wrote:
> >
> > From 4ac7c7e56e23ed2f4dd2dafdfab6cfa110c14260 Mon Sep 17 00:00:00 2001
> > From: Abhishek Kaushik 
> > Date: Fri, 31 Jan 2025 01:28:48 -0800
> > Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce
> >
> > The current while loop in std::reduce and related functions is hard to
> > vectorize because the loop control variable is hard to detect.
> >
> > `while ((__last - __first) >= 4)`
> >
> > Changing the loop header to a for loop following the OpenMP canonical
> > form allows easy vectorization, resulting in improved performance.
> >
> > `for (; __first <= __last - 4; __first += 4)`
> >
> > This patch modifies the loop header for std::reduce & std::transform_reduce.
>
> Can you add a testcase to g++.dg/vect/ that is now vectorized but not before?

According to https://gcc.gnu.org/pipermail/libstdc++/2025-January/060353.html
this is only a problem for the Intel compiler, not for GCC. So a GCC
testcase doesn't help.

But if it's only for Intel, then the commit msg should say that.


>
> Thanks,
> Richard.
>
> > ---
> >  libstdc++-v3/include/std/numeric | 10 +++---
> >  1 file changed, 3 insertions(+), 7 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/numeric 
> > b/libstdc++-v3/include/std/numeric
> > index 4d36fcd36d9..9c38ad89e21 100644
> > --- a/libstdc++-v3/include/std/numeric
> > +++ b/libstdc++-v3/include/std/numeric
> > @@ -300,13 +300,12 @@ namespace __detail
> >static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, 
> > __ref>);
> >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > {
> > - while ((__last - __first) >= 4)
> > + for (; __first <= __last - 4; __first += 4)
> > {
> >   _Tp __v1 = __binary_op(__first[0], __first[1]);
> >   _Tp __v2 = __binary_op(__first[2], __first[3]);
> >   _Tp __v3 = __binary_op(__v1, __v2);
> >   __init = __binary_op(__init, __v3);
> > - __first += 4;
> > }
> > }
> >for (; __first != __last; ++__first)
> > @@ -381,7 +380,7 @@ namespace __detail
> >if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
> > __is_random_access_iter<_InputIterator2>>)
> > {
> > - while ((__last1 - __first1) >= 4)
> > + for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
> > {
> >   _Tp __v1 = __binary_op1(__binary_op2(__first1[0], __first2[0]),
> >   __binary_op2(__first1[1], __first2[1]));
> > @@ -389,8 +388,6 @@ namespace __detail
> >   __binary_op2(__first1[3], __first2[3]));
> >   _Tp __v3 = __binary_op1(__v1, __v2);
> >   __init = __binary_op1(__init, __v3);
> > - __first1 += 4;
> > - __first2 += 4;
> > }
> > }
> >for (; __first1 != __last1; ++__first1, (void) ++__first2)
> > @@ -447,7 +444,7 @@ namespace __detail
> >  {
> >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > {
> > - while ((__last - __first) >= 4)
> > + for (; __first <= __last - 4; __first += 4)
> > {
> >   _Tp __v1 = __binary_op(__unary_op(__first[0]),
> >  __unary_op(__first[1]));
> > @@ -455,7 +452,6 @@ namespace __detail
> >  __unary_op(__first[3]));
> >   _Tp __v3 = __binary_op(__v1, __v2);
> >   __init = __binary_op(__init, __v3);
> > - __first += 4;
> > }
> > }
> >for (; __first != __last; ++__first)
> > --
> > 2.31.1
> >
> >
> >
> >
>

[PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Abhishek Kaushik

>From 7a7c9a2a976fbb29f67c46284e7c1581cbe8cb07 Mon Sep 17 00:00:00 2001
From: Abhishek Kaushik 
Date: Fri, 31 Jan 2025 01:28:48 -0800
Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce

This change is for the INTEL C compiler (icx).

The current while loop in std::reduce and related functions is hard to
vectorize because the loop control variable is hard to detect in icx.

`while ((__last - __first) >= 4)`

Changing the loop header to a for loop following the OpenMP canonical
form allows easy vectorization, resulting in improved performance.

`for (; __first <= __last - 4; __first += 4)`

This patch modifies the loop header for std::reduce & std::transform_reduce.
---
 libstdc++-v3/include/std/numeric | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index 4d36fcd36d9..9c38ad89e21 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -300,13 +300,12 @@ namespace __detail
   static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, __ref>);
   if constexpr (__is_random_access_iter<_InputIterator>::value)
{
- while ((__last - __first) >= 4)
+ for (; __first <= __last - 4; __first += 4)
{
  _Tp __v1 = __binary_op(__first[0], __first[1]);
  _Tp __v2 = __binary_op(__first[2], __first[3]);
  _Tp __v3 = __binary_op(__v1, __v2);
  __init = __binary_op(__init, __v3);
- __first += 4;
}
}
   for (; __first != __last; ++__first)
@@ -381,7 +380,7 @@ namespace __detail
   if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
__is_random_access_iter<_InputIterator2>>)
{
- while ((__last1 - __first1) >= 4)
+ for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
{
  _Tp __v1 = __binary_op1(__binary_op2(__first1[0], __first2[0]),
  __binary_op2(__first1[1], __first2[1]));
@@ -389,8 +388,6 @@ namespace __detail
  __binary_op2(__first1[3], __first2[3]));
  _Tp __v3 = __binary_op1(__v1, __v2);
  __init = __binary_op1(__init, __v3);
- __first1 += 4;
- __first2 += 4;
}
}
   for (; __first1 != __last1; ++__first1, (void) ++__first2)
@@ -447,7 +444,7 @@ namespace __detail
 {
   if constexpr (__is_random_access_iter<_InputIterator>::value)
{
- while ((__last - __first) >= 4)
+ for (; __first <= __last - 4; __first += 4)
{
  _Tp __v1 = __binary_op(__unary_op(__first[0]),
 __unary_op(__first[1]));
@@ -455,7 +452,6 @@ namespace __detail
 __unary_op(__first[3]));
  _Tp __v3 = __binary_op(__v1, __v2);
  __init = __binary_op(__init, __v3);
- __first += 4;
}
}
   for (; __first != __last; ++__first)
--
2.31.1





From: Abhishek Kaushik
Sent: Friday, January 31, 2025 4:28 PM
To: libstd...@gcc.gnu.org 
Cc: gcc-patches@gcc.gnu.org 
Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce

>From 4ac7c7e56e23ed2f4dd2dafdfab6cfa110c14260 Mon Sep 17 00:00:00 2001
From: Abhishek Kaushik 
Date: Fri, 31 Jan 2025 01:28:48 -0800
Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce

The current while loop in std::reduce and related functions is hard to
vectorize because the loop control variable is hard to detect.

`while ((__last - __first) >= 4)`

Changing the loop header to a for loop following the OpenMP canonical
form allows easy vectorization, resulting in improved performance.

`for (; __first <= __last - 4; __first += 4)`

This patch modifies the loop header for std::reduce & std::transform_reduce.
---
 libstdc++-v3/include/std/numeric | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index 4d36fcd36d9..9c38ad89e21 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -300,13 +300,12 @@ namespace __detail
   static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, __ref>);
   if constexpr (__is_random_access_iter<_InputIterator>::value)
{
- while ((__last - __first) >= 4)
+ for (; __first <= __last - 4; __first += 4)
{
  _Tp __v1 = __binary_op(__first[0], __first[1]);
  _Tp __v2 = __binary_op(__first[2], __first[3]);
  _Tp __v3 = __binary_op(__v1, __v2);
  __init = __binary_op(__init, __v3);
- __first += 4;
}
}
   for (; __first != __last; ++__first)
@@ -381,7 +380,7 @@ namespace __detail
   if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
__is_random_access_iter<_InputIterator2>>)
{
- while ((__last1 - __first1) >= 4)
+ for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
{
  _Tp __v1 = __binary_op1(__bin

Re: [PATCH] c++: auto in trailing-return-type in parameter [PR117778]

2025-01-31 Thread Jason Merrill


On 1/30/25 5:24 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/14?

-- >8 --
This PR describes a few issues, both ICE and rejects-valid, but
ultimately the problem is that we don't properly synthesize the
second auto in:

   int
   g (auto fp() -> auto)
   {
 return fp ();
   }

since r12-5860, which disabled auto_is_implicit_function_template_parm_p
in cp_parser_parameter_declaration after parsing the decl-specifier-seq.

If there is no trailing auto, there is no problem.

So we have to make sure auto_is_implicit_function_template_parm_p is
properly set when parsing the trailing auto.  A complication is that
one can write:

   auto f (auto fp(auto fp2() -> auto) -> auto) -> auto;
                                          ~~~~

where only the underlined auto should be synthesized.  So when we
parse a parameter-declaration-clause inside another
parameter-declaration-clause, we should not enable the flag.  We
have no flags to keep track of such nesting, but I think I can walk
current_binding_level to see if we find ourselves in such an unlikely
scenario.

PR c++/117778

gcc/cp/ChangeLog:

* parser.cc (cp_parser_late_return_type_opt): Maybe override
auto_is_implicit_function_template_parm_p.
(cp_parser_parameter_declaration): Update commentary.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/lambda-generic-117778.C: New test.
* g++.dg/cpp2a/abbrev-fn2.C: New test.
* g++.dg/cpp2a/abbrev-fn3.C: New test.
---
  gcc/cp/parser.cc  | 24 -
  .../g++.dg/cpp1y/lambda-generic-117778.C  | 12 +
  gcc/testsuite/g++.dg/cpp2a/abbrev-fn2.C   | 49 +++
  gcc/testsuite/g++.dg/cpp2a/abbrev-fn3.C   |  7 +++
  4 files changed, 90 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/lambda-generic-117778.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/abbrev-fn2.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/abbrev-fn3.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 44515bb9074..89c5c2721a7 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -25514,6 +25514,25 @@ cp_parser_late_return_type_opt (cp_parser *parser, 
cp_declarator *declarator,
/* Consume the ->.  */
cp_lexer_consume_token (parser->lexer);
  
+  /* We may be in the context of parsing a parameter declaration,

+namely, its declarator.  auto_is_implicit_function_template_parm_p
+will be disabled in that case.  But for code like
+
+  int g (auto fp() -> auto);
+
+we have to re-enable the flag for the trailing auto.  However, that
+only applies for the outermost trailing auto in a parameter clause; in
+
+  int f2 (auto fp(auto fp2() -> auto) -> auto);
+
+the inner -> auto should not be synthesized.  */
+  int i = 0;
+  for (cp_binding_level *b = current_binding_level;
+  b->kind == sk_function_parms; b = b->level_chain)
+   ++i;
+  auto cleanup = make_temp_override
+   (parser->auto_is_implicit_function_template_parm_p, i == 2);


This looks like it will wrongly allow declaring an implicit template 
within a function; you need a testcase with local extern declarations.


Incidentally, it seems odd that the override in 
cp_parser_parameter_declaration is before an error early exit a few 
lines below, moving it after that would avoid needing to clean it up on 
that path.



type = cp_parser_trailing_type_id (parser);
  }
  
@@ -26283,8 +26302,9 @@ cp_parser_parameter_declaration (cp_parser *parser,

   type-constraint opt auto can be used as a decl-specifier of the
   decl-specifier-seq of a parameter-declaration of a function declaration
   or lambda-expression..." but we must not synthesize an implicit template
- type parameter in its declarator.  That is, in "void f(auto[auto{10}]);"
- we want to synthesize only the first auto.  */
+ type parameter in its declarator (except the trailing-return-type).
+ That is, in "void f(auto[auto{10}]);" we want to synthesize only the
+ first auto.  */
auto cleanup = make_temp_override
  (parser->auto_is_implicit_function_template_parm_p, false);
  
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-117778.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-117778.C

new file mode 100644
index 000..f377e3acc91
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-117778.C
@@ -0,0 +1,12 @@
+// PR c++/117778
+// { dg-do compile { target c++14 } }
+
+auto l1 = [](auto (*fp)() -> auto) { return fp; };
+auto l2 = [](auto fp() -> auto) { return fp; };
+auto l3 = [](auto fp()) { return fp; };
+auto l4 = [](auto (*fp)()) { return fp; };
+auto l5 = [](auto fp() -> auto) -> auto { return fp; };
+auto l6 = [](auto fp(auto fp2()) -> auto) -> auto { return fp; }; // { dg-error 
".auto. parameter not permitted" }
+auto l7 = [](auto fp(auto fp2() -> auto) -> auto) -> auto { return f

Re: [PATCH v2] c++: Don't merge friend declarations that specify default arguments [PR118319]

2025-01-31 Thread Simon Martin

Hi Jason,

On 31 Jan 2025, at 16:29, Jason Merrill wrote:

> On 1/31/25 9:52 AM, Simon Martin wrote:
>> Hi Jason,
>>
>> On 9 Jan 2025, at 22:55, Jason Merrill wrote:
>>
>>> On 1/9/25 8:25 AM, Simon Martin wrote:
 We segfault upon the following invalid code

 === cut here ===
 template  struct S {
 friend void foo (int a = []{}());
 };
 void foo (int a) {}
 int main () {
 S<0> t;
 foo ();
 }
 === cut here ===

 The problem is that we end up with a LAMBDA_EXPR callee in
 set_flags_from_callee, and dereference its NULL_TREE
 TREE_TYPE (TREE_TYPE ( )).

 This patch simply sets the default argument to error_mark_node for
 friend functions that do not meet the requirement in C++17 
 11.3.6/4.

 Successfully tested on x86_64-pc-linux-gnu.

PR c++/118319

 gcc/cp/ChangeLog:

* decl.cc (grokfndecl): Inspect all friend function parameters,
and set them to error_mark_node if invalid.

 gcc/testsuite/ChangeLog:

* g++.dg/parse/defarg18.C: New test.

 ---
gcc/cp/decl.cc| 13 +---
gcc/testsuite/g++.dg/parse/defarg18.C | 48
 +++
2 files changed, 57 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/parse/defarg18.C

 diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
 index 503ecd9387e..b2761c23d3e 100644
 --- a/gcc/cp/decl.cc
 +++ b/gcc/cp/decl.cc
 @@ -11134,14 +11134,19 @@ grokfndecl (tree ctype,
 expression, that declaration shall be a definition..."  */
  if (friendp && !funcdef_flag)
{
 +  bool has_permerrored = false;
  for (tree t = FUNCTION_FIRST_USER_PARMTYPE (decl);
   t && t != void_list_node; t = TREE_CHAIN (t))
if (TREE_PURPOSE (t))
  {
 -  permerror (DECL_SOURCE_LOCATION (decl),
 - "friend declaration of %qD specifies default "
 - "arguments and isn%'t a definition", decl);
 -  break;
 +  if (!has_permerrored)
 +{
 +  has_permerrored = true;
 +  permerror (DECL_SOURCE_LOCATION (decl),
 + "friend declaration of %qD specifies default "
 + "arguments and isn%'t a definition", decl);
 +}
 +  TREE_PURPOSE (t) = error_mark_node;
>>>
>>> If we're going to unconditionally change TREE_PURPOSE, then 
>>> permerror
>>> needs to strengthen to error.  But I'd think we could leave the
>>> current state in a non-template class, only changing the template
>>> case.
>> Thanks. It’s true that setting the argument to error_mark_node is
>> contradictory with the fact that we accept the code with 
>> -fpermissive,
>> even if only under processing_template_decl, so I checked if 
>> there’s
>> not a better way of approaching this PR.
>>
>> After a bit of investigation, I think that the real problem is that
>> duplicate_decls tries to merge the two declarations, even though they
>> don’t meet the constraint about friend functions and default
>> arguments.
>
> I disagree; in this testcase the friend is the (lexically) first 
> declaration, the problem is that it's a non-defining friend (in a 
> template) that specifies default args, as addressed by your first 
> patch.
Fair.

> I still think my earlier comments are the way forward here: leave the 
> non-template case alone (permerror, don't change TREE_PURPOSE), in a 
> template give a hard error and change to error_mark_node.
Thanks, understood. The reason I looked for another “solution” is 
that it felt strange to be permissive in non-templates and stricter in 
templates. For example, if we do so, we’ll regress the case I added in 
defarg19.C in -fpermissive (also available at 
https://godbolt.org/z/YT3dexGjM).

I’m probably splitting hairs, and I’m happy to go ahead with your 
suggestion if you think it’s fine. Otherwise I’ll see if I find some 
better fix.

Simon

[PATCH 17/61] Add -munique-sections feature

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

gcc/
* config/mips/mips.cc (mips_unique_sections_list): New global
variable.
(mips_read_list): Update prototype and error message.
(ultimate_transparent_alias_target): New function.  Copied
from varasm.c.
(mips_asm_unique_section): Update to rename unique sections.
(mips_option_override): Read the unique_sections file.
* config/mips/mips.opt: Add -munique-sections option.
* doc/invoke.texi: Document -munique-sections
* varasm.cc (resolve_unique_section): Try to create a unique
section even for explicitly provided section names.
(default_unique_section): Do nothing if a section is already
set.

gcc/testsuite/
* gcc.target/mips/mips.exp: Support -munique-sections.
(mips-dg-options): Translate filename argument to
-munique-sections.
* gcc.target/mips/unique-sections-bad.c: New file.
* gcc.target/mips/unique-sections.c: Likewise.
* gcc.target/mips/unique-sections.txt: Likewise.

Cherry-picked 9cd38c0b698287caff43d0aac3c963bb425391d8
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 54 +--
 gcc/config/mips/mips.opt  |  4 ++
 gcc/doc/invoke.texi   | 10 
 gcc/testsuite/gcc.target/mips/mips.exp| 18 +++
 .../gcc.target/mips/unique-sections-bad.c |  3 ++
 .../gcc.target/mips/unique-sections.c | 15 ++
 .../gcc.target/mips/unique-sections.txt   |  3 ++
 gcc/varasm.cc | 11 
 8 files changed, 114 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/unique-sections-bad.c
 create mode 100644 gcc/testsuite/gcc.target/mips/unique-sections.c
 create mode 100644 gcc/testsuite/gcc.target/mips/unique-sections.txt

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 6e48feeb560..55d06b87c0d 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -647,9 +647,10 @@ struct mips_sdata_entry
 };
 
 static struct mips_sdata_entry *mips_sdata_opt_list;
+static struct mips_sdata_entry *mips_unique_sections_list;
 
 static struct mips_sdata_entry *
-mips_read_list (const char * filename)
+mips_read_list (const char * filename, const char *opt_name)
 {
   FILE *fd;
   char line[256];
@@ -661,7 +662,7 @@ mips_read_list (const char * filename)
   fd = fopen (filename, "r");
   if (fd == NULL)
 {
-  error ("Bad filename for -msdata-opt-list: %s\n", filename);
+  error ("Bad filename for %s: %s\n", opt_name, filename);
   return NULL;
 }
 
@@ -9884,16 +9885,57 @@ mips_encode_section_info (tree decl, rtx rtl, int first)
 }
 }
 
+/* This should be the same as ultimate_transparent_alias_target from
+   gcc/varasm.c.  */
+
+static inline tree
+ultimate_transparent_alias_target (tree *alias)
+{
+  tree target = *alias;
+
+  if (IDENTIFIER_TRANSPARENT_ALIAS (target))
+{
+  gcc_assert (TREE_CHAIN (target));
+  target = ultimate_transparent_alias_target (&TREE_CHAIN (target));
+  gcc_assert (! IDENTIFIER_TRANSPARENT_ALIAS (target)
+ && ! TREE_CHAIN (target));
+  *alias = target;
+}
+
+  return target;
+}
+
 /* Implement TARGET_ASM_UNIQUE_SECTION.  */
 
 void
 mips_asm_unique_section (tree decl, int reloc)
 {
+  const char *old_secname = DECL_SECTION_NAME (decl);
+
+  if (old_secname != NULL
+  && mips_find_list (old_secname, mips_unique_sections_list))
+{
+  tree id = DECL_ASSEMBLER_NAME (decl);
+  ultimate_transparent_alias_target (&id);
+  const char *name = IDENTIFIER_POINTER (id);
+  name = targetm.strip_name_encoding (name);
+
+  /* We may end up here twice for data symbols,
+so we need to prevent renaming sections twice.  */
+  char *suffix = ACONCAT ((".", name, NULL));
+  if (strstr (old_secname, suffix) == NULL)
+   {
+ char *new_secname = ACONCAT ((old_secname, suffix, NULL));
+ set_decl_section_name (decl, new_secname);
+   }
+}
+
   default_unique_section (decl, reloc);
 
   const char *name = DECL_SECTION_NAME (decl);
 
-  if (mips_sdata_section_num > -1
+  if (old_secname == NULL
+  && mips_sdata_section_num > -1
   && (strncmp (".sdata", name, 6) == 0
  || strncmp (".sbss", name, 5) == 0))
 {
@@ -20664,7 +20706,11 @@ mips_option_override (void)
   if (TARGET_FLIP_MIPS16)
 TARGET_INTERLINK_COMPRESSED = 1;
 
-  mips_sdata_opt_list = mips_read_list (mips_sdata_opt_list_file);
+  mips_sdata_opt_list = mips_read_list (mips_sdata_opt_list_file,
+   "-msdata-opt-list");
+
+  mips_unique_sections_list = mips_read_list (mips_unique_sections_file,
+ "-munique-sections");
 
   /* Set the small data limit.  */
   mips_sm

[PATCH 15/61] Possible inlining improvements with -Os

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

--param early-inlining-insns-cold=NUMBER
--param max-inline-insns-small-and-cold=NUMBER

Analysis shows that the main difference between -O2 and -Os comes down to
inlining of cold or unlikely functions. The new parameters (defaulted to
0) mean to disable these limitations with -Os. NUMBER could be set to
something like 4-32 to see the impact.

The main reason that smaller functions are treated as cold or unlikely
is the function cgraph_maybe_hot_edge_p () always returning FALSE for
-Os.

Cherry-picked c38d7e548cbb3defb141efb528cb356333e8eb7a
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/ipa-inline.cc | 4 +++-
 gcc/params.opt| 8 
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index fe8efa9a157..1a2a62b73cd 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -820,7 +820,8 @@ want_early_inline_function_p (struct cgraph_edge *e)
 
   if (!want_inline || growth <= param_max_inline_insns_size)
;
-  else if (!e->maybe_hot_p ())
+  else if (!e->maybe_hot_p ()
+  && growth > param_early_inlining_insns_cold)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
@@ -1060,6 +1061,7 @@ want_inline_small_function_p (struct cgraph_edge *e, bool 
report)
}
   /* If call is cold, do not inline when function body would grow. */
   else if (!e->maybe_hot_p ()
+&& growth > param_max_inline_insns_small_and_cold
   && (growth >= inline_insns_single (e->caller, false, false)
   || growth_positive_p (callee, e, growth)))
{
diff --git a/gcc/params.opt b/gcc/params.opt
index 7c572774df2..edb62a221fb 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -130,6 +130,10 @@ Maximum size (in bytes) of objects tracked bytewise by 
dead store elimination.
 Common Joined UInteger Var(param_early_inlining_insns) Init(6) Optimization 
Param
 Maximal estimated growth of function body caused by early inlining of single 
call.
 
+-param=early-inlining-insns-cold=
+Common Joined UInteger Var(param_early_inlining_insns_cold) Init(0) 
Optimization Param
+Maximal estimated growth of function body caused by early inlining of cold 
call.
+
 -param=fsm-scale-path-stmts=
 Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(2) IntegerRange(1, 
10) Param Optimization
 Scale factor to apply to the number of statements in a threading path crossing 
a loop backedge when comparing to max-jump-thread-duplication-stmts.
@@ -573,6 +577,10 @@ The maximum number of instructions when inlining for size.
 Common Joined UInteger Var(param_max_inline_insns_small) Optimization Param
 The maximum number of instructions when automatically inlining small functions.
 
+-param=max-inline-insns-small-and-cold=
+Common Joined UInteger Var(param_max_inline_insns_small_and_cold) Optimization 
Init(0) Param
+The maximum number of instructions in a small and cold function eligible for 
inlining.
+
 -param=max-inline-recursive-depth=
 Common Joined UInteger Var(param_max_inline_recursive_depth) Optimization 
Init(8) Param
 The maximum depth of recursive inlining for inline functions.
-- 
2.34.1

[PATCH 29/61] Prevent FP values being spilled to GPRs

2025-01-31 Thread Aleksandar Rakic

From: Simon Dardis 

gcc/
* config/mips/mips.cc (mips_ira_change_pseudo_allocno_class):
Prevent FP modes being reloaded to GPRs. Don't force integer
mode pseudos into GR_REGS (and likewise for float mode pseudos
and FP_REGS) if both the allocno class and best cost class are
ALL_REGS to prevent inefficient scattered complex load
with MSA.

gcc/testsuite/

* gcc.target/mips/msa-scattered-load.c: New.

Cherry-picked 1996aa906aeb4f958b77bb12aa60745ca9962fa2,
5314c36e83e9f8e13144b3a991e392d152514938 and
e9b42ac26ee8eeea0f5ca5a54b3b2dca5a69dd71
from https://github.com/MIPS/gcc

Signed-off-by: Simon Dardis 
Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 16 ---
 .../gcc.target/mips/msa-scattered-load.c  | 20 +++
 2 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-scattered-load.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 36ce297085b..1fa727c2ff5 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -25347,7 +25347,7 @@ mips_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED,
 
 static reg_class_t
 mips_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
- reg_class_t best_class ATTRIBUTE_UNUSED)
+ reg_class_t best_class)
 {
   /* LRA will allocate an FPR for an integer mode pseudo instead of spilling
  to memory if an FPR is present in the allocno class.  It is rare that
@@ -25357,7 +25357,9 @@ mips_ira_change_pseudo_allocno_class (int regno, 
reg_class_t allocno_class,
  to reload into FPRs in LRA.  Such reloads are sometimes eliminated and
  sometimes only partially eliminated.  We choose to take this penalty
  in order to eliminate usage of FPRs in code that does not use floating
- point data.
+ point data.  In the case when IRA computes both allocno class and best
+ cost class as ALL_REGS, do not force integer mode pseudo into GR_REGS
+ as it is probably best to be placed into FPR.
 
  This change has a similar effect to increasing the cost of FPR->GPR
  register moves for integer modes so that they are higher than the cost
@@ -25366,8 +25368,16 @@ mips_ira_change_pseudo_allocno_class (int regno, 
reg_class_t allocno_class,
  This is also similar to forbidding integer mode values in FPRs entirely
  but this would lead to an inconsistency in the integer to/from float
  instructions that say integer mode values must be placed in FPRs.  */
-  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS)
+  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS
+  && allocno_class != best_class)
 return GR_REGS;
+
+  /* Likewise for the mirror case of floating mode pseudos being allocated in
+ a GPR.  */
+  if (FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS
+  && allocno_class != best_class)
+return FP_REGS;
+
   return allocno_class;
 }
 
diff --git a/gcc/testsuite/gcc.target/mips/msa-scattered-load.c 
b/gcc/testsuite/gcc.target/mips/msa-scattered-load.c
new file mode 100644
index 00000000000..f42574ae772
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-scattered-load.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+#include <complex.h>
+#include <msa.h>
+#include <stddef.h>
+
+void pgather2cf1(const float complex* from, v4f32* pv, size_t stride) {
+  v4f32 v;
+  v[0] = crealf(from[0]);
+  v[1] = cimagf(from[0]);
+  v[2] = crealf(from[stride]);
+  v[3] = cimagf(from[stride]);
+  *pv = v;
+}
+
+/* { dg-final { scan-assembler-not "mfc1" } } */
+/* { dg-final { scan-assembler-not "mtc1" } } */
+
-- 
2.34.1

[PATCH 05/61] Hazard barrier return support

2025-01-31 Thread Aleksandar Rakic

From: Chao-ying Fu 

gcc/
* config/mips/mips.cc (mips_use_hazard_barrier_return_p): New
static function.
(mips_function_attr_inlinable_p): Likewise.
(mips_compute_frame_info): Set use_hazard_barrier_return_p.
Emit error for unsupported architecture choice.
(mips_function_ok_for_sibcall, mips_can_use_return_insn):
Return false for use_hazard_barrier_return.
(mips_expand_epilogue): Emit hazard barrier return.

gcc/testsuite/
* gcc.target/mips/hazard-barrier-return-attribute.c:
Modified test.

Cherry-picked 42eb0571165dbb5ae518808ba7123b0b9db09a11
from https://github.com/MIPS/gcc

Signed-off-by: Prachi Godbole 
Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Chao-ying Fu 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 60 +--
 .../mips/hazard-barrier-return-attribute.c|  2 +-
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 392755316eb..9db2a2a9396 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -1391,6 +1391,16 @@ mips_get_code_readable_attr (tree decl)
 }
 
 
+/* Check if the attribute to use hazard barrier return is set for
+   the function declaration DECL.  */
+
+static bool
+mips_use_hazard_barrier_return_p (const_tree decl)
+{
+  return lookup_attribute ("use_hazard_barrier_return",
+  DECL_ATTRIBUTES (decl)) != NULL;
+}
+
 /* Return the set of compression modes that are explicitly required
by the attributes in ATTRIBUTES.  */
 
@@ -1576,6 +1586,21 @@ mips_can_inline_p (tree caller, tree callee)
   return default_target_can_inline_p (caller, callee);
 }
 
+/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P.
+
+   A function requesting clearing of all instruction and execution hazards
+   before returning cannot be inlined - thereby not clearing any hazards.
+   All our other function attributes are related to how out-of-line copies
+   should be compiled or called.  They don't in themselves prevent inlining.  
*/
+
+static bool
+mips_function_attr_inlinable_p (const_tree decl)
+{
+  if (mips_use_hazard_barrier_return_p (decl))
+return false;
+  return hook_bool_const_tree_true (decl);
+}
+
 /* Handle an "interrupt" attribute with an optional argument.  */
 
 static tree
@@ -8350,6 +8375,11 @@ mips_function_ok_for_sibcall (tree decl, tree exp 
ATTRIBUTE_UNUSED)
   && !targetm.binds_local_p (decl))
 return false;
 
+  /* Can't generate sibling calls if returning from current function using
+ hazard barrier return.  */
+  if (mips_use_hazard_barrier_return_p (current_function_decl))
+return false;
+
   /* Otherwise OK.  */
   return true;
 }
@@ -11450,6 +11480,18 @@ mips_compute_frame_info (void)
}
 }
 
+  /* Determine whether to use hazard barrier return or not.  */
+  if (mips_use_hazard_barrier_return_p (current_function_decl))
+{
+  if (mips_isa_rev < 2)
+   error ("hazard barrier returns require a MIPS32r2 processor or"
+  " greater");
+  else if (TARGET_MIPS16)
+   error ("hazard barrier returns are not supported for MIPS16 functions");
+  else
+   cfun->machine->use_hazard_barrier_return_p = true;
+}
+
   frame = &cfun->machine->frame;
   memset (frame, 0, sizeof (*frame));
   size = get_frame_size ();
@@ -13139,7 +13181,8 @@ mips_expand_epilogue (bool sibcall_p)
   && !crtl->calls_eh_return
   && !sibcall_p
   && step2 > 0
-  && mips_unsigned_immediate_p (step2, 5, 2))
+  && mips_unsigned_immediate_p (step2, 5, 2)
+  && !cfun->machine->use_hazard_barrier_return_p)
use_jraddiusp_p = true;
   else
/* Deallocate the final bit of the frame.  */
@@ -13180,6 +13223,11 @@ mips_expand_epilogue (bool sibcall_p)
  else
emit_jump_insn (gen_mips_eret ());
}
+  else if (cfun->machine->use_hazard_barrier_return_p)
+   {
+ rtx reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ emit_jump_insn (gen_mips_hb_return_internal (reg));
+   }
   else
{
  rtx pat;
@@ -13238,6 +13286,11 @@ mips_can_use_return_insn (void)
   if (cfun->machine->interrupt_handler_p)
 return false;
 
+  /* Even if the function has a null epilogue, generating hazard barrier return
+ in epilogue handler is a lot cleaner and more manageable.  */
+  if (cfun->machine->use_hazard_barrier_return_p)
+return false;
+
   if (!reload_completed)
 return false;
 
@@ -23506,10 +23559,9 @@ mips_bit_clear_p (enum machine_mode mode, unsigned 
HOST_WIDE_INT m)
 
 #undef TARGET_ATTRIBUTE_TABLE
 #define TARGET_ATTRIBUTE_TABLE mips_attribute_table
-/* All our function attributes are related to how out-of-line copies should
-   be compiled or called.  They don't in themselves prevent inlining.  */
+
 #undef TARGET_FUNCTION_

[PATCH 31/61] Improve aligned straight line memcpy

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

Cherry-picked 4194c529fade9b3106d118cac63b71bc8b13f7be
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 8 +++-
 gcc/config/mips/mips.h  | 5 +
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 3185fa9633e..cd4bce71ae8 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -9631,7 +9631,13 @@ mips_expand_block_move (rtx dest, rtx src, rtx length, 
rtx alignment)
 {
   if (ISA_HAS_COPY)
  return mips16_expand_copy (dest, src, length, alignment);
-  else if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
+  else if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER
+   /* We increase slightly the maximum number of bytes in
+ a straight-line block if the source and destination
+ are aligned to the register width.  */
+   || (!optimize_size
+  && INTVAL (alignment) == UNITS_PER_WORD
+  && INTVAL (length) <= MIPS_MAX_MOVE_MEM_STRAIGHT))
   {
 mips_block_move_straight (dest, src, INTVAL (length),
INTVAL (alignment));
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index efd23a262f9..0245287f9bf 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -3171,6 +3171,11 @@ while (0)
 #define MIPS_MAX_MOVE_BYTES_STRAIGHT \
   (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
 
+/* The maximum number of bytes copied straight-line when source and destination
+   are aligned to the register width; see mips_expand_block_move.  */
+#define MIPS_MAX_MOVE_MEM_STRAIGHT \
+  (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 3)
+
 /* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
values were determined experimentally by benchmarking with CSiBE.
In theory, the call overhead is higher for TARGET_ABICALLS (especially
-- 
2.34.1

[PATCH 22/61] Add -minline-intermix to ignore mips16/nomips16

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Add a CLI option and an inline_intermix function attribute to ignore ISA
differences between a caller and a callee. The format of this attribute
is __attribute__((inline_intermix(yes|no))).

gcc/
* doc/extend.texi: Document inline_intermix.
* config/mips/mips.cc (mips_attribute_table): Add
inline_intermix.
(mips_handle_inline_intermix_attr): New function.
(mips_get_inline_intermix_attr): Likewise.
(mips_can_inline_p): Use mips_get_inline_intermix_attr.

gcc/testsuite/
* gcc.target/mips/mips.exp: Add -m[no-]inline-intermix.
* gcc.target/mips/inline-intermix-1.c: New file.
* gcc.target/mips/inline-intermix-2.c: Likewise.
* gcc.target/mips/inline-intermix-3.c: Likewise.
* gcc.target/mips/inline-intermix-4.c: Likewise.

Cherry-picked 02c76fc61198186af09fd9c4c0ef7352ab6511ad
and ae484b9431e5bd407e09b66392a1882b6878e4de
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 72 ++-
 gcc/config/mips/mips.opt  |  4 ++
 gcc/doc/extend.texi   | 17 +
 gcc/doc/invoke.texi   | 12 
 .../gcc.target/mips/inline-intermix-1.c   | 13 
 .../gcc.target/mips/inline-intermix-2.c   | 13 
 .../gcc.target/mips/inline-intermix-3.c   | 13 
 .../gcc.target/mips/inline-intermix-4.c   | 13 
 gcc/testsuite/gcc.target/mips/mips.exp|  1 +
 9 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/inline-intermix-1.c
 create mode 100644 gcc/testsuite/gcc.target/mips/inline-intermix-2.c
 create mode 100644 gcc/testsuite/gcc.target/mips/inline-intermix-3.c
 create mode 100644 gcc/testsuite/gcc.target/mips/inline-intermix-4.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 9808fda286c..e8ed002dfed 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -607,6 +607,7 @@ const enum reg_class 
mips_regno_to_class[FIRST_PSEUDO_REGISTER] = {
 };
 
 static tree mips_handle_code_readable_attr (tree *, tree, tree, int, bool *);
+static tree mips_handle_inline_intermix_attr (tree *, tree, tree, int, bool *);
 static tree mips_handle_interrupt_attr (tree *, tree, tree, int, bool *);
 static tree mips_handle_use_shadow_register_set_attr (tree *, tree, tree, int,
  bool *);
@@ -627,6 +628,8 @@ TARGET_GNU_ATTRIBUTES (mips_attribute_table, {
   { "nomips16",0, 0, true,  false, false, false, NULL, NULL },
   { "micromips",   0, 0, true,  false, false, false, NULL, NULL },
   { "nomicromips", 0, 0, true,  false, false, false, NULL, NULL },
+  { "inline_intermix", 0, 1, true,  false, false, false,
+mips_handle_inline_intermix_attr, NULL },
   { "nocompression", 0, 0, true,  false, false, false, NULL, NULL },
   { "code_readable", 0, 1, true,  false, false, false,
 mips_handle_code_readable_attr, NULL },
@@ -770,6 +773,7 @@ static const struct attr_desc mips_func_opt_list_strings[] 
= {
   {"hot",   0,  FOL_ARG_NONE, 1 << FOLC_COLD },
   {"cold",  0,  FOL_ARG_NONE, 1 << FOLC_HOT },
   {"code_readable", 0,  FOL_ARG_STRING, 0 },
+  {"inline_intermix",   0,  FOL_ARG_STRING, 0 },
   {"alias", 0,  FOL_ARG_STRING, 0 },
   {"aligned",   0,  FOL_ARG_SINGLE_NUM, 0},
   {"alloc_size",0,  FOL_ARG_NUM_ONE_OR_TWO, 0},
@@ -1917,6 +1921,71 @@ mips_use_debug_exception_return_p (tree type)
   TYPE_ATTRIBUTES (type)) != NULL;
 }
 
+/* Verify the arguments to an inline_intermix attribute.  */
+
+static tree
+mips_handle_inline_intermix_attr (tree *node ATTRIBUTE_UNUSED, tree name,
+ tree args, int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+  if (!is_attribute_p ("inline_intermix", name) || args == NULL)
+return NULL_TREE;
+
+  if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+{
+  warning (OPT_Wattributes,
+  "%qE attribute requires a string argument", name);
+  *no_add_attrs = true;
+}
+  else if (strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "no") != 0
+  && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "yes") != 0)
+{
+  warning (OPT_Wattributes,
+  "argument to %qE attribute is neither no nor yes", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
+
+/* Determine the inline_intermix setting for a function if it has one.
+   When inline_intermix is used without an argument it is the same as
+   inline_intermix=yes.  */
+
+static bool
+mips_get_inline_intermix_attr (tree decl)
+{
+  tree attr;
+
+  if (decl == NULL)
+return TARGET_INLINE_INTERMIX;
+
+  attr = lookup_attribute ("inli

[PATCH 13/61] MIPS: Only split shifts if using -mdebugd

2025-01-31 Thread Aleksandar Rakic

From: Andrew Bennett 

Enable -mdebugd by default.

Cherry-picked adb95984114b7636ee15f2ba79f94b028c8b35b2
from https://github.com/MIPS/gcc

Signed-off-by: Andrew Bennett 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.md  | 1 +
 gcc/config/mips/mips.opt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index f67fa2e66be..bf8a1217ee9 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -5871,6 +5871,7 @@
  be careful not to allocate a new register if we've reached the
  reload pass.  */
   if (TARGET_MIPS16
+  && !TARGET_DEBUG_D_MODE
   && optimize
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) > 8
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 201a9650915..64c3dca4cc2 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -131,7 +131,7 @@ mdebug
 Target Var(TARGET_DEBUG_MODE) Undocumented
 
 mdebugd
-Target Var(TARGET_DEBUG_D_MODE) Undocumented
+Target Var(TARGET_DEBUG_D_MODE) Undocumented Init(1)
 
 meb
 Target RejectNegative Mask(BIG_ENDIAN)
-- 
2.34.1

[PATCH 33/61] Testsuite: Fix insn-*.c tests from trunk

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Ensure micromips test does not get confused about library support.
Ensure insn-casesi.c and insn-tablejump.c can be executed.

Move the micromips/mips16 selection into the file as per function
attributes so that there is no requirement on having a full
micromips or mips16 runtime to execute the test.

gcc/testsuite/

* gcc.target/mips/insn-tablejump.c: Force o32 ABI as
we do not really support n32/n64 microMIPS.  Require micromips
support but not the command line option.
* gcc.target/mips/insn-casesi.c: Require mips16 support but
not the command line option.

Cherry-picked e7aaf244857638adeb9d1eb5207dbe2842cbe81d
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.target/mips/insn-casesi.c| 6 +++---
 gcc/testsuite/gcc.target/mips/insn-tablejump.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/mips/insn-casesi.c 
b/gcc/testsuite/gcc.target/mips/insn-casesi.c
index 2b4c9f21986..03d13070460 100644
--- a/gcc/testsuite/gcc.target/mips/insn-casesi.c
+++ b/gcc/testsuite/gcc.target/mips/insn-casesi.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
-/* { dg-options "-mips16 -mcode-readable=yes" } */
+/* { dg-options "(-mips16) -mabi=32 -mcode-readable=yes" } */
 
-int __attribute__ ((noinline))
+MIPS16 int __attribute__ ((noinline))
 frob (int i)
 {
   switch (i)
@@ -22,7 +22,7 @@ frob (int i)
   return i;
 }
 
-int
+MIPS16 int
 main (int argc, char **argv)
 {
   asm ("" : "+r" (argc));
diff --git a/gcc/testsuite/gcc.target/mips/insn-tablejump.c 
b/gcc/testsuite/gcc.target/mips/insn-tablejump.c
index ecba154b9e0..271108a3ed6 100644
--- a/gcc/testsuite/gcc.target/mips/insn-tablejump.c
+++ b/gcc/testsuite/gcc.target/mips/insn-tablejump.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
-/* { dg-options "-mmicromips" } */
+/* { dg-options "(-mmicromips) -mabi=32" } */
 
-int __attribute__ ((noinline))
+MICROMIPS int __attribute__ ((noinline))
 frob (int i)
 {
   switch (i)
@@ -22,7 +22,7 @@ frob (int i)
   return i;
 }
 
-int
+MICROMIPS int
 main (int argc, char **argv)
 {
   asm ("" : "+r" (argc));
-- 
2.34.1

[PATCH 56/61] Inefficient 64-bit signed modulo by powers of two

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

This adds a custom MIPS-specific modulo by power of two expander,
which uses a modified algorithm tailored to the MIPS instruction set.

gcc/

* config/mips/mips-protos.h (mips_expand_mod_pow2): New prototype.
* config/mips/mips.cc (mips_rtx_costs): Don't force power of two
constants into registers during modulo operations. Modify the cost
of modulo by power of two.
(mips_expand_mod_pow2): New expander for modulo by power of two of
64-bit values on 32-bit targets.
* config/mips/mips.md (define_expand "mod3"): Separate
define_expand for "mod3" from the define_insn and call the
new expander for 64-bit values on 32-bit targets.
(define_insn "*mod3"): Add * to the pattern name to avoid
clash with the define_expand pattern.

gcc/testsuite/

* gcc.target/mips/mod-pow2.c: New test.

Cherry-picked e683ed1717b3f689c959c738a764174fdcdc7998
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips-protos.h|   2 +
 gcc/config/mips/mips.cc  | 144 ++-
 gcc/config/mips/mips.md  |  31 +++-
 gcc/testsuite/gcc.target/mips/mod-pow2.c | 176 +++
 4 files changed, 350 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/mod-pow2.c

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 435b2e7e179..5782cd5d1b7 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -401,4 +401,6 @@ extern void mips_split_msa_subreg_move (rtx, rtx);
 
 extern const char *mips_output_compare (const char *fpcmp, const char *fcond,
const char *fmt, const char *fpcc_mode, bool swap);
+extern bool mips_expand_mod_pow2 (rtx, rtx, rtx);
+
 #endif /* ! GCC_MIPS_PROTOS_H */
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index d23c30a43be..19d428e6ed6 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5292,6 +5292,19 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
  return true;
}
 
+  /* Don't force the constant into register during modulo by power of two.
+This is needed so that the MIPS-specific modulo pattern will be
+selected during the expand phase.  */
+  if (!TARGET_64BIT
+ && !TARGET_MIPS16
+ && outer_code == MOD
+ && mode == DImode
+ && (exact_log2 (INTVAL (x)) > 0))
+   {
+ *total = 0;
+ return true;
+   }
+
   if (TARGET_MIPS16)
{
  cost = mips16_constant_cost (outer_code, INTVAL (x));
@@ -5615,8 +5628,20 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
   /* Fall through.  */
 
-case SQRT:
 case MOD:
+  /* Modulo by power of two produces (at most) nine instructions.  */
+  if (CONST_INT_P (XEXP (x, 1))
+ && exact_log2 (INTVAL (XEXP (x, 1))) > 0
+ && !TARGET_64BIT
+ && !TARGET_MIPS16
+ && mode == DImode)
+   {
+ *total = COSTS_N_INSNS (9);
+ return true;
+   }
+  /* Fall through.  */
+
+case SQRT:
   if (float_mode_p)
{
  *total = mips_fp_div_cost (mode);
@@ -26107,6 +26132,123 @@ mips_prune_insertions_deletions (struct edge_list* 
edge_list,
   sbitmap_free (ifcv_blocks);
   sbitmap_free (insertions);
 }
+
+/* Expand modulo by power of two of DImode values on 32-bit targets.  */
+
+bool
+mips_expand_mod_pow2 (rtx target, rtx op1, rtx op2)
+{
+  HOST_WIDE_INT val, reg_width;
+  rtx out_low, out_high;
+  rtx in_low, in_high;
+  rtx at, temp;
+  rtx comp, cond_operands[4];
+
+  gcc_assert (GET_CODE (op2) == CONST_INT);
+
+  val = INTVAL (op2);
+
+  int logd = exact_log2 (val);
+
+  if (logd <= 0)
+return false;
+
+  /* Extract lower and upper words of DImode source and destination.  */
+  out_low = mips_subword (target, 0);
+  out_high = mips_subword (target, 1);
+
+  in_low = mips_subword (op1, 0);
+  in_high = mips_subword (op1, 1);
+
+  at = gen_reg_rtx (SImode);
+  temp = gen_reg_rtx (SImode);
+
+  reg_width = GET_MODE_BITSIZE (SImode);
+
+  /* Divisor equals 2.  */
+  if (logd == 1)
+{
+  mips_emit_binary (AND, at, in_low, const1_rtx);
+  mips_emit_binary (ASHIFT, temp, in_low,
+   gen_int_mode (reg_width - 1, SImode));
+  mips_emit_binary (AND, temp, in_high, temp);
+  mips_emit_binary (ASHIFTRT, out_high, temp,
+   gen_int_mode (reg_width - 1, SImode));
+  mips_emit_binary (IOR, out_low, out_high, at);
+
+  return true;
+}
+  /* Divisor fits into 32 bits.  */
+  else if (logd <= reg_width)
+{
+  mips_emit_binary (ASHIFTRT, at, in_high,
+   gen_int_mode (reg_width - 1, SImode));
+
+  if (logd == reg_width)
+   mips_emit_move (out_low, in_low);
+  else if (ISA_HAS_EXT_INS || logd <= 16)
+

[PATCH 26/61] Load/store bonding improvements

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

gcc/ChangeLog:

* config/mips/mips-protos.h (mips_load_store_bonding_p): New
prototype.
* config/mips/mips.cc (mips_load_store_bond_insns): New static
function.
(mips_block_move_straight): Bond insns where possible.
(mips_for_each_saved_gpr_and_fpr): Save/restore registers with
increasing offsets if load store pairs optimisation is enabled.
(mips_expand_prologue): Bond insns in the prologue.
(mips_expand_epilogue): Bond insns in the epilogue.
(mips_multipass_dfa_lookahead): Fix sched_fusion with compiler
checking enabled.
(mips_sched_fusion_priority): New static function.
(mips_avoid_hazard): Check if instruction is not in forbidden
slot.
(mips_reorg_process_insns): Likewise.
(mips_option_override): Disable schedule_fusion for MSA.
(mips_load_store_p): New function.
(mips_load_store_insn_p): Likewise.
(mips_load_store_bond_insns_in_range): Likewise.
(mips_load_store_bonding_p): Remove load_p argument.
(mips_load_store_bonding_insn_p): Add more rules for bonding.
(TARGET_SCHED_FUSION_PRIORITY): Define macro.
* config/mips/mips.md (can_forbidden): New attribute.
(JOIN_MODE): Add DI mode to the mode iterator.
(join2_load_store): Change this to named
pattern.  Add 0 operand to constraints.  Add `can_forbidden'
attribute.
(*join2_loadhi): Add `can_forbidden' attribute.
* config/mips/predicates.md (nonimmediate_or_0_operand): New
predicate.

Cherry-picked 65c0efe581901a706fbe2d4a9d96337090ac220a
and 4a2432906766a48b7f3f9aaad8a1358604ce2f88
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips-protos.h |   2 +-
 gcc/config/mips/mips.cc   | 399 --
 gcc/config/mips/mips.md   |  28 ++-
 gcc/config/mips/predicates.md |   5 +
 4 files changed, 404 insertions(+), 30 deletions(-)

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 6b8f2370752..1ec6f386f5f 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -379,7 +379,7 @@ extern bool mips_epilogue_uses (unsigned int);
 extern void mips_final_prescan_insn (rtx_insn *, rtx *, int);
 extern int mips_trampoline_code_size (void);
 extern void mips_function_profiler (FILE *);
-extern bool mips_load_store_bonding_p (rtx *, machine_mode, bool);
+extern bool mips_load_store_bonding_p (rtx *, machine_mode);
 
 typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx);
 #ifdef RTX_CODE
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 508435cc9eb..36ce297085b 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -1790,6 +1790,9 @@ static int mips_register_move_cost (machine_mode, 
reg_class_t,
reg_class_t);
 static unsigned int mips_function_arg_boundary (machine_mode, const_tree);
 static rtx mips_gen_const_int_vector_shuffle (machine_mode, int);
+static bool mips_load_store_insn_p (rtx_insn *, rtx *,
+   HOST_WIDE_INT *, bool *);
+static void mips_load_store_bond_insns ();
 
 /* This hash table keeps track of implicit "mips16" and "nomips16" attributes
for -mflip_mips16.  It maps decl names onto a boolean mode setting.  */
@@ -9398,6 +9401,9 @@ mips_block_move_straight (rtx dest, rtx src, 
HOST_WIDE_INT length,
   move_by_pieces (dest, src, length - offset,
  MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
 }
+
+  if (ENABLE_LD_ST_PAIRS)
+mips_load_store_bond_insns ();
 }
 
 /* Helper function for doing a loop-based block operation on memory
@@ -13279,8 +13285,9 @@ mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT 
sp_offset,
   machine_mode fpr_mode;
   int regno;
   const struct mips_frame_info *frame = &cfun->machine->frame;
-  HOST_WIDE_INT offset;
+  HOST_WIDE_INT offset, offset_dec;
   unsigned int mask;
+  bool increasing_order_p = false;
 
   /* Save registers starting from high to low.  The debuggers prefer at least
  the return register be stored at func+4, and also it allows us not to
@@ -13292,20 +13299,53 @@ mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT 
sp_offset,
   if (TARGET_MICROMIPS)
 umips_build_save_restore (fn, &mask, &offset);
 
-  for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+  if (ENABLE_LD_ST_PAIRS)
+increasing_order_p = true;
+
+  if (BITSET_P (mask, (regno = GP_REG_LAST - GP_REG_FIRST)))
+{
+  /* Record the ra offset for use by mips_function_profiler.  */
+  if (regno == RETURN_ADDR_REGNUM)
+   cfun->machine->frame.ra_fp_offset = offset + sp_offset;
+  mips_save_restore_reg (word_mode, regno, offset, fn);
+  offset -= UNITS_PER_WORD;
+}
+
+  if (increasing_order_p)
+{
+  offset_dec = 0;
+  for (

[PATCH 58/61] Add EHB after last load if branch within 16 inst.

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

This workaround adds -mfix-i6400 and -mfix-i6500. If either of those two
options is active, it will add an EHB after the last load instruction
in a sequence if there is a branch within 16 instructions following it.

Options have no effect on pre-R6 or compressed ISA targets.

Inline assembly is treated as safe. It is up to the user to insert the
required EHB instruction after the loads and before the branches/jumps.

gcc/

* config/mips/mips.cc (MIPS_JR): Define as JR.HB for
TARGET_FIX_I6500.
(mips_idiv_insns): Account for extra ehb instruction for
TARGET_FIX_I6500.
(mips_adjust_insn_length): Likewise for long pic jumps.
(mips_output_conditional_branch): Output ehb in long pic jumps
for TARGET_FIX_I6500.
(mips_process_sync_loop): Output ehb before the first branch in
sequence for TARGET_FIX_I6500.
(mips_output_division): Likewise for -mdivide-breaks.
(mips_msa_output_division): Likewise.
(mips_avoid_hazard): Add new state to track loads and handle
ehb insertion.
(mips_reorg_process_insns): Setup new state for calling
mips_avoid_hazard.
(mips_set_compression_mode): Disable the TARGET_FIX_I6500 for
compressed ISA.
(mips_option_override): Allow TARGET_FIX_i6400 and TARGET_FIX_I6500
only for R6 ISA.
(mips_trampoline_init): Do not use compact branches with
TARGET_FIX_I6500.
* config/mips/mips.md (can_delay): Set to "no" for load instruction
when TARGET_FIX_I6500 is enabled.
(jump_pic) : Output ehb for TARGET_FIX_I6500.
* config/mips/mips.opt (-mfix-i6400): New option.
(-mfix-i6500): Likewise.
* doc/invoke.texi (-mfix-i6400): Document.
(-mfix-i6500): Likewise.

gcc/testsuite/

* gcc.target/mips/fix-i6500.c: New file.

Cherry-picked 784408360ef462711181e5cb59f1b0ff575f92ca
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 81 ---
 gcc/config/mips/mips.md   |  8 ++-
 gcc/config/mips/mips.opt  |  8 +++
 gcc/doc/invoke.texi   |  8 +++
 gcc/testsuite/gcc.target/mips/fix-i6500.c | 18 +
 5 files changed, 112 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/fix-i6500.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 63b7bdd255c..b09794eab15 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -179,10 +179,13 @@ static int *consumer_luid = NULL;
   ((0xf << 26) | ((DEST) << 16) | (VALUE))
 
 /* Return the opcode to jump to register DEST.  When the JR opcode is not
-   available use JALR $0, DEST.  */
+   available use JALR $0, DEST.
+   Use hazard barrier for TARGET_FIX_I6500.  */
 #define MIPS_JR(DEST) \
-  (TARGET_CB_ALWAYS ? ((0x1b << 27) | ((DEST) << 16)) \
-   : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9)))
+  (TARGET_CB_ALWAYS && !TARGET_FIX_I6500 \
+  ? ((0x1b << 27) | ((DEST) << 16)) \
+  : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9) \
+  | (TARGET_FIX_I6500 ? (0x1 << 10) : 0x0)))
 
 /* Return the opcode for:
 
@@ -3993,7 +3996,7 @@ mips_idiv_insns (machine_mode mode)
   if (GENERATE_DIVIDE_TRAPS && !MSA_SUPPORTED_MODE_P (mode))
 count++;
   else
-count += 2;
+   count += !TARGET_FIX_I6500 ? 2 : 3;
 }
 
   if (TARGET_FIX_R4000 || TARGET_FIX_R4400)
@@ -15601,6 +15604,9 @@ mips_adjust_insn_length (rtx_insn *insn, int length)
 
   /* Add the length of an indirect jump, ignoring the delay slot.  */
   length += TARGET_COMPRESSION ? 2 : 4;
+
+  if (TARGET_FIX_I6500 && !TARGET_ABSOLUTE_JUMPS)
+   length += 4;
 }
 
   /* A unconditional jump has an unfilled delay slot if it is not part
@@ -15769,6 +15775,10 @@ mips_output_conditional_branch (rtx_insn *insn, rtx 
*operands,
   else
 {
   mips_output_load_label (taken);
+
+  if (TARGET_FIX_I6500)
+   output_asm_insn ("ehb", 0);
+
   if (TARGET_CB_MAYBE)
output_asm_insn ("jrc\t%@%]", 0);
   else
@@ -16149,6 +16159,10 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands)
   at, oldval, inclusive_mask, NULL);
  tmp1 = at;
}
+
+  if (TARGET_FIX_I6500)
+   mips_multi_add_insn ("ehb", NULL);
+
   if (TARGET_CB_NEVER)
mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
 
@@ -16413,6 +16427,9 @@ mips_output_division (const char *division, rtx 
*operands)
}
   else
{
+ if (TARGET_FIX_I6500)
+   output_asm_insn ("ehb", NULL);
+
  if (flag_delayed_branch)
{
  output_asm_insn ("%(bne\t%2,%.,1f", operands);
@@ -16441,6 +16458,9 @@ mips_msa_output_division (const char *division, rtx 
*operands)
   s = division;
   if (TARGET_CHECK_ZERO_DIV)
 {
+  if (TARGET_FIX_I6500)
+   output_asm_insn ("ehb", NULL);
+
   output_

[PATCH 38/61] MIPSR6: Mark R6 unaligned access

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

gcc/
* config/mips/mips.cc (mips_output_move): Mark unaligned
load and store with a comment.

Cherry-picked 42be7aa50f3b04a88768e08c000cfe7923f22b0f
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 36 +++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index a1208bcef69..dcb4b9f9f99 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -6255,9 +6255,15 @@ mips_output_move (rtx insn, rtx dest, rtx src)
switch (GET_MODE_SIZE (mode))
  {
  case 1: return "sb\t%z1,%0";
- case 2: return "sh\t%z1,%0";
- case 4: return "sw\t%z1,%0";
- case 8: return "sd\t%z1,%0";
+ case 2: return ((MEM_ALIGN (dest) >= BITS_PER_UNIT * 2)
+ ? "sh\t%z1,%0"
+ : "sh\t%z1,%0 # unaligned");
+ case 4: return ((MEM_ALIGN (dest) >= BITS_PER_UNIT * 4)
+ ? "sw\t%z1,%0"
+ : "sw\t%z1,%0 # unaligned");
+ case 8: return ((MEM_ALIGN (dest) >= BITS_PER_UNIT * 8)
+ ? "sd\t%z1,%0"
+ : "sd\t%z1,%0 # unaligned");
  default: gcc_unreachable ();
  }
 }
@@ -6313,18 +6319,30 @@ mips_output_move (rtx insn, rtx dest, rtx src)
switch (GET_MODE_SIZE (mode))
  {
  case 1: return "lbu\t$0,%1";
- case 2: return "lhu\t$0,%1";
- case 4: return "lw\t$0,%1";
- case 8: return "ld\t$0,%1";
+ case 2: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 2)
+ ? "lhu\t$0,%1"
+ : "lhu\t$0,%1 # unaligned");
+ case 4: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 4)
+ ? "lw\t$0,%1"
+ : "lw\t$0,%1 # unaligned");
+ case 8: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 8)
+ ? "ld\t$0,%1"
+ : "ld\t$0,%1 # unaligned");
  default: gcc_unreachable ();
  }
  else
switch (GET_MODE_SIZE (mode))
  {
  case 1: return "lbu\t%0,%1";
- case 2: return "lhu\t%0,%1";
- case 4: return "lw\t%0,%1";
- case 8: return "ld\t%0,%1";
+ case 2: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 2)
+ ? "lhu\t%0,%1"
+ : "lhu\t%0,%1 # unaligned");
+ case 4: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 4)
+ ? "lw\t%0,%1"
+ : "lw\t%0,%1 # unaligned");
+ case 8: return ((MEM_ALIGN (src) >= BITS_PER_UNIT * 8)
+ ? "ld\t%0,%1"
+ : "ld\t%0,%1 # unaligned");
  default: gcc_unreachable ();
  }
}
-- 
2.34.1

[PATCH 59/61] Add uclibc support

2025-01-31 Thread Aleksandar Rakic

From: Jean Lee 

Disable stack unwind and fix page size for uclibc on mips target.

Fix "ASan runtime does not come first in initial library list; you
should either link runtime to your application or manually preload it
with LD_PRELOAD."

Disable SANITIZER_INTERCEPT_GLOB.

Resolve libsanitizer build issues for uclibc.

Cherry-picked 94e7806991cf3af0dbaf6147d0010480a7760cc8,
52849bd97f29b6ad17d493ad383d8833473ee6a7,
0dcb2d0c3cc4d7118bd211a24e01d9a991dd72d2,
127b4d28d9bfba10e7006decdcd0f24665e5d5af,
5cd320103ba1248c6925b9843f1139e60d283bed and
af4425414cef0155d6f00ad118417f4908eae756
from https://github.com/MIPS/gcc

Signed-off-by: Jean Lee 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 libsanitizer/asan/asan_descriptions.cpp   |  5 ++
 libsanitizer/asan/asan_linux.cpp  |  2 +-
 .../interception/interception_linux.cpp   |  6 ++-
 .../interception/interception_linux.h |  6 ++-
 .../sanitizer_common/sanitizer_common.cpp | 10 +++-
 .../sanitizer_common/sanitizer_linux.cpp  |  6 ++-
 .../sanitizer_common/sanitizer_platform.h |  9 
 .../sanitizer_platform_interceptors.h | 14 --
 .../sanitizer_platform_limits_posix.cpp   | 46 +++
 .../sanitizer_platform_limits_posix.h |  9 +++-
 .../sanitizer_unwind_linux_libcdep.cpp|  2 +
 11 files changed, 93 insertions(+), 22 deletions(-)

diff --git a/libsanitizer/asan/asan_descriptions.cpp 
b/libsanitizer/asan/asan_descriptions.cpp
index caec79313e2..0b8180bbf0f 100644
--- a/libsanitizer/asan/asan_descriptions.cpp
+++ b/libsanitizer/asan/asan_descriptions.cpp
@@ -175,10 +175,15 @@ bool GetHeapAddressInformation(uptr addr, uptr 
access_size,
 }
 
 static StackTrace GetStackTraceFromId(u32 id) {
+#if !(defined(__mips__) && SANITIZER_UCLIBC)
   CHECK(id);
   StackTrace res = StackDepotGet(id);
   CHECK(res.trace);
   return res;
+#else
+  StackTrace res;
+  return res;
+#endif
 }
 
 bool DescribeAddressIfHeap(uptr addr, uptr access_size) {
diff --git a/libsanitizer/asan/asan_linux.cpp b/libsanitizer/asan/asan_linux.cpp
index 4cabca388ca..abcfb2467c9 100644
--- a/libsanitizer/asan/asan_linux.cpp
+++ b/libsanitizer/asan/asan_linux.cpp
@@ -107,7 +107,7 @@ void FlushUnneededASanShadowMemory(uptr p, uptr size) {
   ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
 }
 
-#  if SANITIZER_ANDROID
+#  if SANITIZER_ANDROID || SANITIZER_UCLIBC
 // FIXME: should we do anything for Android?
 void AsanCheckDynamicRTPrereqs() {}
 void AsanCheckIncompatibleRT() {}
diff --git a/libsanitizer/interception/interception_linux.cpp 
b/libsanitizer/interception/interception_linux.cpp
index ef8136eb4fc..ed8bd3a80b9 100644
--- a/libsanitizer/interception/interception_linux.cpp
+++ b/libsanitizer/interception/interception_linux.cpp
@@ -64,7 +64,8 @@ bool InterceptFunction(const char *name, uptr *ptr_to_real, 
uptr func,
 }
 
 // dlvsym is a GNU extension supported by some other platforms.
-#if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
+#if (SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD) && \
+!SANITIZER_UCLIBC
 static void *GetFuncAddr(const char *name, const char *ver) {
   return dlvsym(RTLD_NEXT, name, ver);
 }
@@ -75,7 +76,8 @@ bool InterceptFunction(const char *name, const char *ver, 
uptr *ptr_to_real,
   *ptr_to_real = (uptr)addr;
   return addr && (func == trampoline);
 }
-#  endif  // SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
+#  endif  // (SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD) &&
+ // !SANITIZER_UCLIBC
 
 }  // namespace __interception
 
diff --git a/libsanitizer/interception/interception_linux.h 
b/libsanitizer/interception/interception_linux.h
index 2e01ff44578..897ec677350 100644
--- a/libsanitizer/interception/interception_linux.h
+++ b/libsanitizer/interception/interception_linux.h
@@ -38,7 +38,8 @@ bool InterceptFunction(const char *name, const char *ver, 
uptr *ptr_to_real,
   (::__interception::uptr) &TRAMPOLINE(func))
 
 // dlvsym is a GNU extension supported by some other platforms.
-#if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
+#if (SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD) && \
+!SANITIZER_UCLIBC
 #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \
   ::__interception::InterceptFunction(\
   #func, symver,  \
@@ -48,7 +49,8 @@ bool InterceptFunction(const char *name, const char *ver, 
uptr *ptr_to_real,
 #else
 #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \
   INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func)
-#endif  // SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
+#endif  // (SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD) &&
+   // !SANITIZER_UCLIBC
 
 #endif  // INTERCEPTION_LINUX_H
 #endif  // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD ||
diff --git a/libsanitizer/sanitizer_common/sanitizer_common.cpp 
b/libsanit

[PATCH 27/61] MIPSR6: Define new R6 FPU instructions

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

gcc/
* config/mips/mips.h (ISA_HAS_FCLASS): New macro.
(ISA_HAS_RINT): Likewise.
* config/mips/mips.md (unspec): Add UNSPEC_FCLASS and
UNSPEC_FRINT.
(type): Add fclass and frint.
(fnma<mode>4): Enable for ISA_HAS_FUSED_MADDF.
(fnma<mode>4_msubf): New define_insn.
(fmax_a_<mode>): Likewise.
(fmin_a_<mode>): Likewise.
(fclass_<mode>): Likewise.
(frint_<mode>): Likewise.
* config/mips/i6400.md (i6400_fpu_minmax): Include
fclass type.
(i6400_fpu_fadd): Include frint type.
* config/mips/p6600.md (p6600_fpu_fadd): Include frint type.
(p6600_fpu_fabs): Include fclass type.

Cherry-picked bbc81087aa0e307aaf262021c40473644ed2a9b2
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/i6400.md |  6 ++--
 gcc/config/mips/mips.h   |  4 +++
 gcc/config/mips/mips.md  | 61 ++--
 gcc/config/mips/p6600.md |  4 +--
 4 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/gcc/config/mips/i6400.md b/gcc/config/mips/i6400.md
index d6f691ee217..4a2361667abd 100644
--- a/gcc/config/mips/i6400.md
+++ b/gcc/config/mips/i6400.md
@@ -219,16 +219,16 @@
(eq_attr "type" "fabs,fneg,fmove"))
   "i6400_fpu_short, i6400_fpu_apu")
 
-;; min, max
+;; min, max, min_a, max_a, class
 (define_insn_reservation "i6400_fpu_minmax" 2
   (and (eq_attr "cpu" "i6400")
-   (eq_attr "type" "fminmax"))
+   (eq_attr "type" "fminmax,fclass"))
   "i6400_fpu_short+i6400_fpu_logic")
 
 ;; fadd, fsub, fcvt
 (define_insn_reservation "i6400_fpu_fadd" 4
   (and (eq_attr "cpu" "i6400")
-   (eq_attr "type" "fadd,fcvt"))
+   (eq_attr "type" "fadd,fcvt,frint"))
   "i6400_fpu_long, i6400_fpu_apu")
 
 ;; fmul
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index b727074bf53..efd23a262f9 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -1291,6 +1291,10 @@ struct mips_cpu_info {
 
 #define ISA_HAS_FMIN_FMAX  (mips_isa_rev >= 6)
 
+#define ISA_HAS_FCLASS (mips_isa_rev >= 6)
+
+#define ISA_HAS_RINT   (mips_isa_rev >= 6)
+
 /* ISA has data indexed prefetch instructions.  This controls use of
'prefx', along with TARGET_HARD_FLOAT and TARGET_DOUBLE_FLOAT.
(prefx is a cop1x instruction, so can only be used if FP is
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 814692aecf1..7d27e7d4b20 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -102,6 +102,8 @@
   ;; Floating-point unspecs.
   UNSPEC_FMIN
   UNSPEC_FMAX
+  UNSPEC_FCLASS
+  UNSPEC_FRINT
 
   ;; HI/LO moves.
   UNSPEC_MFHI
@@ -395,7 +397,7 @@
shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move,
fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt,
frsqrt,frsqrt1,frsqrt2,fminmax,dspmac,dspmacsat,accext,accmod,dspalu,
-   dspalusat,multi,atomic,syncloop,nop,ghost,multimem,
+   dspalusat,multi,atomic,syncloop,nop,ghost,multimem,fclass,frint,
simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
@@ -2656,8 +2658,9 @@
(fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand"))
  (match_operand:ANYF 2 "register_operand")
  (match_operand:ANYF 3 "register_operand")))]
-  "(ISA_HAS_FUSED_MADD3 || ISA_HAS_FUSED_MADD4)
-   && !HONOR_SIGNED_ZEROS (mode)")
+  "((ISA_HAS_FUSED_MADD3 || ISA_HAS_FUSED_MADD4)
+&& !HONOR_SIGNED_ZEROS (mode))
+   || ISA_HAS_FUSED_MADDF")
 
 (define_insn "*fnma4_nmsub3"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2679,6 +2682,16 @@
   [(set_attr "type" "fmadd")
(set_attr "mode" "")])
 
+(define_insn "*fnma4_msubf"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+   (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
+ (match_operand:ANYF 2 "register_operand" "f")
+ (match_operand:ANYF 3 "register_operand" "0")))]
+  "ISA_HAS_FUSED_MADDF"
+  "msubf.\t%0,%1,%2"
+  [(set_attr "type" "fmadd")
+   (set_attr "mode" "")])
+
 ;; fnms is defined as: (fma (neg op1) op2 (neg op3))
 ;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3)
 ;; The mips nmadd instructions implement -((op1 * op2) + op3)
@@ -8156,6 +8169,48 @@
   [(set_attr "type" "fminmax")
   (set_attr "mode" "")])
 
+(define_insn "fmax_a_"
+  [(set (match_operand:SCALARF 0 "register_operand" "=f")
+   (if_then_else
+  (gt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f"))
+  (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f")))
+  (match_dup 1)
+  (match_dup 2)))]
+  "ISA_HAS_FMIN_FMAX"
+  "maxa.\t%0,%1,%2"
+  [(set_attr "type" "fminmax")
+   (set_attr "mod

[PATCH 28/61] Fix wrong instruction in the delay slot

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

The problematic test case shows that the use of __builtin_unreachable ()
has a branch not optimised away causing confusion in the eager
delay slot filler if the "unreachable" is moved elsewhere by the block
reordering pass.

It appears that a series of unfortunate events causes a wrong
instruction to be placed in the delay slot:

1. The branch is not optimised away during expansion.  It has a diamond
   shape so the unreachable case falls through.
2. The block reordering pass moves the basic block elsewhere.
3. The eager delay slot filler (EDSF):
   a. It initially skips all the consecutive labels, ignoring barriers,
  until it finds an instruction.  This is by design and mirrors
  what first_active_target_insn() does.
   b. The branch now points to a load for the branch taken case.
   c. The arithmetic shift left instruction is not placed in the slot
  because the EDSF detects a conflict in resource usage ($4 is
  set, referenced, or both; most likely the conflict is that it
  is referenced).
   d. As (c) failed, another attempt is made and the other thread/path
  is explored.  This time it succeeds because, or so it appears,
  the reverse search along the branch-taken path looks for the
  beginning of the basic block and does not see that $4 is also
  used.  That the shift does not reference $4 is likely the reason
  the usage is not seen.
   e. As (d) succeeded, the load is "legitimately" placed in the delay
  slot.

Perhaps this is a vague description but this is more or less what is
happening.

The fix attempts to treat the unreachable block (that represents
__builtin_unreachable) in a special way:

1. The label is not skipped if it is a label with a barrier only.  Notes
   and debug instructions are ignored.  This prevents redirecting the
   jump to a wrong place that seemed to be treated as a valid
   redirection.  Since the behaviour of such branching is undefined, we
   don't want to analyse the taken path.

2. first_active_target_insn() must recognize the unreachable block
   and must not go beyond the barrier, for the same reason as above.

3. With this in place, the eager delay slot filler uses the correct
   instruction.  We don't care where the branch branches to as the
   behaviour of the program is undefined.  The slot is not filled,
   letting the assembler do the right thing (.set noreorder/reorder
   are not emitted).

gcc/
* reorg.cc (label_with_barrier_p): New function.
(skip_consecutive_labels): Use it.  Don't skip the label if an
empty block is found.
(first_active_target_insn): Likewise.  Don't ignore the empty
block when searching for the next active instruction.

Cherry-picked 3667d07c7f0512e8996eab9ab75efc79ac1827c2
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/reorg.cc | 28 
 1 file changed, 28 insertions(+)

diff --git a/gcc/reorg.cc b/gcc/reorg.cc
index 68bf30801cf..91a752b7d4a 100644
--- a/gcc/reorg.cc
+++ b/gcc/reorg.cc
@@ -113,6 +113,30 @@ along with GCC; see the file COPYING3.  If not see
These functions are now only used here in reorg.cc, and have therefore
been moved here to avoid inadvertent misuse elsewhere in the compiler.  */
 
+/* Return true if LABEL is a CODE_LABEL that is followed by a BARRIER with
+   nothing but notes and debug instructions in between.  Return false if
+   LABEL is not a CODE_LABEL, if any other kind of insn is met first, or if
+   the insn chain ends before a barrier is found.  */
+
+static bool
+label_with_barrier_p (rtx_insn *label)
+{
+  if (!LABEL_P (label))
+    return false;
+
+  /* NEXT_INSN yields a null pointer at the end of the chain, so the walk
+     must stop there instead of testing the code of a null insn.  */
+  for (rtx_insn *insn = NEXT_INSN (label); insn; insn = NEXT_INSN (insn))
+    {
+      if (BARRIER_P (insn))
+        return true;
+      if (!DEBUG_INSN_P (insn) && !NOTE_P (insn))
+        return false;
+    }
+
+  /* Ran off the end of the insn chain without meeting a barrier.  */
+  return false;
+}
+
 /* Return the last label to mark the same position as LABEL.  Return LABEL
itself if it is null or any return rtx.  */
 
@@ -140,6 +164,8 @@ skip_consecutive_labels (rtx label_or_return)
   for (insn = label;
insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
insn = NEXT_INSN (insn))
+if (LABEL_P (insn) && label_with_barrier_p (insn))
+  break;
 if (LABEL_P (insn))
   label = insn;
 
@@ -230,6 +256,8 @@ first_active_target_insn (rtx insn)
 {
   if (ANY_RETURN_P (insn))
 return insn;
+  if (LABEL_P (insn) && label_with_barrier_p ((rtx_insn *)insn))
+return NULL_RTX;
   return next_active_insn (as_a  (insn));
 }
 
-- 
2.34.1

[PATCH 16/61] Add -msdata-num and -msdata-opt-list support

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Cherry-picked 2403e09c3a08b797e22e30f70f762ed1eadbd783
and f76b493c090cfc2f9270528e84ef0f04fb463c3f
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 161 ++-
 gcc/config/mips/mips.opt |   8 ++
 gcc/doc/invoke.texi  |  36 +
 3 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4c719fbaed5..6e48feeb560 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -640,6 +640,62 @@ TARGET_GNU_ATTRIBUTES (mips_attribute_table, {
   { "use_hazard_barrier_return", 0, 0, true, false, false, false, NULL, NULL }
 });
 
+/* One node of the singly linked list built by mips_read_list from the
+   file given to -msdata-opt-list.  */
+struct mips_sdata_entry
+{
+  /* Heap-allocated copy of one line of the file.  */
+  char *var;
+  /* Following node, or NULL at the end of the list.  */
+  struct mips_sdata_entry *next;
+};
+
+/* List that mips_insert_attributes searches with mips_find_list.  */
+static struct mips_sdata_entry *mips_sdata_opt_list;
+
+/* Read FILENAME, which holds one name per line, and return the names as a
+   newly allocated singly linked list in file order.  A trailing newline is
+   removed from each name.  Return NULL if FILENAME is NULL, if the file
+   cannot be opened (an error is reported in that case) or if nothing could
+   be read from it.  Lines are read through a 256-byte buffer, so a longer
+   line is split over several entries.  */
+
+static struct mips_sdata_entry *
+mips_read_list (const char * filename)
+{
+  FILE *fd;
+  char line[256];
+  struct mips_sdata_entry *head = NULL;
+  /* Location where the link to the next entry has to be stored.  */
+  struct mips_sdata_entry **tail = &head;
+
+  if (filename == NULL)
+    return NULL;
+  fd = fopen (filename, "r");
+  if (fd == NULL)
+    {
+      error ("Bad filename for -msdata-opt-list: %s\n", filename);
+      return NULL;
+    }
+
+  while (fgets (line, sizeof (line), fd))
+    {
+      /* Measure the line once.  LEN is zero when the data read starts with
+         an embedded NUL; there is then no last character to look at, and
+         indexing with LEN - 1 would read outside the string.  */
+      size_t len = strlen (line);
+      if (len > 0 && line[len - 1] == '\n')
+        line[len - 1] = '\0';
+
+      struct mips_sdata_entry *entry
+        = (struct mips_sdata_entry *) xmalloc (sizeof (struct mips_sdata_entry));
+      entry->var = xstrdup (line);
+      entry->next = NULL;
+
+      /* Append at the tail so that the list keeps the file order.  */
+      *tail = entry;
+      tail = &entry->next;
+    }
+  fclose (fd);
+  return head;
+}
+
+/* Return true if some entry of LIST holds a string equal to VAR, and
+   false otherwise (including when LIST is empty).  */
+
+static bool
+mips_find_list (const char *var, struct mips_sdata_entry *list)
+{
+  for (struct mips_sdata_entry *entry = list; entry; entry = entry->next)
+    if (!strcmp (entry->var, var))
+      return true;
+
+  return false;
+}
+
 /* A table describing all the processors GCC knows about; see
mips-cpus.def for details.  */
 static const struct mips_cpu_info mips_cpu_info_table[] = {
@@ -1533,6 +1589,30 @@ mips_insert_attributes (tree decl, tree *attributes)
   if (compression_flags)
error ("%qs attribute only applies to functions",
   mips_get_compress_on_name (nocompression_flags));
+
+  if (TREE_CODE (decl) == VAR_DECL
+ && is_global_var (decl)
+ && DECL_NAME (decl)
+ && mips_find_list (IDENTIFIER_POINTER (DECL_NAME (decl)),
+mips_sdata_opt_list))
+   {
+ tree attr_args;
+ if (mips_sdata_section_num > -1)
+   {
+ char sec_name[13];
+ sprintf (sec_name, ".sdata_%d", mips_sdata_section_num);
+ attr_args = build_tree_list (NULL_TREE,
+  build_string (strlen (sec_name),
+sec_name));
+   }
+ else
+   attr_args = build_tree_list (NULL_TREE,
+build_string (6, ".sdata"));
+
+ *attributes = tree_cons (get_identifier ("section"),
+  attr_args,
+  *attributes);
+   }
 }
   else
 {
@@ -9804,6 +9884,70 @@ mips_encode_section_info (tree decl, rtx rtl, int first)
 }
 }
 
+/* Implement TARGET_ASM_UNIQUE_SECTION.  Let default_unique_section choose
+   the section name for DECL.  Then, if mips_sdata_section_num is not
+   negative and the chosen name starts with ".sdata" or ".sbss", insert
+   "_<num>" right after that prefix, e.g. ".sdata.foo" becomes
+   ".sdata_<num>.foo".  */
+
+void
+mips_asm_unique_section (tree decl, int reloc)
+{
+  default_unique_section (decl, reloc);
+
+  if (mips_sdata_section_num < 0)
+    return;
+
+  const char *name = DECL_SECTION_NAME (decl);
+  size_t prefix_len;
+
+  if (strncmp (".sdata", name, 6) == 0)
+    prefix_len = 6;
+  else if (strncmp (".sbss", name, 5) == 0)
+    prefix_len = 5;
+  else
+    return;
+
+  /* Room for NAME, the '_' separator, any int printed in decimal
+     (3 * sizeof (int) characters always suffice, sign included) and the
+     terminating NUL.  A fixed slack of five bytes is too small as soon as
+     the section number needs more than three digits.  */
+  char *sec_name = (char *) alloca (strlen (name) + 3 * sizeof (int) + 2);
+  sprintf (sec_name, "%.*s_%d%s", (int) prefix_len, name,
+           mips_sdata_section_num, name + prefix_len);
+
+  set_decl_section_name (decl, sec_name);
+}
+
+/* Implement TARGET_ASM_SELECT_SECTION.  */
+
+static section *
+mips_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
+{
+  char * sec_name;
+  section *s;
+
+  s = default_elf_select_section (exp, reloc, align);
+
+  if (mips_sdata_section_num > -1
+  && (s->named.common.flags & SECTION_NAMED)
+  && (strncmp (".sdata", s->named.name, 6) == 0
+ || strncmp (".sbss", s->named.name, 5) == 0))
+{
+  sec_name = (char*) alloca (strlen (s->named.name) + 5);
+  if (strncmp (".sdata", s->named.name, 6) == 0)
+   sprintf (sec_name, ".sdata_%d%s", mips_sdata_section_num,
+s->named.name + 6);
+  else
+   sprintf (sec_name, ".sbss_%d%s", mips_sdata_section_num,
+s->named.name + 5);
+  s = get_section (sec_name, s->nam

[PATCH 19/61] Add support for a limit for inlining memcpy

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Expose it with an option: -mblockmov-limit. A memcpy strictly less than
this value will be considered for inlining.

gcc/ChangeLog:

* config/mips/mips.cc (mips_expand_block_move): Add support to
control size of inlined memcpy.
* config/mips/mips.opt (mblockmov-limit): New option.

Cherry-picked cf1e4960a4f80301e4c8f71a35cbbc8fef1ce6fd
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 21 -
 gcc/config/mips/mips.opt |  3 +++
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 32fe62ce79b..d9c913f2e23 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -9360,16 +9360,19 @@ mips_expand_block_move (rtx dest, rtx src, rtx length)
  || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN))
 return false;
 
-  if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
+  if (mips_movmem_limit == -1 || INTVAL (length) < mips_movmem_limit)
 {
-  mips_block_move_straight (dest, src, INTVAL (length));
-  return true;
-}
-  else if (optimize)
-{
-  mips_block_move_loop (dest, src, INTVAL (length),
-   MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
-  return true;
+  if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER)
+  {
+mips_block_move_straight (dest, src, INTVAL (length));
+return true;
+  }
+  else if (optimize)
+  {
+mips_block_move_loop (dest, src, INTVAL (length),
+ MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
+return true;
+  }
 }
 
   return false;
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 012ca91560f..a4b93de924d 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -556,3 +556,6 @@ munique-sections=FILE   Use to specify sections that should 
be made unique.
 mfunc-opt-list=
 Target RejectNegative Joined Var(mips_func_opt_list_file) Init(0) Defer
 mfunc-opt-list=FILEUse to specify per function optimizations.
+
+mblockmov-limit=
+Target RejectNegative Undocumented Joined UInteger Var(mips_movmem_limit) 
Init(-1)
-- 
2.34.1

[PATCH 21/61] Testsuite: Modify the gcc.dg/memcpy-4.c test

2025-01-31 Thread Aleksandar Rakic

From: Andrew Bennett 

Firstly, remove the MIPS specific bit of the test.
Secondly, create a MIPS specific version in the gcc.target/mips.
This will only execute for a MIPS ISA less than R6.

Cherry-picked c8b051cdbb1d5b166293513b0360d3d67cf31eb9
from https://github.com/MIPS/gcc

Signed-off-by: Andrew Bennett 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.dg/memcpy-4.c  |  7 +--
 gcc/testsuite/gcc.target/mips/memcpy-2.c | 12 
 2 files changed, 13 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-2.c

diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c
index 4c726f0ad74..b17b369c5c6 100644
--- a/gcc/testsuite/gcc.dg/memcpy-4.c
+++ b/gcc/testsuite/gcc.dg/memcpy-4.c
@@ -1,13 +1,8 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-rtl-expand" } */
+/* { dg-options "-O2" } */
 
-#ifdef __mips
-__attribute__((nomips16))
-#endif
 void
 f1 (char *p)
 {
   __builtin_memcpy (p, "12345", 5);
 }
-
-/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" { target mips*-*-* } } } 
*/
diff --git a/gcc/testsuite/gcc.target/mips/memcpy-2.c 
b/gcc/testsuite/gcc.target/mips/memcpy-2.c
new file mode 100644
index 000..df0cd18c2b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/memcpy-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "isa_rev<=5 -fdump-rtl-expand" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-Os" } { "" } } */
+
+/* MIPS-specific variant of gcc.dg/memcpy-4.c, restricted to ISA revisions
+   up to 5.  The dg-final directive below scans the RTL expand dump for
+   two "mem/u" operands on one line after this small constant-string
+   copy.  */
+__attribute__((nomips16))
+void
+f1 (char *p)
+{
+  __builtin_memcpy (p, "12345", 5);
+}
+
+/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" } } */
-- 
2.34.1

[PATCH 18/61] Add -mfunc-opt-list=

2025-01-31 Thread Aleksandar Rakic

From: Simon Dardis 

New option for MIPS -mfunc-opt-list=FILE. This option takes a file which
has one function per line followed by a whitespace (space/tab) followed
by one or more attributes. Supported attributes are O2, Os,
code-read=pcrel, always_inline, noinline, mips16, nomips16, epi,
longcall.

Attributes are applied to functions that the compiler sees, so functions
listed that the compiler doesn't see are ignored.

Now understands the majority of function attributes. These are:
O1, O2, O3, Os, mips16, nomips16, always_inline, noinline, unused, used,
far, near, hot, cold, code_readable, alias, aligned, alloc_size,
alloc_align, assume_aligned, artifical, constructor, const, deprecated,
destructor, error, flatten, gnu_inline, interrupt,
keep_interrupts_masked, long_call, leaf, noclone, noreturn, malloc,
nonnull, nothrow, optimize, returns_nonnull, returns_twice, section,
pure, use_debug_exception_return, use_shadow_register_set, visibility,
warning, warn_unused_result, weak, weakref.

Syntax of attributes that take arguments is like: alias ("O2")
or nonnull (1,2)

Attach unknown attributes anyway.

Cherry-picked e2ff99868adedb1a563ee69b3076838dd7ae4450
from https://github.com/MIPS/gcc

Signed-off-by: Simon Dardis 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 609 +++
 gcc/config/mips/mips.opt |   4 +
 gcc/doc/invoke.texi  |  33 +++
 3 files changed, 646 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 55d06b87c0d..32fe62ce79b 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -697,6 +697,524 @@ mips_find_list (const char *var, struct mips_sdata_entry 
*list)
   return false;
 }
 
+/* Argument type descriptor.  Each entry of mips_func_opt_list_strings
+   carries one of these values in its arg_type field.  The examples below
+   are taken from that table.  */
+
+enum mips_func_opt_list_arg_t
+{
+  /* Used by e.g. "O2", "mips16" and "noinline".  */
+  FOL_ARG_NONE,
+  /* Used by e.g. "alias" and "code_readable".  */
+  FOL_ARG_STRING,
+  /* Used by e.g. "aligned" and "alloc_align".  */
+  FOL_ARG_SINGLE_NUM,
+  /* NOTE(review): presumably an optional list of numbers, as in
+     nonnull (1,2); confirm against the rest of the table.  */
+  FOL_ARG_OPTIONAL_NUM_LIST,
+  /* Used by e.g. "alloc_size" and "assume_aligned".  */
+  FOL_ARG_NUM_ONE_OR_TWO,
+  /* Used by e.g. "deprecated" and "error".  */
+  FOL_ARG_OPTIONAL_STRING,
+  /* Used by e.g. "constructor" and "destructor".  */
+  FOL_ARG_OPTIONAL_NUM,
+  /* NOTE(review): presumably for attributes that are not in the table;
+     confirm against the parser.  */
+  FOL_ARG_UNKNOWN
+};
+
+/* Collisions for FUNC_OPT_LIST.  Rather than just relying on the middle end
+   to complain, check at parse time so we can produce accurate diagnostics.
+   Each enumerator is a bit position: mips_func_opt_list_strings builds the
+   collision mask of an entry from 1 << FOLC_xxx terms.  */
+
+enum mips_fol_collides
+{
+  FOLC_O1,
+  FOLC_O2,
+  FOLC_O3,
+  FOLC_OS,
+  FOLC_MIPS16,
+  FOLC_NOMIPS16,
+  FOLC_ALWAYS_INLINE,
+  FOLC_NOINLINE,
+  FOLC_UNUSED,
+  FOLC_USED,
+  FOLC_FAR,
+  FOLC_NEAR,
+  FOLC_HOT,
+  FOLC_COLD,
+  /* NOTE(review): presumably a count of the entries above, not a real
+     collision bit; confirm where it is used.  */
+  FOLC_END
+};
+
+/* Part of FUNC_OPT_LIST.  Each entry records the name to be matched, which
+   is the name GCC uses internally; an optional second string for when the
+   name has to be passed as the argument of a different attribute; the kind
+   of argument the attribute takes; and a bitmask describing which other
+   entries collide with this entry.  */
+
+struct attr_desc
+{
+  /* Name to match, e.g. "O2" or "noinline".  */
+  const char * optstring;
+  /* Attribute that OPTSTRING is an argument of, e.g. "optimize" for "O2",
+     or 0 if OPTSTRING is itself the attribute name.  */
+  const char * maintype;
+  /* Kind of argument the attribute takes.  */
+  enum mips_func_opt_list_arg_t arg_type;
+  /* Mask of 1 << FOLC_xxx bits for the entries this one collides with,
+     or 0 for none.  */
+  int collisions;
+};
+
+/* This table encodes the strings to match against for parsing func-opt-list,
+   an optional string which the first is argument of, e.g. optimize ("O2")
+   and the colliding attributes.  */
+
+static const struct attr_desc mips_func_opt_list_strings[] = {
+  {"O1",   "optimize", FOL_ARG_NONE,
+   1 << FOLC_O2 | 1 << FOLC_O3 | 1 << FOLC_OS },
+  {"O2",   "optimize", FOL_ARG_NONE,
+   1 << FOLC_O1 | 1 << FOLC_O3 | 1 << FOLC_OS },
+  {"O3",   "optimize", FOL_ARG_NONE,
+   1 << FOLC_O1 | 1 << FOLC_O2 | 1 << FOLC_OS },
+  {"Os",   "optimize", FOL_ARG_NONE,
+   1 << FOLC_O1 | 1 << FOLC_O2 | 1 << FOLC_O3 },
+  {"mips16",0,  FOL_ARG_NONE, 1 << FOLC_NOMIPS16 },
+  {"nomips16",  0,  FOL_ARG_NONE, 1 << FOLC_MIPS16 },
+  {"always_inline", 0,  FOL_ARG_NONE, 1 << FOLC_NOINLINE },
+  {"noinline",  0,  FOL_ARG_NONE, 1 << FOLC_ALWAYS_INLINE },
+  {"unused",0,  FOL_ARG_NONE, 1 << FOLC_USED },
+  {"used",  0,  FOL_ARG_NONE, 1 << FOLC_UNUSED },
+  {"far",   0,  FOL_ARG_NONE, 1 << FOLC_NEAR },
+  {"near",  0,  FOL_ARG_NONE, 1 << FOLC_FAR },
+  {"hot",   0,  FOL_ARG_NONE, 1 << FOLC_COLD },
+  {"cold",  0,  FOL_ARG_NONE, 1 << FOLC_HOT },
+  {"code_readable", 0,  FOL_ARG_STRING, 0 },
+  {"alias", 0,  FOL_ARG_STRING, 0 },
+  {"aligned",   0,  FOL_ARG_SINGLE_NUM, 0},
+  {"alloc_size",0,  FOL_ARG_NUM_ONE_OR_TWO, 0},
+  {"alloc_align",   0,  FOL_ARG_SINGLE_NUM, 0},
+  {"assume_aligned",0,  FOL_ARG_NUM_ONE_OR_TWO, 0},
+  {"artifical", 0,  FOL_ARG_NONE, 0 },
+  {"constructor",   0,  FOL_ARG_OPTIONAL_NUM, 0},
+  {"const", 0,  FOL_ARG_NONE, 0 },
+  {"deprecated",0,  FOL_ARG_OPTIONAL_STRING, 0},
+  {"destructor",0,  FOL_ARG_OPTIONAL_NUM, 0},
+  {"error", 0,  FOL_ARG_OPTIONAL_STRING, 0},
+  {"flatten",   0,  FOL_ARG_NONE, 0 },

[PATCH 32/61] Account for LWL/LWR in store_by_pieces_p

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Cherry-picked 53d838794ad3379fdd8d1f3a812aa8f2dff56399
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index cd4bce71ae8..a1208bcef69 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -9286,7 +9286,7 @@ mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, 
unsigned int align)
  LW/SWL/SWR sequence.  This is often better than the 4 LIs and
  4 SBs that we would generate when storing by pieces.  */
   if (align <= BITS_PER_UNIT)
-return size < 4;
+return size < 4 || !ISA_HAS_LWL_LWR;
 
   /* If the data is 2-byte aligned, then:
 
@@ -9321,7 +9321,9 @@ mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, 
unsigned int align)
  (c4) A block move of 8 bytes can use two LW/SW sequences or a single
  LD/SD sequence, and in these cases we've traditionally preferred
  the memory copy over the more bulky constant moves.  */
-  return size < 8;
+  return (size < 8
+ || (align < 4 * BITS_PER_UNIT
+ && !ISA_HAS_LWL_LWR));
 }
 
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
-- 
2.34.1

[PATCH 23/61] Add offset shrinking pass (-mshrink-offsets)

2025-01-31 Thread Aleksandar Rakic

From: mfortune 

This is derived from code produced by Steve Ellcey.

This approach diverges slightly from the original concept.  It tries
to adjust the base pointer to a common value and keep the costing lower
than original by trying to find the best common value to trigger more
16-bit instructions.

Although this works, the magic includes zero adjustments when no best
common value is found.  This will need more digging as to why the code
size is better.  Some initial cases show indexed loads/stores prevented
but replaced with normal loads/stores with offsets close to zero, hence,
more potential for 16-bit load/store.

gcc/ChangeLog:

* config/mips/mips.cc (offset_cmp): New function.
(get_size_cost): Likewise.
(get_total_cost): Likewise.
(calculate_offsets_cost): Likewise.
(mark_mem): Likewise.
(dump_modified_offsets): Likewise.
(get_best_offset): Likewise.
(adjust_base_offset): Likewise.
(make_pass_shrink_mips_offsets): New function.
(class pass_shrink_mips_offsets): New class.
(pass_shrink_mips_offsets::execute): New method.
(mips_option_override): Enable offset shrinking pass.
* config/mips/mips.opt (mshrink-offsets): New option.
* doc/invoke.texi: Document the new option.

Cherry-picked 4c4fc03fdcad57d052a29e163ca961ae7cf913ed
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Steve Ellcey 
Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 379 +++
 gcc/config/mips/mips.opt |   4 +
 gcc/doc/invoke.texi  |  11 ++
 3 files changed, 394 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index e8ed002dfed..56e0d4ba021 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -68,6 +68,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "flags.h"
 #include "opts.h"
+#include "tm-constrs.h"
+#include "print-rtl.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -21583,6 +21585,369 @@ mips_set_tune (const struct mips_cpu_info *info)
 }
 }
 
+typedef struct mem_offset_def
+{
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT modified_offset;
+  basic_block bb;
+  machine_mode mode;
+  rtx insn;
+} mem_offset_def_t;
+
+typedef struct mem_offset_def *mem_offset_info;
+
+typedef struct offset_entry : free_ptr_hash 
+{
+  /* We hash by.  */
+  int base_regno;
+
+  /* Store.  */
+  int orig_cost;
+  int best_cost;
+  HOST_WIDE_INT best_offset;
+  vec offsets;
+
+  /* hash table support.  */
+  static inline hashval_t hash (const offset_entry *v)
+{ return (hashval_t) v->base_regno; };
+  static bool equal (const offset_entry *v, const offset_entry *c)
+{ return (v->base_regno == c->base_regno); };
+  static void remove (offset_entry *)
+{};
+} offset_entry_t;
+
+static int
+offset_cmp (const void *x, const void *y)
+{
+  const mem_offset_info p1 = *((const mem_offset_info *) x);
+  const mem_offset_info p2 = *((const mem_offset_info *) y);
+  if (p1->offset < p2->offset)
+return -1;
+  if (p1->offset > p2->offset)
+return 1;
+  return 0;
+}
+
+/* This is only an approximate optimistic size cost as we cannot decide
+   whether we use 16-bit or 32-bit before register allocation.  */
+static int
+get_size_cost (HOST_WIDE_INT offset, machine_mode mode)
+{
+  /* If the offset does not fit, it is likely to be split.  */
+  switch (mode)
+{
+case E_QImode:
+  if (mips_unsigned_immediate_p (offset, 5, 0))
+   return 2;
+  else if (SMALL_OPERAND (offset))
+   return 4;
+  else
+   return 8;
+case E_HImode:
+  if (mips_unsigned_immediate_p (offset, 5, 1))
+   return 2;
+  else if (SMALL_OPERAND (offset))
+   return 4;
+  else
+   return 8;
+case E_SImode:
+  if (mips_unsigned_immediate_p (offset, 5, 2))
+   return 2;
+  else if (SMALL_OPERAND (offset))
+   return 4;
+  else
+   return 8;
+default:
+  return 4;
+}
+}
+
+static int
+get_total_cost (offset_entry *info, HOST_WIDE_INT mod_offset)
+{
+  int i;
+  mem_offset_info m;
+  HOST_WIDE_INT cost;
+
+  cost = 0;
+  for (i = 0; info->offsets.iterate (i, &m); i++)
+ cost += get_size_cost (m->offset - mod_offset, m->mode);
+  return cost;
+}
+
+int
+calculate_offsets_cost (offset_entry **slot,
+   void *data ATTRIBUTE_UNUSED)
+{
+  int i;
+  mem_offset_info m;
+  offset_entry *info = *slot;
+  HOST_WIDE_INT prev_offset;
+
+  info->offsets.qsort (offset_cmp);
+
+  info->best_cost = info->orig_cost = get_total_cost (info, 0);
+  prev_offset = 0;
+  for (i = 0; info->offsets.iterate (i, &m); i++)
+{
+  /* The initial adjustment will cost us one ADD instruction.  */
+  int cur_cost = 4;
+
+  if (m->offset == prev_offset)
+   continue;
+
+  cur_cost += get_total_cost (info, m-

[PATCH 12/61] Add microMIPS R6 support

2025-01-31 Thread Aleksandar Rakic

From: Andrew Bennett 

Squashed commits:
- Add umipsr6 compact branch support.

- Multilib - microMIPS R6.

- Don't think short micromips instructions are barriers.

Some micromips insns have a length of 2, but unfortunately 2/4
evaluates to 0, so the routine incorrectly thinks that the instruction
is a barrier and does not reset the fs_delay state.

This patch does need to check whether the hilo calculation is
still ok for micromips instructions.

- Add undocumented command line option to enable forbidden slot filling.

Option is -mforbidden-slots.

- Fix micromips r6 issue with clear_hazard insn.

- Prevent -mdsp and -mdspr2 with -mmicromips -mips32r6.

- Update ZC/ZD constraints for microMIPS R6 9-bit offsets.

- LWXS removed for microMIPS R6.

- Add DSPr3 support.

- Add m6201 architecture and scheduler.

- Ensure micromips is always considered NAN2008 for hard-float.

Cherry-picked 02af969d5f07fb73f23cedd95a82fe581ccfe820
from https://github.com/MIPS/gcc

Signed-off-by: Andrew Bennett 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/constraints.md |  33 ++--
 gcc/config/mips/m6200.md   | 229 +
 gcc/config/mips/mips-cpus.def  |   3 +
 gcc/config/mips/mips-dsp.md|  17 +-
 gcc/config/mips/mips-tables.opt|  49 +++---
 gcc/config/mips/mips.cc|  54 --
 gcc/config/mips/mips.h |  40 +++--
 gcc/config/mips/mips.md|  88 --
 gcc/config/mips/mips.opt   |   7 +
 gcc/config/mips/ml-img-elf |   4 +
 gcc/config/mips/ml-img-linux   |   4 +
 gcc/config/mips/t-mips-multi   |  43 +
 gcc/doc/invoke.texi|   2 +-
 gcc/doc/md.texi|  11 +-
 gcc/testsuite/gcc.target/mips/mips.exp |   1 -
 15 files changed, 498 insertions(+), 87 deletions(-)
 create mode 100644 gcc/config/mips/m6200.md

diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md
index a96028dd746..3b8fe9c3b70 100644
--- a/gcc/config/mips/constraints.md
+++ b/gcc/config/mips/constraints.md
@@ -368,25 +368,30 @@
(match_test "mips_const_vector_same_bytes_p (op, mode)")))
 
 (define_memory_constraint "ZC"
-  "A memory operand whose address is formed by a base register and offset
-   that is suitable for use in instructions with the same addressing mode
-   as @code{ll} and @code{sc}."
+  "When compiling R6 code, this constraint matches a memory operand whose
+   address is formed from a base register and a 9-bit offset.
+   When compiling microMIPS code, this constraint matches a memory operand
+   whose address is formed from a base register and a 12-bit offset.
+   When not compiling for microMIPS nor R6, @code{ZC} is equivalent to
+   @code{R}.
+   These operands can be used for instructions such as @code{ll} and
+   @code{sc}."
   (and (match_code "mem")
-   (if_then_else
-(match_test "TARGET_MICROMIPS")
-(match_test "umips_12bit_offset_address_p (XEXP (op, 0), mode)")
-(if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT")
-  (match_test "mips_9bit_offset_address_p (XEXP (op, 0), mode)")
-  (match_test "mips_address_insns (XEXP (op, 0), mode, false)")
+   (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT")
+(match_test "mips_9bit_offset_address_p (XEXP (op, 0), mode)")
+  (if_then_else
+(match_test "TARGET_MICROMIPS")
+(match_test "umips_12bit_offset_address_p (XEXP (op, 0), mode)")
+(match_test "mips_address_insns (XEXP (op, 0), mode, false)")
 
 (define_address_constraint "ZD"
   "An address suitable for a @code{prefetch} instruction, or for any other
instruction with the same addressing mode as @code{prefetch}."
-   (if_then_else (match_test "TARGET_MICROMIPS")
-(match_test "umips_12bit_offset_address_p (op, mode)")
- (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT")
-   (match_test "mips_9bit_offset_address_p (op, mode)")
-   (match_test "mips_address_insns (op, mode, false)"
+  (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT")
+   (match_test "mips_9bit_offset_address_p (op, mode)")
+   (if_then_else (match_test "TARGET_MICROMIPS")
+ (match_test "umips_12bit_offset_address_p (op, mode)")
+ (match_test "mips_address_insns (op, mode, false)"
 
 (define_memory_constraint "ZR"
  "@internal
diff --git a/gcc/config/mips/m6200.md b/gcc/config/mips/m6200.md
new file mode 100644
index 000..10f07475be0
--- /dev/null
+++ b/gcc/config/mips/m6200.md
@@ -0,0 +1,229 @@
+;; DFA-based pipeline description for MIPS32 models M6200.
+;;
+;; Copyright (C) 2024 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License

[PATCH 10/61] Add -mgrow-frame-downwards

2025-01-31 Thread Aleksandar Rakic

From: mfortune 

Grow the local frame down instead of up for mips16 code size.

By growing the frame downwards we get spill slots created at the lowest
address rather than the highest address in a local frame.  The benefit is
that when the frame is large, the spill slots can still be accessed using
a 16-bit instruction.  It is less important for large local variables to
be accessed using short instructions, as they are (probably) accessed
less frequently.

This is default on for MIPS16.

Cherry-picked 7c1bf276c0ebb45c87fe7bc30f057866d6153ec4
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.h   | 10 --
 gcc/config/mips/mips.opt |  4 
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index cf3a4e04880..535172d3406 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2365,8 +2365,14 @@ enum reg_class
 
 #define STACK_GROWS_DOWNWARD 1
 
-#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0  \
- || (flag_sanitize & SANITIZE_ADDRESS) != 0)
+/* Growing the frame downwards allows us to put spills closest to
+   the stack pointer which is good as they are likely to be accessed
+   frequently.  We can also arrange for normal stack usage to place
+   scalars last so that they too are close to the stack pointer.  */
+#define FRAME_GROWS_DOWNWARD ((TARGET_MIPS16   \
+  && TARGET_FRAME_GROWS_DOWNWARDS) \
+ || (flag_stack_protect != 0   \
+ || (flag_sanitize & SANITIZE_ADDRESS) != 0))
 
 /* Size of the area allocated in the frame to save the GP.  */
 
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index c04a3d9dbfa..ca4d377291e 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -473,6 +473,10 @@ mframe-header-opt
 Target Var(flag_frame_header_optimization) Optimization
 Optimize frame header.
 
+mgrow-frame-downwards
+Target Var(TARGET_FRAME_GROWS_DOWNWARDS) Init(1) Undocumented
+Change the behaviour to grow the frame downwards.
+
 noasmopt
 Driver
 
-- 
2.34.1

[PATCH 46/61] nanoMIPS: unnecessary AND following an EXT

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

The fwprop1 introduces a new use of Y by replacing the
(subreg:QI (reg:SI X)) with (reg:QI Y) preventing the optimization of
zero_extend later during the combine. This patch prevents this
replacement in two new cases.

 A: (set (subreg:SI (reg:QI Y))
 (zero_extract:SI Z (const_int 8) (const_int ?)))
 B: (set (reg:SI X) (zero_extend:SI (reg:QI Y)))
 C: (... (subreg:QI (reg:SI X)) ...)
 D: (... (reg:SI X) ...)

 A: (set (reg:SI Y)
 (zero_extract:SI Z (const_int 8) (const_int ?)))
 B: (set (reg:SI X) (zero_extend:SI (reg:QI Y)))
 C: (... (subreg:QI (reg:SI X)) ...)
 D: (... (reg:SI X) ...)

gcc/

* fwprop.cc (free_load_extend): Renamed to ...
  (free_extend): Handle zero/sign_extract sources.
  (forward_propagate_subreg): Use free_extend.

gcc/testsuite/

* gcc.target/mips/union-zext.c: New.

Cherry-picked a76808b917661f102d4b5f6256f76a1a1e580676
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/fwprop.cc  | 38 ++
 gcc/testsuite/gcc.target/mips/union-zext.c | 29 +
 2 files changed, 60 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/union-zext.c

diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index 8cba6b7ce9f..717415a4fb6 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -614,15 +614,14 @@ try_fwprop_subst (use_info *use, set_info *def,
 
 /* For the given single_set INSN, containing SRC known to be a
ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
-   is redundant due to the register being set by a LOAD_EXTEND_OP
-   load from memory.  */
+   is redundant due to the register being set by ZERO_EXTRACT or
+   SIGN_EXTRACT of appropriate size or by LOAD_EXTEND_OP load
+   from memory.  */
 
 static bool
-free_load_extend (rtx src, insn_info *insn)
+free_extend (rtx src, insn_info *insn)
 {
   rtx reg = XEXP (src, 0);
-  if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
-return false;
 
   def_info *def = nullptr;
   for (use_info *use : insn->uses ())
@@ -644,10 +643,35 @@ free_load_extend (rtx src, insn_info *insn)
 {
   rtx patt = PATTERN (def_rtl);
 
-  if (GET_CODE (patt) == SET
+  if (GET_CODE (patt) != SET)
+  return false;
+
+#ifdef LOAD_EXTEND_OP
+  if (LOAD_EXTEND_OP (GET_MODE (reg)) == GET_CODE (src)
  && GET_CODE (SET_SRC (patt)) == MEM
  && rtx_equal_p (SET_DEST (patt), reg))
return true;
+#endif
+
+  int extract_code = GET_CODE (src) == ZERO_EXTEND
+? ZERO_EXTRACT : SIGN_EXTRACT;
+
+  if (GET_CODE (SET_SRC (patt)) == extract_code
+&& GET_MODE (SET_SRC (patt)) == GET_MODE (src)
+ && INTVAL (XEXP (SET_SRC (patt), 1))
+<= GET_MODE_BITSIZE (GET_MODE (reg)).to_constant ())
+  {
+if (GET_CODE (SET_DEST (patt)) == SUBREG
+  && GET_MODE (SET_DEST (patt)) == GET_MODE (src)
+  && rtx_equal_p (XEXP (SET_DEST (patt), 0), reg))
+  return true;
+
+if (REG_P (SET_DEST (patt))
+  && GET_MODE (SET_DEST (patt)) == GET_MODE (src)
+  && REGNO (SET_DEST (patt)) == REGNO (reg))
+  return true;
+  }
+
 }
   return false;
 }
@@ -709,7 +733,7 @@ forward_propagate_subreg (use_info *use, set_info *def,
  && REG_P (XEXP (src, 0))
  && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
  && GET_MODE (XEXP (src, 0)) == use_mode
- && !free_load_extend (src, def->insn ())
+ && !free_extend (src, def->insn ())
  && (targetm.mode_rep_extended (int_use_mode, src_mode)
  != (int) GET_CODE (src)))
return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
diff --git a/gcc/testsuite/gcc.target/mips/union-zext.c 
b/gcc/testsuite/gcc.target/mips/union-zext.c
new file mode 100644
index 000..6728d415f5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/union-zext.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "(HAS_INS) -mgp32" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* { dg-final { scan-assembler-not "\tandi\t"} } */
+
+typedef struct bits
+{
+  unsigned B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+
+typedef union
+{
+  unsigned v;
+  bits_t b;
+} bitfields_t;
+
+void *
+strcpy (void *__restrict__ dst, const void *__restrict__ _a)
+{
+  unsigned x = *(unsigned *) _a;
+  bitfields_t bx;
+  bx.v = x;
+
+  unsigned char v2 = (unsigned char) bx.b.B2;
+  ((unsigned char *) (dst))[2] = (v2);
+  if (v2 == 0)
+return 0;
+  return dst;
+}
-- 
2.34.1

[PATCH 43/61] Disable ssa-dom-cse-2.c for MIPS lp64

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

The optimisation to reduce the result to constant 28 still happens
but only much later in combine.

gcc/testsuite/
* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Do not check output for
MIPS lp64 abi.

Cherry-picked 7a9286a94817badb312e3bb2b4a7a83b8b3fa28a
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index 5c89e3f8698..5097ae8bf11 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -27,4 +27,4 @@ foo ()
but the loop reads only one element at a time, and DOM cannot resolve these.
The same happens on powerpc depending on the SIMD support available.  */
 
-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* 
hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* 
sparc*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
+/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* 
hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { mips*-*-* 
powerpc*-*-* sparc*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } 
} } } } } */
-- 
2.34.1

[PATCH 37/61] Testsuite: Skip tests making calls to variables

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

The compressed MIPS ISAs (microMIPS and MIPS16) require the LSB of
an address to indicate which ISA to execute.  The non-conformant
patterns used in these tests cannot set the ISA mode bit and may
attempt to directly call the variable which triggers an error from
the assembler about calling a different ISA mode.

gcc/testsuite/
* gcc.c-torture/compile/20020129-1.c: Skip for MIPS16/microMIPS.
* gcc.c-torture/compile/pr37433-1.c: Likewise.
* gcc.c-torture/compile/pr37433.c: Likewise.
* lib/target-supports.exp
(check_effective_target_mips_compressed): New function.

Cherry-picked 97f2d5c6403c0cb8b65e059349ec18ffc9505bfd
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.c-torture/compile/20020129-1.c |  5 +
 gcc/testsuite/gcc.c-torture/compile/pr37433-1.c  |  5 +
 gcc/testsuite/gcc.c-torture/compile/pr37433.c|  5 +
 gcc/testsuite/lib/target-supports.exp| 10 ++
 4 files changed, 25 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/20020129-1.c 
b/gcc/testsuite/gcc.c-torture/compile/20020129-1.c
index c14ac07655e..d06bcb0976d 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20020129-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20020129-1.c
@@ -2,6 +2,11 @@
 
 /* { dg-require-effective-target indirect_calls } */
 
+/* MIPS compressed ISAs require the LSB of an address to indicate which
+   ISA mode to use.  This test cannot do that and raises an assembler
+   warning (binutils 2.29 onwards) of a branch to a different ISA.  */
+/* { dg-skip-if "" { mips_compressed } } */
+
 typedef struct
 {
   long long a[10];
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr37433-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr37433-1.c
index 48a57b637d7..5948b3d740a 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr37433-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr37433-1.c
@@ -1,5 +1,10 @@
 /* { dg-require-effective-target indirect_calls } */
 
+/* MIPS compressed ISAs require the LSB of an address to indicate which
+   ISA mode to use.  This test cannot do that and raises an assembler
+   warning (binutils 2.29 onwards) of a branch to a different ISA.  */
+/* { dg-skip-if "" { mips_compressed } } */
+
 void regex_subst(void)
 {
   const void *subst = "";
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr37433.c 
b/gcc/testsuite/gcc.c-torture/compile/pr37433.c
index 95d168afa2f..69e622132af 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr37433.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr37433.c
@@ -1,5 +1,10 @@
 /* { dg-require-effective-target indirect_calls } */
 
+
+/* MIPS compressed ISAs require the LSB of an address to indicate which
+   ISA mode to use.  This test cannot do that and raises an assembler
+   warning (binutils 2.29 onwards) of a branch to a different ISA.  */
+/* { dg-skip-if "" { mips_compressed } } */
 int regex_subst(void)
 {
   const void *subst = "";
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index fd58682cae3..4f005c5a7d2 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1514,6 +1514,16 @@ proc check_effective_target_mips64 { } {
 }]
 }
 
+# Return true if the target is using a compressed MIPS ISA.
+
+proc check_effective_target_mips_compressed { } {
+return [check_no_compiler_messages mips_compressed assembly {
+   #if !defined (__mips_micromips) && !defined (__mips16)
+   #error !__mips_micromips && !__mips16
+   #endif
+}]
+}
+
 # Return true if the target is a MIPS target that does not produce
 # MIPS16 code.
 
-- 
2.34.1

[PATCH 36/61] Testsuite: Disable the time-profiler-2.c test

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

gcc/testsuite/
* gcc.dg/tree-prof/time-profiler-2.c: Skip for mips* triples
as it is unstable in simulation.

Cherry-picked 7c5a494a31c72ee3285ffae9fda738aa875869b9
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.dg/tree-prof/time-profiler-2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/tree-prof/time-profiler-2.c 
b/gcc/testsuite/gcc.dg/tree-prof/time-profiler-2.c
index eed0b1dd08d..bcf9adf1b09 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/time-profiler-2.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/time-profiler-2.c
@@ -1,4 +1,5 @@
 /* { dg-options "-O2 -fdump-ipa-profile -fno-ipa-vrp" } */
+/* { dg-skip-if "This test is unstable for MIPS " { "mips*-*-*" } } */
 
 #include 
 
-- 
2.34.1

[PATCH 57/61] Implement synthesised conditional xor/or

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

Create an additional case for if-conversion which expands the following
sequence: "if (test) x ^= C;" as

a = 0;
if (test) a = C;
x ^= a;

This reduces the number of necessary conditional moves on some targets
(most notably MIPS).

gcc/

* config/mips/mips.cc (mips_rtx_costs): Increase the cost of
conditional moves which allow both operands to be registers on
mips64r6.
* ifcvt.cc (noce_try_synthesized_xor_ok): New function.  Do not
try the XOR/IOR conversion if the target has a conditional move
which accepts two registers.
(noce_try_synthesized_xor): New function.  Discover the sequence
of instructions which fit the description and expand them
accordingly.

gcc/testsuite/

* gcc.target/mips/cond_xor.c: New test.
* gcc.target/mips/cond_xor1.c: New test.
* gcc.target/mips/cond_xor2.c: New test. Skip -Os.

Cherry-picked 5409eee7c24688cd73df92d83a6844a041545c2f,
31d6d46912ad3cbb56c6fc251418c2624b4bb07f and
ff607fa78b23b8e1d753a6e836419e3fe46e3045
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Chao-ying Fu 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   |  18 ++-
 gcc/ifcvt.cc  | 135 ++
 gcc/testsuite/gcc.target/mips/cond_xor.c  |  15 +++
 gcc/testsuite/gcc.target/mips/cond_xor1.c |  15 +++
 gcc/testsuite/gcc.target/mips/cond_xor2.c |  15 +++
 5 files changed, 194 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/cond_xor.c
 create mode 100644 gcc/testsuite/gcc.target/mips/cond_xor1.c
 create mode 100644 gcc/testsuite/gcc.target/mips/cond_xor2.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 19d428e6ed6..63b7bdd255c 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5784,10 +5784,20 @@ mips_rtx_costs (rtx x, machine_mode mode, int 
outer_code,
   return false;
 
 case IF_THEN_ELSE:
- if (reg_or_0_operand (XEXP (x, 1), VOIDmode)
- || reg_or_0_operand (XEXP (x, 2), VOIDmode))
-   *total = 0;
-return false;
+  if (reg_or_0_operand (XEXP (x, 1), VOIDmode)
+ || reg_or_0_operand (XEXP (x, 2), VOIDmode))
+   *total = 0;
+  if (outer_code == SET)
+   {
+ /* Conditional moves on r6 only allow one parameter to be a register
+(the other parameter is zero).  Increase the cost of conditional
+moves which allow both parameters to be registers.  */
+ if (mips_isa_rev == 6
+ && register_operand (XEXP (x, 1), VOIDmode)
+ && register_operand (XEXP (x, 2), VOIDmode))
+   *total = 1;
+   }
+  return false;
 
 default:
   return false;
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 74f13a637b2..297ccd470dc 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -1962,6 +1962,137 @@ noce_try_cmove (struct noce_if_info *if_info)
   return false;
 }
 
+/* If the target has a conditional move which accepts two registers, do not
+   try synthesized conditional XOR/IOR, as it will not yield any benefits.  */
+
+static bool
+noce_try_synthesized_xor_ok (struct noce_if_info *if_info)
+{
+  rtx testreg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
+
+  rtx if_then_else = gen_rtx_IF_THEN_ELSE (word_mode,
+   if_info->cond,
+   const0_rtx, if_info->x);
+
+  rtx if_then_else_2 = gen_rtx_IF_THEN_ELSE (word_mode,
+ if_info->cond,
+ testreg, if_info->x);
+
+  return rtx_cost (if_then_else_2, word_mode, SET, 1, true)
+ > rtx_cost (if_then_else, word_mode, SET, 1, true);
+}
+
+/* Expand "if (test) x ^= C;" as
+
+   a = 0;
+   if (test) a = C;
+   x ^= a;
+
+   This lowers the number of necessary conditional moves on some targets.
+
+   We allow for maximum of three instructions in the then block.
+   First one loads the constant into a register.  Second one is an actual
+   XOR/IOR instruction.  Third one is a zero or sign extend.  */
+
+static bool
+noce_try_synthesized_xor (struct noce_if_info *if_info)
+{
+  enum rtx_code code = GET_CODE (if_info->cond);
+
+  if (code != NE && code != EQ)
+return FALSE;
+
+  /* Fail if there is an else block.  */
+  if (if_info->else_bb)
+return FALSE;
+
+  /* We allow for the final instruction in the basic block to be sign or
+ zero extend.  */
+  rtx a = if_info->a;
+  rtx_insn *insn_a = if_info->insn_a;
+  if ((GET_CODE (a) == ZERO_EXTEND
+   || GET_CODE (a) == SIGN_EXTEND)
+  && single_set (prev_nonnote_nondebug_insn (insn_a)))
+{
+  a = SET_SRC (single_set (prev_nonnote_nondebug_insn (insn_a)));
+  insn_a = prev_nonnote_nondebug_insn (insn_a);
+}
+
+  /* Check that the operation is indeed XOR or IOR.  Also

[PATCH 49/61] Make rtl if-conversion more common

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

Tune ifcvt parameters, so that we get if-conversion in more cases.

gcc/
* config/mips/mips.cc (mips_rtx_costs): Reduce cost of
if_then_else pattern.
(mips_max_noce_ifcvt_seq_cost): New function. Decrease
maximum permissible cost for the unconditional sequence which
should be generated during if-conversion (for all non-r6
targets). This disables if-conversion for non-r6 targets in
branch-cost-1.c test.
(mips_noce_conversion_profitable_p): New function.
(TARGET_MAX_NOCE_IFCVT_SEQ_COST): Define hook.
(TARGET_NOCE_CONVERSION_PROFITABLE_P): Define hook.

gcc/testsuite/

* gcc.target/mips/branch-cost-1.c: Disable for -Os.

Cherry-picked 1d1ac2a7bdbb6a1ab1a90bfcd9fa6e8a96dcb316
and 8f596d9c4336e8f6e0a01fa22634989eda7d51da
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 65 +++
 gcc/testsuite/gcc.target/mips/branch-cost-1.c |  2 +-
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 51d9812151a..0b155c107c2 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "tm-constrs.h"
 #include "print-rtl.h"
+#include "ifcvt.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -5618,6 +5619,12 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
   return false;
 
+case IF_THEN_ELSE:
+ if (reg_or_0_operand (XEXP (x, 1), VOIDmode)
+ || reg_or_0_operand (XEXP (x, 2), VOIDmode))
+   *total = 0;
+return false;
+
 default:
   return false;
 }
@@ -25641,6 +25648,58 @@ mips_bit_clear_p (enum machine_mode mode, unsigned 
HOST_WIDE_INT m)
 
   return false;
 }
+
+/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  */
+
+static unsigned int
+mips_max_noce_ifcvt_seq_cost (edge e)
+{
+  bool predictable_p = predictable_edge_p (e);
+
+  /* If we have a parameter set, use that, otherwise take a guess using
+ BRANCH_COST.  */
+  if (predictable_p)
+{
+  if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
+   return param_max_rtl_if_conversion_predictable_cost;
+}
+  else
+{
+  if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
+   return param_max_rtl_if_conversion_unpredictable_cost;
+}
+
+  return BRANCH_COST (true, predictable_p)
+ * COSTS_N_INSNS (mips_isa_rev == 6 ? 4 : 3);
+}
+
+/* Return true if SEQ is a good candidate as a replacement for the
+   if-convertible sequence described in IF_INFO.  */
+
+static bool
+mips_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+{
+  bool speed = if_info->speed_p;
+  unsigned cost = 0;
+  rtx set;
+
+for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
+{
+  set = single_set (insn);
+  if (set)
+   cost += pattern_cost (set, speed);
+  else
+   cost++;
+}
+
+  if (cost <= if_info->original_cost)
+return true;
+  /* When compiling for size, we can make a reasonably accurate guess
+ at the size growth.  When compiling for speed, use the maximum.  */
+  return speed && cost <= if_info->max_seq_cost;
+}
+
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -25976,6 +26035,12 @@ mips_bit_clear_p (enum machine_mode mode, unsigned 
HOST_WIDE_INT m)
 #undef TARGET_SCHED_FUSION_PRIORITY
 #define TARGET_SCHED_FUSION_PRIORITY mips_sched_fusion_priority
 
+#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
+#define TARGET_MAX_NOCE_IFCVT_SEQ_COST mips_max_noce_ifcvt_seq_cost
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P mips_noce_conversion_profitable_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
diff --git a/gcc/testsuite/gcc.target/mips/branch-cost-1.c 
b/gcc/testsuite/gcc.target/mips/branch-cost-1.c
index 7f7ebbe5fc9..006a29a7361 100644
--- a/gcc/testsuite/gcc.target/mips/branch-cost-1.c
+++ b/gcc/testsuite/gcc.target/mips/branch-cost-1.c
@@ -1,5 +1,5 @@
 /* { dg-options "-mbranch-cost=1 (HAS_MOVN)" } */
-/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" "-Os" } { "" } } */
 NOMIPS16 int
 foo (int x, int y, int z, int k)
 {
-- 
2.34.1

[PATCH 1/2] libstdc++: Fix return value of vector::insert_range

2025-01-31 Thread Patrick Palka

In some cases we're wrongly returning an iterator pointing to (one past)
the last element inserted instead of to the first element inserted.

libstdc++-v3/ChangeLog:

* include/bits/stl_bvector.h (vector::insert_range):
Consistently return an iterator pointing to the first element
inserted.
* include/bits/vector.tcc (vector::insert_range): Likewise.
* testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc:
Verify insert_range return values.
* testsuite/23_containers/vector/modifiers/insert/insert_range.cc:
Likewise.
---
 libstdc++-v3/include/bits/stl_bvector.h|  8 
 libstdc++-v3/include/bits/vector.tcc   |  3 ++-
 .../bool/modifiers/insert/insert_range.cc  | 18 --
 .../vector/modifiers/insert/insert_range.cc| 18 --
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 961e4a25299..e5e4b7db5a9 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1341,9 +1341,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  std::copy_backward(__pos._M_const_cast(), end(),
 this->_M_impl._M_finish
   + difference_type(__n));
- auto __i = ranges::copy(__rg, __pos._M_const_cast()).out;
+ ranges::copy(__rg, __pos._M_const_cast()).out;
  this->_M_impl._M_finish += difference_type(__n);
- return __i;
+ return __pos._M_const_cast();
}
  else
{
@@ -1355,9 +1355,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  iterator __i = _M_copy_aligned(__begin,
 __pos._M_const_cast(),
 __start);
- __i = ranges::copy(__rg, __i).out;
+ iterator __j = ranges::copy(__rg, __i).out;
  iterator __finish = std::copy(__pos._M_const_cast(),
-   __end, __i);
+   __end, __j);
  this->_M_deallocate();
  this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
  this->_M_impl._M_start = __start;
diff --git a/libstdc++-v3/include/bits/vector.tcc 
b/libstdc++-v3/include/bits/vector.tcc
index 4f4c366080b..acb2f5fca1e 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -984,8 +984,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   {
if (__pos == cend())
  {
+   const auto __ins_idx = size();
append_range(std::forward<_Rg>(__rg));
-   return end();
+   return begin() + __ins_idx;
  }
 
if constexpr (ranges::forward_range<_Rg>)
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
 
b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
index 4f4835746ea..5c65610667d 100644
--- 
a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
@@ -35,16 +35,22 @@ do_test()
   VERIFY( eq(v, a) );
   v.clear();
   v.shrink_to_fit();
-  v.insert_range(v.begin(), Range(a, a+3));
-  v.insert_range(v.end(), Range(a+6, a+9));
-  v.insert_range(v.begin()+3, Range(a+3, a+6));
+  auto it = v.insert_range(v.begin(), Range(a, a+3));
+  VERIFY( it == v.begin() );
+  it = v.insert_range(v.end(), Range(a+6, a+9));
+  VERIFY( it == v.begin()+3 );
+  it = v.insert_range(v.begin()+3, Range(a+3, a+6));
+  VERIFY( it == v.begin()+3 );
   VERIFY( eq(v, a) );
   v.resize(3);
-  v.insert_range(v.begin()+1, Range(a+4, a+9));
-  v.insert_range(v.begin()+1, Range(a+1, a+4));
+  it = v.insert_range(v.begin()+1, Range(a+4, a+9));
+  VERIFY( it == v.begin()+1 );
+  it = v.insert_range(v.begin()+1, Range(a+1, a+4));
+  VERIFY( it == v.begin()+1 );
   v.resize(9);
   VERIFY( eq(v, a) );
-  v.insert_range(v.begin(), Range(a, a));
+  it = v.insert_range(v.begin(), Range(a, a));
+  VERIFY( it == v.begin() );
   VERIFY( eq(v, a) );
 }
 
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc 
b/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
index 68218e94f28..59071435126 100644
--- 
a/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
@@ -39,16 +39,22 @@ do_test()
   VERIFY( eq(v, a) );
   v.clear();
   v.shrink_to_fit();
-  v.insert_range(v.begin(), Range(a, a+3));
-  v.insert_range(v.end

[PATCH 61/61] Fix pr54240

2025-01-31 Thread Aleksandar Rakic

From: Chao-ying Fu 

gcc/testsuite/
* gcc.target/mips/pr54240.c: Scan phiopt2.

Cherry-picked 02dd052d4822ca187af075f1fb5301c954844144
from https://github.com/MIPS/gcc

Signed-off-by: Chao-ying Fu 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.target/mips/pr54240.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/mips/pr54240.c 
b/gcc/testsuite/gcc.target/mips/pr54240.c
index d3976f6cfef..31b793bb8c6 100644
--- a/gcc/testsuite/gcc.target/mips/pr54240.c
+++ b/gcc/testsuite/gcc.target/mips/pr54240.c
@@ -27,4 +27,4 @@ NOMIPS16 int foo(S *s)
   return next->v;
 }
 
-/* { dg-final { scan-tree-dump "Hoisting adjacent loads" "phiopt1" } } */
+/* { dg-final { scan-tree-dump "Hoisting adjacent loads" "phiopt2" } } */
-- 
2.34.1

[PATCH/GCC16 v2 1/1] AArch64: Emit half-precision FCMP/FCMPE

2025-01-31 Thread Spencer Abson

Enable a target with FEAT_FP16 to emit the half-precision variants
of FCMP/FCMPE.

gcc/ChangeLog:

* config/aarch64/aarch64.md: Update cbranch, cstore, fcmp
and fcmpe to use the GPF_F16 iterator for floating-point
modes.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/_Float16_cmp_1.c: New test.
* gcc.target/aarch64/_Float16_cmp_2.c: New (negative) test.
---
 gcc/config/aarch64/aarch64.md | 29 +-
 .../gcc.target/aarch64/_Float16_cmp_1.c   | 54 +++
 .../gcc.target/aarch64/_Float16_cmp_2.c   |  7 +++
 3 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/_Float16_cmp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/_Float16_cmp_2.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 071058dbeb3..f63e4d79b3c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -707,11 +707,12 @@
 )
 
 (define_expand "cbranch4"
-  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
-   [(match_operand:GPF 1 "register_operand")
-(match_operand:GPF 2 
"aarch64_fp_compare_operand")])
-  (label_ref (match_operand 3 "" ""))
-  (pc)))]
+  [(set (pc) (if_then_else
+   (match_operator 0 "aarch64_comparison_operator"
+[(match_operand:GPF_F16 1 "register_operand")
+ (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
+   (label_ref (match_operand 3 "" ""))
+   (pc)))]
   ""
   "
   operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
@@ -4338,26 +4339,28 @@
 
 (define_insn "fcmp"
   [(set (reg:CCFP CC_REGNUM)
-(compare:CCFP (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+   (compare:CCFP
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1  ]
   [ w   , Y  ] fcmp\t%0, #0.0
   [ w   , w  ] fcmp\t%0, %1
   }
-  [(set_attr "type" "fcmp")]
+  [(set_attr "type" "fcmp")]
 )
 
 (define_insn "fcmpe"
   [(set (reg:CCFPE CC_REGNUM)
-(compare:CCFPE (match_operand:GPF 0 "register_operand")
-  (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+   (compare:CCFPE
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1  ]
   [ w   , Y  ] fcmpe\t%0, #0.0
   [ w   , w  ] fcmpe\t%0, %1
   }
-  [(set_attr "type" "fcmp")]
+  [(set_attr "type" "fcmp")]
 )
 
 (define_insn "*cmp_swp__reg"
@@ -4425,8 +4428,8 @@
 (define_expand "cstore4"
   [(set (match_operand:SI 0 "register_operand")
(match_operator:SI 1 "aarch64_comparison_operator_mode"
-[(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
+[(match_operand:GPF_F16 2 "register_operand")
+ (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))]
   ""
   "
   operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_1.c 
b/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_1.c
new file mode 100644
index 000..e49ace1d7dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+fp16" } */
+
+/*
+** test_fcmp_store:
+** fcmph0, h1
+** csetw0, eq
+** ret
+*/
+int
+test_fcmp_store(_Float16 a, _Float16 b)
+{
+return a == b;
+}
+
+/*
+** test_fcmpe_store:
+** fcmpe   h0, h1
+** csetw0, mi
+** ret
+*/
+int
+test_fcmpe_store(_Float16 a, _Float16 b)
+{
+return a < b;
+}
+
+/*
+** test_fcmp_branch:
+** fcmph0, h1
+** ...
+*/
+_Float16
+test_fcmp_branch(_Float16 a, _Float16 b)
+{
+if (a == b)
+return a * b;
+return a;
+}
+
+/*
+** test_fcmpe_branch:
+** fcmpe   h0, h1
+** ...
+*/
+_Float16
+test_fcmpe_branch(_Float16 a, _Float16 b)
+{
+if (a < b)
+return a * b;
+return a;
+}
+
+/* { dg-final { check-function-bodies "**" "" "" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_2.c 
b/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_2.c
new file mode 100644
index 000..0ff7cda8796
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/_Float16_cmp_2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+nofp16" } */
+
+#include "_Float16_cmp_1.c"
+
+/* { dg-final { scan-assembler-not {\tfcmp\th[0-9]+} } } */
+/* { dg-final { scan-assembler-not {\tfcmpe\th[0-9]+} } } */
-- 
2.34.1

[PATCH/GCC16 v2 0/1] AArch64: Emit half-precision FCMP/FCMPE

2025-01-31 Thread Spencer Abson

Applied the fixups suggested in the previous review, cheers.


This patch allows the AArch64 back end to emit the half-precision variants of
FCMP and FCMPE, given the target supports FEAT_FP16. Previously, such
comparisons would be unnecessarily promoted to single-precision.

The latest documentation of these instructions can be found here:
https://developer.arm.com/documentation/ddi0602/2024-12

Successfully bootstrapped and regtested on aarch64-linux-gnu.

OK for stage 1?

Spencer Abson (1):
  AArch64: Emit half-precision FCMP/FCMPE

 gcc/config/aarch64/aarch64.md | 29 +-
 .../gcc.target/aarch64/_Float16_cmp_1.c   | 54 +++
 .../gcc.target/aarch64/_Float16_cmp_2.c   |  7 +++
 3 files changed, 77 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/_Float16_cmp_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/_Float16_cmp_2.c

-- 
2.34.1

Re: [PATCH] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Jakub Jelinek

On Fri, Jan 31, 2025 at 01:38:36PM +0100, Richard Biener wrote:
> > @@ -718,8 +720,11 @@ func_checker::compare_gimple_call (gcall
> >  
> >/* For direct calls we verify that types are compatible so if we matched
> >   callees, callers must match, too.  For indirect calls however verify
> > - function type.  */
> > -  if (!gimple_call_fndecl (s1))
> > + function type.  And also verify it for direct calls with some 
> > different
> > + fntype.  */
> > +  if (!gimple_call_fndecl (s1)
> > +  || TREE_TYPE (TREE_TYPE (t1)) != fntype1
> > +  || TREE_TYPE (TREE_TYPE (t2)) != fntype2)
> 
> I think we want to always compare the ABI relevant fntypes.  It seems
> we can arrive here with internal function calls where t1/t2 are
> "something" (NULL?).  I guess doing this as else {} of the

For internal calls gimple_call_fndecl (s1) will be NULL, so
!gimple_call_fndecl (s1) will be true and so the new checks aren't done.

> if (gimple_call_internal_p (s1)) (with gimple_call_internal_fn compare
> in a conditional if) would be a lot clearer?

What the patch does is just try to avoid the comparison in the common
case (direct calls from the beginning, where what the comment says
applies: if there were a mismatch, we'd already know that).

If you want to compare unconditionally, it would be about just removing the
  if (!gimple_call_fndecl (s1))
{
and
}
and reindenting + rewriting the comment above it.  Shall I do that?

Jakub

Re: [PATCH] OpenMP/Fortran: Add missing pop_state in parse_omp_dispatch

2025-01-31 Thread Paul-Antoine Arras


Pushed to master as obvious. This should fix PR118714.

On 31/01/2025 11:46, Paul-Antoine Arras wrote:

When the ST_NONE case is taken, the function returns immediately. Not calling
pop_state causes a dangling pointer.

gcc/fortran/ChangeLog:

* parse.cc (parse_omp_dispatch): Add missing pop_state.
---
  gcc/fortran/parse.cc | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc
index 00cd23d7729..5094d9d3ead 100644
--- a/gcc/fortran/parse.cc
+++ b/gcc/fortran/parse.cc
@@ -6375,7 +6375,10 @@ parse_omp_dispatch (void)
  
st = next_statement ();

if (st == ST_NONE)
-return st;
+{
+  pop_state ();
+  return st;
+}
if (st == ST_CALL || st == ST_ASSIGNMENT)
  accept_statement (st);
else



--
PA

[Ada] Fix wrong elaboration for allocator at library level of dynamic library

2025-01-31 Thread Eric Botcazou

The problem was preexisting for class-wide allocators, but now occurs for 
allocators of controlled types on the mainline, because of the recent overhaul 
of the finalization machinery.

Tested on x86-64/Linux, applied on the mainline.


2025-01-31  Eric Botcazou  

* gcc-interface/utils.cc (gnat_pushdecl): Clear TREE_PUBLIC on
functions really nested in another function.

-- 
Eric Botcazou

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index 5a90a1b81f7..1448716acc5 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -882,16 +882,20 @@ gnat_pushdecl (tree decl, Node_Id gnat_node)
   if (!deferred_decl_context && !context)
 context = get_global_context ();
 
-  /* Functions imported in another function are not really nested.
- For really nested functions mark them initially as needing
- a static chain for uses of that flag before unnesting;
- lower_nested_functions will then recompute it.  */
+  /* Mark functions really nested in another function, that is to say defined
+ there as opposed to imported from elsewhere, as initially needing a static
+ chain for the sake of uniformity (lower_nested_functions will recompute it
+ exacly later) and as private to the translation unit (the static chain may
+ be clobbered by calling conventions used across translation units).  */
   if (TREE_CODE (decl) == FUNCTION_DECL
-  && !TREE_PUBLIC (decl)
+  && !DECL_EXTERNAL (decl)
   && context
   && (TREE_CODE (context) == FUNCTION_DECL
 	  || decl_function_context (context)))
-DECL_STATIC_CHAIN (decl) = 1;
+{
+  DECL_STATIC_CHAIN (decl) = 1;
+  TREE_PUBLIC (decl) = 0;
+}
 
   if (!deferred_decl_context)
 DECL_CONTEXT (decl) = context;

Re: [PATCH] icf: Compare call argument types in certain cases and asm operands [PR117432]

2025-01-31 Thread Richard Biener

On Fri, 31 Jan 2025, Jakub Jelinek wrote:

> Hi!
> 
> compare_operand uses operand_equal_p under the hood, which e.g. for
> INTEGER_CSTs will just match the values regardless of their types.
> Now, in many cases comparing the type is redundant; if we have
>   x_2 = y_3 + 1;
> we've already compared the type for the lhs and also for rhs1, there won't
> be any surprises on rhs2.
> As noted in the PR, there are cases where the type of the operand is the
> sole place of information and we don't want to ICF merge functions if the
> types differ.
> One case is stdarg functions, arguments passed to ..., it is different
> if we pass 1, 1L, 1LL.
> Another case are the K&R unprototyped functions (sure, gone in C23).
> And yet another case are inline asm operands, "r" (1) is different from "r"
> (1L) from "r" (1LL).
> 
> So, the following patch determines based on lack of fntype (e.g. for
> internal functions), or on !prototype_p, or on stdarg_p (in that case
> using number of named arguments) which arguments need to have type checked
> and does that, plus compares types on inline asm operands (maybe it would be
> enough to do that just for input operands but we have just a routine to
> handle both and I didn't feel we need to differentiate).
> 
> Furthermore, I've noticed fntype{1,2} isn't actually compared if it is a
> direct call (gimple_call_fndecl is non-NULL).  That is wrong too, we could
> have
>   void (*fn) (int, long long) = (void (*) (int, long long)) foo;
>   fn (1, 1LL);
> in one case and
>   void (*fn) (long long, int) = (void (*) (long long, int)) foo;
>   fn (1LL, 1);
> in another, both folded into a direct call of foo with different
> gimple_call_fntype.  Sure, one of them would be UB at runtime (or both), but
> what if we ICF merge them into the one that is UB at runtime
> and the program actually calls the correct one only?
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2025-01-31  Jakub Jelinek  
> 
>   PR ipa/117432
>   * ipa-icf-gimple.cc (func_checker::compare_asm_inputs_outputs):
>   Also return_false if operands have incompatible types.
>   (func_checker::compare_gimple_call): Also check fntype1 vs. fntype2
>   compatibility if at least one of the calls has different
>   gimple_call_fntype from the FUNCTION_TYPE of the called decl.  For
>   calls to non-prototyped calls or for stdarg_p functions after the
>   last named argument (if any) check type compatibility of call
>   arguments.
> 
>   * gcc.c-torture/execute/pr117432.c: New test.
>   * gcc.target/i386/pr117432.c: New test.
> 
> --- gcc/ipa-icf-gimple.cc.jj  2025-01-02 11:23:16.334519404 +0100
> +++ gcc/ipa-icf-gimple.cc 2025-01-30 16:21:05.782127011 +0100
> @@ -459,7 +459,9 @@ func_checker::compare_asm_inputs_outputs
>   return false;
>  
>if (!compare_operand (TREE_VALUE (t1), TREE_VALUE (t2),
> - get_operand_access_type (map, t1)))
> + get_operand_access_type (map, t1))
> +   || !types_compatible_p (TREE_TYPE (TREE_VALUE (t1)),
> +   TREE_TYPE (TREE_VALUE (t2
>   return return_false ();
>  
>tree p1 = TREE_PURPOSE (t1);
> @@ -718,8 +720,11 @@ func_checker::compare_gimple_call (gcall
>  
>/* For direct calls we verify that types are compatible so if we matched
>   callees, callers must match, too.  For indirect calls however verify
> - function type.  */
> -  if (!gimple_call_fndecl (s1))
> + function type.  And also verify it for direct calls with some different
> + fntype.  */
> +  if (!gimple_call_fndecl (s1)
> +  || TREE_TYPE (TREE_TYPE (t1)) != fntype1
> +  || TREE_TYPE (TREE_TYPE (t2)) != fntype2)

I think we want to always compare the ABI relevant fntypes.  It seems
we can arrive here with internal function calls where t1/t2 are
"something" (NULL?).  I guess doing this as else {} of the
if (gimple_call_internal_p (s1)) (with gimple_call_internal_fn compare
in a conditional if) would be a lot clearer?

>  {
>if ((fntype1 && !fntype2)
> || (!fntype1 && fntype2)
> @@ -738,6 +743,24 @@ func_checker::compare_gimple_call (gcall
>  get_operand_access_type (&map, chain1)))
>  return return_false_with_msg ("static call chains are different");
>  
> +  unsigned check_arg_types_from = gimple_call_num_args (s1);
> +  if (!fntype1
> +  || !fntype2
> +  || !prototype_p (fntype1)
> +  || !prototype_p (fntype2))
> +check_arg_types_from = 0;
> +  else if (stdarg_p (fntype1))
> +{
> +  check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype1));
> +  if (stdarg_p (fntype2))
> + {
> +   unsigned n = list_length (TYPE_ARG_TYPES (fntype2));
> +   check_arg_types_from = MIN (check_arg_types_from, n);
> + }
> +}
> +  else if (stdarg_p (fntype2))
> +check_arg_types_from = list_length (TYPE_ARG_TYPES (fntype2));
> +
>/* Checkin

[PATCH][stage1] middle-end/80342 - genmatch optimize outer conversions

2025-01-31 Thread Richard Biener

The following improves genmatch generated code so we avoid more
spurious SSA assignments to be pushed to the GIMPLE sequence or
simplifications rejected when we're not supposed to produce any
for outer and intermediate conversions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, queued for stage1.

Richard.

* genmatch.cc (::gen_transform): Add in_place parameter.
Assert it isn't set in unexpected places.
(possible_noop_convert): New.
(expr::gen_transform): Support in_place and emit code to
compute a child in-place when the operation is a conversion.
(dt_simplify::gen_1): Arrange for an outermost conversion
to be elided by generating the transform of the operand
in-place.
* match.pd (__real cexpi (x) -> cos (x)): Use single_use.
---
 gcc/genmatch.cc | 201 +---
 gcc/match.pd|  10 ++-
 2 files changed, 160 insertions(+), 51 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index b9a792e2455..a81629c57b2 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1475,7 +1475,7 @@ public:
   virtual void gen_transform (FILE *, int, const char *, bool, int,
  const char *, capture_info *,
  dt_operand ** = 0,
- int = 0)
+ int = 0, const char * = nullptr)
 { gcc_unreachable  (); }
 };
 
@@ -1528,8 +1528,8 @@ public:
   /* If non-zero, the group for optional handling.  */
   unsigned char opt_grp;
   void gen_transform (FILE *f, int, const char *, bool, int,
- const char *, capture_info *,
- dt_operand ** = 0, int = 0) override;
+ const char *, capture_info *, dt_operand ** = 0,
+ int = 0, const char * = nullptr) override;
 };
 
 /* An operator that is represented by native C code.  This is always
@@ -1562,8 +1562,8 @@ public:
   /* The identifier replacement vector.  */
   vec ids;
   void gen_transform (FILE *f, int, const char *, bool, int,
- const char *, capture_info *,
- dt_operand ** = 0, int = 0) final override;
+ const char *, capture_info *, dt_operand ** = 0,
+ int = 0, const char * = nullptr) final override;
 };
 
 /* A wrapper around another operand that captures its value.  */
@@ -1583,8 +1583,8 @@ public:
   /* The captured value.  */
   operand *what;
   void gen_transform (FILE *f, int, const char *, bool, int,
- const char *, capture_info *,
- dt_operand ** = 0, int = 0) final override;
+ const char *, capture_info *, dt_operand ** = 0,
+ int = 0, const char * = nullptr) final override;
 };
 
 /* if expression.  */
@@ -3186,6 +3186,14 @@ is_conversion (id_base *op)
  || *op == VIEW_CONVERT_EXPR);
 }
 
+bool
+possible_noop_convert (id_base *op)
+{
+  return (*op == CONVERT_EXPR
+ || *op == NOP_EXPR
+ || *op == VIEW_CONVERT_EXPR);
+}
+
 /* Get the type to be used for generating operand POS of OP from the
various sources.  */
 
@@ -3239,7 +3247,7 @@ get_operand_type (id_base *op, unsigned pos,
 void
 expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple,
 int depth, const char *in_type, capture_info *cinfo,
-dt_operand **indexes, int)
+dt_operand **indexes, int, const char *in_place)
 {
   id_base *opr = operation;
   /* When we delay operator substituting during lowering of fors we
@@ -3297,10 +3305,23 @@ expr::gen_transform (FILE *f, int indent, const char 
*dest, bool gimple,
   if (!type)
 fatal_at (location, "cannot determine type of operand");
 
+  bool child_in_place = (!in_place
+&& gimple
+&& possible_noop_convert (opr)
+&& is_a  (ops[0]));
+
   fprintf_indent (f, indent, "{\n");
   indent += 2;
-  fprintf_indent (f, indent,
- "tree _o%d[%u], _r%d;\n", depth, ops.length (), depth);
+  if (child_in_place)
+{
+  fprintf_indent (f, indent, "tree _r%d;\n", depth);
+  fprintf_indent (f, indent,
+ "gimple_match_op tem_op (res_op->cond.any_else (), "
+ "ERROR_MARK, error_mark_node, 1);\n");
+}
+  else
+fprintf_indent (f, indent,
+   "tree _o%d[%u], _r%d;\n", depth, ops.length (), depth);
   char op0type[64];
   snprintf (op0type, sizeof (op0type), "TREE_TYPE (_o%d[0])", depth);
   for (unsigned i = 0; i < ops.length (); ++i)
@@ -3312,7 +,8 @@ expr::gen_transform (FILE *f, int indent, const char 
*dest, bool gimple,
i == 0 ? NULL : op0type);
   ops[i]->gen_transform (f, indent, dest1, gimple, depth + 1, optype1,
 cinfo, indexes,
-*opr == COND_EXPR && i == 0 ?

[committed] testsuite: Add testcase for already fixed PR [PR117498]

2025-01-31 Thread Jakub Jelinek

Hi!

This wrong-code issue has been fixed with r15-7249.
We still emit warnings which are questionable and perhaps we'd
get better generated code if niters determined the loop has only a single
iteration without UB and we'd punt on vectorizing it (or unrolling).

Tested on x86_64-linux -m32/-m64, committed to trunk as obvious.

2025-01-31  Jakub Jelinek  

PR middle-end/117498
* gcc.c-torture/execute/pr117498.c: New test.

--- gcc/testsuite/gcc.c-torture/execute/pr117498.c.jj
+++ gcc/testsuite/gcc.c-torture/execute/pr117498.c
@@ -0,0 +1,35 @@
+/* PR middle-end/117498 */
+
+int a, d, f;
+char g;
+volatile int c = 1;
+
+int
+foo ()
+{
+  if (c == 0)
+return -1;
+  return 1;
+}
+
+void
+bar (int h, int i, char *k, char *m)
+{
+  for (; d < i; d += 2)
+for (int j = 0; j < h; j++)
+  m[j] = k[4 * j];
+}
+
+void
+baz (long h)
+{
+  char n = 0;
+  bar (h, 4, &n, &g);
+}
+
+int
+main ()
+{
+  f = foo ();
+  baz ((unsigned char) f - 4);
+}


Jakub

Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Jonathan Wakely

On Fri, 31 Jan 2025 at 12:48, Richard Biener  wrote:
>
> On Fri, Jan 31, 2025 at 12:01 PM Abhishek Kaushik
>  wrote:
> >
> > From 4ac7c7e56e23ed2f4dd2dafdfab6cfa110c14260 Mon Sep 17 00:00:00 2001
> > From: Abhishek Kaushik 
> > Date: Fri, 31 Jan 2025 01:28:48 -0800
> > Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce
> >
> > The current while loop in std::reduce and related functions is hard to
> > vectorize because the loop control variable is hard to detect.
> >
> > `while ((__last - __first) >= 4)`
> >
> > Changing the loop header to a for loop following the OpenMP canonical
> > form allows easy vectorization, resulting in improved performance.
> >
> > `for (; __first <= __last - 4; __first += 4)`
> >
> > This patch modifies the loop header for std::reduce & std::transform_reduce.
>
> Can you add a testcase to g++.dg/vect/ that is now vectorized but not before?

According to https://gcc.gnu.org/pipermail/libstdc++/2025-January/060353.html
this is only a problem for the Intel compiler, not for GCC. So a GCC
testcase doesn't help.

But if it's only for Intel, then the commit msg should say that.


>
> Thanks,
> Richard.
>
> > ---
> >  libstdc++-v3/include/std/numeric | 10 +++---
> >  1 file changed, 3 insertions(+), 7 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/std/numeric 
> > b/libstdc++-v3/include/std/numeric
> > index 4d36fcd36d9..9c38ad89e21 100644
> > --- a/libstdc++-v3/include/std/numeric
> > +++ b/libstdc++-v3/include/std/numeric
> > @@ -300,13 +300,12 @@ namespace __detail
> >static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, 
> > __ref>);
> >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > {
> > - while ((__last - __first) >= 4)
> > + for (; __first <= __last - 4; __first += 4)
> > {
> >   _Tp __v1 = __binary_op(__first[0], __first[1]);
> >   _Tp __v2 = __binary_op(__first[2], __first[3]);
> >   _Tp __v3 = __binary_op(__v1, __v2);
> >   __init = __binary_op(__init, __v3);
> > - __first += 4;
> > }
> > }
> >for (; __first != __last; ++__first)
> > @@ -381,7 +380,7 @@ namespace __detail
> >if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
> > __is_random_access_iter<_InputIterator2>>)
> > {
> > - while ((__last1 - __first1) >= 4)
> > + for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
> > {
> >   _Tp __v1 = __binary_op1(__binary_op2(__first1[0], __first2[0]),
> >   __binary_op2(__first1[1], __first2[1]));
> > @@ -389,8 +388,6 @@ namespace __detail
> >   __binary_op2(__first1[3], __first2[3]));
> >   _Tp __v3 = __binary_op1(__v1, __v2);
> >   __init = __binary_op1(__init, __v3);
> > - __first1 += 4;
> > - __first2 += 4;
> > }
> > }
> >for (; __first1 != __last1; ++__first1, (void) ++__first2)
> > @@ -447,7 +444,7 @@ namespace __detail
> >  {
> >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > {
> > - while ((__last - __first) >= 4)
> > + for (; __first <= __last - 4; __first += 4)
> > {
> >   _Tp __v1 = __binary_op(__unary_op(__first[0]),
> >  __unary_op(__first[1]));
> > @@ -455,7 +452,6 @@ namespace __detail
> >  __unary_op(__first[3]));
> >   _Tp __v3 = __binary_op(__v1, __v2);
> >   __init = __binary_op(__init, __v3);
> > - __first += 4;
> > }
> > }
> >for (; __first != __last; ++__first)
> > --
> > 2.31.1
> >
> >
> >
> >
>

[PATCH 3/3] c++/modules: Handle exposures of TU-local types in uninstantiated member templates

2025-01-31 Thread Nathaniel Shead

Bootstrapped and regtested on x86_64-pc-linux-gnu, OK for trunk?

Happy to remove the custom inform for lambdas, but I felt that the
original message (which suggests that defining it within a class should
make it OK) was unhelpful here.

Similarly the 'is_exposure_of_member_type' function is not necessary to
fix the bug, and is just for slightly nicer diagnostics.

-- >8 --

Previously, 'is_tu_local_entity' wouldn't detect the exposure of the (in
practice) TU-local lambda in the following example, unless instantiated:

  struct S {
    template <typename>
    static inline decltype([]{}) x = {};
  };

This is for two reasons.  Firstly, when traversing the TYPE_FIELDS of S
we only see the TEMPLATE_DECL, and never end up building a dependency on
its DECL_TEMPLATE_RESULT (due to not being instantiated).  This patch
fixes this by stripping any templates before checking for unnamed types.

The second reason is that we currently assume all class-scope entities
are not TU-local.  Despite this being unambiguous in the standard, this
is not actually true in our implementation just yet, due to issues with
mangling lambdas in some circumstances.  Allowing these lambdas to be
exported can cause issues in importers with apparently conflicting
declarations, so this patch treats them as TU-local as well.

After these changes, we now get double diagnostics from the two ways
that we can see the above lambda being exposed, via 'S' (through
TYPE_FIELDS) or via 'S::x'.  To work around this, we hide diagnostics from
the first case, so we only get errors from 'S::x' which will be closer
to the point the offending lambda is declared.

gcc/cp/ChangeLog:

* module.cc (trees_out::type_node): Adjust assertion.
(depset::hash::is_tu_local_entity): Handle unnamed template
types, treat lambdas specially.
(is_exposure_of_member_type): New function.
(depset::hash::add_dependency): Use it.
(depset::hash::finalize_dependencies): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/modules/internal-10.C: New test.

Signed-off-by: Nathaniel Shead 
---
 gcc/cp/module.cc   | 67 ++
 gcc/testsuite/g++.dg/modules/internal-10.C | 25 
 2 files changed, 81 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/modules/internal-10.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index c89834c1abd..59b7270f4a5 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -9261,7 +9261,9 @@ trees_out::type_node (tree type)
 
/* We'll have either visited this type or have newly discovered
   that it's TU-local; either way we won't need to visit it again.  */
-   gcc_checking_assert (TREE_VISITED (type) || has_tu_local_dep (name));
+   gcc_checking_assert (TREE_VISITED (type)
+|| has_tu_local_dep (TYPE_NAME (type))
+|| has_tu_local_dep (TYPE_TI_TEMPLATE (type)));
return;
   }
 
@@ -13432,19 +13434,31 @@ depset::hash::is_tu_local_entity (tree decl, bool 
explain/*=false*/)
 
  We consider types with names for linkage purposes as having names, since
  these aren't really TU-local.  */
-  if (TREE_CODE (decl) == TYPE_DECL
+  tree inner = STRIP_TEMPLATE (decl);
+  if (inner
+  && TREE_CODE (inner) == TYPE_DECL
   && TYPE_ANON_P (type)
-  && !DECL_SELF_REFERENCE_P (decl)
+  && !DECL_SELF_REFERENCE_P (inner)
   /* An enum with an enumerator name for linkage.  */
   && !(UNSCOPED_ENUM_P (type) && TYPE_VALUES (type)))
 {
   tree main_decl = TYPE_MAIN_DECL (type);
-  if (!DECL_CLASS_SCOPE_P (main_decl)
- && !decl_function_context (main_decl)
- /* LAMBDA_EXPR_EXTRA_SCOPE will be set for lambdas defined in
-contexts where they would not be TU-local.  */
- && !(LAMBDA_TYPE_P (type)
-  && LAMBDA_TYPE_EXTRA_SCOPE (type)))
+  if (LAMBDA_TYPE_P (type))
+   {
+ /* A lambda expression is, in practice, TU-local iff it has no
+mangling scope.  This currently doesn't line up exactly with
+the standard's definition due to some ABI issues, but it's
+pretty close, and avoids other issues down the line.  */
+ if (!LAMBDA_TYPE_EXTRA_SCOPE (type))
+   {
+ if (explain)
+   inform (loc, "%qT has no name and cannot be differentiated "
+   "from similar lambdas in other TUs", type);
+ return true;
+   }
+   }
+  else if (!DECL_CLASS_SCOPE_P (main_decl)
+  && !decl_function_context (main_decl))
{
  if (explain)
inform (loc, "%qT has no name and is not defined within a class, "
@@ -13748,6 +13762,35 @@ depset::hash::make_dependency (tree decl, entity_kind 
ek)
   return dep;
 }
 
+/* Whether REF is an exposure of a member type of SOURCE.
+
+   This comes up with exposures of class-scope lambdas, that we currently
+   treat as TU-

Re: [PATCH v2] x86: Handle -mindirect-branch-register for -fno-plt

2025-01-31 Thread Uros Bizjak

On Fri, Jan 31, 2025 at 2:36 PM H.J. Lu  wrote:
>
> -fno-plt forces external call to indirect call via GOT memory.  But
> -mindirect-branch-register requires indirect call and jump via register.
> For -mindirect-branch-register, expand indirect call via register and
> update call patterns and peepholes to disable indirect call via memory.
>
> gcc/
>
> PR target/118713
> * config/i386/i386-expand.cc (ix86_expand_call): Force indirect
> call via register for -mindirect-branch-register.
> * config/i386/i386.md (*call): Disable indirect call via memory
> for -mindirect-branch-register.
> (*call_got_x32): Likewise.
> (*sibcall_GOT_32): Likewise.
> (*sibcall): Likewise.
> (*sibcall_memory): Likewise.
> (*call_pop): Likewise.
> (*sibcall_pop): Likewise.
> (*sibcall_pop_memory): Likewise.
> (*call_value): Likewise.
> (*call_value_got_x32): Likewise.
> (*sibcall_value_GOT_32): Likewise.
> (*sibcall_value): Likewise.
> (*sibcall_value_memory): Likewise.
> (*call_value_pop): Likewise.
> (*sibcall_value_pop): Likewise.
> (*sibcall_value_pop_memory): Likewise.
>
> gcc/testsuite/
>
> PR target/118713
> * gcc.target/i386/pr118713-1-x32.c: New test.
> * gcc.target/i386/pr118713-1.c: Likewise.
> * gcc.target/i386/pr118713-2-x32.c: Likewise.
> * gcc.target/i386/pr118713-2.c: Likewise.
> * gcc.target/i386/pr118713-3-x32.c: Likewise.
> * gcc.target/i386/pr118713-3.c: Likewise.
> * gcc.target/i386/pr118713-4-x32.c: Likewise.
> * gcc.target/i386/pr118713-4.c: Likewise.
> * gcc.target/i386/pr118713-5-x32.c: Likewise.
> * gcc.target/i386/pr118713-5.c: Likewise.
> * gcc.target/i386/pr118713-6-x32.c: Likewise.
> * gcc.target/i386/pr118713-6.c: Likewise.
> * gcc.target/i386/pr118713-7-x32.c: Likewise.
> * gcc.target/i386/pr118713-7.c: Likewise.
> * gcc.target/i386/pr118713-8-x32.c: Likewise.
> * gcc.target/i386/pr118713-8.c: Likewise.
> * gcc.target/i386/pr118713-9-x32.c: Likewise.
> * gcc.target/i386/pr118713-9.c: Likewise.
> * gcc.target/i386/pr118713-10-x32.c: Likewise.
> * gcc.target/i386/pr118713-10.c: Likewise.
> * gcc.target/i386/pr118713-11-x32.c: Likewise.
> * gcc.target/i386/pr118713-11.c: Likewise.
> * gcc.target/i386/pr118713-12-x32.c: Likewise.
> * gcc.target/i386/pr118713-12.c: Likewise.
>
> Co-Authored-By: Uros Bizjak 
> Signed-off-by: H.J. Lu 
> ---
>  gcc/config/i386/i386-expand.cc| 20 ++--
>  gcc/config/i386/i386.md   | 98 +--
>  .../gcc.target/i386/pr118713-1-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-1.c| 14 +++
>  .../gcc.target/i386/pr118713-10-x32.c |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-10.c   | 15 +++
>  .../gcc.target/i386/pr118713-11-x32.c |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-11.c   | 14 +++
>  .../gcc.target/i386/pr118713-12-x32.c |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-12.c   | 14 +++
>  .../gcc.target/i386/pr118713-2-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-2.c| 15 +++
>  .../gcc.target/i386/pr118713-3-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-3.c| 14 +++
>  .../gcc.target/i386/pr118713-4-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-4.c| 14 +++
>  .../gcc.target/i386/pr118713-5-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-5.c| 13 +++
>  .../gcc.target/i386/pr118713-6-x32.c  | 15 +++
>  gcc/testsuite/gcc.target/i386/pr118713-6.c| 14 +++
>  .../gcc.target/i386/pr118713-7-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-7.c| 13 +++
>  .../gcc.target/i386/pr118713-8-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-8.c| 13 +++
>  .../gcc.target/i386/pr118713-9-x32.c  |  8 ++
>  gcc/testsuite/gcc.target/i386/pr118713-9.c| 14 +++
>  26 files changed, 353 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11-x32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-11.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12-x32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-12.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2-x32.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-3-x32.c
>  create mode 100644 gcc/testsuite/gcc.ta

[PATCH 2/2] libstdc++: Fix flat_foo::insert_range for non-common ranges [PR118156]

2025-01-31 Thread Patrick Palka

This fixes flat_map/multimap::insert_range by simply generalizing the
::insert implementation to handle a heterogeneous iterator/sentinel pair.
I'm not sure we can do better than this, e.g. we can't implement it
in terms of the adapted containers' insert_range because that'd require
two passes over the range.

For flat_set/multiset, we can implement insert_range directly in terms
of the adapted container's insert_range.  A fallback implementation
is also provided if insert_range isn't available, as is the case for
std::deque currently.

PR libstdc++/118156

libstdc++-v3/ChangeLog:

* include/std/flat_map (_Flat_map_impl::_M_insert): Generalized
version of insert taking a heterogeneous iterator/sentinel pair.
(_Flat_map_impl::insert): Dispatch to _M_insert.
(_Flat_map_impl::insert_range): Likewise.
(flat_map): Export _Flat_map_impl::insert_range.
(flat_multimap): Likewise.
* include/std/flat_set (_Flat_set_impl::insert_range):
Reimplement directly, not in terms of insert.
(flat_set): Export _Flat_set_impl::insert_range.
(flat_multiset): Likewise.
* testsuite/23_containers/flat_map/1.cc (test06): New test.
* testsuite/23_containers/flat_multimap/1.cc (test06): New test.
* testsuite/23_containers/flat_multiset/1.cc (test06): New test.
* testsuite/23_containers/flat_set/1.cc (test06): New test.
---
 libstdc++-v3/include/std/flat_map | 17 +
 libstdc++-v3/include/std/flat_set | 25 ---
 .../testsuite/23_containers/flat_map/1.cc | 17 +
 .../23_containers/flat_multimap/1.cc  | 16 
 .../23_containers/flat_multiset/1.cc  | 16 
 .../testsuite/23_containers/flat_set/1.cc | 16 
 6 files changed, 99 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/include/std/flat_map 
b/libstdc++-v3/include/std/flat_map
index 1ecc2e7f6e7..405caa8a81b 100644
--- a/libstdc++-v3/include/std/flat_map
+++ b/libstdc++-v3/include/std/flat_map
@@ -538,9 +538,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
insert(const_iterator __position, _Arg&& __x)
{ return emplace_hint(__position, std::forward<_Arg>(__x)); }
 
-  template<__has_input_iter_cat _InputIterator>
+private:
+  template<typename _Iter, typename _Sent>
void
-   insert(_InputIterator __first, _InputIterator __last)
+   _M_insert(_Iter __first, _Sent __last)
{
  // FIXME: This implementation fails its complexity requirements.
  // We can't idiomatically implement an efficient version (as in the
@@ -574,6 +575,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
}
 
+public:
+  template<__has_input_iter_cat _InputIterator>
+   void
+   insert(_InputIterator __first, _InputIterator __last)
+   { _M_insert(std::move(__first), std::move(__last)); }
+
   template<__has_input_iter_cat _InputIterator>
void
insert(__sorted_t, _InputIterator __first, _InputIterator __last)
@@ -585,7 +592,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<__detail::__container_compatible_range _Rg>
void
insert_range(_Rg&& __rg)
-   { insert(ranges::begin(__rg), ranges::end(__rg)); }
+   { _M_insert(ranges::begin(__rg), ranges::end(__rg)); }
 
   void
   insert(initializer_list __il)
@@ -1181,7 +1188,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using _Impl::emplace;
   using _Impl::emplace_hint;
   using _Impl::insert;
-  // using _Impl::insert_range;
+  using _Impl::insert_range;
   using _Impl::extract;
   using _Impl::replace;
   using _Impl::erase;
@@ -1460,7 +1467,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using _Impl::emplace;
   using _Impl::emplace_hint;
   using _Impl::insert;
-  // using _Impl::insert_range;
+  using _Impl::insert_range;
   using _Impl::extract;
   using _Impl::replace;
   using _Impl::erase;
diff --git a/libstdc++-v3/include/std/flat_set 
b/libstdc++-v3/include/std/flat_set
index 3e1347a6a0a..c7b48e5d2a7 100644
--- a/libstdc++-v3/include/std/flat_set
+++ b/libstdc++-v3/include/std/flat_set
@@ -475,7 +475,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<__detail::__container_compatible_range _Rg>
void
insert_range(_Rg&& __rg)
-   { insert(ranges::begin(__rg), ranges::end(__rg)); }
+   {
+ auto __guard = _M_make_clear_guard();
+ typename container_type::iterator __it;
+ if constexpr (requires { _M_cont.insert_range(_M_cont.end(), __rg); })
+   __it = _M_cont.insert_range(_M_cont.end(), __rg);
+ else
+   {
+ size_type __n = size();
+ auto __first = ranges::begin(__rg);
+ auto __last = ranges::end(__rg);
+ for (; __first != __last; ++__first)
+   _M_cont.emplace_back(*__first);
+ __it = _M_cont.begin() + __n;
+   }
+ std::sor

[PATCH 48/61] Performance degradation for iDCT-4M example

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

This workaround adds the -mfuse-vect-init option, which causes the back-end
to emit a single load for the vect_init if all the init elements come from
consecutive memory locations and are in the right order.

gcc/
* config/mips/mips.cc (mips_fuse_vect_init_p): New function.
(mips_expand_vector_init): Detect init sequence that can be
fused into a single load.
* config/mips/mips.opt (mfuse-vect-init): New option.

gcc/testsuite/
* gcc.target/mips/msa-fuse-vect-init.c: New file.

Cherry-picked 4f440a87ad32b3549be8a0b89900d656ac70d4f8
and 1eb9d22dc480c962027eed522e0b26d0ebbd3e0b
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 61 +++
 gcc/config/mips/mips.opt  |  3 +
 .../gcc.target/mips/msa-fuse-vect-init.c  | 18 ++
 3 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index bd62b8b7823..51d9812151a 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -24873,6 +24873,57 @@ mips_expand_vi_general (machine_mode vmode, 
machine_mode imode,
   emit_move_insn (target, mem);
 }
 
+/* Return true if elements of vector initialization list should be loaded
+   via single "fused" vector load.  */
+
+bool
+mips_fuse_vect_init_p (machine_mode imode, unsigned nelt, rtx vals)
+{
+  unsigned i;
+  rtx base;
+  rtx base1;
+  rtx first;
+  rtx next;
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT offset1;
+  unsigned min_align = GET_MODE_BITSIZE (imode);
+  unsigned step_size = GET_MODE_SIZE (imode);
+
+  if (!flag_fuse_vect_init)
+return false;
+
+  first = XVECEXP (vals, 0, 0);
+
+  if (MEM_VOLATILE_P (first))
+return false;
+
+  if (MEM_ALIGN (first) < min_align)
+return false;
+
+  if (GET_MODE (first) != imode)
+return false;
+
+  mips_split_plus (XEXP (first, 0), &base, &offset);
+
+  if (!REG_P (base))
+return false;
+
+  for (i = 1; i < nelt; ++i)
+{
+   next = XVECEXP (vals, 0, i);
+   if (MEM_VOLATILE_P (next)
+  || MEM_ALIGN (next) < min_align
+  || GET_MODE (next) != imode)
+   return false;
+   mips_split_plus (XEXP (next, 0), &base1, &offset1);
+   if (!rtx_equal_p (base, base1) || (offset1 - offset) != step_size)
+   return false;
+   offset = offset1;
+}
+
+  return true;
+}
+
 /* Expand a vector initialization.  */
 
 void
@@ -24883,6 +24934,7 @@ mips_expand_vector_init (rtx target, rtx vals)
   unsigned i, nelt = GET_MODE_NUNITS (vmode);
   unsigned nvar = 0, one_var = -1u;
   bool all_same = true;
+  bool all_mem = true;
   rtx x;
 
   for (i = 0; i < nelt; ++i)
@@ -24890,6 +24942,8 @@ mips_expand_vector_init (rtx target, rtx vals)
   x = XVECEXP (vals, 0, i);
   if (!mips_constant_elt_p (x))
nvar++, one_var = i;
+  if (!MEM_P (x))
+   all_mem = false;
   if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
all_same = false;
 }
@@ -24950,6 +25004,13 @@ mips_expand_vector_init (rtx target, rtx vals)
}
   else
{
+ if (all_mem && mips_fuse_vect_init_p (imode, nelt, vals))
+   {
+ rtx mem = widen_memory_access (XVECEXP (vals, 0, 0), vmode, 0);
+ emit_move_insn (target, mem);
+ return;
+   }
+
  emit_move_insn (target, CONST0_RTX (vmode));
 
  for (i = 0; i < nelt; ++i)
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index d162702c220..be347155286 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -576,3 +576,6 @@ Allow inlining even if the compression flags differ between 
caller and callee.
 
 msched-weight
 Target Var(TARGET_SCHED_WEIGHT) Undocumented
+
+mfuse-vect-init
+Target Var(flag_fuse_vect_init) Undocumented Init(-1)
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c 
b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
new file mode 100644
index 000..faa1ff4eee6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+/* { dg-additional-options "-mfuse-vect-init" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+typedef int v4i32 __attribute__ ((vector_size(16)));
+
+void
+copy (int* src, v4i32* dst)
+{
+  v4i32 chunk = (v4i32){src[0], src[1], src[2], src[3]};
+  dst[0] = chunk;
+}
+
+/* { dg-final { scan-assembler-not "insert" } } */
+/* { dg-final { scan-assembler-times "\tld\\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "\tst\\\.w" 1 } } */
+
-- 
2.34.1

[PATCH 42/61] Remove redundant moves

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

Add peepholes to remove silly moves. These reloads happen because of
different modes making elimination non-trivial.

Cherry-picked 85462a9dbf8d659bfb0417d354a0a4f9cd4b8e07
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.md | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 159fc2e2615..1243f20f344 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -5254,6 +5254,18 @@
   [(set_attr "move_type" "move,move,move,const,constN,load,store,mflo")
(set_attr "mode" "HI")])
 
+(define_peephole2
+  [(set (match_operand:HI 0 "register_operand")
+   (match_operand:HI 1 "register_operand"))
+   (set (match_operand:SI 2 "register_operand")
+   (match_operand:SI 3 "register_operand"))]
+  "TARGET_MIPS16
+   && REGNO (operands[1]) == REGNO (operands[2])
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[3])"
+  [(const_int 0)]
+  "")
+
 ;; On the mips16, we can split lh $r,N($r) into an add and a load,
 ;; when the original load is a 4 byte instruction but the add and the
 ;; load are 2 2 byte instructions.
@@ -5330,6 +5342,18 @@
   [(set_attr "move_type" "move,move,move,const,constN,load,store,mflo")
(set_attr "mode" "QI")])
 
+(define_peephole2
+  [(set (match_operand:QI 0 "register_operand")
+   (match_operand:QI 1 "register_operand"))
+   (set (match_operand:SI 2 "register_operand")
+   (match_operand:SI 3 "register_operand"))]
+  "TARGET_MIPS16
+   && REGNO (operands[1]) == REGNO (operands[2])
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[3])"
+  [(const_int 0)]
+  "")
+
 ;; On the mips16, we can split lb $r,N($r) into an add and a load,
 ;; when the original load is a 4 byte instruction but the add and the
 ;; load are 2 2 byte instructions.
-- 
2.34.1

[PATCH 39/61] Frame barrier fix

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Ensure the frame barrier prevents reordering of stack pointer changes.

It is possible for a load/store accessing the stack via a copy of the
stack pointer to be moved across the epilogue meaning that it accesses
stack that is no longer allocated. This leads to a situation where the
code is unsafe in the event of an interrupt where the same stack is
used for interrupt handling.

gcc/
* config/mips/mips.cc (mips_frame_barrier): Upgrade to a full
blockage.

Cherry-picked 0c240da6f6032bd19348b97148d25c05ba2e8356
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index dcb4b9f9f99..57a858aca39 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -13626,7 +13626,7 @@ mips_output_function_epilogue (FILE *)
 static void
 mips_frame_barrier (void)
 {
-  emit_clobber (gen_frame_mem (BLKmode, stack_pointer_rtx));
+  emit_insn (gen_blockage ());
 }
 
 
-- 
2.34.1

Re: [PATCH 1/2] libstdc++: Fix return value of vector::insert_range

2025-01-31 Thread Patrick Palka

On Fri, 31 Jan 2025, Patrick Palka wrote:

> In some cases we're wrongly returning an iterator pointing to (one past)
> the last element inserted instead of to the first element inserted.
> 
> libstdc++-v3/ChangeLog:
> 
>   * include/bits/stl_bvector.h (vector::insert_range):
>   Consistently return an iterator pointing to the first element
>   inserted.
>   * include/bits/vector.tcc (vector::insert_range): Likewise.
>   * testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc:
>   Verify insert_range return values.
>   * testsuite/23_containers/vector/modifiers/insert/insert_range.cc:
>   Likewise.
> ---
>  libstdc++-v3/include/bits/stl_bvector.h|  8 
>  libstdc++-v3/include/bits/vector.tcc   |  3 ++-
>  .../bool/modifiers/insert/insert_range.cc  | 18 --
>  .../vector/modifiers/insert/insert_range.cc| 18 --
>  4 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
> b/libstdc++-v3/include/bits/stl_bvector.h
> index 961e4a25299..e5e4b7db5a9 100644
> --- a/libstdc++-v3/include/bits/stl_bvector.h
> +++ b/libstdc++-v3/include/bits/stl_bvector.h
> @@ -1341,9 +1341,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> std::copy_backward(__pos._M_const_cast(), end(),
>this->_M_impl._M_finish
>  + difference_type(__n));
> -   auto __i = ranges::copy(__rg, __pos._M_const_cast()).out;
> +   ranges::copy(__rg, __pos._M_const_cast()).out;

Oops, consider this stray '.out' removed.

> this->_M_impl._M_finish += difference_type(__n);
> -   return __i;
> +   return __pos._M_const_cast();
>   }
> else
>   {
> @@ -1355,9 +1355,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
> iterator __i = _M_copy_aligned(__begin,
>__pos._M_const_cast(),
>__start);
> -   __i = ranges::copy(__rg, __i).out;
> +   iterator __j = ranges::copy(__rg, __i).out;
> iterator __finish = std::copy(__pos._M_const_cast(),
> - __end, __i);
> + __end, __j);
> this->_M_deallocate();
> this->_M_impl._M_end_of_storage = __q + _S_nword(__len);
> this->_M_impl._M_start = __start;
> diff --git a/libstdc++-v3/include/bits/vector.tcc 
> b/libstdc++-v3/include/bits/vector.tcc
> index 4f4c366080b..acb2f5fca1e 100644
> --- a/libstdc++-v3/include/bits/vector.tcc
> +++ b/libstdc++-v3/include/bits/vector.tcc
> @@ -984,8 +984,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
>{
>   if (__pos == cend())
> {
> + const auto __ins_idx = size();
>   append_range(std::forward<_Rg>(__rg));
> - return end();
> + return begin() + __ins_idx;
> }
>  
>   if constexpr (ranges::forward_range<_Rg>)
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
>  
> b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
> index 4f4835746ea..5c65610667d 100644
> --- 
> a/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
> +++ 
> b/libstdc++-v3/testsuite/23_containers/vector/bool/modifiers/insert/insert_range.cc
> @@ -35,16 +35,22 @@ do_test()
>VERIFY( eq(v, a) );
>v.clear();
>v.shrink_to_fit();
> -  v.insert_range(v.begin(), Range(a, a+3));
> -  v.insert_range(v.end(), Range(a+6, a+9));
> -  v.insert_range(v.begin()+3, Range(a+3, a+6));
> +  auto it = v.insert_range(v.begin(), Range(a, a+3));
> +  VERIFY( it == v.begin() );
> +  it = v.insert_range(v.end(), Range(a+6, a+9));
> +  VERIFY( it == v.begin()+3 );
> +  it = v.insert_range(v.begin()+3, Range(a+3, a+6));
> +  VERIFY( it == v.begin()+3 );
>VERIFY( eq(v, a) );
>v.resize(3);
> -  v.insert_range(v.begin()+1, Range(a+4, a+9));
> -  v.insert_range(v.begin()+1, Range(a+1, a+4));
> +  it = v.insert_range(v.begin()+1, Range(a+4, a+9));
> +  VERIFY( it == v.begin()+1 );
> +  it = v.insert_range(v.begin()+1, Range(a+1, a+4));
> +  VERIFY( it == v.begin()+1 );
>v.resize(9);
>VERIFY( eq(v, a) );
> -  v.insert_range(v.begin(), Range(a, a));
> +  it = v.insert_range(v.begin(), Range(a, a));
> +  VERIFY( it == v.begin() );
>VERIFY( eq(v, a) );
>  }
>  
> diff --git 
> a/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
>  
> b/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
> index 68218e94f28..59071435126 100644
> --- 
> a/libstdc++-v3/testsuite/23_containers/vector/modifiers/insert/insert_range.cc
> +++ 
>

Re: [PATCH] Fortran: host association issue with symbol in COMMON block [PR108454]

2025-01-31 Thread Jerry D


On 1/30/25 1:44 PM, Harald Anlauf wrote:

Dear all,

analyzing the PR (by Gerhard) turned up two slightly related
issues.  The first one, where a variable in a COMMON block is falsely
resolved to a derived type declared in the host, leads to a false
freeing of the symbol, resulting in memory corruption and ICE.
If we already know that the symbol is in a common block, we may
just skip that interface search.

The other issue is a resolution issue, where the derived type
declared in the host is used in a variable declaration in the
procedure (as type or class), and a variable of the same name
as the derived type is used in a common block but later resolves
to a basic type, without a proper detection of the conflict.
But as this issue is found to be independent of the presence of
a COMMON block, I have opened a separate issue (pr118709) for it.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald



Looks good to go Harald.

As always, thanks for the fix.

Jerry

[PATCH 52/61] Fix register spill issue for soft-float glibc 2.29

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

Adding the float-agnostic reproducer as test-case.

gcc/testsuite/

  * gcc.target/mips/tls-1.c: New file.

Cherry-picked fa3b6a1347154973324d264e6ad2dbd66d3f0028
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.target/mips/tls-1.c | 10 ++
 1 file changed, 10 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/mips/tls-1.c

diff --git a/gcc/testsuite/gcc.target/mips/tls-1.c 
b/gcc/testsuite/gcc.target/mips/tls-1.c
new file mode 100644
index 000..38f6a5e1176
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/tls-1.c
@@ -0,0 +1,10 @@
+/* { dg-options "-mgp32" } */
+
+extern __thread int x __attribute__ ((tls_model ("initial-exec")));
+
+long long
+foo (long long y)
+{
+  x = 0;
+  return y;
+}
-- 
2.34.1

[PATCH 60/61] Check anti-dependence between 0 and 3 for loads

2025-01-31 Thread Aleksandar Rakic

From: Chao-ying Fu 

gcc/
* config/mips/mips.md (join2_load_store): Check
operand 0 and 3. Assert other two operands do not overlap after
they are reordered.
(*join2_loadhi): Same.

Cherry-picked 63175687761e51dfe2f75dfab7b4de7f44bb4abe
from https://github.com/MIPS/gcc

Signed-off-by: Chao-ying Fu 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.md | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 52abb9c1119..c5603c1aa9e 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -8130,7 +8130,7 @@
 /* Reg-renaming pass reuses base register if it is dead after bonded loads.
Hardware does not bond those loads, even when they are consecutive.
However, order of the loads need to be checked for correctness.  */
-if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[1]))
+if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[3]))
   {
output_asm_insn (mips_output_move (insn, operands[0], operands[1]),
 operands);
@@ -8139,6 +8139,8 @@
   }
 else
   {
+   /* Check the other two registers.  */
+   gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1]));
output_asm_insn (mips_output_move (insn, operands[2], operands[3]),
 &operands[2]);
output_asm_insn (mips_output_move (insn, operands[0], operands[1]),
@@ -8193,13 +8195,15 @@
 /* Reg-renaming pass reuses base register if it is dead after bonded loads.
Hardware does not bond those loads, even when they are consecutive.
However, order of the loads need to be checked for correctness.  */
-if (!reg_overlap_mentioned_p (operands[0], operands[1]))
+if (!reg_overlap_mentioned_p (operands[0], operands[3]))
   {
output_asm_insn ("lh\t%0,%1", operands);
output_asm_insn ("lh\t%2,%3", operands);
   }
 else
   {
+   /* Check the other two registers.  */
+   gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1]));
output_asm_insn ("lh\t%2,%3", operands);
output_asm_insn ("lh\t%0,%1", operands);
   }
-- 
2.34.1

[PATCH 53/61] Inefficient scattered double precision load in MSA

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

gcc/
* config/mips/mips.cc (mips_legitimate_combined_insn):
New function.

Cherry-picked 092a39db956a418e7e020107b062c170ed976841
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 20128c7f537..4894e07f72c 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -25749,6 +25749,22 @@ mips_set_up_by_prologue (hard_reg_set_container *regs)
 CLEAR_HARD_REG_BIT (regs->set, GLOBAL_POINTER_REGNUM);
 }
 
+/* Implement TARGET_LEGITIMATE_COMBINED_INSN hook.  */
+
+static bool
+mips_legitimate_combined_insn (rtx_insn *insn)
+{
+  rtx p = PATTERN (insn);
+  if (GET_CODE (p) == SET
+  && GET_CODE (XEXP (p, 1)) == VEC_DUPLICATE
+  && GET_CODE (XEXP (XEXP (p, 1), 0)) == REG
+  && (GET_MODE_UNIT_SIZE (GET_MODE (XEXP (XEXP (p, 1), 0)))
+ > UNITS_PER_WORD))
+return false;
+
+  return true;
+}
+
 void
 mips_bit_clear_info (enum machine_mode mode, unsigned HOST_WIDE_INT m,
  int *start_pos, int *size)
@@ -26130,6 +26146,9 @@ mips_noce_conversion_profitable_p (rtx_insn *seq, 
struct noce_if_info *if_info)
 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS 
mips_ira_change_pseudo_allocno_class
 
+#undef TARGET_LEGITIMATE_COMBINED_INSN
+#define TARGET_LEGITIMATE_COMBINED_INSN mips_legitimate_combined_insn
+
 #undef TARGET_HARD_REGNO_SCRATCH_OK
 #define TARGET_HARD_REGNO_SCRATCH_OK mips_hard_regno_scratch_ok
 
-- 
2.34.1

[PATCH 54/61] fmadd.w should be restricted to mipsr6

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

This patch prevents middle-end from using MSA fma on pre-r6 targets
in order to avoid subtle inconsistencies with auto-vectorized code that
might mix MSA fma with unfused scalar multiply-add.

There might be Loongson targets that support MSA while having scalar
multiply-add that is fused (contrary to ISA spec). This patch doesn't
handle those cases.

gcc/
* config/mips/mips-msa.md (fma<mode>4, fnma<mode>4): Transform
into empty expander. Conditionalize on ISA_HAS_FUSED_MADDF.
Move the body into ...
(msa_fmadd_<msafmt>, msa_fmsub_<msafmt>): New insn patterns.
* config/mips/mips.cc (CODE_FOR_msa_fmadd_*): Remove.
(CODE_FOR_msa_fmsub_*): Ditto.

gcc/testsuite/
* gcc.target/mips/msa-fuse-madd-double.c: New test.
* gcc.target/mips/msa-fuse-madd-single.c: New test.
* gcc.target/mips/msa.c: Do not match fmadd/fmsub on
!mipsisar6 targets.
* lib/target-supports.exp: Define mipsisar6 target.

Cherry-picked 7a48948f245a5e46f55d59c6ac0982a815665ccf
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips-msa.md   | 26 +++---
 gcc/config/mips/mips.cc   |  4 --
 .../gcc.target/mips/msa-fuse-madd-double.c| 52 +++
 .../gcc.target/mips/msa-fuse-madd-single.c| 51 ++
 gcc/testsuite/gcc.target/mips/msa.c   | 14 +++--
 gcc/testsuite/lib/target-supports.exp | 10 
 6 files changed, 143 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-madd-single.c

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index e2fdf8e191e..34f140e159c 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -973,21 +973,35 @@
   [(set_attr "type" "simd_fdiv")
(set_attr "mode" "")])
 
-(define_insn "fma4"
+(define_expand "fma4"
   [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
(fma:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f")
  (match_operand:FMSA 2 "msa_reg_operand" "f")
  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
-  "ISA_HAS_MSA"
-  "fmadd.\t%w0,%w1,%w2"
-  [(set_attr "type" "simd_fmadd")
-   (set_attr "mode" "")])
+  "ISA_HAS_MSA && ISA_HAS_FUSED_MADDF")
 
-(define_insn "fnma4"
+(define_expand "fnma4"
   [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
(fma:FMSA (neg:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f"))
  (match_operand:FMSA 2 "msa_reg_operand" "f")
  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
+  "ISA_HAS_MSA && ISA_HAS_FUSED_MADDF")
+
+(define_insn "msa_fmadd_"
+  [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
+  (fma:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f")
+  (match_operand:FMSA 2 "msa_reg_operand" "f")
+  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
+ "ISA_HAS_MSA"
+  "fmadd.\t%w0,%w1,%w2"
+  [(set_attr "type" "simd_fmadd")
+   (set_attr "mode" "")])
+
+(define_insn "msa_fmsub_"
+  [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
+  (fma:FMSA (neg:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f"))
+  (match_operand:FMSA 2 "msa_reg_operand" "f")
+  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
   "ISA_HAS_MSA"
   "fmsub.\t%w0,%w1,%w2"
   [(set_attr "type" "simd_fmadd")
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4894e07f72c..4521cac15c7 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -17752,10 +17752,6 @@ AVAIL_NON_MIPS16 (msa, TARGET_MSA)
 #define CODE_FOR_msa_ffint_u_d CODE_FOR_floatunsv2div2df2
 #define CODE_FOR_msa_fsub_w CODE_FOR_subv4sf3
 #define CODE_FOR_msa_fsub_d CODE_FOR_subv2df3
-#define CODE_FOR_msa_fmadd_w CODE_FOR_fmav4sf4
-#define CODE_FOR_msa_fmadd_d CODE_FOR_fmav2df4
-#define CODE_FOR_msa_fmsub_w CODE_FOR_fnmav4sf4
-#define CODE_FOR_msa_fmsub_d CODE_FOR_fnmav2df4
 #define CODE_FOR_msa_fmul_w CODE_FOR_mulv4sf3
 #define CODE_FOR_msa_fmul_d CODE_FOR_mulv2df3
 #define CODE_FOR_msa_fdiv_w CODE_FOR_divv4sf3
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c 
b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
new file mode 100644
index 000..e98bf017a6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-mhard-float -mmsa" } */
+/* { dg-additional-options "-ffp-contract=fast" } */
+
+#define VSIZE 8
+
+typedef union
+{ double d; long long unsigned i; } double_ull_u;
+
+struct test_vec {
+  double_ull_u a;
+  double_ull_u b;
+  double_ull_u c;
+} test_bench[VSIZE] = {
+  {{.i=0x2c27173b4c9b0904ull}, {.i=0x6aa7b75c1df029d3ull}, 
{.i=0x5675ff363dd15094ull}},
+  {{.i=0x3a6f0e78379a5b56ull}, {.i=0x53b735d529784870ull}, 
{.i=0x4cdced4c10a30d9cull}},
+  {{.i=0x12d2eee56cc2b66aull}, {.i=0x60cd438558b

[PATCH 45/61] Test float32-basic.c fails with -mabi=64 -EB

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

Unlike float, a _Float32 value is passed without promotion when used as
a varargs parameter. On N32/64, the callee side expects it to be at offset
0 inside an 8-byte slot, which matches float behavior when passed on the
stack as a named argument. Because of this, we need to make sure that the
_Float32 value resides in the upper 32 bits on big-endian when passed in
a register as a varargs parameter.

In order to accomplish this, the BLOCK_REG_PADDING macro is extended
with a parameter NAMED, whose value is interpreted as follows:
 1: request for padding of known named argument
 0: request for padding of known unnamed argument
-1: request for padding in unknown context

Cherry-picked e8414cb48566bf5db33d24c6310d9558cd3b3fc0
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/calls.cc|  7 ---
 gcc/config/aarch64/aarch64-protos.h |  2 +-
 gcc/config/aarch64/aarch64.cc   |  8 
 gcc/config/aarch64/aarch64.h|  7 ---
 gcc/config/arm/arm-protos.h |  2 +-
 gcc/config/arm/arm.cc   |  6 +++---
 gcc/config/arm/arm.h|  4 ++--
 gcc/config/c6x/c6x-protos.h |  2 +-
 gcc/config/c6x/c6x.cc   |  5 +++--
 gcc/config/c6x/c6x.h|  5 +++--
 gcc/config/mips/mips-protos.h   |  2 +-
 gcc/config/mips/mips.cc | 15 +++
 gcc/config/mips/mips.h  |  4 ++--
 gcc/config/nios2/nios2-protos.h |  2 +-
 gcc/config/nios2/nios2.cc   |  2 +-
 gcc/config/nios2/nios2.h|  4 ++--
 gcc/config/pa/pa.h  |  2 +-
 gcc/config/rs6000/aix.h |  2 +-
 gcc/config/rs6000/darwin.h  |  2 +-
 gcc/config/rs6000/freebsd64.h   |  2 +-
 gcc/config/rs6000/linux64.h |  2 +-
 gcc/expr.cc |  4 ++--
 gcc/function.cc | 10 +-
 23 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/gcc/calls.cc b/gcc/calls.cc
index f67067acad4..69b5cfe51bf 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -1229,7 +1229,7 @@ store_unaligned_arguments_into_pseudos (struct arg_data 
*args, int num_actuals)
if (bytes < UNITS_PER_WORD
 #ifdef BLOCK_REG_PADDING
&& (BLOCK_REG_PADDING (args[i].mode,
-  TREE_TYPE (args[i].tree_value), 1)
+  TREE_TYPE (args[i].tree_value), 1, -1)
== PAD_DOWNWARD)
 #else
&& BYTES_BIG_ENDIAN
@@ -1586,7 +1586,8 @@ initialize_argument_information (int num_actuals 
ATTRIBUTE_UNUSED,
   end it should be padded.  */
args[i].locate.where_pad =
  BLOCK_REG_PADDING (arg.mode, type,
-int_size_in_bytes (type) <= UNITS_PER_WORD);
+int_size_in_bytes (type) <= UNITS_PER_WORD,
+argpos < n_named_args);
 #endif
 
   /* Update ARGS_SIZE, the total stack space for args so far.  */
@@ -4432,7 +4433,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx 
value,
argvec[count].locate.where_pad =
  BLOCK_REG_PADDING (arg.mode, NULL_TREE,
 known_le (GET_MODE_SIZE (arg.mode),
-  UNITS_PER_WORD));
+  UNITS_PER_WORD), 1);
 #endif
 
   targetm.calls.function_arg_advance (args_so_far, arg);
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 05d3258abf7..4b05c6936a9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -917,7 +917,7 @@ char *aarch64_output_simd_xor_imm (rtx, unsigned);
 
 char *aarch64_output_sve_mov_immediate (rtx);
 char *aarch64_output_sve_ptrues (rtx);
-bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
+bool aarch64_pad_reg_upward (machine_mode, const_tree, bool, int);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
 bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT 
*fail);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 00bcf18ae97..999adac 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -7501,7 +7501,7 @@ aarch64_function_arg_padding (machine_mode mode, 
const_tree type)
   return PAD_UPWARD;
 }
 
-/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
+/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST, NAMED).
 
It specifies padding for the last (may also be the only)
element of a block move between registers and memory.  If
@@ -7525,7 +7525,7 @@ aarch64_function_arg_padding (machine_mode mode, 
const_tree type)
 
 bool
 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
-bool first ATTRIBUTE_UNUSED)
+bool first ATTRIBUTE_UNUSE

[PATCH 25/61] Fix negative offset memory addressing

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Unconditionally set DONT_BREAK_DEPENDENCIES in scheduling flags.

The code to break dependencies does not appear to provide a win under
any circumstance and is often harmful.  Disable it completely pending
further investigation.

gcc/

* config/mips/mips.cc (mips_set_sched_flags): Set
DONT_BREAK_DEPENDENCIES unconditionally.

Cherry-picked f732af3ad1a393d2f2e708f0d7c469a093049d01
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 55339d577fb..508435cc9eb 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -16872,11 +16872,8 @@ mips_evaluation_hook (rtx_insn *head, rtx_insn *tail)
 static void
 mips_set_sched_flags (spec_info_t spec_info ATTRIBUTE_UNUSED)
 {
-  if (!reload_completed && TARGET_SCHED_WEIGHT)
-{
-  unsigned int *flags = &(current_sched_info->flags);
-  *flags |= DONT_BREAK_DEPENDENCIES;
-}
+  unsigned int *flags = &(current_sched_info->flags);
+  *flags |= DONT_BREAK_DEPENDENCIES;
 }
 
 static void
-- 
2.34.1

[PATCH 11/61] Fix unsafe comparison against stack_pointer_rtx

2025-01-31 Thread Aleksandar Rakic

From: Andrew Bennett 

GCC can modify an rtx which was created using stack_pointer_rtx.
This means that just doing a straight address comparison of an rtx
against stack_pointer_rtx to see whether it is the stack pointer
register will not be correct in all cases.

This patch rewrites these comparisons to check first that the rtx
is a register and then that its register number is STACK_POINTER_REGNUM.

Cherry-picked 1a066c0af8e7ccf36e8c3f01529c90603a981c18
from https://github.com/MIPS/gcc

Signed-off-by: Andrew Bennett 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 16 +---
 gcc/config/mips/mips.md |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 9db2a2a9396..69c5cdbe20d 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -2804,7 +2804,7 @@ mips_stack_address_p (rtx x, machine_mode mode)
 
   return (mips_classify_address (&addr, x, mode, false)
  && addr.type == ADDRESS_REG
- && addr.reg == stack_pointer_rtx);
+ && REGNO (addr.reg) == STACK_POINTER_REGNUM);
 }
 
 /* Return true if ADDR matches the pattern for the LWXS load scaled indexed
@@ -2870,7 +2870,8 @@ mips16_unextended_reference_p (machine_mode mode, rtx 
base,
   if (mode != BLKmode && offset % GET_MODE_SIZE (mode) == 0
   && REGNO (base) != GLOBAL_POINTER_REGNUM)
 {
-  if (GET_MODE_SIZE (mode) == 4 && base == stack_pointer_rtx)
+  if (GET_MODE_SIZE (mode) == 4 && GET_CODE (base) == REG
+ && REGNO (base) == STACK_POINTER_REGNUM)
return offset < 256U * GET_MODE_SIZE (mode);
   return offset < 32U * GET_MODE_SIZE (mode);
 }
@@ -9879,7 +9880,7 @@ mips_debugger_offset (rtx addr, HOST_WIDE_INT offset)
   if (offset == 0)
 offset = INTVAL (offset2);
 
-  if (reg == stack_pointer_rtx
+  if ((GET_CODE (reg) == REG && REGNO (reg) == STACK_POINTER_REGNUM)
   || reg == frame_pointer_rtx
   || reg == hard_frame_pointer_rtx)
 {
@@ -10622,7 +10623,7 @@ mips16e_collect_argument_save_p (rtx dest, rtx src, rtx 
*reg_values,
   required_offset = cfun->machine->frame.total_size + argno * UNITS_PER_WORD;
   if (base == hard_frame_pointer_rtx)
 required_offset -= cfun->machine->frame.hard_frame_pointer_offset;
-  else if (base != stack_pointer_rtx)
+  else if (!(GET_CODE (base) == REG && REGNO (base) == STACK_POINTER_REGNUM))
 return false;
   if (offset != required_offset)
 return false;
@@ -10833,7 +10834,7 @@ mips16e_save_restore_pattern_p (rtx pattern, 
HOST_WIDE_INT adjust,
   /* Check that the address is the sum of the stack pointer and a
 possibly-zero constant offset.  */
   mips_split_plus (XEXP (mem, 0), &base, &offset);
-  if (base != stack_pointer_rtx)
+  if (!(GET_CODE (base) == REG && REGNO (base) == STACK_POINTER_REGNUM))
return false;
 
   /* Check that SET's other operand is a register.  */
@@ -13001,7 +13002,8 @@ mips_restore_reg (rtx reg, rtx mem)
 static void
 mips_deallocate_stack (rtx base, rtx offset, HOST_WIDE_INT new_frame_size)
 {
-  if (base == stack_pointer_rtx && offset == const0_rtx)
+  if (GET_CODE (base) == REG && REGNO (base) == STACK_POINTER_REGNUM
+  && offset == const0_rtx)
 return;
 
   mips_frame_barrier ();
@@ -18222,7 +18224,7 @@ r10k_simplify_address (rtx x, rtx_insn *insn)
{
  /* Replace the incoming value of $sp with
 virtual_incoming_args_rtx.  */
- if (x == stack_pointer_rtx
+ if (GET_CODE (x) == REG && REGNO (x) == STACK_POINTER_REGNUM
  && DF_REF_BB (def) == ENTRY_BLOCK_PTR_FOR_FN (cfun))
newx = virtual_incoming_args_rtx;
}
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index f147667d63a..4b486a7ad29 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -7714,7 +7714,7 @@
[(set (match_operand:SI 1 "register_operand")
 (plus:SI (match_dup 1)
  (match_operand:SI 2 "const_int_operand")))])]
-  "operands[1] == stack_pointer_rtx
+  "GET_CODE (operands[1]) == REG && REGNO (operands[1]) == STACK_POINTER_REGNUM
&& mips16e_save_restore_pattern_p (operands[0], INTVAL (operands[2]), NULL)"
   { return mips16e_output_save_restore (operands[0], INTVAL (operands[2])); }
   [(set_attr "type" "arith")
-- 
2.34.1

[PATCH 20/61] Add -march=interaptiv-mr2 with MIPS16E2

2025-01-31 Thread Aleksandar Rakic

From: Robert Suchanek 

- Bugfix [MIPS16E2]: split of moves of negative constants should exclude
zero const.

- Add support for every style of ZEB/ZEH support that has been tried:

An earlier attempt to improve generation of ZEB/ZEH led to a chaotic
effect of sometimes generating the instructions and sometimes retaining
the ANDI 0x. Also occasional generation of LHU/LBU appeared where
the original value was not already in memory.

Performance results are showing wild and unexpected variation which
appears to correlate with the way in which ZEH/ZEB handling is or is
not implemented. Support all forms tried so far with a hidden option
defaulting to the preferred method.

- Check to see if it is safe to use the SAVE/RESTORE instruction in a
function.

- Add interaptiv-mr2 architecture with COPYW/UCOPYW.

- Add -muse-copyw-ucopyw option (hidden from help).

- Disable tests at -O0 due to introducing a frame:

SAVE/RESTORE end up introducing a frame owing to saving more data
than strictly necessary.

gcc/
* config/mips/mips.cc (mips_option_override): Set default for
TARGET_USE_COPYW_UCOPYW.
* config/mips/mips.h (ISA_HAS_COPY): Update to reference
TARGET_USE_COPYW_UCOPYW.
* config/mips/mips.opt (-muse-copyw-ucopyw): New hidden option.
* config/mips/mips-cpus.def: Set PTF_AVOID_BRANCHLIKELY_ALWAYS
flag for interAptiv-mr2 CPU.

gcc/testsuite/
* gcc.target/mips/iamr2.c: New test.
* gcc.target/mips/memcpy-3.c: New test.
* gcc.target/mips/memcpy-4.c: Likewise.
* gcc.target/mips/mips.exp: Accept -muse-copyw-ucopyw and
isa=interaptiv-mr2.
(mips-dg-init): Add memcpy option.
* gcc.target/mips/r10k-cache-barrier-9.c: Skip test for -O0.
* gcc.target/mips/stack-1.c: Likewise.

Cherry-picked 01dbcc401881f2e4ed063fe43406f8670e4e0cac,
34e4b01b6e6afea14f51c093520c58e7eb3ddb66,
3475f16f5ce9d1247758f5d3a858af5163116d71 and
aecf341540d1462145eaf47e3cfa7e7780ee7adc
from https://github.com/MIPS/gcc

Signed-off-by: Robert Suchanek 
Signed-off-by: Matthew Fortune 
Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/24k.md|  68 ++--
 gcc/config/mips/mips-cpus.def |   2 +
 gcc/config/mips/mips-protos.h |   3 +-
 gcc/config/mips/mips-tables.opt   |  57 ++--
 gcc/config/mips/mips.cc   | 300 --
 gcc/config/mips/mips.h|  35 +-
 gcc/config/mips/mips.md   |  94 +-
 gcc/config/mips/mips.opt  |   6 +
 gcc/config/mips/predicates.md |   2 +-
 gcc/doc/invoke.texi   |   2 +-
 gcc/testsuite/gcc.target/mips/iamr2.c |  51 +++
 gcc/testsuite/gcc.target/mips/memcpy-3.c  |  14 +
 gcc/testsuite/gcc.target/mips/memcpy-4.c  |  14 +
 gcc/testsuite/gcc.target/mips/mips.exp|   6 +
 .../gcc.target/mips/r10k-cache-barrier-9.c|   1 +
 gcc/testsuite/gcc.target/mips/stack-1.c   |   1 +
 16 files changed, 557 insertions(+), 99 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/iamr2.c
 create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-3.c
 create mode 100644 gcc/testsuite/gcc.target/mips/memcpy-4.c

diff --git a/gcc/config/mips/24k.md b/gcc/config/mips/24k.md
index 1d09c929ab4..8e49456eac0 100644
--- a/gcc/config/mips/24k.md
+++ b/gcc/config/mips/24k.md
@@ -41,7 +41,7 @@
 
 ;; 1. Loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
 (define_insn_reservation "r24k_int_load" 2
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
(eq_attr "type" "load"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -53,7 +53,7 @@
 ;; (movn/movz is not matched, we'll need to split condmov to
 ;;  differentiate between integer/float moves)
 (define_insn_reservation "r24k_int_arith" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
(eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -61,13 +61,13 @@
 ;; 3. Links: bgezal, bgezall, bltzal, bltzall, jal, jalr, jalx
 ;; 3a. jr/jalr consumer
 (define_insn_reservation "r24k_int_jump" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
(eq_attr "type" "call,jump"))
   "r24k_iss+r24k_ixu_arith")
 
 ;; 3b. branch consumer
 (define_insn_reservation "r24k_int_branch" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1,interaptiv_mr2")
(eq_attr "type" "branch"))
   "r24k_iss+r24k_ixu_arith")
 
@@ -75,38 +75,38 @@
 ;; 4. MDU: fully pipelined multiplier
 ;; mult - delivers result to hi/lo in 1 cycle (pipelined)
 (define_insn_reservation "r24k_int_mult" 1
-  (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+  (and (eq_attr "cp

[PATCH 34/61] Testsuite: Adjust tests to cope with -mips16

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Cherry-picked 38288a0fd125d70a7876763d7165f858d902
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 .../gcc.target/mips/call-clobbered-2.c|   3 +-
 .../gcc.target/mips/call-clobbered-3.c|   2 +-
 .../gcc.target/mips/call-clobbered-5.c|   2 +-
 gcc/testsuite/gcc.target/mips/ds-schedule-2.c |   4 +-
 .../gcc.target/mips/interrupt_handler-bug-1.c |   2 +-
 gcc/testsuite/gcc.target/mips/movdf-1.c   |   2 +-
 gcc/testsuite/gcc.target/mips/movdf-2.c   |   2 +-
 gcc/testsuite/gcc.target/mips/movdf-3.c   |   2 +-
 gcc/testsuite/gcc.target/mips/msa-builtins.c  | 334 +-
 gcc/testsuite/gcc.target/mips/msa.c   |  12 +-
 10 files changed, 183 insertions(+), 182 deletions(-)

diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-2.c 
b/gcc/testsuite/gcc.target/mips/call-clobbered-2.c
index 5f9a47208c7..86be4453797 100644
--- a/gcc/testsuite/gcc.target/mips/call-clobbered-2.c
+++ b/gcc/testsuite/gcc.target/mips/call-clobbered-2.c
@@ -4,7 +4,8 @@
 
 void bar (void);
 float a;
-float
+
+NOMIPS16 float
 foo ()
 {
   float b = a + 1.0f;
diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-3.c 
b/gcc/testsuite/gcc.target/mips/call-clobbered-3.c
index 3a9e8d883fc..cca94bdd5ba 100644
--- a/gcc/testsuite/gcc.target/mips/call-clobbered-3.c
+++ b/gcc/testsuite/gcc.target/mips/call-clobbered-3.c
@@ -4,7 +4,7 @@
 
 void bar (void);
 float a;
-float
+NOMIPS16 float
 foo ()
 {
   float b = a + 1.0f;
diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-5.c 
b/gcc/testsuite/gcc.target/mips/call-clobbered-5.c
index c7cd7cac7dd..b9ca58746f6 100644
--- a/gcc/testsuite/gcc.target/mips/call-clobbered-5.c
+++ b/gcc/testsuite/gcc.target/mips/call-clobbered-5.c
@@ -4,7 +4,7 @@
 
 void bar (void);
 float a;
-float
+NOMIPS16 float
 foo ()
 {
   float b = a + 1.0f;
diff --git a/gcc/testsuite/gcc.target/mips/ds-schedule-2.c 
b/gcc/testsuite/gcc.target/mips/ds-schedule-2.c
index 6c5de5dac92..3cb3c593765 100644
--- a/gcc/testsuite/gcc.target/mips/ds-schedule-2.c
+++ b/gcc/testsuite/gcc.target/mips/ds-schedule-2.c
@@ -1,4 +1,4 @@
-/* { dg-options "-mcompact-branches=never -mno-abicalls -G4" } */
+/* { dg-options "-mcompact-branches=never -mno-mips16 -mno-abicalls -G4" } */
 /* { dg-skip-if "code quality test" { *-*-* } { "-O0" "-O1" "-Os" } { "" } } */
 /* { dg-final { scan-assembler "beq.*\n\tlw" } } */
 /* { dg-final { scan-assembler-times "\\(foo\\)" 2 } } */
@@ -19,7 +19,7 @@ int foo;
 
 extern void t (int, int, int*);
 
-void
+NOMIPS16 void
 f (struct list **ptr)
 {
   if (gr)
diff --git a/gcc/testsuite/gcc.target/mips/interrupt_handler-bug-1.c 
b/gcc/testsuite/gcc.target/mips/interrupt_handler-bug-1.c
index 083e1524450..d8412f17876 100644
--- a/gcc/testsuite/gcc.target/mips/interrupt_handler-bug-1.c
+++ b/gcc/testsuite/gcc.target/mips/interrupt_handler-bug-1.c
@@ -2,7 +2,7 @@
 int foo;
 int bar;
 
-void __attribute__ ((interrupt))
+NOMIPS16 void __attribute__ ((interrupt))
 isr (void)
 {
   if (!foo)
diff --git a/gcc/testsuite/gcc.target/mips/movdf-1.c 
b/gcc/testsuite/gcc.target/mips/movdf-1.c
index f0267d00e97..5fe61807d56 100644
--- a/gcc/testsuite/gcc.target/mips/movdf-1.c
+++ b/gcc/testsuite/gcc.target/mips/movdf-1.c
@@ -4,7 +4,7 @@
 
 void bar (void);
 
-double
+NOMIPS16 double
 foo (int x, double a)
 {
   return a;
diff --git a/gcc/testsuite/gcc.target/mips/movdf-2.c 
b/gcc/testsuite/gcc.target/mips/movdf-2.c
index 175b61c7e77..0e52c9fec7a 100644
--- a/gcc/testsuite/gcc.target/mips/movdf-2.c
+++ b/gcc/testsuite/gcc.target/mips/movdf-2.c
@@ -4,7 +4,7 @@
 
 void bar (void);
 
-double
+NOMIPS16 double
 foo (int x, double a)
 {
   return a;
diff --git a/gcc/testsuite/gcc.target/mips/movdf-3.c 
b/gcc/testsuite/gcc.target/mips/movdf-3.c
index 5db52c9487b..f1dd2abf5ec 100644
--- a/gcc/testsuite/gcc.target/mips/movdf-3.c
+++ b/gcc/testsuite/gcc.target/mips/movdf-3.c
@@ -4,7 +4,7 @@
 
 void bar (void);
 
-double
+NOMIPS16 double
 foo (int x, double a)
 {
   return a;
diff --git a/gcc/testsuite/gcc.target/mips/msa-builtins.c 
b/gcc/testsuite/gcc.target/mips/msa-builtins.c
index 6a146b3e6ae..932cc8db961 100644
--- a/gcc/testsuite/gcc.target/mips/msa-builtins.c
+++ b/gcc/testsuite/gcc.target/mips/msa-builtins.c
@@ -1,6 +1,6 @@
 /* Test builtins for MIPS MSA ASE instructions */
 /* { dg-do compile } */
-/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+/* { dg-options "-mno-mips16 -mfp64 -mhard-float -mmsa" } */
 
 /* { dg-final { scan-assembler-times "msa_addv_b:.*addv\\.b.*msa_addv_b" 1 } } 
*/
 /* { dg-final { scan-assembler-times "msa_addv_h:.*addv\\.h.*msa_addv_h" 1 } } 
*/
@@ -641,182 +641,182 @@
 #define FN(NAME, T) FN_EVAL (NAME, T)
 
 /* MSA Arithmetic builtins.  */
-#define ADDV(T) NOMIPS16 T FN (addv, T ## _DF) (T i, T j) { return BUILTIN 
(addv, T ## _DF) (i, j); }
-#define ADDVI(T) NOMIPS16 T FN (addvi, T ## _DF) (T i) { return BUILTIN 
(addvi, T ## _D

[PATCH 40/61] MIPSR6: Fix ICE occurred in R6 target

2025-01-31 Thread Aleksandar Rakic

From: Jaydeep Patil 

Fix an ICE that occurred on R6 targets due to a clobber list introduced
in MADD/MSUB during the combine pass.

Cherry-picked 180f74c8ebdf13ddac806695d0333af7b924c402
from https://github.com/MIPS/gcc

Signed-off-by: Jaydeep Patil 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.md | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 7d27e7d4b20..159fc2e2615 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -1830,6 +1830,19 @@
 
 ;; Split *mul_acc_si if both the source and destination accumulator
 ;; values are GPRs.
+(define_split
+  [(set (match_operand:SI 0 "d_operand")
+   (plus:SI (mult:SI (match_operand:SI 1 "d_operand")
+ (match_operand:SI 2 "d_operand"))
+(match_operand:SI 3 "d_operand")))
+   (clobber (match_operand:SI 4 "lo_operand"))
+   (clobber (match_operand:SI 5 "d_operand"))]
+  "reload_completed && ISA_HAS_R6MUL"
+  [(set (match_dup 5)
+   (mult:SI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (plus:SI (match_dup 5) (match_dup 3)))]
+  "")
+
 (define_split
   [(set (match_operand:SI 0 "d_operand")
(plus:SI (mult:SI (match_operand:SI 1 "d_operand")
@@ -2052,6 +2065,19 @@
 
 ;; Split *mul_sub_si if both the source and destination accumulator
 ;; values are GPRs.
+(define_split
+  [(set (match_operand:SI 0 "d_operand")
+   (minus:SI (match_operand:SI 1 "d_operand")
+ (mult:SI (match_operand:SI 2 "d_operand")
+  (match_operand:SI 3 "d_operand"
+   (clobber (match_operand:SI 4 "lo_operand"))
+   (clobber (match_operand:SI 5 "d_operand"))]
+  "reload_completed && ISA_HAS_R6MUL"
+  [(set (match_dup 5)
+   (mult:SI (match_dup 2) (match_dup 3)))
+   (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 5)))]
+  "")
+
 (define_split
   [(set (match_operand:SI 0 "d_operand")
 (minus:SI (match_operand:SI 1 "d_operand")
-- 
2.34.1

[PATCH 24/61] P5600: Option -msched-weight added

2025-01-31 Thread Aleksandar Rakic

From: Jaydeep Patil 

Cherry-picked 0cf2542b41d8102800af180f0b6da1fe55a9d76b
from https://github.com/MIPS/gcc

Signed-off-by: Prachi Godbole 
Signed-off-by: Jaydeep Patil 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 242 +++
 gcc/config/mips/mips.opt |   3 +
 2 files changed, 245 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 56e0d4ba021..55339d577fb 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -74,6 +74,17 @@ along with GCC; see the file COPYING3.  If not see
 /* This file should be included last.  */
 #include "target-def.h"
 
+/* Definitions used in ready queue reordering for first scheduling pass.  */
+
+static int *level = NULL;
+static int *consumer_luid = NULL;
+
+#define LEVEL(INSN)\
+  level[INSN_UID ((INSN))]
+
+#define CONSUMER_LUID(INSN)\
+  consumer_luid[INSN_UID ((INSN))]
+
 /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
 #define UNSPEC_ADDRESS_P(X)\
   (GET_CODE (X) == UNSPEC  \
@@ -16737,6 +16748,220 @@ mips_74k_agen_reorder (rtx_insn **ready, int nready)
   break;
 }
 }
+
+/* These functions are called when -msched-weight is set.  */
+
+/* Find register born in given X if any.  */
+
+static int
+find_reg_born (rtx x)
+{
+  if (GET_CODE (x) == CLOBBER)
+return 1;
+
+  if (GET_CODE (x) == SET)
+{
+  if (REG_P (SET_DEST (x)) && reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
+   return 0;
+  return 1;
+}
+  return 0;
+}
+
+/* Calculate register weight for given INSN.  */
+
+static int
+get_weight (rtx insn)
+{
+  int weight = 0;
+  rtx x;
+
+  /* Increment weight for each register born here.  */
+  x = PATTERN (insn);
+  weight = find_reg_born (x);
+
+  if (GET_CODE (x) == PARALLEL)
+{
+  int i;
+  for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+   {
+ x = XVECEXP (PATTERN (insn), 0, i);
+ weight += find_reg_born (x);
+   }
+}
+
+  /* Decrement weight for each register that dies here.  */
+  for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
+{
+  if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
+   {
+ rtx note = XEXP (x, 0);
+ if (REG_P (note))
+   weight--;
+   }
+}
+  return weight;
+}
+
+/* TARGET_SCHED_WEIGHT helper function.
+   Allocate and initialize global data.  */
+
+static void
+mips_weight_init_global (int old_max_uid)
+{
+  level = (int *) xcalloc (old_max_uid, sizeof (int));
+  consumer_luid = (int *) xcalloc (old_max_uid, sizeof (int));
+}
+
+/* Implement TARGET_SCHED_INIT_GLOBAL.  */
+
+static void
+mips_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
+   int verbose ATTRIBUTE_UNUSED,
+   int old_max_uid)
+{
+  if (!reload_completed && TARGET_SCHED_WEIGHT)
+mips_weight_init_global (old_max_uid);
+}
+
+/* TARGET_SCHED_WEIGHT helper function.  Called for each basic block
+   with dependency chain information in HEAD and TAIL.
+   Calculates LEVEL for each INSN from its forward dependencies
+   and finds out UID of first consumer instruction (CONSUMER_LUID) of INSN.  */
+
+static void
+mips_weight_evaluation (rtx_insn *head, rtx_insn *tail)
+{
+  sd_iterator_def sd_it;
+  dep_t dep;
+  rtx_insn *prev_head, *insn;
+  rtx x;
+  prev_head = PREV_INSN (head);
+
+  for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
+if (INSN_P (insn))
+  {
+   FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+ {
+   x = DEP_CON (dep);
+   if (! DEBUG_INSN_P (x))
+ {
+   if (LEVEL (x) > LEVEL (insn))
+ LEVEL (insn) = LEVEL (x);
+   CONSUMER_LUID (insn) = INSN_LUID (x);
+ }
+ }
+   LEVEL (insn)++;
+  }
+}
+
+/* Implement TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK.  */
+
+static void
+mips_evaluation_hook (rtx_insn *head, rtx_insn *tail)
+{
+  if (!reload_completed && TARGET_SCHED_WEIGHT)
+mips_weight_evaluation (head, tail);
+}
+
+/* Implement TARGET_SCHED_SET_SCHED_FLAGS.
+   Enables DONT_BREAK_DEPENDENCIES for the first scheduling pass.
+   It prevents breaking of dependencies on mem/inc pair in the first pass
+   which would otherwise increase stalls.  */
+
+static void
+mips_set_sched_flags (spec_info_t spec_info ATTRIBUTE_UNUSED)
+{
+  if (!reload_completed && TARGET_SCHED_WEIGHT)
+{
+  unsigned int *flags = &(current_sched_info->flags);
+  *flags |= DONT_BREAK_DEPENDENCIES;
+}
+}
+
+static void
+mips_weight_finish_global ()
+{
+  if (level != NULL)
+free (level);
+
+  if (consumer_luid != NULL)
+free (consumer_luid);
+}
+
+/* Implement TARGET_SCHED_FINISH_GLOBAL.  */
+
+static void
+mips_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED)
+{
+  if (!reload_completed && TARGET_S

[PATCH 30/61] MSA: Make MSA and microMIPS R5 unsupported

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

There are neither platforms nor simulators for MSA with microMIPS R5, so
turn off this support for now.

gcc/ChangeLog:

* config/mips/mips.cc (mips_option_override): Error out for
-mmicromips -mmsa.

Cherry-picked 1009d6ff7a8d3b56e0224a6b193c5a7b3c29aa5f
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 1fa727c2ff5..3185fa9633e 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -22551,6 +22551,9 @@ mips_option_override (void)
  "-mcompact-branches=never");
 }
 
+  if (is_micromips && TARGET_MSA)
+error ("unsupported combination: %s", "-mmicromips -mmsa");
+
   /* Enable the use of interAptiv MIPS32 SAVE/RESTORE instructions.  */
   if (TARGET_USE_SAVE_RESTORE == -1)
 {
-- 
2.34.1

[PATCH 51/61] Test solution on dspmac builtins

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

gcc/

* config/mips/mips.cc (mips_expand_builtin_insn): During expansion
of DSP mac builtins, force the operands which correspond to the same
inout register to have the same pseudo assigned.

gcc/testsuite

* gcc.target/mips/mac_zero_reload.c: New testcase.

Cherry-picked 110ec7a3e56737bb8ed2ae653298aa55ad014377
and 712169a3630d45284ddd6ea6d0dedcb2b60e0ba4
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc   | 24 ++
 .../gcc.target/mips/mac_zero_reload.c | 32 +++
 2 files changed, 56 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/mips/mac_zero_reload.c

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index fa2039175ff..20128c7f537 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -19215,6 +19215,30 @@ mips_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
 case CODE_FOR_msa_dpsub_u_w:
 case CODE_FOR_msa_dpsub_u_h:
 case CODE_FOR_msa_dpsub_u_d:
+
+case CODE_FOR_mips_dpau_h_qbl:
+case CODE_FOR_mips_dpau_h_qbr:
+case CODE_FOR_mips_dpsu_h_qbl:
+case CODE_FOR_mips_dpsu_h_qbr:
+case CODE_FOR_mips_dpaq_s_w_ph:
+case CODE_FOR_mips_dpsq_s_w_ph:
+case CODE_FOR_mips_mulsaq_s_w_ph:
+case CODE_FOR_mips_dpaq_sa_l_w:
+case CODE_FOR_mips_dpsq_sa_l_w:
+case CODE_FOR_mips_maq_s_w_phl:
+case CODE_FOR_mips_maq_s_w_phr:
+case CODE_FOR_mips_maq_sa_w_phl:
+case CODE_FOR_mips_maq_sa_w_phr:
+
+case CODE_FOR_mips_dpa_w_ph:
+case CODE_FOR_mips_dps_w_ph:
+case CODE_FOR_mips_mulsa_w_ph:
+case CODE_FOR_mips_dpax_w_ph:
+case CODE_FOR_mips_dpsx_w_ph:
+case CODE_FOR_mips_dpaqx_s_w_ph:
+case CODE_FOR_mips_dpaqx_sa_w_ph:
+case CODE_FOR_mips_dpsqx_s_w_ph:
+case CODE_FOR_mips_dpsqx_sa_w_ph:
   /* Force the operands which correspond to the same in-out register
  to have the same pseudo assigned to them.  If the input operand
  is not REG, create one for it.  */
diff --git a/gcc/testsuite/gcc.target/mips/mac_zero_reload.c 
b/gcc/testsuite/gcc.target/mips/mac_zero_reload.c
new file mode 100644
index 000..63261bc1493
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/mac_zero_reload.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-fno-unroll-loops -mgp32 -mdspr2" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* { dg-final { scan-assembler-not "\tmflo\t" } } */
+/* { dg-final { scan-assembler-not "\tmfhi\t" } } */
+/* { dg-final { scan-assembler-not "\tmtlo\t" } } */
+/* { dg-final { scan-assembler-not "\tmthi\t" } } */
+
+typedef short v2i16 __attribute__ ((vector_size(4)));
+
+extern v2i16 ps32Ptrl[4096];
+
+extern int sink[4096];
+
+int main(void)
+{
+v2i16 v2i16_h0;
+long long   s64Acc;
+
+for (int i = 0; i < 4; ++i)
+{
+v2i16_h0 = ps32Ptrl[i];
+
+s64Acc = 0;
+
+s64Acc = __builtin_mips_dpa_w_ph(s64Acc, v2i16_h0, v2i16_h0);
+
+sink[i] = __builtin_mips_extr_rs_w(s64Acc, 0);
+}
+
+return 0;
+}
-- 
2.34.1

[PATCH 44/61] Autovectorization failures on BE targets

2025-01-31 Thread Aleksandar Rakic

From: "dragan.mladjenovic" 

GCC assumes that taking a vector mode B SUBREG of vector mode A register
allows it to interpret its memory layout as if in A vector mode.

We currently allow this mode change to be a no-op on MSA registers. This
works on little-endian because the MSA register layout matches that of a
vector value in memory. This breaks on big-endian because the ordering of
bytes within a lane depends on the target endianness.

We now conservatively disallow a direct MSA register mode change via
TARGET_CAN_CHANGE_MODE_CLASS, making it go through memory.

gcc/
* config/mips/mips-msa.md (UNSPEC_MSA_CHANGE_MODE): New unspec.
(msa_change_mode): New expand pattern.
(msa_change_mode_): New insn pattern.
* config/mips/mips.cc (mips_split_128bit_move): Replace MSA mode
changing uses of simplify_gen_subreg with gen_rtx_REG.
(mips_split_msa_copy_d): Ditto.
(mips_split_msa_insert_d): Ditto.
(mips_split_msa_fill_d): Ditto.
(mips_can_change_mode_class): Disallow change of MSA modes with
different lane width on big-endian targets.
(mips_expand_vec_unpack): Use gen_msa_change_mode instead of
gen_lowpart for MSA modes.

Cherry-picked c00d34621429f31926e0c72e027b0c1028d046f0
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips-msa.md | 37 +
 gcc/config/mips/mips.cc | 29 ++---
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index f6edd5897a4..5ac4fa4bf24 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -90,6 +90,7 @@
   UNSPEC_MSA_SUBSUU_S
   UNSPEC_MSA_SUBSUS_U
   UNSPEC_MSA_VSHF
+  UNSPEC_MSA_CHANGE_MODE
 ])
 
 ;; All vector modes with 128 bits.
@@ -2930,3 +2931,39 @@
  const0_rtx));
   DONE;
 })
+
+;; On big-endian targets we cannot use subregs to refer to MSA register
+;; in different mode.  See mips_can_change_mode_class.
+(define_expand "msa_change_mode"
+  [(match_operand 0 "register_operand")
+   (match_operand 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  gcc_assert (MSA_SUPPORTED_MODE_P (GET_MODE (operands[0]))
+ && MSA_SUPPORTED_MODE_P (GET_MODE (operands[1])));
+
+  if (!TARGET_BIG_ENDIAN)
+  emit_move_insn (operands[0],
+ gen_lowpart (GET_MODE (operands[0]), operands[1]));
+else
+  emit_move_insn (operands[0],
+ gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, operands[1]),
+ UNSPEC_MSA_CHANGE_MODE));
+  DONE;
+})
+
+(define_insn_and_split "msa_change_mode_"
+  [(set (match_operand:MSA 0 "register_operand" "=f")
+(unspec:MSA [(match_operand 1 "register_operand" "f")]
+ UNSPEC_MSA_CHANGE_MODE))]
+  "ISA_HAS_MSA && TARGET_BIG_ENDIAN
+   && MSA_SUPPORTED_MODE_P (GET_MODE (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+operands[1] = gen_rtx_REG (mode, REGNO (operands[1]));
+}
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "")])
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 10f302e0790..e0b357a651a 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5998,12 +5998,12 @@ mips_split_128bit_move (rtx dest, rtx src)
   if (!TARGET_64BIT)
{
  if (GET_MODE (dest) != V4SImode)
-   new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+   new_dest = gen_rtx_REG (V4SImode, REGNO (dest));
}
   else
{
  if (GET_MODE (dest) != V2DImode)
-   new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
+   new_dest = gen_rtx_REG (V2DImode, REGNO (dest));
}
 
   for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
@@ -6026,12 +6026,12 @@ mips_split_128bit_move (rtx dest, rtx src)
   if (!TARGET_64BIT)
{
  if (GET_MODE (src) != V4SImode)
-   new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+   new_src = gen_rtx_REG (V4SImode, REGNO (src));
}
   else
{
  if (GET_MODE (src) != V2DImode)
-   new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
+   new_src = gen_rtx_REG (V2DImode, REGNO (src));
}
 
   for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
@@ -6087,7 +6087,8 @@ mips_split_msa_copy_d (rtx dest, rtx src, rtx index,
  from the higher index.  */
   rtx low = mips_subword (dest, false);
   rtx high = mips_subword (dest, true);
-  rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+
+  rtx new_src = gen_rtx_REG (V4SImode, REGNO (src));
 
   emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
   emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index)

[PATCH 35/61] Testsuite: Use HAS_LDC instead of a specific ISA

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

The call-clobbered-1.c test has both reasons to be above a certain
ISA and below a certain ISA level.  The option based ISA min/max
code only triggers if there is no isa level request.

gcc/testsuite/
* gcc.target/mips/call-clobbered-1.c: Use HAS_LDC ghost
option instead of isa>=2.

Cherry-picked e9df15b1a308aa8a10473c820f35d628fa8f2efb
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/testsuite/gcc.target/mips/call-clobbered-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-1.c 
b/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
index 8880ad13684..2e0521318f4 100644
--- a/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
+++ b/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
@@ -1,6 +1,6 @@
 /* Check that we handle call-clobbered FPRs correctly.  */
 /* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
-/* { dg-options "isa>=2 -mabi=32 -mfpxx -mhard-float -ffixed-f0 -ffixed-f1 
-ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 
-ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 
-ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19" } */
+/* { dg-options "(HAS_LDC) -mabi=32 -mfpxx -mhard-float -ffixed-f0 -ffixed-f1 
-ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 
-ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 
-ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19" } */
 
 void bar (void);
 double a;
-- 
2.34.1

[PATCH 50/61] Fix MSA SUBREG moves on big-endian targets

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

This fixes the MSA implementation on big-endian targets which is
essentially broken for things like SUBREG handling and calling
convention for vector types. It borrows heavily from [1] as Aarch64 has
the same problem with SVE vectors.

Conceptually, register bitconverts should act as if the data had been
stored to memory in one mode, and loaded from memory in the other.
This isn't what happens on big-endian as vector load/store instructions
are essentially mixed-endian with respect to the vector as a whole.
The in-register representation of data must be changed so that the
load/store round trip becomes valid. This is done by inserting one or
two shuffle instructions for every SUBREG move, as previously
implemented in [2] for LLVM. Even if the shuffle instructions weren't
generated, constraint in mips_can_change_mode_class would force the
conceptual memory reload of SUBREG move operand, which would generate
correct, albeit very inefficient code.

A new msa_reg_operand predicate was created in order to forbid SUBREG
operands in MSA patterns on big-endian targets. It weeds SUBREGs out of the
instruction patterns into SUBREG->REG moves which are caught by the new
msa_mov_subreg_be pattern and transformed into shuffle(s).

As for the MSA calling convention, ABI states that compiling for MSA
should not change the base ABIs vector calling convention, that is, MSA
vectors passed or returned by value do not use the MSA vector registers.
Instead, they are passed by general-purpose registers, as described by
the ABI. Passing the vector argument requires splitting it into 2 (or 4)
general-purpose registers and bitconverting it into V2DImode (or
V4SImode). The solution boils down to the one presented for SUBREG
moves: force every vector argument to the appropriate mode (V2DI or
V4SI) so that the shuffle instruction(s) might be generated in order to
conform to the calling convention. The same applies to vectors as return
values.

New testcases were added to check calling convention compliance for all
possible combinations of MSA and non-MSA interlinking.

gcc/

* config/mips/mips-msa.md: Replace register_operand predicate with
msa_reg_operand in every pattern.
(*msa_mov_subreg_be): New unspec.
* config/mips/mips-protos.h (mips_split_msa_subreg_move): Declare.
* config/mips/mips.cc (mips_maybe_expand_msa_subreg_move): New
function.
(mips_replace_reg_mode): Ditto.
(mips_split_msa_subreg_move): Ditto.
(mips_legitimize_move): Modify machine modes of MSA vectors which
reside in general-purpose registers. Check whether SUBREG move can
be replaced with shuffle(s).
(mips_split_128bit_move): Replace explicit REG creation with
mips_replace_reg_mode.
(mips_split_msa_copy_d): Ditto.
(mips_split_msa_insert_d): Ditto.
(mips_split_msa_fill_d): Ditto.
* config/mips/predicates.md (msa_reg_operand): New predicate.

gcc/testsuite:

* gcc.target/mips/inter/msa-inter.exp: New file.
* gcc.target/mips/inter/msa_1.h: New test.
* gcc.target/mips/inter/msa_1_main.c: New test.
* gcc.target/mips/inter/msa_1_x.c: New test.
* gcc.target/mips/inter/msa_1_y.c: New test.

Cherry-picked 8c54834001d76ed3c76832b6a47dfcb7d62bb664
and ae63e81745e8a5c07812428ecda735320201d500
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Dragan Mladjenovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips-msa.md   | 1056 +
 gcc/config/mips/mips-protos.h |2 +
 gcc/config/mips/mips.cc   |  153 ++-
 gcc/config/mips/predicates.md |5 +
 .../gcc.target/mips/inter/msa-inter.exp   |   67 ++
 gcc/testsuite/gcc.target/mips/inter/msa_1.h   |   23 +
 .../gcc.target/mips/inter/msa_1_main.c|8 +
 gcc/testsuite/gcc.target/mips/inter/msa_1_x.c |   35 +
 gcc/testsuite/gcc.target/mips/inter/msa_1_y.c |   14 +
 9 files changed, 836 insertions(+), 527 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/inter/msa-inter.exp
 create mode 100644 gcc/testsuite/gcc.target/mips/inter/msa_1.h
 create mode 100644 gcc/testsuite/gcc.target/mips/inter/msa_1_main.c
 create mode 100644 gcc/testsuite/gcc.target/mips/inter/msa_1_x.c
 create mode 100644 gcc/testsuite/gcc.target/mips/inter/msa_1_y.c

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 5ac4fa4bf24..e2fdf8e191e 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -91,6 +91,7 @@
   UNSPEC_MSA_SUBSUS_U
   UNSPEC_MSA_VSHF
   UNSPEC_MSA_CHANGE_MODE
+  UNSPEC_MSA_SUBREG_BE
 ])
 
 ;; All vector modes with 128 bits.
@@ -237,7 +238,7 @@
 (V4SF "31") (V8SF "31")])
 
 (define_expand "vec_init"
-  [(match_operand:MSA 0 "register_operand")
+  [(match_operand:MSA 0 "msa_reg_operand")
(match_operand:MSA 1 "")]
   "ISA_HAS_MSA"
 {
@@ -247,22 +248,22 @@
 
 ;; pckev pattern with implicit ty

[PATCH 47/61] Add -mmxu and -mno-mxu driver pass through

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

Cherry-picked 9acbf0b0efdfcc27e30b1db7a707dbe9cc6b64eb
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 6ed39af01e1..e0a150080e9 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -1502,6 +1502,7 @@ struct mips_cpu_info {
 %{mloongson-ext2} %{mno-loongson-ext2} \
 %{msmartmips} %{mno-smartmips} \
 %{mmt} %{mno-mt} \
+%{mmxu} %{mno-mxu} \
 %{mfix-r5900} %{mno-fix-r5900} \
 %{mfix-rm7000} %{mno-fix-rm7000} \
 %{mfix-vr4120} %{mfix-vr4130} \
-- 
2.34.1

[PATCH 55/61] Performance drop in mips-img-linux-gnu-gcc 7.x

2025-01-31 Thread Aleksandar Rakic

From: Mihailo Stojanovic 

gcc/
* config/mips/mips.cc (mips_rtx_costs): Reduce branch cost of
conditional branches.
(mips_prune_insertions_deletions): Target hook which checks
whether a basic block is possibly if-convertible. Adjusts the
insertion and deletion maps accordingly.
(check_bb): Check whether a basic block is a THEN or ELSE block
of IF-THEN-ELSE construct and whether it consists only of a
single set instruction. This is a condition for marking the
block as possibly if-convertible.
(bb_valid_for_noce): Helper function.
(last_active_insn): Same.
(first_active_insn): Same.
(insn_valid_noce_process_p): Same.
(noce_operand_ok): Same.
* config/mips/mips.opt: Add an option which disables the
mips_prune_insertions_deletions hook.
* doc/tm.texi.in: Add a macro definition for the new target
hook.
* gcse.cc (compute_pre_data): Add the target hook call, which
will modify the insertion and deletion bitmaps.
* target.def: Define the target hook.
* targhooks.h: Add default target hook prototype.
* targhooks.cc: Define the default target hook prototype.
* doc/tm.texi: Regenerated.

Cherry-picked 64e5b4b4ff53872482454908a29c94665e40d25c
from https://github.com/MIPS/gcc

Signed-off-by: Mihailo Stojanovic 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc  | 238 +++
 gcc/config/mips/mips.opt |   3 +
 gcc/doc/tm.texi  |   5 +
 gcc/doc/tm.texi.in   |   2 +
 gcc/gcse.cc  |   3 +
 gcc/target.def   |   8 ++
 gcc/targhooks.cc |   9 ++
 gcc/targhooks.h  |   5 +
 8 files changed, 273 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4521cac15c7..d23c30a43be 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5754,6 +5754,8 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
default:
  break;
}
+  if (GET_CODE (SET_DEST (x)) != PC)
+   *total = 0;
   return false;
 
 case IF_THEN_ELSE:
@@ -25872,6 +25874,239 @@ mips_noce_conversion_profitable_p (rtx_insn *seq, 
struct noce_if_info *if_info)
   return speed && cost <= if_info->max_seq_cost;
 }
 
+
+/* Return true if OP is ok for if-then-else processing.  */
+
+static int
+noce_operand_ok (const_rtx op)
+{
+  if (side_effects_p (op))
+return FALSE;
+
+  /* We special-case memories, so handle any of them with
+ no address side effects.  */
+  if (MEM_P (op))
+return ! side_effects_p (XEXP (op, 0));
+
+  return ! may_trap_p (op);
+}
+
+
+/* Helper for bb_valid_for_noce_process_p.  Validate that
+   the rtx insn INSN is a single set that does not set
+   the conditional register CC and is in general valid for
+   if-conversion.  */
+
+static bool
+insn_valid_noce_process_p (rtx_insn *insn)
+{
+  if (!insn
+  || !NONJUMP_INSN_P (insn))
+  return false;
+
+  rtx sset = single_set (insn);
+
+  /* Currently support only simple single sets in test_bb.  */
+  if (!sset
+  || !noce_operand_ok (SET_DEST (sset))
+  || !noce_operand_ok (SET_SRC (sset)))
+return false;
+
+  return true;
+}
+
+/* Return the first non-jump active insn in the basic block.  */
+
+static rtx_insn *
+first_active_insn (basic_block bb)
+{
+  rtx_insn *insn = BB_HEAD (bb);
+
+  if (LABEL_P (insn))
+{
+  if (insn == BB_END (bb))
+   return NULL;
+  insn = NEXT_INSN (insn);
+}
+
+  while (NOTE_P (insn) || DEBUG_INSN_P (insn))
+{
+  if (insn == BB_END (bb))
+   return NULL;
+  insn = NEXT_INSN (insn);
+}
+
+  if (JUMP_P (insn))
+return NULL;
+
+  return insn;
+}
+
+static rtx_insn *
+last_active_insn (basic_block bb, int skip_use_p)
+{
+  rtx_insn *insn = BB_END (bb);
+  rtx_insn *head = BB_HEAD (bb);
+
+  while (NOTE_P (insn)
+|| JUMP_P (insn)
+|| DEBUG_INSN_P (insn)
+|| (skip_use_p
+&& NONJUMP_INSN_P (insn)
+&& GET_CODE (PATTERN (insn)) == USE))
+{
+  if (insn == head)
+   return NULL;
+  insn = PREV_INSN (insn);
+}
+
+  if (LABEL_P (insn))
+return NULL;
+
+  return insn;
+}
+
+static bool
+bb_valid_for_noce (basic_block test_bb, bool *simple)
+{
+  if (!test_bb)
+return false;
+
+  rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
+
+  if (!insn_valid_noce_process_p (last_insn))
+return false;
+
+  rtx_insn *first_insn = first_active_insn (test_bb);
+  rtx first_set = single_set (first_insn);
+
+  if (!first_set)
+return false;
+
+  *simple = first_insn == last_insn;
+  return true;
+}
+
+#define NULL_BLOCK ((basic_block) NULL)
+
+static bool
+check_bb (basic_block test_bb, sbitmap *ifcv_blocks)
+{
+  /* The kind of block we're looking for has exactly two successors.  */
+  if (EDGE_COUNT (test_bb->succs) != 2)
+return false;
+

[PATCH 41/61] Lightweight fix for shrink-wrapping inhibition

2025-01-31 Thread Aleksandar Rakic

From: Matthew Fortune 

This should be solved using the various PIC related macros such as
PIC_OFFSET_TABLE_REGNUM and pic_offset_table_rtx but changing these is
too dangerous without investigation.  The lightweight fix for
shrink-wrapping being inhibited by -mgpopt just clears the global
pointer from being related to the prologue and only affects shrink
wrapping.

Cherry-picked 4ea3a82b5e8e23591d79a9ca63018acceb53c2a5
from https://github.com/MIPS/gcc

Signed-off-by: Matthew Fortune 
Signed-off-by: Faraz Shahbazker 
Signed-off-by: Aleksandar Rakic 
---
 gcc/config/mips/mips.cc | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 57a858aca39..10f302e0790 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -25497,6 +25497,15 @@ mips_c_mode_for_floating_type (enum tree_index ti)
   return default_mode_for_floating_type (ti);
 }
 
+/* Implement TARGET_SET_UP_BY_PROLOGUE.  */
+
+static void
+mips_set_up_by_prologue (hard_reg_set_container *regs)
+{
+  if (!TARGET_USE_GOT && TARGET_GPOPT)
+CLEAR_HARD_REG_BIT (regs->set, GLOBAL_POINTER_REGNUM);
+}
+
 void
 mips_bit_clear_info (enum machine_mode mode, unsigned HOST_WIDE_INT m,
  int *start_pos, int *size)
@@ -25634,6 +25643,9 @@ mips_bit_clear_p (enum machine_mode mode, unsigned 
HOST_WIDE_INT m)
 #undef TARGET_IN_SMALL_DATA_P
 #define TARGET_IN_SMALL_DATA_P mips_in_small_data_p
 
+#undef TARGET_SET_UP_BY_PROLOGUE
+#define TARGET_SET_UP_BY_PROLOGUE mips_set_up_by_prologue
+
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG mips_reorg
 
-- 
2.34.1

Re: [PATCH v2] x86: Handle -mindirect-branch-register for -fno-plt

2025-01-31 Thread H.J. Lu

On Fri, Jan 31, 2025 at 10:09 PM Uros Bizjak  wrote:
>
> On Fri, Jan 31, 2025 at 2:54 PM Uros Bizjak  wrote:
> >
> > On Fri, Jan 31, 2025 at 2:36 PM H.J. Lu  wrote:
> > >
> > > -fno-plt forces external call to indirect call via GOT memory.  But
> > > -mindirect-branch-register requires indirect call and jump via register.
> > > For -mindirect-branch-register, expanding indirect call via register and
> > > update call patterns and peepholes to disable indirect call via memory.
> > >
> > > gcc/
> > >
> > > PR target/118713
> > > * config/i386/i386-expand.cc (ix86_expand_call): Force indirect
> > > call via register for -mindirect-branch-register.
> > > * config/i386/i386.md (*call): Disable indirect call via memory
> > > for -mindirect-branch-register.
> > > (*call_got_x32): Likewise.
> > > (*sibcall_GOT_32): Likewise.
> > > (*sibcall): Likewise.
> > > (*sibcall_memory): Likewise.
> > > (*call_pop): Likewise.
> > > (*sibcall_pop): Likewise.
> > > (*sibcall_pop_memory): Likewise.
> > > (*call_value): Likewise.
> > > (*call_value_got_x32): Likewise.
> > > (*sibcall_value_GOT_32): Likewise.
> > > (*sibcall_value): Likewise.
> > > (*sibcall_value_memory): Likewise.
> > > (*call_value_pop): Likewise.
> > > (*sibcall_value_pop): Likewise.
> > > (*sibcall_value_pop_memory): Likewise.
> > >
> > > gcc/testsuite/
> > >
> > > PR target/118713
> > > * gcc.target/i386/pr118713-1-x32.c: New test.
> > > * gcc.target/i386/pr118713-1.c: Likewise.
> > > * gcc.target/i386/pr118713-2-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-2.c: Likewise.
> > > * gcc.target/i386/pr118713-3-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-3.c: Likewise.
> > > * gcc.target/i386/pr118713-4-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-4.c: Likewise.
> > > * gcc.target/i386/pr118713-5-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-5.c: Likewise.
> > > * gcc.target/i386/pr118713-6-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-6.c: Likewise.
> > > * gcc.target/i386/pr118713-7-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-7.c: Likewise.
> > > * gcc.target/i386/pr118713-8-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-8.c: Likewise.
> > > * gcc.target/i386/pr118713-9-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-9.c: Likewise.
> > > * gcc.target/i386/pr118713-10-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-10.c: Likewise.
> > > * gcc.target/i386/pr118713-11-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-11.c: Likewise.
> > > * gcc.target/i386/pr118713-12-x32.c: Likewise.
> > > * gcc.target/i386/pr118713-12.c: Likewise.
> > >
> > > Co-Authored-By: Uros Bizjak 
> > > Signed-off-by: H.J. Lu 
> > > ---
> > >  gcc/config/i386/i386-expand.cc| 20 ++--
> > >  gcc/config/i386/i386.md   | 98 +--
> > >  .../gcc.target/i386/pr118713-1-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-1.c| 14 +++
> > >  .../gcc.target/i386/pr118713-10-x32.c |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-10.c   | 15 +++
> > >  .../gcc.target/i386/pr118713-11-x32.c |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-11.c   | 14 +++
> > >  .../gcc.target/i386/pr118713-12-x32.c |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-12.c   | 14 +++
> > >  .../gcc.target/i386/pr118713-2-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-2.c| 15 +++
> > >  .../gcc.target/i386/pr118713-3-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-3.c| 14 +++
> > >  .../gcc.target/i386/pr118713-4-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-4.c| 14 +++
> > >  .../gcc.target/i386/pr118713-5-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-5.c| 13 +++
> > >  .../gcc.target/i386/pr118713-6-x32.c  | 15 +++
> > >  gcc/testsuite/gcc.target/i386/pr118713-6.c| 14 +++
> > >  .../gcc.target/i386/pr118713-7-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-7.c| 13 +++
> > >  .../gcc.target/i386/pr118713-8-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-8.c| 13 +++
> > >  .../gcc.target/i386/pr118713-9-x32.c  |  8 ++
> > >  gcc/testsuite/gcc.target/i386/pr118713-9.c| 14 +++
> > >  26 files changed, 353 insertions(+), 35 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1-x32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10-x32.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr118713-10.c
> > >  create mode 100644 gcc/testsu

Re: [PATCH][stage1] middle-end/80342 - genmatch optimize outer conversions

2025-01-31 Thread Andrew Pinski

On Fri, Jan 31, 2025 at 4:44 AM Richard Biener  wrote:
>
> The following improves genmatch generated code so we avoid more
> spurious SSA assignments to be pushed to the GIMPLE sequence or
> simplifications rejected when we're not supposed to produce any
> for outer and intermediate conversions.

After this goes in, I will test removing the special casing in
phiopt_early_allow which was designed to workaround the extra SSA
assignment happening.

Thanks,
Andrew Pinski

>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, queued for stage1.
>
> Richard.
>
> * genmatch.cc (::gen_transform): Add in_place parameter.
> Assert it isn't set in unexpected places.
> (possible_noop_convert): New.
> (expr::gen_transform): Support in_place and emit code to
> compute a child in-place when the operation is a conversion.
> (dt_simplify::gen_1): Arrange for an outermost conversion
> to be elided by generating the transform of the operand
> in-place.
> * match.pd (__real cepxi (x) -> cos (x)): Use single_use.
> ---
>  gcc/genmatch.cc | 201 +---
>  gcc/match.pd|  10 ++-
>  2 files changed, 160 insertions(+), 51 deletions(-)
>
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index b9a792e2455..a81629c57b2 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -1475,7 +1475,7 @@ public:
>virtual void gen_transform (FILE *, int, const char *, bool, int,
>   const char *, capture_info *,
>   dt_operand ** = 0,
> - int = 0)
> + int = 0, const char * = nullptr)
>  { gcc_unreachable  (); }
>  };
>
> @@ -1528,8 +1528,8 @@ public:
>/* If non-zero, the group for optional handling.  */
>unsigned char opt_grp;
>void gen_transform (FILE *f, int, const char *, bool, int,
> - const char *, capture_info *,
> - dt_operand ** = 0, int = 0) override;
> + const char *, capture_info *, dt_operand ** = 0,
> + int = 0, const char * = nullptr) override;
>  };
>
>  /* An operator that is represented by native C code.  This is always
> @@ -1562,8 +1562,8 @@ public:
>/* The identifier replacement vector.  */
>vec ids;
>void gen_transform (FILE *f, int, const char *, bool, int,
> - const char *, capture_info *,
> - dt_operand ** = 0, int = 0) final override;
> + const char *, capture_info *, dt_operand ** = 0,
> + int = 0, const char * = nullptr) final override;
>  };
>
>  /* A wrapper around another operand that captures its value.  */
> @@ -1583,8 +1583,8 @@ public:
>/* The captured value.  */
>operand *what;
>void gen_transform (FILE *f, int, const char *, bool, int,
> - const char *, capture_info *,
> - dt_operand ** = 0, int = 0) final override;
> + const char *, capture_info *, dt_operand ** = 0,
> + int = 0, const char * = nullptr) final override;
>  };
>
>  /* if expression.  */
> @@ -3186,6 +3186,14 @@ is_conversion (id_base *op)
>   || *op == VIEW_CONVERT_EXPR);
>  }
>
> +bool
> +possible_noop_convert (id_base *op)
> +{
> +  return (*op == CONVERT_EXPR
> + || *op == NOP_EXPR
> + || *op == VIEW_CONVERT_EXPR);
> +}
> +
>  /* Get the type to be used for generating operand POS of OP from the
> various sources.  */
>
> @@ -3239,7 +3247,7 @@ get_operand_type (id_base *op, unsigned pos,
>  void
>  expr::gen_transform (FILE *f, int indent, const char *dest, bool gimple,
>  int depth, const char *in_type, capture_info *cinfo,
> -dt_operand **indexes, int)
> +dt_operand **indexes, int, const char *in_place)
>  {
>id_base *opr = operation;
>/* When we delay operator substituting during lowering of fors we
> @@ -3297,10 +3305,23 @@ expr::gen_transform (FILE *f, int indent, const char 
> *dest, bool gimple,
>if (!type)
>  fatal_at (location, "cannot determine type of operand");
>
> +  bool child_in_place = (!in_place
> +&& gimple
> +&& possible_noop_convert (opr)
> +&& is_a  (ops[0]));
> +
>fprintf_indent (f, indent, "{\n");
>indent += 2;
> -  fprintf_indent (f, indent,
> - "tree _o%d[%u], _r%d;\n", depth, ops.length (), depth);
> +  if (child_in_place)
> +{
> +  fprintf_indent (f, indent, "tree _r%d;\n", depth);
> +  fprintf_indent (f, indent,
> + "gimple_match_op tem_op (res_op->cond.any_else (), "
> + "ERROR_MARK, error_mark_node, 1);\n");
> +}
> +  else
> +fprintf_indent (f, indent,
> +   "tree _o%d[%u], _r%d;\n", depth, ops.length (), depth);
>char op0type[64

Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Jonathan Wakely

On Fri, 31 Jan 2025 at 14:47, Marc Glisse  wrote:
>
> On Fri, 31 Jan 2025, Abhishek Kaushik wrote:
>
> > The current while loop in std::reduce and related functions is hard to
> > vectorize because the loop control variable is hard to detect in icx.
> >
> > `while ((__last - __first) >= 4)`
> >
> > Changing the loop header to a for loop following the OpenMP canonical
> > form allows easy vectorization, resulting in improved performance.
> >
> > `for (; __first <= __last - 4; __first += 4)`
>
> Is that always legal? If the sequence has size 1, is __last - 4 well
> defined?

No. I thought of that and for some reason assumed that since we're
already doing (last - first) it's OK ... but that's nonsense.

We need to check the size before the stride=4 loop.

Re: [PATCH v2] wwwdocs: add a Python postprocessing script

2025-01-31 Thread Gerald Pfeifer

On Wed, 29 Jan 2025, David Malcolm wrote:
>> python3: can't open file '/www/gcc/htdocs- 
>> preformatted/bin/process_html.py': [Errno 2] No such file or directory 
>> bin/process_html.py failed; aborting.
>> 
>> I tried replacing this with just "process_html.py" or
>> "./process_html.py", 
>> alas neither did the job.
> I used SOURCETREE; do we need a different variable?

I don't think there is one to use yet (SOURCETREE is for the web contents).
For now I went with the hardcoded value, though I plan to abstract
the location/name of the script.

> If you've got it working, then please go ahead and do what's needed to
> safely push this and get the changes live.

Done! This is now live on gcc.gnu.org and as a final test I regenerated
the website and compared with my testrun - looks all good.

> Once the python script is in place, I can take spend some cycles looking 
> at reimplementing parts of the mhc functionality in python.

Awesome, thank you!

Please go ahead and commit any tweaks or improvements as you see fit. 
Happy to have a look and/or test, but that is not required.

Gerald

Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

2025-01-31 Thread Abhishek Kaushik

  *
ICX needs to be improved here

Yes, we're trying to fix this but I figure I could also try asking politely.

  *
a user could write such code himself.

But it still makes sense for std::reduce to be faster than a hand-written 
reduce because we assume that as users of stl :)


From: Richard Biener 
Sent: Friday, January 31, 2025 8:27 PM
To: Jonathan Wakely 
Cc: Abhishek Kaushik ; libstd...@gcc.gnu.org 
; gcc-patches@gcc.gnu.org 
Subject: Re: [PATCH] libstdc++: Use canonical loop form in std::reduce

On Fri, Jan 31, 2025 at 2:50 PM Jonathan Wakely  wrote:
>
> On Fri, 31 Jan 2025 at 12:48, Richard Biener  
> wrote:
> >
> > On Fri, Jan 31, 2025 at 12:01 PM Abhishek Kaushik
> >  wrote:
> > >
> > > From 4ac7c7e56e23ed2f4dd2dafdfab6cfa110c14260 Mon Sep 17 00:00:00 2001
> > > From: Abhishek Kaushik 
> > > Date: Fri, 31 Jan 2025 01:28:48 -0800
> > > Subject: [PATCH] libstdc++: Use canonical loop form in std::reduce
> > >
> > > The current while loop in std::reduce and related functions is hard to
> > > vectorize because the loop control variable is hard to detect.
> > >
> > > `while ((__last - __first) >= 4)`
> > >
> > > Changing the loop header to a for loop following the OpenMP canonical
> > > form allows easy vectorization, resulting in improved performance.
> > >
> > > `for (; __first <= __last - 4; __first += 4)`
> > >
> > > This patch modifies the loop header for std::reduce & 
> > > std::transform_reduce.
> >
> > Can you add a testcase to g++.dg/vect/ that is now vectorized but not 
> > before?
>
> According to https://gcc.gnu.org/pipermail/libstdc++/2025-January/060353.html
> this is only a problem for the Intel compiler, not for GCC. So a GCC
> testcase doesn't help.
>
> But if it's only for Intel, then the commit msg should say that.

A testcase showing that GCC can vectorize the result is still appreciated
(unless we already
have one).  I do wonder why we need to fix our standard library of
course, I'd say
ICX needs to be improved here, a user could write such code himself.

Richard.

>
> >
> > Thanks,
> > Richard.
> >
> > > ---
> > >  libstdc++-v3/include/std/numeric | 10 +++---
> > >  1 file changed, 3 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/libstdc++-v3/include/std/numeric 
> > > b/libstdc++-v3/include/std/numeric
> > > index 4d36fcd36d9..9c38ad89e21 100644
> > > --- a/libstdc++-v3/include/std/numeric
> > > +++ b/libstdc++-v3/include/std/numeric
> > > @@ -300,13 +300,12 @@ namespace __detail
> > >static_assert(is_invocable_r_v<_Tp, _BinaryOperation&, __ref, 
> > > __ref>);
> > >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > > {
> > > - while ((__last - __first) >= 4)
> > > + for (; __first <= __last - 4; __first += 4)
> > > {
> > >   _Tp __v1 = __binary_op(__first[0], __first[1]);
> > >   _Tp __v2 = __binary_op(__first[2], __first[3]);
> > >   _Tp __v3 = __binary_op(__v1, __v2);
> > >   __init = __binary_op(__init, __v3);
> > > - __first += 4;
> > > }
> > > }
> > >for (; __first != __last; ++__first)
> > > @@ -381,7 +380,7 @@ namespace __detail
> > >if constexpr (__and_v<__is_random_access_iter<_InputIterator1>,
> > > __is_random_access_iter<_InputIterator2>>)
> > > {
> > > - while ((__last1 - __first1) >= 4)
> > > + for (; __first1 <= __last1 - 4; __first1 += 4, __first2 += 4)
> > > {
> > >   _Tp __v1 = __binary_op1(__binary_op2(__first1[0], __first2[0]),
> > >   __binary_op2(__first1[1], __first2[1]));
> > > @@ -389,8 +388,6 @@ namespace __detail
> > >   __binary_op2(__first1[3], __first2[3]));
> > >   _Tp __v3 = __binary_op1(__v1, __v2);
> > >   __init = __binary_op1(__init, __v3);
> > > - __first1 += 4;
> > > - __first2 += 4;
> > > }
> > > }
> > >for (; __first1 != __last1; ++__first1, (void) ++__first2)
> > > @@ -447,7 +444,7 @@ namespace __detail
> > >  {
> > >if constexpr (__is_random_access_iter<_InputIterator>::value)
> > > {
> > > - while ((__last - __first) >= 4)
> > > + for (; __first <= __last - 4; __first += 4)
> > > {
> > >   _Tp __v1 = __binary_op(__unary_op(__first[0]),
> > >  __unary_op(__first[1]));
> > > @@ -455,7 +452,6 @@ namespace __detail
> > >  __unary_op(__first[3]));
> > >   _Tp __v3 = __binary_op(__v1, __v2);
> > >   __init = __binary_op(__init, __v3);
> > > - __first += 4;
> > > }
> > > }
> > >for (; __first != __last; ++__first)
> > > --
> > > 2.31.1
> > >
> > >
> > >
> > >
> >
>

1 2 >

1 - 100 of 131 matches

Mail list logo