[gcc] Created branch 'mikael/heads/add_scalar_mask_code_gcc14_v01' in namespace 'refs/users'

2024-07-14 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/add_scalar_mask_code_gcc14_v01' was created in 
namespace 'refs/users' pointing to:

 4032ccc4713a... fortran: Correctly evaluate scalar MASK arguments of MINLOC


[gcc(refs/users/mikael/heads/add_scalar_mask_code_gcc14_v01)] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

2024-07-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:4032ccc4713a5d75c02d00432d4cf1dee88dcd12

commit 4032ccc4713a5d75c02d00432d4cf1dee88dcd12
Author: Mikael Morin 
Date:   Sat Jul 13 20:21:20 2024 +0200

fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

Add the preliminary code that the generated expression for MASK may depend
on when generating the inline code to evaluate MINLOC or MAXLOC with a
scalar MASK.

The generated code was only keeping the generated expression but not the
preliminary code, which was sufficient for simple cases such as data
references or simple (scalar) function calls, but was bogus with more
complicated ones.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the
preliminary code generated for MASK to the preliminary code of
MINLOC/MAXLOC.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_17.f90: New test.

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  1 +
 gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++
 2 files changed, 34 insertions(+)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 9ad372113b0c..5ef4f230472a 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_init_se (&maskse, NULL);
   gfc_conv_expr_val (&maskse, maskexpr);
+  gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
   gfc_add_block_to_block (&block, &loop.pre);
   gfc_add_block_to_block (&block, &loop.post);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
new file mode 100644
index ..7e6e586ab03f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
@@ -0,0 +1,33 @@
+! { dg-do run }
+!
+! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK
+! argument is correctly generated.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+   .false., .true., .true.,  &
+   .true. , .true., .false., &
+   .false. /)
+  type bool_wrapper
+logical :: l
+  end type
+  call check_minloc
+  call check_maxloc
+contains
+  subroutine check_minloc
+integer :: a(10)
+integer :: r
+a = data10
+r = minloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 4) stop 11
+  end subroutine
+  subroutine check_maxloc
+integer :: a(10)
+integer :: r
+a = data10
+r = maxloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 5) stop 18
+  end subroutine
+end program


[gcc] Created branch 'mikael/heads/add_code_scalar_mask_minmaxloc_v02' in namespace 'refs/users'

2024-07-14 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/add_code_scalar_mask_minmaxloc_v02' was created in 
namespace 'refs/users' pointing to:

 08267b90e326... fortran: Correctly evaluate scalar MASK arguments of MINLOC


[gcc(refs/users/mikael/heads/add_code_scalar_mask_minmaxloc_v02)] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

2024-07-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:08267b90e3267faa744170c32a19a50435a622d4

commit 08267b90e3267faa744170c32a19a50435a622d4
Author: Mikael Morin 
Date:   Sat Jul 13 20:21:20 2024 +0200

fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

Add the preliminary code that the generated expression for MASK may depend
on when generating the inline code to evaluate MINLOC or MAXLOC with a
scalar MASK.

The generated code was only keeping the generated expression but not the
preliminary code, which was sufficient for simple cases such as data
references or simple (scalar) function calls, but was bogus with more
complicated ones.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the
preliminary code generated for MASK to the preliminary code of
MINLOC/MAXLOC.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_17.f90: New test.

(cherry picked from commit d211100903d4d532d989451243ea00d7fa2e9d5e)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  1 +
 gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++
 2 files changed, 34 insertions(+)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 9ad372113b0c..5ef4f230472a 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_init_se (&maskse, NULL);
   gfc_conv_expr_val (&maskse, maskexpr);
+  gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
   gfc_add_block_to_block (&block, &loop.pre);
   gfc_add_block_to_block (&block, &loop.post);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
new file mode 100644
index ..7e6e586ab03f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
@@ -0,0 +1,33 @@
+! { dg-do run }
+!
+! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK
+! argument is correctly generated.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+   .false., .true., .true.,  &
+   .true. , .true., .false., &
+   .false. /)
+  type bool_wrapper
+logical :: l
+  end type
+  call check_minloc
+  call check_maxloc
+contains
+  subroutine check_minloc
+integer :: a(10)
+integer :: r
+a = data10
+r = minloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 4) stop 11
+  end subroutine
+  subroutine check_maxloc
+integer :: a(10)
+integer :: r
+a = data10
+r = maxloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 5) stop 18
+  end subroutine
+end program


[gcc r15-2026] c, objc: Add -Wunterminated-string-initialization

2024-07-14 Thread Martin Uecker via Gcc-cvs
https://gcc.gnu.org/g:44c9403ed1833ae71a59e84f9e37af3182be0df5

commit r15-2026-g44c9403ed1833ae71a59e84f9e37af3182be0df5
Author: Alejandro Colomar 
Date:   Sat Jun 29 15:10:43 2024 +0200

c, objc: Add -Wunterminated-string-initialization

Warn about the following:

char  s[3] = "foo";

Initializing a char array with a string literal of the same length as
the size of the array is usually a mistake.  It is rarely the case that
one wants to create a non-terminated character sequence from a string
literal.

In some cases, for writing faster code, one may want to use arrays
instead of pointers, since that removes the need for storing an array of
pointers apart from the strings themselves.

char  *log_levels[]   = { "info", "warning", "err" };
vs.
char  log_levels[][7] = { "info", "warning", "err" };

This forces the programmer to specify a size, which might change if a
new entry is later added.  Having no way to enforce null termination is
very dangerous, however, so it is useful to have a warning for this, so
that the compiler can make sure that the programmer didn't make any
mistakes.  This warning catches the bug above, so that the programmer
will be able to fix it and write:

char  log_levels[][8] = { "info", "warning", "err" };

This warning already existed as part of -Wc++-compat, but this patch
allows enabling it separately.  It is also included in -Wextra, since
it may not always be desired (when unterminated character sequences are
wanted), but it's likely to be desired in most cases.

Since Wc++-compat now includes this warning, the test has to be modified
to expect the text of the new warning too, in gcc.dg/Wcxx-compat-14.c.

Link: https://lists.gnu.org/archive/html/groff/2022-11/msg00059.html
Link: https://lists.gnu.org/archive/html/groff/2022-11/msg00063.html
Link: 
https://inbox.sourceware.org/gcc/36da94eb-1cac-5ae8-7fea-ec66160cf...@gmail.com/T/

PR c/115185

gcc/c-family/ChangeLog:

* c.opt: Add -Wunterminated-string-initialization.

gcc/c/ChangeLog:

* c-typeck.cc (digest_init): Separate warnings about character
arrays being initialized as unterminated character sequences
with string literals, from -Wc++-compat, into a new warning,
-Wunterminated-string-initialization.

gcc/ChangeLog:

* doc/invoke.texi: Document the new
-Wunterminated-string-initialization.

gcc/testsuite/ChangeLog:

* gcc.dg/Wcxx-compat-14.c: Adapt the test to match the new text
of the warning, which doesn't say anything about C++ anymore.
* gcc.dg/Wunterminated-string-initialization.c: New test.

Acked-by: Doug McIlroy 
Acked-by: Mike Stump 
Reviewed-by: Sandra Loosemore 
Reviewed-by: Martin Uecker 
Signed-off-by: Alejandro Colomar 
Reviewed-by: Marek Polacek 

Diff:
---
 gcc/c-family/c.opt   |  4 
 gcc/c/c-typeck.cc|  6 +++---
 gcc/doc/invoke.texi  | 20 +++-
 gcc/testsuite/gcc.dg/Wcxx-compat-14.c|  2 +-
 .../gcc.dg/Wunterminated-string-initialization.c |  6 ++
 5 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 5c1006ff321f..a52682d835ce 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1464,6 +1464,10 @@ Wunsuffixed-float-constants
 C ObjC Var(warn_unsuffixed_float_constants) Warning
 Warn about unsuffixed float constants.
 
+Wunterminated-string-initialization
+C ObjC Var(warn_unterminated_string_initialization) Warning LangEnabledBy(C 
ObjC,Wextra || Wc++-compat)
+Warn about character arrays initialized as unterminated character sequences 
with a string literal.
+
 Wunused
 C ObjC C++ ObjC++ LangEnabledBy(C ObjC C++ ObjC++,Wall)
 ; documented in common.opt
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 574114d541fd..7e0f01ed22b9 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -8878,11 +8878,11 @@ digest_init (location_t init_loc, tree type, tree init, 
tree origtype,
pedwarn_init (init_loc, 0,
  ("initializer-string for array of %qT "
   "is too long"), typ1);
- else if (warn_cxx_compat
+ else if (warn_unterminated_string_initialization
   && compare_tree_int (TYPE_SIZE_UNIT (type), len) < 0)
-   warning_at (init_loc, OPT_Wc___compat,
+   warning_at (init_loc, OPT_Wunterminated_string_initialization,
("initializer-string for array of %qT "
-"is too long for C++"), typ1);
+"is too long"), typ1);
  if (compar

[gcc r15-2027] i386: Tweak i386-expand.cc to restore bootstrap on RHEL.

2024-07-14 Thread Roger Sayle via Gcc-cvs
https://gcc.gnu.org/g:74e6dfb23163c2dd670d1d60fbf4c782e0b44b94

commit r15-2027-g74e6dfb23163c2dd670d1d60fbf4c782e0b44b94
Author: Roger Sayle 
Date:   Sun Jul 14 17:22:27 2024 +0100

i386: Tweak i386-expand.cc to restore bootstrap on RHEL.

This is a minor change to restore bootstrap on systems using gcc 4.8
as a host compiler.  The fatal error is:

In file included from gcc/gcc/coretypes.h:471:0,
 from gcc/gcc/config/i386/i386-expand.cc:23:
gcc/gcc/config/i386/i386-expand.cc: In function 'void 
ix86_expand_fp_absneg_operator(rtx_code, machine_mode, rtx_def**)':
./insn-modes.h:315:75: error: temporary of non-literal type 
'scalar_float_mode' in a constant expression
 #define HFmode (scalar_float_mode ((scalar_float_mode::from_int) E_HFmode))
   ^
gcc/gcc/config/i386/i386-expand.cc:2179:8: note: in expansion of macro 
'HFmode'
   case HFmode:
^

The solution is to use the E_?Fmode enumeration constants as case values
in switch statements.

2024-07-14  Roger Sayle  

* config/i386/i386-expand.cc (ix86_expand_fp_absneg_operator):
Use E_?Fmode enumeration constants in switch statement.
(ix86_expand_copysign): Likewise.
(ix86_expand_xorsign): Likewise.

Diff:
---
 gcc/config/i386/i386-expand.cc | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cfcfdd94e8f0..9a31e6df2aa2 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2176,19 +2176,19 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, 
machine_mode mode,
 
   switch (mode)
   {
-  case HFmode:
+  case E_HFmode:
 use_sse = true;
 vmode = V8HFmode;
 break;
-  case BFmode:
+  case E_BFmode:
 use_sse = true;
 vmode = V8BFmode;
 break;
-  case SFmode:
+  case E_SFmode:
 use_sse = TARGET_SSE_MATH && TARGET_SSE;
 vmode = V4SFmode;
 break;
-  case DFmode:
+  case E_DFmode:
 use_sse = TARGET_SSE_MATH && TARGET_SSE2;
 vmode = V2DFmode;
 break;
@@ -2330,19 +2330,19 @@ ix86_expand_copysign (rtx operands[])
 
   switch (mode)
   {
-  case HFmode:
+  case E_HFmode:
 vmode = V8HFmode;
 break;
-  case BFmode:
+  case E_BFmode:
 vmode = V8BFmode;
 break;
-  case SFmode:
+  case E_SFmode:
 vmode = V4SFmode;
 break;
-  case DFmode:
+  case E_DFmode:
 vmode = V2DFmode;
 break;
-  case TFmode:
+  case E_TFmode:
 vmode = mode;
 break;
   default:
@@ -2410,16 +2410,16 @@ ix86_expand_xorsign (rtx operands[])
 
   switch (mode)
   {
-  case HFmode:
+  case E_HFmode:
 vmode = V8HFmode;
 break;
-  case BFmode:
+  case E_BFmode:
 vmode = V8BFmode;
 break;
-  case SFmode:
+  case E_SFmode:
 vmode = V4SFmode;
 break;
-  case DFmode:
+  case E_DFmode:
 vmode = V2DFmode;
 break;
   default:


[gcc r14-10419] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

2024-07-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:55988c48ead9adb6a11b0dffa60ce49bb542074e

commit r14-10419-g55988c48ead9adb6a11b0dffa60ce49bb542074e
Author: Mikael Morin 
Date:   Sat Jul 13 20:21:20 2024 +0200

fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC

Add the preliminary code that the generated expression for MASK may depend
on when generating the inline code to evaluate MINLOC or MAXLOC with a
scalar MASK.

The generated code was only keeping the generated expression but not the
preliminary code, which was sufficient for simple cases such as data
references or simple (scalar) function calls, but was bogus with more
complicated ones.

gcc/fortran/ChangeLog:

* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the
preliminary code generated for MASK to the preliminary code of
MINLOC/MAXLOC.

gcc/testsuite/ChangeLog:

* gfortran.dg/minmaxloc_17.f90: New test.

(cherry picked from commit d211100903d4d532d989451243ea00d7fa2e9d5e)

Diff:
---
 gcc/fortran/trans-intrinsic.cc |  1 +
 gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++
 2 files changed, 34 insertions(+)

diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 9ad372113b0c..5ef4f230472a 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * 
expr, enum tree_code op)
 
   gfc_init_se (&maskse, NULL);
   gfc_conv_expr_val (&maskse, maskexpr);
+  gfc_add_block_to_block (&se->pre, &maskse.pre);
   gfc_init_block (&block);
   gfc_add_block_to_block (&block, &loop.pre);
   gfc_add_block_to_block (&block, &loop.post);
diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 
b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
new file mode 100644
index ..7e6e586ab03f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90
@@ -0,0 +1,33 @@
+! { dg-do run }
+!
+! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK
+! argument is correctly generated.
+
+program p
+  implicit none
+  integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /)
+  logical, parameter :: mask10(*) = (/ .false., .true., .false., &
+   .false., .true., .true.,  &
+   .true. , .true., .false., &
+   .false. /)
+  type bool_wrapper
+logical :: l
+  end type
+  call check_minloc
+  call check_maxloc
+contains
+  subroutine check_minloc
+integer :: a(10)
+integer :: r
+a = data10
+r = minloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 4) stop 11
+  end subroutine
+  subroutine check_maxloc
+integer :: a(10)
+integer :: r
+a = data10
+r = maxloc(a, dim = 1, mask = sum(a) > 0)
+if (r /= 5) stop 18
+  end subroutine
+end program


[gcc] Created branch 'mikael/heads/backport14_PR99798_v01' in namespace 'refs/users'

2024-07-14 Thread Mikael Morin via Gcc-cvs
The branch 'mikael/heads/backport14_PR99798_v01' was created in namespace 
'refs/users' pointing to:

 c80a74602390... fortran: Assume there is no cyclic reference with submodule


[gcc(refs/users/mikael/heads/backport14_PR99798_v01)] fortran: Assume there is no cyclic reference with submodule symbols [PR99798]

2024-07-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c80a7460239037d8cf8426dbb7d03c6ddac09bab

commit c80a7460239037d8cf8426dbb7d03c6ddac09bab
Author: Mikael Morin 
Date:   Sun May 12 15:16:23 2024 +0200

fortran: Assume there is no cyclic reference with submodule symbols 
[PR99798]

This prevents a premature release of memory with procedure symbols from
submodules, causing random compiler crashes.

The problem is a fragile detection of cyclic references, which can match
with procedures host-associated from a module in submodules, in cases where
it shouldn't.  The formal namespace is released, and with it the dummy
argument symbols of the procedure.  But there is no cyclic reference, so the
procedure symbol itself is not released and remains, with pointers to its
dummy arguments now dangling.

The fix adds a condition to avoid the case, and refactors to a new predicate
by the way.  Part of the original condition is also removed, for lack of a
reason to keep it.

PR fortran/99798

gcc/fortran/ChangeLog:

* symbol.cc (gfc_release_symbol): Move the condition guarding
the handling of cyclic references...
(cyclic_reference_break_needed): ... here as a new predicate.
Remove superfluous parts.  Add a condition preventing any premature
release with submodule symbols.

gcc/testsuite/ChangeLog:

* gfortran.dg/submodule_33.f08: New test.

(cherry picked from commit 38d1761c0c94b77a081ccc180d6e039f7a670468)

Diff:
---
 gcc/fortran/symbol.cc  | 54 --
 gcc/testsuite/gfortran.dg/submodule_33.f08 | 20 +++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc
index 8f7deac1d1ee..0a1646def678 100644
--- a/gcc/fortran/symbol.cc
+++ b/gcc/fortran/symbol.cc
@@ -3179,6 +3179,57 @@ gfc_free_symbol (gfc_symbol *&sym)
 }
 
 
+/* Returns true if the symbol SYM has, through its FORMAL_NS field, a reference
+   to itself which should be eliminated for the symbol memory to be released
+   via normal reference counting.
+
+   The implementation is crucial as it controls the proper release of symbols,
+   especially (contained) procedure symbols, which can represent a lot of 
memory
+   through the namespace of their body.
+
+   We try to avoid freeing too much memory (causing dangling pointers), to not
+   leak too much (wasting memory), and to avoid expensive walks of the symbol
+   tree (which would be the correct way to check for a cycle).  */
+
+bool
+cyclic_reference_break_needed (gfc_symbol *sym)
+{
+  /* Normal symbols don't reference themselves.  */
+  if (sym->formal_ns == nullptr)
+return false;
+
+  /* Procedures at the root of the file do have a self reference, but they 
don't
+ have a reference in a parent namespace preventing the release of the
+ procedure namespace, so they can use the normal reference counting.  */
+  if (sym->formal_ns == sym->ns)
+return false;
+
+  /* If sym->refs == 1, we can use normal reference counting.  If sym->refs > 
2,
+ the symbol won't be freed anyway, with or without cyclic reference.  */
+  if (sym->refs != 2)
+return false;
+
+  /* Procedure symbols host-associated from a module in submodules are special,
+ because the namespace of the procedure block in the submodule is different
+ from the FORMAL_NS namespace generated by host-association.  So there are
+ two different namespaces representing the same procedure namespace.  As
+ FORMAL_NS comes from host-association, which only imports symbols visible
+ from the outside (dummy arguments basically), we can assume there is no
+ self reference through FORMAL_NS in that case.  */
+  if (sym->attr.host_assoc && sym->attr.used_in_submodule)
+return false;
+
+  /* We can assume that contained procedures have cyclic references, because
+ the symbol of the procedure itself is accessible in the procedure body
+ namespace.  So we assume that symbols with a formal namespace different
+ from the declaration namespace and two references, one of which is about
+ to be removed, are procedures with just the self reference left.  At this
+ point, the symbol SYM matches that pattern, so we return true here to
+ permit the release of SYM.  */
+  return true;
+}
+
+
 /* Decrease the reference counter and free memory when we reach zero.
Returns true if the symbol has been freed, false otherwise.  */
 
@@ -3188,8 +3239,7 @@ gfc_release_symbol (gfc_symbol *&sym)
   if (sym == NULL)
 return false;
 
-  if (sym->formal_ns != NULL && sym->refs == 2 && sym->formal_ns != sym->ns
-  && (!sym->attr.entry || !sym->module))
+  if (cyclic_reference_break_needed (sym))
 {
   /* As formal_ns contains a reference to sym, delete formal_ns just
 before the deletion of sym.  */
diff --git a/gcc/testsuite/gfortran.d

[gcc r14-10420] fortran: Assume there is no cyclic reference with submodule symbols [PR99798]

2024-07-14 Thread Mikael Morin via Gcc-cvs
https://gcc.gnu.org/g:c80a7460239037d8cf8426dbb7d03c6ddac09bab

commit r14-10420-gc80a7460239037d8cf8426dbb7d03c6ddac09bab
Author: Mikael Morin 
Date:   Sun May 12 15:16:23 2024 +0200

fortran: Assume there is no cyclic reference with submodule symbols 
[PR99798]

This prevents a premature release of memory with procedure symbols from
submodules, causing random compiler crashes.

The problem is a fragile detection of cyclic references, which can match
with procedures host-associated from a module in submodules, in cases where
it shouldn't.  The formal namespace is released, and with it the dummy
argument symbols of the procedure.  But there is no cyclic reference, so the
procedure symbol itself is not released and remains, with pointers to its
dummy arguments now dangling.

The fix adds a condition to avoid the case, and refactors to a new predicate
by the way.  Part of the original condition is also removed, for lack of a
reason to keep it.

PR fortran/99798

gcc/fortran/ChangeLog:

* symbol.cc (gfc_release_symbol): Move the condition guarding
the handling of cyclic references...
(cyclic_reference_break_needed): ... here as a new predicate.
Remove superfluous parts.  Add a condition preventing any premature
release with submodule symbols.

gcc/testsuite/ChangeLog:

* gfortran.dg/submodule_33.f08: New test.

(cherry picked from commit 38d1761c0c94b77a081ccc180d6e039f7a670468)

Diff:
---
 gcc/fortran/symbol.cc  | 54 --
 gcc/testsuite/gfortran.dg/submodule_33.f08 | 20 +++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc
index 8f7deac1d1ee..0a1646def678 100644
--- a/gcc/fortran/symbol.cc
+++ b/gcc/fortran/symbol.cc
@@ -3179,6 +3179,57 @@ gfc_free_symbol (gfc_symbol *&sym)
 }
 
 
+/* Returns true if the symbol SYM has, through its FORMAL_NS field, a reference
+   to itself which should be eliminated for the symbol memory to be released
+   via normal reference counting.
+
+   The implementation is crucial as it controls the proper release of symbols,
+   especially (contained) procedure symbols, which can represent a lot of 
memory
+   through the namespace of their body.
+
+   We try to avoid freeing too much memory (causing dangling pointers), to not
+   leak too much (wasting memory), and to avoid expensive walks of the symbol
+   tree (which would be the correct way to check for a cycle).  */
+
+bool
+cyclic_reference_break_needed (gfc_symbol *sym)
+{
+  /* Normal symbols don't reference themselves.  */
+  if (sym->formal_ns == nullptr)
+return false;
+
+  /* Procedures at the root of the file do have a self reference, but they 
don't
+ have a reference in a parent namespace preventing the release of the
+ procedure namespace, so they can use the normal reference counting.  */
+  if (sym->formal_ns == sym->ns)
+return false;
+
+  /* If sym->refs == 1, we can use normal reference counting.  If sym->refs > 
2,
+ the symbol won't be freed anyway, with or without cyclic reference.  */
+  if (sym->refs != 2)
+return false;
+
+  /* Procedure symbols host-associated from a module in submodules are special,
+ because the namespace of the procedure block in the submodule is different
+ from the FORMAL_NS namespace generated by host-association.  So there are
+ two different namespaces representing the same procedure namespace.  As
+ FORMAL_NS comes from host-association, which only imports symbols visible
+ from the outside (dummy arguments basically), we can assume there is no
+ self reference through FORMAL_NS in that case.  */
+  if (sym->attr.host_assoc && sym->attr.used_in_submodule)
+return false;
+
+  /* We can assume that contained procedures have cyclic references, because
+ the symbol of the procedure itself is accessible in the procedure body
+ namespace.  So we assume that symbols with a formal namespace different
+ from the declaration namespace and two references, one of which is about
+ to be removed, are procedures with just the self reference left.  At this
+ point, the symbol SYM matches that pattern, so we return true here to
+ permit the release of SYM.  */
+  return true;
+}
+
+
 /* Decrease the reference counter and free memory when we reach zero.
Returns true if the symbol has been freed, false otherwise.  */
 
@@ -3188,8 +3239,7 @@ gfc_release_symbol (gfc_symbol *&sym)
   if (sym == NULL)
 return false;
 
-  if (sym->formal_ns != NULL && sym->refs == 2 && sym->formal_ns != sym->ns
-  && (!sym->attr.entry || !sym->module))
+  if (cyclic_reference_break_needed (sym))
 {
   /* As formal_ns contains a reference to sym, delete formal_ns just
 before the deletion of sym.  */
diff --git a/gcc/testsuite

[gcc r15-2029] RISC-V: Add vector type of BFloat16 format

2024-07-14 Thread fengwang via Gcc-cvs
https://gcc.gnu.org/g:666f167bec09d1234e6496c86b566fe1a71f61f0

commit r15-2029-g666f167bec09d1234e6496c86b566fe1a71f61f0
Author: Feng Wang 
Date:   Thu Jun 13 00:32:14 2024 +

RISC-V: Add vector type of BFloat16 format

v3: Rebase
v2: Rebase
The vector type of BFloat16 format is added in this patch;
subsequent extensions to zvfbfmin and zvfbfwma need to be based
on this patch.

Signed-off-by: Feng Wang 
gcc/ChangeLog:

* config/riscv/genrvv-type-indexer.cc (bfloat16_type):
Generate bf16 vector_type and scalar_type in DEF_RVV_TYPE_INDEX.
(bfloat16_wide_type): Ditto.
(same_ratio_eew_bf16_type): Ditto.
(main): Ditto.
* config/riscv/riscv-modes.def (ADJUST_BYTESIZE):
Add vector type for BFloat16.
(RVV_WHOLE_MODES): Add vector type for BFloat16.
(RVV_FRACT_MODE): Ditto.
(RVV_NF4_MODES): Ditto.
(RVV_NF8_MODES): Ditto.
(RVV_NF2_MODES): Ditto.
* config/riscv/riscv-vector-builtins-types.def (vbfloat16mf4_t):
Add builtin vector type for BFloat16.
(vbfloat16mf2_t): Add builtin vector type for BFloat16.
(vbfloat16m1_t): Ditto.
(vbfloat16m2_t): Ditto.
(vbfloat16m4_t): Ditto.
(vbfloat16m8_t): Ditto.
(vbfloat16mf4x2_t): Ditto.
(vbfloat16mf4x3_t): Ditto.
(vbfloat16mf4x4_t): Ditto.
(vbfloat16mf4x5_t): Ditto.
(vbfloat16mf4x6_t): Ditto.
(vbfloat16mf4x7_t): Ditto.
(vbfloat16mf4x8_t): Ditto.
(vbfloat16mf2x2_t): Ditto.
(vbfloat16mf2x3_t): Ditto.
(vbfloat16mf2x4_t): Ditto.
(vbfloat16mf2x5_t): Ditto.
(vbfloat16mf2x6_t): Ditto.
(vbfloat16mf2x7_t): Ditto.
(vbfloat16mf2x8_t): Ditto.
(vbfloat16m1x2_t): Ditto.
(vbfloat16m1x3_t): Ditto.
(vbfloat16m1x4_t): Ditto.
(vbfloat16m1x5_t): Ditto.
(vbfloat16m1x6_t): Ditto.
(vbfloat16m1x7_t): Ditto.
(vbfloat16m1x8_t): Ditto.
(vbfloat16m2x2_t): Ditto.
(vbfloat16m2x3_t): Ditto.
(vbfloat16m2x4_t): Ditto.
(vbfloat16m4x2_t): Ditto.
* config/riscv/riscv-vector-builtins.cc (check_required_extensions):
Add required_ext checking for BFloat16.
* config/riscv/riscv-vector-builtins.def (vbfloat16mf4_t):
Add vector_type for BFloat16 in builtins.def.
(vbfloat16mf4x2_t): Ditto.
(vbfloat16mf4x3_t): Ditto.
(vbfloat16mf4x4_t): Ditto.
(vbfloat16mf4x5_t): Ditto.
(vbfloat16mf4x6_t): Ditto.
(vbfloat16mf4x7_t): Ditto.
(vbfloat16mf4x8_t): Ditto.
(vbfloat16mf2_t): Ditto.
(vbfloat16mf2x2_t): Ditto.
(vbfloat16mf2x3_t): Ditto.
(vbfloat16mf2x4_t): Ditto.
(vbfloat16mf2x5_t): Ditto.
(vbfloat16mf2x6_t): Ditto.
(vbfloat16mf2x7_t): Ditto.
(vbfloat16mf2x8_t): Ditto.
(vbfloat16m1_t): Ditto.
(vbfloat16m1x2_t): Ditto.
(vbfloat16m1x3_t): Ditto.
(vbfloat16m1x4_t): Ditto.
(vbfloat16m1x5_t): Ditto.
(vbfloat16m1x6_t): Ditto.
(vbfloat16m1x7_t): Ditto.
(vbfloat16m1x8_t): Ditto.
(vbfloat16m2_t): Ditto.
(vbfloat16m2x2_t): Ditto.
(vbfloat16m2x3_t): Ditto.
(vbfloat16m2x4_t): Ditto.
(vbfloat16m4_t): Ditto.
(vbfloat16m4x2_t): Ditto.
(vbfloat16m8_t): Ditto.
(double_trunc_bfloat_scalar): Add scalar_type def for BFloat16.
(double_trunc_bfloat_vector): Add vector_type def for BFloat16.
* config/riscv/riscv-vector-builtins.h (RVV_REQUIRE_ELEN_BF_16):
Add required definition of BFloat16 ext.
* config/riscv/riscv-vector-switch.def (ENTRY):
Add vector_type information for BFloat16.
(TUPLE_ENTRY): Add tuple vector_type information for BFloat16.

Diff:
---
 gcc/config/riscv/genrvv-type-indexer.cc  | 115 +++
 gcc/config/riscv/riscv-modes.def |  30 +-
 gcc/config/riscv/riscv-vector-builtins-types.def |  50 ++
 gcc/config/riscv/riscv-vector-builtins.cc|   7 +-
 gcc/config/riscv/riscv-vector-builtins.def   |  55 ++-
 gcc/config/riscv/riscv-vector-builtins.h |   1 +
 gcc/config/riscv/riscv-vector-switch.def |  36 +++
 7 files changed, 291 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/genrvv-type-indexer.cc 
b/gcc/config/riscv/genrvv-type-indexer.cc
index 27cbd14982c1..8626ddeaaa8b 100644
--- a/gcc/config/riscv/genrvv-type-indexer.cc
+++ b/gcc/config/riscv/genrvv-type-indexer.cc
@@ -117,6 +117,42 @@ inttype

[gcc r15-2030] AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]

2024-07-14 Thread Hongyu Wang via Gcc-cvs
https://gcc.gnu.org/g:02a3bf5e2f0c18078bf67fc0002219edba1d76ff

commit r15-2030-g02a3bf5e2f0c18078bf67fc0002219edba1d76ff
Author: Hongyu Wang 
Date:   Sat Jul 13 11:45:31 2024 +0800

AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]

According to the instruction spec of AVX512BF16, the conversion from float
to BF16 is not a simple truncation.  It has special handling for
denormal/NaN, and even for a normal float it adds an extra bias according
to the least significant bit of the bf16 number.  This means we cannot use
vcvtne2ps2bf16 for any bf16 vector shuffle.
The optimization introduced in r15-1368 adds a specific split to convert
HImode permutation with this instruction, so remove it and treat the
BFmode permutation the same as HFmode.

gcc/ChangeLog:

PR target/115889
* config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove.
* config/i386/sse.md (hi_cvt_bf): Remove.
(HI_CVT_BF): Likewise.
(vpermt2_sepcial_bf16_shuffle_<mode>): Likewise.

gcc/testsuite/ChangeLog:

PR target/115889
* gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust output
scan.

Diff:
---
 gcc/config/i386/predicates.md  | 11 ---
 gcc/config/i386/sse.md | 35 --
 .../i386/vpermt2-special-bf16-shufflue.c   |  3 +-
 3 files changed, 1 insertion(+), 48 deletions(-)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a894847adaf7..5d0bb1e0f54a 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2327,14 +2327,3 @@
 
   return true;
 })
-
-;; Check that each element is odd and incrementally increasing from 1
-(define_predicate "vcvtne2ps2bf_parallel"
-  (and (match_code "const_vector")
-   (match_code "const_int" "a"))
-{
-  for (int i = 0; i < XVECLEN (op, 0); ++i)
-if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
-  return false;
-  return true;
-})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b3b4697924b5..c134494cd200 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -31460,38 +31460,3 @@
   "TARGET_AVXVNNIINT16"
   "vpdp\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "vex")])
-
-(define_mode_attr hi_cvt_bf
-  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
-
-(define_mode_attr HI_CVT_BF
-  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
-
-(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_"
-  [(set (match_operand:VI2_AVX512F 0 "register_operand")
-   (unspec:VI2_AVX512F
- [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
-  (match_operand:VI2_AVX512F 2 "register_operand")
-  (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
-  UNSPEC_VPERMT2))]
-  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  rtx op0 = gen_reg_rtx (mode);
-  operands[2] = lowpart_subreg (mode,
-   force_reg (mode, operands[2]),
-   mode);
-  operands[3] = lowpart_subreg (mode,
-   force_reg (mode, operands[3]),
-   mode);
-
-  emit_insn (gen_avx512f_cvtne2ps2bf16_(op0,
-  operands[3],
-  operands[2]));
-  emit_move_insn (operands[0], lowpart_subreg (mode, op0,
-  mode));
-  DONE;
-}
-[(set_attr "mode" "")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c 
b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
index 5c65f2a98847..e504f3f4cd70 100755
--- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
-/* { dg-final { scan-assembler-not "vpermi2b" } } */
-/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
 
 typedef __bf16 v8bf __attribute__((vector_size(16)));
 typedef __bf16 v16bf __attribute__((vector_size(32)));


[gcc r15-2031] RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic

2024-07-14 Thread fengwang via Gcc-cvs
https://gcc.gnu.org/g:281f021ed4fbf9c2336048e34b6b40c6f7119baa

commit r15-2031-g281f021ed4fbf9c2336048e34b6b40c6f7119baa
Author: Feng Wang 
Date:   Mon Jun 17 01:59:57 2024 +

RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic

v3: Modify warning message in riscv.cc
v2: Rebase
According to the intrinsic doc, this patch adds the 'Zvfbfmin' and
'Zvfbfwma' intrinsic functions.

Signed-off-by: Feng Wang 
gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc (class vfncvtbf16_f):
Add 'Zvfbfmin' intrinsic in bases.
(class vfwcvtbf16_f): Ditto.
(class vfwmaccbf16): Add 'Zvfbfwma' intrinsic in bases.
(BASE): Add BASE macro for 'Zvfbfmin' and 'Zvfbfwma'.
* config/riscv/riscv-vector-builtins-bases.h: Add declaration for 
'Zvfbfmin' and 'Zvfbfwma'.
* config/riscv/riscv-vector-builtins-functions.def 
(REQUIRED_EXTENSIONS):
Add builtins def for 'Zvfbfmin' and 'Zvfbfwma'.
(vfncvtbf16_f): Ditto.
(vfncvtbf16_f_frm): Ditto.
(vfwcvtbf16_f): Ditto.
(vfwmaccbf16): Ditto.
(vfwmaccbf16_frm): Ditto.
* config/riscv/riscv-vector-builtins-shapes.cc (supports_vectype_p):
Add vector intrinsic build judgment for BFloat16.
(build_all): Ditto.
(BASE_NAME_MAX_LEN): Adjust max length.
* config/riscv/riscv-vector-builtins-types.def (DEF_RVV_F32_OPS):
Add new operand type for BFloat16.
(vfloat32mf2_t): Ditto.
(vfloat32m1_t): Ditto.
(vfloat32m2_t): Ditto.
(vfloat32m4_t): Ditto.
(vfloat32m8_t): Ditto.
* config/riscv/riscv-vector-builtins.cc (DEF_RVV_F32_OPS): Ditto.
(validate_instance_type_required_extensions):
Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'.
* config/riscv/riscv-vector-builtins.h (enum required_ext):
Add required_ext declaration for 'Zvfbfmin' and 'Zvfbfwma'.
(reqired_ext_to_isa_name): Ditto.
(required_extensions_specified): Ditto.
(struct function_group_info): Add match case for 'Zvfbfmin' and 
'Zvfbfwma'.
* config/riscv/riscv.cc (riscv_validate_vector_type):
Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'.

Diff:
---
 gcc/config/riscv/riscv-vector-builtins-bases.cc| 69 ++
 gcc/config/riscv/riscv-vector-builtins-bases.h |  7 +++
 .../riscv/riscv-vector-builtins-functions.def  | 15 +
 gcc/config/riscv/riscv-vector-builtins-shapes.cc   | 31 +-
 gcc/config/riscv/riscv-vector-builtins-types.def   | 13 
 gcc/config/riscv/riscv-vector-builtins.cc  | 67 +
 gcc/config/riscv/riscv-vector-builtins.h   | 34 +++
 gcc/config/riscv/riscv.cc  | 13 ++--
 8 files changed, 232 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 6483faba39c4..193392fbcc2a 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2417,6 +2417,60 @@ public:
   }
 };
 
+/* Implements vfncvtbf16_f. */
+template <enum frm_op_type FRM_OP = NO_FRM>
+class vfncvtbf16_f : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override
+  {
+return FRM_OP == HAS_FRM;
+  }
+
+  bool may_require_frm_p () const override { return true; }
+
+  rtx expand (function_expander &e) const override
+  {
+return e.use_exact_insn (code_for_pred_trunc_to_bf16 (e.vector_mode ()));
+  }
+};
+
+/* Implements vfwcvtbf16_f. */
+class vfwcvtbf16_f : public function_base
+{
+public:
+  rtx expand (function_expander &e) const override
+  {
+return e.use_exact_insn (code_for_pred_extend_bf16_to (e.vector_mode ()));
+  }
+};
+
+/* Implements vfwmaccbf16. */
+template <enum frm_op_type FRM_OP = NO_FRM>
+class vfwmaccbf16 : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override
+  {
+return FRM_OP == HAS_FRM;
+  }
+
+  bool may_require_frm_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_bf16_mul_scalar (e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_bf16_mul (e.vector_mode ()));
+gcc_unreachable ();
+  }
+};
+
 static CONSTEXPR const vsetvl vsetvl_obj;
 static CONSTEXPR const vsetvl vsetvlmax_obj;
 static CONSTEXPR const loadstore vle_obj;
@@ -2734,6 +2788,14 @@ static CONSTEXPR const crypto_vv   
vsm4r_obj;
 static CONSTEXPR const vsm3me vsm3me_obj;
 static CONSTEXPR const vaeskf2_vsm3c   vsm3c_obj;
 
+/* Zvfbfmin */
+static CONSTEXPR const vfncvtbf16_f vf

[gcc r15-2032] RISC-V: Add md files for vector BFloat16

2024-07-14 Thread fengwang via Gcc-cvs
https://gcc.gnu.org/g:9f521632dd9ce71ce28ff1da9c161f76bc20fe3e

commit r15-2032-g9f521632dd9ce71ce28ff1da9c161f76bc20fe3e
Author: Feng Wang 
Date:   Tue Jun 18 06:13:35 2024 +

RISC-V: Add md files for vector BFloat16

V3: Add Bfloat16 vector insn in generic-vector-ooo.md
v2: Rebase
According to the BFloat16 spec, some vector iterators and new patterns
are added to the md files.

Signed-off-by: Feng Wang 
gcc/ChangeLog:

* config/riscv/generic-vector-ooo.md: Add def_insn_reservation for 
vector BFloat16.
* config/riscv/riscv.md: Add new insn name for vector BFloat16.
* config/riscv/vector-iterators.md: Add some iterators for vector 
BFloat16.
* config/riscv/vector.md: Add some attribute for vector BFloat16.
* config/riscv/vector-bfloat16.md: New file. Add insn patterns for
vector BFloat16.

Diff:
---
 gcc/config/riscv/generic-vector-ooo.md |   4 +-
 gcc/config/riscv/riscv.md  |  13 ++-
 gcc/config/riscv/vector-bfloat16.md| 135 ++
 gcc/config/riscv/vector-iterators.md   | 169 -
 gcc/config/riscv/vector.md | 103 +---
 5 files changed, 407 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/generic-vector-ooo.md 
b/gcc/config/riscv/generic-vector-ooo.md
index 5e933c838418..efe6bc41e864 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -53,7 +53,7 @@
 (define_insn_reservation "vec_fcmp" 3
   (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
-   vfncvtftoi,vfncvtftof")
+   vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16")
   "vxu_ooo_issue,vxu_ooo_alu")
 
 ;; Vector integer multiplication.
@@ -69,7 +69,7 @@
 
 ;; Vector float multiplication and FMA.
 (define_insn_reservation "vec_fmul" 6
-  (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd")
+  (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16")
   "vxu_ooo_issue,vxu_ooo_alu")
 
 ;; Vector crypto, assumed to be a generic operation for now.
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5dee837a5878..379015c60de8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -200,6 +200,7 @@
   RVVMF64BI,RVVMF32BI,RVVMF16BI,RVVMF8BI,RVVMF4BI,RVVMF2BI,RVVM1BI,
   RVVM8QI,RVVM4QI,RVVM2QI,RVVM1QI,RVVMF2QI,RVVMF4QI,RVVMF8QI,
   RVVM8HI,RVVM4HI,RVVM2HI,RVVM1HI,RVVMF2HI,RVVMF4HI,
+  RVVM8BF,RVVM4BF,RVVM2BF,RVVM1BF,RVVMF2BF,RVVMF4BF,
   RVVM8HF,RVVM4HF,RVVM2HF,RVVM1HF,RVVMF2HF,RVVMF4HF,
   RVVM8SI,RVVM4SI,RVVM2SI,RVVM1SI,RVVMF2SI,
   RVVM8SF,RVVM4SF,RVVM2SF,RVVM1SF,RVVMF2SF,
@@ -219,6 +220,11 @@
   RVVM2x4HI,RVVM1x4HI,RVVMF2x4HI,RVVMF4x4HI,
   RVVM2x3HI,RVVM1x3HI,RVVMF2x3HI,RVVMF4x3HI,
   RVVM4x2HI,RVVM2x2HI,RVVM1x2HI,RVVMF2x2HI,RVVMF4x2HI,
+  RVVM1x8BF,RVVMF2x8BF,RVVMF4x8BF,RVVM1x7BF,RVVMF2x7BF,
+  RVVMF4x7BF,RVVM1x6BF,RVVMF2x6BF,RVVMF4x6BF,RVVM1x5BF,
+  RVVMF2x5BF,RVVMF4x5BF,RVVM2x4BF,RVVM1x4BF,RVVMF2x4BF,
+  RVVMF4x4BF,RVVM2x3BF,RVVM1x3BF,RVVMF2x3BF,RVVMF4x3BF,
+  RVVM4x2BF,RVVM2x2BF,RVVM1x2BF,RVVMF2x2BF,RVVMF4x2BF,
   RVVM1x8HF,RVVMF2x8HF,RVVMF4x8HF,RVVM1x7HF,RVVMF2x7HF,
   RVVMF4x7HF,RVVM1x6HF,RVVMF2x6HF,RVVMF4x6HF,RVVM1x5HF,
   RVVMF2x5HF,RVVMF4x5HF,RVVM2x4HF,RVVM1x4HF,RVVMF2x4HF,
@@ -462,6 +468,10 @@
 ;; vsm4rcrypto vector SM4 Rounds instructions
 ;; vsm3me   crypto vector SM3 Message Expansion instructions
 ;; vsm3ccrypto vector SM3 Compression instructions
+;; 18.Vector BF16 instrctions
+;; vfncvtbf16  vector narrowing single floating-point to brain floating-point 
instruction
+;; vfwcvtbf16  vector widening brain floating-point to single floating-point 
instruction
+;; vfwmaccbf16  vector BF16 widening multiply-accumulate
 (define_attr "type"
   "unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
@@ -483,7 +493,7 @@
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,

vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,

vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
-   vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c"
+   
vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16"
   (cond [(eq_attr "got" "load") (const_string "load")
 
 ;; If a doubleword move uses these expensive instructions,
@@ -4373,6 +4383,7 @@
 (include "generic-ooo.md")
 (include "vector.md")
 (include "vector-crypto.md")
+(include "vector-bfloat16.md")
 (include "zicond.md")
 (include "sfb.md")
 (include "zc.md")
diff --git a/gcc/config/riscv/vector-bfloat16.md 
b/gcc/config/riscv/vector-bfloat16.md
new file mode 100644
index ..562aa8ee5ed7
--- /dev/null
+++ b/gcc/config/riscv/vector-bfloat16.md
@@ -0,0 +1,135 @@
+;; Machine des

[gcc r15-2033] CRIS: Adjust gcc.dg/tree-ssa/loop-1.c

2024-07-14 Thread Hans-Peter Nilsson via Gcc-cvs
https://gcc.gnu.org/g:da37a272beceacb362373a9eab1e915db587be9e

commit r15-2033-gda37a272beceacb362373a9eab1e915db587be9e
Author: Hans-Peter Nilsson 
Date:   Mon Jul 15 04:57:06 2024 +0200

CRIS: Adjust gcc.dg/tree-ssa/loop-1.c

With r15-1619-g3b9b8d6cfdf593, there's an XPASS and a FAIL
for this test-case for cris-elf.  Looking at the generated
code, _foo is indeed no longer saved in a register for CRIS.
While that looks like a regression, coremark results are the
same around this revision, so simply adjust the test-case:
remove the target-specific exceptions for cris-*-*.

* gcc.dg/tree-ssa/loop-1.c: Remove target-specific test
and xfail to adjust for recent changes in register allocation.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c
index a531b7584a64..a8f2c3bbfdb4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c
@@ -43,16 +43,15 @@ int xxx(void)
 /* The SH targets always use separate instructions to load the address
and to do the actual call - bsr is only generated by link time
relaxation.  */
-/* CRIS and MSP430 keep the address in a register.  */
+/* MSP430 keeps the address in a register.  */
 /* m68k sometimes puts the address in a register, depending on CPU and PIC.  */
 
-/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* 
sh*-*-* cris-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* 
visium-*-* nvptx*-*-* pdp11*-*-* msp430-*-* amdgcn*-*-* } } } */
+/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* 
sh*-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* 
visium-*-* nvptx*-*-* pdp11*-*-* msp430-*-* amdgcn*-*-* } } } */
 /* { dg-final { scan-assembler-times "foo,%r" 5 { target hppa*-*-* } } } */
 /* { dg-final { scan-assembler-times "= foo"  5 { target ia64*-*-* } } } */
 /* { dg-final { scan-assembler-times "call\[ \t\]*_foo" 5 { target 
i?86-*-mingw* i?86-*-cygwin* } } } */
 /* { dg-final { scan-assembler-times "call\[ \t\]*foo" 5 { target 
x86_64-*-mingw* } } } */
 /* { dg-final { scan-assembler-times "jsr|bsrf|blink\ttr?,r18"  5 { target 
sh*-*-* } } } */
-/* { dg-final { scan-assembler-times "Jsr \\\$r" 5 { target cris-*-* } } } */
 /* { dg-final { scan-assembler-times "\[jb\]sr" 5 { target fido-*-* m68k-*-* 
pdp11-*-* } } } */
 /* { dg-final { scan-assembler-times "bra *tr,r\[1-9\]*,r21" 5 { target 
visium-*-* } } } */
 /* { dg-final { scan-assembler-times "(?n)\[ \t\]call\[ \t\].*\[ \t\]foo," 5 { 
target nvptx*-*-* } } } */


[gcc r15-2034] aarch64: Fix the expected output of the test cpy_1.c [PR115892]

2024-07-14 Thread Surya Kumari Jangala via Gcc-cvs
https://gcc.gnu.org/g:8b1492012e5a11e9400e30ee4ae9195c08a2a81e

commit r15-2034-g8b1492012e5a11e9400e30ee4ae9195c08a2a81e
Author: Surya Kumari Jangala 
Date:   Thu Jul 11 11:02:17 2024 -0500

aarch64: Fix the expected output of the test cpy_1.c [PR115892]

The fix at r15-1619-g3b9b8d6cfdf593 results in a rearrangement of
instructions generated for cpy_1.c. This patch fixes the expected output.

2024-07-12  Surya Kumari Jangala  

gcc/testsuite:
PR testsuite/115892
* gcc.target/aarch64/sve/acle/general/cpy_1.c: Update expected
output.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
index 57b56a7e256f..1d669913df2e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
@@ -11,9 +11,15 @@ extern "C" {
 /*
 ** dup_x0_m:
 ** ...
+** (
 ** add (x[0-9]+), x0, #?1
 ** mov (p[0-7])\.b, p15\.b
 ** mov z0\.d, \2/m, \1
+** |
+** mov (p[0-7])\.b, p15\.b
+** add (x[0-9]+), x0, #?1
+** mov z0\.d, \3/m, \4
+** )
 ** ...
 ** ret
 */


[gcc r12-10617] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

2024-07-14 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:e1427b39d28f382d21e7a0ea1714b3250e0a6e5d

commit r12-10617-ge1427b39d28f382d21e7a0ea1714b3250e0a6e5d
Author: liuhongt 
Date:   Fri Jul 12 09:39:23 2024 +0800

Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

-  _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0);
-  # DEBUG old => (long int) _5
+  _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, 
__atomic_fetch_or_8);
+  # DEBUG old => NULL
   # DEBUG BEGIN_STMT
-  # DEBUG D#2 => _5 & 1
+  # DEBUG D#2 => NULL
...
-  _10 = ~_5;
-  _8 = (_Bool) _10;
-  # DEBUG ret => _8
+  _8 = _6 == 0;
+  # DEBUG ret => (_Bool) _10

confirmed.  convert_atomic_bit_not does this, it checks for single_use
and removes the def, failing to release the name (which would fix this up
IIRC).

Note the function removes stmts in "wrong" order (before uses of LHS
are removed), so it requires larger surgery.  And it leaks SSA names.

gcc/ChangeLog:

PR target/115872
* tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after 
use_nop_stmt is removed.
(optimize_atomic_bit_test_and): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115872.c: New test.

(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr115872.c | 16 
 gcc/tree-ssa-ccp.cc  | 12 
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c 
b/gcc/testsuite/gcc.target/i386/pr115872.c
new file mode 100644
index ..937004456d37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115872.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g" } */
+
+long set_work_pending_p;
+_Bool set_work_pending() {
+  _Bool __trans_tmp_1;
+  long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0);
+  __trans_tmp_1 = old & mask;
+  return !__trans_tmp_1;
+}
+void __queue_work() {
+  _Bool ret = set_work_pending();
+  if (ret)
+__queue_work();
+}
+
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 42a02dccaeb1..3c63f2dd8a3b 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -3306,9 +3306,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
 return nullptr;
 
   gimple_stmt_iterator gsi;
-  gsi = gsi_for_stmt (use_stmt);
-  gsi_remove (&gsi, true);
   tree var = make_ssa_name (TREE_TYPE (lhs));
+  /* use_stmt need to be removed after use_nop_stmt,
+ so use_lhs can be released.  */
+  gimple *use_stmt_removal = use_stmt;
   use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
@@ -3318,6 +3319,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
   gsi_insert_after (&gsi, g, GSI_NEW_STMT);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_remove (&gsi, true);
+  gsi = gsi_for_stmt (use_stmt_removal);
+  gsi_remove (&gsi, true);
   return use_stmt;
 }
 
@@ -3569,8 +3572,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   */
}
  var = make_ssa_name (TREE_TYPE (use_rhs));
- gsi = gsi_for_stmt (use_stmt);
- gsi_remove (&gsi, true);
+ gimple* use_stmt_removal = use_stmt;
  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
   and_mask);
  gsi = gsi_for_stmt (use_nop_stmt);
@@ -3584,6 +3586,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
  gsi = gsi_for_stmt (use_nop_stmt);
  gsi_remove (&gsi, true);
+ gsi = gsi_for_stmt (use_stmt_removal);
+ gsi_remove (&gsi, true);
}
}
  else


[gcc r13-8913] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

2024-07-14 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:9a1cdaa5e8441394d613f5f3401e7aab21efe8f0

commit r13-8913-g9a1cdaa5e8441394d613f5f3401e7aab21efe8f0
Author: liuhongt 
Date:   Fri Jul 12 09:39:23 2024 +0800

Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

-  _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0);
-  # DEBUG old => (long int) _5
+  _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, 
__atomic_fetch_or_8);
+  # DEBUG old => NULL
   # DEBUG BEGIN_STMT
-  # DEBUG D#2 => _5 & 1
+  # DEBUG D#2 => NULL
...
-  _10 = ~_5;
-  _8 = (_Bool) _10;
-  # DEBUG ret => _8
+  _8 = _6 == 0;
+  # DEBUG ret => (_Bool) _10

confirmed.  convert_atomic_bit_not does this, it checks for single_use
and removes the def, failing to release the name (which would fix this up
IIRC).

Note the function removes stmts in "wrong" order (before uses of LHS
are removed), so it requires larger surgery.  And it leaks SSA names.

gcc/ChangeLog:

PR target/115872
* tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after 
use_nop_stmt is removed.
(optimize_atomic_bit_test_and): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115872.c: New test.

(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr115872.c | 16 
 gcc/tree-ssa-ccp.cc  | 12 
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c 
b/gcc/testsuite/gcc.target/i386/pr115872.c
new file mode 100644
index ..937004456d37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115872.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g" } */
+
+long set_work_pending_p;
+_Bool set_work_pending() {
+  _Bool __trans_tmp_1;
+  long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0);
+  __trans_tmp_1 = old & mask;
+  return !__trans_tmp_1;
+}
+void __queue_work() {
+  _Bool ret = set_work_pending();
+  if (ret)
+__queue_work();
+}
+
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index 2e552b330b74..6c9da603ef95 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -3321,9 +3321,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
 return nullptr;
 
   gimple_stmt_iterator gsi;
-  gsi = gsi_for_stmt (use_stmt);
-  gsi_remove (&gsi, true);
   tree var = make_ssa_name (TREE_TYPE (lhs));
+  /* use_stmt need to be removed after use_nop_stmt,
+ so use_lhs can be released.  */
+  gimple *use_stmt_removal = use_stmt;
   use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
@@ -,6 +3334,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
   gsi_insert_after (&gsi, g, GSI_NEW_STMT);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_remove (&gsi, true);
+  gsi = gsi_for_stmt (use_stmt_removal);
+  gsi_remove (&gsi, true);
   return use_stmt;
 }
 
@@ -3635,8 +3638,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   */
}
  var = make_ssa_name (TREE_TYPE (use_rhs));
- gsi = gsi_for_stmt (use_stmt);
- gsi_remove (&gsi, true);
+ gimple* use_stmt_removal = use_stmt;
  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
   and_mask);
  gsi = gsi_for_stmt (use_nop_stmt);
@@ -3653,6 +3655,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
  gsi = gsi_for_stmt (use_nop_stmt);
  gsi_remove (&gsi, true);
+ gsi = gsi_for_stmt (use_stmt_removal);
+ gsi_remove (&gsi, true);
}
}
  else


[gcc r15-2035] RISC-V: Implement locality for __builtin_prefetch

2024-07-14 Thread Monk Chiang via Gcc-cvs
https://gcc.gnu.org/g:bf26413fc4081dfd18b915580b35bdb71481327e

commit r15-2035-gbf26413fc4081dfd18b915580b35bdb71481327e
Author: Monk Chiang 
Date:   Thu Jul 6 14:05:17 2023 +0800

RISC-V: Implement locality for __builtin_prefetch

This patch adds the Zihintntl instructions to the prefetch pattern.
Zicbop has prefetch instructions. Zihintntl has NTL instructions.
Insert an NTL instruction before the prefetch instruction if the target
has the Zihintntl extension.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_print_operand): Add 'L' letter
to print zihintntl instructions string.
* config/riscv/riscv.md (prefetch): Add zihintntl instructions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/prefetch-zicbop.c: New test.
* gcc.target/riscv/prefetch-zihintntl.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc  | 22 ++
 gcc/config/riscv/riscv.md  | 10 +++---
 gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c   | 20 
 .../gcc.target/riscv/prefetch-zihintntl.c  | 20 
 4 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 53ab2f1a8814..084a592a313c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6488,6 +6488,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char 
*p)
'A' Print the atomic operation suffix for memory model OP.
'I' Print the LR suffix for memory model OP.
'J' Print the SC suffix for memory model OP.
+   'L' Print a non-temporal locality hints instruction.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
@@ -6682,6 +6683,27 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 }
 
+case 'L':
+  {
+   const char *ntl_hint = NULL;
+   switch (INTVAL (op))
+ {
+ case 0:
+   ntl_hint = "ntl.all";
+   break;
+ case 1:
+   ntl_hint = "ntl.pall";
+   break;
+ case 2:
+   ntl_hint = "ntl.p1";
+   break;
+ }
+
+  if (ntl_hint)
+   asm_fprintf (file, "%s\n\t", ntl_hint);
+  break;
+  }
+
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 379015c60de8..46c46039c33a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4113,12 +4113,16 @@
 {
   switch (INTVAL (operands[1]))
   {
-case 0: return "prefetch.r\t%a0";
-case 1: return "prefetch.w\t%a0";
+case 0: return TARGET_ZIHINTNTL ? "%L2prefetch.r\t%a0" : "prefetch.r\t%a0";
+case 1: return TARGET_ZIHINTNTL ? "%L2prefetch.w\t%a0" : "prefetch.w\t%a0";
 default: gcc_unreachable ();
   }
 }
-  [(set_attr "type" "store")])
+  [(set_attr "type" "store")
+   (set (attr "length") (if_then_else (and (match_test "TARGET_ZIHINTNTL")
+  (match_test "IN_RANGE (INTVAL 
(operands[2]), 0, 2)"))
+ (const_string "8")
+ (const_string "4")))])
 
 (define_insn "riscv_prefetchi_"
   [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r")
diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c 
b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c
new file mode 100644
index ..0faa120f1f79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c
@@ -0,0 +1,20 @@
+/* { dg-do compile target { { rv64-*-*}}} */
+/* { dg-options "-march=rv64gc_zicbop -mabi=lp64" } */
+
+void foo (char *p)
+{
+  __builtin_prefetch (p, 0, 0);
+  __builtin_prefetch (p, 0, 1);
+  __builtin_prefetch (p, 0, 2);
+  __builtin_prefetch (p, 0, 3);
+  __builtin_prefetch (p, 1, 0);
+  __builtin_prefetch (p, 1, 1);
+  __builtin_prefetch (p, 1, 2);
+  __builtin_prefetch (p, 1, 3);
+}
+
+/* { dg-final { scan-assembler-not "ntl.all\t" } } */
+/* { dg-final { scan-assembler-not "ntl.pall\t" } } */
+/* { dg-final { scan-assembler-not "ntl.p1\t" } } */
+/* { dg-final { scan-assembler-times "prefetch.r" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w" 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c 
b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c
new file mode 100644
index ..78a3afe68333
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c
@@ -0,0 +1,20 @@
+/* { dg-do compile target { { rv64-*-*}}} */
+/* { dg-options "-march=rv64gc_zicbop_zihintntl -mabi=lp64" } */
+
+void foo (char *p)
+{
+  __builtin_prefetch (p, 0, 0);
+  __builtin_prefetch (p, 0, 1);
+  __builtin_prefetch (p, 0, 2);
+  __builtin_prefetch (p, 0, 3);
+  __builtin_prefetch (p, 1, 0);
+  __builtin_prefetch (p, 1, 1);
+  __builtin_prefetch (p, 1, 2);
+ 

[gcc r14-10422] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

2024-07-14 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:13bfc385b0baebd22aeabb0d90915f2e9b18febe

commit r14-10422-g13bfc385b0baebd22aeabb0d90915f2e9b18febe
Author: liuhongt 
Date:   Fri Jul 12 09:39:23 2024 +0800

Fix SSA_NAME leak due to def_stmt is removed before use_stmt.

-  _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0);
-  # DEBUG old => (long int) _5
+  _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, 
__atomic_fetch_or_8);
+  # DEBUG old => NULL
   # DEBUG BEGIN_STMT
-  # DEBUG D#2 => _5 & 1
+  # DEBUG D#2 => NULL
...
-  _10 = ~_5;
-  _8 = (_Bool) _10;
-  # DEBUG ret => _8
+  _8 = _6 == 0;
+  # DEBUG ret => (_Bool) _10

confirmed.  convert_atomic_bit_not does this, it checks for single_use
and removes the def, failing to release the name (which would fix this up
IIRC).

Note the function removes stmts in "wrong" order (before uses of LHS
are removed), so it requires larger surgery.  And it leaks SSA names.

gcc/ChangeLog:

PR target/115872
* tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after 
use_nop_stmt is removed.
(optimize_atomic_bit_test_and): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr115872.c: New test.

(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr115872.c | 16 
 gcc/tree-ssa-ccp.cc  | 12 
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c 
b/gcc/testsuite/gcc.target/i386/pr115872.c
new file mode 100644
index ..937004456d37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115872.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g" } */
+
+long set_work_pending_p;
+_Bool set_work_pending() {
+  _Bool __trans_tmp_1;
+  long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0);
+  __trans_tmp_1 = old & mask;
+  return !__trans_tmp_1;
+}
+void __queue_work() {
+  _Bool ret = set_work_pending();
+  if (ret)
+__queue_work();
+}
+
diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc
index f6a5cd0ee6e0..cc78ff20bb81 100644
--- a/gcc/tree-ssa-ccp.cc
+++ b/gcc/tree-ssa-ccp.cc
@@ -3331,9 +3331,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
 return nullptr;
 
   gimple_stmt_iterator gsi;
-  gsi = gsi_for_stmt (use_stmt);
-  gsi_remove (&gsi, true);
   tree var = make_ssa_name (TREE_TYPE (lhs));
+  /* use_stmt need to be removed after use_nop_stmt,
+ so use_lhs can be released.  */
+  gimple *use_stmt_removal = use_stmt;
   use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT);
@@ -3343,6 +3344,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple 
*use_stmt,
   gsi_insert_after (&gsi, g, GSI_NEW_STMT);
   gsi = gsi_for_stmt (use_not_stmt);
   gsi_remove (&gsi, true);
+  gsi = gsi_for_stmt (use_stmt_removal);
+  gsi_remove (&gsi, true);
   return use_stmt;
 }
 
@@ -3645,8 +3648,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
   */
}
  var = make_ssa_name (TREE_TYPE (use_rhs));
- gsi = gsi_for_stmt (use_stmt);
- gsi_remove (&gsi, true);
+ gimple* use_stmt_removal = use_stmt;
  g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs,
   and_mask);
  gsi = gsi_for_stmt (use_nop_stmt);
@@ -3663,6 +3665,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
  gsi = gsi_for_stmt (use_nop_stmt);
  gsi_remove (&gsi, true);
+ gsi = gsi_for_stmt (use_stmt_removal);
+ gsi_remove (&gsi, true);
}
}
  else


[gcc r15-2036] arm: Fix the expected output of the test pr111235.c [PR115894]

2024-07-14 Thread Surya Kumari Jangala via Gcc-cvs
https://gcc.gnu.org/g:60ba989220d9dec07d82009b0dafe684e652577f

commit r15-2036-g60ba989220d9dec07d82009b0dafe684e652577f
Author: Surya Kumari Jangala 
Date:   Mon Jul 15 00:03:06 2024 -0500

arm: Fix the expected output of the test pr111235.c  [PR115894]

With r15-1619-g3b9b8d6cfdf593, pr111235.c fails due to different
registers used in ldrexd instruction. The key part of this test is that
the compiler generates LDREXD. The registers used for that are pretty
much irrelevant as they are not matched with any other operations within
the test. This patch changes the test to test only for the mnemonic and
not for any of the operands.

2024-07-15  Surya Kumari Jangala  

gcc/testsuite:
PR testsuite/115894
* gcc.target/arm/pr111235.c: Update expected output.

Diff:
---
 gcc/testsuite/gcc.target/arm/pr111235.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr111235.c 
b/gcc/testsuite/gcc.target/arm/pr111235.c
index b06a5bfb8e29..1f732cab983a 100644
--- a/gcc/testsuite/gcc.target/arm/pr111235.c
+++ b/gcc/testsuite/gcc.target/arm/pr111235.c
@@ -31,7 +31,7 @@ void t3 (long long *p, int x)
 atomic_store_explicit (p, x, memory_order_relaxed);
 }
 
-/* { dg-final { scan-assembler-times "ldrexd\tr\[0-9\]+, r\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "ldrexd\t" 2 } } */
 /* { dg-final { scan-assembler-not "ldrgt" } } */
 /* { dg-final { scan-assembler-not "ldrdgt" } } */
 /* { dg-final { scan-assembler-not "ldrexdgt" } } */