[gcc] Created branch 'mikael/heads/add_scalar_mask_code_gcc14_v01' in namespace 'refs/users'
The branch 'mikael/heads/add_scalar_mask_code_gcc14_v01' was created in namespace 'refs/users' pointing to: 4032ccc4713a... fortran: Correctly evaluate scalar MASK arguments of MINLOC
[gcc(refs/users/mikael/heads/add_scalar_mask_code_gcc14_v01)] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC
https://gcc.gnu.org/g:4032ccc4713a5d75c02d00432d4cf1dee88dcd12 commit 4032ccc4713a5d75c02d00432d4cf1dee88dcd12 Author: Mikael Morin Date: Sat Jul 13 20:21:20 2024 +0200 fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC Add the preliminary code that the generated expression for MASK may depend on when generating the inline code to evaluate MINLOC or MAXLOC with a scalar MASK. The generated code was only keeping the generated expression but not the preliminary code, which was sufficient for simple cases such as data references or simple (scalar) function calls, but was bogus with more complicated ones. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the preliminary code generated for MASK to the preliminary code of MINLOC/MAXLOC. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_17.f90: New test. Diff: --- gcc/fortran/trans-intrinsic.cc | 1 + gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++ 2 files changed, 34 insertions(+) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 9ad372113b0c..5ef4f230472a 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_init_se (&maskse, NULL); gfc_conv_expr_val (&maskse, maskexpr); + gfc_add_block_to_block (&se->pre, &maskse.pre); gfc_init_block (&block); gfc_add_block_to_block (&block, &loop.pre); gfc_add_block_to_block (&block, &loop.post); diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 new file mode 100644 index ..7e6e586ab03f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! +! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK +! argument is correctly generated. 
+ +program p + implicit none + integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /) + logical, parameter :: mask10(*) = (/ .false., .true., .false., & + .false., .true., .true., & + .true. , .true., .false., & + .false. /) + type bool_wrapper +logical :: l + end type + call check_minloc + call check_maxloc +contains + subroutine check_minloc +integer :: a(10) +integer :: r +a = data10 +r = minloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 4) stop 11 + end subroutine + subroutine check_maxloc +integer :: a(10) +integer :: r +a = data10 +r = maxloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 5) stop 18 + end subroutine +end program
[gcc] Created branch 'mikael/heads/add_code_scalar_mask_minmaxloc_v02' in namespace 'refs/users'
The branch 'mikael/heads/add_code_scalar_mask_minmaxloc_v02' was created in namespace 'refs/users' pointing to: 08267b90e326... fortran: Correctly evaluate scalar MASK arguments of MINLOC
[gcc(refs/users/mikael/heads/add_code_scalar_mask_minmaxloc_v02)] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC
https://gcc.gnu.org/g:08267b90e3267faa744170c32a19a50435a622d4 commit 08267b90e3267faa744170c32a19a50435a622d4 Author: Mikael Morin Date: Sat Jul 13 20:21:20 2024 +0200 fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC Add the preliminary code that the generated expression for MASK may depend on when generating the inline code to evaluate MINLOC or MAXLOC with a scalar MASK. The generated code was only keeping the generated expression but not the preliminary code, which was sufficient for simple cases such as data references or simple (scalar) function calls, but was bogus with more complicated ones. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the preliminary code generated for MASK to the preliminary code of MINLOC/MAXLOC. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_17.f90: New test. (cherry picked from commit d211100903d4d532d989451243ea00d7fa2e9d5e) Diff: --- gcc/fortran/trans-intrinsic.cc | 1 + gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++ 2 files changed, 34 insertions(+) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 9ad372113b0c..5ef4f230472a 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_init_se (&maskse, NULL); gfc_conv_expr_val (&maskse, maskexpr); + gfc_add_block_to_block (&se->pre, &maskse.pre); gfc_init_block (&block); gfc_add_block_to_block (&block, &loop.pre); gfc_add_block_to_block (&block, &loop.post); diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 new file mode 100644 index ..7e6e586ab03f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! +! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK +! argument is correctly generated. 
+ +program p + implicit none + integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /) + logical, parameter :: mask10(*) = (/ .false., .true., .false., & + .false., .true., .true., & + .true. , .true., .false., & + .false. /) + type bool_wrapper +logical :: l + end type + call check_minloc + call check_maxloc +contains + subroutine check_minloc +integer :: a(10) +integer :: r +a = data10 +r = minloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 4) stop 11 + end subroutine + subroutine check_maxloc +integer :: a(10) +integer :: r +a = data10 +r = maxloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 5) stop 18 + end subroutine +end program
[gcc r15-2026] c, objc: Add -Wunterminated-string-initialization
https://gcc.gnu.org/g:44c9403ed1833ae71a59e84f9e37af3182be0df5 commit r15-2026-g44c9403ed1833ae71a59e84f9e37af3182be0df5 Author: Alejandro Colomar Date: Sat Jun 29 15:10:43 2024 +0200 c, objc: Add -Wunterminated-string-initialization Warn about the following: char s[3] = "foo"; Initializing a char array with a string literal of the same length as the size of the array is usually a mistake. It is rarely the case that one wants to create a non-terminated character sequence from a string literal. In some cases, for writing faster code, one may want to use arrays instead of pointers, since that removes the need for storing an array of pointers apart from the strings themselves. char *log_levels[] = { "info", "warning", "err" }; vs. char log_levels[][7] = { "info", "warning", "err" }; This forces the programmer to specify a size, which might change if a new entry is later added. Having no way to enforce null termination is very dangerous, however, so it is useful to have a warning for this, so that the compiler can make sure that the programmer didn't make any mistakes. This warning catches the bug above, so that the programmer will be able to fix it and write: char log_levels[][8] = { "info", "warning", "err" }; This warning already existed as part of -Wc++-compat, but this patch allows enabling it separately. It is also included in -Wextra, since it may not always be desired (when unterminated character sequences are wanted), but it's likely to be desired in most cases. Since -Wc++-compat now includes this warning, the test has to be modified to expect the text of the new warning too, in gcc.dg/Wcxx-compat-14.c. Link: https://lists.gnu.org/archive/html/groff/2022-11/msg00059.html Link: https://lists.gnu.org/archive/html/groff/2022-11/msg00063.html Link: https://inbox.sourceware.org/gcc/36da94eb-1cac-5ae8-7fea-ec66160cf...@gmail.com/T/ PR c/115185 gcc/c-family/ChangeLog: * c.opt: Add -Wunterminated-string-initialization. 
gcc/c/ChangeLog: * c-typeck.cc (digest_init): Separate warnings about character arrays being initialized as unterminated character sequences with string literals, from -Wc++-compat, into a new warning, -Wunterminated-string-initialization. gcc/ChangeLog: * doc/invoke.texi: Document the new -Wunterminated-string-initialization. gcc/testsuite/ChangeLog: * gcc.dg/Wcxx-compat-14.c: Adapt the test to match the new text of the warning, which doesn't say anything about C++ anymore. * gcc.dg/Wunterminated-string-initialization.c: New test. Acked-by: Doug McIlroy Acked-by: Mike Stump Reviewed-by: Sandra Loosemore Reviewed-by: Martin Uecker Signed-off-by: Alejandro Colomar Reviewed-by: Marek Polacek Diff: --- gcc/c-family/c.opt | 4 gcc/c/c-typeck.cc| 6 +++--- gcc/doc/invoke.texi | 20 +++- gcc/testsuite/gcc.dg/Wcxx-compat-14.c| 2 +- .../gcc.dg/Wunterminated-string-initialization.c | 6 ++ 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 5c1006ff321f..a52682d835ce 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1464,6 +1464,10 @@ Wunsuffixed-float-constants C ObjC Var(warn_unsuffixed_float_constants) Warning Warn about unsuffixed float constants. +Wunterminated-string-initialization +C ObjC Var(warn_unterminated_string_initialization) Warning LangEnabledBy(C ObjC,Wextra || Wc++-compat) +Warn about character arrays initialized as unterminated character sequences with a string literal. 
+ Wunused C ObjC C++ ObjC++ LangEnabledBy(C ObjC C++ ObjC++,Wall) ; documented in common.opt diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 574114d541fd..7e0f01ed22b9 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -8878,11 +8878,11 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, pedwarn_init (init_loc, 0, ("initializer-string for array of %qT " "is too long"), typ1); - else if (warn_cxx_compat + else if (warn_unterminated_string_initialization && compare_tree_int (TYPE_SIZE_UNIT (type), len) < 0) - warning_at (init_loc, OPT_Wc___compat, + warning_at (init_loc, OPT_Wunterminated_string_initialization, ("initializer-string for array of %qT " -"is too long for C++"), typ1); +"is too long"), typ1); if (compar
[gcc r15-2027] i386: Tweak i386-expand.cc to restore bootstrap on RHEL.
https://gcc.gnu.org/g:74e6dfb23163c2dd670d1d60fbf4c782e0b44b94 commit r15-2027-g74e6dfb23163c2dd670d1d60fbf4c782e0b44b94 Author: Roger Sayle Date: Sun Jul 14 17:22:27 2024 +0100 i386: Tweak i386-expand.cc to restore bootstrap on RHEL. This is a minor change to restore bootstrap on systems using gcc 4.8 as a host compiler. The fatal error is: In file included from gcc/gcc/coretypes.h:471:0, from gcc/gcc/config/i386/i386-expand.cc:23: gcc/gcc/config/i386/i386-expand.cc: In function 'void ix86_expand_fp_absneg_operator(rtx_code, machine_mode, rtx_def**)': ./insn-modes.h:315:75: error: temporary of non-literal type 'scalar_float_mode' in a constant expression #define HFmode (scalar_float_mode ((scalar_float_mode::from_int) E_HFmode)) ^ gcc/gcc/config/i386/i386-expand.cc:2179:8: note: in expansion of macro 'HFmode' case HFmode: ^ The solution is to use the E_?Fmode enumeration constants as case values in switch statements. 2024-07-14 Roger Sayle * config/i386/i386-expand.cc (ix86_expand_fp_absneg_operator): Use E_?Fmode enumeration constants in switch statement. (ix86_expand_copysign): Likewise. (ix86_expand_xorsign): Likewise. 
Diff: --- gcc/config/i386/i386-expand.cc | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cfcfdd94e8f0..9a31e6df2aa2 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2176,19 +2176,19 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, switch (mode) { - case HFmode: + case E_HFmode: use_sse = true; vmode = V8HFmode; break; - case BFmode: + case E_BFmode: use_sse = true; vmode = V8BFmode; break; - case SFmode: + case E_SFmode: use_sse = TARGET_SSE_MATH && TARGET_SSE; vmode = V4SFmode; break; - case DFmode: + case E_DFmode: use_sse = TARGET_SSE_MATH && TARGET_SSE2; vmode = V2DFmode; break; @@ -2330,19 +2330,19 @@ ix86_expand_copysign (rtx operands[]) switch (mode) { - case HFmode: + case E_HFmode: vmode = V8HFmode; break; - case BFmode: + case E_BFmode: vmode = V8BFmode; break; - case SFmode: + case E_SFmode: vmode = V4SFmode; break; - case DFmode: + case E_DFmode: vmode = V2DFmode; break; - case TFmode: + case E_TFmode: vmode = mode; break; default: @@ -2410,16 +2410,16 @@ ix86_expand_xorsign (rtx operands[]) switch (mode) { - case HFmode: + case E_HFmode: vmode = V8HFmode; break; - case BFmode: + case E_BFmode: vmode = V8BFmode; break; - case SFmode: + case E_SFmode: vmode = V4SFmode; break; - case DFmode: + case E_DFmode: vmode = V2DFmode; break; default:
[gcc r14-10419] fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC
https://gcc.gnu.org/g:55988c48ead9adb6a11b0dffa60ce49bb542074e commit r14-10419-g55988c48ead9adb6a11b0dffa60ce49bb542074e Author: Mikael Morin Date: Sat Jul 13 20:21:20 2024 +0200 fortran: Correctly evaluate scalar MASK arguments of MINLOC/MAXLOC Add the preliminary code that the generated expression for MASK may depend on when generating the inline code to evaluate MINLOC or MAXLOC with a scalar MASK. The generated code was only keeping the generated expression but not the preliminary code, which was sufficient for simple cases such as data references or simple (scalar) function calls, but was bogus with more complicated ones. gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Add the preliminary code generated for MASK to the preliminary code of MINLOC/MAXLOC. gcc/testsuite/ChangeLog: * gfortran.dg/minmaxloc_17.f90: New test. (cherry picked from commit d211100903d4d532d989451243ea00d7fa2e9d5e) Diff: --- gcc/fortran/trans-intrinsic.cc | 1 + gcc/testsuite/gfortran.dg/minmaxloc_17.f90 | 33 ++ 2 files changed, 34 insertions(+) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 9ad372113b0c..5ef4f230472a 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5738,6 +5738,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) gfc_init_se (&maskse, NULL); gfc_conv_expr_val (&maskse, maskexpr); + gfc_add_block_to_block (&se->pre, &maskse.pre); gfc_init_block (&block); gfc_add_block_to_block (&block, &loop.pre); gfc_add_block_to_block (&block, &loop.post); diff --git a/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 new file mode 100644 index ..7e6e586ab03f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/minmaxloc_17.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! +! Check that the code necessary to evaluate MINLOC's or MAXLOC's MASK +! argument is correctly generated. 
+ +program p + implicit none + integer, parameter :: data10(*) = (/ 2, 5, 2, 0, 6, 5, 3, 6, 0, 1 /) + logical, parameter :: mask10(*) = (/ .false., .true., .false., & + .false., .true., .true., & + .true. , .true., .false., & + .false. /) + type bool_wrapper +logical :: l + end type + call check_minloc + call check_maxloc +contains + subroutine check_minloc +integer :: a(10) +integer :: r +a = data10 +r = minloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 4) stop 11 + end subroutine + subroutine check_maxloc +integer :: a(10) +integer :: r +a = data10 +r = maxloc(a, dim = 1, mask = sum(a) > 0) +if (r /= 5) stop 18 + end subroutine +end program
[gcc] Created branch 'mikael/heads/backport14_PR99798_v01' in namespace 'refs/users'
The branch 'mikael/heads/backport14_PR99798_v01' was created in namespace 'refs/users' pointing to: c80a74602390... fortran: Assume there is no cyclic reference with submodule
[gcc(refs/users/mikael/heads/backport14_PR99798_v01)] fortran: Assume there is no cyclic reference with submodule symbols [PR99798]
https://gcc.gnu.org/g:c80a7460239037d8cf8426dbb7d03c6ddac09bab commit c80a7460239037d8cf8426dbb7d03c6ddac09bab Author: Mikael Morin Date: Sun May 12 15:16:23 2024 +0200 fortran: Assume there is no cyclic reference with submodule symbols [PR99798] This prevents a premature release of memory with procedure symbols from submodules, causing random compiler crashes. The problem is a fragile detection of cyclic references, which can match with procedures host-associated from a module in submodules, in cases where it shouldn't. The formal namespace is released, and with it the dummy arguments symbols of the procedure. But there is no cyclic reference, so the procedure symbol itself is not released and remains, with pointers to its dummy arguments now dangling. The fix adds a condition to avoid the case, and refactors to a new predicate by the way. Part of the original condition is also removed, for lack of a reason to keep it. PR fortran/99798 gcc/fortran/ChangeLog: * symbol.cc (gfc_release_symbol): Move the condition guarding the handling cyclic references... (cyclic_reference_break_needed): ... here as a new predicate. Remove superfluous parts. Add a condition preventing any premature release with submodule symbols. gcc/testsuite/ChangeLog: * gfortran.dg/submodule_33.f08: New test. (cherry picked from commit 38d1761c0c94b77a081ccc180d6e039f7a670468) Diff: --- gcc/fortran/symbol.cc | 54 -- gcc/testsuite/gfortran.dg/submodule_33.f08 | 20 +++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc index 8f7deac1d1ee..0a1646def678 100644 --- a/gcc/fortran/symbol.cc +++ b/gcc/fortran/symbol.cc @@ -3179,6 +3179,57 @@ gfc_free_symbol (gfc_symbol *&sym) } +/* Returns true if the symbol SYM has, through its FORMAL_NS field, a reference + to itself which should be eliminated for the symbol memory to be released + via normal reference counting. 
+ + The implementation is crucial as it controls the proper release of symbols, + especially (contained) procedure symbols, which can represent a lot of memory + through the namespace of their body. + + We try to avoid freeing too much memory (causing dangling pointers), to not + leak too much (wasting memory), and to avoid expensive walks of the symbol + tree (which would be the correct way to check for a cycle). */ + +bool +cyclic_reference_break_needed (gfc_symbol *sym) +{ + /* Normal symbols don't reference themselves. */ + if (sym->formal_ns == nullptr) +return false; + + /* Procedures at the root of the file do have a self reference, but they don't + have a reference in a parent namespace preventing the release of the + procedure namespace, so they can use the normal reference counting. */ + if (sym->formal_ns == sym->ns) +return false; + + /* If sym->refs == 1, we can use normal reference counting. If sym->refs > 2, + the symbol won't be freed anyway, with or without cyclic reference. */ + if (sym->refs != 2) +return false; + + /* Procedure symbols host-associated from a module in submodules are special, + because the namespace of the procedure block in the submodule is different + from the FORMAL_NS namespace generated by host-association. So there are + two different namespaces representing the same procedure namespace. As + FORMAL_NS comes from host-association, which only imports symbols visible + from the outside (dummy arguments basically), we can assume there is no + self reference through FORMAL_NS in that case. */ + if (sym->attr.host_assoc && sym->attr.used_in_submodule) +return false; + + /* We can assume that contained procedures have cyclic references, because + the symbol of the procedure itself is accessible in the procedure body + namespace. So we assume that symbols with a formal namespace different + from the declaration namespace and two references, one of which is about + to be removed, are procedures with just the self reference left. 
At this + point, the symbol SYM matches that pattern, so we return true here to + permit the release of SYM. */ + return true; +} + + /* Decrease the reference counter and free memory when we reach zero. Returns true if the symbol has been freed, false otherwise. */ @@ -3188,8 +3239,7 @@ gfc_release_symbol (gfc_symbol *&sym) if (sym == NULL) return false; - if (sym->formal_ns != NULL && sym->refs == 2 && sym->formal_ns != sym->ns - && (!sym->attr.entry || !sym->module)) + if (cyclic_reference_break_needed (sym)) { /* As formal_ns contains a reference to sym, delete formal_ns just before the deletion of sym. */ diff --git a/gcc/testsuite/gfortran.d
[gcc r14-10420] fortran: Assume there is no cyclic reference with submodule symbols [PR99798]
https://gcc.gnu.org/g:c80a7460239037d8cf8426dbb7d03c6ddac09bab commit r14-10420-gc80a7460239037d8cf8426dbb7d03c6ddac09bab Author: Mikael Morin Date: Sun May 12 15:16:23 2024 +0200 fortran: Assume there is no cyclic reference with submodule symbols [PR99798] This prevents a premature release of memory with procedure symbols from submodules, causing random compiler crashes. The problem is a fragile detection of cyclic references, which can match with procedures host-associated from a module in submodules, in cases where it shouldn't. The formal namespace is released, and with it the dummy arguments symbols of the procedure. But there is no cyclic reference, so the procedure symbol itself is not released and remains, with pointers to its dummy arguments now dangling. The fix adds a condition to avoid the case, and refactors to a new predicate by the way. Part of the original condition is also removed, for lack of a reason to keep it. PR fortran/99798 gcc/fortran/ChangeLog: * symbol.cc (gfc_release_symbol): Move the condition guarding the handling cyclic references... (cyclic_reference_break_needed): ... here as a new predicate. Remove superfluous parts. Add a condition preventing any premature release with submodule symbols. gcc/testsuite/ChangeLog: * gfortran.dg/submodule_33.f08: New test. (cherry picked from commit 38d1761c0c94b77a081ccc180d6e039f7a670468) Diff: --- gcc/fortran/symbol.cc | 54 -- gcc/testsuite/gfortran.dg/submodule_33.f08 | 20 +++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/symbol.cc b/gcc/fortran/symbol.cc index 8f7deac1d1ee..0a1646def678 100644 --- a/gcc/fortran/symbol.cc +++ b/gcc/fortran/symbol.cc @@ -3179,6 +3179,57 @@ gfc_free_symbol (gfc_symbol *&sym) } +/* Returns true if the symbol SYM has, through its FORMAL_NS field, a reference + to itself which should be eliminated for the symbol memory to be released + via normal reference counting. 
+ + The implementation is crucial as it controls the proper release of symbols, + especially (contained) procedure symbols, which can represent a lot of memory + through the namespace of their body. + + We try to avoid freeing too much memory (causing dangling pointers), to not + leak too much (wasting memory), and to avoid expensive walks of the symbol + tree (which would be the correct way to check for a cycle). */ + +bool +cyclic_reference_break_needed (gfc_symbol *sym) +{ + /* Normal symbols don't reference themselves. */ + if (sym->formal_ns == nullptr) +return false; + + /* Procedures at the root of the file do have a self reference, but they don't + have a reference in a parent namespace preventing the release of the + procedure namespace, so they can use the normal reference counting. */ + if (sym->formal_ns == sym->ns) +return false; + + /* If sym->refs == 1, we can use normal reference counting. If sym->refs > 2, + the symbol won't be freed anyway, with or without cyclic reference. */ + if (sym->refs != 2) +return false; + + /* Procedure symbols host-associated from a module in submodules are special, + because the namespace of the procedure block in the submodule is different + from the FORMAL_NS namespace generated by host-association. So there are + two different namespaces representing the same procedure namespace. As + FORMAL_NS comes from host-association, which only imports symbols visible + from the outside (dummy arguments basically), we can assume there is no + self reference through FORMAL_NS in that case. */ + if (sym->attr.host_assoc && sym->attr.used_in_submodule) +return false; + + /* We can assume that contained procedures have cyclic references, because + the symbol of the procedure itself is accessible in the procedure body + namespace. So we assume that symbols with a formal namespace different + from the declaration namespace and two references, one of which is about + to be removed, are procedures with just the self reference left. 
At this + point, the symbol SYM matches that pattern, so we return true here to + permit the release of SYM. */ + return true; +} + + /* Decrease the reference counter and free memory when we reach zero. Returns true if the symbol has been freed, false otherwise. */ @@ -3188,8 +3239,7 @@ gfc_release_symbol (gfc_symbol *&sym) if (sym == NULL) return false; - if (sym->formal_ns != NULL && sym->refs == 2 && sym->formal_ns != sym->ns - && (!sym->attr.entry || !sym->module)) + if (cyclic_reference_break_needed (sym)) { /* As formal_ns contains a reference to sym, delete formal_ns just before the deletion of sym. */ diff --git a/gcc/testsuite
[gcc r15-2029] RISC-V: Add vector type of BFloat16 format
https://gcc.gnu.org/g:666f167bec09d1234e6496c86b566fe1a71f61f0 commit r15-2029-g666f167bec09d1234e6496c86b566fe1a71f61f0 Author: Feng Wang Date: Thu Jun 13 00:32:14 2024 +0000 RISC-V: Add vector type of BFloat16 format v3: Rebase v2: Rebase The vector type of BFloat16 format is added in this patch; subsequent extensions to zvfbfmin and zvfbfwma need to be based on this patch. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/genrvv-type-indexer.cc (bfloat16_type): Generate bf16 vector_type and scalar_type in DEF_RVV_TYPE_INDEX. (bfloat16_wide_type): Ditto. (same_ratio_eew_bf16_type): Ditto. (main): Ditto. * config/riscv/riscv-modes.def (ADJUST_BYTESIZE): Add vector type for BFloat16. (RVV_WHOLE_MODES): Add vector type for BFloat16. (RVV_FRACT_MODE): Ditto. (RVV_NF4_MODES): Ditto. (RVV_NF8_MODES): Ditto. (RVV_NF2_MODES): Ditto. * config/riscv/riscv-vector-builtins-types.def (vbfloat16mf4_t): Add builtin vector type for BFloat16. (vbfloat16mf2_t): Add builtin vector type for BFloat16. (vbfloat16m1_t): Ditto. (vbfloat16m2_t): Ditto. (vbfloat16m4_t): Ditto. (vbfloat16m8_t): Ditto. (vbfloat16mf4x2_t): Ditto. (vbfloat16mf4x3_t): Ditto. (vbfloat16mf4x4_t): Ditto. (vbfloat16mf4x5_t): Ditto. (vbfloat16mf4x6_t): Ditto. (vbfloat16mf4x7_t): Ditto. (vbfloat16mf4x8_t): Ditto. (vbfloat16mf2x2_t): Ditto. (vbfloat16mf2x3_t): Ditto. (vbfloat16mf2x4_t): Ditto. (vbfloat16mf2x5_t): Ditto. (vbfloat16mf2x6_t): Ditto. (vbfloat16mf2x7_t): Ditto. (vbfloat16mf2x8_t): Ditto. (vbfloat16m1x2_t): Ditto. (vbfloat16m1x3_t): Ditto. (vbfloat16m1x4_t): Ditto. (vbfloat16m1x5_t): Ditto. (vbfloat16m1x6_t): Ditto. (vbfloat16m1x7_t): Ditto. (vbfloat16m1x8_t): Ditto. (vbfloat16m2x2_t): Ditto. (vbfloat16m2x3_t): Ditto. (vbfloat16m2x4_t): Ditto. (vbfloat16m4x2_t): Ditto. * config/riscv/riscv-vector-builtins.cc (check_required_extensions): Add required_ext checking for BFloat16. * config/riscv/riscv-vector-builtins.def (vbfloat16mf4_t): Add vector_type for BFloat16 in builtins.def. (vbfloat16mf4x2_t): Ditto. 
(vbfloat16mf4x3_t): Ditto. (vbfloat16mf4x4_t): Ditto. (vbfloat16mf4x5_t): Ditto. (vbfloat16mf4x6_t): Ditto. (vbfloat16mf4x7_t): Ditto. (vbfloat16mf4x8_t): Ditto. (vbfloat16mf2_t): Ditto. (vbfloat16mf2x2_t): Ditto. (vbfloat16mf2x3_t): Ditto. (vbfloat16mf2x4_t): Ditto. (vbfloat16mf2x5_t): Ditto. (vbfloat16mf2x6_t): Ditto. (vbfloat16mf2x7_t): Ditto. (vbfloat16mf2x8_t): Ditto. (vbfloat16m1_t): Ditto. (vbfloat16m1x2_t): Ditto. (vbfloat16m1x3_t): Ditto. (vbfloat16m1x4_t): Ditto. (vbfloat16m1x5_t): Ditto. (vbfloat16m1x6_t): Ditto. (vbfloat16m1x7_t): Ditto. (vbfloat16m1x8_t): Ditto. (vbfloat16m2_t): Ditto. (vbfloat16m2x2_t): Ditto. (vbfloat16m2x3_t): Ditto. (vbfloat16m2x4_t): Ditto. (vbfloat16m4_t): Ditto. (vbfloat16m4x2_t): Ditto. (vbfloat16m8_t): Ditto. (double_trunc_bfloat_scalar): Add scalar_type def for BFloat16. (double_trunc_bfloat_vector): Add vector_type def for BFloat16. * config/riscv/riscv-vector-builtins.h (RVV_REQUIRE_ELEN_BF_16): Add required definition of BFloat16 ext. * config/riscv/riscv-vector-switch.def (ENTRY): Add vector_type information for BFloat16. (TUPLE_ENTRY): Add tuple vector_type information for BFloat16. Diff: --- gcc/config/riscv/genrvv-type-indexer.cc | 115 +++ gcc/config/riscv/riscv-modes.def | 30 +- gcc/config/riscv/riscv-vector-builtins-types.def | 50 ++ gcc/config/riscv/riscv-vector-builtins.cc| 7 +- gcc/config/riscv/riscv-vector-builtins.def | 55 ++- gcc/config/riscv/riscv-vector-builtins.h | 1 + gcc/config/riscv/riscv-vector-switch.def | 36 +++ 7 files changed, 291 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc index 27cbd14982c1..8626ddeaaa8b 100644 --- a/gcc/config/riscv/genrvv-type-indexer.cc +++ b/gcc/config/riscv/genrvv-type-indexer.cc @@ -117,6 +117,42 @@ inttype
[gcc r15-2030] AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]
https://gcc.gnu.org/g:02a3bf5e2f0c18078bf67fc0002219edba1d76ff commit r15-2030-g02a3bf5e2f0c18078bf67fc0002219edba1d76ff Author: Hongyu Wang Date: Sat Jul 13 11:45:31 2024 +0800 AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889] According to the instruction spec of AVX512BF16, the conversion from float to BF16 is not a simple truncation. It has special handling for denormal/nan, even for normal float it will add an extra bias according to the least significant bit for bf number. This means we cannot use the vcvtne2ps2bf16 for any bf16 vector shuffle. The optimization introduced in r15-1368 adds a specific split to convert HImode permutation with this instruction, so remove it and treat the BFmode permutation the same as HFmode. gcc/ChangeLog: PR target/115889 * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove. * config/i386/sse.md (hi_cvt_bf): Remove. (HI_CVT_BF): Likewise. (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise. gcc/testsuite/ChangeLog: PR target/115889 * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust output scan. 
Diff: --- gcc/config/i386/predicates.md | 11 --- gcc/config/i386/sse.md | 35 -- .../i386/vpermt2-special-bf16-shufflue.c | 3 +- 3 files changed, 1 insertion(+), 48 deletions(-) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index a894847adaf7..5d0bb1e0f54a 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -2327,14 +2327,3 @@ return true; }) - -;; Check that each element is odd and incrementally increasing from 1 -(define_predicate "vcvtne2ps2bf_parallel" - (and (match_code "const_vector") - (match_code "const_int" "a")) -{ - for (int i = 0; i < XVECLEN (op, 0); ++i) -if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1)) - return false; - return true; -}) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b3b4697924b5..c134494cd200 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -31460,38 +31460,3 @@ "TARGET_AVXVNNIINT16" "vpdp\t{%3, %2, %0|%0, %2, %3}" [(set_attr "prefix" "vex")]) - -(define_mode_attr hi_cvt_bf - [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")]) - -(define_mode_attr HI_CVT_BF - [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")]) - -(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_" - [(set (match_operand:VI2_AVX512F 0 "register_operand") - (unspec:VI2_AVX512F - [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel") - (match_operand:VI2_AVX512F 2 "register_operand") - (match_operand:VI2_AVX512F 3 "nonimmediate_operand")] - UNSPEC_VPERMT2))] - "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()" - "#" - "&& 1" - [(const_int 0)] -{ - rtx op0 = gen_reg_rtx (mode); - operands[2] = lowpart_subreg (mode, - force_reg (mode, operands[2]), - mode); - operands[3] = lowpart_subreg (mode, - force_reg (mode, operands[3]), - mode); - - emit_insn (gen_avx512f_cvtne2ps2bf16_(op0, - operands[3], - operands[2])); - emit_move_insn (operands[0], lowpart_subreg (mode, op0, - mode)); - DONE; -} -[(set_attr "mode" "")]) diff --git 
a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c index 5c65f2a98847..e504f3f4cd70 100755 --- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c @@ -1,7 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */ -/* { dg-final { scan-assembler-not "vpermi2b" } } */ -/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */ +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ typedef __bf16 v8bf __attribute__((vector_size(16))); typedef __bf16 v16bf __attribute__((vector_size(32)));
[gcc r15-2031] RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic
https://gcc.gnu.org/g:281f021ed4fbf9c2336048e34b6b40c6f7119baa commit r15-2031-g281f021ed4fbf9c2336048e34b6b40c6f7119baa Author: Feng Wang Date: Mon Jun 17 01:59:57 2024 +0000 RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic v3: Modify warning message in riscv.cc v2: Rebase According to the intrinsic doc, the 'Zvfbfmin' and 'Zvfbfwma' intrinsic functions are added by this patch. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class vfncvtbf16_f): Add 'Zvfbfmin' intrinsic in bases. (class vfwcvtbf16_f): Ditto. (class vfwmaccbf16): Add 'Zvfbfwma' intrinsic in bases. (BASE): Add BASE macro for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins-bases.h: Add declaration for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins-functions.def (REQUIRED_EXTENSIONS): Add builtins def for 'Zvfbfmin' and 'Zvfbfwma'. (vfncvtbf16_f): Ditto. (vfncvtbf16_f_frm): Ditto. (vfwcvtbf16_f): Ditto. (vfwmaccbf16): Ditto. (vfwmaccbf16_frm): Ditto. * config/riscv/riscv-vector-builtins-shapes.cc (supports_vectype_p): Add vector intrinsic build judgment for BFloat16. (build_all): Ditto. (BASE_NAME_MAX_LEN): Adjust max length. * config/riscv/riscv-vector-builtins-types.def (DEF_RVV_F32_OPS): Add new operand type for BFloat16. (vfloat32mf2_t): Ditto. (vfloat32m1_t): Ditto. (vfloat32m2_t): Ditto. (vfloat32m4_t): Ditto. (vfloat32m8_t): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_F32_OPS): Ditto. (validate_instance_type_required_extensions): Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins.h (enum required_ext): Add required_ext declaration for 'Zvfbfmin' and 'Zvfbfwma'. (reqired_ext_to_isa_name): Ditto. (required_extensions_specified): Ditto. (struct function_group_info): Add match case for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv.cc (riscv_validate_vector_type): Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'. 
Diff: --- gcc/config/riscv/riscv-vector-builtins-bases.cc| 69 ++ gcc/config/riscv/riscv-vector-builtins-bases.h | 7 +++ .../riscv/riscv-vector-builtins-functions.def | 15 + gcc/config/riscv/riscv-vector-builtins-shapes.cc | 31 +- gcc/config/riscv/riscv-vector-builtins-types.def | 13 gcc/config/riscv/riscv-vector-builtins.cc | 67 + gcc/config/riscv/riscv-vector-builtins.h | 34 +++ gcc/config/riscv/riscv.cc | 13 ++-- 8 files changed, 232 insertions(+), 17 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 6483faba39c4..193392fbcc2a 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2417,6 +2417,60 @@ public: } }; +/* Implements vfncvtbf16_f. */ +template +class vfncvtbf16_f : public function_base +{ +public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + + bool may_require_frm_p () const override { return true; } + + rtx expand (function_expander &e) const override + { +return e.use_exact_insn (code_for_pred_trunc_to_bf16 (e.vector_mode ())); + } +}; + +/* Implements vfwcvtbf16_f. */ +class vfwcvtbf16_f : public function_base +{ +public: + rtx expand (function_expander &e) const override + { +return e.use_exact_insn (code_for_pred_extend_bf16_to (e.vector_mode ())); + } +}; + +/* Implements vfwmaccbf16. 
*/ +template +class vfwmaccbf16 : public function_base +{ +public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + + bool may_require_frm_p () const override { return true; } + + bool has_merge_operand_p () const override { return false; } + + rtx expand (function_expander &e) const override + { +if (e.op_info->op == OP_TYPE_vf) + return e.use_widen_ternop_insn ( + code_for_pred_widen_bf16_mul_scalar (e.vector_mode ())); +if (e.op_info->op == OP_TYPE_vv) + return e.use_widen_ternop_insn ( + code_for_pred_widen_bf16_mul (e.vector_mode ())); +gcc_unreachable (); + } +}; + static CONSTEXPR const vsetvl vsetvl_obj; static CONSTEXPR const vsetvl vsetvlmax_obj; static CONSTEXPR const loadstore vle_obj; @@ -2734,6 +2788,14 @@ static CONSTEXPR const crypto_vv vsm4r_obj; static CONSTEXPR const vsm3me vsm3me_obj; static CONSTEXPR const vaeskf2_vsm3c vsm3c_obj; +/* Zvfbfmin */ +static CONSTEXPR const vfncvtbf16_f vf
[gcc r15-2032] RISC-V: Add md files for vector BFloat16
https://gcc.gnu.org/g:9f521632dd9ce71ce28ff1da9c161f76bc20fe3e commit r15-2032-g9f521632dd9ce71ce28ff1da9c161f76bc20fe3e Author: Feng Wang Date: Tue Jun 18 06:13:35 2024 +0000 RISC-V: Add md files for vector BFloat16 V3: Add Bfloat16 vector insn in generic-vector-ooo.md v2: Rebase According to the BFloat16 spec, some vector iterators and new pattern are added in md files. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/generic-vector-ooo.md: Add def_insn_reservation for vector BFloat16. * config/riscv/riscv.md: Add new insn name for vector BFloat16. * config/riscv/vector-iterators.md: Add some iterators for vector BFloat16. * config/riscv/vector.md: Add some attribute for vector BFloat16. * config/riscv/vector-bfloat16.md: New file. Add insn pattern vector BFloat16. Diff: --- gcc/config/riscv/generic-vector-ooo.md | 4 +- gcc/config/riscv/riscv.md | 13 ++- gcc/config/riscv/vector-bfloat16.md| 135 ++ gcc/config/riscv/vector-iterators.md | 169 - gcc/config/riscv/vector.md | 103 +--- 5 files changed, 407 insertions(+), 17 deletions(-) diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md index 5e933c838418..efe6bc41e864 100644 --- a/gcc/config/riscv/generic-vector-ooo.md +++ b/gcc/config/riscv/generic-vector-ooo.md @@ -53,7 +53,7 @@ (define_insn_reservation "vec_fcmp" 3 (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\ - vfncvtftoi,vfncvtftof") + vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16") "vxu_ooo_issue,vxu_ooo_alu") ;; Vector integer multiplication. @@ -69,7 +69,7 @@ ;; Vector float multiplication and FMA. (define_insn_reservation "vec_fmul" 6 - (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd") + (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16") "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, assumed to be a generic operation for now. 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 5dee837a5878..379015c60de8 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -200,6 +200,7 @@ RVVMF64BI,RVVMF32BI,RVVMF16BI,RVVMF8BI,RVVMF4BI,RVVMF2BI,RVVM1BI, RVVM8QI,RVVM4QI,RVVM2QI,RVVM1QI,RVVMF2QI,RVVMF4QI,RVVMF8QI, RVVM8HI,RVVM4HI,RVVM2HI,RVVM1HI,RVVMF2HI,RVVMF4HI, + RVVM8BF,RVVM4BF,RVVM2BF,RVVM1BF,RVVMF2BF,RVVMF4BF, RVVM8HF,RVVM4HF,RVVM2HF,RVVM1HF,RVVMF2HF,RVVMF4HF, RVVM8SI,RVVM4SI,RVVM2SI,RVVM1SI,RVVMF2SI, RVVM8SF,RVVM4SF,RVVM2SF,RVVM1SF,RVVMF2SF, @@ -219,6 +220,11 @@ RVVM2x4HI,RVVM1x4HI,RVVMF2x4HI,RVVMF4x4HI, RVVM2x3HI,RVVM1x3HI,RVVMF2x3HI,RVVMF4x3HI, RVVM4x2HI,RVVM2x2HI,RVVM1x2HI,RVVMF2x2HI,RVVMF4x2HI, + RVVM1x8BF,RVVMF2x8BF,RVVMF4x8BF,RVVM1x7BF,RVVMF2x7BF, + RVVMF4x7BF,RVVM1x6BF,RVVMF2x6BF,RVVMF4x6BF,RVVM1x5BF, + RVVMF2x5BF,RVVMF4x5BF,RVVM2x4BF,RVVM1x4BF,RVVMF2x4BF, + RVVMF4x4BF,RVVM2x3BF,RVVM1x3BF,RVVMF2x3BF,RVVMF4x3BF, + RVVM4x2BF,RVVM2x2BF,RVVM1x2BF,RVVMF2x2BF,RVVMF4x2BF, RVVM1x8HF,RVVMF2x8HF,RVVMF4x8HF,RVVM1x7HF,RVVMF2x7HF, RVVMF4x7HF,RVVM1x6HF,RVVMF2x6HF,RVVMF4x6HF,RVVM1x5HF, RVVMF2x5HF,RVVMF4x5HF,RVVM2x4HF,RVVM1x4HF,RVVMF2x4HF, @@ -462,6 +468,10 @@ ;; vsm4rcrypto vector SM4 Rounds instructions ;; vsm3me crypto vector SM3 Message Expansion instructions ;; vsm3ccrypto vector SM3 Compression instructions +;; 18.Vector BF16 instrctions +;; vfncvtbf16 vector narrowing single floating-point to brain floating-point instruction +;; vfwcvtbf16 vector widening brain floating-point to single floating-point instruction +;; vfwmaccbf16 vector BF16 widening multiply-accumulate (define_attr "type" "unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, @@ -483,7 +493,7 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, 
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c" + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16" (cond [(eq_attr "got" "load") (const_string "load") ;; If a doubleword move uses these expensive instructions, @@ -4373,6 +4383,7 @@ (include "generic-ooo.md") (include "vector.md") (include "vector-crypto.md") +(include "vector-bfloat16.md") (include "zicond.md") (include "sfb.md") (include "zc.md") diff --git a/gcc/config/riscv/vector-bfloat16.md b/gcc/config/riscv/vector-bfloat16.md new file mode 100644 index ..562aa8ee5ed7 --- /dev/null +++ b/gcc/config/riscv/vector-bfloat16.md @@ -0,0 +1,135 @@ +;; Machine des
[gcc r15-2033] CRIS: Adjust gcc.dg/tree-ssa/loop-1.c
https://gcc.gnu.org/g:da37a272beceacb362373a9eab1e915db587be9e commit r15-2033-gda37a272beceacb362373a9eab1e915db587be9e Author: Hans-Peter Nilsson Date: Mon Jul 15 04:57:06 2024 +0200 CRIS: Adjust gcc.dg/tree-ssa/loop-1.c With r15-1619-g3b9b8d6cfdf593, there's a XPASS and a FAIL for this test-case for cris-elf. Looking at the generated code, _foo is indeed no longer saved in a register for CRIS. While that looks like a regression, coremark results are the same around this revision, so simply adjust the test-case: remove the target-specific exceptions for cris-*-*. * gcc.dg/tree-ssa/loop-1.c: Remove target-specific test and xfail to adjust for recent changes in register allocation. Diff: --- gcc/testsuite/gcc.dg/tree-ssa/loop-1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index a531b7584a64..a8f2c3bbfdb4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -43,16 +43,15 @@ int xxx(void) /* The SH targets always use separate instructions to load the address and to do the actual call - bsr is only generated by link time relaxation. */ -/* CRIS and MSP430 keep the address in a register. */ +/* MSP430 keeps the address in a register. */ /* m68k sometimes puts the address in a register, depending on CPU and PIC. 
*/ -/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* cris-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* pdp11*-*-* msp430-*-* amdgcn*-*-* } } } */ +/* { dg-final { scan-assembler-times "foo" 5 { xfail hppa*-*-* ia64*-*-* sh*-*-* fido-*-* m68k-*-* i?86-*-mingw* i?86-*-cygwin* x86_64-*-mingw* visium-*-* nvptx*-*-* pdp11*-*-* msp430-*-* amdgcn*-*-* } } } */ /* { dg-final { scan-assembler-times "foo,%r" 5 { target hppa*-*-* } } } */ /* { dg-final { scan-assembler-times "= foo" 5 { target ia64*-*-* } } } */ /* { dg-final { scan-assembler-times "call\[ \t\]*_foo" 5 { target i?86-*-mingw* i?86-*-cygwin* } } } */ /* { dg-final { scan-assembler-times "call\[ \t\]*foo" 5 { target x86_64-*-mingw* } } } */ /* { dg-final { scan-assembler-times "jsr|bsrf|blink\ttr?,r18" 5 { target sh*-*-* } } } */ -/* { dg-final { scan-assembler-times "Jsr \\\$r" 5 { target cris-*-* } } } */ /* { dg-final { scan-assembler-times "\[jb\]sr" 5 { target fido-*-* m68k-*-* pdp11-*-* } } } */ /* { dg-final { scan-assembler-times "bra *tr,r\[1-9\]*,r21" 5 { target visium-*-* } } } */ /* { dg-final { scan-assembler-times "(?n)\[ \t\]call\[ \t\].*\[ \t\]foo," 5 { target nvptx*-*-* } } } */
[gcc r15-2034] aarch64: Fix the expected output of the test cpy_1.c [PR115892]
https://gcc.gnu.org/g:8b1492012e5a11e9400e30ee4ae9195c08a2a81e commit r15-2034-g8b1492012e5a11e9400e30ee4ae9195c08a2a81e Author: Surya Kumari Jangala Date: Thu Jul 11 11:02:17 2024 -0500 aarch64: Fix the expected output of the test cpy_1.c [PR115892] The fix at r15-1619-g3b9b8d6cfdf593 results in a rearrangement of instructions generated for cpy_1.c. This patch fixes the expected output. 2024-07-12 Surya Kumari Jangala gcc/testsuite: PR testsuite/115892 * gcc.target/aarch64/sve/acle/general/cpy_1.c: Update expected output. Diff: --- gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c index 57b56a7e256f..1d669913df2e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c @@ -11,9 +11,15 @@ extern "C" { /* ** dup_x0_m: ** ... +** ( ** add (x[0-9]+), x0, #?1 ** mov (p[0-7])\.b, p15\.b ** mov z0\.d, \2/m, \1 +** | +** mov (p[0-7])\.b, p15\.b +** add (x[0-9]+), x0, #?1 +** mov z0\.d, \3/m, \4 +** ) ** ... ** ret */
[gcc r12-10617] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.
https://gcc.gnu.org/g:e1427b39d28f382d21e7a0ea1714b3250e0a6e5d commit r12-10617-ge1427b39d28f382d21e7a0ea1714b3250e0a6e5d Author: liuhongt Date: Fri Jul 12 09:39:23 2024 +0800 Fix SSA_NAME leak due to def_stmt is removed before use_stmt. - _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0); - # DEBUG old => (long int) _5 + _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, __atomic_fetch_or_8); + # DEBUG old => NULL # DEBUG BEGIN_STMT - # DEBUG D#2 => _5 & 1 + # DEBUG D#2 => NULL ... - _10 = ~_5; - _8 = (_Bool) _10; - # DEBUG ret => _8 + _8 = _6 == 0; + # DEBUG ret => (_Bool) _10 confirmed. convert_atomic_bit_not does this, it checks for single_use and removes the def, failing to release the name (which would fix this up IIRC). Note the function removes stmts in "wrong" order (before uses of LHS are removed), so it requires larger surgery. And it leaks SSA names. gcc/ChangeLog: PR target/115872 * tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after use_nop_stmt is removed. (optimize_atomic_bit_test_and): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115872.c: New test. 
(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5) Diff: --- gcc/testsuite/gcc.target/i386/pr115872.c | 16 gcc/tree-ssa-ccp.cc | 12 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c b/gcc/testsuite/gcc.target/i386/pr115872.c new file mode 100644 index ..937004456d37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115872.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +long set_work_pending_p; +_Bool set_work_pending() { + _Bool __trans_tmp_1; + long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0); + __trans_tmp_1 = old & mask; + return !__trans_tmp_1; +} +void __queue_work() { + _Bool ret = set_work_pending(); + if (ret) +__queue_work(); +} + diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 42a02dccaeb1..3c63f2dd8a3b 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -3306,9 +3306,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, return nullptr; gimple_stmt_iterator gsi; - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); tree var = make_ssa_name (TREE_TYPE (lhs)); + /* use_stmt need to be removed after use_nop_stmt, + so use_lhs can be released. 
*/ + gimple *use_stmt_removal = use_stmt; use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask); gsi = gsi_for_stmt (use_not_stmt); gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT); @@ -3318,6 +3319,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_not_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); return use_stmt; } @@ -3569,8 +3572,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, */ } var = make_ssa_name (TREE_TYPE (use_rhs)); - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); + gimple* use_stmt_removal = use_stmt; g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs, and_mask); gsi = gsi_for_stmt (use_nop_stmt); @@ -3584,6 +3586,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_nop_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); } } else
[gcc r13-8913] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.
https://gcc.gnu.org/g:9a1cdaa5e8441394d613f5f3401e7aab21efe8f0 commit r13-8913-g9a1cdaa5e8441394d613f5f3401e7aab21efe8f0 Author: liuhongt Date: Fri Jul 12 09:39:23 2024 +0800 Fix SSA_NAME leak due to def_stmt is removed before use_stmt. - _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0); - # DEBUG old => (long int) _5 + _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, __atomic_fetch_or_8); + # DEBUG old => NULL # DEBUG BEGIN_STMT - # DEBUG D#2 => _5 & 1 + # DEBUG D#2 => NULL ... - _10 = ~_5; - _8 = (_Bool) _10; - # DEBUG ret => _8 + _8 = _6 == 0; + # DEBUG ret => (_Bool) _10 confirmed. convert_atomic_bit_not does this, it checks for single_use and removes the def, failing to release the name (which would fix this up IIRC). Note the function removes stmts in "wrong" order (before uses of LHS are removed), so it requires larger surgery. And it leaks SSA names. gcc/ChangeLog: PR target/115872 * tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after use_nop_stmt is removed. (optimize_atomic_bit_test_and): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115872.c: New test. 
(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5) Diff: --- gcc/testsuite/gcc.target/i386/pr115872.c | 16 gcc/tree-ssa-ccp.cc | 12 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c b/gcc/testsuite/gcc.target/i386/pr115872.c new file mode 100644 index ..937004456d37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115872.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +long set_work_pending_p; +_Bool set_work_pending() { + _Bool __trans_tmp_1; + long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0); + __trans_tmp_1 = old & mask; + return !__trans_tmp_1; +} +void __queue_work() { + _Bool ret = set_work_pending(); + if (ret) +__queue_work(); +} + diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 2e552b330b74..6c9da603ef95 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -3321,9 +3321,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, return nullptr; gimple_stmt_iterator gsi; - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); tree var = make_ssa_name (TREE_TYPE (lhs)); + /* use_stmt need to be removed after use_nop_stmt, + so use_lhs can be released. 
*/ + gimple *use_stmt_removal = use_stmt; use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask); gsi = gsi_for_stmt (use_not_stmt); gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT); @@ -,6 +3334,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_not_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); return use_stmt; } @@ -3635,8 +3638,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, */ } var = make_ssa_name (TREE_TYPE (use_rhs)); - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); + gimple* use_stmt_removal = use_stmt; g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs, and_mask); gsi = gsi_for_stmt (use_nop_stmt); @@ -3653,6 +3655,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_nop_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); } } else
[gcc r15-2035] RISC-V: Implement locality for __builtin_prefetch
https://gcc.gnu.org/g:bf26413fc4081dfd18b915580b35bdb71481327e commit r15-2035-gbf26413fc4081dfd18b915580b35bdb71481327e Author: Monk Chiang Date: Thu Jul 6 14:05:17 2023 +0800 RISC-V: Implement locality for __builtin_prefetch The patch adds the Zihintntl instructions in the prefetch pattern. Zicbop has prefetch instructions. Zihintntl has NTL instructions. Insert NTL instructions before prefetch instruction, if target has Zihintntl extension. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_print_operand): Add 'L' letter to print zihintntl instructions string. * config/riscv/riscv.md (prefetch): Add zihintntl instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/prefetch-zicbop.c: New test. * gcc.target/riscv/prefetch-zihintntl.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 22 ++ gcc/config/riscv/riscv.md | 10 +++--- gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c | 20 .../gcc.target/riscv/prefetch-zihintntl.c | 20 4 files changed, 69 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 53ab2f1a8814..084a592a313c 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -6488,6 +6488,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char *p) 'A' Print the atomic operation suffix for memory model OP. 'I' Print the LR suffix for memory model OP. 'J' Print the SC suffix for memory model OP. + 'L' Print a non-temporal locality hints instruction. 'z' Print x0 if OP is zero, otherwise print OP normally. 'i' Print i if the operand is not a register. 'S' Print shift-index of single-bit mask OP. 
@@ -6682,6 +6683,27 @@ riscv_print_operand (FILE *file, rtx op, int letter) break; } +case 'L': + { + const char *ntl_hint = NULL; + switch (INTVAL (op)) + { + case 0: + ntl_hint = "ntl.all"; + break; + case 1: + ntl_hint = "ntl.pall"; + break; + case 2: + ntl_hint = "ntl.p1"; + break; + } + + if (ntl_hint) + asm_fprintf (file, "%s\n\t", ntl_hint); + break; + } + case 'i': if (code != REG) fputs ("i", file); diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 379015c60de8..46c46039c33a 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4113,12 +4113,16 @@ { switch (INTVAL (operands[1])) { -case 0: return "prefetch.r\t%a0"; -case 1: return "prefetch.w\t%a0"; +case 0: return TARGET_ZIHINTNTL ? "%L2prefetch.r\t%a0" : "prefetch.r\t%a0"; +case 1: return TARGET_ZIHINTNTL ? "%L2prefetch.w\t%a0" : "prefetch.w\t%a0"; default: gcc_unreachable (); } } - [(set_attr "type" "store")]) + [(set_attr "type" "store") + (set (attr "length") (if_then_else (and (match_test "TARGET_ZIHINTNTL") + (match_test "IN_RANGE (INTVAL (operands[2]), 0, 2)")) + (const_string "8") + (const_string "4")))]) (define_insn "riscv_prefetchi_" [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r") diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c new file mode 100644 index ..0faa120f1f79 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c @@ -0,0 +1,20 @@ +/* { dg-do compile target { { rv64-*-*}}} */ +/* { dg-options "-march=rv64gc_zicbop -mabi=lp64" } */ + +void foo (char *p) +{ + __builtin_prefetch (p, 0, 0); + __builtin_prefetch (p, 0, 1); + __builtin_prefetch (p, 0, 2); + __builtin_prefetch (p, 0, 3); + __builtin_prefetch (p, 1, 0); + __builtin_prefetch (p, 1, 1); + __builtin_prefetch (p, 1, 2); + __builtin_prefetch (p, 1, 3); +} + +/* { dg-final { scan-assembler-not "ntl.all\t" } } */ +/* { dg-final { scan-assembler-not "ntl.pall\t" } } */ +/* { dg-final { 
scan-assembler-not "ntl.p1\t" } } */ +/* { dg-final { scan-assembler-times "prefetch.r" 4 } } */ +/* { dg-final { scan-assembler-times "prefetch.w" 4 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c new file mode 100644 index ..78a3afe68333 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c @@ -0,0 +1,20 @@ +/* { dg-do compile target { { rv64-*-*}}} */ +/* { dg-options "-march=rv64gc_zicbop_zihintntl -mabi=lp64" } */ + +void foo (char *p) +{ + __builtin_prefetch (p, 0, 0); + __builtin_prefetch (p, 0, 1); + __builtin_prefetch (p, 0, 2); + __builtin_prefetch (p, 0, 3); + __builtin_prefetch (p, 1, 0); + __builtin_prefetch (p, 1, 1); + __builtin_prefetch (p, 1, 2); +
[gcc r14-10422] Fix SSA_NAME leak due to def_stmt is removed before use_stmt.
https://gcc.gnu.org/g:13bfc385b0baebd22aeabb0d90915f2e9b18febe commit r14-10422-g13bfc385b0baebd22aeabb0d90915f2e9b18febe Author: liuhongt Date: Fri Jul 12 09:39:23 2024 +0800 Fix SSA_NAME leak due to def_stmt is removed before use_stmt. - _5 = __atomic_fetch_or_8 (&set_work_pending_p, 1, 0); - # DEBUG old => (long int) _5 + _6 = .ATOMIC_BIT_TEST_AND_SET (&set_work_pending_p, 0, 1, 0, __atomic_fetch_or_8); + # DEBUG old => NULL # DEBUG BEGIN_STMT - # DEBUG D#2 => _5 & 1 + # DEBUG D#2 => NULL ... - _10 = ~_5; - _8 = (_Bool) _10; - # DEBUG ret => _8 + _8 = _6 == 0; + # DEBUG ret => (_Bool) _10 confirmed. convert_atomic_bit_not does this, it checks for single_use and removes the def, failing to release the name (which would fix this up IIRC). Note the function removes stmts in "wrong" order (before uses of LHS are removed), so it requires larger surgery. And it leaks SSA names. gcc/ChangeLog: PR target/115872 * tree-ssa-ccp.cc (convert_atomic_bit_not): Remove use_stmt after use_nop_stmt is removed. (optimize_atomic_bit_test_and): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115872.c: New test. 
(cherry picked from commit a8209237dc46dc4db7d9d8e3807e6c93734c64b5) Diff: --- gcc/testsuite/gcc.target/i386/pr115872.c | 16 gcc/tree-ssa-ccp.cc | 12 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr115872.c b/gcc/testsuite/gcc.target/i386/pr115872.c new file mode 100644 index ..937004456d37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115872.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +long set_work_pending_p; +_Bool set_work_pending() { + _Bool __trans_tmp_1; + long mask = 1, old = __atomic_fetch_or(&set_work_pending_p, mask, 0); + __trans_tmp_1 = old & mask; + return !__trans_tmp_1; +} +void __queue_work() { + _Bool ret = set_work_pending(); + if (ret) +__queue_work(); +} + diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index f6a5cd0ee6e0..cc78ff20bb81 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -3331,9 +3331,10 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, return nullptr; gimple_stmt_iterator gsi; - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); tree var = make_ssa_name (TREE_TYPE (lhs)); + /* use_stmt need to be removed after use_nop_stmt, + so use_lhs can be released. 
*/ + gimple *use_stmt_removal = use_stmt; use_stmt = gimple_build_assign (var, BIT_AND_EXPR, lhs, and_mask); gsi = gsi_for_stmt (use_not_stmt); gsi_insert_before (&gsi, use_stmt, GSI_NEW_STMT); @@ -3343,6 +3344,8 @@ convert_atomic_bit_not (enum internal_fn fn, gimple *use_stmt, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_not_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); return use_stmt; } @@ -3645,8 +3648,7 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, */ } var = make_ssa_name (TREE_TYPE (use_rhs)); - gsi = gsi_for_stmt (use_stmt); - gsi_remove (&gsi, true); + gimple* use_stmt_removal = use_stmt; g = gimple_build_assign (var, BIT_AND_EXPR, use_rhs, and_mask); gsi = gsi_for_stmt (use_nop_stmt); @@ -3663,6 +3665,8 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, gsi_insert_after (&gsi, g, GSI_NEW_STMT); gsi = gsi_for_stmt (use_nop_stmt); gsi_remove (&gsi, true); + gsi = gsi_for_stmt (use_stmt_removal); + gsi_remove (&gsi, true); } } else
[gcc r15-2036] arm: Fix the expected output of the test pr111235.c [PR115894]
https://gcc.gnu.org/g:60ba989220d9dec07d82009b0dafe684e652577f commit r15-2036-g60ba989220d9dec07d82009b0dafe684e652577f Author: Surya Kumari Jangala Date: Mon Jul 15 00:03:06 2024 -0500 arm: Fix the expected output of the test pr111235.c [PR115894] With r15-1619-g3b9b8d6cfdf593, pr111235.c fails due to different registers used in ldrexd instruction. The key part of this test is that the compiler generates LDREXD. The registers used for that are pretty much irrelevant as they are not matched with any other operations within the test. This patch changes the test to test only for the mnemonic and not for any of the operands. 2024-07-15 Surya Kumari Jangala gcc/testsuite: PR testsuite/115894 * gcc.target/arm/pr111235.c: Update expected output. Diff: --- gcc/testsuite/gcc.target/arm/pr111235.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/arm/pr111235.c b/gcc/testsuite/gcc.target/arm/pr111235.c index b06a5bfb8e29..1f732cab983a 100644 --- a/gcc/testsuite/gcc.target/arm/pr111235.c +++ b/gcc/testsuite/gcc.target/arm/pr111235.c @@ -31,7 +31,7 @@ void t3 (long long *p, int x) atomic_store_explicit (p, x, memory_order_relaxed); } -/* { dg-final { scan-assembler-times "ldrexd\tr\[0-9\]+, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "ldrexd\t" 2 } } */ /* { dg-final { scan-assembler-not "ldrgt" } } */ /* { dg-final { scan-assembler-not "ldrdgt" } } */ /* { dg-final { scan-assembler-not "ldrexdgt" } } */