[gcc r15-2400] AVR: Propose to use attribute signal(n) via AVR-LibC's ISR_N.
https://gcc.gnu.org/g:922083693136be9516b9f916fd5139f419f8 commit r15-2400-g922083693136be9516b9f916fd5139f419f8 Author: Georg-Johann Lay Date: Tue Jul 30 09:16:02 2024 +0200 AVR: Propose to use attribute signal(n) via AVR-LibC's ISR_N. gcc/ * doc/extend.texi (AVR Function Attributes): Propose to use attribute signal(n) via AVR-LibC's ISR_N from avr/interrupt.h Diff: --- gcc/doc/extend.texi | 30 +++--- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 927aa24ab635..48b27ff9f390 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -5147,22 +5147,38 @@ the attribute, rather than providing the ISR name itself as the function name: @example __attribute__((signal(1))) -void my_handler (void) +static void my_handler (void) @{ // Code for __vector_1 @} +@end example -#include +Notice that the handler function needs not to be externally visible. +The recommended way to use these attributes is by means of the +@code{ISR_N} macro provided by @code{avr/interrupt.h} from +@w{@uref{https://www.nongnu.org/avr-libc/user-manual/group__avr__interrupts.html,,AVR-LibC}}: + +@example +#include -__attribute__((__signal__(PCINT0_vect_num, PCINT1_vect_num))) -static void my_pcint0_1_handler (void) +ISR_N (PCINT0_vect_num) +static void my_pcint0_handler (void) @{ - // Code for PCINT0 and PCINT1 (__vector_3 and __vector_4 - // on ATmega328). + // Code +@} + +ISR_N (ADC_vect_num, ISR_NOBLOCK) +static void my_adc_handler (void) +@{ +// Code @} @end example -Notice that the handler function needs not to be externally visible. +@code{ISR_N} can be specified more than once, in which case several +interrupt vectors are pointing to the same handler function. This +is similar to the @code{ISR_ALIASOF} macro provided by AVR-LibC, but +without the overhead introduced by @code{ISR_ALIASOF}. + @cindex @code{noblock} function attribute, AVR @item noblock
[gcc r14-10523] x86: Don't enable APX_F in 32-bit mode
https://gcc.gnu.org/g:ee6c5afec36aee14d2244a37a833ef7c2d16ab88 commit r14-10523-gee6c5afec36aee14d2244a37a833ef7c2d16ab88 Author: Lingling Kong Date: Wed Jul 24 14:52:47 2024 +0800 x86: Don't enable APX_F in 32-bit mode gcc/ChangeLog: PR target/115978 * config/i386/driver-i386.cc (host_detect_local_cpu): Enable APX_F only for 64-bit codegen. * config/i386/i386-options.cc (DEF_PTA): Skip PTA_APX_F if not in 64-bit mode. gcc/testsuite/ChangeLog: PR target/115978 * gcc.target/i386/pr115978-1.c: New test. * gcc.target/i386/pr115978-2.c: Ditto. Diff: --- gcc/config/i386/driver-i386.cc | 3 ++- gcc/config/i386/i386-options.cc| 3 ++- gcc/testsuite/gcc.target/i386/pr115978-1.c | 22 ++ gcc/testsuite/gcc.target/i386/pr115978-2.c | 6 ++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index bb53af4b2039..695d8e6cdf14 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -893,7 +893,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (has_feature (isa_names_table[i].feature)) { if (codegen_x86_64 - || isa_names_table[i].feature != FEATURE_UINTR) + || (isa_names_table[i].feature != FEATURE_UINTR + && isa_names_table[i].feature != FEATURE_APX_F)) options = concat (options, " ", isa_names_table[i].option, NULL); } diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index af450dba73dd..6c212a8edeb9 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2385,7 +2385,8 @@ ix86_option_override_internal (bool main_args_p, #define DEF_PTA(NAME) \ if (((processor_alias_table[i].flags & PTA_ ## NAME) != 0) \ && PTA_ ## NAME != PTA_64BIT \ - && (TARGET_64BIT || PTA_ ## NAME != PTA_UINTR) \ + && (TARGET_64BIT || (PTA_ ## NAME != PTA_UINTR \ +&& PTA_ ## NAME != PTA_APX_F))\ && !TARGET_EXPLICIT_ ## NAME ## _P (opts)) \ SET_TARGET_ ## NAME (opts); #include "i386-isa.def" diff --git 
a/gcc/testsuite/gcc.target/i386/pr115978-1.c b/gcc/testsuite/gcc.target/i386/pr115978-1.c new file mode 100644 index ..18a1c5f153a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115978-1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=native" } */ + +int +main () +{ + if (__builtin_cpu_supports ("apxf")) +{ +#ifdef __x86_64__ +# ifndef __APX_F__ + __builtin_abort (); +# endif +#else +# ifdef __APX_F__ + __builtin_abort (); +# endif +#endif + return 0; +} + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr115978-2.c b/gcc/testsuite/gcc.target/i386/pr115978-2.c new file mode 100644 index ..900d6eb096ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115978-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=native -mno-apxf" } */ + +#ifdef __APX_F__ +# error APX_F should be disabled +#endif
[gcc r15-2401] gfortran.dg/compiler-directive_2.f: Update dg-error
https://gcc.gnu.org/g:15158a8853a69f27a3c14644f35a93029dea6a84 commit r15-2401-g15158a8853a69f27a3c14644f35a93029dea6a84 Author: Tobias Burnus Date: Tue Jul 30 10:18:06 2024 +0200 gfortran.dg/compiler-directive_2.f: Update dg-error This is a fallout of commit r15-2378-g29b1587e7d3466 OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause [PR115559] where the '!GCC$' attributes were added in reverse order. Result: The error diagnostic for the stdcall/fastcall was reversed. Solution: Swap the order in dg-error. gcc/testsuite/ChangeLog: * gfortran.dg/compiler-directive_2.f: Update dg-error. Diff: --- gcc/testsuite/gfortran.dg/compiler-directive_2.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gfortran.dg/compiler-directive_2.f b/gcc/testsuite/gfortran.dg/compiler-directive_2.f index 79169a896ae6..c31349f3348c 100644 --- a/gcc/testsuite/gfortran.dg/compiler-directive_2.f +++ b/gcc/testsuite/gfortran.dg/compiler-directive_2.f @@ -5,6 +5,6 @@ ! Check for calling convention consitency ! in procedure-pointer assignments. ! - subroutine test() ! { dg-error "fastcall and stdcall attributes are not compatible" } + subroutine test() ! { dg-error "stdcall and fastcall attributes are not compatible" } cGCC$ attributes stdcall, fastcall::test end subroutine test
[gcc/devel/omp/gcc-14] gfortran.dg/compiler-directive_2.f: Update dg-error
https://gcc.gnu.org/g:4d76fe46875b14296ffc675b9580cb3c543d2310 commit 4d76fe46875b14296ffc675b9580cb3c543d2310 Author: Tobias Burnus Date: Tue Jul 30 10:35:11 2024 +0200 gfortran.dg/compiler-directive_2.f: Update dg-error This is a fallout of commit r15-2378-g29b1587e7d3466 OpenMP/Fortran: Fix handling of 'declare target' with 'link' clause [PR115559] where the '!GCC$' attributes were added in reverse order. Result: The error diagnostic for the stdcall/fastcall was reversed. Solution: Swap the order in dg-error. gcc/testsuite/ChangeLog: * gfortran.dg/compiler-directive_2.f: Update dg-error. (cherry picked from commit 15158a8853a69f27a3c14644f35a93029dea6a84) Diff: --- gcc/testsuite/ChangeLog.omp | 7 +++ gcc/testsuite/gfortran.dg/compiler-directive_2.f | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 055730cbdebb..3df8cf8b3b87 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,10 @@ +2024-07-30 Tobias Burnus + + Backported from master: + 2024-07-30 Tobias Burnus + + * gfortran.dg/compiler-directive_2.f: Update dg-error. + 2024-05-28 Tobias Burnus * c-c++-common/gomp/lastprivate-conditional-1.c: Remove diff --git a/gcc/testsuite/gfortran.dg/compiler-directive_2.f b/gcc/testsuite/gfortran.dg/compiler-directive_2.f index 79169a896ae6..c31349f3348c 100644 --- a/gcc/testsuite/gfortran.dg/compiler-directive_2.f +++ b/gcc/testsuite/gfortran.dg/compiler-directive_2.f @@ -5,6 +5,6 @@ ! Check for calling convention consitency ! in procedure-pointer assignments. ! - subroutine test() ! { dg-error "fastcall and stdcall attributes are not compatible" } + subroutine test() ! { dg-error "stdcall and fastcall attributes are not compatible" } cGCC$ attributes stdcall, fastcall::test end subroutine test
[gcc r15-2402] Fix warnings for tree formats in gfc_error
https://gcc.gnu.org/g:0450a143d2d132a8b3e6cff896f69e191c3316e2 commit r15-2402-g0450a143d2d132a8b3e6cff896f69e191c3316e2 Author: Paul-Antoine Arras Date: Fri Jun 28 14:14:38 2024 +0200 Fix warnings for tree formats in gfc_error This enables proper warnings for formats like %qD. gcc/c-family/ChangeLog: * c-format.cc (gcc_gfc_char_table): Add formats for tree objects. Diff: --- gcc/c-family/c-format.cc | 4 1 file changed, 4 insertions(+) diff --git a/gcc/c-family/c-format.cc b/gcc/c-family/c-format.cc index 07b91a1c7a1d..7614f1e97ead 100644 --- a/gcc/c-family/c-format.cc +++ b/gcc/c-family/c-format.cc @@ -850,6 +850,10 @@ static const format_char_info gcc_gfc_char_table[] = /* This will require a "locus" at runtime. */ { "L", 0, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "R", NULL }, + /* These will require a "tree" at runtime. */ + { "DFTV", 1, STD_C89, { T89_T, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q+", "'", NULL }, + { "E", 1, STD_C89, { T89_T, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q+", "", NULL }, + /* These will require nothing. */ { "<>",0, STD_C89, NOARGUMENTS, "", "", NULL }, { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
[gcc] Created branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' was created in namespace 'refs/users' pointing to: b2a5e99f9fb7... fortran: Continue MINLOC/MAXLOC second loop where the first
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]
https://gcc.gnu.org/g:c1eb87cb9470518cf499765fd72c8810f943c239 commit c1eb87cb9470518cf499765fd72c8810f943c239 Author: Mikael Morin Date: Thu Jul 25 18:04:13 2024 +0200 fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608] Add the tests covering the various cases for which we are about to implement inline expansion of MINLOC and MAXLOC. Those are cases where the DIM argument is not present. PR fortran/90608 gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_7.f90: New test. * gfortran.dg/maxloc_with_mask_1.f90: New test. * gfortran.dg/minloc_8.f90: New test. * gfortran.dg/minloc_with_mask_1.f90: New test. Diff: --- gcc/testsuite/gfortran.dg/maxloc_7.f90 | 220 + gcc/testsuite/gfortran.dg/maxloc_with_mask_1.f90 | 393 +++ gcc/testsuite/gfortran.dg/minloc_8.f90 | 220 + gcc/testsuite/gfortran.dg/minloc_with_mask_1.f90 | 392 ++ 4 files changed, 1225 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/maxloc_7.f90 b/gcc/testsuite/gfortran.dg/maxloc_7.f90 new file mode 100644 index ..a875083052a9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/maxloc_7.f90 @@ -0,0 +1,220 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline maxloc implementation, +! when there is no optional argument. 
+ +program p + implicit none + integer, parameter :: data5(*) = (/ 1, 7, 2, 7, 0 /) + integer, parameter :: data64(*) = (/ 2, 5, 4, 6, 0, 9, 3, 5, & + 4, 4, 1, 7, 3, 2, 1, 2, & + 5, 4, 6, 0, 9, 3, 5, 4, & + 4, 1, 7, 3, 2, 1, 2, 5, & + 4, 6, 0, 9, 3, 5, 4, 4, & + 1, 7, 3, 2, 1, 2, 5, 4, & + 6, 0, 9, 3, 5, 4, 4, 1, & + 7, 3, 2, 1, 2, 5, 4, 6 /) + call check_int_const_shape_rank_1 + call check_int_const_shape_rank_3 + call check_int_const_shape_empty_4 + call check_int_alloc_rank_1 + call check_int_alloc_rank_3 + call check_int_alloc_empty_4 + call check_real_const_shape_rank_1 + call check_real_const_shape_rank_3 + call check_real_const_shape_empty_4 + call check_real_alloc_rank_1 + call check_real_alloc_rank_3 + call check_real_alloc_empty_4 + call check_int_lower_bounds + call check_real_lower_bounds + call check_all_nans + call check_dependencies +contains + subroutine check_int_const_shape_rank_1() +integer :: a(5) +integer, allocatable :: m(:) +a = data5 +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 11 +if (any(m /= (/ 2 /))) stop 12 + end subroutine + subroutine check_int_const_shape_rank_3() +integer :: a(4,4,4) +integer, allocatable :: m(:) +a = reshape(data64, shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 21 +if (any(m /= (/ 2, 2, 1 /))) stop 22 + end subroutine + subroutine check_int_const_shape_empty_4() +integer :: a(9,3,0,7) +integer, allocatable :: m(:) +a = reshape((/ integer:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 31 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 32 + end subroutine + subroutine check_int_alloc_rank_1() +integer, allocatable :: a(:) +integer, allocatable :: m(:) +allocate(a(5)) +a(:) = data5 +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 41 +if (any(m /= (/ 2 /))) stop 42 + end subroutine + subroutine check_int_alloc_rank_3() +integer, allocatable :: a(:,:,:) +integer, allocatable :: m(:) +allocate(a(4,4,4)) +a(:,:,:) = reshape(data64, shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 51 +if (any(m /= (/ 2, 
2, 1 /))) stop 52 + end subroutine + subroutine check_int_alloc_empty_4() +integer, allocatable :: a(:,:,:,:) +integer, allocatable :: m(:) +allocate(a(9,3,0,7)) +a(:,:,:,:) = reshape((/ integer:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 61 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 62 + end subroutine + subroutine check_real_const_shape_rank_1() +real :: a(5) +integer, allocatable :: m(:) +a = (/ real:: data5 /) +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 71 +if (any(m /= (/ 2 /))) stop 72 + end subroutine + subroutine check_real_const_shape_rank_3() +real :: a(4,4,4) +integer, allocatable :: m(:) +a = reshape((/ real:: data64 /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 81 +if (any(m /= (/ 2, 2, 1 /))) stop 82 + end subroutine + subroutine check_real_const_shape_empty_4() +real :: a(9,3,0,7) +integer, allocatable :: m(:) +a = reshape((/ real:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 91 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 92 + end subroutine
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined
https://gcc.gnu.org/g:0c6f911ae6433ab0c3cc668083bd45aee52723bf commit 0c6f911ae6433ab0c3cc668083bd45aee52723bf Author: Mikael Morin Date: Wed Nov 15 10:23:32 2023 +0100 fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined Disable rewriting of MINLOC/MAXLOC expressions for which inline code generation is supported. Update the gfc_inline_intrinsic_function_p predicate (already existing) for that, with the current state of MINLOC/MAXLOC inlining support, that is only the cases of a scalar result and non-CHARACTER argument for now. This change has no effect currently, as the MINLOC/MAXLOC front-end passes only change expressions of rank 1, but the inlining control predicate gfc_inline_intrinsic_function_p returns false for those. However, later changes will extend MINLOC/MAXLOC inline expansion support to array expressions and update the inlining control predicate, and this will become effective. gcc/fortran/ChangeLog: * frontend-passes.cc (optimize_minmaxloc): Skip if we can generate inline code for the unmodified expression. * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Add MINLOC and MAXLOC cases. 
Diff: --- gcc/fortran/frontend-passes.cc | 3 ++- gcc/fortran/trans-intrinsic.cc | 23 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc index 3c06018fdbbf..8e4c6310ba8d 100644 --- a/gcc/fortran/frontend-passes.cc +++ b/gcc/fortran/frontend-passes.cc @@ -2277,7 +2277,8 @@ optimize_minmaxloc (gfc_expr **e) || fn->value.function.actual == NULL || fn->value.function.actual->expr == NULL || fn->value.function.actual->expr->ts.type == BT_CHARACTER - || fn->value.function.actual->expr->rank != 1) + || fn->value.function.actual->expr->rank != 1 + || gfc_inline_intrinsic_function_p (fn)) return; *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where); diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 9f3c3ce47bc5..cc0d00f4e399 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -11650,6 +11650,29 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr) case GFC_ISYM_TRANSPOSE: return true; +case GFC_ISYM_MINLOC: +case GFC_ISYM_MAXLOC: + { + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + gfc_actual_arglist *array_arg = expr->value.function.actual; + gfc_actual_arglist *dim_arg = array_arg->next; + + gfc_expr *array = array_arg->expr; + gfc_expr *dim = dim_arg->expr; + + if (!(array->ts.type == BT_INTEGER + || array->ts.type == BT_REAL)) + return false; + + if (array->rank == 1 && dim != nullptr) + return true; + + return false; + } + default: return false; }
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.
https://gcc.gnu.org/g:826cbd802b72c2a5d450493f86e7ee0106432282 commit 826cbd802b72c2a5d450493f86e7ee0106432282 Author: Mikael Morin Date: Mon Jul 22 13:27:24 2024 +0200 fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC. Hello, this fixes a null pointer dereference with absent optional dummy passed as BACK argument of MINLOC/MAXLOC. Tested for regression on x86_64-linux. OK for master? -- >8 -- Protect the evaluation of BACK with a check that the reference is non-null in case the expression is an optional dummy, in the inline code generated for MINLOC and MAXLOC. This change contains a revert of the non-testsuite part of commit r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3, which factored the evaluation of BACK out of the loop using the scalarizer. It was a bad idea, because delegating the argument evaluation to the scalarizer makes it cumbersome to add a null pointer check next to the evaluation. Instead, evaluate BACK at the beginning, before scalarization, add a check that the argument is present if necessary, and evaluate the resulting expression to a variable, before using the variable in the inline code. gcc/fortran/ChangeLog: * trans-intrinsic.cc (maybe_absent_optional_variable): New function. (gfc_conv_intrinsic_minmaxloc): Remove BACK from scalarization and evaluate it before. Add a check that BACK is not null if the expression is an optional dummy. Save the resulting expression to a variable. Use the variable in the generated inline code. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_6.f90: New test. * gfortran.dg/minloc_7.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 81 ++-- gcc/testsuite/gfortran.dg/maxloc_6.f90 | 366 + gcc/testsuite/gfortran.dg/minloc_7.f90 | 366 + 3 files changed, 799 insertions(+), 14 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 180d0d7a88c6..9f3c3ce47bc5 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5209,6 +5209,50 @@ gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * expr) } +/* Tells whether the expression E is a reference to an optional variable whose + presence is not known at compile time. Those are variable references without + subreference; if there is a subreference, we can assume the variable is + present. We have to special case full arrays, which we represent with a fake + "full" reference, and class descriptors for which a reference to data is not + really a subreference. */ + +bool +maybe_absent_optional_variable (gfc_expr *e) +{ + if (!(e && e->expr_type == EXPR_VARIABLE)) +return false; + + gfc_symbol *sym = e->symtree->n.sym; + if (!sym->attr.optional) +return false; + + gfc_ref *ref = e->ref; + if (ref == nullptr) +return true; + + if (ref->type == REF_ARRAY + && ref->u.ar.type == AR_FULL + && ref->next == nullptr) +return true; + + if (!(sym->ts.type == BT_CLASS + && ref->type == REF_COMPONENT + && ref->u.c.component == CLASS_DATA (sym))) +return false; + + gfc_ref *next_ref = ref->next; + if (next_ref == nullptr) +return true; + + if (next_ref->type == REF_ARRAY + && next_ref->u.ar.type == AR_FULL + && next_ref->next == nullptr) +return true; + + return false; +} + + /* Remove unneeded kind= argument from actual argument list when the result conversion is dealt with in a different place. 
*/ @@ -5321,11 +5365,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) tree nonempty; tree lab1, lab2; tree b_if, b_else; + tree back; gfc_loopinfo loop; gfc_actual_arglist *actual; gfc_ss *arrayss; gfc_ss *maskss; - gfc_ss *backss; gfc_se arrayse; gfc_se maskse; gfc_expr *arrayexpr; @@ -5391,10 +5435,27 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) && maskexpr->symtree->n.sym->attr.dummy && maskexpr->symtree->n.sym->attr.optional; backexpr = actual->next->next->expr; - if (backexpr) -backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr); + + gfc_init_se (&backse, NULL); + if (backexpr == nullptr) +back = logical_false_node; + else if (maybe_absent_optional_variable (backexpr)) +{ + gcc_assert (backexpr->expr_type == EXPR_VARIABLE); + + gfc_conv_expr (&backse, backexpr); + tree present = gfc_conv_expr_present (backexpr->symtree->n.sym, false); + back = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + logical_type_node, present, backse.expr); +} else -backss = nullptr; +{ + gfc_conv_expr (&backse, backexpr); + back = backse.expr; +
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]
https://gcc.gnu.org/g:243591a6c8b99b1a337fd79643a27a9b287a2fed commit 243591a6c8b99b1a337fd79643a27a9b287a2fed Author: Mikael Morin Date: Tue Jul 9 21:05:40 2024 +0200 fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608] Enable inline code generation for the MINLOC and MAXLOC intrinsics, if the DIM argument is not present and ARRAY has rank 1. This case is similar to the case where the result is scalar (DIM present and rank 1 ARRAY), which already supports inline expansion of the intrinsic. Both cases return the same value, with the difference that the result is an array of size 1 if DIM is absent, whereas it's a scalar if DIM is present. So all there is to do for this case to work is hook the inline expansion with the scalarizer. PR fortran/90608 gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Set the scalarization rank based on the MINLOC/MAXLOC rank if needed. Call the inline code generation and set up the scalarizer array descriptor info in the MINLOC and MAXLOC cases. * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the result array element if the scalarizer is set up and we are inside the loop. Restrict library function call dispatch to the case where inline expansion is not supported. Declare an array result if the expression isn't scalar. Initialize the array result single element and return the result variable if the expression isn't scalar. (walk_inline_intrinsic_minmaxloc): New function. (walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases, dispatching to walk_inline_intrinsic_minmaxloc. (gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases. (gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1, regardless of DIM. 
Diff: --- gcc/fortran/trans-array.cc | 25 ++ gcc/fortran/trans-intrinsic.cc | 196 +++-- 2 files changed, 155 insertions(+), 66 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index c93a5f1e7543..0c78e1fecd8f 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4771,6 +4771,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_ISYM_UBOUND: case GFC_ISYM_LCOBOUND: case GFC_ISYM_UCOBOUND: + case GFC_ISYM_MAXLOC: + case GFC_ISYM_MINLOC: case GFC_ISYM_SHAPE: case GFC_ISYM_THIS_IMAGE: loop->dimen = ss->dimen; @@ -4820,6 +4822,29 @@ done: case GFC_SS_INTRINSIC: switch (expr->value.function.isym->id) { + case GFC_ISYM_MINLOC: + case GFC_ISYM_MAXLOC: + { + gfc_se se; + gfc_init_se (&se, nullptr); + se.loop = loop; + se.ss = ss; + gfc_conv_intrinsic_function (&se, expr); + gfc_add_block_to_block (&outer_loop->pre, &se.pre); + gfc_add_block_to_block (&outer_loop->post, &se.post); + + info->descriptor = se.expr; + + info->data = gfc_conv_array_data (info->descriptor); + info->data = gfc_evaluate_now (info->data, &outer_loop->pre); + + info->offset = gfc_index_zero_node; + info->start[0] = gfc_index_zero_node; + info->end[0] = gfc_index_zero_node; + info->stride[0] = gfc_index_one_node; + continue; + } + /* Fall through to supply start and stride. */ case GFC_ISYM_LBOUND: case GFC_ISYM_UBOUND: diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index cc0d00f4e399..7b7d0102b86a 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5273,66 +5273,69 @@ strip_kind_from_actual (gfc_actual_arglist * actual) we need to handle. For performance reasons we sometimes create two loops instead of one, where the second one is much simpler. 
Examples for minloc intrinsic: - 1) Result is an array, a call is generated - 2) Array mask is used and NaNs need to be supported: - limit = Infinity; - pos = 0; - S = from; - while (S <= to) { - if (mask[S]) { - if (pos == 0) pos = S + (1 - from); - if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; } - } - S++; - } - goto lab2; - lab1:; - while (S <= to) { - if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); } - S++; - } - lab2:; - 3) NaNs need to be supported, but it is known at compile time or cheaply - at runtime whether array is nonempty or not: - limit = Infinity; - pos = 0; -
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Outline array bound check generation code
https://gcc.gnu.org/g:0090c54560c24fead1245245626fe7afe6339373 commit 0090c54560c24fead1245245626fe7afe6339373 Author: Mikael Morin Date: Wed Apr 10 21:18:03 2024 +0200 fortran: Outline array bound check generation code The next patch will need reindenting of the array bound check generation code. This outlines it to its own function beforehand, reducing the churn in the next patch. -- >8 -- gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Move array bound check generation code... (add_check_section_in_array_bounds): ... here as a new function. Diff: --- gcc/fortran/trans-array.cc | 297 ++--- 1 file changed, 143 insertions(+), 154 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 0c78e1fecd8f..99a603a3afb2 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4736,6 +4736,146 @@ gfc_conv_section_startstride (stmtblock_t * block, gfc_ss * ss, int dim) } +/* Generate in INNER the bounds checking code along the dimension DIM for + the array associated with SS_INFO. */ + +static void +add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info, + int dim) +{ + gfc_expr *expr = ss_info->expr; + locus *expr_loc = &expr->where; + const char *expr_name = expr->symtree->name; + + gfc_array_info *info = &ss_info->data.array; + + bool check_upper; + if (dim == info->ref->u.ar.dimen - 1 + && info->ref->u.ar.as->type == AS_ASSUMED_SIZE) +check_upper = false; + else +check_upper = true; + + /* Zero stride is not allowed. */ + tree tmp = fold_build2_loc (input_location, EQ_EXPR, logical_type_node, + info->stride[dim], gfc_index_zero_node); + char * msg = xasprintf ("Zero stride is not allowed, for dimension %d " + "of array '%s'", dim + 1, expr_name); + gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg); + free (msg); + + tree desc = info->descriptor; + + /* This is the run-time equivalent of resolve.cc's + check_dimension. 
The logical is more readable there + than it is here, with all the trees. */ + tree lbound = gfc_conv_array_lbound (desc, dim); + tree end = info->end[dim]; + tree ubound = check_upper ? gfc_conv_array_ubound (desc, dim) : NULL_TREE; + + /* non_zerosized is true when the selected range is not + empty. */ + tree stride_pos = fold_build2_loc (input_location, GT_EXPR, logical_type_node, +info->stride[dim], gfc_index_zero_node); + tmp = fold_build2_loc (input_location, LE_EXPR, logical_type_node, +info->start[dim], end); + stride_pos = fold_build2_loc (input_location, TRUTH_AND_EXPR, + logical_type_node, stride_pos, tmp); + + tree stride_neg = fold_build2_loc (input_location, LT_EXPR, logical_type_node, +info->stride[dim], gfc_index_zero_node); + tmp = fold_build2_loc (input_location, GE_EXPR, logical_type_node, +info->start[dim], end); + stride_neg = fold_build2_loc (input_location, TRUTH_AND_EXPR, + logical_type_node, stride_neg, tmp); + tree non_zerosized = fold_build2_loc (input_location, TRUTH_OR_EXPR, + logical_type_node, stride_pos, + stride_neg); + + /* Check the start of the range against the lower and upper + bounds of the array, if the range is not empty. + If upper bound is present, include both bounds in the + error message. 
*/ + if (check_upper) +{ + tmp = fold_build2_loc (input_location, LT_EXPR, logical_type_node, +info->start[dim], lbound); + tmp = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node, +non_zerosized, tmp); + tree tmp2 = fold_build2_loc (input_location, GT_EXPR, logical_type_node, + info->start[dim], ubound); + tmp2 = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node, + non_zerosized, tmp2); + msg = xasprintf ("Index '%%ld' of dimension %d of array '%s' outside of " + "expected range (%%ld:%%ld)", dim + 1, expr_name); + gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg, + fold_convert (long_integer_type_node, info->start[dim]), + fold_convert (long_integer_type_node, lbound), + fold_convert (long_integer_type_node, ubound)); + gfc_trans_runtime_check (true, false, tmp2, inner, expr_loc, msg, + fold_convert (long_integer_type_node, info->start[dim]), + fold_convert (long_integer_type_node, lbound), + fold_convert (long_integer_type_node, ubound)); + free (
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]
https://gcc.gnu.org/g:31aa4cd8489361a61cbf1f92327934bcc860a2f2 commit 31aa4cd8489361a61cbf1f92327934bcc860a2f2 Author: Mikael Morin Date: Thu Nov 16 22:14:41 2023 +0100 fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608] Enable generation of inline code for the MINLOC and MAXLOC intrinsics, if the ARRAY argument is of integral type and of any rank (only the rank 1 case was previously inlined), and neither DIM nor MASK arguments are present. This needs a few adjustments in gfc_conv_intrinsic_minmaxloc, mainly to replace the single variables POS and OFFSET, with collections of variables, one variable per dimension each. The restriction to integral ARRAY and absent MASK limits the scope of the change to the cases where we generate single loop inline code. The code generation for the second loop is only accessible with ARRAY of rank 1, so it can continue using a single variable. A later change will extend inlining to the double loop cases. There is some bounds checking code that was previously handled by the library, and that needed some changes in the scalarizer to avoid regressing. The bounds check code generation was already done by the scalarizer, but it was only applying to array reference sections, checking both individual array bounds and shape conformability between all the arrays involved. For MINLOC or MAXLOC, enable the conformability check between all the scalarized arrays, and disable the check that the array reference is within its bounds. PR fortran/90608 gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC result upper bound using the ARRAY argument rank. Adjust the error message for intrinsic result arrays. Only check array bounds for array references. Move bound check decision code... (bounds_check_needed): ... here as a new predicate. Allow bound check for MINLOC/MAXLOC intrinsic results. * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the result array upper bound to the rank of ARRAY. 
Update the NONEMPTY variable to depend on the non-empty extent of every dimension. Use one variable per dimension instead of a single variable for the position and the offset. Update their declaration, initialization, and update to affect the variable of each dimension. Use the first variable only in areas only accessed with rank 1 ARRAY argument. Set every element of the result using its corresponding variable. (gfc_inline_intrinsic_function_p): Return true for integral ARRAY and absent DIM and MASK. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error message emitted by the scalarizer. Diff: --- gcc/fortran/trans-array.cc| 70 +--- gcc/fortran/trans-intrinsic.cc| 148 +++--- gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 | 4 +- 3 files changed, 165 insertions(+), 57 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 99a603a3afb2..c9d63d13509d 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4876,6 +4876,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info, } +/* Tells whether we need to generate bounds checking code for the array + associated with SS. */ + +bool +bounds_check_needed (gfc_ss *ss) +{ + /* Catch allocatable lhs in f2003. */ + if (flag_realloc_lhs && ss->no_bounds_check) +return false; + + gfc_ss_info *ss_info = ss->info; + if (ss_info->type == GFC_SS_SECTION) +return true; + + if (!(ss_info->type == GFC_SS_INTRINSIC + && ss_info->expr + && ss_info->expr->expr_type == EXPR_FUNCTION)) +return false; + + gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym; + if (!(isym + && (isym->id == GFC_ISYM_MAXLOC + || isym->id == GFC_ISYM_MINLOC))) +return false; + + return gfc_inline_intrinsic_function_p (ss_info->expr); +} + + /* Calculates the range start and stride for a SS chain. Also gets the descriptor and data pointer. The range of vector subscripts is the size of the vector. Array bounds are also checked. 
*/ @@ -4977,10 +5006,19 @@ done: info->data = gfc_conv_array_data (info->descriptor); info->data = gfc_evaluate_now (info->data, &outer_loop->pre); - info->offset = gfc_index_zero_node; + gfc_expr *array = expr->value.function.actual->expr; + tree rank = build_int_cst (gfc_array_index_type, array->rank); + + tree tmp = fold_build2_loc (input_location, MINUS_EXPR, +
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]
https://gcc.gnu.org/g:26fc4fb4228dc6584ee9153498cc85a16a5ec822 commit 26fc4fb4228dc6584ee9153498cc85a16a5ec822 Author: Mikael Morin Date: Fri Nov 17 15:40:55 2023 +0100 fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608] Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY is of integral type, DIM is not present, and MASK is present and is scalar (only absent MASK or rank 1 ARRAY were inlined before). Scalar masks are implemented with a wrapping condition around the code one would generate if MASK wasn't present, so they are easy to support once inline code without MASK is working. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate variable initialization for each dimension in the else branch of the toplevel condition. (gfc_inline_intrinsic_function_p): Return TRUE for scalar MASK. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_7.f90: Additionally accept the error message reported by the scalarizer. Diff: --- gcc/fortran/trans-intrinsic.cc| 13 - gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index ac8bd2d48123..855208717973 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5886,7 +5886,6 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskss == NULL) { - gcc_assert (loop.dimen == 1); tree ifmask; gfc_init_se (&maskse, NULL); @@ -5901,7 +5900,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) the pos variable the same way as above. 
*/ gfc_init_block (&elseblock); - gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node); + for (int i = 0; i < loop.dimen; i++) + gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node); elsetmp = gfc_finish_block (&elseblock); ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask); tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp); @@ -11795,9 +11795,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr) if (array->rank == 1) return true; - if (array->ts.type == BT_INTEGER - && dim == nullptr - && mask == nullptr) + if (array->ts.type != BT_INTEGER + || dim != nullptr) + return false; + + if (mask == nullptr + || mask->rank == 0) return true; return false; diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 index 206a29b149da..3aa9d3dcebee 100644 --- a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 +++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-options "-fbounds-check" } -! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" } +! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." } module tst contains subroutine foo(res) @@ -18,4 +18,4 @@ program main integer :: res(3) call foo(res) end program main -! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" } +! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]
https://gcc.gnu.org/g:c8f1b21681c6d3e4f313044545d5426f50cb commit c8f1b21681c6d3e4f313044545d5426f50cb Author: Mikael Morin Date: Fri Nov 17 16:47:26 2023 +0100 fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608] Enable generation of inline MINLOC/MAXLOC code in the case where DIM is not present, and either ARRAY is of floating point type or MASK is an array. Those cases are the remaining bits to fully support inlining of non-CHARACTER MINLOC/MAXLOC without DIM. They are treated together because they generate similar code, the NANs for REAL types being handled a bit like a second level of masking. These are the cases for which we generate two sets of loops. This change affects the code generating the second loop, that was previously accessible only in the cases where ARRAY has rank 1. The single variable initialization and update are changed to apply to multiple variables, one per dimension. This change generates slightly worse code if ARRAY has rank 1. Indeed the code we used to generate was: for (idx1 in lower..upper) { ... if (...) { ... break; } } for (idx2 in idx1..upper) { ... } which avoided starting the second loop from lower, skipping in the second loop the elements already processed in the first one. Unfortunately, extending that code the obvious way to apply to rank > 1 leads to wrong code: for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... goto second_loop; } } } second_loop: for (idx21 in index11..upper1) { for (idx22 in index12..upper2) { ... } } That code is incorrect, as the loop over idx22, being nested, may be run more than once, and the second run should restart from lower2, not index12. So with this change, we instead generate as the second set of loops: ... second_loop: for (idx21 in lower1..upper1) { for (idx22 in lower2..upper2) { ... } } which means the second set of loops processes again elements already processed by the first one, and the rank 1 case becomes: for (idx1 in lower..upper) { ... if (...) 
{ ... break; } } for (idx2 in lower..upper) { ... } processing the first elements twice as well, which was not the case before. A later change will avoid the duplicate processing and restore the generated code in the rank 1 case. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Initialize and update all the variables. Put the label and goto in the outermost scalarizer loop. Don't start the second loop where the first stopped. (gfc_inline_intrinsic_function_p): Also return TRUE for array MASK or for any REAL type. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_5.f90: Additionally accept error messages reported by the scalarizer. * gfortran.dg/maxloc_bounds_6.f90: Ditto. Diff: --- gcc/fortran/trans-intrinsic.cc| 127 +- gcc/testsuite/gfortran.dg/maxloc_bounds_5.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_6.f90 | 4 +- 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 855208717973..bae3b49a9498 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5332,12 +5332,55 @@ strip_kind_from_actual (gfc_actual_arglist * actual) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); } S++; } - B: ARRAY has rank 1, and DIM is absent. Use the same code as the scalar - case and wrap the result in an array. - C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent. - Generate code similar to the single loop scalar case, but using one - variable per dimension, for example if ARRAY has rank 2: - 4) NAN's aren't supported, no MASK: + B: Array result, non-CHARACTER type, DIM
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]
https://gcc.gnu.org/g:b2a5e99f9fb724b9838533c1eed5f4fc024be633 commit b2a5e99f9fb724b9838533c1eed5f4fc024be633 Author: Mikael Morin Date: Thu Jul 25 12:27:09 2024 +0200 fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608] Continue the second set of loops where the first one stopped in the generated inline MINLOC/MAXLOC code in the cases where the generated code contains two sets of loops. This fixes a regression that was introduced when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank greater than 1, non-scalar MASK and no DIM arguments. In the cases where two sets of loops are generated as inline MINLOC/MAXLOC code, we previously generated code such as (for rank 2 ARRAY, so with two levels of nesting): for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... goto second_loop; } } } second_loop: for (idx21 in lower1..upper1) { for (idx22 in lower2..upper2) { ... } } which means we process the first elements twice, once in the first set of loops and once in the second one. This change avoids this duplicate processing by using a conditional as lower bound for the second set of loops, generating code like: second_loop_entry = false; for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... second_loop_entry = true; goto second_loop; } } } second_loop: for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1) { for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2) { ... second_loop_entry = false; } } It was expected that the compiler optimizations would be able to remove the state variable second_loop_entry. It is the case if ARRAY has rank 1 (so without loop nesting), the variable is removed and the loop bounds become unconditional, which restores previously generated code, fully fixing the regression. 
For larger rank, unfortunately, the state variable and conditional loop bounds remain, but those cases were previously using library calls, so it's not a regression. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set of index variables. Set them using the loop indexes before leaving the first set of loops. Generate a new loop entry predicate. Set it before leaving the first set of loops. Clear it in the body of the second set of loops. For the second set of loops, update each loop variable to use the corresponding index variable if the predicate variable is set. Diff: --- gcc/fortran/trans-intrinsic.cc | 33 +++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index bae3b49a9498..29367c69d16b 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5342,6 +5342,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = 0; pos1 = 1 S1 = from1; +second_loop_entry = false; while (S1 <= to1) { S0 = from0; while (s0 <= to0 { @@ -5354,6 +5355,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) limit = a[S1][S0]; pos0 = S0 + (1 - from0); pos1 = S1 + (1 - from1); +second_loop_entry = true; goto lab1; } } @@ -5363,9 +5365,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual) } goto lab2; lab1:; -S1 = from1; +S1 = second_loop_entry ? S1 : from1; while (S1 <= to1) { - S0 = from0; + S0 = second_loop_entry ? S0 : from0; while (S0 <= to0) { if (mask[S1][S0]) if (a[S1][S0] < limit) { @@ -5373,6 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = S + (1 - from0); pos1 = S + (1 - from1); } +second_loop_entry = false; S0++; } S1++; @@ -5444,6 +5447,7
[gcc r15-2403] testsuite: fix dg-do run whitespace
https://gcc.gnu.org/g:136f364e26d9ad4f05e0005e480813cdc8f56c96 commit r15-2403-g136f364e26d9ad4f05e0005e480813cdc8f56c96 Author: Sam James Date: Tue Jul 30 11:08:31 2024 +0100 testsuite: fix dg-do run whitespace This caused the tests to not be run. I may do further passes for non-run next. Tested on x86_64-pc-linux-gnu and checked test logs before/after. PR c/53548 PR target/101529 PR tree-optimization/102359 * c-c++-common/fam-in-union-alone-in-struct-1.c: Fix whitespace in dg directive. * c-c++-common/fam-in-union-alone-in-struct-2.c: Likewise. * c-c++-common/torture/builtin-shufflevector-2.c: Likewise. * g++.dg/pr102359_2.C: Likewise. * g++.target/i386/mvc1.C: Likewise. Diff: --- gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c | 2 +- gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c | 2 +- gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c | 2 +- gcc/testsuite/g++.dg/pr102359_2.C| 2 +- gcc/testsuite/g++.target/i386/mvc1.C | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c index 7d4721aa95ac..39ebf17850bf 100644 --- a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c +++ b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-1.c @@ -1,6 +1,6 @@ /* testing the correct usage of flexible array members in unions and alone in structures. 
*/ -/* { dg-do run} */ +/* { dg-do run } */ /* { dg-options "-Wpedantic" } */ union with_fam_1 { diff --git a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c index 3743f9e7dac5..93f9d5128f6e 100644 --- a/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c +++ b/gcc/testsuite/c-c++-common/fam-in-union-alone-in-struct-2.c @@ -1,6 +1,6 @@ /* testing the correct usage of flexible array members in unions and alone in structures: initialization */ -/* { dg-do run} */ +/* { dg-do run } */ /* { dg-options "-O2" } */ union with_fam_1 { diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c index b1ffc95e39ae..a84e0a626211 100644 --- a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c @@ -1,4 +1,4 @@ -/* { dg-do run} */ +/* { dg-do run } */ /* PR target/101529 */ typedef unsigned char C; typedef unsigned char __attribute__((__vector_size__ (8))) V; diff --git a/gcc/testsuite/g++.dg/pr102359_2.C b/gcc/testsuite/g++.dg/pr102359_2.C index d026d727dd5c..1b3f6147dec1 100644 --- a/gcc/testsuite/g++.dg/pr102359_2.C +++ b/gcc/testsuite/g++.dg/pr102359_2.C @@ -1,6 +1,6 @@ /* PR middle-end/102359 ICE gimplification failed since r12-3433-ga25e0b5e6ac8a77a. */ -/* { dg-do run} */ +/* { dg-do run } */ /* { dg-options "-ftrivial-auto-var-init=zero" } */ /* { dg-require-effective-target c++17 } */ diff --git a/gcc/testsuite/g++.target/i386/mvc1.C b/gcc/testsuite/g++.target/i386/mvc1.C index b307d01ace63..348bd0ec7202 100644 --- a/gcc/testsuite/g++.target/i386/mvc1.C +++ b/gcc/testsuite/g++.target/i386/mvc1.C @@ -1,4 +1,4 @@ -/* { dg-do run} */ +/* { dg-do run } */ /* { dg-require-ifunc "" } */ __attribute__((target_clones("avx","arch=slm","arch=core-avx2","default")))
[gcc r15-2404] c++: make source_location follow DECL_RAMP_FN
https://gcc.gnu.org/g:265aa32062167a5b299c2ffb616edce5997b64bf commit r15-2404-g265aa32062167a5b299c2ffb616edce5997b64bf Author: Arsen Arsenović Date: Thu Jul 25 16:13:24 2024 +0200 c++: make source_location follow DECL_RAMP_FN This fixes the value of current_function in compiler generated coroutine code. PR c++/110855 - std::source_location doesn't work with C++20 coroutine gcc/cp/ChangeLog: PR c++/110855 * cp-gimplify.cc (fold_builtin_source_location): Use the name of the DECL_RAMP_FN of the current function if present. gcc/testsuite/ChangeLog: PR c++/110855 * g++.dg/coroutines/pr110855.C: New test. Diff: --- gcc/cp/cp-gimplify.cc | 9 - gcc/testsuite/g++.dg/coroutines/pr110855.C | 61 ++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index 6a5e4cf62ca1..b88c3b7f370b 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -3933,7 +3933,14 @@ fold_builtin_source_location (const_tree t) const char *name = ""; if (current_function_decl) - name = cxx_printable_name (current_function_decl, 2); + { + /* If this is a coroutine, we should get the name of the user +function rather than the actor we generate. 
*/ + if (tree ramp = DECL_RAMP_FN (current_function_decl)) + name = cxx_printable_name (ramp, 2); + else + name = cxx_printable_name (current_function_decl, 2); + } val = build_string_literal (name); } diff --git a/gcc/testsuite/g++.dg/coroutines/pr110855.C b/gcc/testsuite/g++.dg/coroutines/pr110855.C new file mode 100644 index ..6b5c0147ec83 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr110855.C @@ -0,0 +1,61 @@ +// { dg-do run } +// { dg-output {^} } +// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} } +// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} } +// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} } +// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} } +// { dg-output {ReturnObject bar\(int, char, bool\)(\n|\r\n|\r)} } +// { dg-output {$} } +// https://gcc.gnu.org/PR110855 +#include +#include + +struct ReturnObject { + struct promise_type { +auto +initial_suspend(const std::source_location location = +std::source_location::current()) { + __builtin_puts (location.function_name ()); + return std::suspend_never{}; +} +auto +final_suspend(const std::source_location location = + std::source_location::current()) noexcept { + __builtin_puts (location.function_name ()); + return std::suspend_never{}; +} +auto +get_return_object(const std::source_location location = + std::source_location::current()) { + __builtin_puts (location.function_name ()); + return ReturnObject{std::coroutine_handle::from_promise(*this)}; +} +auto +unhandled_exception() { } +auto return_void(const std::source_location location = + std::source_location::current()) { + __builtin_puts (location.function_name ()); +} + }; + std::coroutine_handle<> handle; +}; + +struct awaitable : std::suspend_never +{ + void await_resume(const std::source_location location = + std::source_location::current()) + { + __builtin_puts (location.function_name ()); + } +}; + +ReturnObject +bar(int, char, bool) { + co_await awaitable{}; + co_return; +} 
+ +int +main() { + bar(1, 'a', false); +}
[gcc r15-2405] SVE intrinsics: Add strength reduction for division by constant.
https://gcc.gnu.org/g:7cde140863edea536c676096cbc3d84a6d1424e4 commit r15-2405-g7cde140863edea536c676096cbc3d84a6d1424e4 Author: Jennifer Schmitz Date: Tue Jul 16 01:59:50 2024 -0700 SVE intrinsics: Add strength reduction for division by constant. This patch folds SVE division where all divisor elements are the same power of 2 to svasrd (signed) or svlsr (unsigned). Tests were added to check 1) whether the transform is applied (existing test harness was amended), and 2) correctness using runtime tests for all input types of svdiv; for signed and unsigned integers, several corner cases were covered. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ * config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold): Implement strength reduction. gcc/testsuite/ * gcc.target/aarch64/sve/div_const_run.c: New test. * gcc.target/aarch64/sve/acle/asm/div_s32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc| 49 +++- .../gcc.target/aarch64/sve/acle/asm/div_s32.c | 273 +++-- .../gcc.target/aarch64/sve/acle/asm/div_s64.c | 273 +++-- .../gcc.target/aarch64/sve/acle/asm/div_u32.c | 201 +-- .../gcc.target/aarch64/sve/acle/asm/div_u64.c | 201 +-- .../gcc.target/aarch64/sve/div_const_run.c | 91 +++ 6 files changed, 1031 insertions(+), 57 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index a2268353ae31..d55bee0b72fa 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -746,6 +746,53 @@ public: } }; +class svdiv_impl : public rtx_code_function +{ +public: + CONSTEXPR svdiv_impl () +: rtx_code_function (DIV, UDIV, UNSPEC_COND_FDIV) {} + + gimple * + fold (gimple_folder &f) const override + { +tree divisor = gimple_call_arg (f.call, 2); +tree divisor_cst = uniform_integer_cst_p (divisor); + +if (!divisor_cst || !integer_pow2p (divisor_cst)) + return NULL; + +tree new_divisor; +gcall *call; + +if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1) + { + function_instance instance ("svlsr", functions::svlsr, + shapes::binary_uint_opt_n, MODE_n, + f.type_suffix_ids, GROUP_none, f.pred); + call = f.redirect_call (instance); + tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? 
divisor : divisor_cst; + new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d)); + } +else + { + if (tree_int_cst_sign_bit (divisor_cst) + || tree_to_shwi (divisor_cst) == 1) + return NULL; + + function_instance instance ("svasrd", functions::svasrd, + shapes::shift_right_imm, MODE_n, + f.type_suffix_ids, GROUP_none, f.pred); + call = f.redirect_call (instance); + new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t], + tree_log2 (divisor_cst)); + } + +gimple_call_set_arg (call, 2, new_divisor); +return call; + } +}; + + class svdot_impl : public function_base { public: @@ -3043,7 +3090,7 @@ FUNCTION (svcreate3, svcreate_impl, (3)) FUNCTION (svcreate4, svcreate_impl, (4)) FUNCTION (svcvt, svcvt_impl,) FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) -FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) +FUNCTION (svdiv, svdiv_impl,) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c index c49ca1aa5243..d5a23bf07262 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c @@ -2,6 +2,8 @@ #include "test_sve_acle.h" +#define MAXPOW 1<<30 + /* ** div_s32_m_tied1: ** sdivz0\.s, p0/m, z0\.s, z1\.s @@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t, z0 = svdiv_n_s32_m (p0, z1, x0), z0 = svdiv_m (p0, z1, x0)) +/* +** div_1_s32_m_tied1: +** sel z0\.s, p0, z0\.s, z0\.s +** ret +*/ +TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t, + z0 = svdiv_n_s32_m (p0, z0, 1), + z0 = svdiv_m (p0, z0, 1
[gcc r15-2406] c++: fix ICE on FUNCTION_DECLs inside coroutines [PR115906]
https://gcc.gnu.org/g:a362c9ca4ef6585e678f899705043a9aa10dd670 commit r15-2406-ga362c9ca4ef6585e678f899705043a9aa10dd670 Author: Arsen Arsenovic Date: Tue Jul 30 13:42:56 2024 +0200 c++: fix ICE on FUNCTION_DECLs inside coroutines [PR115906] When register_local_var_uses iterates over a BIND_EXPR's BIND_EXPR_VARS, it fails to account for the fact that FUNCTION_DECLs might be present, and later passes it to DECL_HAS_VALUE_EXPR_P. This leads to a tree check failure in DECL_HAS_VALUE_EXPR_P: tree check: expected var_decl or parm_decl or result_decl, have function_decl in register_local_var_uses We only care about PARM_DECL and VAR_DECL, so select only those. PR c++/115906 - [coroutines] missing diagnostic and ICE when co_await used as default argument in function declaration gcc/cp/ChangeLog: PR c++/115906 * coroutines.cc (register_local_var_uses): Only process PARM_DECLs and VAR_DECLs. gcc/testsuite/ChangeLog: PR c++/115906 * g++.dg/coroutines/coro-function-decl.C: New test. Diff: --- gcc/cp/coroutines.cc | 4 ++-- gcc/testsuite/g++.dg/coroutines/coro-function-decl.C | 19 +++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 2b16b4814d10..127a1c06b56e 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -3927,8 +3927,8 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) local_var.field_idx = local_var.field_id = NULL_TREE; /* Make sure that we only present vars to the tests below. */ - if (TREE_CODE (lvar) == TYPE_DECL - || TREE_CODE (lvar) == NAMESPACE_DECL) + if (TREE_CODE (lvar) != PARM_DECL + && TREE_CODE (lvar) != VAR_DECL) continue; /* We don't move static vars into the frame. 
*/ diff --git a/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C b/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C new file mode 100644 index ..86140569a76e --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/coro-function-decl.C @@ -0,0 +1,19 @@ +#include + +struct task +{ + struct promise_type + { +std::suspend_always initial_suspend () { return {}; } +std::suspend_always final_suspend () noexcept { return {}; } +void unhandled_exception () {} +task get_return_object () noexcept { return {}; } +void return_void () {} + }; +}; + +task foo () +{ + void bar (); + co_return; +}
[gcc r15-2407] c++: diagnose usage of co_await and co_yield in default args [PR115906]
https://gcc.gnu.org/g:0c382da0943dc7d14455ba2ada2f620a25bd1366 commit r15-2407-g0c382da0943dc7d14455ba2ada2f620a25bd1366 Author: Arsen Arsenović Date: Thu Jul 25 01:00:02 2024 +0200 c++: diagnose usage of co_await and co_yield in default args [PR115906] This is a partial fix for PR115906. Per [expr.await] 2s3, "An await-expression shall not appear in a default argument ([dcl.fct.default])". This patch introduces the diagnostic in that case, and in the case of a co_yield (as co_yield is defined in terms of co_await, so prerequisites of co_await hold). PR c++/115906 - [coroutines] missing diagnostic and ICE when co_await used as default argument in function declaration gcc/cp/ChangeLog: PR c++/115906 * parser.cc (cp_parser_unary_expression): Reject await expressions if use of local variables is currently forbidden. (cp_parser_yield_expression): Reject yield expressions if use of local variables is currently forbidden. gcc/testsuite/ChangeLog: PR c++/115906 * g++.dg/coroutines/pr115906-yield.C: New test. * g++.dg/coroutines/pr115906.C: New test. * g++.dg/coroutines/co-await-syntax-02-outside-fn.C: Don't rely on default arguments. * g++.dg/coroutines/co-yield-syntax-01-outside-fn.C: Ditto. Diff: --- gcc/cp/parser.cc | 17 .../coroutines/co-await-syntax-02-outside-fn.C | 2 +- .../coroutines/co-yield-syntax-01-outside-fn.C | 3 +- gcc/testsuite/g++.dg/coroutines/pr115906-yield.C | 29 gcc/testsuite/g++.dg/coroutines/pr115906.C | 32 ++ 5 files changed, 80 insertions(+), 3 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index e46cdfd20e19..eb102dea8299 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -9242,6 +9242,14 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk, if (expr == error_mark_node) return error_mark_node; + /* ... but, we cannot use co_await in default arguments. 
*/ + if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN) + { + error_at (kw_loc, + "% cannot be used in default arguments"); + return error_mark_node; + } + /* Handle [expr.await]. */ return cp_expr (finish_co_await_expr (kw_loc, expr)); } @@ -29651,6 +29659,15 @@ cp_parser_yield_expression (cp_parser* parser) else expr = cp_parser_assignment_expression (parser); + /* Similar to co_await, we cannot use co_yield in default arguments (as + co_awaits underlie co_yield). */ + if (parser->local_variables_forbidden_p & LOCAL_VARS_FORBIDDEN) +{ + error_at (kw_loc, + "% cannot be used in default arguments"); + return error_mark_node; +} + if (expr == error_mark_node) return expr; diff --git a/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C b/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C index 4ce5c2e04a0a..132128f27192 100644 --- a/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C +++ b/gcc/testsuite/g++.dg/coroutines/co-await-syntax-02-outside-fn.C @@ -2,4 +2,4 @@ #include "coro.h" -auto f (int x = co_await coro::suspend_always{}); // { dg-error {'co_await' cannot be used outside a function} } +auto x = co_await coro::suspend_always{}; // { dg-error {'co_await' cannot be used outside a function} } diff --git a/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C b/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C index 30db0e963b09..51c304625278 100644 --- a/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C +++ b/gcc/testsuite/g++.dg/coroutines/co-yield-syntax-01-outside-fn.C @@ -2,5 +2,4 @@ #include "coro.h" -auto f (int x = co_yield 5); // { dg-error {'co_yield' cannot be used outside a function} } - +auto x = co_yield 5; // { dg-error {'co_yield' cannot be used outside a function} } diff --git a/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C b/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C new file mode 100644 index ..f8b6ded5001c --- /dev/null +++ 
b/gcc/testsuite/g++.dg/coroutines/pr115906-yield.C @@ -0,0 +1,29 @@ +#include + +struct Promise; + +struct Handle : std::coroutine_handle { +using promise_type = Promise; +}; + +struct Promise { +Handle get_return_object() noexcept { +return {Handle::from_promise(*this)}; +} +std::suspend_never initial_suspend() const noexcept { return {}; } +std::suspend_never final_suspend() const noexcept { return {}; } +void return_void() const noexcept {} +void unhandled_exception() const noexcept {} +std::suspend_never yield_value(int) { return {}; } +}; + +Handle Coro() { +
[gcc] Deleted branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users' was deleted. It previously pointed to: b2a5e99f9fb7... fortran: Continue MINLOC/MAXLOC second loop where the first Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- b2a5e99... fortran: Continue MINLOC/MAXLOC second loop where the first c8f... fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR9060 26fc4fb... fortran: Inline integral MINLOC/MAXLOC with no DIM and scal 31aa4cd... fortran: Inline integral MINLOC/MAXLOC with no DIM and no M 0090c54... fortran: Outline array bound check generation code 243591a... fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 0c6f911... fortran: Disable frontend passes for MINLOC/MAXLOC if it's c1eb87c... fortran: Add tests covering inline MINLOC/MAXLOC without DI 826cbd8... fortran: Support optional dummy as BACK argument of MINLOC/
[gcc] Created branch 'mikael/heads/inline_minmaxloc_without_dim_v06' in namespace 'refs/users'
The branch 'mikael/heads/inline_minmaxloc_without_dim_v06' was created in namespace 'refs/users' pointing to: 215d87c9e87f... fortran: Continue MINLOC/MAXLOC second loop where the first
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC.
https://gcc.gnu.org/g:826cbd802b72c2a5d450493f86e7ee0106432282 commit 826cbd802b72c2a5d450493f86e7ee0106432282 Author: Mikael Morin Date: Mon Jul 22 13:27:24 2024 +0200 fortran: Support optional dummy as BACK argument of MINLOC/MAXLOC. Hello, this fixes a null pointer dereference with absent optional dummy passed as BACK argument of MINLOC/MAXLOC. Tested for regression on x86_64-linux. OK for master? -- >8 -- Protect the evaluation of BACK with a check that the reference is non-null in case the expression is an optional dummy, in the inline code generated for MINLOC and MAXLOC. This change contains a revert of the non-testsuite part of commit r15-1994-ga55d24b3cf7f4d07492bb8e6fcee557175b47ea3, which factored the evaluation of BACK out of the loop using the scalarizer. It was a bad idea, because delegating the argument evaluation to the scalarizer makes it cumbersome to add a null pointer check next to the evaluation. Instead, evaluate BACK at the beginning, before scalarization, add a check that the argument is present if necessary, and evaluate the resulting expression to a variable, before using the variable in the inline code. gcc/fortran/ChangeLog: * trans-intrinsic.cc (maybe_absent_optional_variable): New function. (gfc_conv_intrinsic_minmaxloc): Remove BACK from scalarization and evaluate it before. Add a check that BACK is not null if the expression is an optional dummy. Save the resulting expression to a variable. Use the variable in the generated inline code. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_6.f90: New test. * gfortran.dg/minloc_7.f90: New test. 
Diff: --- gcc/fortran/trans-intrinsic.cc | 81 ++-- gcc/testsuite/gfortran.dg/maxloc_6.f90 | 366 + gcc/testsuite/gfortran.dg/minloc_7.f90 | 366 + 3 files changed, 799 insertions(+), 14 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 180d0d7a88c6..9f3c3ce47bc5 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5209,6 +5209,50 @@ gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * expr) } +/* Tells whether the expression E is a reference to an optional variable whose + presence is not known at compile time. Those are variable references without + subreference; if there is a subreference, we can assume the variable is + present. We have to special case full arrays, which we represent with a fake + "full" reference, and class descriptors for which a reference to data is not + really a subreference. */ + +bool +maybe_absent_optional_variable (gfc_expr *e) +{ + if (!(e && e->expr_type == EXPR_VARIABLE)) +return false; + + gfc_symbol *sym = e->symtree->n.sym; + if (!sym->attr.optional) +return false; + + gfc_ref *ref = e->ref; + if (ref == nullptr) +return true; + + if (ref->type == REF_ARRAY + && ref->u.ar.type == AR_FULL + && ref->next == nullptr) +return true; + + if (!(sym->ts.type == BT_CLASS + && ref->type == REF_COMPONENT + && ref->u.c.component == CLASS_DATA (sym))) +return false; + + gfc_ref *next_ref = ref->next; + if (next_ref == nullptr) +return true; + + if (next_ref->type == REF_ARRAY + && next_ref->u.ar.type == AR_FULL + && next_ref->next == nullptr) +return true; + + return false; +} + + /* Remove unneeded kind= argument from actual argument list when the result conversion is dealt with in a different place. 
*/ @@ -5321,11 +5365,11 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) tree nonempty; tree lab1, lab2; tree b_if, b_else; + tree back; gfc_loopinfo loop; gfc_actual_arglist *actual; gfc_ss *arrayss; gfc_ss *maskss; - gfc_ss *backss; gfc_se arrayse; gfc_se maskse; gfc_expr *arrayexpr; @@ -5391,10 +5435,27 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) && maskexpr->symtree->n.sym->attr.dummy && maskexpr->symtree->n.sym->attr.optional; backexpr = actual->next->next->expr; - if (backexpr) -backss = gfc_get_scalar_ss (gfc_ss_terminator, backexpr); + + gfc_init_se (&backse, NULL); + if (backexpr == nullptr) +back = logical_false_node; + else if (maybe_absent_optional_variable (backexpr)) +{ + gcc_assert (backexpr->expr_type == EXPR_VARIABLE); + + gfc_conv_expr (&backse, backexpr); + tree present = gfc_conv_expr_present (backexpr->symtree->n.sym, false); + back = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + logical_type_node, present, backse.expr); +} else -backss = nullptr; +{ + gfc_conv_expr (&backse, backexpr); + back = backse.expr; +
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608]
https://gcc.gnu.org/g:c1eb87cb9470518cf499765fd72c8810f943c239 commit c1eb87cb9470518cf499765fd72c8810f943c239 Author: Mikael Morin Date: Thu Jul 25 18:04:13 2024 +0200 fortran: Add tests covering inline MINLOC/MAXLOC without DIM [PR90608] Add the tests covering the various cases for which we are about to implement inline expansion of MINLOC and MAXLOC. Those are cases where the DIM argument is not present. PR fortran/90608 gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_7.f90: New test. * gfortran.dg/maxloc_with_mask_1.f90: New test. * gfortran.dg/minloc_8.f90: New test. * gfortran.dg/minloc_with_mask_1.f90: New test. Diff: --- gcc/testsuite/gfortran.dg/maxloc_7.f90 | 220 + gcc/testsuite/gfortran.dg/maxloc_with_mask_1.f90 | 393 +++ gcc/testsuite/gfortran.dg/minloc_8.f90 | 220 + gcc/testsuite/gfortran.dg/minloc_with_mask_1.f90 | 392 ++ 4 files changed, 1225 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/maxloc_7.f90 b/gcc/testsuite/gfortran.dg/maxloc_7.f90 new file mode 100644 index ..a875083052a9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/maxloc_7.f90 @@ -0,0 +1,220 @@ +! { dg-do run } +! +! PR fortran/90608 +! Check the correct behaviour of the inline maxloc implementation, +! when there is no optional argument. 
+ +program p + implicit none + integer, parameter :: data5(*) = (/ 1, 7, 2, 7, 0 /) + integer, parameter :: data64(*) = (/ 2, 5, 4, 6, 0, 9, 3, 5, & + 4, 4, 1, 7, 3, 2, 1, 2, & + 5, 4, 6, 0, 9, 3, 5, 4, & + 4, 1, 7, 3, 2, 1, 2, 5, & + 4, 6, 0, 9, 3, 5, 4, 4, & + 1, 7, 3, 2, 1, 2, 5, 4, & + 6, 0, 9, 3, 5, 4, 4, 1, & + 7, 3, 2, 1, 2, 5, 4, 6 /) + call check_int_const_shape_rank_1 + call check_int_const_shape_rank_3 + call check_int_const_shape_empty_4 + call check_int_alloc_rank_1 + call check_int_alloc_rank_3 + call check_int_alloc_empty_4 + call check_real_const_shape_rank_1 + call check_real_const_shape_rank_3 + call check_real_const_shape_empty_4 + call check_real_alloc_rank_1 + call check_real_alloc_rank_3 + call check_real_alloc_empty_4 + call check_int_lower_bounds + call check_real_lower_bounds + call check_all_nans + call check_dependencies +contains + subroutine check_int_const_shape_rank_1() +integer :: a(5) +integer, allocatable :: m(:) +a = data5 +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 11 +if (any(m /= (/ 2 /))) stop 12 + end subroutine + subroutine check_int_const_shape_rank_3() +integer :: a(4,4,4) +integer, allocatable :: m(:) +a = reshape(data64, shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 21 +if (any(m /= (/ 2, 2, 1 /))) stop 22 + end subroutine + subroutine check_int_const_shape_empty_4() +integer :: a(9,3,0,7) +integer, allocatable :: m(:) +a = reshape((/ integer:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 31 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 32 + end subroutine + subroutine check_int_alloc_rank_1() +integer, allocatable :: a(:) +integer, allocatable :: m(:) +allocate(a(5)) +a(:) = data5 +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 41 +if (any(m /= (/ 2 /))) stop 42 + end subroutine + subroutine check_int_alloc_rank_3() +integer, allocatable :: a(:,:,:) +integer, allocatable :: m(:) +allocate(a(4,4,4)) +a(:,:,:) = reshape(data64, shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 51 +if (any(m /= (/ 2, 
2, 1 /))) stop 52 + end subroutine + subroutine check_int_alloc_empty_4() +integer, allocatable :: a(:,:,:,:) +integer, allocatable :: m(:) +allocate(a(9,3,0,7)) +a(:,:,:,:) = reshape((/ integer:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 61 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 62 + end subroutine + subroutine check_real_const_shape_rank_1() +real :: a(5) +integer, allocatable :: m(:) +a = (/ real:: data5 /) +m = maxloc(a) +if (size(m, dim=1) /= 1) stop 71 +if (any(m /= (/ 2 /))) stop 72 + end subroutine + subroutine check_real_const_shape_rank_3() +real :: a(4,4,4) +integer, allocatable :: m(:) +a = reshape((/ real:: data64 /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 3) stop 81 +if (any(m /= (/ 2, 2, 1 /))) stop 82 + end subroutine + subroutine check_real_const_shape_empty_4() +real :: a(9,3,0,7) +integer, allocatable :: m(:) +a = reshape((/ real:: /), shape(a)) +m = maxloc(a) +if (size(m, dim=1) /= 4) stop 91 +if (any(m /= (/ 0, 0, 0, 0 /))) stop 92 + end subroutine
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined
https://gcc.gnu.org/g:0c6f911ae6433ab0c3cc668083bd45aee52723bf commit 0c6f911ae6433ab0c3cc668083bd45aee52723bf Author: Mikael Morin Date: Wed Nov 15 10:23:32 2023 +0100 fortran: Disable frontend passes for MINLOC/MAXLOC if it's inlined Disable rewriting of MINLOC/MAXLOC expressions for which inline code generation is supported. Update the gfc_inline_intrinsic_function_p predicate (already existing) for that, with the current state of MINLOC/MAXLOC inlining support, that is only the cases of a scalar result and non-CHARACTER argument for now. This change has no effect currently, as the MINLOC/MAXLOC front-end passes only change expressions of rank 1, but the inlining control predicate gfc_inline_intrinsic_function_p returns false for those. However, later changes will extend MINLOC/MAXLOC inline expansion support to array expressions and update the inlining control predicate, and this will become effective. gcc/fortran/ChangeLog: * frontend-passes.cc (optimize_minmaxloc): Skip if we can generate inline code for the unmodified expression. * trans-intrinsic.cc (gfc_inline_intrinsic_function_p): Add MINLOC and MAXLOC cases. 
Diff: --- gcc/fortran/frontend-passes.cc | 3 ++- gcc/fortran/trans-intrinsic.cc | 23 +++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/frontend-passes.cc b/gcc/fortran/frontend-passes.cc index 3c06018fdbbf..8e4c6310ba8d 100644 --- a/gcc/fortran/frontend-passes.cc +++ b/gcc/fortran/frontend-passes.cc @@ -2277,7 +2277,8 @@ optimize_minmaxloc (gfc_expr **e) || fn->value.function.actual == NULL || fn->value.function.actual->expr == NULL || fn->value.function.actual->expr->ts.type == BT_CHARACTER - || fn->value.function.actual->expr->rank != 1) + || fn->value.function.actual->expr->rank != 1 + || gfc_inline_intrinsic_function_p (fn)) return; *e = gfc_get_array_expr (fn->ts.type, fn->ts.kind, &fn->where); diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 9f3c3ce47bc5..cc0d00f4e399 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -11650,6 +11650,29 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr) case GFC_ISYM_TRANSPOSE: return true; +case GFC_ISYM_MINLOC: +case GFC_ISYM_MAXLOC: + { + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + gfc_actual_arglist *array_arg = expr->value.function.actual; + gfc_actual_arglist *dim_arg = array_arg->next; + + gfc_expr *array = array_arg->expr; + gfc_expr *dim = dim_arg->expr; + + if (!(array->ts.type == BT_INTEGER + || array->ts.type == BT_REAL)) + return false; + + if (array->rank == 1 && dim != nullptr) + return true; + + return false; + } + default: return false; }
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608]
https://gcc.gnu.org/g:7c8dc5220170816e9c44fb1e42e3feb80831c740 commit 7c8dc5220170816e9c44fb1e42e3feb80831c740 Author: Mikael Morin Date: Tue Jul 9 21:05:40 2024 +0200 fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY of rank 1 [PR90608] Enable inline code generation for the MINLOC and MAXLOC intrinsic, if the DIM argument is not present and ARRAY has rank 1. This case is similar to the case where the result is scalar (DIM present and rank 1 ARRAY), which already supports inline expansion of the intrinsic. Both cases return the same value, with the difference that the result is an array of size 1 if DIM is absent, whereas it's a scalar if DIM is present. So all there is to do for this case to work is hook the inline expansion with the scalarizer. PR fortran/90608 gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Set the scalarization rank based on the MINLOC/MAXLOC rank if needed. Call the inline code generation and set up the scalarizer array descriptor info in the MINLOC and MAXLOC cases. * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the result array element if the scalarizer is set up and we are inside the loop. Restrict library function call dispatch to the case where inline expansion is not supported. Declare an array result if the expression isn't scalar. Initialize the array result single element and return the result variable if the expression isn't scalar. (walk_inline_intrinsic_minmaxloc): New function. (walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases, dispatching to walk_inline_intrinsic_minmaxloc. (gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases. (gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1, regardless of DIM. 
Diff: --- gcc/fortran/trans-array.cc | 25 ++ gcc/fortran/trans-intrinsic.cc | 198 +++-- 2 files changed, 155 insertions(+), 68 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index c93a5f1e7543..0c78e1fecd8f 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4771,6 +4771,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop) case GFC_ISYM_UBOUND: case GFC_ISYM_LCOBOUND: case GFC_ISYM_UCOBOUND: + case GFC_ISYM_MAXLOC: + case GFC_ISYM_MINLOC: case GFC_ISYM_SHAPE: case GFC_ISYM_THIS_IMAGE: loop->dimen = ss->dimen; @@ -4820,6 +4822,29 @@ done: case GFC_SS_INTRINSIC: switch (expr->value.function.isym->id) { + case GFC_ISYM_MINLOC: + case GFC_ISYM_MAXLOC: + { + gfc_se se; + gfc_init_se (&se, nullptr); + se.loop = loop; + se.ss = ss; + gfc_conv_intrinsic_function (&se, expr); + gfc_add_block_to_block (&outer_loop->pre, &se.pre); + gfc_add_block_to_block (&outer_loop->post, &se.post); + + info->descriptor = se.expr; + + info->data = gfc_conv_array_data (info->descriptor); + info->data = gfc_evaluate_now (info->data, &outer_loop->pre); + + info->offset = gfc_index_zero_node; + info->start[0] = gfc_index_zero_node; + info->end[0] = gfc_index_zero_node; + info->stride[0] = gfc_index_one_node; + continue; + } + /* Fall through to supply start and stride. */ case GFC_ISYM_LBOUND: case GFC_ISYM_UBOUND: diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index cc0d00f4e399..a947dd1ba0b2 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5273,66 +5273,69 @@ strip_kind_from_actual (gfc_actual_arglist * actual) we need to handle. For performance reasons we sometimes create two loops instead of one, where the second one is much simpler. 
Examples for minloc intrinsic: - 1) Result is an array, a call is generated - 2) Array mask is used and NaNs need to be supported: - limit = Infinity; - pos = 0; - S = from; - while (S <= to) { - if (mask[S]) { - if (pos == 0) pos = S + (1 - from); - if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; } - } - S++; - } - goto lab2; - lab1:; - while (S <= to) { - if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); } - S++; - } - lab2:; - 3) NaNs need to be supported, but it is known at compile time or cheaply - at runtime whether array is nonempty or not: - limit = Infinity; - pos = 0; -
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Outline array bound check generation code
https://gcc.gnu.org/g:b06cfb12ade15dd221f4a3ffbe707da5597e172e commit b06cfb12ade15dd221f4a3ffbe707da5597e172e Author: Mikael Morin Date: Wed Apr 10 21:18:03 2024 +0200 fortran: Outline array bound check generation code The next patch will need reindenting of the array bound check generation code. This outlines it to its own function beforehand, reducing the churn in the next patch. -- >8 -- gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Move array bound check generation code... (add_check_section_in_array_bounds): ... here as a new function. Diff: --- gcc/fortran/trans-array.cc | 297 ++--- 1 file changed, 143 insertions(+), 154 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 0c78e1fecd8f..99a603a3afb2 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4736,6 +4736,146 @@ gfc_conv_section_startstride (stmtblock_t * block, gfc_ss * ss, int dim) } +/* Generate in INNER the bounds checking code along the dimension DIM for + the array associated with SS_INFO. */ + +static void +add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info, + int dim) +{ + gfc_expr *expr = ss_info->expr; + locus *expr_loc = &expr->where; + const char *expr_name = expr->symtree->name; + + gfc_array_info *info = &ss_info->data.array; + + bool check_upper; + if (dim == info->ref->u.ar.dimen - 1 + && info->ref->u.ar.as->type == AS_ASSUMED_SIZE) +check_upper = false; + else +check_upper = true; + + /* Zero stride is not allowed. */ + tree tmp = fold_build2_loc (input_location, EQ_EXPR, logical_type_node, + info->stride[dim], gfc_index_zero_node); + char * msg = xasprintf ("Zero stride is not allowed, for dimension %d " + "of array '%s'", dim + 1, expr_name); + gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg); + free (msg); + + tree desc = info->descriptor; + + /* This is the run-time equivalent of resolve.cc's + check_dimension. 
The logical is more readable there + than it is here, with all the trees. */ + tree lbound = gfc_conv_array_lbound (desc, dim); + tree end = info->end[dim]; + tree ubound = check_upper ? gfc_conv_array_ubound (desc, dim) : NULL_TREE; + + /* non_zerosized is true when the selected range is not + empty. */ + tree stride_pos = fold_build2_loc (input_location, GT_EXPR, logical_type_node, +info->stride[dim], gfc_index_zero_node); + tmp = fold_build2_loc (input_location, LE_EXPR, logical_type_node, +info->start[dim], end); + stride_pos = fold_build2_loc (input_location, TRUTH_AND_EXPR, + logical_type_node, stride_pos, tmp); + + tree stride_neg = fold_build2_loc (input_location, LT_EXPR, logical_type_node, +info->stride[dim], gfc_index_zero_node); + tmp = fold_build2_loc (input_location, GE_EXPR, logical_type_node, +info->start[dim], end); + stride_neg = fold_build2_loc (input_location, TRUTH_AND_EXPR, + logical_type_node, stride_neg, tmp); + tree non_zerosized = fold_build2_loc (input_location, TRUTH_OR_EXPR, + logical_type_node, stride_pos, + stride_neg); + + /* Check the start of the range against the lower and upper + bounds of the array, if the range is not empty. + If upper bound is present, include both bounds in the + error message. 
*/ + if (check_upper) +{ + tmp = fold_build2_loc (input_location, LT_EXPR, logical_type_node, +info->start[dim], lbound); + tmp = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node, +non_zerosized, tmp); + tree tmp2 = fold_build2_loc (input_location, GT_EXPR, logical_type_node, + info->start[dim], ubound); + tmp2 = fold_build2_loc (input_location, TRUTH_AND_EXPR, logical_type_node, + non_zerosized, tmp2); + msg = xasprintf ("Index '%%ld' of dimension %d of array '%s' outside of " + "expected range (%%ld:%%ld)", dim + 1, expr_name); + gfc_trans_runtime_check (true, false, tmp, inner, expr_loc, msg, + fold_convert (long_integer_type_node, info->start[dim]), + fold_convert (long_integer_type_node, lbound), + fold_convert (long_integer_type_node, ubound)); + gfc_trans_runtime_check (true, false, tmp2, inner, expr_loc, msg, + fold_convert (long_integer_type_node, info->start[dim]), + fold_convert (long_integer_type_node, lbound), + fold_convert (long_integer_type_node, ubound)); + free (
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608]
https://gcc.gnu.org/g:081f12a1d9d1bd793e3571daf5ab25db594ff57a commit 081f12a1d9d1bd793e3571daf5ab25db594ff57a Author: Mikael Morin Date: Thu Nov 16 22:14:41 2023 +0100 fortran: Inline integral MINLOC/MAXLOC with no DIM and no MASK [PR90608] Enable generation of inline code for the MINLOC and MAXLOC intrinsic, if the ARRAY argument is of integral type and of any rank (only the rank 1 case was previously inlined), and neither DIM nor MASK arguments are present. This needs a few adjustments in gfc_conv_intrinsic_minmaxloc, mainly to replace the single variables POS and OFFSET, with collections of variables, one variable per dimension each. The restriction to integral ARRAY and absent MASK limits the scope of the change to the cases where we generate single loop inline code. The code generation for the second loop is only accessible with ARRAY of rank 1, so it can continue using a single variable. A later change will extend inlining to the double loop cases. There is some bounds checking code that was previously handled by the library, and that needed some changes in the scalarizer to avoid regressing. The bounds check code generation was already done by the scalarizer, but it was only applying to array reference sections, checking both individual array bounds and shape conformability between all the arrays involved. For MINLOC or MAXLOC, enable the conformability check between all the scalarized arrays, and disable the check that the array reference is within its bounds. PR fortran/90608 gcc/fortran/ChangeLog: * trans-array.cc (gfc_conv_ss_startstride): Set the MINLOC/MAXLOC result upper bound using the ARRAY argument rank. Adjust the error message for intrinsic result arrays. Only check array bounds for array references. Move bound check decision code... (bounds_check_needed): ... here as a new predicate. Allow bound check for MINLOC/MAXLOC intrinsic results. * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Change the result array upper bound to the rank of ARRAY. 
Update the NONEMPTY variable to depend on the non-empty extent of every dimension. Use one variable per dimension instead of a single variable for the position and the offset. Update their declaration, initialization, and update to affect the variable of each dimension. Use the first variable only in areas only accessed with rank 1 ARRAY argument. Set every element of the result using its corresponding variable. (gfc_inline_intrinsic_function_p): Return true for integral ARRAY and absent DIM and MASK. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_4.f90: Additionally accept the error message emitted by the scalarizer. Diff: --- gcc/fortran/trans-array.cc| 70 +--- gcc/fortran/trans-intrinsic.cc| 150 +++--- gcc/testsuite/gfortran.dg/maxloc_bounds_4.f90 | 4 +- 3 files changed, 167 insertions(+), 57 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 99a603a3afb2..c9d63d13509d 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -4876,6 +4876,35 @@ add_check_section_in_array_bounds (stmtblock_t *inner, gfc_ss_info *ss_info, } +/* Tells whether we need to generate bounds checking code for the array + associated with SS. */ + +bool +bounds_check_needed (gfc_ss *ss) +{ + /* Catch allocatable lhs in f2003. */ + if (flag_realloc_lhs && ss->no_bounds_check) +return false; + + gfc_ss_info *ss_info = ss->info; + if (ss_info->type == GFC_SS_SECTION) +return true; + + if (!(ss_info->type == GFC_SS_INTRINSIC + && ss_info->expr + && ss_info->expr->expr_type == EXPR_FUNCTION)) +return false; + + gfc_intrinsic_sym *isym = ss_info->expr->value.function.isym; + if (!(isym + && (isym->id == GFC_ISYM_MAXLOC + || isym->id == GFC_ISYM_MINLOC))) +return false; + + return gfc_inline_intrinsic_function_p (ss_info->expr); +} + + /* Calculates the range start and stride for a SS chain. Also gets the descriptor and data pointer. The range of vector subscripts is the size of the vector. Array bounds are also checked. 
*/ @@ -4977,10 +5006,19 @@ done: info->data = gfc_conv_array_data (info->descriptor); info->data = gfc_evaluate_now (info->data, &outer_loop->pre); - info->offset = gfc_index_zero_node; + gfc_expr *array = expr->value.function.actual->expr; + tree rank = build_int_cst (gfc_array_index_type, array->rank); + + tree tmp = fold_build2_loc (input_location, MINUS_EXPR, +
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608]
https://gcc.gnu.org/g:64fa63b902146982db4fd81d8240aef16d24311d commit 64fa63b902146982db4fd81d8240aef16d24311d Author: Mikael Morin Date: Fri Nov 17 15:40:55 2023 +0100 fortran: Inline integral MINLOC/MAXLOC with no DIM and scalar MASK [PR90608] Enable the generation of inline code for MINLOC/MAXLOC when argument ARRAY is of integral type, DIM is not present, and MASK is present and is scalar (only absent MASK or rank 1 ARRAY were inlined before). Scalar masks are implemented with a wrapping condition around the code one would generate if MASK wasn't present, so they are easy to support once inline code without MASK is working. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate variable initialization for each dimension in the else branch of the toplevel condition. (gfc_inline_intrinsic_function_p): Return TRUE for scalar MASK. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_7.f90: Additionally accept the error message reported by the scalarizer. Diff: --- gcc/fortran/trans-intrinsic.cc| 13 - gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index ac8bd2d48123..855208717973 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5886,7 +5886,6 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskss == NULL) { - gcc_assert (loop.dimen == 1); tree ifmask; gfc_init_se (&maskse, NULL); @@ -5901,7 +5900,8 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) the pos variable the same way as above. 
*/ gfc_init_block (&elseblock); - gfc_add_modify (&elseblock, pos[0], gfc_index_zero_node); + for (int i = 0; i < loop.dimen; i++) + gfc_add_modify (&elseblock, pos[i], gfc_index_zero_node); elsetmp = gfc_finish_block (&elseblock); ifmask = conv_mask_condition (&maskse, maskexpr, optional_mask); tmp = build3_v (COND_EXPR, ifmask, tmp, elsetmp); @@ -11795,9 +11795,12 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr) if (array->rank == 1) return true; - if (array->ts.type == BT_INTEGER - && dim == nullptr - && mask == nullptr) + if (array->ts.type != BT_INTEGER + || dim != nullptr) + return false; + + if (mask == nullptr + || mask->rank == 0) return true; return false; diff --git a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 index 206a29b149da..3aa9d3dcebee 100644 --- a/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 +++ b/gcc/testsuite/gfortran.dg/maxloc_bounds_7.f90 @@ -1,6 +1,6 @@ ! { dg-do run } ! { dg-options "-fbounds-check" } -! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" } +! { dg-shouldfail "Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." } module tst contains subroutine foo(res) @@ -18,4 +18,4 @@ program main integer :: res(3) call foo(res) end program main -! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2" } +! { dg-output "Fortran runtime error: Incorrect extent in return value of MAXLOC intrinsic: is 3, should be 2|Array bound mismatch for dimension 1 of array 'res' .3/2." }
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608]
https://gcc.gnu.org/g:ebde7ff486ec700d59eb2cc530c3ece3f9a07f67 commit ebde7ff486ec700d59eb2cc530c3ece3f9a07f67 Author: Mikael Morin Date: Fri Nov 17 16:47:26 2023 +0100 fortran: Inline all MINLOC/MAXLOC calls with no DIM [PR90608] Enable generation of inline MINLOC/MAXLOC code in the case where DIM is not present, and either ARRAY is of floating point type or MASK is an array. Those cases are the remaining bits to fully support inlining of non-CHARACTER MINLOC/MAXLOC without DIM. They are treated together because they generate similar code, the NANs for REAL types being handled a bit like a second level of masking. These are the cases for which we generate two sets of loops. This change affects the code generating the second loop, that was previously accessible only in the cases where ARRAY has rank 1. The single variable initialization and update are changed to apply to multiple variables, one per dimension. This change generates slightly worse code if ARRAY has rank 1. Indeed the code we used to generate was: for (idx1 in lower..upper) { ... if (...) { ... break; } } for (idx2 in idx1..upper) { ... } which avoided starting the second loop from lower, skipping in the second loop the elements already processed in the first one. Unfortunately, extending that code the obvious way to apply to rank > 1 leads to wrong code: for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... goto second_loop; } } } second_loop: for (idx21 in idx11..upper1) { for (idx22 in idx12..upper2) { ... } } That code is incorrect, as the loop over idx22, being nested, may be run more than once, and the second run should restart from lower2, not idx12. So with this change, we instead generate the following as the second set of loops: ... second_loop: for (idx21 in lower1..upper1) { for (idx22 in lower2..upper2) { ... } } which means the second set of loops processes again elements already processed by the first one, and the rank 1 case becomes: for (idx1 in lower..upper) { ... 
if (...) { ... break; } } for (idx2 in lower..upper) { ... } processing the first elements twice as well, which was not the case before. A later change will avoid the duplicate processing and restore the generated code in the rank 1 case. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Initialize and update all the variables. Put the label and goto in the outermost scalarizer loop. Don't start the second loop where the first stopped. (gfc_inline_intrinsic_function_p): Also return TRUE for array MASK or for any REAL type. gcc/testsuite/ChangeLog: * gfortran.dg/maxloc_bounds_5.f90: Additionally accept error messages reported by the scalarizer. * gfortran.dg/maxloc_bounds_6.f90: Ditto. Diff: --- gcc/fortran/trans-intrinsic.cc| 127 +- gcc/testsuite/gfortran.dg/maxloc_bounds_5.f90 | 4 +- gcc/testsuite/gfortran.dg/maxloc_bounds_6.f90 | 4 +- 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 855208717973..bae3b49a9498 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5332,12 +5332,55 @@ strip_kind_from_actual (gfc_actual_arglist * actual) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); } S++; } - B: ARRAY has rank 1, and DIM is absent. Use the same code as the scalar - case and wrap the result in an array. - C: ARRAY has rank > 1, NANs are not supported, and DIM and MASK are absent. - Generate code similar to the single loop scalar case, but using one - variable per dimension, for example if ARRAY has rank 2: - 4) NAN's aren't supported, no MASK: + B: Array result, non-CHARACTER type, DIM
[gcc(refs/users/mikael/heads/inline_minmaxloc_without_dim_v06)] fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608]
https://gcc.gnu.org/g:215d87c9e87f09f7b49dd679fdccb6fa22c02f74 commit 215d87c9e87f09f7b49dd679fdccb6fa22c02f74 Author: Mikael Morin Date: Thu Jul 25 12:27:09 2024 +0200 fortran: Continue MINLOC/MAXLOC second loop where the first stopped [PR90608] Continue the second set of loops where the first one stopped in the generated inline MINLOC/MAXLOC code in the cases where the generated code contains two sets of loops. This fixes a regression that was introduced when enabling the generation of inline MINLOC/MAXLOC code with ARRAY of rank greater than 1, non-scalar MASK and no DIM arguments. In the cases where two sets of loops are generated as inline MINLOC/MAXLOC code, we previously generated code such as (for rank 2 ARRAY, so with two levels of nesting): for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... goto second_loop; } } } second_loop: for (idx21 in lower1..upper1) { for (idx22 in lower2..upper2) { ... } } which means we process the first elements twice, once in the first set of loops and once in the second one. This change avoids this duplicate processing by using a conditional as lower bound for the second set of loops, generating code like: second_loop_entry = false; for (idx11 in lower1..upper1) { for (idx12 in lower2..upper2) { ... if (...) { ... second_loop_entry = true; goto second_loop; } } } second_loop: for (idx21 in (second_loop_entry ? idx11 : lower1)..upper1) { for (idx22 in (second_loop_entry ? idx12 : lower2)..upper2) { ... second_loop_entry = false; } } It was expected that the compiler optimizations would be able to remove the state variable second_loop_entry. It is the case if ARRAY has rank 1 (so without loop nesting), the variable is removed and the loop bounds become unconditional, which restores previously generated code, fully fixing the regression. 
For larger rank, unfortunately, the state variable and conditional loop bounds remain, but those cases were previously using library calls, so it's not a regression. PR fortran/90608 gcc/fortran/ChangeLog: * trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Generate a set of index variables. Set them using the loop indexes before leaving the first set of loops. Generate a new loop entry predicate. Set it before leaving the first set of loops. Clear it in the body of the second set of loops. For the second set of loops, update each loop variable to use the corresponding index variable if the predicate variable is set. Diff: --- gcc/fortran/trans-intrinsic.cc | 33 +++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index bae3b49a9498..29367c69d16b 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -5342,6 +5342,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = 0; pos1 = 1 S1 = from1; +second_loop_entry = false; while (S1 <= to1) { S0 = from0; while (s0 <= to0 { @@ -5354,6 +5355,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) limit = a[S1][S0]; pos0 = S0 + (1 - from0); pos1 = S1 + (1 - from1); +second_loop_entry = true; goto lab1; } } @@ -5363,9 +5365,9 @@ strip_kind_from_actual (gfc_actual_arglist * actual) } goto lab2; lab1:; -S1 = from1; +S1 = second_loop_entry ? S1 : from1; while (S1 <= to1) { - S0 = from0; + S0 = second_loop_entry ? S0 : from0; while (S0 <= to0) { if (mask[S1][S0]) if (a[S1][S0] < limit) { @@ -5373,6 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual) pos0 = S + (1 - from0); pos1 = S + (1 - from1); } +second_loop_entry = false; S0++; } S1++; @@ -5444,6 +5447,7
[gcc r15-2408] libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663]
https://gcc.gnu.org/g:658193658f05e9a8ebf0bce8bab1f43bfee1 commit r15-2408-g658193658f05e9a8ebf0bce8bab1f43bfee1 Author: Lennox Shou Hao Ho Date: Mon Jul 29 21:09:27 2024 +0100 libstdc++: Fix fs::hard_link_count behaviour on MinGW [PR113663] std::filesystem::hard_link_count() always returns 1 on mingw-w64ucrt-11.0.1-r3 on Windows 10 19045. hard_link_count() queries _wstat64() on MinGW-w64. The MSFT documentation claims _wstat64() will always return 1 on *non*-NTFS volumes: https://learn.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2013/14h5k7ff(v=vs.120) My tests suggest that is not always true - hard_link_count()/_wstat64() still returns 1 on NTFS. GetFileInformationByHandle does return the correct result of 2. Please see the PR for a minimal repro. This patch changes the Windows implementation to always call GetFileInformationByHandle. PR libstdc++/113663 libstdc++-v3/ChangeLog: * src/c++17/fs_ops.cc (fs::equivalent): Moved helper class auto_handle to anonymous namespace as auto_win_file_handle. (fs::hard_link_count): Changed Windows implementation to use information provided by GetFileInformationByHandle which is more reliable. * testsuite/27_io/filesystem/operations/hard_link_count.cc: New test. Signed-off-by: "Lennox" Shou Hao Ho Reviewed-by: Jonathan Wakely Diff: --- libstdc++-v3/src/c++17/fs_ops.cc | 59 ++ .../27_io/filesystem/operations/hard_link_count.cc | 37 ++ 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc index 07bc2a0fa88d..81227c49dfde 100644 --- a/libstdc++-v3/src/c++17/fs_ops.cc +++ b/libstdc++-v3/src/c++17/fs_ops.cc @@ -822,6 +822,34 @@ fs::equivalent(const path& p1, const path& p2) return result; } +#if _GLIBCXX_FILESYSTEM_IS_WINDOWS +namespace +{ + // An RAII type that opens a handle for an existing file. 
+ struct auto_win_file_handle + { +explicit +auto_win_file_handle(const fs::path& p_) +: handle(CreateFileW(p_.c_str(), 0, +FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, +0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)) +{ } + +~auto_win_file_handle() +{ if (*this) CloseHandle(handle); } + +explicit operator bool() const +{ return handle != INVALID_HANDLE_VALUE; } + +bool get_info() +{ return GetFileInformationByHandle(handle, &info); } + +HANDLE handle; +BY_HANDLE_FILE_INFORMATION info; + }; +} +#endif + bool fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept { @@ -858,27 +886,8 @@ fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev) return false; - struct auto_handle { - explicit auto_handle(const path& p_) - : handle(CreateFileW(p_.c_str(), 0, - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)) - { } - - ~auto_handle() - { if (*this) CloseHandle(handle); } - - explicit operator bool() const - { return handle != INVALID_HANDLE_VALUE; } - - bool get_info() - { return GetFileInformationByHandle(handle, &info); } - - HANDLE handle; - BY_HANDLE_FILE_INFORMATION info; - }; - auto_handle h1(p1); - auto_handle h2(p2); + auto_win_file_handle h1(p1); + auto_win_file_handle h2(p2); if (!h1 || !h2) { if (!h1 && !h2) @@ -982,7 +991,13 @@ fs::hard_link_count(const path& p) std::uintmax_t fs::hard_link_count(const path& p, error_code& ec) noexcept { -#ifdef _GLIBCXX_HAVE_SYS_STAT_H +#if _GLIBCXX_FILESYSTEM_IS_WINDOWS + auto_win_file_handle h(p); + if (h && h.get_info()) +return static_cast(h.info.nNumberOfLinks); + ec = __last_system_error(); + return static_cast(-1); +#elif defined _GLIBCXX_HAVE_SYS_STAT_H return do_stat(p, ec, std::mem_fn(&stat_type::st_nlink), static_cast(-1)); #else diff --git a/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc new file mode 100644 index ..8b2fb4f190e2 --- /dev/null +++ b/libstdc++-v3/testsuite/27_io/filesystem/operations/hard_link_count.cc @@ -0,0 +1,37 @@ +// { dg-do run { target c++17 } } +// { dg-require-filesystem-ts "" } + +#include +#include +#include + +namespace fs = std::filesystem; + +void test01() +{ + // PR libstdc++/113663 + + fs::path p1 = __gnu_test::nonexistent_path(); + VERIFY( !fs::exists(p1) ); + + __gnu_test::scoped_file f1(p1); + VERIFY( fs::exists(p1) ); + + VERIFY( fs::hard_link_count(p1) == 1 ); + +
[gcc r15-2409] libstdc++: Fix overwriting files with fs::copy_file on Windows
https://gcc.gnu.org/g:017e3f89b081e4828a588a3bd27b5feacea042b7 commit r15-2409-g017e3f89b081e4828a588a3bd27b5feacea042b7 Author: Jonathan Wakely Date: Tue Jul 30 10:55:55 2024 +0100 libstdc++: Fix overwriting files with fs::copy_file on Windows There are no inode numbers on Windows filesystems, so stat_type::st_ino is always zero and the check for equivalent files in do_copy_file was incorrectly identifying distinct files as equivalent. This caused copy_file to incorrectly report errors when trying to overwrite existing files. The fs::equivalent function already does the right thing on Windows, so factor that logic out into a new function that can be reused by fs::copy_file. The tests for fs::copy_file were quite inadequate, so this also adds checks for that function's error conditions. libstdc++-v3/ChangeLog: * src/c++17/fs_ops.cc (auto_win_file_handle): Change constructor parameter from const path& to const wchar_t*. (fs::equiv_files): New function. (fs::equivalent): Use equiv_files. * src/filesystem/ops-common.h (fs::equiv_files): Declare. (do_copy_file): Use equiv_files. * src/filesystem/ops.cc (fs::equiv_files): Define. (fs::copy, fs::equivalent): Use equiv_files. * testsuite/27_io/filesystem/operations/copy.cc: Test overwriting directory contents recursively. * testsuite/27_io/filesystem/operations/copy_file.cc: Test overwriting existing files. 
Diff: --- libstdc++-v3/src/c++17/fs_ops.cc | 71 +++- libstdc++-v3/src/filesystem/ops-common.h | 12 +- libstdc++-v3/src/filesystem/ops.cc | 18 ++- .../testsuite/27_io/filesystem/operations/copy.cc | 9 ++ .../27_io/filesystem/operations/copy_file.cc | 122 + 5 files changed, 199 insertions(+), 33 deletions(-) diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc index 81227c49dfde..7ffdce67782a 100644 --- a/libstdc++-v3/src/c++17/fs_ops.cc +++ b/libstdc++-v3/src/c++17/fs_ops.cc @@ -350,7 +350,7 @@ fs::copy(const path& from, const path& to, copy_options options, f = make_file_status(from_st); if (exists(t) && !is_other(t) && !is_other(f) - && to_st.st_dev == from_st.st_dev && to_st.st_ino == from_st.st_ino) + && fs::equiv_files(from.c_str(), from_st, to.c_str(), to_st, ec)) { ec = std::make_error_code(std::errc::file_exists); return; @@ -829,8 +829,8 @@ namespace struct auto_win_file_handle { explicit -auto_win_file_handle(const fs::path& p_) -: handle(CreateFileW(p_.c_str(), 0, +auto_win_file_handle(const wchar_t* p) +: handle(CreateFileW(p, 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)) { } @@ -850,6 +850,44 @@ namespace } #endif +#ifdef _GLIBCXX_HAVE_SYS_STAT_H +#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-ops.o too +bool +fs::equiv_files([[maybe_unused]] const char_type* p1, const stat_type& st1, + [[maybe_unused]] const char_type* p2, const stat_type& st2, + [[maybe_unused]] error_code& ec) +{ +#if ! _GLIBCXX_FILESYSTEM_IS_WINDOWS + // For POSIX the device ID and inode number uniquely identify a file. + return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino; +#else + // For Windows st_ino is not set, so can't be used to distinguish files. 
+ // We can compare modes and device IDs as a cheap initial check: + if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev) +return false; + + // Need to use GetFileInformationByHandle to get more info about the files. + auto_win_file_handle h1(p1); + auto_win_file_handle h2(p2); + if (!h1 || !h2) +{ + if (!h1 && !h2) + ec = __last_system_error(); + return false; +} + if (!h1.get_info() || !h2.get_info()) +{ + ec = __last_system_error(); + return false; +} + return h1.info.dwVolumeSerialNumber == h2.info.dwVolumeSerialNumber + && h1.info.nFileIndexHigh == h2.info.nFileIndexHigh + && h1.info.nFileIndexLow == h2.info.nFileIndexLow; +#endif // _GLIBCXX_FILESYSTEM_IS_WINDOWS +} +#endif // NEED_DO_COPY_FILE +#endif // _GLIBCXX_HAVE_SYS_STAT_H + bool fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept { @@ -881,30 +919,7 @@ fs::equivalent(const path& p1, const path& p2, error_code& ec) noexcept ec.clear(); if (is_other(s1) || is_other(s2)) return false; -#if _GLIBCXX_FILESYSTEM_IS_WINDOWS - // st_ino is not set, so can't be used to distinguish files - if (st1.st_mode != st2.st_mode || st1.st_dev != st2.st_dev) - return false; - - auto_win_file_handle h1(p1); - auto_win_file_handle h2(p2); - if (!h1 || !h2) -
[gcc r15-2410] RISC-V: Remove configure check for zabha
https://gcc.gnu.org/g:c0af64af636a801850fc8fabee12635ec73daa22 commit r15-2410-gc0af64af636a801850fc8fabee12635ec73daa22 Author: Patrick O'Neill Date: Mon Jul 29 19:52:02 2024 -0700 RISC-V: Remove configure check for zabha This patch removes the zabha configure check since it's not a breaking change and updates the existing zaamo/zalrsc comment. gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::to_string): Remove zabha configure check handling and clarify zaamo/zalrsc comment. * config.in: Regenerate. * configure: Regenerate. * configure.ac: Remove zabha configure check. Signed-off-by: Patrick O'Neill Diff: --- gcc/common/config/riscv/riscv-common.cc | 12 +++- gcc/config.in | 6 -- gcc/configure | 31 --- gcc/configure.ac| 5 - 4 files changed, 3 insertions(+), 51 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 682826c0e344..d2912877784d 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -855,7 +855,6 @@ riscv_subset_list::to_string (bool version_p) const bool skip_zifencei = false; bool skip_zaamo_zalrsc = false; - bool skip_zabha = false; bool skip_zicsr = false; bool i2p0 = false; @@ -884,13 +883,11 @@ riscv_subset_list::to_string (bool version_p) const skip_zifencei = true; #endif #ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC - /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc. */ + /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc. + Expanding 'a' to zaamo/zalrsc would otherwise break compilations + for users with an older version of binutils. */ skip_zaamo_zalrsc = true; #endif -#ifndef HAVE_AS_MARCH_ZABHA - /* Skip since binutils 2.42 and earlier don't recognize zabha. 
*/ - skip_zabha = true; -#endif for (subset = m_head; subset != NULL; subset = subset->next) { @@ -908,9 +905,6 @@ riscv_subset_list::to_string (bool version_p) const if (skip_zaamo_zalrsc && subset->name == "zalrsc") continue; - if (skip_zabha && subset->name == "zabha") - continue; - /* For !version_p, we only separate extension with underline for multi-letter extension. */ if (!first && diff --git a/gcc/config.in b/gcc/config.in index bc819005bd62..3af153eaec5c 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -635,12 +635,6 @@ #endif -/* Define if the assembler understands -march=rv*_zabha. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_MARCH_ZABHA -#endif - - /* Define if the assembler understands -march=rv*_zifencei. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_MARCH_ZIFENCEI diff --git a/gcc/configure b/gcc/configure index 01acca7fb5cc..7541bdeb7248 100755 --- a/gcc/configure +++ b/gcc/configure @@ -30882,37 +30882,6 @@ if test $gcc_cv_as_riscv_march_zaamo_zalrsc = yes; then $as_echo "#define HAVE_AS_MARCH_ZAAMO_ZALRSC 1" >>confdefs.h -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -march=rv32i_zabha support" >&5 -$as_echo_n "checking assembler for -march=rv32i_zabha support... " >&6; } -if ${gcc_cv_as_riscv_march_zabha+:} false; then : - $as_echo_n "(cached) " >&6 -else - gcc_cv_as_riscv_march_zabha=no - if test x$gcc_cv_as != x; then -$as_echo '' > conftest.s -if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_zabha -o conftest.o conftest.s >&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; } -then - gcc_cv_as_riscv_march_zabha=yes -else - echo "configure: failed program was" >&5 - cat conftest.s >&5 -fi -rm -f conftest.o conftest.s - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_march_zabha" >&5 -$as_echo "$gcc_cv_as_riscv_march_zabha" >&6; } -if test $gcc_cv_as_riscv_march_zabha = yes; then - -$as_echo "#define HAVE_AS_MARCH_ZABHA 1" >>confdefs.h - fi ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index 3f20c107b6aa..52c1780379d5 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -5461,11 +5461,6 @@ configured with --enable-newlib-nano-formatted-io.]) [-march=rv32i_zaamo_zalrsc],,, [AC_DEFINE(HAVE_AS_MARCH_ZAAMO_ZALRSC, 1, [Define if the assembler understands -march=rv*_zaamo_zalrsc.])]) -gcc_GAS_CHECK_FEATURE([-march=rv32i_zabha support], - gcc_cv_as_riscv_march_zabha, - [-march=rv32i_zabha],,, - [AC_DEFINE(HAVE_AS_MARCH_ZABHA, 1, -[Define if the assembler understands -march=rv*_zabha.])]) ;; loongarch*-*-*) gcc_GAS_CHECK_FEATURE([.dtprelword support],
[gcc r15-2411] RISC-V: Add basic support for the Zacas extension
https://gcc.gnu.org/g:11c2453a16b725b7fb67778e1ab4636a51a1217d commit r15-2411-g11c2453a16b725b7fb67778e1ab4636a51a1217d Author: Gianluca Guida Date: Mon Jul 29 15:13:46 2024 -0700 RISC-V: Add basic support for the Zacas extension This patch adds support for amocas.{b|h|w|d}. Support for amocas.q (64/128 bit cas for rv32/64) will be added in a future patch. Extension: https://github.com/riscv/riscv-zacas Ratification: https://jira.riscv.org/browse/RVS-680 gcc/ChangeLog: * common/config/riscv/riscv-common.cc: Add zacas extension. * config/riscv/arch-canonicalize: Make zacas imply zaamo. * config/riscv/riscv.opt: Add zacas. * config/riscv/sync.md (zacas_atomic_cas_value): New pattern. (atomic_compare_and_swap): Use new pattern for compare-and-swap ops. (zalrsc_atomic_cas_value_strong): Rename atomic_cas_value_strong. * doc/sourcebuild.texi: Add Zacas documentation. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add zacas testsuite infra support. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-acquire-release.c: Remove zacas to continue to test the lr/sc pairs. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-acquire.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-consume.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-relaxed.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-release.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-seq-cst-relaxed.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-compare-exchange-int-seq-cst.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-acquire-release.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-acquire.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-consume.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-relaxed.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-release.c: Ditto. 
* gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-seq-cst-relaxed.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-compare-exchange-int-seq-cst.c: Ditto. * gcc.target/riscv/amo/zabha-zacas-preferred-over-zalrsc.c: New test. * gcc.target/riscv/amo/zacas-char-requires-zabha.c: New test. * gcc.target/riscv/amo/zacas-char-requires-zacas.c: New test. * gcc.target/riscv/amo/zacas-preferred-over-zalrsc.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-acq-rel.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-acquire.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-relaxed.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-release.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-char-seq-cst.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-compatability-mapping-no-fence.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-compatability-mapping.cc: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-acq-rel.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-acquire.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-relaxed.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-release.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-int-seq-cst.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-acq-rel.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-acquire.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-relaxed.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-release.c: New test. * gcc.target/riscv/amo/zacas-rvwmo-compare-exchange-short-seq-cst.c: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-char-seq-cst.c: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-char.c: New test. 
* gcc.target/riscv/amo/zacas-ztso-compare-exchange-compatability-mapping-no-fence.c: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-compatability-mapping.cc: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-int-seq-cst.c: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-int.c: New test. * gcc.target/riscv/amo/zacas-ztso-compare-exchange-short-seq-cst.c: New test. * gc
[gcc r15-2412] testsuite: fix whitespace in dg-do compile directives
https://gcc.gnu.org/g:2e662dedf84aa23fdff7bceca040432bf9f1ab72 commit r15-2412-g2e662dedf84aa23fdff7bceca040432bf9f1ab72 Author: Sam James Date: Tue Jul 30 12:20:47 2024 +0100 testsuite: fix whitespace in dg-do compile directives Nothing seems to change here in reality, at least on x86_64-pc-linux-gnu, but it is important to fix nonetheless in case people copy it. PR rtl-optimization/48633 PR tree-optimization/83072 PR tree-optimization/83073 PR tree-optimization/96542 PR tree-optimization/96707 PR tree-optimization/97567 PR target/69225 PR target/89929 PR target/96562 * g++.dg/pr48633.C: Fix whitespace in dg directive. * g++.dg/pr96707.C: Likewise. * g++.target/i386/mv28.C: Likewise. * gcc.dg/Warray-bounds-flex-arrays-1.c: Likewise. * gcc.dg/pr83072-2.c: Likewise. * gcc.dg/pr83073.c: Likewise. * gcc.dg/pr96542.c: Likewise. * gcc.dg/pr97567-2.c: Likewise. * gcc.target/i386/avx512fp16-11a.c: Likewise. * gcc.target/i386/avx512fp16-13.c: Likewise. * gcc.target/i386/avx512fp16-14.c: Likewise. * gcc.target/i386/avx512fp16-conjugation-1.c: Likewise. * gcc.target/i386/avx512fp16-neg-1a.c: Likewise. * gcc.target/i386/avx512fp16-set1-pch-1a.c: Likewise. * gcc.target/i386/avx512fp16vl-conjugation-1.c: Likewise. * gcc.target/i386/avx512fp16vl-neg-1a.c: Likewise. * gcc.target/i386/avx512fp16vl-set1-pch-1a.c: Likewise. * gcc.target/i386/avx512vlfp16-11a.c: Likewise. * gcc.target/i386/pr69225-1.c: Likewise. * gcc.target/i386/pr69225-2.c: Likewise. * gcc.target/i386/pr69225-3.c: Likewise. * gcc.target/i386/pr69225-4.c: Likewise. * gcc.target/i386/pr69225-5.c: Likewise. * gcc.target/i386/pr69225-6.c: Likewise. * gcc.target/i386/pr69225-7.c: Likewise. * gcc.target/i386/pr96562-1.c: Likewise. * gcc.target/riscv/rv32e_stack.c: Likewise. * gfortran.dg/c-interop/removed-restrictions-3.f90: Likewise. * gnat.dg/renaming1.adb: Likewise. 
Diff: --- gcc/testsuite/g++.dg/pr48633.C | 2 +- gcc/testsuite/g++.dg/pr96707.C | 2 +- gcc/testsuite/g++.target/i386/mv28.C | 2 +- gcc/testsuite/gcc.dg/Warray-bounds-flex-arrays-1.c | 2 +- gcc/testsuite/gcc.dg/pr83072-2.c | 2 +- gcc/testsuite/gcc.dg/pr83073.c | 2 +- gcc/testsuite/gcc.dg/pr96542.c | 2 +- gcc/testsuite/gcc.dg/pr97567-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-11a.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-13.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-14.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-conjugation-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-neg-1a.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16vl-conjugation-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx512fp16vl-neg-1a.c| 2 +- gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-2.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-3.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-4.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-5.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-6.c | 2 +- gcc/testsuite/gcc.target/i386/pr69225-7.c | 2 +- gcc/testsuite/gcc.target/i386/pr96562-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rv32e_stack.c | 2 +- gcc/testsuite/gfortran.dg/c-interop/removed-restrictions-3.f90 | 2 +- gcc/testsuite/gnat.dg/renaming1.adb| 2 +- 29 files changed, 29 insertions(+), 29 deletions(-) diff --git a/gcc/testsuite/g++.dg/pr48633.C b/gcc/testsuite/g++.dg/pr48633.C index 90f053a74c88..efcdab02acbd 100644 --- a/gcc/testsuite/g++.dg/pr48633.C +++ b/gcc/testsuite/g++.dg/pr48633.C @@ -1,4 +1,4 @@ -/* { dg-do compile} */ +/* { dg-do compile } */ /* { dg-options "-O2 -fira-region=all -fnon-call-exceptions" } */ extern long double getme (void); extern void useme (long double); diff --git a/gcc/testsuite/g++.dg/pr96707.C 
b/gcc/testsuite/g++.dg/pr96707.C index 2653fe3d0431..868ee416e269 100644 --- a/gcc/testsuite/g++.dg/pr96707.C +++ b/gcc
[gcc r15-2413] testsuite: fix whitespace in dg-do preprocess directive
https://gcc.gnu.org/g:7f1aa73bde0babde0ed3ff58d7226b86d25d075d commit r15-2413-g7f1aa73bde0babde0ed3ff58d7226b86d25d075d Author: Sam James Date: Tue Jul 30 12:21:42 2024 +0100 testsuite: fix whitespace in dg-do preprocess directive PR preprocessor/90581 * c-c++-common/cpp/fmax-include-depth.c: Fix whitespace in dg directive. Diff: --- gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c b/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c index bd8cc3adcdd7..134c29805c89 100644 --- a/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c +++ b/gcc/testsuite/c-c++-common/cpp/fmax-include-depth.c @@ -1,4 +1,4 @@ -/* { dg-do preprocess} */ +/* { dg-do preprocess } */ /* { dg-options "-fmax-include-depth=1" } */ #include "fmax-include-depth-1b.h" /* { dg-error ".include nested depth 1 exceeds maximum of 1 .use -fmax-include-depth=DEPTH to increase the maximum." } */
[gcc r15-2414] testsuite: fix whitespace in dg-do assemble directive
https://gcc.gnu.org/g:2d105efd6f60dce4d57380cf9820a4dd52cc8abb commit r15-2414-g2d105efd6f60dce4d57380cf9820a4dd52cc8abb Author: Sam James Date: Tue Jul 30 17:10:01 2024 +0100 testsuite: fix whitespace in dg-do assemble directive * gcc.target/aarch64/simd/vmmla.c: Fix whitespace in dg directive. Diff: --- gcc/testsuite/gcc.target/aarch64/simd/vmmla.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c index 5eec2b5cfb96..777decc56a20 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c @@ -1,4 +1,4 @@ -/* { dg-do assemble} */ +/* { dg-do assemble } */ /* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ /* { dg-additional-options "-march=armv8.2-a+i8mm" } */
[gcc r15-2415] libbacktrace: fix syntax of Windows registration functions
https://gcc.gnu.org/g:37aa98f79a7fbad620c0318a48552b5442a49456 commit r15-2415-g37aa98f79a7fbad620c0318a48552b5442a49456 Author: Ian Lance Taylor Date: Tue Jul 30 09:25:03 2024 -0700 libbacktrace: fix syntax of Windows registration functions Adjust the syntax to keep MSVC happy. Fixes https://github.com/ianlancetaylor/libbacktrace/issues/131 * pecoff.c (LDR_DLL_NOTIFICATION): Put function modifier inside parentheses. (LDR_REGISTER_FUNCTION): Likewise. Diff: --- libbacktrace/pecoff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libbacktrace/pecoff.c b/libbacktrace/pecoff.c index 636e1b11296b..ccd5ccbce2ce 100644 --- a/libbacktrace/pecoff.c +++ b/libbacktrace/pecoff.c @@ -83,10 +83,10 @@ struct dll_notification_data #define LDR_DLL_NOTIFICATION_REASON_LOADED 1 typedef LONG NTSTATUS; -typedef VOID CALLBACK (*LDR_DLL_NOTIFICATION)(ULONG, +typedef VOID (CALLBACK *LDR_DLL_NOTIFICATION)(ULONG, struct dll_notification_data*, PVOID); -typedef NTSTATUS NTAPI (*LDR_REGISTER_FUNCTION)(ULONG, +typedef NTSTATUS (NTAPI *LDR_REGISTER_FUNCTION)(ULONG, LDR_DLL_NOTIFICATION, PVOID, PVOID*); #endif
[gcc r15-2416] gimple ssa: Teach switch conversion to optimize powers of 2 switches
https://gcc.gnu.org/g:2b3533cd871f62923e7a4f06a826f37bf0f35c5c commit r15-2416-g2b3533cd871f62923e7a4f06a826f37bf0f35c5c Author: Filip Kastl Date: Tue Jul 30 18:40:29 2024 +0200 gimple ssa: Teach switch conversion to optimize powers of 2 switches Sometimes a switch has case numbers that are powers of 2. Switch conversion usually isn't able to optimize these switches. This patch adds "exponential index transformation" to switch conversion. After switch conversion applies this transformation on the switch the index variable of the switch becomes the exponent instead of the whole value. For example: switch (i) { case (1 << 0): return 0; case (1 << 1): return 1; case (1 << 2): return 2; ... case (1 << 30): return 30; default: return 31; } gets transformed roughly into switch (log2(i)) { case 0: return 0; case 1: return 1; case 2: return 2; ... case 30: return 30; default: return 31; } This enables switch conversion to further optimize the switch. This patch only enables this transformation if there are optabs for FFS so that the base 2 logarithm can be computed efficiently at runtime. gcc/ChangeLog: * tree-switch-conversion.cc (can_log2): New static function to check if gen_log2 can be used on current target. (gen_log2): New static function to generate efficient GIMPLE code for taking an exact base 2 log. (gen_pow2p): New static function to generate efficient GIMPLE code for checking if a value is a power of 2. (switch_conversion::switch_conversion): Track if the transformation happened. (switch_conversion::is_exp_index_transform_viable): New function to decide whether the transformation should be applied. (switch_conversion::exp_index_transform): New function to execute the transformation. (switch_conversion::gen_inbound_check): Don't remove the default BB if the transformation happened. (switch_conversion::expand): Execute the transform if it is viable. Skip the "sufficiently small case range" test if the transformation is going to be executed. 
* tree-switch-conversion.h: Add is_exp_index_transform_viable and exp_index_transform. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/switch-3.c: Disable switch conversion. * gcc.target/i386/switch-exp-transform-1.c: New test. * gcc.target/i386/switch-exp-transform-2.c: New test. * gcc.target/i386/switch-exp-transform-3.c: New test. Signed-off-by: Filip Kastl Diff: --- gcc/testsuite/gcc.dg/tree-ssa/switch-3.c | 2 +- .../gcc.target/i386/switch-exp-transform-1.c | 32 ++ .../gcc.target/i386/switch-exp-transform-2.c | 35 +++ .../gcc.target/i386/switch-exp-transform-3.c | 148 ++ gcc/tree-switch-conversion.cc | 326 - gcc/tree-switch-conversion.h | 18 ++ 6 files changed, 555 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c b/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c index 44981e1d1861..83aae3843e91 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/switch-3.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fdump-tree-switchlower1" } */ +/* { dg-options "-O2 -fdump-tree-switchlower1 -fdisable-tree-switchconv" } */ int cipher_to_alg(int cipher) { diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c new file mode 100644 index ..53d31460ba37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-switchconv -mpopcnt -mbmi" } */ + +/* Checks that exponential index transform enables switch conversion to convert + this switch into an array lookup. Also checks that the "index variable is a + power of two" check has been generated. 
*/ + +int foo(unsigned bar) +{ +switch (bar) +{ +case (1 << 0): +return 1; +case (1 << 1): +return 2; +case (1 << 2): +return 3; +case (1 << 3): +return 4; +case (1 << 4): +return 8; +case (1 << 5): +return 13; +case (1 << 6): +return 21; +default: +return 0; +} +} + +/* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */ +/* { dg-final { scan-tree-dump "POPCOUNT" "switchconv" } } */ diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-2.c b/gcc/testsuite/
[gcc r15-2417] testsuite: fix whitespace in dg-require-effective-target directives
https://gcc.gnu.org/g:ee12a13d25778a1ad8a9b5dc63aadf9f4320088b commit r15-2417-gee12a13d25778a1ad8a9b5dc63aadf9f4320088b Author: Sam James Date: Tue Jul 30 17:23:08 2024 +0100 testsuite: fix whitespace in dg-require-effective-target directives PR middle-end/54400 PR target/98161 * gcc.dg/vect/bb-slp-layout-18.c: Fix whitespace in dg directive. * gcc.dg/vect/bb-slp-pr54400.c: Likewise. * gcc.target/i386/pr98161.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c | 2 +- gcc/testsuite/gcc.target/i386/pr98161.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c index ff4627225074..ebbf9d2da7ca 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-18.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-require-effective-target vect_float} */ +/* { dg-require-effective-target vect_float } */ /* { dg-additional-options "-w -Wno-psabi -ffast-math" } */ typedef float v4sf __attribute__((vector_size(sizeof(float)*4))); diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c index 6ecd51103ed8..745e3ced70ea 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c @@ -1,4 +1,4 @@ -/* { dg-require-effective-target vect_float} */ +/* { dg-require-effective-target vect_float } */ /* { dg-additional-options "-w -Wno-psabi -ffast-math" } */ #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.target/i386/pr98161.c b/gcc/testsuite/gcc.target/i386/pr98161.c index 5825b9bd1dbb..8ea93325214f 100644 --- a/gcc/testsuite/gcc.target/i386/pr98161.c +++ b/gcc/testsuite/gcc.target/i386/pr98161.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-options "-O2 -msse4" } */ -/* { dg-require-effective-target sse4} */ +/* { dg-require-effective-target sse4 } */ typedef unsigned short u16; 
typedef unsigned int u32;
[gcc r15-2418] RISC-V: Add configure check for B extension support
https://gcc.gnu.org/g:7ef8a9d4b1cea3fea3791859074df79b71abd549 commit r15-2418-g7ef8a9d4b1cea3fea3791859074df79b71abd549 Author: Edwin Lu Date: Wed Jul 24 16:37:18 2024 -0700 RISC-V: Add configure check for B extension support Binutils 2.42 and before don't recognize the b extension in the march strings even though it supports zba_zbb_zbs. Add a configure check to ignore the b in the march string if found. gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::to_string): Skip b in march string * config.in: Regenerate. * configure: Regenerate. * configure.ac: Add B assembler check Signed-off-by: Edwin Lu Diff: --- gcc/common/config/riscv/riscv-common.cc | 8 gcc/config.in | 6 ++ gcc/configure | 31 +++ gcc/configure.ac| 5 + 4 files changed, 50 insertions(+) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 0c12e12cde51..1944c7785c48 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -858,6 +858,7 @@ riscv_subset_list::to_string (bool version_p) const bool skip_zifencei = false; bool skip_zaamo_zalrsc = false; bool skip_zicsr = false; + bool skip_b = false; bool i2p0 = false; /* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei is @@ -890,6 +891,10 @@ riscv_subset_list::to_string (bool version_p) const for users with an older version of binutils. */ skip_zaamo_zalrsc = true; #endif +#ifndef HAVE_AS_MARCH_B + /* Skip since binutils 2.42 and earlier don't recognize b. */ + skip_b = true; +#endif for (subset = m_head; subset != NULL; subset = subset->next) { @@ -907,6 +912,9 @@ riscv_subset_list::to_string (bool version_p) const if (skip_zaamo_zalrsc && subset->name == "zalrsc") continue; + if (skip_b && subset->name == "b") + continue; + /* For !version_p, we only separate extension with underline for multi-letter extension. 
*/ if (!first && diff --git a/gcc/config.in b/gcc/config.in index 3af153eaec5c..7fcabbe5061d 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -629,6 +629,12 @@ #endif +/* Define if the assembler understands -march=rv*_b. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_MARCH_B +#endif + + /* Define if the assembler understands -march=rv*_zaamo_zalrsc. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_MARCH_ZAAMO_ZALRSC diff --git a/gcc/configure b/gcc/configure index 7541bdeb7248..557ea5fa3ac9 100755 --- a/gcc/configure +++ b/gcc/configure @@ -30882,6 +30882,37 @@ if test $gcc_cv_as_riscv_march_zaamo_zalrsc = yes; then $as_echo "#define HAVE_AS_MARCH_ZAAMO_ZALRSC 1" >>confdefs.h +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -march=rv32i_b support" >&5 +$as_echo_n "checking assembler for -march=rv32i_b support... " >&6; } +if ${gcc_cv_as_riscv_march_b+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_riscv_march_b=no + if test x$gcc_cv_as != x; then +$as_echo '' > conftest.s +if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_b -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } +then + gcc_cv_as_riscv_march_b=yes +else + echo "configure: failed program was" >&5 + cat conftest.s >&5 +fi +rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_march_b" >&5 +$as_echo "$gcc_cv_as_riscv_march_b" >&6; } +if test $gcc_cv_as_riscv_march_b = yes; then + +$as_echo "#define HAVE_AS_MARCH_B 1" >>confdefs.h + fi ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index 52c1780379d5..eaa01d0d7e56 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -5461,6 +5461,11 @@ configured with --enable-newlib-nano-formatted-io.]) [-march=rv32i_zaamo_zalrsc],,, [AC_DEFINE(HAVE_AS_MARCH_ZAAMO_ZALRSC, 1, [Define if the assembler understands -march=rv*_zaamo_zalrsc.])]) +gcc_GAS_CHECK_FEATURE([-march=rv32i_b support], + gcc_cv_as_riscv_march_b, + [-march=rv32i_b],,, + [AC_DEFINE(HAVE_AS_MARCH_B, 1, +[Define if the assembler understands -march=rv*_b.])]) ;; loongarch*-*-*) gcc_GAS_CHECK_FEATURE([.dtprelword support],
[gcc r15-2419] i386/testsuite: Add testcase for fixed PR [PR51492]
https://gcc.gnu.org/g:8b737ec289da83e9e2a9672be0336980616e8932 commit r15-2419-g8b737ec289da83e9e2a9672be0336980616e8932 Author: Uros Bizjak Date: Tue Jul 30 20:02:36 2024 +0200 i386/testsuite: Add testcase for fixed PR [PR51492] PR target/51492 gcc/testsuite/ChangeLog: * gcc.target/i386/pr51492.c: New test. Diff: --- gcc/testsuite/gcc.target/i386/pr51492.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/pr51492.c b/gcc/testsuite/gcc.target/i386/pr51492.c new file mode 100644 index ..0892e0c79a7b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr51492.c @@ -0,0 +1,19 @@ +/* PR target/51492 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#define SIZE 65536 +#define WSIZE 64 +unsigned short head[SIZE] __attribute__((aligned(64))); + +void +f(void) +{ + for (unsigned n = 0; n < SIZE; ++n) { +unsigned short m = head[n]; +head[n] = (unsigned short)(m >= WSIZE ? m-WSIZE : 0); + } +} + +/* { dg-final { scan-assembler "psubusw" } } */ +/* { dg-final { scan-assembler-not "paddw" } } */
[gcc r15-2420] libstdc++: Fix name of source file in comment
https://gcc.gnu.org/g:df67f383d8f6a0fc6820510b500ea74ddde5001d commit r15-2420-gdf67f383d8f6a0fc6820510b500ea74ddde5001d Author: Jonathan Wakely Date: Tue Jul 30 15:42:04 2024 +0100 libstdc++: Fix name of source file in comment libstdc++-v3/ChangeLog: * src/c++17/fs_ops.cc: Fix file name in comment. Diff: --- libstdc++-v3/src/c++17/fs_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/src/c++17/fs_ops.cc b/libstdc++-v3/src/c++17/fs_ops.cc index 7ffdce67782a..9606afa9f1f7 100644 --- a/libstdc++-v3/src/c++17/fs_ops.cc +++ b/libstdc++-v3/src/c++17/fs_ops.cc @@ -851,7 +851,7 @@ namespace #endif #ifdef _GLIBCXX_HAVE_SYS_STAT_H -#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-ops.o too +#ifdef NEED_DO_COPY_FILE // Only define this once, not in cow-fs_ops.o too bool fs::equiv_files([[maybe_unused]] const char_type* p1, const stat_type& st1, [[maybe_unused]] const char_type* p2, const stat_type& st2,
[gcc r15-2421] testsuite: fix 'dg-compile' typos
https://gcc.gnu.org/g:acc70606c59e3f14072cc8a164362e728d8df5d6 commit r15-2421-gacc70606c59e3f14072cc8a164362e728d8df5d6 Author: Sam James Date: Tue Jul 30 20:04:40 2024 +0100 testsuite: fix 'dg-compile' typos 'dg-compile' is not a thing, replace it with 'dg-do compile'. PR target/68015 PR c++/83979 * c-c++-common/goacc/loop-shape.c: Fix 'dg-compile' typo. * g++.dg/pr83979.C: Likewise. * g++.target/aarch64/sve/acle/general-c++/attributes_2.C: Likewise. * gcc.dg/tree-ssa/builtin-sprintf-7.c: Likewise. * gcc.dg/tree-ssa/builtin-sprintf-8.c: Likewise. * gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-char.c: Likewise. * gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-short.c: Likewise. * gcc.target/s390/20181024-1.c: Likewise. * gcc.target/s390/addr-constraints-1.c: Likewise. * gcc.target/s390/arch12/aghsghmgh-1.c: Likewise. * gcc.target/s390/arch12/mul-1.c: Likewise. * gcc.target/s390/arch13/bitops-1.c: Likewise. * gcc.target/s390/arch13/bitops-2.c: Likewise. * gcc.target/s390/arch13/fp-signedint-convert-1.c: Likewise. * gcc.target/s390/arch13/fp-unsignedint-convert-1.c: Likewise. * gcc.target/s390/arch13/popcount-1.c: Likewise. * gcc.target/s390/pr68015.c: Likewise. * gcc.target/s390/vector/fp-signedint-convert-1.c: Likewise. * gcc.target/s390/vector/fp-unsignedint-convert-1.c: Likewise. * gcc.target/s390/vector/reverse-elements-1.c: Likewise. * gcc.target/s390/vector/reverse-elements-2.c: Likewise. * gcc.target/s390/vector/reverse-elements-3.c: Likewise. * gcc.target/s390/vector/reverse-elements-4.c: Likewise. * gcc.target/s390/vector/reverse-elements-5.c: Likewise. * gcc.target/s390/vector/reverse-elements-6.c: Likewise. * gcc.target/s390/vector/reverse-elements-7.c: Likewise. * gnat.dg/alignment15.adb: Likewise. * gnat.dg/debug4.adb: Likewise. * gnat.dg/inline21.adb: Likewise. * gnat.dg/inline22.adb: Likewise. * gnat.dg/opt37.adb: Likewise. * gnat.dg/warn13.adb: Likewise. 
Diff: --- gcc/testsuite/c-c++-common/goacc/loop-shape.c| 2 +- gcc/testsuite/g++.dg/pr83979.C | 2 +- gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/attributes_2.C | 2 +- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-7.c| 2 +- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-8.c| 2 +- gcc/testsuite/gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-char.c| 2 +- gcc/testsuite/gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-short.c | 2 +- gcc/testsuite/gcc.target/s390/20181024-1.c | 2 +- gcc/testsuite/gcc.target/s390/addr-constraints-1.c | 2 +- gcc/testsuite/gcc.target/s390/arch12/aghsghmgh-1.c | 2 +- gcc/testsuite/gcc.target/s390/arch12/mul-1.c | 2 +- gcc/testsuite/gcc.target/s390/arch13/bitops-1.c | 2 +- gcc/testsuite/gcc.target/s390/arch13/bitops-2.c | 2 +- gcc/testsuite/gcc.target/s390/arch13/fp-signedint-convert-1.c| 2 +- gcc/testsuite/gcc.target/s390/arch13/fp-unsignedint-convert-1.c | 2 +- gcc/testsuite/gcc.target/s390/arch13/popcount-1.c| 2 +- gcc/testsuite/gcc.target/s390/pr68015.c | 2 +- gcc/testsuite/gcc.target/s390/vector/fp-signedint-convert-1.c| 2 +- gcc/testsuite/gcc.target/s390/vector/fp-unsignedint-convert-1.c | 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c| 2 +- gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c| 2 +- gcc/testsuite/gnat.dg/alignment15.adb| 2 +- gcc/testsuite/gnat.dg/debug4.adb | 2 +- gcc/testsuite/gnat.dg/inline21.adb | 2 +- gcc/testsuite/gnat.dg/inline22.adb | 2 +- gcc/testsuite/gnat.dg/opt37.adb | 2 +- gcc/testsuite/gnat.dg/warn13.adb | 2 +- 32 files changed, 32 insertions(+), 32 deletions(-) diff --git a/gcc/testsuite/c-c++-common/goacc/loop-shape.c b/gcc/testsuite/c-c++-common/goacc/loop-shape.c 
index 9708f7bf5eb3..b3199b4044
[gcc(refs/users/meissner/heads/work174)] Add more processors to arch flags.
https://gcc.gnu.org/g:48f7791835b495c254452a20d4d06bc9bedd7cae commit 48f7791835b495c254452a20d4d06bc9bedd7cae Author: Michael Meissner Date: Tue Jul 30 16:45:47 2024 -0400 Add more processors to arch flags. 2024-07-30 Michael Meissner * config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell processors. * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch flags for power4, power5, and power6. * config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell processors. Diff: --- gcc/config/rs6000/rs6000-arch.def | 3 +++ gcc/config/rs6000/rs6000-c.cc | 6 +++--- gcc/config/rs6000/rs6000.cc | 4 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/rs6000-arch.def b/gcc/config/rs6000/rs6000-arch.def index 6725736076da..96f176775b59 100644 --- a/gcc/config/rs6000/rs6000-arch.def +++ b/gcc/config/rs6000/rs6000-arch.def @@ -37,6 +37,9 @@ the appropriate architecture flags based on the actual processor enumeration. */ +ARCH_EXPAND(PPC476, "476") +ARCH_EXPAND(PPCA2, "a2") +ARCH_EXPAND(CELL,"cell") ARCH_EXPAND(POWER4, "power4") ARCH_EXPAND(POWER5, "power5") ARCH_EXPAND(POWER6, "power6") diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index a8a6a956874f..27f18f48e837 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -420,13 +420,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR"); if ((flags & OPTION_MASK_POWERPC64) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64"); - if ((flags & OPTION_MASK_MFCRF) != 0) + if ((arch_flags & ARCH_MASK_POWER4) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4"); - if ((flags & OPTION_MASK_POPCNTB) != 0) + if ((arch_flags & ARCH_MASK_POWER5) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); if ((flags & OPTION_MASK_FPRND) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((flags & OPTION_MASK_CMPB) != 
0) + if ((arch_flags & ARCH_MASK_POWER6) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); if ((arch_flags & ARCH_MASK_POWER7) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 19adc66cc801..f9ccaa67e619 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1851,14 +1851,18 @@ get_arch_flags (int cpu_index) /* fall through. */ case PROCESSOR_POWER6: + case PROCESSOR_PPCA2: ret |= ARCH_MASK_POWER6; /* fall through. */ + case PROCESSOR_PPC476: case PROCESSOR_POWER5: ret |= ARCH_MASK_POWER5; /* fall through. */ + case PROCESSOR_CELL: case PROCESSOR_POWER4: + case PROCESSOR_PPCE6500: ret |= ARCH_MASK_POWER4; break;
[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*
https://gcc.gnu.org/g:a10a34fe1cb8d4c96f1888942e52f1a4216cb7f3 commit a10a34fe1cb8d4c96f1888942e52f1a4216cb7f3 Author: Michael Meissner Date: Tue Jul 30 16:46:54 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 13 + 1 file changed, 13 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 58ffd411e22d..16ad0870ec3e 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,16 @@ + Branch work174, patch #7 + +Add more processors to arch flags. + +2024-07-30 Michael Meissner + + * config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell + processors. + * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch + flags for power4, power5, and power6. + * config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell + processors. + Branch work174, patch #6 Update tests to work with architecture flags changes.
[gcc(refs/users/meissner/heads/work174)] Revert changes
https://gcc.gnu.org/g:b0847289e8894dd1069958bd902bcd66df5c0c71 commit b0847289e8894dd1069958bd902bcd66df5c0c71 Author: Michael Meissner Date: Tue Jul 30 16:51:34 2024 -0400 Revert changes Diff: --- gcc/ChangeLog.meissner| 13 + gcc/config/rs6000/rs6000-arch.def | 3 --- gcc/config/rs6000/rs6000-c.cc | 6 +++--- gcc/config/rs6000/rs6000.cc | 4 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 16ad0870ec3e..0f6cd95f9269 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,15 +1,4 @@ - Branch work174, patch #7 - -Add more processors to arch flags. - -2024-07-30 Michael Meissner - - * config/rs6000/rs6000-arch.def: Add support for 476, a2, and cell - processors. - * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch - flags for power4, power5, and power6. - * config/rs6000/rs6000.cc (get_arch_flags): Add 476, a2, and cell - processors. + Branch work174, patch #7 was reverted Branch work174, patch #6 diff --git a/gcc/config/rs6000/rs6000-arch.def b/gcc/config/rs6000/rs6000-arch.def index 96f176775b59..6725736076da 100644 --- a/gcc/config/rs6000/rs6000-arch.def +++ b/gcc/config/rs6000/rs6000-arch.def @@ -37,9 +37,6 @@ the appropriate architecture flags based on the actual processor enumeration. 
*/ -ARCH_EXPAND(PPC476, "476") -ARCH_EXPAND(PPCA2, "a2") -ARCH_EXPAND(CELL,"cell") ARCH_EXPAND(POWER4, "power4") ARCH_EXPAND(POWER5, "power5") ARCH_EXPAND(POWER6, "power6") diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 27f18f48e837..a8a6a956874f 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -420,13 +420,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCGR"); if ((flags & OPTION_MASK_POWERPC64) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64"); - if ((arch_flags & ARCH_MASK_POWER4) != 0) + if ((flags & OPTION_MASK_MFCRF) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4"); - if ((arch_flags & ARCH_MASK_POWER5) != 0) + if ((flags & OPTION_MASK_POPCNTB) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); if ((flags & OPTION_MASK_FPRND) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((arch_flags & ARCH_MASK_POWER6) != 0) + if ((flags & OPTION_MASK_CMPB) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); if ((arch_flags & ARCH_MASK_POWER7) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index f9ccaa67e619..19adc66cc801 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1851,18 +1851,14 @@ get_arch_flags (int cpu_index) /* fall through. */ case PROCESSOR_POWER6: - case PROCESSOR_PPCA2: ret |= ARCH_MASK_POWER6; /* fall through. */ - case PROCESSOR_PPC476: case PROCESSOR_POWER5: ret |= ARCH_MASK_POWER5; /* fall through. */ - case PROCESSOR_CELL: case PROCESSOR_POWER4: - case PROCESSOR_PPCE6500: ret |= ARCH_MASK_POWER4; break;
[gcc r15-2422] libstdc++: Implement LWG 3886 for std::optional and std::expected
https://gcc.gnu.org/g:a9e472c6b748abde55b5ecde2e2d98dcb2f96ded commit r15-2422-ga9e472c6b748abde55b5ecde2e2d98dcb2f96ded Author: Jonathan Wakely Date: Thu Jul 25 23:08:14 2024 +0100 libstdc++: Implement LWG 3886 for std::optional and std::expected This uses remove_cv_t for the default template argument used for deducing a type for a braced-init-list used with std::optional and std::expected. libstdc++-v3/ChangeLog: * include/std/expected (expected(U&&), operator=(U&&)) (value_or): Use remove_cv_t on default template argument, as per LWG 3886. * include/std/optional (optional(U&&), operator=(U&&)) (value_or): Likewise. * testsuite/20_util/expected/lwg3886.cc: New test. * testsuite/20_util/optional/cons/lwg3886.cc: New test. Diff: --- libstdc++-v3/include/std/expected | 8 +-- libstdc++-v3/include/std/optional | 12 ++--- libstdc++-v3/testsuite/20_util/expected/lwg3886.cc | 58 ++ .../testsuite/20_util/optional/cons/lwg3886.cc | 58 ++ 4 files changed, 126 insertions(+), 10 deletions(-) diff --git a/libstdc++-v3/include/std/expected b/libstdc++-v3/include/std/expected index 515a1e6ab8f5..b8217e577fa3 100644 --- a/libstdc++-v3/include/std/expected +++ b/libstdc++-v3/include/std/expected @@ -468,7 +468,7 @@ namespace __expected std::move(__x)._M_unex); } - template + template> requires (!is_same_v, expected>) && (!is_same_v, in_place_t>) && is_constructible_v<_Tp, _Up> @@ -582,7 +582,7 @@ namespace __expected return *this; } - template + template> requires (!is_same_v>) && (!__expected::__is_unexpected>) && is_constructible_v<_Tp, _Up> && is_assignable_v<_Tp&, _Up> @@ -818,7 +818,7 @@ namespace __expected return std::move(_M_unex); } - template + template> constexpr _Tp value_or(_Up&& __v) const & noexcept(__and_v, @@ -832,7 +832,7 @@ namespace __expected return static_cast<_Tp>(std::forward<_Up>(__v)); } - template + template> constexpr _Tp value_or(_Up&& __v) && noexcept(__and_v, diff --git a/libstdc++-v3/include/std/optional b/libstdc++-v3/include/std/optional index 
4694d594f98a..2c4cc260f90e 100644 --- a/libstdc++-v3/include/std/optional +++ b/libstdc++-v3/include/std/optional @@ -868,7 +868,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Converting constructors for engaged optionals. #ifdef _GLIBCXX_USE_CONSTRAINTS_FOR_OPTIONAL - template + template> requires (!is_same_v>) && (!is_same_v>) && is_constructible_v<_Tp, _Up> @@ -919,7 +919,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION : _Base(std::in_place, __il, std::forward<_Args>(__args)...) { } #else - template, _Requires<__not_self<_Up>, __not_tag<_Up>, is_constructible<_Tp, _Up>, is_convertible<_Up, _Tp>, @@ -929,7 +929,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION noexcept(is_nothrow_constructible_v<_Tp, _Up>) : _Base(std::in_place, std::forward<_Up>(__t)) { } - template, _Requires<__not_self<_Up>, __not_tag<_Up>, is_constructible<_Tp, _Up>, __not_>, @@ -1017,7 +1017,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return *this; } - template + template> #ifdef _GLIBCXX_USE_CONSTRAINTS_FOR_OPTIONAL requires (!is_same_v>) && (!(is_scalar_v<_Tp> && is_same_v<_Tp, decay_t<_Up>>)) @@ -1242,7 +1242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __throw_bad_optional_access(); } - template + template> constexpr _Tp value_or(_Up&& __u) const& { @@ -1255,7 +1255,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return static_cast<_Tp>(std::forward<_Up>(__u)); } - template + template> constexpr _Tp value_or(_Up&& __u) && { diff --git a/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc b/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc new file mode 100644 index ..cf1a2ce4421f --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/expected/lwg3886.cc @@ -0,0 +1,58 @@ +// { dg-do compile { target c++23 } } + +// LWG 3886. 
Monad mo' problems + +#include + +void +test_constructor() +{ + struct MoveOnly { +MoveOnly(int, int) { } +MoveOnly(MoveOnly&&) { } + }; + + // The {0,0} should be deduced as MoveOnly not const MoveOnly + [[maybe_unused]] std::expected e({0,0}); +} + +struct Tracker { + bool moved = false; + constexpr Tracker(int, int) { } + constexpr Tracker(const Tracker&) { } + constexpr Tracker(Tracker&&) : moved(true) { } + + // The follow means that is_assignable is tr
[gcc r15-2423] libstdc++: Fix std::format output for std::chrono::zoned_time
https://gcc.gnu.org/g:8f05ada7dfb9a40d4333a2aa9ccb5ddcdf8e2b06 commit r15-2423-g8f05ada7dfb9a40d4333a2aa9ccb5ddcdf8e2b06 Author: Jonathan Wakely Date: Fri Jul 26 18:11:26 2024 +0100 libstdc++: Fix std::format output for std::chrono::zoned_time When formatting a chrono::zoned_time with an empty chrono-specs, we were only formatting its _M_time member, but the ostream insertion operator uses the format "{:L%F %T %Z}" which includes the time zone abbreviation. The %Z should also be used when formatting with an empty chrono-specs. This commit makes _M_format_to_ostream handle __local_time_fmt specializations directly, rather than calling itself recursively to format the _M_time member. We need to be able to customize the output of _M_format_to_ostream for __local_time_fmt, because we use that type for gps_time and tai_time as well as for zoned_time and __local_time_fmt. When formatting gps_time and tai_time we don't want to include the time zone abbreviation in the "{}" output, but for zoned_time we do want to. We can reuse the __is_neg flag passed to _M_format_to_ostream (via _M_format) to say that we want the time zone abbreviation. Currently the __is_neg flag is only used for duration specializations, so it's available for __local_time_fmt to use. In addition to fixing the zoned_time output to use %Z, this commit also changes the __local_time_fmt output to use %Z. Previously it didn't use it, just like zoned_time. The standard doesn't actually say how to format local-time-format-t for an empty chrono-specs, but this behaviour seems sensible and is what I'm proposing as part of LWG 4124. While testing this I noticed that some chrono types were not being tested with empty chrono-specs, so this adds more tests. I also noticed that std/time/clock/local/io.cc was testing tai_time instead of local_time, which was completely wrong. That's fixed now too. libstdc++-v3/ChangeLog: * include/bits/chrono_io.h (__local_fmt_t): Remove unused declaration. 
(__formatter_chrono::_M_format_to_ostream): Add explicit handling for specializations of __local_time_fmt, including the time zone abbreviation in the output if __is_neg is true. (formatter>::format): Add comment. (formatter>::format): Likewise. (formatter struct __local_time_fmt { @@ -163,8 +164,6 @@ namespace __detail const string* _M_abbrev; const seconds* _M_offset_sec; }; - - struct __local_fmt_t; } /// @endcond @@ -695,13 +694,34 @@ namespace __format using ::std::chrono::__detail::__utc_leap_second; using ::std::chrono::__detail::__local_time_fmt; + basic_ostringstream<_CharT> __os; + __os.imbue(_M_locale(__fc)); + if constexpr (__is_specialization_of<_Tp, __local_time_fmt>) - return _M_format_to_ostream(__t._M_time, __fc, false); - else { - basic_ostringstream<_CharT> __os; - __os.imbue(_M_locale(__fc)); + // Format as "{:L%F %T}" + auto __days = chrono::floor(__t._M_time); + __os << chrono::year_month_day(__days) << ' ' + << chrono::hh_mm_ss(__t._M_time - __days); + // For __local_time_fmt the __is_neg flags says whether to + // append " %Z" to the result. 
+ if (__is_neg) + { + if (!__t._M_abbrev) [[unlikely]] + __format::__no_timezone_available(); + else if constexpr (is_same_v<_CharT, char>) + __os << ' ' << *__t._M_abbrev; + else + { + __os << L' '; + for (char __c : *__t._M_abbrev) + __os << __c; + } + } + } + else + { if constexpr (__is_specialization_of<_Tp, __utc_leap_second>) __os << __t._M_date << ' ' << __t._M_time; else if constexpr (chrono::__is_time_point_v<_Tp>) @@ -727,11 +747,11 @@ namespace __format __os << _S_plus_minus[1]; __os << __t; } - - auto __str = std::move(__os).str(); - return __format::__write_padded_as_spec(__str, __str.size(), - __fc, _M_spec); } + + auto __str = std::move(__os).str(); + return __format::__write_padded_as_spec(__str, __str.size(), + __fc, _M_spec); } static constexpr const _CharT* _S_chars @@ -2008,6 +2028,8 @@ namespace __format _FormatContext& __fc) const { // Convert to __local_time_fmt with abbrev "TAI" and offset 0s. +
[gcc r15-2424] libstdc++: Fix formatter for low-resolution chrono::zoned_time (LWG 4124)
https://gcc.gnu.org/g:4883c9571f5fb8fc7e873bb8a31aa164c5cfd0e0 commit r15-2424-g4883c9571f5fb8fc7e873bb8a31aa164c5cfd0e0 Author: Jonathan Wakely Date: Mon Jul 29 12:52:40 2024 +0100 libstdc++: Fix formatter for low-resolution chrono::zoned_time (LWG 4124) This implements the proposed resolution of LWG 4124, so that low-resolution chrono::zoned_time objects can be formatted. The formatter for zoned_time<Duration, TimeZonePtr> needs to account for get_local_time returning local_time<common_type_t<Duration, seconds>> not local_time<Duration>. libstdc++-v3/ChangeLog: * include/bits/chrono_io.h (__local_time_fmt_for): New alias template. (formatter<zoned_time<Duration, TimeZonePtr>>): Use __local_time_fmt_for. * testsuite/std/time/zoned_time/io.cc: Check zoned_time can be formatted. Diff: --- libstdc++-v3/include/bits/chrono_io.h| 12 +--- libstdc++-v3/testsuite/std/time/zoned_time/io.cc | 4 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/include/bits/chrono_io.h b/libstdc++-v3/include/bits/chrono_io.h index e7e7deb2cde3..d8a4a121113c 100644 --- a/libstdc++-v3/include/bits/chrono_io.h +++ b/libstdc++-v3/include/bits/chrono_io.h @@ -164,6 +164,12 @@ namespace __detail const string* _M_abbrev; const seconds* _M_offset_sec; }; + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 4124. Cannot format zoned_time with resolution coarser than seconds + template +using __local_time_fmt_for + = __local_time_fmt>; } /// @endcond @@ -2137,15 +2143,15 @@ namespace __format #if _GLIBCXX_USE_CXX11_ABI || ! 
_GLIBCXX_USE_DUAL_ABI template struct formatter, _CharT> -: formatter, _CharT> +: formatter, _CharT> { template typename _FormatContext::iterator format(const chrono::zoned_time<_Duration, _TimeZonePtr>& __tp, _FormatContext& __ctx) const { - using chrono::__detail::__local_time_fmt; - using _Base = formatter<__local_time_fmt<_Duration>, _CharT>; + using _Ltf = chrono::__detail::__local_time_fmt_for<_Duration>; + using _Base = formatter<_Ltf, _CharT>; const chrono::sys_info __info = __tp.get_info(); const auto __lf = chrono::local_time_format(__tp.get_local_time(), &__info.abbrev, diff --git a/libstdc++-v3/testsuite/std/time/zoned_time/io.cc b/libstdc++-v3/testsuite/std/time/zoned_time/io.cc index ee3b9edba810..c113eea6d3fe 100644 --- a/libstdc++-v3/testsuite/std/time/zoned_time/io.cc +++ b/libstdc++-v3/testsuite/std/time/zoned_time/io.cc @@ -66,6 +66,10 @@ test_format() ws = std::format(L"{:+^34}", zoned_time(zone, t)); VERIFY( ws == L"++2022-12-19 12:26:25.708000 EST++" ); #endif + + // LWG 4124. Cannot format zoned_time with resolution coarser than seconds + s = std::format("{}", zoned_time(zone, time_point_cast(t))); + VERIFY( s == "2022-12-19 12:26:00 EST" ); } int main()
[gcc(refs/users/meissner/heads/work174)] Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks.
https://gcc.gnu.org/g:816307fad902b82fd4ed6079b9113057b7496252 commit 816307fad902b82fd4ed6079b9113057b7496252 Author: Michael Meissner Date: Tue Jul 30 18:05:54 2024 -0400 Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks. 2024-07-30 Michael Meissner * config/rs6000/rs6000-arch.def: Remove power4 and power5 architecture masks. * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Use arch flags for power6. * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const HOST_WIDE_INT and not as an enumeration. * config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 arch flags support. (rs6000_machine_from_flags): Likewise. Diff: --- gcc/config/rs6000/rs6000-arch.def | 2 -- gcc/config/rs6000/rs6000-c.cc | 2 +- gcc/config/rs6000/rs6000-opts.h | 9 - gcc/config/rs6000/rs6000.cc | 12 ++-- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/rs6000-arch.def b/gcc/config/rs6000/rs6000-arch.def index 6725736076da..d317200016a2 100644 --- a/gcc/config/rs6000/rs6000-arch.def +++ b/gcc/config/rs6000/rs6000-arch.def @@ -37,8 +37,6 @@ the appropriate architecture flags based on the actual processor enumeration. 
*/ -ARCH_EXPAND(POWER4, "power4") -ARCH_EXPAND(POWER5, "power5") ARCH_EXPAND(POWER6, "power6") ARCH_EXPAND(POWER7, "power7") ARCH_EXPAND(POWER8, "power8") diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index a8a6a956874f..7d0b24b7c09e 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -426,7 +426,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); if ((flags & OPTION_MASK_FPRND) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((flags & OPTION_MASK_CMPB) != 0) + if ((arch_flags & ARCH_MASK_POWER6) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); if ((arch_flags & ARCH_MASK_POWER7) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index 9a52a1d4b147..c7764e66cd03 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -80,15 +80,14 @@ enum { ARCH_ENUM_LAST }; -/* Create an architecture mask for the newer architectures (power7 and +/* Create an architecture mask for the newer architectures (power6 and up).. */ #undef ARCH_EXPAND -#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## PROC, +#define ARCH_EXPAND(PROC, NAME) \ + static const HOST_WIDE_INT ARCH_MASK_ ## PROC \ += HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC; -enum { #include "rs6000-arch.def" - ARCH_MASK_ZERO = 0 -}; #undef ARCH_EXPAND diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 19adc66cc801..bccf30a44b20 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1852,14 +1852,6 @@ get_arch_flags (int cpu_index) case PROCESSOR_POWER6: ret |= ARCH_MASK_POWER6; - /* fall through. */ - - case PROCESSOR_POWER5: - ret |= ARCH_MASK_POWER5; - /* fall through. 
*/ - - case PROCESSOR_POWER4: - ret |= ARCH_MASK_POWER4; break; default: @@ -5937,9 +5929,9 @@ rs6000_machine_from_flags (void) return "power7"; if ((arch_flags & ARCH_MASK_POWER6) != 0) return "power6"; - if ((arch_flags & ARCH_MASK_POWER5) != 0) + if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0) return "power5"; - if ((arch_flags & ARCH_MASK_POWER4) != 0) + if ((flags & ISA_2_1_MASKS) != 0) return "power4"; if ((flags & OPTION_MASK_POWERPC64) != 0) return "ppc64";
[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*
https://gcc.gnu.org/g:e2a3cfdcb3909f7893afeb36c366c4dba2306206 commit e2a3cfdcb3909f7893afeb36c366c4dba2306206 Author: Michael Meissner Date: Tue Jul 30 18:07:02 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 16 1 file changed, 16 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 0f6cd95f9269..eb0b90a45dbe 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,19 @@ + Branch work174, patch #8 + +Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks. + +2024-07-30 Michael Meissner + + * config/rs6000/rs6000-arch.def: Remove power4 and power5 architecture + masks. + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Use arch flags + for power6. + * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const + HOST_WIDE_INT and not as an enumeratio. + * config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 arch + flags support. + (rs6000_machine_from_flags): Likewise. + Branch work174, patch #7 was reverted Branch work174, patch #6
[gcc(refs/users/meissner/heads/work174)] Add a2 processor to arch flags.
https://gcc.gnu.org/g:7d2ba2da50c107056bc9a5f589012d42ff1439de commit 7d2ba2da50c107056bc9a5f589012d42ff1439de Author: Michael Meissner Date: Tue Jul 30 18:13:07 2024 -0400 Add a2 processor to arch flags. 2024-07-30 Michael Meissner * config/rs6000/rs6000-arch.def: Add a2 processor. Diff: --- gcc/config/rs6000/rs6000-arch.def | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/rs6000/rs6000-arch.def b/gcc/config/rs6000/rs6000-arch.def index d317200016a2..89b1b87c89ac 100644 --- a/gcc/config/rs6000/rs6000-arch.def +++ b/gcc/config/rs6000/rs6000-arch.def @@ -37,6 +37,7 @@ the appropriate architecture flags based on the actual processor enumeration. */ +ARCH_EXPAND(PPCA2, "a2") ARCH_EXPAND(POWER6, "power6") ARCH_EXPAND(POWER7, "power7") ARCH_EXPAND(POWER8, "power8")
[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*
https://gcc.gnu.org/g:51e8229f65e2d7e5fd846ae016cd2634e5828e65 commit 51e8229f65e2d7e5fd846ae016cd2634e5828e65 Author: Michael Meissner Date: Tue Jul 30 18:13:59 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 8 1 file changed, 8 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index eb0b90a45dbe..86cc52c8636d 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,11 @@ + Branch work174, patch #9 + +Add a2 processor to arch flags. + +2024-07-30 Michael Meissner + + * config/rs6000/rs6000-arch.def: Add a2 processor. + Branch work174, patch #8 Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks.
[gcc(refs/users/meissner/heads/work174)] Revert changes
https://gcc.gnu.org/g:4d9ccfc12fa1c534d01eb7c3aac57fe0268fc913 commit 4d9ccfc12fa1c534d01eb7c3aac57fe0268fc913 Author: Michael Meissner Date: Tue Jul 30 18:19:52 2024 -0400 Revert changes Diff: --- gcc/ChangeLog.meissner| 26 ++ gcc/config/rs6000/rs6000-arch.def | 3 ++- gcc/config/rs6000/rs6000-c.cc | 2 +- gcc/config/rs6000/rs6000-opts.h | 9 + gcc/config/rs6000/rs6000.cc | 12 ++-- 5 files changed, 20 insertions(+), 32 deletions(-) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 86cc52c8636d..75d085b17f0c 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,27 +1,5 @@ - Branch work174, patch #9 - -Add a2 processor to arch flags. - -2024-07-30 Michael Meissner - - * config/rs6000/rs6000-arch.def: Add a2 processor. - - Branch work174, patch #8 - -Remove arch flags power4 and power5. Use const HOST_WIDE_INT for arch masks. - -2024-07-30 Michael Meissner - - * config/rs6000/rs6000-arch.def: Remove power4 and power5 architecture - masks. - * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Use arch flags - for power6. - * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const - HOST_WIDE_INT and not as an enumeratio. - * config/rs6000/rs6000.cc (get_arch_flags): Drop power4 and power5 arch - flags support. - (rs6000_machine_from_flags): Likewise. - + Branch work174, patch #9 was reverted + Branch work174, patch #8 was reverted Branch work174, patch #7 was reverted Branch work174, patch #6 diff --git a/gcc/config/rs6000/rs6000-arch.def b/gcc/config/rs6000/rs6000-arch.def index 89b1b87c89ac..6725736076da 100644 --- a/gcc/config/rs6000/rs6000-arch.def +++ b/gcc/config/rs6000/rs6000-arch.def @@ -37,7 +37,8 @@ the appropriate architecture flags based on the actual processor enumeration. 
*/ -ARCH_EXPAND(PPCA2, "a2") +ARCH_EXPAND(POWER4, "power4") +ARCH_EXPAND(POWER5, "power5") ARCH_EXPAND(POWER6, "power6") ARCH_EXPAND(POWER7, "power7") ARCH_EXPAND(POWER8, "power8") diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 7d0b24b7c09e..a8a6a956874f 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -426,7 +426,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); if ((flags & OPTION_MASK_FPRND) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((arch_flags & ARCH_MASK_POWER6) != 0) + if ((flags & OPTION_MASK_CMPB) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); if ((arch_flags & ARCH_MASK_POWER7) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index c7764e66cd03..9a52a1d4b147 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -80,14 +80,15 @@ enum { ARCH_ENUM_LAST }; -/* Create an architecture mask for the newer architectures (power6 and +/* Create an architecture mask for the newer architectures (power7 and up).. */ #undef ARCH_EXPAND -#define ARCH_EXPAND(PROC, NAME) \ - static const HOST_WIDE_INT ARCH_MASK_ ## PROC \ -= HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC; +#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## PROC, +enum { #include "rs6000-arch.def" + ARCH_MASK_ZERO = 0 +}; #undef ARCH_EXPAND diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index bccf30a44b20..19adc66cc801 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1852,6 +1852,14 @@ get_arch_flags (int cpu_index) case PROCESSOR_POWER6: ret |= ARCH_MASK_POWER6; + /* fall through. */ + + case PROCESSOR_POWER5: + ret |= ARCH_MASK_POWER5; + /* fall through. 
*/ + + case PROCESSOR_POWER4: + ret |= ARCH_MASK_POWER4; break; default: @@ -5929,9 +5937,9 @@ rs6000_machine_from_flags (void) return "power7"; if ((arch_flags & ARCH_MASK_POWER6) != 0) return "power6"; - if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0) + if ((arch_flags & ARCH_MASK_POWER5) != 0) return "power5"; - if ((flags & ISA_2_1_MASKS) != 0) + if ((arch_flags & ARCH_MASK_POWER4) != 0) return "power4"; if ((flags & OPTION_MASK_POWERPC64) != 0) return "ppc64";
[gcc(refs/users/meissner/heads/work174)] Use const HOST_WIDE_INT for arch masks.
https://gcc.gnu.org/g:a66ceb34e3df733362f3ee4b56bb504ff5e2a81c commit a66ceb34e3df733362f3ee4b56bb504ff5e2a81c Author: Michael Meissner Date: Tue Jul 30 18:23:14 2024 -0400 Use const HOST_WIDE_INT for arch masks. 2024-07-30 Michael Meissner * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const HOST_WIDE_INT and not as an enumeration. Diff: --- gcc/config/rs6000/rs6000-opts.h | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index 9a52a1d4b147..c7764e66cd03 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -80,15 +80,14 @@ enum { ARCH_ENUM_LAST }; -/* Create an architecture mask for the newer architectures (power7 and +/* Create an architecture mask for the newer architectures (power6 and up).. */ #undef ARCH_EXPAND -#define ARCH_EXPAND(PROC, NAME)ARCH_MASK_ ## PROC = 1 << ARCH_ENUM_ ## PROC, +#define ARCH_EXPAND(PROC, NAME) \ + static const HOST_WIDE_INT ARCH_MASK_ ## PROC \ += HOST_WIDE_INT_1 << ARCH_ENUM_ ## PROC; -enum { #include "rs6000-arch.def" - ARCH_MASK_ZERO = 0 -}; #undef ARCH_EXPAND
[gcc(refs/users/meissner/heads/work174)] Update ChangeLog.*
https://gcc.gnu.org/g:1b8c6a9878846cc8925df7189f8aee0c1013036c commit 1b8c6a9878846cc8925df7189f8aee0c1013036c Author: Michael Meissner Date: Tue Jul 30 18:24:50 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 9 + 1 file changed, 9 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 75d085b17f0c..ce7ed892daf3 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,12 @@ + Branch work174, patch #10 + +Use const HOST_WIDE_INT for arch masks. + +2024-07-30 Michael Meissner + + * config/rs6000/rs6000-opts.h (ARCH_MASK_*): Encode the masks as const + HOST_WIDE_INT and not as an enumeratio. + Branch work174, patch #9 was reverted Branch work174, patch #8 was reverted Branch work174, patch #7 was reverted
[gcc r15-2426] LoongArch: Expand some SImode operations through "si3_extend" instructions if TARGET_64BIT
https://gcc.gnu.org/g:b929083dd83ab50f26e10bbaa5097d5f6fb3c908 commit r15-2426-gb929083dd83ab50f26e10bbaa5097d5f6fb3c908 Author: Xi Ruoyao Date: Sat Jul 20 20:38:13 2024 +0800 LoongArch: Expand some SImode operations through "si3_extend" instructions if TARGET_64BIT We already had "si3_extend" insns and we hoped the fwprop or combine passes can use them to remove unnecessary sign extensions. But this does not always work: for cases like x << 1 | y, the compiler tends to do (sign_extend:DI (ior:SI (ashift:SI (reg:SI $r4) (const_int 1)) (reg:SI $r5))) instead of (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1))) (sign_extend:DI (reg:SI $r5))) So we cannot match the ashlsi3_extend instruction here and we get: slli.w $r4,$r4,1 or $r4,$r5,$r4 slli.w $r4,$r4,0# <= redundant jr $r1 To eliminate this redundant extension we need to turn SImode shift etc. to DImode "si3_extend" operations earlier, when we expand the SImode operation. We are already doing this for addition, now do it for shifts, rotates, substract, multiplication, division, and modulo as well. The bytepick.w definition for TARGET_64BIT needs to be adjusted so it won't be undone by the shift expanding. gcc/ChangeLog: * config/loongarch/loongarch.md (optab): Add (rotatert "rotr"). (3, 3, sub3, rotr3, mul3): Add a "*" to the insn name so we can redefine the names with define_expand. (*si3_extend): Remove "*" so we can use them in expanders. (*subsi3_extended, *mulsi3_extended): Likewise, also remove the trailing "ed" for consistency. (*si3_extended): Add mode for sign_extend to prevent an ICE using it in expanders. (shift_w, arith_w): New define_code_iterator. (3): New define_expand. Expand with si3_extend for SImode if TARGET_64BIT. (3): Likewise. (mul3): Expand to mulsi3_extended for SImode if TARGET_64BIT and ISA_HAS_DIV32. (3): Expand to si3_extended for SImode if TARGET_64BIT. (rotl3): Expand to rotrsi3_extend for SImode if TARGET_64BIT. (bytepick_w_): Add mode for lshiftrt and ashift. 
(bitsize, bytepick_imm, bytepick_w_ashift_amount): New define_mode_attr. (bytepick_w__extend): Adjust for the RTL change caused by 32-bit shift expanding. Now bytepick_imm only covers 2 and 3, separate one remaining case to ... (bytepick_w_1_extend): ... here, new define_insn. gcc/testsuite/ChangeLog: * gcc.target/loongarch/bitwise_extend.c: New test. Diff: --- gcc/config/loongarch/loongarch.md | 131 + .../gcc.target/loongarch/bitwise_extend.c | 45 +++ 2 files changed, 154 insertions(+), 22 deletions(-) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 459ad30b9bb9..9bad79bbf45e 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -546,6 +546,7 @@ (define_code_attr optab [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") +(rotatert "rotr") (ior "ior") (xor "xor") (and "and") @@ -624,6 +625,49 @@ (48 "6") (56 "7")]) +;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT +;; so the redundant sign extension can be removed if the output is used as +;; an input of a bitwise operation. Note plus, rotl, and div are handled +;; separately. 
+(define_code_iterator shift_w [any_shift rotatert]) +(define_code_iterator arith_w [minus mult]) + +(define_expand "3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (shift_w:GPR (match_operand:GPR 1 "register_operand" "r") +(match_operand:SI 2 "arith_operand" "rI")))] + "" +{ + if (TARGET_64BIT && mode == SImode) +{ + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_si3_extend (t, operands[1], operands[2])); + t = gen_lowpart (SImode, t); + SUBREG_PROMOTED_VAR_P (t) = 1; + SUBREG_PROMOTED_SET (t, SRP_SIGNED); + emit_move_insn (operands[0], t); + DONE; +} +}) + +(define_expand "3" + [(set (match_operand:GPR 0 "register_operand" "=r") + (arith_w:GPR (match_operand:GPR 1 "register_operand" "r") +(match_operand:GPR 2 "register_operand" "r")))] + "" +{ + if (TARGET_64BIT && mode == SImode) +{ + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_si3_extend
[gcc r15-2427] rs6000: Use standard name uabd for absdu insns
https://gcc.gnu.org/g:169341f0893a009736f9715db969909880d0e876 commit r15-2427-g169341f0893a009736f9715db969909880d0e876 Author: Kewen Lin Date: Tue Jul 30 21:20:51 2024 -0500 rs6000: Use standard name uabd for absdu insns r14-1832 adds recognition pattern, ifn and optab for ABD (ABsolute Difference), we have some vector absolute difference unsigned instructions since ISA 3.0, as the associated test cases shown, they are not exploited well as we don't define it (them) with a standard name. So this patch is to rename it with standard name first. And it merges both define_expand and define_insn as a separated define_expand isn't needed. Besides, it adjusts the RTL pattern by using generic umax and umin rather than UNSPEC_VADU, it's more meaningful and can catch umin/umax opportunity. gcc/ChangeLog: * config/rs6000/altivec.md (p9_vadu3): Rename to ... (uabd3): ... this. Update RTL pattern with umin and umax rather than UNSPEC_VADU. (vadu3): Remove. (UNSPEC_VADU): Remove. (usadv16qi): Replace gen_p9_vaduv16qi3 with gen_uabdv16qi3. (usadv8hi): Replace gen_p9_vaduv8hi3 with gen_uabdv8hi3. * config/rs6000/rs6000-builtins.def (__builtin_altivec_vadub): Replace expander with uabdv16qi3. (__builtin_altivec_vaduh): Adjust expander with uabdv8hi3. (__builtin_altivec_vaduw): Adjust expander with uabdv4si3. gcc/testsuite/ChangeLog: * gcc.target/powerpc/abd-vectorize-1.c: New test. * gcc.target/powerpc/abd-vectorize-2.c: New test. 
Diff: --- gcc/config/rs6000/altivec.md | 25 ++- gcc/config/rs6000/rs6000-builtins.def | 6 ++-- gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c | 27 gcc/testsuite/gcc.target/powerpc/abd-vectorize-2.c | 37 ++ 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 5af9bf920a2e..aa9d8fffc901 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -119,7 +119,6 @@ UNSPEC_STVLXL UNSPEC_STVRX UNSPEC_STVRXL - UNSPEC_VADU UNSPEC_VSLV UNSPEC_VSRV UNSPEC_VMULWHUB @@ -4323,19 +4322,15 @@ [(set_attr "type" "vecsimple")]) ;; Vector absolute difference unsigned -(define_expand "vadu3" - [(set (match_operand:VI 0 "register_operand") -(unspec:VI [(match_operand:VI 1 "register_operand") - (match_operand:VI 2 "register_operand")] - UNSPEC_VADU))] - "TARGET_P9_VECTOR") - -;; Vector absolute difference unsigned -(define_insn "p9_vadu3" +(define_insn "uabd3" [(set (match_operand:VI 0 "register_operand" "=v") -(unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] - UNSPEC_VADU))] + (minus:VI + (umax:VI + (match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")) + (umin:VI + (match_dup 1) + (match_dup 2] "TARGET_P9_VECTOR" "vabsdu %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -4500,7 +4495,7 @@ rtx zero = gen_reg_rtx (V4SImode); rtx psum = gen_reg_rtx (V4SImode); - emit_insn (gen_p9_vaduv16qi3 (absd, operands[1], operands[2])); + emit_insn (gen_uabdv16qi3 (absd, operands[1], operands[2])); emit_insn (gen_altivec_vspltisw (zero, const0_rtx)); emit_insn (gen_altivec_vsum4ubs (psum, absd, zero)); emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); @@ -4521,7 +4516,7 @@ rtx zero = gen_reg_rtx (V4SImode); rtx psum = gen_reg_rtx (V4SImode); - emit_insn (gen_p9_vaduv8hi3 (absd, operands[1], operands[2])); + emit_insn (gen_uabdv8hi3 (absd, operands[1], operands[2])); emit_insn (gen_altivec_vspltisw (zero, 
const0_rtx)); emit_insn (gen_altivec_vsum4shs (psum, absd, zero)); emit_insn (gen_addv4si3 (operands[0], psum, operands[3])); diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 12d131d016d6..0c3c884c1104 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2345,13 +2345,13 @@ VFIRSTMISMATCHOREOSINDEX_V4SI first_mismatch_or_eos_index_v4si {} const vsc __builtin_altivec_vadub (vsc, vsc); -VADUB vaduv16qi3 {} +VADUB uabdv16qi3 {} const vss __builtin_altivec_vaduh (vss, vss); -VADUH vaduv8hi3 {} +VADUH uabdv8hi3 {} const vsi __builtin_altivec_vaduw (vsi, vsi); -VADUW vaduv4si3 {} +VADUW uabdv4si3 {} const vsll __builtin_altivec_vbpermd (vsll, vsc); VBPERMD altivec_vbpermd {} diff --git a/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/abd-vectorize-1.c new file mode 100644 index ..d63b887b4b8f --- /d
[gcc r15-2428] rs6000: Relax some FLOAT128 expander condition for FLOAT128_IEEE_P [PR105359]
https://gcc.gnu.org/g:993a3c0894c487dce5efc6cfb5b31a8358905e8f commit r15-2428-g993a3c0894c487dce5efc6cfb5b31a8358905e8f Author: Kewen Lin Date: Tue Jul 30 21:21:15 2024 -0500 rs6000: Relax some FLOAT128 expander condition for FLOAT128_IEEE_P [PR105359] As PR105359 shows, we disable some FLOAT128 expanders for 64-bit long double, but in fact IEEE float128 types like __ieee128 are only guarded with TARGET_FLOAT128_TYPE and TARGET_LONG_DOUBLE_128 is only checked when determining if we can reuse long_double_type_node. So this patch is to relax all affected FLOAT128 expander conditions for FLOAT128_IEEE_P. By the way, currently IBM double double type __ibm128 is guarded by TARGET_LONG_DOUBLE_128, so we have to use TARGET_LONG_DOUBLE_128 for it. IMHO, it's not necessary and can be enhanced later. Btw, for all test cases mentioned in PR105359, I removed the xfails and tested them with explicit -mlong-double-64, both pr79004.c and float128-hw.c are tested well and float128-hw4.c isn't tested (unsupported due to 64 bit long double conflicts with -mabi=ieeelongdouble). PR target/105359 gcc/ChangeLog: * config/rs6000/rs6000.md (@extenddf2): Don't check TARGET_LONG_DOUBLE_128 for FLOAT128_IEEE_P modes. (extendsf2): Likewise. (truncdf2): Likewise. (truncsf2): Likewise. (floatsi2): Likewise. (fix_truncsi2): Likewise. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr79004.c: Remove xfails. 
Diff: --- gcc/config/rs6000/rs6000.md| 18 -- gcc/testsuite/gcc.target/powerpc/pr79004.c | 14 ++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index cfb22a3cb7da..d352a1431add 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -8845,7 +8845,8 @@ (define_expand "@extenddf2" [(set (match_operand:FLOAT128 0 "gpc_reg_operand") (float_extend:FLOAT128 (match_operand:DF 1 "gpc_reg_operand")))] - "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { if (FLOAT128_IEEE_P (mode)) rs6000_expand_float128_convert (operands[0], operands[1], false); @@ -8903,7 +8904,8 @@ (define_expand "extendsf2" [(set (match_operand:FLOAT128 0 "gpc_reg_operand") (float_extend:FLOAT128 (match_operand:SF 1 "gpc_reg_operand")))] - "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { if (FLOAT128_IEEE_P (mode)) rs6000_expand_float128_convert (operands[0], operands[1], false); @@ -8919,7 +8921,8 @@ (define_expand "truncdf2" [(set (match_operand:DF 0 "gpc_reg_operand") (float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand")))] - "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { if (FLOAT128_IEEE_P (mode)) { @@ -8956,7 +8959,8 @@ (define_expand "truncsf2" [(set (match_operand:SF 0 "gpc_reg_operand") (float_truncate:SF (match_operand:FLOAT128 1 "gpc_reg_operand")))] - "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { if (FLOAT128_IEEE_P (mode)) rs6000_expand_float128_convert (operands[0], operands[1], false); @@ -8973,7 +8977,8 @@ [(parallel [(set (match_operand:FLOAT128 0 "gpc_reg_operand") (float:FLOAT128 (match_operand:SI 1 "gpc_reg_operand"))) (clobber (match_scratch:DI 2))])] - "TARGET_HARD_FLOAT && 
TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { rtx op0 = operands[0]; rtx op1 = operands[1]; @@ -9009,7 +9014,8 @@ (define_expand "fix_truncsi2" [(set (match_operand:SI 0 "gpc_reg_operand") (fix:SI (match_operand:FLOAT128 1 "gpc_reg_operand")))] - "TARGET_HARD_FLOAT && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT + && (TARGET_LONG_DOUBLE_128 || FLOAT128_IEEE_P (mode))" { rtx op0 = operands[0]; rtx op1 = operands[1]; diff --git a/gcc/testsuite/gcc.target/powerpc/pr79004.c b/gcc/testsuite/gcc.target/powerpc/pr79004.c index 60c576cd36b6..ac89a4c9f327 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr79004.c +++ b/gcc/testsuite/gcc.target/powerpc/pr79004.c @@ -100,12 +100,10 @@ void to_uns_short_store_n (TYPE a, unsigned short *p, long n) { p[n] = (unsigned void to_uns_int_store_n (TYPE a, unsigned int *p, long n) { p[n] = (unsigned int)a; } void to_uns_long_store_n (TYPE a, unsigned long *p, long n) { p[n] = (unsigned long)a; } -/* On targets with 64-bit long double, some opcodes to deal with __float128 are - disabled, see PR target/105359. */ -/* { dg-final { scan-assembler-not