Re: [PATCH] regcprop: Determine subreg offset depending on endianness [PR101260]
On Fri, Oct 8, 2021 at 1:31 PM Stefan Schulze Frielinghaus via Gcc-patches wrote: > > gcc/ChangeLog: > > * regcprop.c (maybe_mode_change): Determine offset relative to > high or low part depending on endianness. > > Bootstrapped and regtested on IBM Z. Ok for mainline and gcc-{11,10,9}? Is there a testcase to add? > --- > gcc/regcprop.c | 11 --- > 1 file changed, 8 insertions(+), 3 deletions(-) > > diff --git a/gcc/regcprop.c b/gcc/regcprop.c > index d2a01130fe1..0e1ac12458a 100644 > --- a/gcc/regcprop.c > +++ b/gcc/regcprop.c > @@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, machine_mode > copy_mode, > copy_nregs, &bytes_per_reg)) > return NULL_RTX; >poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs); > - poly_uint64 offset > - = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + copy_offset, > - GET_MODE_SIZE (orig_mode)); > + poly_uint64 offset = > +#if WORDS_BIG_ENDIAN > + subreg_size_highpart_offset > +#else > + subreg_size_lowpart_offset > +#endif > + (GET_MODE_SIZE (new_mode) + copy_offset, > +GET_MODE_SIZE (orig_mode)); >regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); >if (targetm.hard_regno_mode_ok (regno, new_mode)) > return gen_raw_REG (new_mode, regno); > -- > 2.31.1 >
Re: [PATCH] options: use cl_optimization_hash.
On 10/8/21 12:23, Martin Liška wrote: Patch can bootstrap on x86_64-linux-gnu and survives regression tests. I've spoken to Honza and he approves the patch. I'm going to install it. Martin
Re: [PATCH][i386] Support reduc_{plus, smax, smin, umax, umin}_scal_v4qi.
On Mon, Oct 11, 2021 at 8:26 AM liuhongt wrote: > > After providing expanders for reduc_umin/umax/smin/smax_scal_v4qi, > perfomance are a little bit faster than before for reduce operations > w/ options -O2 -march=haswell, -O2 -march=skylake-avx512 > and -Ofast -march=skylake-avx512. > > gcc/ChangeLog > > PR target/102483 > * config/i386/i386-expand.c (emit_reduc_half): Handle > V4QImode. > * config/i386/mmx.md (reduc__scal_v4qi): New expander. > (reduc_plus_scal_v4qi): Ditto. > > gcc/testsuite/ChangeLog > > * gcc.target/i386/pr102483.c: New test. > * gcc.target/i386/pr102483-2.c: New test. LGTM. Thanks, Uros. > --- > gcc/config/i386/i386-expand.c | 5 ++ > gcc/config/i386/mmx.md | 45 + > gcc/testsuite/gcc.target/i386/pr102483-2.c | 26 ++ > gcc/testsuite/gcc.target/i386/pr102483.c | 58 ++ > 4 files changed, 134 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr102483-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr102483.c > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 3e6f7d8ef7e..4bade9e 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -16043,6 +16043,11 @@ emit_reduc_half (rtx dest, rtx src, int i) > case E_V2DFmode: >tem = gen_vec_interleave_highv2df (dest, src, src); >break; > +case E_V4QImode: > + d = gen_reg_rtx (V1SImode); > + tem = gen_mmx_lshrv1si3 (d, gen_lowpart (V1SImode, src), > + GEN_INT (i / 2)); > + break; > case E_V4HImode: >d = gen_reg_rtx (V1DImode); >tem = gen_mmx_lshrv1di3 (d, gen_lowpart (V1DImode, src), > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 106d41c8fd9..6c5cbcfa52c 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -3989,6 +3989,18 @@ (define_expand "reduc__scal_v4hi" >DONE; > }) > > +(define_expand "reduc__scal_v4qi" > + [(smaxmin:V4QI > + (match_operand:QI 0 "register_operand") > + (match_operand:V4QI 1 "register_operand"))] > + "TARGET_SSE4_1" > +{ > + rtx tmp = gen_reg_rtx 
(V4QImode); > + ix86_expand_reduc (gen_v4qi3, tmp, operands[1]); > + emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx)); > + DONE; > +}) > + > (define_expand "reduc__scal_v4hi" >[(umaxmin:V4HI > (match_operand:HI 0 "register_operand") > @@ -4001,6 +4013,39 @@ (define_expand "reduc__scal_v4hi" >DONE; > }) > > +(define_expand "reduc__scal_v4qi" > + [(umaxmin:V4QI > + (match_operand:QI 0 "register_operand") > + (match_operand:V4QI 1 "register_operand"))] > + "TARGET_SSE4_1" > +{ > + rtx tmp = gen_reg_rtx (V4QImode); > + ix86_expand_reduc (gen_v4qi3, tmp, operands[1]); > + emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx)); > + DONE; > +}) > + > +(define_expand "reduc_plus_scal_v4qi" > + [(plus:V4QI > +(match_operand:QI 0 "register_operand") > +(match_operand:V4QI 1 "register_operand"))] > + "TARGET_SSE2" > +{ > + rtx op1 = gen_reg_rtx (V16QImode); > + emit_insn (gen_vec_setv4si_0 (lowpart_subreg (V4SImode, op1, V16QImode), > + CONST0_RTX (V4SImode), > + lowpart_subreg (SImode, > + operands[1], > + V4QImode))); > + rtx tmp = gen_reg_rtx (V16QImode); > + emit_move_insn (tmp, CONST0_RTX (V16QImode)); > + rtx tmp2 = gen_reg_rtx (V2DImode); > + emit_insn (gen_sse2_psadbw (tmp2, op1, tmp)); > + tmp2 = gen_lowpart (V16QImode, tmp2); > + emit_insn (gen_vec_extractv16qiqi (operands[0], tmp2, const0_rtx)); > + DONE; > +}) > + > (define_expand "usadv8qi" >[(match_operand:V2SI 0 "register_operand") > (match_operand:V8QI 1 "register_operand") > diff --git a/gcc/testsuite/gcc.target/i386/pr102483-2.c > b/gcc/testsuite/gcc.target/i386/pr102483-2.c > new file mode 100644 > index 000..d477c53db08 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr102483-2.c > @@ -0,0 +1,26 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target sse4 } */ > +/* { dg-options "-O2 -msse4.1" } */ > + > +#include "sse4_1-check.h" > + > +#include "pr102483.c" > + > +static void > +sse4_1_test () > +{ > + char p[4] = { -103, 23, 41, -56 }; > + unsigned char up[4] = { 
100, 30, 255, 9 }; > + > + char res = reduce_add (p); > + if (res != -95) > +abort (); > + if (reduce_smin (p) != -103) > +abort (); > + if (reduce_smax (p) != 41) > +abort (); > + if (reduce_umin (up) != 9) > +abort (); > + if (reduce_umax (up) != 255) > +abort(); > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr102483.c > b/gcc/testsuite/gcc.target/i386/pr102483.c > new file mode 100644 > index 000..681b57598ef > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr102483.c > @@ -0,0
[PATCH] x86_64: Some SUBREG related optimization tweaks to i386 backend.
This patch contains two SUBREG-related optimization-enabling tweaks to the x86 backend. The first change, to ix86_expand_vector_extract, cures the strange -march=cascadelake related non-determinism that affected my new test cases last week. Extracting a QImode or HImode element from an SSE vector performs a zero-extension to SImode, which is currently represented as: (set (subreg:SI (reg:QI target)) (zero_extend:SI (...))) Unfortunately, the semantics of this RTL don't quite match what was intended. A set of a paradoxical subreg allows the high bits to take an arbitrary value (hence the non-determinism). A more correct representation should be: (set (reg:SI temp) (zero_extend:SI (...))) (set (reg:QI target) (subreg:QI (reg:SI temp))) Optionally with the SUBREG rtx annotated as SUBREG_PROMOTED_VAR_P to indicate that the value is already zero-extended in the SUBREG_REG. The second change is very similar, which is why I've included it in this patch, where currently the early RTL optimizers can produce: (set (reg:V?? hardreg) (subreg ...)) where this instruction may require a spill/reload from memory when the modes aren't tieable. Alas, the presence of the hard register prevents combine/gcse etc. from optimizing this away, or reusing the result which would increase the lifetime of the hard register before reload. The solution is to treat vector hard registers the same way as the x86 backend handles scalar hard registers, and only allow sets from pseudos before register allocation, which is achieved by checking ix86_hardreg_mov_ok. Hence the above instruction is expanded and maintained as: (set (reg:V?? pseudo) (subreg ...)) (set (reg:V?? hardreg) (reg:V?? pseudo)) which allows the RTL optimizers freedom to optimize the SUBREG. This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap" and "make -k check" with no new failures. 
In theory, my recent "obvious" regexp fix to accommodate -march=cascadelake is no longer required, but there's no harm leaving the testsuite as it is. Ok for mainline? 2021-10-11 Roger Sayle gcc/ChangeLog * config/i386/i386-expand.c (ix86_expand_vector_move): Use a pseudo intermediate when moving a SUBREG into a hard register, by checking ix86_hardreg_mov_ok. (ix86_expand_vector_extract): Store zero-extended SImode intermediate in a pseudo, then set target using a SUBREG_PROMOTED annotated subreg. * config/i386/sse.md (mov_internal): Prevent CSE creating complex (SUBREG) sets of (vector) hard registers before reload, by checking ix86_hardreg_mov_ok. Thanks in advance, Roger -- diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 4780b99..44404bd 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -617,8 +617,9 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) /* Make operand1 a register if it isn't already. */ if (can_create_pseudo_p () - && !register_operand (op0, mode) - && !register_operand (op1, mode)) + && (!ix86_hardreg_mov_ok (op0, op1) + || (!register_operand (op0, mode) + && !register_operand (op1, mode { rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0)); emit_move_insn (tmp, op1); @@ -16005,11 +16006,15 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) /* Let the rtl optimizers know about the zero extension performed. 
*/ if (inner_mode == QImode || inner_mode == HImode) { + rtx reg = gen_reg_rtx (SImode); tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); - target = gen_lowpart (SImode, target); + emit_move_insn (reg, tmp); + tmp = gen_lowpart (inner_mode, reg); + SUBREG_PROMOTED_VAR_P (tmp) = 1; + SUBREG_PROMOTED_SET (tmp, 1); } - emit_insn (gen_rtx_SET (target, tmp)); + emit_move_insn (target, tmp); } else { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4559b0c..e43f597 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1270,7 +1270,8 @@ " C,,vm,v"))] "TARGET_SSE && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" + || register_operand (operands[1], mode)) + && ix86_hardreg_mov_ok (operands[0], operands[1])" { switch (get_attr_type (insn)) {
Re: [PATCH] Adjust more testcases for O2 vectorization enabling.
On Sat, Oct 09, 2021 at 12:55:15PM +0800, liuhongt via Gcc-patches wrote: > Pushed to trunk. > > libgomp/ChangeLog: > > * testsuite/libgomp.c++/scan-10.C: Add option -fvect-cost-model=cheap. > * testsuite/libgomp.c++/scan-11.C: Ditto. > * testsuite/libgomp.c++/scan-12.C: Ditto. > * testsuite/libgomp.c++/scan-13.C: Ditto. > * testsuite/libgomp.c++/scan-14.C: Ditto. > * testsuite/libgomp.c++/scan-15.C: Ditto. > * testsuite/libgomp.c++/scan-16.C: Ditto. > * testsuite/libgomp.c++/scan-9.C: Ditto. > * testsuite/libgomp.c-c++-common/lastprivate-conditional-7.c: Ditto. > * testsuite/libgomp.c-c++-common/lastprivate-conditional-8.c: Ditto. > * testsuite/libgomp.c/scan-11.c: Ditto. > * testsuite/libgomp.c/scan-12.c: Ditto. > * testsuite/libgomp.c/scan-13.c: Ditto. > * testsuite/libgomp.c/scan-14.c: Ditto. > * testsuite/libgomp.c/scan-15.c: Ditto. > * testsuite/libgomp.c/scan-16.c: Ditto. > * testsuite/libgomp.c/scan-17.c: Ditto. > * testsuite/libgomp.c/scan-18.c: Ditto. > * testsuite/libgomp.c/scan-19.c: Ditto. > * testsuite/libgomp.c/scan-20.c: Ditto. > * testsuite/libgomp.c/scan-21.c: Ditto. > * testsuite/libgomp.c/scan-22.c: Ditto. I don't think this is the right thing to do. This just means that at some point between 2013 when -fsimd-cost-model has been introduced and now -fsimd-cost-model= option at least partially stopped working properly. As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting for OpenMP simd loops (loop->force_vectorize is true) if specified differently from default. 
In tree-vectorizer.h we have: static inline bool unlimited_cost_model (loop_p loop) { if (loop != NULL && loop->force_vectorize && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); } and use it in various places, but we also use flag_vect_cost_model directly in lots of places (and in one spot use flag_simd_cost_model directly; I'm not sure whether that spot is guaranteed to be handling a force_vectorize loop). So, IMHO we should turn the above inline function into loop_cost_model, which returns the cost model in effect for the loop, then reimplement unlimited_cost_model as return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; and then adjust the direct uses of the flags to call it and revert these changes. Jakub
[Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.
Hi, The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and Tobias (Fortran, commit fff15bad1ab). This patch adds two tests to check if omp_atv_serialized is available (one test for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as deprecated in C/C++ and Fortran for OpenMP 5.1. The patch was tested on x86_64-linux and powerpc64le-linux with nvptx offloading and on x86_64-linux with amdgcn offloading with no regressions. Marcel - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential. The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and Tobias (Fortran, commit fff15bad1ab). This patch adds two tests to check if omp_atv_serialized is available (one test for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as deprecated in C/C++ and Fortran for OpenMP 5.1. libgomp/ChangeLog: * allocator.c (omp_init_allocator): Replace omp_atv_sequential with omp_atv_serialized. * omp.h.in: Add deprecated flag for omp_atv_sequential. * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential. * testsuite/libgomp.c-c++-common/alloc-10.c: New test. * testsuite/libgomp.fortran/alloc-12.f90: New test. 
diff --git a/libgomp/allocator.c b/libgomp/allocator.c index dce600f..deebb6a 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, break; case omp_atv_contended: case omp_atv_uncontended: - case omp_atv_sequential: + case omp_atv_serialized: case omp_atv_private: data.sync_hint = traits[i].value; break; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d75ee13..e57e192 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t omp_atv_contended = 3, omp_atv_uncontended = 4, omp_atv_serialized = 5, - omp_atv_sequential = omp_atv_serialized, + omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized, omp_atv_private = 6, omp_atv_all = 7, omp_atv_thread = 8, diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 1063eee..57766b5 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -810,7 +810,7 @@ #endif #if _OPENMP >= 202011 -!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master +!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential #endif end module omp_lib diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c new file mode 100644 index 000..742c64a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c @@ -0,0 +1,25 @@ +#include +#include +#include + +const omp_alloctrait_t traits[] += { { omp_atk_alignment, 64 }, +{ omp_atk_sync_hint, omp_atv_serialized }, +{ omp_atk_fallback, omp_atv_null_fb } }; + +int +main () +{ + omp_allocator_handle_t a; + int *volatile p; + a = omp_init_allocator (omp_default_mem_space, 3, traits); + if (a == omp_null_allocator) +abort (); + p = (int *) omp_alloc (3072, a); + if uintptr_t) p) % 64) != 0) +abort (); + p[0] = 1; + p[3071 / sizeof (int)] = 2; + omp_free (p, a); + omp_destroy_allocator (a); +} \ No newline at end of file diff --git 
a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 b/libgomp/testsuite/libgomp.fortran/alloc-12.f90 new file mode 100644 index 000..3d10959 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90 @@ -0,0 +1,28 @@ +! { dg-additional-options "-Wall -Wextra" } +program main + use omp_lib + use ISO_C_Binding + implicit none (external, type) + type(c_ptr) :: p + integer, pointer, contiguous :: ip(:) + type (omp_alloctrait) :: traits(3) + integer (omp_allocator_handle_kind) :: a + integer (c_ptrdiff_t) :: iptr + + traits = [omp_alloctrait (omp_atk_alignment, 64), & +omp_alloctrait (omp_atk_fallback, omp_atv_null_fb), & +omp_alloctrait (omp_atk_sync_hint, omp_atv_serialized)] + a = omp_init_allocator (omp_default_mem_space, 3, traits) + if (a == omp_null_allocator) stop 1 + + p = omp_alloc (3 * c_sizeof (0), a) + if (.not. c_associated (p)) stop 2 + call c_f_pointer (p, ip, [3]) + if (mod (TRANSFER (p, iptr), 64) /= 0) & +stop 3 + ip(1) = 1 + ip(2) = 2 + ip(3) = 3 + call omp_free (p, a) + call omp_destroy_allocator (a) +end program main
Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.
On Mon, Oct 11, 2021 at 11:40:54AM +0200, Marcel Vollweiler wrote: > libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential. > > The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP > 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and > Tobias (Fortran, commit fff15bad1ab). > > This patch adds two tests to check if omp_atv_serialized is available (one > test > for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as > deprecated in C/C++ and Fortran for OpenMP 5.1. > > libgomp/ChangeLog: > > * allocator.c (omp_init_allocator): Replace omp_atv_sequential with > omp_atv_serialized. > * omp.h.in: Add deprecated flag for omp_atv_sequential. > * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential. > * testsuite/libgomp.c-c++-common/alloc-10.c: New test. > * testsuite/libgomp.fortran/alloc-12.f90: New test. LGTM, except one nit. > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c > +} > \ No newline at end of file Please make sure the file ends with a newline before committing. Jakub
[committed] openmp: Add omp_set_num_teams, omp_get_max_teams, omp_[gs]et_teams_thread_limit
Hi! OpenMP 5.1 adds env vars and functions to set and query new ICVs used as fallback if thread_limit or num_teams clauses aren't specified on the teams construct. The following patch implements those, though further work will be needed: 1) OpenMP 5.1 also changed the num_teams clause, so that it can specify both a lower and an upper limit for how many teams should be created, and changed the meaning when only one expression is provided: whereas num_teams(expr) in 5.0 meant what 5.1 spells as num_teams(1:expr), it now means num_teams(expr:expr), i.e. while previously we could create 1 to expr teams, in 5.1 we have some low limit by default equal to the single expression provided and may not create fewer teams. For host teams (which we don't currently implement efficiently for NUMA hosts) we trivially satisfy it now by always honoring what the user asked for, but for the offloading teams I think we'll need to rethink the APIs; currently the teams construct is just a call that returns and possibly lowers the number of teams; and whenever possible we try to evaluate num_teams/thread_limit already on the target construct and the GOMP_teams call just sets the number of teams to the minimum of provided and requested teams; for some cases, e.g. where target is not combined with teams and the num_teams expression calls some functions etc., we need to call those functions in the target region and so it is too late to figure out the number of teams, but also hw could just limit what it is willing to create; in that case I'm afraid we need to run the target body multiple times and arrange for omp_get_team_num () returning the right values; 2) we need to finally implement the NUMA handling for GOMP_teams_reg; 3) I now realize I haven't added some testcase coverage, will do that incrementally; 4) libgomp.texi needs updates for these new APIs, but also others like the allocator. Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 
2021-10-11 Jakub Jelinek gcc/ * omp-low.c (omp_runtime_api_call): Handle omp_get_max_teams, omp_[sg]et_teams_thread_limit and omp_set_num_teams. libgomp/ * omp.h.in (omp_set_num_teams, omp_get_max_teams, omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare. * omp_lib.f90.in (omp_set_num_teams, omp_get_max_teams, omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare. * omp_lib.h.in (omp_set_num_teams, omp_get_max_teams, omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare. * libgomp.h (gomp_nteams_var, gomp_teams_thread_limit_var): Declare. * libgomp.map (OMP_5.1): Export omp_get_max_teams{,_}, omp_get_teams_thread_limit{,_}, omp_set_num_teams{,_,_8_} and omp_set_teams_thread_limit{,_,_8_}. * icv.c (omp_set_num_teams, omp_get_max_teams, omp_set_teams_thread_limit, omp_get_teams_thread_limit): New functions. * env.c (gomp_nteams_var, gomp_teams_thread_limit_var): Define. (omp_display_env): Print OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT. (initialize_env): Handle OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env vars. * teams.c (GOMP_teams_reg): If thread_limit is not specified, use gomp_teams_thread_limit_var as fallback if not zero. If num_teams is not specified, use gomp_nteams_var. * fortran.c (omp_set_num_teams, omp_get_max_teams, omp_set_teams_thread_limit, omp_get_teams_thread_limit): Add ialias_redirect. (omp_set_num_teams_, omp_set_num_teams_8_, omp_get_max_teams_, omp_set_teams_thread_limit_, omp_set_teams_thread_limit_8_, omp_get_teams_thread_limit_): New functions. 
--- gcc/omp-low.c.jj2021-09-30 17:12:15.236586906 +0200 +++ gcc/omp-low.c 2021-10-09 14:34:21.119388958 +0200 @@ -3953,6 +3953,7 @@ omp_runtime_api_call (const_tree fndecl) "get_level", "get_max_active_levels", "get_max_task_priority", + "get_max_teams", "get_max_threads", "get_nested", "get_num_devices", @@ -3965,6 +3966,7 @@ omp_runtime_api_call (const_tree fndecl) "get_proc_bind", "get_supported_active_levels", "get_team_num", + "get_teams_thread_limit", "get_thread_limit", "get_thread_num", "get_wtick", @@ -3998,8 +4000,10 @@ omp_runtime_api_call (const_tree fndecl) "set_dynamic", "set_max_active_levels", "set_nested", + "set_num_teams", "set_num_threads", - "set_schedule" + "set_schedule", + "set_teams_thread_limit" }; int mode = 0; --- libgomp/omp.h.in.jj 2021-10-01 10:32:03.024954096 +0200 +++ libgomp/omp.h.in2021-10-09 15:06:38.173661594 +0200 @@ -261,6 +261,11 @@ extern int omp_get_max_task_priority (vo extern void omp_fulfill_event (omp_event_handle_t) __GOMP_NOTHROW; +extern void omp_set_num_teams (int) __GOMP_NOTHROW; +extern int omp_get_max
[PATCH v3] MIPS: R6: load/store can process unaligned address
MIPS release 6 requires the lw/ld/sw/sd can work with unaligned address, while it can be implemented by full hardware or trap&emulate. Since it doesn't have to be fully done by hardware, we add a pair of options -m(no-)unaligned-access. Kernels may need them. gcc/ChangeLog: * config/mips/mips.h (ISA_HAS_UNALIGNED_ACCESS): (STRICT_ALIGNMENT): R6 can unaligned access. * config/mips/mips.md (movmisalign): Likewise. * config/mips/mips.opt: add -m(no-)unaligned-access * doc/invoke.texi: Likewise. gcc/testsuite/ChangeLog: * gcc.target/mips/mips.exp: add unaligned-access * gcc.target/mips/unaligned-2.c: New test. * gcc.target/mips/unaligned-3.c: New test. --- gcc/config/mips/mips.h | 6 ++- gcc/config/mips/mips.md | 10 gcc/config/mips/mips.opt| 4 ++ gcc/doc/invoke.texi | 10 gcc/testsuite/gcc.target/mips/mips.exp | 1 + gcc/testsuite/gcc.target/mips/unaligned-2.c | 53 + gcc/testsuite/gcc.target/mips/unaligned-3.c | 53 + 7 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/mips/unaligned-2.c create mode 100644 gcc/testsuite/gcc.target/mips/unaligned-3.c diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 973372e78..34490bfc2 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -243,6 +243,10 @@ struct mips_cpu_info { && (mips_isa_rev >= 6 \ || ISA_HAS_MSA)) +/* ISA load/store instructions can handle unaligned address */ +#define ISA_HAS_UNALIGNED_ACCESS (TARGET_UNALIGNED_ACCESS \ +&& (mips_isa_rev >= 6)) + /* The ISA compression flags that are currently in effect. */ #define TARGET_COMPRESSION (target_flags & (MASK_MIPS16 | MASK_MICROMIPS)) @@ -1684,7 +1688,7 @@ FP_ASM_SPEC "\ (ISA_HAS_MSA ? BITS_PER_MSA_REG : LONG_DOUBLE_TYPE_SIZE) /* All accesses must be aligned. 
*/ -#define STRICT_ALIGNMENT 1 +#define STRICT_ALIGNMENT (!ISA_HAS_UNALIGNED_ACCESS) /* Define this if you wish to imitate the way many other C compilers handle alignment of bitfields and the structures that contain diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 455b9b802..e35d57d9e 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -4459,6 +4459,16 @@ (define_insn "mov_r" [(set_attr "move_type" "store") (set_attr "mode" "")]) +;; Unaligned direct access +(define_expand "movmisalign" + [(set (match_operand:JOIN_MODE 0) + (match_operand:JOIN_MODE 1))] + "ISA_HAS_UNALIGNED_ACCESS" +{ + if (mips_legitimize_move (mode, operands[0], operands[1])) +DONE; +}) + ;; An instruction to calculate the high part of a 64-bit SYMBOL_ABSOLUTE. ;; The required value is: ;; diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 6af8037e9..ebb4c6164 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -404,6 +404,10 @@ mtune= Target RejectNegative Joined Var(mips_tune_option) ToLower Enum(mips_arch_opt_value) -mtune=PROCESSOR Optimize the output for PROCESSOR. +munaligned-access +Target Var(TARGET_UNALIGNED_ACCESS) Init(1) +Generate code with unaligned load store, valid for MIPS R6. + muninit-const-in-rodata Target Var(TARGET_UNINIT_CONST_IN_RODATA) Put uninitialized constants in ROM (needs -membedded-data). diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8b3ebcfbc..ffb1f4676 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1069,6 +1069,7 @@ Objective-C and Objective-C++ Dialects}. -mcheck-zero-division -mno-check-zero-division @gol -mdivide-traps -mdivide-breaks @gol -mload-store-pairs -mno-load-store-pairs @gol +-munaligned-access -mno-unaligned-access @gol -mmemcpy -mno-memcpy -mlong-calls -mno-long-calls @gol -mmad -mno-mad -mimadd -mno-imadd -mfused-madd -mno-fused-madd -nocpp @gol -mfix-24k -mno-fix-24k @gol @@ -25503,6 +25504,15 @@ instructions to enable load/store bonding. 
This option is enabled by default but only takes effect when the selected architecture is known to support bonding. +@item -munaligned-access +@itemx -mno-unaligned-access +@opindex munaligned-access +@opindex mno-unaligned-access +Enable (disable) direct unaligned access for MIPS Release 6. +MIPSr6 requires load/store unaligned-access support, +by hardware or trap&emulate. +So @option{-mno-unaligned-access} may be needed by kernel. + @item -mmemcpy @itemx -mno-memcpy @opindex mmemcpy diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp index d4d4b90d8..f76ab7adc 100644 --- a/gcc/testsuite/gcc.target/mips/mips.exp +++ b/gcc/testsuite/gcc.target/mips/mips.exp @@ -264,6 +264,7 @@ set mips_option_groups { frame-header "-mframe-header-opt|-mno-frame-header-opt" stack-protecto
[PATCH] opts: Remove AUTODETECT_VALUE usage.
The patch is about using OPTION_SET_P instead of a default option value set in common.opt. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: * common.opt: Remove Init(2) for some options. * toplev.c (process_options): Do not use AUTODETECT_VALUE, but use rather OPTION_SET_P. --- gcc/common.opt | 6 +++--- gcc/toplev.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 59ecc9fbdf7..4099effcc80 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2399,7 +2399,7 @@ Common Var(flag_live_range_shrinkage) Init(0) Optimization Relief of register pressure through live range shrinkage. frename-registers -Common Var(flag_rename_registers) Init(2) Optimization +Common Var(flag_rename_registers) Optimization Perform a register renaming optimization pass. fschedule-fusion @@ -2797,7 +2797,7 @@ Common Ignore Does nothing. Preserved for backward compatibility. ftree-cselim -Common Var(flag_tree_cselim) Init(2) Optimization +Common Var(flag_tree_cselim) Optimization Transform condition stores into unconditional ones. ftree-switch-conversion @@ -3158,7 +3158,7 @@ Common Var(flag_value_profile_transformations) Optimization Use expression value profiles in optimizations. fweb -Common Var(flag_web) Init(2) Optimization +Common Var(flag_web) Optimization Construct webs and split unrelated uses of single variable. ftree-builtin-call-dce diff --git a/gcc/toplev.c b/gcc/toplev.c index b878234f3f2..167feac2583 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1332,10 +1332,10 @@ process_options (bool no_backend) } /* web and rename-registers help when run after loop unrolling. 
*/ - if (flag_web == AUTODETECT_VALUE) + if (!OPTION_SET_P (flag_web)) flag_web = flag_unroll_loops; - if (flag_rename_registers == AUTODETECT_VALUE) + if (!OPTION_SET_P (flag_rename_registers)) flag_rename_registers = flag_unroll_loops; if (flag_non_call_exceptions) @@ -1598,7 +1598,7 @@ process_options (bool no_backend) debug_inline_points = 0; } - if (flag_tree_cselim == AUTODETECT_VALUE) + if (!OPTION_SET_P (flag_tree_cselim)) { if (HAVE_conditional_move) flag_tree_cselim = 1; -- 2.33.0
[PATCH] Remove usage of IRA_REGION_AUTODETECT
Similar patch, let's rely on OPTION_SET_P and not a default options value. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: * common.opt: Remove usage of IRA_REGION_AUTODETECT. * flag-types.h (enum ira_region): Likewise. * toplev.c (process_options): Use OPTION_SET_P instead of IRA_REGION_AUTODETECT. --- gcc/common.opt | 2 +- gcc/flag-types.h | 4 gcc/toplev.c | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 52693e226d2..59ecc9fbdf7 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1923,7 +1923,7 @@ EnumValue Enum(ira_algorithm) String(priority) Value(IRA_ALGORITHM_PRIORITY) fira-region= -Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) Init(IRA_REGION_AUTODETECT) Optimization +Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) Init(IRA_REGION_ONE) Optimization -fira-region=[one|all|mixed] Set regions for IRA. Enum diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 5bd1f771c8b..ae0b216e8a3 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -191,10 +191,6 @@ enum ira_region IRA_REGION_ONE, IRA_REGION_ALL, IRA_REGION_MIXED, - /* This value means that there were no options -fira-region on the - command line and that we should choose a value depending on the - used -O option. */ - IRA_REGION_AUTODETECT }; /* The options for excess precision. */ diff --git a/gcc/toplev.c b/gcc/toplev.c index 81748b1152a..b878234f3f2 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1319,7 +1319,7 @@ process_options (bool no_backend) } /* One region RA really helps to decrease the code size. */ - if (flag_ira_region == IRA_REGION_AUTODETECT) + if (!OPTION_SET_P (flag_ira_region)) flag_ira_region = optimize_size || !optimize ? IRA_REGION_ONE : IRA_REGION_MIXED; -- 2.33.0
Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
On Fri, 8 Oct 2021 at 21:19, Richard Sandiford wrote: > > Thanks for looking at this. > > Prathamesh Kulkarni writes: > > Hi, > > As mentioned in PR, for the following test-case: > > > > typedef unsigned char uint8_t; > > > > static inline uint8_t > > x264_clip_uint8(uint8_t x) > > { > > uint8_t t = -x; > > uint8_t t1 = x & ~63; > > return (t1 != 0) ? t : x; > > } > > > > void > > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n) > > { > > for (int x = 0; x < n*16; x++) > > dst[x] = x264_clip_uint8(src[x]); > > } > > > > -O3 -mcpu=generic+sve generates following code for the inner loop: > > > > .L3: > > ld1b z0.b, p0/z, [x1, x2] > > movprfx z2, z0 > > and z2.b, z2.b, #0xc0 > > movprfx z1, z0 > > neg z1.b, p1/m, z0.b > > cmpeq p2.b, p1/z, z2.b, #0 > > sel z0.b, p2, z0.b, z1.b > > st1b z0.b, p0, [x0, x2] > > add x2, x2, x4 > > whilelo p0.b, w2, w3 > > b.any .L3 > > > > The sel is redundant since we could conditionally negate z0 based on > > the predicate > > comparing z2 with 0. > > > > As suggested in the PR, the attached patch, introduces a new > > conditional internal function .COND_NEG, and in gimple-isel replaces > > the following sequence: > >op2 = -op1 > >op0 = A cmp B > >lhs = op0 ? op1 : op2 > > > > with: > >op0 = A inverted_cmp B > >lhs = .COND_NEG (op0, op1, op1). > > > > lhs = .COND_NEG (op0, op1, op1) > > implies > > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false. > > > > With patch, it generates the following code-gen: > > .L3: > > ld1b z0.b, p0/z, [x1, x2] > > movprfx z1, z0 > > and z1.b, z1.b, #0xc0 > > cmpne p1.b, p2/z, z1.b, #0 > > neg z0.b, p1/m, z0.b > > st1b z0.b, p0, [x0, x2] > > add x2, x2, x4 > > whilelo p0.b, w2, w3 > > b.any .L3 > > > > While it seems to work for this test-case, I am not entirely sure if > > the patch is correct. Does it look in the right direction ?
> > For binary ops we use match.pd rather than isel: > > (for uncond_op (UNCOND_BINARY) > cond_op (COND_BINARY) > (simplify > (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) > (with { tree op_type = TREE_TYPE (@4); } >(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)) > (simplify > (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) > (with { tree op_type = TREE_TYPE (@4); } >(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))) > > I think it'd be good to do the same here, using new (UN)COND_UNARY > iterators. (The iterators will only have one value to start with, > but other unary ops could get the same treatment in future.) Thanks for the suggestions. The attached patch adds a pattern to match.pd to replace: cond = a cmp b r = cond ? x : -x with: cond = a inverted_cmp b r = cond ? -x : x Code-gen with patch for inner loop: .L3: ld1bz0.b, p0/z, [x1, x2] movprfx z1, z0 and z1.b, z1.b, #0xc0 cmpne p1.b, p2/z, z1.b, #0 neg z0.b, p1/m, z0.b st1bz0.b, p0, [x0, x2] add x2, x2, x4 whilelo p0.b, w2, w3 b.any .L3 Does it look OK ? I didn't add it under (UN)COND_UNARY since it inverts the comparison, which we might not want to do for other unary ops ? Also, I am not sure, how to test if target supports conditional internal function ? I tried to use: (for cmp (tcc_comparison) icmp (inverted_tcc_comparison) (simplify (vec_cond (cmp@2 @0 @1) @3 (negate @3)) (with { auto op_type = TREE_TYPE (@2); } (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type) && is_truth_type_for (op_type, TREE_TYPE (@0))) (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3) but both the conditions seem to fail. 
Thanks, Prathamesh > > Richard > > > > > > Thanks, > > Prathamesh > > > > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc > > index 38e90933c3e..5b0dd3c1993 100644 > > --- a/gcc/gimple-isel.cc > > +++ b/gcc/gimple-isel.cc > > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3. If not see > > #include "optabs.h" > > #include "gimple-fold.h" > > #include "internal-fn.h" > > +#include "fold-const.h" > > +#include "tree-pretty-print.h" > > > > /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to > > internal function based on vector type of selected expansion. > > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, > > return new_stmt; > > } > > > > + /* Replace: > > + op2 = -op1 > > + op0 = A cmp B >
[PATCH] options: Fix variable tracking option processing.
After the recent change in Optimize attribute handling, we need finish_option function properly auto-detecting variable tracking options. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin PR debug/102585 gcc/ChangeLog: * common.opt: Do not init flag_var_tracking* options. * opts.c (finish_options): Handle flag_var_tracking* options. * toplev.c (process_options): Move to opts.c. gcc/testsuite/ChangeLog: * gcc.dg/pr102585.c: New test. --- gcc/common.opt | 14 +- gcc/opts.c | 28 gcc/testsuite/gcc.dg/pr102585.c | 6 ++ gcc/toplev.c| 33 +++-- 4 files changed, 42 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr102585.c diff --git a/gcc/common.opt b/gcc/common.opt index 52693e226d2..ec020f4e642 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3003,19 +3003,16 @@ Common Undocumented Var(flag_use_linker_plugin) ; Positive if we should track variables, negative if we should run ; the var-tracking pass only to discard debug annotations, zero if -; we're not to run it. When flag_var_tracking == 2 (AUTODETECT_VALUE) it -; will be set according to optimize, debug_info_level and debug_hooks -; in process_options (). +; we're not to run it. fvar-tracking -Common Var(flag_var_tracking) Init(2) PerFunction +Common Var(flag_var_tracking) PerFunction Perform variable tracking. ; Positive if we should track variables at assignments, negative if ; we should run the var-tracking pass only to discard debug -; annotations. When flag_var_tracking_assignments == -; AUTODETECT_VALUE it will be set according to flag_var_tracking. +; annotations. fvar-tracking-assignments -Common Var(flag_var_tracking_assignments) Init(2) PerFunction +Common Var(flag_var_tracking_assignments) PerFunction Perform variable tracking by annotating assignments. ; Nonzero if we should toggle flag_var_tracking_assignments after @@ -3026,8 +3023,7 @@ Toggle -fvar-tracking-assignments. 
; Positive if we should track uninitialized variables, negative if ; we should run the var-tracking pass only to discard debug -; annotations. When flag_var_tracking_uninit == AUTODETECT_VALUE it -; will be set according to flag_var_tracking. +; annotations. fvar-tracking-uninit Common Var(flag_var_tracking_uninit) PerFunction Perform variable tracking and also tag variables that are uninitialized. diff --git a/gcc/opts.c b/gcc/opts.c index 2116c2991dd..eeb6b1dcc7c 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1353,6 +1353,34 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, SET_OPTION_IF_UNSET (opts, opts_set, flag_vect_cost_model, VECT_COST_MODEL_CHEAP); + /* If the user specifically requested variable tracking with tagging + uninitialized variables, we need to turn on variable tracking. + (We already determined above that variable tracking is feasible.) */ + if (opts->x_flag_var_tracking_uninit == 1) +opts->x_flag_var_tracking = 1; + + if (!opts_set->x_flag_var_tracking) +opts->x_flag_var_tracking = optimize >= 1; + + if (!opts_set->x_flag_var_tracking_uninit) +opts->x_flag_var_tracking_uninit = opts->x_flag_var_tracking; + + if (!opts_set->x_flag_var_tracking_assignments) +opts->x_flag_var_tracking_assignments + = (opts->x_flag_var_tracking +&& !(opts->x_flag_selective_scheduling + || opts->x_flag_selective_scheduling2)); + + if (opts->x_flag_var_tracking_assignments_toggle) +opts->x_flag_var_tracking_assignments = !opts->x_flag_var_tracking_assignments; + + if (opts->x_flag_var_tracking_assignments && !opts->x_flag_var_tracking) +opts->x_flag_var_tracking = opts->x_flag_var_tracking_assignments = -1; + + if (opts->x_flag_var_tracking_assignments + && (opts->x_flag_selective_scheduling || opts->x_flag_selective_scheduling2)) +warning_at (loc, 0, + "var-tracking-assignments changes selective scheduling"); } #define LEFT_COLUMN 27 diff --git a/gcc/testsuite/gcc.dg/pr102585.c b/gcc/testsuite/gcc.dg/pr102585.c new file mode 100644 index 
000..efd066b4a4e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr102585.c @@ -0,0 +1,6 @@ +/* PR debug/102585 */ +/* { dg-do compile } */ +/* { dg-options "-fvar-tracking-assignments -fno-var-tracking" } */ + +#pragma GCC optimize 0 +void d_demangle_callback_Og() { int c = 0; } diff --git a/gcc/toplev.c b/gcc/toplev.c index 81748b1152a..2f13d740b98 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1490,8 +1490,8 @@ process_options (bool no_backend) || !dwarf_debuginfo_p () || debug_hooks->var_location == do_nothing_debug_hooks.var_location) { - if (flag_var_tracking == 1 - || flag_var_tracking_uninit == 1) + if ((OPTION_SET_P (flag_var_tracking) && flag_var_trac
Re: [PATCH][i386] target: support spaces in target attribute.
On 10/4/21 23:02, Andrew Pinski wrote: It might be useful to skip tabs for the same reason as spaces really. Sure, be my guest. Martin From b66d7be2c1b3ac286257e3df4d9796e391751bef Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 4 Oct 2021 14:06:14 +0200 Subject: [PATCH] target: support spaces in target attribute. PR target/102374 gcc/ChangeLog: * config/i386/i386-options.c (ix86_valid_target_attribute_inner_p): Strip whitespaces. * system.h (strip_whitespaces): New function. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102374.c: New test. --- gcc/config/i386/i386-options.c | 2 ++ gcc/system.h | 21 + gcc/testsuite/gcc.target/i386/pr102374.c | 3 +++ 3 files changed, 26 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr102374.c diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index e7a3bd4aaea..c9523b26f49 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -1146,6 +1146,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], next_optstr = NULL; } + p = strip_whitespaces (p, &len); + /* Recognize no-xxx. */ if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') { diff --git a/gcc/system.h b/gcc/system.h index adde3e264b6..17a6a553b0b 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -1305,4 +1305,25 @@ startswith (const char *str, const char *prefix) return strncmp (str, prefix, strlen (prefix)) == 0; } +/* Strip white spaces from STRING with LEN length. + A stripped string is returned and LEN is updated accordingly. */ + +static inline char * +strip_whitespaces (char *string, size_t *len) +{ + while (string[0] == ' ' || string[0] == '\t') +{ + --(*len); + ++string; +} + + while (string[*len - 1] == ' ' || string[*len - 1] == '\t') +{ + string[*len - 1] = '\0'; + --(*len); +} + + return string; +} + #endif /* ! 
GCC_SYSTEM_H */ diff --git a/gcc/testsuite/gcc.target/i386/pr102374.c b/gcc/testsuite/gcc.target/i386/pr102374.c new file mode 100644 index 000..21aa76011ed --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102374.c @@ -0,0 +1,3 @@ +/* PR target/102374 */ + +void calculate_sse(void) __attribute__ ((__target__ (" no-avx, sse2 "))); -- 2.33.0
[PATCH] c++: Fix up push_local_extern_decl_alias error recovery [PR102642]
Hi! My recent push_local_extern_decl_alias change broke error-recovery, do_pushdecl can return error_mark_node and set_decl_tls_model can't be called on that. There are other code paths that store error_mark_node into DECL_LOCAL_DECL_ALIAS, with the intent to differentiate the cases where we haven't yet tried to push it into the namespace scope (NULL) and one where we have tried it but it failed (error_mark_node), but looking around, there are other spots where we call functions or do processing which doesn't tolerate error_mark_node. So, the first hunk with the testcase fixes the testcase, the others fix what I've spotted and the fix was easy to figure out (there are I think 3 other spots mainly for function multiversioning). Ok for trunk and 11.3 (where I've backported the tls fix before) if it passes bootstrap/regtest? 2021-10-11 Jakub Jelinek PR c++/102642 * name-lookup.c (push_local_extern_decl_alias): Don't call set_decl_tls_model on error_mark_node. * decl.c (make_rtl_for_nonlocal_decl): Don't call set_user_assembler_name on error_mark_node. * parser.c (cp_parser_oacc_declare): Ignore DECL_LOCAL_DECL_ALIAS if it is error_mark_node. (cp_parser_omp_declare_target): Likewise. * g++.dg/tls/pr102642.C: New test. --- gcc/cp/name-lookup.c.jj 2021-10-01 10:30:07.674588541 +0200 +++ gcc/cp/name-lookup.c2021-10-11 12:43:39.261051228 +0200 @@ -3474,7 +3474,9 @@ push_local_extern_decl_alias (tree decl) push_nested_namespace (ns); alias = do_pushdecl (alias, /* hiding= */true); pop_nested_namespace (ns); - if (VAR_P (decl) && CP_DECL_THREAD_LOCAL_P (decl)) + if (VAR_P (decl) + && CP_DECL_THREAD_LOCAL_P (decl) + && alias != error_mark_node) set_decl_tls_model (alias, DECL_TLS_MODEL (decl)); } } --- gcc/cp/decl.c.jj2021-10-09 10:07:51.883704975 +0200 +++ gcc/cp/decl.c 2021-10-11 12:49:33.810977118 +0200 @@ -7373,7 +7373,8 @@ make_rtl_for_nonlocal_decl (tree decl, t This is horrible, as we're affecting a possibly-shared decl. Again, a one-true-decl model breaks down. 
*/ - set_user_assembler_name (ns_decl, asmspec); + if (ns_decl != error_mark_node) + set_user_assembler_name (ns_decl, asmspec); } } --- gcc/cp/parser.c.jj 2021-10-09 10:14:24.043098112 +0200 +++ gcc/cp/parser.c 2021-10-11 12:47:21.220874667 +0200 @@ -44437,7 +44437,8 @@ cp_parser_oacc_declare (cp_parser *parse dependent local extern variable decls are as rare as hen's teeth. */ if (auto alias = DECL_LOCAL_DECL_ALIAS (decl)) - decl = alias; + if (alias != error_mark_node) + decl = alias; if (OMP_CLAUSE_MAP_KIND (t) == GOMP_MAP_LINK) id = get_identifier ("omp declare target link"); @@ -45665,7 +45666,8 @@ cp_parser_omp_declare_target (cp_parser if (VAR_OR_FUNCTION_DECL_P (t) && DECL_LOCAL_DECL_P (t) && DECL_LANG_SPECIFIC (t) - && DECL_LOCAL_DECL_ALIAS (t)) + && DECL_LOCAL_DECL_ALIAS (t) + && DECL_LOCAL_DECL_ALIAS (t) != error_mark_node) handle_omp_declare_target_clause (c, DECL_LOCAL_DECL_ALIAS (t), device_type); } --- gcc/testsuite/g++.dg/tls/pr102642.C.jj 2021-10-11 13:00:35.889503002 +0200 +++ gcc/testsuite/g++.dg/tls/pr102642.C 2021-10-11 13:00:20.388724721 +0200 @@ -0,0 +1,10 @@ +// PR c++/102642 +// { dg-do compile { target c++11 } } + +thread_local int *z; // { dg-message "previous declaration" } + +void +foo () +{ + extern thread_local int z; // { dg-error "conflicting declaration" } +} Jakub
Re: [PATCH] x86_64: Some SUBREG related optimization tweaks to i386 backend.
On Mon, Oct 11, 2021 at 4:55 PM Roger Sayle wrote: > > > This patch contains two SUBREG-related optimization enabling tweaks to > the x86 backend. > > The first change, to ix86_expand_vector_extract, cures the strange > -march=cascadelake related non-determinism that affected my new test > cases last week. Extracting a QImode or HImode element from an SSE > vector performs a zero-extension to SImode, which is currently > represented as: > > (set (subreg:SI (reg:QI target)) (zero_extend:SI (...))) > > Unfortunately, the semantics of this RTL doesn't quite match what was > intended. A set of a paradoxical subreg allows the high-bits to take > an arbitrary value (hence the non-determinism). A more correct > representation should be: > > (set (reg:SI temp) (zero_extend:SI (...))) > (set (reg:QI target) (subreg:QI (reg:SI temp))) > > Optionally with the SUBREG rtx annotated as SUBREG_PROMOTED_VAR_P to > indicate that value is already zero-extended in the SUBREG_REG. > > The second change is very similar, which is why I've included it in > this patch, where currently the early RTL optimizers can produce: > > (set (reg:V?? hardreg) (subreg ...)) > > where this instruction may require a spill/reload from memory when > the modes aren't tieable. Alas the presence of the hard register > prevents combine/gcse etc. optimizing this away, or reusing the result > which would increase the lifetime of the hard register before reload. > > The solution is to treat vector hard registers the same way as the > x86 backend handles scalar hard registers, and only allow sets from > pseudos before register allocation, which is achieved by checking > ix86_hardreg_mov_ok. Hence the above instruction is expanded and > maintained as: > > (set (reg:V?? pseudo) (subreg ...)) > (set (reg:V?? hardreg) (reg:V?? pseudo)) > > which allows the RTL optimizers freedom to optimize the SUBREG. 
> > > This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap" > and "make -k check" with no new failures. In theory, my recent "obvious" > regexp fix to accommodate -march=cascadelake is no longer required, but > there's no harm leaving the testsuite as it is. > > Ok for mainline? > > > 2021-10-11 Roger Sayle > > gcc/ChangeLog > * config/i386/i386-expand.c (ix86_expand_vector_move): Use a > pseudo intermediate when moving a SUBREG into a hard register, > by checking ix86_hardreg_mov_ok. /* Make operand1 a register if it isn't already. */ if (can_create_pseudo_p () - && !register_operand (op0, mode) - && !register_operand (op1, mode)) + && (!ix86_hardreg_mov_ok (op0, op1) + || (!register_operand (op0, mode) + && !register_operand (op1, mode)))) { rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0)); ix86_gen_scratch_sse_rtx probably returns a hard register, but here you want a pseudo register. > (ix86_expand_vector_extract): Store zero-extended SImode > intermediate in a pseudo, then set target using a SUBREG_PROMOTED > annotated subreg. > * config/i386/sse.md (mov_internal): Prevent CSE creating > complex (SUBREG) sets of (vector) hard registers before reload, by > checking ix86_hardreg_mov_ok. > > > Thanks in advance, > Roger > -- > -- BR, Hongtao
Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.
Hi Jakub, Am 11.10.2021 um 11:49 schrieb Jakub Jelinek: On Mon, Oct 11, 2021 at 11:40:54AM +0200, Marcel Vollweiler wrote: libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential. The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and Tobias (Fortran, commit fff15bad1ab). This patch adds two tests to check if omp_atv_serialized is available (one test for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as deprecated in C/C++ and Fortran for OpenMP 5.1. libgomp/ChangeLog: * allocator.c (omp_init_allocator): Replace omp_atv_sequential with omp_atv_serialized. * omp.h.in: Add deprecated flag for omp_atv_sequential. * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential. * testsuite/libgomp.c-c++-common/alloc-10.c: New test. * testsuite/libgomp.fortran/alloc-12.f90: New test. LGTM, except one nit. --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c +} \ No newline at end of file Please make sure the file ends with a newline before committing. Changed :) Jakub Thanks, Marcel - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential. The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and Tobias (Fortran, commit fff15bad1ab). This patch adds two tests to check if omp_atv_serialized is available (one test for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as deprecated in C/C++ and Fortran for OpenMP 5.1. libgomp/ChangeLog: * allocator.c (omp_init_allocator): Replace omp_atv_sequential with omp_atv_serialized. 
* omp.h.in: Add deprecated flag for omp_atv_sequential. * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential. * testsuite/libgomp.c-c++-common/alloc-10.c: New test. * testsuite/libgomp.fortran/alloc-12.f90: New test. diff --git a/libgomp/allocator.c b/libgomp/allocator.c index dce600f..deebb6a 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, break; case omp_atv_contended: case omp_atv_uncontended: - case omp_atv_sequential: + case omp_atv_serialized: case omp_atv_private: data.sync_hint = traits[i].value; break; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d75ee13..e57e192 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t omp_atv_contended = 3, omp_atv_uncontended = 4, omp_atv_serialized = 5, - omp_atv_sequential = omp_atv_serialized, + omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized, omp_atv_private = 6, omp_atv_all = 7, omp_atv_thread = 8, diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 1063eee..57766b5 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -810,7 +810,7 @@ #endif #if _OPENMP >= 202011 -!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master +!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential #endif end module omp_lib diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c new file mode 100644 index 000..01ae150d --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c @@ -0,0 +1,25 @@ +#include +#include +#include + +const omp_alloctrait_t traits[] += { { omp_atk_alignment, 64 }, +{ omp_atk_sync_hint, omp_atv_serialized }, +{ omp_atk_fallback, omp_atv_null_fb } }; + +int +main () +{ + omp_allocator_handle_t a; + int *volatile p; + a = omp_init_allocator (omp_default_mem_space, 3, traits); + if (a == omp_null_allocator) +abort (); + p = (int *) 
omp_alloc (3072, a); + if ((((uintptr_t) p) % 64) != 0) +abort (); + p[0] = 1; + p[3071 / sizeof (int)] = 2; + omp_free (p, a); + omp_destroy_allocator (a); +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 b/libgomp/testsuite/libgomp.fortran/alloc-12.f90 new file mode 100644 index 000..3d10959 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90 @@ -0,0 +1,28 @@ +! { dg-additional-options "-Wall -Wextra" } +program main + use omp_lib + use ISO_C_Binding + implicit none (external, type) + type(c_ptr) :: p + integer, pointer, contiguous :: ip(:) + type (omp_alloctrait) :: traits(3) + integer (omp_allocator_handle_kind) :: a + integer (c_ptrdiff_t) :: iptr + + traits = [omp_alloctrait (omp_atk_alignment,
Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.
On Mon, Oct 11, 2021 at 01:28:11PM +0200, Marcel Vollweiler wrote: > > > libgomp/ChangeLog: > > > > > > * allocator.c (omp_init_allocator): Replace omp_atv_sequential with > > > omp_atv_serialized. > > > * omp.h.in: Add deprecated flag for omp_atv_sequential. > > > * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential. > > > * testsuite/libgomp.c-c++-common/alloc-10.c: New test. > > > * testsuite/libgomp.fortran/alloc-12.f90: New test. > > > > LGTM, except one nit. > > > > > --- /dev/null > > > +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c > > > +} > > > \ No newline at end of file > > > > Please make sure the file ends with a newline before committing. > > Changed :) Thanks, just commit it, the above was meant as "please commit with this nit fixed", I'm sorry if it wasn't obvious. Jakub
Re: [Patch] Fortran: Various CLASS + assumed-rank fixed [PR102541]
Hi Harald, On 10.10.21 21:27, Harald Anlauf via Fortran wrote: just some random remarks from initially browsing your patch. Thanks for browsing the patch :-) - leftover from debugging? Yes. - code that could be shortened/made slightly more readable: ... Is there a reason to not use strcmp (comp->name, "_data") == 0? Just (premature) optimization. I think the latter is clearer; I will change it. Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
RE: [PATCH]middle-end convert negate + right shift into compare greater.
Hi all, Here's a new version of the patch. > >>> " If an exceptional condition occurs during the evaluation of an > >>> expression > >> (that is, if the result is not mathematically defined or not in the > >> range of representable values for its type), the behavior is undefined." > >>> > >>> So it should still be acceptable to do in this case. > >> > >> -fwrapv > > > > If I understand correctly, you're happy with this is I guard it on ! > > flag_wrapv ? > > I did some more digging. Right shift of a negative value is IMP_DEF (not > UNDEF - this keeps catching me out). So yes, wrapping this with !wrapv > would address my concern. > > I've not reviewed the patch itself, though. I've never even written a patch > for match.pd, so don't feel qualified to do that. No problem, thanks for catching this! I'm sure one of the Richards will review it when they have a chance. Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no regressions. Ok for master? Thanks, Tamar gcc/ChangeLog: * match.pd: New negate+shift pattern. gcc/testsuite/ChangeLog: * gcc.dg/signbit-2.c: New test. * gcc.dg/signbit-3.c: New test. * gcc.target/aarch64/signbit-1.c: New test. --- inline copy of patch --- diff --git a/gcc/match.pd b/gcc/match.pd index 7d2a24dbc5e9644a09968f877e12a824d8ba1caa..3d48eda826f889483a83267409c3f278ee907b57 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -826,6 +826,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { tree utype = unsigned_type_for (type); } (convert (rshift (lshift (convert:utype @0) @2) @3)) +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1. */ +(for cst (INTEGER_CST VECTOR_CST) + (simplify + (rshift (negate:s @0) cst@1) + (if (!flag_wrapv) +(with { tree ctype = TREE_TYPE (@0); + tree stype = TREE_TYPE (@1); + tree bt = truth_type_for (ctype); } + (switch + /* Handle scalar case. 
*/ + (if (INTEGRAL_TYPE_P (ctype) + && !VECTOR_TYPE_P (ctype) + && !TYPE_UNSIGNED (ctype) + && canonicalize_math_after_vectorization_p () + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1)) + (convert:bt (gt:bt @0 { build_zero_cst (stype); }))) + /* Handle vector case with a scalar immediate. */ + (if (VECTOR_INTEGER_TYPE_P (ctype) + && !VECTOR_TYPE_P (stype) + && !TYPE_UNSIGNED (ctype) + && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1)) + (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))) + /* Handle vector case with a vector immediate. */ + (if (VECTOR_INTEGER_TYPE_P (ctype) + && VECTOR_TYPE_P (stype) + && !TYPE_UNSIGNED (ctype) + && uniform_vector_p (@1)) + (with { tree cst = vector_cst_elt (@1, 0); + tree t = TREE_TYPE (cst); } +(if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1)) + (convert:bt (gt:bt @0 { build_zero_cst (ctype); })) + /* Fold (C1/X)*C2 into (C1*C2)/X. */ (simplify (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2) diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c new file mode 100644 index ..fc0157cbc5c7996b481f2998bc30176c96a669bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-2.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */ + +#include + +void fun1(int32_t *x, int n) +{ +for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +void fun2(int32_t *x, int n) +{ +for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 30; +} + +/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } */ +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c new file mode 100644 index ..19e9c06c349b3287610f817628f00938ece60bf7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/signbit-3.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */ + +#include + +void fun1(int32_t *x, int n) +{ 
+for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */ +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c new file mode 100644 index ..3ebfb0586f37de29cf58635b27fe48503714447e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +void fun1(int32_t *x, int n) +{ +for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +void fun2(int32_t *x, int n) +{ +for (int i = 0; i < (n & -16); i++
Re: [PATCH] Adjust testcase for O2 vectorization enabling.
On Sun, Oct 10, 2021 at 10:19 PM liuhongt wrote: > > gcc/testsuite/ChangeLog: > > PR middle-end/102669 > * gnat.dg/unroll1.adb: Add -fno-tree-vectorize. > --- > gcc/testsuite/gnat.dg/unroll1.adb | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/testsuite/gnat.dg/unroll1.adb > b/gcc/testsuite/gnat.dg/unroll1.adb > index 34d8a8f3f38..8b732dd8f44 100644 > --- a/gcc/testsuite/gnat.dg/unroll1.adb > +++ b/gcc/testsuite/gnat.dg/unroll1.adb > @@ -1,5 +1,5 @@ > -- { dg-do compile } > --- { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll-details > -fdump-tree-cunrolli-details" } > +-- { dg-options "-O2 -funroll-all-loops -fno-tree-vectorize > -fdump-rtl-loop2_unroll-details -fdump-tree-cunrolli-details" } > > package body Unroll1 is Should no-unroll pragma work with -ftree-vectorize? -- H.J.
Re: [PATCH] regcprop: Determine subreg offset depending on endianness [PR101260]
On Mon, Oct 11, 2021 at 09:38:36AM +0200, Richard Biener wrote: > On Fri, Oct 8, 2021 at 1:31 PM Stefan Schulze Frielinghaus via > Gcc-patches wrote: > > > > gcc/ChangeLog: > > > > * regcprop.c (maybe_mode_change): Determine offset relative to > > high or low part depending on endianness. > > > > Bootstrapped and regtested on IBM Z. Ok for mainline and gcc-{11,10,9}? > > Is there a testcase to add? I've updated the patch and added the testcase from the PR. > > > --- > > gcc/regcprop.c | 11 --- > > 1 file changed, 8 insertions(+), 3 deletions(-) > > > > diff --git a/gcc/regcprop.c b/gcc/regcprop.c > > index d2a01130fe1..0e1ac12458a 100644 > > --- a/gcc/regcprop.c > > +++ b/gcc/regcprop.c > > @@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, > > machine_mode copy_mode, > > copy_nregs, &bytes_per_reg)) > > return NULL_RTX; > >poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs); > > - poly_uint64 offset > > - = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + > > copy_offset, > > - GET_MODE_SIZE (orig_mode)); > > + poly_uint64 offset = > > +#if WORDS_BIG_ENDIAN > > + subreg_size_highpart_offset > > +#else > > + subreg_size_lowpart_offset > > +#endif > > + (GET_MODE_SIZE (new_mode) + copy_offset, > > +GET_MODE_SIZE (orig_mode)); > >regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); > >if (targetm.hard_regno_mode_ok (regno, new_mode)) > > return gen_raw_REG (new_mode, regno); > > -- > > 2.31.1 > > >From 299959788321e21c27f0d4a6d437a586c5f6c92e Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Mon, 4 Oct 2021 09:36:21 +0200 Subject: [PATCH] regcprop: Determine subreg offset depending on endianness [PR101260] gcc/ChangeLog: * regcprop.c (maybe_mode_change): Determine offset relative to high or low part depending on endianness. gcc/testsuite/ChangeLog: * gcc.dg/pr101260.c: New test. 
--- gcc/regcprop.c | 11 ++-- gcc/testsuite/gcc.dg/pr101260.c | 49 + 2 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101260.c diff --git a/gcc/regcprop.c b/gcc/regcprop.c index d2a01130fe1..0e1ac12458a 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, machine_mode copy_mode, copy_nregs, &bytes_per_reg)) return NULL_RTX; poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs); - poly_uint64 offset - = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + copy_offset, - GET_MODE_SIZE (orig_mode)); + poly_uint64 offset = +#if WORDS_BIG_ENDIAN + subreg_size_highpart_offset +#else + subreg_size_lowpart_offset +#endif + (GET_MODE_SIZE (new_mode) + copy_offset, +GET_MODE_SIZE (orig_mode)); regno += subreg_regno_offset (regno, orig_mode, offset, new_mode); if (targetm.hard_regno_mode_ok (regno, new_mode)) return gen_raw_REG (new_mode, regno); diff --git a/gcc/testsuite/gcc.dg/pr101260.c b/gcc/testsuite/gcc.dg/pr101260.c new file mode 100644 index 000..0e9ec4e203a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101260.c @@ -0,0 +1,49 @@ +/* PR rtl-optimization/101260 */ +/* { dg-do run } */ +/* { dg-options -O1 } */ +struct a { + unsigned b : 7; + int c; + int d; + short e; +} p, *q = &p; +int f, g, h, i, r, s; +static short j[8][1][6] = {0}; +char k[7]; +short l, m; +int *n; +int **o = &n; +void t() { + for (; f;) +; +} +static struct a u(int x) { + struct a a = {4, 8, 5, 4}; + for (; i <= 6; i++) { +struct a v = {0}; +for (; l; l++) + h = 0; +for (; h >= 0; h--) { + struct a *w; + j[i]; + w = &p; + s = 0; + for (; s < 3; s++) { +r ^= x; +m = j[i][g][h] == (k[g] = g); +*w = v; + } + r = 2; + for (; r; r--) +*o = &r; +} + } + t(); + return a; +} +int main() { + *q = u(636); + if (p.b != 4) +__builtin_abort (); + return 0; +} -- 2.31.1
[PATCH] vectorizer: Fix up -fsimd-cost-model= handling
On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches wrote: > I don't think this is the right thing to do. > This just means that at some point between 2013 when -fsimd-cost-model has > been introduced and now -fsimd-cost-model= option at least partially stopped > working properly. > As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting > for OpenMP simd loops (loop->force_vectorize is true) if specified differently > from default. > In tree-vectorizer.h we have: > static inline bool > unlimited_cost_model (loop_p loop) > { > if (loop != NULL && loop->force_vectorize > && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; > return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); > } > and use it in various places, but we also just use flag_vect_cost_model > in lots of places (and in one spot use flag_simd_cost_model, not sure if > we are sure it is a force_vectorize loop or what). > > So, IMHO we should change the above inline function to > loop_cost_model and let it return the cost model and then just > reimplement unlimited_cost_model as > return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; > and then adjust the direct uses of the flag and revert these changes. Here is a patch that implements it. Ok for trunk if it passes bootstrap/regtest? 2021-10-11 Jakub Jelinek gcc/ * tree-vectorizer.h (loop_cost_model): New function. (unlimited_cost_model): Use it. * tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model call instead of flag_vect_cost_model. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Likewise. (vect_prune_runtime_alias_test_list): Likewise. Also use it instead of flag_simd_cost_model. libgomp/ * testsuite/libgomp.c/scan-11.c: Remove option -fvect-cost-model=cheap. * testsuite/libgomp.c/scan-12.c: Likewise. * testsuite/libgomp.c/scan-13.c: Likewise. * testsuite/libgomp.c/scan-14.c: Likewise. 
* testsuite/libgomp.c/scan-15.c: Likewise. * testsuite/libgomp.c/scan-16.c: Likewise. * testsuite/libgomp.c/scan-17.c: Likewise. * testsuite/libgomp.c/scan-18.c: Likewise. * testsuite/libgomp.c/scan-19.c: Likewise. * testsuite/libgomp.c/scan-20.c: Likewise. * testsuite/libgomp.c/scan-21.c: Likewise. * testsuite/libgomp.c/scan-22.c: Likewise. * testsuite/libgomp.c++/scan-9.C: Likewise. * testsuite/libgomp.c++/scan-10.C: Likewise. * testsuite/libgomp.c++/scan-11.C: Likewise. * testsuite/libgomp.c++/scan-12.C: Likewise. * testsuite/libgomp.c++/scan-13.C: Likewise. * testsuite/libgomp.c++/scan-14.C: Likewise. * testsuite/libgomp.c++/scan-15.C: Likewise. * testsuite/libgomp.c++/scan-16.C: Likewise. --- gcc/tree-vectorizer.h.jj2021-09-27 10:47:15.839084866 +0200 +++ gcc/tree-vectorizer.h 2021-10-11 13:46:55.169767481 +0200 @@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo, } +/* Return the vect cost model for LOOP. */ +static inline enum vect_cost_model +loop_cost_model (loop_p loop) +{ + if (loop != NULL + && loop->force_vectorize + && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) +return flag_simd_cost_model; + return flag_vect_cost_model; +} + /* Return true if the vect cost model is unlimited. */ static inline bool unlimited_cost_model (loop_p loop) { - if (loop != NULL && loop->force_vectorize - && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) -return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; - return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); + return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; } /* Return true if the loop described by LOOP_VINFO is fully-masked and --- gcc/tree-vect-loop.c.jj 2021-09-22 09:25:15.199030463 +0200 +++ gcc/tree-vect-loop.c2021-10-11 13:48:33.183366790 +0200 @@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info /* If using the "very cheap" model. reject cases in which we'd keep a copy of the scalar code (even if we might be able to vectorize it). 
*/ - if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP + if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) @@ -1922,7 +1922,7 @@ vect_analyze_loop_costing (loop_vec_info /* If the vector loop needs multiple iterations to be beneficial then things are probably too close to call, and the conservative thing would be to stick with the scalar code. */ - if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP + if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo)) { i
Re: [PATCH] vectorizer: Fix up -fsimd-cost-model= handling
On Mon, 11 Oct 2021, Jakub Jelinek wrote: > On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches wrote: > > I don't think this is the right thing to do. > > This just means that at some point between 2013 when -fsimd-cost-model has > > been introduced and now -fsimd-cost-model= option at least partially stopped > > working properly. > > As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting > > for OpenMP simd loops (loop->force_vectorize is true) if specified > > differently > > from default. > > In tree-vectorizer.h we have: > > static inline bool > > unlimited_cost_model (loop_p loop) > > { > > if (loop != NULL && loop->force_vectorize > > && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > > return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; > > return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); > > } > > and use it in various places, but we also just use flag_vect_cost_model > > in lots of places (and in one spot use flag_simd_cost_model, not sure if > > we are sure it is a force_vectorize loop or what). > > > > So, IMHO we should change the above inline function to > > loop_cost_model and let it return the cost model and then just > > reimplement unlimited_cost_model as > > return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; > > and then adjust the direct uses of the flag and revert these changes. > > Here is a patch that implements it. > Ok for trunk if it passes bootstrap/regtest? OK. I'll note that we have #pragma GCC vect set force_vectorize as well so we'd eventually want the cost model to be used stored in struct loop? I suppose different -fvect-cost-model settings also prevent inlining at the moment (and we could likely handle more opt settings as to be reflected into struct loop to lessen issues like that). Thanks, Richard. > 2021-10-11 Jakub Jelinek > > gcc/ > * tree-vectorizer.h (loop_cost_model): New function. > (unlimited_cost_model): Use it. 
> * tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model > call instead of flag_vect_cost_model. > * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Likewise. > (vect_prune_runtime_alias_test_list): Likewise. Also use it instead > of flag_simd_cost_model. > libgomp/ > * testsuite/libgomp.c/scan-11.c: Remove option -fvect-cost-model=cheap. > * testsuite/libgomp.c/scan-12.c: Likewise. > * testsuite/libgomp.c/scan-13.c: Likewise. > * testsuite/libgomp.c/scan-14.c: Likewise. > * testsuite/libgomp.c/scan-15.c: Likewise. > * testsuite/libgomp.c/scan-16.c: Likewise. > * testsuite/libgomp.c/scan-17.c: Likewise. > * testsuite/libgomp.c/scan-18.c: Likewise. > * testsuite/libgomp.c/scan-19.c: Likewise. > * testsuite/libgomp.c/scan-20.c: Likewise. > * testsuite/libgomp.c/scan-21.c: Likewise. > * testsuite/libgomp.c/scan-22.c: Likewise. > * testsuite/libgomp.c++/scan-9.C: Likewise. > * testsuite/libgomp.c++/scan-10.C: Likewise. > * testsuite/libgomp.c++/scan-11.C: Likewise. > * testsuite/libgomp.c++/scan-12.C: Likewise. > * testsuite/libgomp.c++/scan-13.C: Likewise. > * testsuite/libgomp.c++/scan-14.C: Likewise. > * testsuite/libgomp.c++/scan-15.C: Likewise. > * testsuite/libgomp.c++/scan-16.C: Likewise. > > --- gcc/tree-vectorizer.h.jj 2021-09-27 10:47:15.839084866 +0200 > +++ gcc/tree-vectorizer.h 2021-10-11 13:46:55.169767481 +0200 > @@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo, > } > > > +/* Return the vect cost model for LOOP. */ > +static inline enum vect_cost_model > +loop_cost_model (loop_p loop) > +{ > + if (loop != NULL > + && loop->force_vectorize > + && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > +return flag_simd_cost_model; > + return flag_vect_cost_model; > +} > + > /* Return true if the vect cost model is unlimited. 
*/ > static inline bool > unlimited_cost_model (loop_p loop) > { > - if (loop != NULL && loop->force_vectorize > - && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > -return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; > - return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); > + return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; > } > > /* Return true if the loop described by LOOP_VINFO is fully-masked and > --- gcc/tree-vect-loop.c.jj 2021-09-22 09:25:15.199030463 +0200 > +++ gcc/tree-vect-loop.c 2021-10-11 13:48:33.183366790 +0200 > @@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info > >/* If using the "very cheap" model. reject cases in which we'd keep > a copy of the scalar code (even if we might be able to vectorize it). > */ > - if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP > + if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP >&& (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > || LOOP_VINFO_PEELI
Re: [PATCH] vectorizer: Fix up -fsimd-cost-model= handling
On Monday, October 11, 2021, Jakub Jelinek via Gcc-patches < gcc-patches@gcc.gnu.org> wrote: > On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches > wrote: > > I don't think this is the right thing to do. > > This just means that at some point between 2013 when -fsimd-cost-model > has > > been introduced and now -fsimd-cost-model= option at least partially > stopped > > working properly. > > As documented, -fsimd-cost-model= overrides the -fvect-cost-model= > setting > > for OpenMP simd loops (loop->force_vectorize is true) if specified > differently > > from default. > > In tree-vectorizer.h we have: > > static inline bool > > unlimited_cost_model (loop_p loop) > > { > > if (loop != NULL && loop->force_vectorize > > && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > > return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; > > return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); > > } > > and use it in various places, but we also just use flag_vect_cost_model > > in lots of places (and in one spot use flag_simd_cost_model, not sure if > > we are sure it is a force_vectorize loop or what). > > > > So, IMHO we should change the above inline function to > > loop_cost_model and let it return the cost model and then just > > reimplement unlimited_cost_model as > > return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; > > and then adjust the direct uses of the flag and revert these changes. > > Here is a patch that implements it. > Ok for trunk if it passes bootstrap/regtest? > > 2021-10-11 Jakub Jelinek > > gcc/ > * tree-vectorizer.h (loop_cost_model): New function. > (unlimited_cost_model): Use it. > * tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model > call instead of flag_vect_cost_model. > * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): > Likewise. > (vect_prune_runtime_alias_test_list): Likewise. Also use it > instead > of flag_simd_cost_model. 
> libgomp/ > * testsuite/libgomp.c/scan-11.c: Remove option > -fvect-cost-model=cheap. > * testsuite/libgomp.c/scan-12.c: Likewise. > * testsuite/libgomp.c/scan-13.c: Likewise. > * testsuite/libgomp.c/scan-14.c: Likewise. > * testsuite/libgomp.c/scan-15.c: Likewise. > * testsuite/libgomp.c/scan-16.c: Likewise. > * testsuite/libgomp.c/scan-17.c: Likewise. > * testsuite/libgomp.c/scan-18.c: Likewise. > * testsuite/libgomp.c/scan-19.c: Likewise. > * testsuite/libgomp.c/scan-20.c: Likewise. > * testsuite/libgomp.c/scan-21.c: Likewise. > * testsuite/libgomp.c/scan-22.c: Likewise. > * testsuite/libgomp.c++/scan-9.C: Likewise. > * testsuite/libgomp.c++/scan-10.C: Likewise. > * testsuite/libgomp.c++/scan-11.C: Likewise. > * testsuite/libgomp.c++/scan-12.C: Likewise. > * testsuite/libgomp.c++/scan-13.C: Likewise. > * testsuite/libgomp.c++/scan-14.C: Likewise. > * testsuite/libgomp.c++/scan-15.C: Likewise. > * testsuite/libgomp.c++/scan-16.C: Likewise. Also for gcc.dg/gomp/simd-2.c, gcc.dg/gomp/simd-3.c --- gcc/tree-vectorizer.h.jj2021-09-27 10:47:15.839084866 +0200 > +++ gcc/tree-vectorizer.h 2021-10-11 13:46:55.169767481 +0200 > @@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo, > } > > > +/* Return the vect cost model for LOOP. */ > +static inline enum vect_cost_model > +loop_cost_model (loop_p loop) > +{ > + if (loop != NULL > + && loop->force_vectorize > + && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > +return flag_simd_cost_model; > + return flag_vect_cost_model; > +} > + > /* Return true if the vect cost model is unlimited. 
*/ > static inline bool > unlimited_cost_model (loop_p loop) > { > - if (loop != NULL && loop->force_vectorize > - && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) > -return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; > - return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); > + return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED; > } > > /* Return true if the loop described by LOOP_VINFO is fully-masked and > --- gcc/tree-vect-loop.c.jj 2021-09-22 09:25:15.199030463 +0200 > +++ gcc/tree-vect-loop.c2021-10-11 13:48:33.183366790 +0200 > @@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info > >/* If using the "very cheap" model. reject cases in which we'd keep > a copy of the scalar code (even if we might be able to vectorize > it). */ > - if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP > + if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP >&& (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) > || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) > @@ -1922,7 +1922,7 @@ vect_analyze_loop_costing (loop_vec_info >/* If the vector loop needs multiple it
Re: [Patch 1/7, Arm, GCC] Add Armv8.1-M Mainline target feature +pacbti.
On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote: Hi, This patch adds the -march feature +pacbti to Armv8.1-M Mainline. This feature enables pointer signing and authentication instructions on M-class architectures. Tested on arm-none-eabi. OK for trunk? 2021-10-04 Tejas Belagod gcc/Changelog: * config/arm/arm-cpus.in: Define new feature pacbti. * config/arm/arm.h (TARGET_HAVE_PACBTI): New. "+pacbti" needs to be documented in invoke.texi at the appropriate place. R.
[RFC] Port git gcc-descr to Python
On 10/8/21 13:12, Jakub Jelinek wrote: I think changing the default is ok, but dropping --full is not, it should stay and behave the way it did before (i.e. print r12-4245-gdb3d7270b42fe27fb05664c4fdf524ab7ad13a75 same thing as the new default except for full hash instead of first 14 chars from it). All right, makes sense, but updating the git alias doesn't work for me. Thus I'm suggesting a Python replacement of it, which does: $ ./describe.py basepoints/gcc-11 r11-0-g50ee04838efc16 $ ./describe.py r12-4285-g07dd3bcda17f97 $ ./describe.py --short r12-4285 $ ./describe.py --full r12-4285-g07dd3bcda17f97cf5476c3d6f2f2501c1e0712e6 Motivation behind usage of Python: - the script can be put to contrib and nobody would have to re-run contrib/gcc-git-customization.sh locally - it's not a single line script and one doesn't have to do bash escaping in the contrib/gcc-git-customization.sh script - positional argument support Thoughts? Note I can port the same way the counterpart script. One another note: $ git gcc-descr basepoints/gcc-11 r11-0 seems to me like bogus. Cheers, Martin#!/usr/bin/env python3 import argparse import subprocess import sys DEFAULT_REV = 'master' PREFIX = 'basepoints/gcc-' hash_length = 14 def run_git(cmd): return subprocess.run(cmd, shell=True, encoding='utf8', stdout=subprocess.PIPE, stderr=subprocess.PIPE) parser = argparse.ArgumentParser(description='Describe a GCC git commit.') parser.add_argument('revision', nargs='?', default=DEFAULT_REV, help=f'Described revision ("{DEFAULT_REV}" by default)') parser.add_argument('--full', '-f', action='store_true', help='Print complete git hash') parser.add_argument('--short', '-s', action='store_true', help='Shorten described revision') args = parser.parse_args() if args.full: hash_length = 40 r = run_git(f'git describe --all --match {PREFIX}[0-9]* ' f'{args.revision} --abbrev={hash_length}') if r.returncode != 0: print(r.stderr, end='') sys.exit(1) # produces e.g. 
r12-4285-g07dd3bcda17f97 descr = r.stdout.strip() assert PREFIX in descr descr = 'r' + descr[descr.find(PREFIX) + len(PREFIX):] # handle basepoints if '-' not in descr: r = run_git(f'git rev-parse {args.revision}') descr += '-0-g' + r.stdout.strip()[:hash_length] parts = descr.split('-') assert len(parts) == 3 if args.short: descr = '-'.join(parts[:-1]) # verify common ancestor r = run_git('git config --get gcc-config.upstream') upstream = r.stdout.strip() if r.returncode else 'origin' gcc_branch = parts[0][1:] r = run_git(f'git rev-parse --quiet --verify origin/releases/gcc-{gcc_branch}') branch = f'releases/gcc-{gcc_branch}' if r.returncode == 0 else 'master' r = run_git(f'git merge-base --is-ancestor {args.revision} ' f'{upstream}/{branch}') if r.returncode != 0: print(r.stderr) sys.exit(2) print(descr)
Re: [PATCH 03/13] arm: Add test for PR target/101325
On 28/09/2021 15:30, Christophe LYON via Gcc-patches wrote: On 28/09/2021 13:14, Kyrylo Tkachov wrote: -Original Message- From: Gcc-patches On Behalf Of Christophe Lyon via Gcc-patches Sent: 07 September 2021 10:15 To: gcc-patches@gcc.gnu.org Subject: [PATCH 03/13] arm: Add test for PR target/101325 This test is derived from the one provided in the PR: it is a compile-only test because I do not have access to anything that could execute it. We can switch it to 'dg-do run' later, however it would be better to write a new executable test to ensure coverage in case the tester cannot execute such code (and it will need a new arm_v8_1m_mve_hw or similar effective-target). The test is okay for now. I think we'll want to have an arm_v8_1m_mve_hw target sooner or later. Maybe Alex or Andrea can help to write one we can use? Since I posted the patch series, QEMU has gained support for MVE, I plan to write a similar testcase which is executable. There's already an executable testcase in the PR. Thanks Christophe Here is an updated version of this patch, which adds an executable test. I thought I would re-post the whole series later, but I haven't yet received feedback on the main patches, which I expect to trigger some discussions. Christophe Thanks, Kyrill 2021-09-01 Christophe Lyon gcc/testsuite/ PR target/101325 * gcc.target/arm/simd/pr101325.c: New. 
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325.c b/gcc/testsuite/gcc.target/arm/simd/pr101325.c new file mode 100644 index 000..a466683a0b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr101325.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include + +unsigned foo(int8x16_t v, int8x16_t w) +{ + return vcmpeqq (v, w); +} +/* { dg-final { scan-assembler {\tvcmp.i8 eq} } } */ +/* { dg-final { scan-assembler {\tvmrs\t r[0-9]+, P0} } } */ +/* { dg-final { scan-assembler {\tuxth} } } */ -- 2.25.1From ef48339f8048ee6417845ed2e6fd95f550ee798e Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Wed, 25 Aug 2021 17:26:31 + Subject: [PATCH v2 03/14] arm: Add tests for PR target/101325 These tests are derived from the one provided in the PR: there is a compile-only test because I did not have access to anything that could execute MVE code until recently. I have been able to add an executable test since QEMU supports MVE. Instead of adding arm_v8_1m_mve_hw, I update arm_mve_hw so that it uses add_options_for_arm_v8_1m_mve_fp, like arm_neon_hw does. This ensures arm_mve_hw passes even if the toolchain does not generate MVE code by default. 2021-10-01 Christophe Lyon gcc/testsuite/ PR target/101325 * gcc.target/arm/simd/pr101325.c: New. * gcc.target/arm/simd/pr101325-2.c: New. * lib/target-supports.exp (check_effective_target_arm_mve_hw): Use add_options_for_arm_v8_1m_mve_fp. 
add executable test and update check_effective_target_arm_mve_hw diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c b/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c new file mode 100644 index 000..7907a386385 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_mve_hw } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_v8_1m_mve } */ + +#include + + +__attribute((noinline,noipa)) +unsigned foo(int8x16_t v, int8x16_t w) +{ + return vcmpeqq (v, w); +} + +int main(void) +{ + if (foo (vdupq_n_s8(0), vdupq_n_s8(0)) != 0xU) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325.c b/gcc/testsuite/gcc.target/arm/simd/pr101325.c new file mode 100644 index 000..a466683a0b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr101325.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include + +unsigned foo(int8x16_t v, int8x16_t w) +{ + return vcmpeqq (v, w); +} +/* { dg-final { scan-assembler {\tvcmp.i8 eq} } } */ +/* { dg-final { scan-assembler {\tvmrs\t r[0-9]+, P0} } } */ +/* { dg-final { scan-assembler {\tuxth} } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index e030e4f376b..b0e35b602af 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4889,6 +4889,7 @@ proc check_effective_target_arm_cmse_hw { } { } } "-mcmse -Wl,--section-start,.gnu.sgstubs=0x0040"] } + # Return 1 if the target supports executing MVE instructions, 0 # otherwise. @@ -4904,7 +4905,7 @@ proc check_effective_target_arm_mve_hw {} { : "0" (a), "r" (b)); return (a != 2); } -} ""] +} [add_options_for_arm_v8_1m_mve_fp ""]]
Re: [PATCH 04/13] arm: Add GENERAL_AND_VPR_REGS regclass
On 28/09/2021 15:32, Christophe LYON via Gcc-patches wrote: On 28/09/2021 13:18, Kyrylo Tkachov wrote: Hi Christophe, -Original Message- From: Gcc-patches On Behalf Of Christophe LYON via Gcc-patches Sent: 08 September 2021 08:49 To: Richard Earnshaw ; gcc- patc...@gcc.gnu.org Subject: Re: [PATCH 04/13] arm: Add GENERAL_AND_VPR_REGS regclass On 07/09/2021 15:35, Richard Earnshaw wrote: On 07/09/2021 13:05, Christophe LYON wrote: On 07/09/2021 11:42, Richard Earnshaw wrote: On 07/09/2021 10:15, Christophe Lyon via Gcc-patches wrote: At some point during the development of this patch series, it appeared that in some cases the register allocator wants “VPR or general” rather than “VPR or general or FP” (which is the same thing as ALL_REGS). The series does not seem to require this anymore, but it seems to be a good thing to do anyway, to give the register allocator more freedom. 2021-09-01 Christophe Lyon gcc/ * config/arm/arm.h (reg_class): Add GENERAL_AND_VPR_REGS. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. Add VPR_REG to ALL_REGS. diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 015299c1534..fab39d05916 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1286,6 +1286,7 @@ enum reg_class SFP_REG, AFP_REG, VPR_REG, + GENERAL_AND_VPR_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -1315,6 +1316,7 @@ enum reg_class "SFP_REG", \ "AFP_REG", \ "VPR_REG", \ + "GENERAL_AND_VPR_REGS", \ "ALL_REGS" \ } @@ -1343,7 +1345,8 @@ enum reg_class { 0x, 0x, 0x, 0x0040 }, /* SFP_REG */ \ { 0x, 0x, 0x, 0x0080 }, /* AFP_REG */ \ { 0x, 0x, 0x, 0x0400 }, /* VPR_REG. */ \ - { 0x7FFF, 0x, 0x, 0x000F } /* ALL_REGS. */ \ + { 0x5FFF, 0x, 0x, 0x0400 }, /* GENERAL_AND_VPR_REGS. */ \ + { 0x7FFF, 0x, 0x, 0x040F } /* ALL_REGS. */ \ } You've changed the definition of ALL_REGS here (to include VPR_REG), but not really explained why. Is that the source of the underlying issue with the 'appeared' you mention? 
I first added VPR_REG to ALL_REGS, but Richard Sandiford suggested I create a new GENERAL_AND_VPR_REGS that would be more restrictive. I did not remove VPR_REG from ALL_REGS because I thought it was an omission: shouldn't ALL_REGS contain all registers? Surely that should be a separate patch then. OK, I can remove that line from this patch and make a separate one-liner for ALL_REGS. Did you end up sending that patch out? (Sorry, I may have missed it in my archive). This patch to add GENERAL_AND_VPR_REGS is okay with the ALL_REGS change separated out. No I didn't send it yet: I suspect there will be iterations on the next patches in the series, this small change alone wasn't worth sending a v2 :-) Here is the patch now split into two parts. Christophe Thanks, Christophe Thanks, Kyrill Thanks, Christophe R. R. #define FP_SYSREGS \ From c57fb3fc853d8bf04f589682f03e9d3baac2dbd5 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Thu, 26 Aug 2021 16:01:58 + Subject: [PATCH v2 04/14] arm: Add GENERAL_AND_VPR_REGS regclass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At some point during the development of this patch series, it appeared that in some cases the register allocator wants “VPR or general” rather than “VPR or general or FP” (which is the same thing as ALL_REGS). The series does not seem to require this anymore, but it seems to be a good thing to do anyway, to give the register allocator more freedom. 2021-09-01 Christophe Lyon gcc/ * config/arm/arm.h (reg_class): Add GENERAL_AND_VPR_REGS. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. 
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 015299c1534..eae1b1cd0fb 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1286,6 +1286,7 @@ enum reg_class SFP_REG, AFP_REG, VPR_REG, + GENERAL_AND_VPR_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -1315,6 +1316,7 @@ enum reg_class "SFP_REG", \ "AFP_REG", \ "VPR_REG", \ + "GENERAL_AND_VPR_REGS", \ "ALL_REGS" \ } @@ -1343,6 +1345,7 @@ enum reg_class { 0x, 0x, 0x, 0x0040 }, /* SFP_REG */\ { 0x, 0x, 0x, 0x0080 }, /* AFP_REG */\ { 0x, 0x, 0x, 0x0400 }, /* VPR_REG. */ \ + { 0x5FFF, 0x, 0x, 0x0400 }, /* GENERAL_AND_VPR_REGS. */ \ { 0x7FFF, 0x, 0x, 0x000F } /* ALL_REGS. */ \ } -- 2.25.1 From ce9429d59d513b2998f73c6e256702ad447f2ae7 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Wed, 8 Sep 2021 08:
Re: [PATCH] opts: Remove AUTODETECT_VALUE usage.
On Mon, Oct 11, 2021 at 12:58 PM Martin Liška wrote: > > The patch is about using OPTION_SET_P instead of a default > option value set in common.opt. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? OK, but... > Thanks, > Martin > > gcc/ChangeLog: > > * common.opt: Remove Init(2) for some options. you could enumerate the three changed opts here. > * toplev.c (process_options): Do not use AUTODETECT_VALUE, but > use rather OPTION_SET_P. > --- > gcc/common.opt | 6 +++--- > gcc/toplev.c | 6 +++--- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/gcc/common.opt b/gcc/common.opt > index 59ecc9fbdf7..4099effcc80 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -2399,7 +2399,7 @@ Common Var(flag_live_range_shrinkage) Init(0) > Optimization > Relief of register pressure through live range shrinkage. > > frename-registers > -Common Var(flag_rename_registers) Init(2) Optimization > +Common Var(flag_rename_registers) Optimization > Perform a register renaming optimization pass. > > fschedule-fusion > @@ -2797,7 +2797,7 @@ Common Ignore > Does nothing. Preserved for backward compatibility. > > ftree-cselim > -Common Var(flag_tree_cselim) Init(2) Optimization > +Common Var(flag_tree_cselim) Optimization > Transform condition stores into unconditional ones. > > ftree-switch-conversion > @@ -3158,7 +3158,7 @@ Common Var(flag_value_profile_transformations) > Optimization > Use expression value profiles in optimizations. > > fweb > -Common Var(flag_web) Init(2) Optimization > +Common Var(flag_web) Optimization > Construct webs and split unrelated uses of single variable. > > ftree-builtin-call-dce > diff --git a/gcc/toplev.c b/gcc/toplev.c > index b878234f3f2..167feac2583 100644 > --- a/gcc/toplev.c > +++ b/gcc/toplev.c > @@ -1332,10 +1332,10 @@ process_options (bool no_backend) > } > > /* web and rename-registers help when run after loop unrolling. 
*/ > - if (flag_web == AUTODETECT_VALUE) > + if (!OPTION_SET_P (flag_web)) > flag_web = flag_unroll_loops; > > - if (flag_rename_registers == AUTODETECT_VALUE) > + if (!OPTION_SET_P (flag_rename_registers)) > flag_rename_registers = flag_unroll_loops; > > if (flag_non_call_exceptions) > @@ -1598,7 +1598,7 @@ process_options (bool no_backend) > debug_inline_points = 0; > } > > - if (flag_tree_cselim == AUTODETECT_VALUE) > + if (!OPTION_SET_P (flag_tree_cselim)) > { > if (HAVE_conditional_move) > flag_tree_cselim = 1; > -- > 2.33.0 >
Re: [Patch 2/7, Arm, GCC] Add option -mbranch-protection.
On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote: Hi, Add -mbranch-protection option and its associated parsing routines. This option enables the code-generation of pointer signing and authentication instructions in function prologues and epilogues. Tested on arm-none-eabi. OK for trunk? 2021-10-04 Tejas Belagod gcc/ChangeLog: * common/config/arm/arm-common.c (arm_print_hit_for_pacbti_option): New. (arm_progress_next_token): New. (arm_parse_pac_ret_clause): New routine for parsing the pac-ret clause for -mbranch-protection. (arm_parse_pacbti_option): New routine to parse all the options to -mbranch-protection. * config/arm/arm-protos.h (arm_parse_pacbti_option): Export. * config/arm/arm.c (arm_configure_build_target): Handle option to -mbranch-protection. * config/arm/arm.opt (mbranch-protection): New. (arm_enable_pacbti): New. You're missing documentation for invoke.texi. Also, how does this differ from the existing option in aarch64? Can the code from that be adapted to be made common to both targets rather than doing a new implementation? Finally, there are far too many manifest constants in this patch; they need replacing with enums or #defines as appropriate if we cannot share the aarch64 code. R.
Re: [PATCH] Remove usage of IRA_REGION_AUTODETECT
On Mon, Oct 11, 2021 at 12:58 PM Martin Liška wrote: > > Similar patch, let's rely on OPTION_SET_P and not a default > options value. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? OK. > Thanks, > Martin > > gcc/ChangeLog: > > * common.opt: Remove usage of IRA_REGION_AUTODETECT. > * flag-types.h (enum ira_region): Likewise. > * toplev.c (process_options): Use OPTION_SET_P instead of > IRA_REGION_AUTODETECT. > --- > gcc/common.opt | 2 +- > gcc/flag-types.h | 4 > gcc/toplev.c | 2 +- > 3 files changed, 2 insertions(+), 6 deletions(-) > > diff --git a/gcc/common.opt b/gcc/common.opt > index 52693e226d2..59ecc9fbdf7 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -1923,7 +1923,7 @@ EnumValue > Enum(ira_algorithm) String(priority) Value(IRA_ALGORITHM_PRIORITY) > > fira-region= > -Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) > Init(IRA_REGION_AUTODETECT) Optimization > +Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) > Init(IRA_REGION_ONE) Optimization > -fira-region=[one|all|mixed] Set regions for IRA. > > Enum > diff --git a/gcc/flag-types.h b/gcc/flag-types.h > index 5bd1f771c8b..ae0b216e8a3 100644 > --- a/gcc/flag-types.h > +++ b/gcc/flag-types.h > @@ -191,10 +191,6 @@ enum ira_region > IRA_REGION_ONE, > IRA_REGION_ALL, > IRA_REGION_MIXED, > - /* This value means that there were no options -fira-region on the > - command line and that we should choose a value depending on the > - used -O option. */ > - IRA_REGION_AUTODETECT > }; > > /* The options for excess precision. */ > diff --git a/gcc/toplev.c b/gcc/toplev.c > index 81748b1152a..b878234f3f2 100644 > --- a/gcc/toplev.c > +++ b/gcc/toplev.c > @@ -1319,7 +1319,7 @@ process_options (bool no_backend) > } > > /* One region RA really helps to decrease the code size. */ > - if (flag_ira_region == IRA_REGION_AUTODETECT) > + if (!OPTION_SET_P (flag_ira_region)) > flag_ira_region > = optimize_size || !optimize ? 
IRA_REGION_ONE : IRA_REGION_MIXED; > > -- > 2.33.0 >
Re: [PATCH] options: Fix variable tracking option processing.
On Mon, Oct 11, 2021 at 1:02 PM Martin Liška wrote: > > After the recent change in Optimize attribute handling, we need > finish_option function properly auto-detecting variable tracking options. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Thanks, > Martin > > PR debug/102585 > > gcc/ChangeLog: > > * common.opt: Do not init flag_var_tracking* options. > * opts.c (finish_options): Handle flag_var_tracking* options. > * toplev.c (process_options): Move to opts.c. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pr102585.c: New test. > --- > gcc/common.opt | 14 +- > gcc/opts.c | 28 > gcc/testsuite/gcc.dg/pr102585.c | 6 ++ > gcc/toplev.c| 33 +++-- > 4 files changed, 42 insertions(+), 39 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/pr102585.c > > diff --git a/gcc/common.opt b/gcc/common.opt > index 52693e226d2..ec020f4e642 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -3003,19 +3003,16 @@ Common Undocumented Var(flag_use_linker_plugin) > > ; Positive if we should track variables, negative if we should run > ; the var-tracking pass only to discard debug annotations, zero if > -; we're not to run it. When flag_var_tracking == 2 (AUTODETECT_VALUE) it > -; will be set according to optimize, debug_info_level and debug_hooks > -; in process_options (). > +; we're not to run it. > fvar-tracking > -Common Var(flag_var_tracking) Init(2) PerFunction > +Common Var(flag_var_tracking) PerFunction > Perform variable tracking. > > ; Positive if we should track variables at assignments, negative if > ; we should run the var-tracking pass only to discard debug > -; annotations. When flag_var_tracking_assignments == > -; AUTODETECT_VALUE it will be set according to flag_var_tracking. > +; annotations. > fvar-tracking-assignments > -Common Var(flag_var_tracking_assignments) Init(2) PerFunction > +Common Var(flag_var_tracking_assignments) PerFunction > Perform variable tracking by annotating assignments. 
> > ; Nonzero if we should toggle flag_var_tracking_assignments after > @@ -3026,8 +3023,7 @@ Toggle -fvar-tracking-assignments. > > ; Positive if we should track uninitialized variables, negative if > ; we should run the var-tracking pass only to discard debug > -; annotations. When flag_var_tracking_uninit == AUTODETECT_VALUE it > -; will be set according to flag_var_tracking. > +; annotations. > fvar-tracking-uninit > Common Var(flag_var_tracking_uninit) PerFunction > Perform variable tracking and also tag variables that are uninitialized. > diff --git a/gcc/opts.c b/gcc/opts.c > index 2116c2991dd..eeb6b1dcc7c 100644 > --- a/gcc/opts.c > +++ b/gcc/opts.c > @@ -1353,6 +1353,34 @@ finish_options (struct gcc_options *opts, struct > gcc_options *opts_set, > SET_OPTION_IF_UNSET (opts, opts_set, flag_vect_cost_model, > VECT_COST_MODEL_CHEAP); > > + /* If the user specifically requested variable tracking with tagging > + uninitialized variables, we need to turn on variable tracking. > + (We already determined above that variable tracking is feasible.) */ > + if (opts->x_flag_var_tracking_uninit == 1) > +opts->x_flag_var_tracking = 1; > + > + if (!opts_set->x_flag_var_tracking) > +opts->x_flag_var_tracking = optimize >= 1; That's still not equivalent to the old code for -fvar-tracking-uninit which sets opts->x_flag_var_tracking to 1 and the old code checked that for AUTOINIT_VALUE but you override it here for -O0. 
> + if (!opts_set->x_flag_var_tracking_uninit) > +opts->x_flag_var_tracking_uninit = opts->x_flag_var_tracking; > + > + if (!opts_set->x_flag_var_tracking_assignments) > +opts->x_flag_var_tracking_assignments > + = (opts->x_flag_var_tracking > +&& !(opts->x_flag_selective_scheduling > + || opts->x_flag_selective_scheduling2)); > + > + if (opts->x_flag_var_tracking_assignments_toggle) > +opts->x_flag_var_tracking_assignments = > !opts->x_flag_var_tracking_assignments; > + > + if (opts->x_flag_var_tracking_assignments && !opts->x_flag_var_tracking) > +opts->x_flag_var_tracking = opts->x_flag_var_tracking_assignments = -1; > + > + if (opts->x_flag_var_tracking_assignments > + && (opts->x_flag_selective_scheduling || > opts->x_flag_selective_scheduling2)) > +warning_at (loc, 0, > + "var-tracking-assignments changes selective scheduling"); > } > > #define LEFT_COLUMN 27 > diff --git a/gcc/testsuite/gcc.dg/pr102585.c b/gcc/testsuite/gcc.dg/pr102585.c > new file mode 100644 > index 000..efd066b4a4e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr102585.c > @@ -0,0 +1,6 @@ > +/* PR debug/102585 */ > +/* { dg-do compile } */ > +/* { dg-options "-fvar-tracking-assignments -fno-var-tracking" } */ > + > +#pragma GCC
Re: [PATCH] tree-optimization: [PR102622]: wrong code due to signed one bit integer and "a?-1:0"
Hello, On Sat, 9 Oct 2021, apinski--- via Gcc-patches wrote: > + (lshift (convert (convert:boolean_type_node @0)) { shift; }))) > +/* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1 > + here as the powerof2cst case above will handle that case correctly. > */ Well, but the QoI will improve quite a bit when you just do the check, instead of relying on order of patterns. It's not slow or harmful to check and will make the order irrelevant, which, given the number of patterns we already have, is a good thing. (It will also be smaller to check than to document why the check isn't needed :-) ) Ciao, Michael.
Re: [PATCH 05/13] arm: Add support for VPR_REG in arm_class_likely_spilled_p
Sorry for the very long delay in reviewing this. Things have been a bit hectic recently. Christophe Lyon via Gcc-patches writes: > VPR_REG is the only register in its class, so it should be handled by > TARGET_CLASS_LIKELY_SPILLED_P. No test fails without this patch, but > it seems it should be implemented. I think instead we should change the “return false” so that it calls the default implementation (default_class_likely_spilled_p). That should handle this case correctly, as well as any future single-register classes that we might add. Thanks, Richard > > 2021-09-01 Christophe Lyon > > gcc/ > * config/arm/arm.c (arm_class_likely_spilled_p): Handle VPR_REG. > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 11dafc70067..1222cb0d0fe 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -29307,6 +29307,9 @@ arm_class_likely_spilled_p (reg_class_t rclass) >|| rclass == CC_REG) > return true; > > + if (TARGET_HAVE_MVE && (rclass == VPR_REG)) > +return true; > + >return false; > }
Re: [PATCH] options: Fix variable tracking option processing.
On 10/11/21 15:05, Richard Biener wrote: + if (!opts_set->x_flag_var_tracking) +opts->x_flag_var_tracking = optimize >= 1; That's still not equivalent to the old code for -fvar-tracking-uninit which sets opts->x_flag_var_tracking to 1 and the old code checked that for AUTOINIT_VALUE but you override it here for -O0. Do you mean the newly added code: + if (!opts_set->x_flag_var_tracking) +opts->x_flag_var_tracking = optimize >= 1; that should be equivalent to: - if (flag_var_tracking == AUTODETECT_VALUE) -flag_var_tracking = optimize >= 1; ? Or do I miss something? Thanks, Martin
Re: [Patch 3/7, Arm, GCC] Add testsuite library support for PACBTI target.
On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote: Hi, Add targeting-checking entities for PACBTI in testsuite framework. Tested on arm-none-eabi. OK for trunk? 2021-10-04 Tejas Belagod gcc/ChangeLog: * testsuite/lib/target-supports.exp (check_effective_target_arm_pacbti_hw): New. OK. R.
Re: [Patch 3/7, Arm, GCC] Add testsuite library support for PACBTI target.
On 11/10/2021 14:36, Richard Earnshaw via Gcc-patches wrote: On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote: Hi, Add targeting-checking entities for PACBTI in testsuite framework. Tested on arm-none-eabi. OK for trunk? 2021-10-04 Tejas Belagod gcc/ChangeLog: * testsuite/lib/target-supports.exp (check_effective_target_arm_pacbti_hw): New. OK. R. Oh, wait! Not OK. Needs documentation in sourcebuild.texi. R.
[Ada] Size of time_t in newer versions of VxWorks7
Set the default size of time_t_bits to match the standard runtimes. The size must match that which is used in the VSB, since the same VSB is used to build all of the runtimes. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * libgnat/s-parame__ae653.ads (time_t_bits): Change to Long_Long_Integer'Size. Add some comments to explain.diff --git a/gcc/ada/libgnat/s-parame__ae653.ads b/gcc/ada/libgnat/s-parame__ae653.ads --- a/gcc/ada/libgnat/s-parame__ae653.ads +++ b/gcc/ada/libgnat/s-parame__ae653.ads @@ -104,8 +104,12 @@ package System.Parameters is -- Characteristics of time_t type -- - time_t_bits : constant := Long_Integer'Size; - -- Number of bits in type time_t + -- IMPORTANT NOTE: + -- time_t_bits must match the size specified in the VSB. + + time_t_bits : constant := Long_Long_Integer'Size; + -- Number of bits in type time_t for SR0660 and newer, + -- with the default configuration of the VSB. -- -- Characteristics of types in Interfaces.C --
[Ada] Simplify code for checks within an initialization procedure
It is simpler to access the type of first formal using semantic instead of syntactic query. Behaviour is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_util.adb (Compile_Time_Constraint_Error): Simplify getting the type of the first formal parameter.diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -6590,9 +6590,7 @@ package body Sem_Util is if Inside_Init_Proc then declare Init_Proc_Type : constant Entity_Id := - Entity (Parameter_Type (First - (Parameter_Specifications -(Parent (Current_Scope_No_Loops); + Etype (First_Formal (Current_Scope_No_Loops)); Conc_Typ : constant Entity_Id := (if Present (Init_Proc_Type)
[Ada] Fix crash on array component with Default_Value
When complaining about a compile-time constraint error within a default initialization procedure we assumed that this procedure initializes a record object. However, it can initialize an array object too. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_util.adb (Inside_Init_Proc): Simplify. * sem_aggr.adb (Resolve_Record_Aggregate): Fix style. * sem_util.adb (Compile_Time_Constraint_Error): Guard against calling Corresponding_Concurrent_Type with an array type entity.diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb --- a/gcc/ada/exp_util.adb +++ b/gcc/ada/exp_util.adb @@ -7994,10 +7994,8 @@ package body Exp_Util is -- function Inside_Init_Proc return Boolean is - Proc : constant Entity_Id := Enclosing_Init_Proc; - begin - return Proc /= Empty; + return Present (Enclosing_Init_Proc); end Inside_Init_Proc; -- diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb --- a/gcc/ada/sem_aggr.adb +++ b/gcc/ada/sem_aggr.adb @@ -5307,8 +5307,8 @@ package body Sem_Aggr is Add_Association (Component => Component, -Expr => Empty, -Assoc_List => New_Assoc_List, +Expr => Empty, +Assoc_List => New_Assoc_List, Is_Box_Present => True); elsif Present (Parent (Component)) diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -6589,11 +6589,16 @@ package body Sem_Util is if Inside_Init_Proc then declare + Init_Proc_Type : constant Entity_Id := + Entity (Parameter_Type (First + (Parameter_Specifications +(Parent (Current_Scope_No_Loops); + Conc_Typ : constant Entity_Id := - Corresponding_Concurrent_Type -(Entity (Parameter_Type (First - (Parameter_Specifications -(Parent (Current_Scope)); + (if Present (Init_Proc_Type) + and then Init_Proc_Type in E_Record_Type_Id +then Corresponding_Concurrent_Type (Init_Proc_Type) +else Empty); begin -- Don't complain if the corresponding concurrent type
[Ada] Do not clear Analyzed flag in expand if already set by preanalysis
During Expand, prevent the clearing of the Analyzed flag if it has already been set by Fold_Ureal. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * expander.adb (Expand): Skip clearing of Analyzed flag if already set for N_Real_Literal.diff --git a/gcc/ada/expander.adb b/gcc/ada/expander.adb --- a/gcc/ada/expander.adb +++ b/gcc/ada/expander.adb @@ -49,6 +49,7 @@ with Sem_Ch8;use Sem_Ch8; with Sem_Util; use Sem_Util; with Sinfo; use Sinfo; with Sinfo.Nodes;use Sinfo.Nodes; +with Stand; use Stand; with Table; package body Expander is @@ -152,7 +153,19 @@ package body Expander is -- not take place. This prevents cascaded errors due to stack mismatch. elsif not Expander_Active then - Set_Analyzed (N, Full_Analysis); + + -- Do not clear the Analyzed flag if it has been set on purpose + -- during preanalysis in Fold_Ureal. In that case, the Etype field + -- in N_Real_Literal will be set to something different than + -- Universal_Real. + + if Full_Analysis + or else not (Nkind (N) = N_Real_Literal + and then Present (Etype (N)) + and then Etype (N) /= Universal_Real) + then +Set_Analyzed (N, Full_Analysis); + end if; if Serious_Errors_Detected > 0 and then Scope_Is_Transient then Scope_Stack.Table
[Ada] RTEMS: use default stack checking emulation package
Remove the RTEMS-specific version of System.Stack_Checking.Operations as the internal RTEMS API it uses can only detect stack overflow after the event, whereas the stack checking emulation is meant to detect the stack overflow before it occurs. Use the standard System.Stack_Checking.Operations package instead and include its object file in the runtime library. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * Makefile.rtl (RTEMS): Add s-stchop.o to EXTRA_GNATRTL_NONTASKING_OBJS, remove s-stchop__rtems.adb. * libgnat/s-stchop__rtems.adb: Removed. diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl --- a/gcc/ada/Makefile.rtl +++ b/gcc/ada/Makefile.rtl @@ -2057,9 +2057,10 @@ ifeq ($(strip $(filter-out rtems%,$(target_os))),) s-taprop.adbhttp://www.gnu.org/licenses/>. -- --- -- --- GNARL was developed by the GNARL team at Florida State University. -- --- Extensive contributions were provided by Ada Core Technologies, Inc. -- --- -- --- - --- This is the RTEMS version of this package. --- This file should be kept synchronized with the general implementation --- provided by s-stchop.adb. - -pragma Restrictions (No_Elaboration_Code); --- We want to guarantee the absence of elaboration code because the --- binder does not handle references to this package. - -with Ada.Exceptions; - -with Interfaces.C; use Interfaces.C; - -package body System.Stack_Checking.Operations is - - - -- Invalidate_Stack_Cache -- - - - procedure Invalidate_Stack_Cache (Any_Stack : Stack_Access) is - pragma Warnings (Off, Any_Stack); - begin - Cache := Null_Stack; - end Invalidate_Stack_Cache; - - - - -- Notify_Stack_Attributes -- - - - - procedure Notify_Stack_Attributes - (Initial_SP : System.Address; - Size : System.Storage_Elements.Storage_Offset) - is - - -- RTEMS keeps all the information we need. 
- - pragma Unreferenced (Size); - pragma Unreferenced (Initial_SP); - - begin - null; - end Notify_Stack_Attributes; - - - - -- Stack_Check -- - - - - function Stack_Check - (Stack_Address : System.Address) return Stack_Access - is - pragma Unreferenced (Stack_Address); - - -- RTEMS has a routine to check if the stack is blown. - -- It returns a C99 bool. - function rtems_stack_checker_is_blown return Interfaces.C.unsigned_char; - pragma Import (C, - rtems_stack_checker_is_blown, "rtems_stack_checker_is_blown"); - - begin - -- RTEMS has a routine to check this. So use it. - - if rtems_stack_checker_is_blown /= 0 then - Ada.Exceptions.Raise_Exception - (E => Storage_Error'Identity, -Message => "stack overflow detected"); - end if; - - return null; - - end Stack_Check; - - - -- Update_Stack_Cache -- - - - procedure Update_Stack_Cache (Stack : Stack_Access) is - begin - if not Multi_Processor then - Cache := Stack; - end if; - end Update_Stack_Cache; - -end System.Stack_Checking.Operations;
[Ada] Simplify initialization of concurrent components
Concurrent record types are either task record types or protected record types. Now we detect them collectively (when looking for any of them) or exclusively (when looking for one or the other). Cleanup code related to fixes in expansion of boxes in record aggregates. Behaviour is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_ch3.adb (Build_Init_Statements): Simplify detection of concurrent record types.diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb --- a/gcc/ada/exp_ch3.adb +++ b/gcc/ada/exp_ch3.adb @@ -3206,9 +3206,7 @@ package body Exp_Ch3 is -- types moving any expanded code from the spec to the body of the -- init procedure. - if Is_Task_Record_Type (Rec_Type) - or else Is_Protected_Record_Type (Rec_Type) - then + if Is_Concurrent_Record_Type (Rec_Type) then declare Decl : constant Node_Id := Parent (Corresponding_Concurrent_Type (Rec_Type)); @@ -3589,12 +3587,11 @@ package body Exp_Ch3 is end loop; end if; end; - end if; -- For a protected type, add statements generated by -- Make_Initialize_Protection. - if Is_Protected_Record_Type (Rec_Type) then + elsif Is_Protected_Record_Type (Rec_Type) then Append_List_To (Stmts, Make_Initialize_Protection (Rec_Type)); end if;
[Ada] Remove redundant guard against an empty component list
There is no need to explicitly guard against an empty list where the subsequent iteration with First/Next/Present works fine. Cleanup related to expansion of aggregates in GNATprove mode; behaviour is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_aggr.adb (Component_OK_For_Backend): Remove redundant guard.diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -8547,10 +8547,6 @@ package body Exp_Aggr is Expr_Q : Node_Id; begin - if No (Comps) then -return True; - end if; - C := First (Comps); while Present (C) loop
[Ada] Move rewriting of boxes in aggregates from resolution to expansion
Rewriting of boxes in record aggregates into the corresponding default values was done in resolution, where we special-cased access types and scalar types with a Default_Value aspect. However, this rewriting rather belongs to expansion. Also, the special-casing didn't take Normalize_Scalars or Initialize_Scalars pragmas into account and it didn't work for private types. Now the resolution keeps boxes that require simple initialization, while expansion reuses existing routines for initialization of record types. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_aggr.adb (Initialize_Record_Component): Add assertion about one of the parameters, so that illegal attempts to initialize record components with Empty node are detected early on. (Build_Record_Aggr_Code): Handle boxes in aggregate component associations just like the components with no initialization in Build_Record_Init_Proc. * sem_aggr.adb (Resolve_Record_Aggregate): For components that require simple initialization carry boxes from resolution to expansion. * sem_util.adb (Needs_Simple_Initialization): Remove redundant paren. diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -3209,6 +3209,8 @@ package body Exp_Aggr is Init_Stmt : Node_Id; begin + pragma Assert (Nkind (Init_Expr) in N_Subexpr); + -- Protect the initialization statements from aborts. 
Generate: --Abort_Defer; @@ -3793,6 +3795,26 @@ package body Exp_Aggr is With_Default_Init => True, Constructor_Ref => Expression (Comp))); + elsif Box_Present (Comp) + and then Needs_Simple_Initialization (Etype (Selector)) + then +Comp_Expr := + Make_Selected_Component (Loc, +Prefix=> New_Copy_Tree (Target), +Selector_Name => New_Occurrence_Of (Selector, Loc)); + +Initialize_Record_Component + (Rec_Comp => Comp_Expr, + Comp_Typ => Etype (Selector), + Init_Expr => Get_Simple_Init_Val + (Typ => Etype (Selector), + N=> Comp, + Size => + (if Known_Esize (Selector) + then Esize (Selector) + else Uint_0)), + Stmts => L); + -- Ada 2005 (AI-287): For each default-initialized component generate -- a call to the corresponding IP subprogram if available. diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb --- a/gcc/ada/sem_aggr.adb +++ b/gcc/ada/sem_aggr.adb @@ -5387,74 +5387,12 @@ package body Sem_Aggr is Assoc_List => New_Assoc_List); Set_Has_Self_Reference (N); - -- A box-defaulted access component gets the value null. Also - -- included are components of private types whose underlying - -- type is an access type. In either case set the type of the - -- literal, for subsequent use in semantic checks. - - elsif Present (Underlying_Type (Ctyp)) - and then Is_Access_Type (Underlying_Type (Ctyp)) - then - -- If the component's type is private with an access type as - -- its underlying type then we have to create an unchecked - -- conversion to satisfy type checking. 
- - if Is_Private_Type (Ctyp) then - declare -Qual_Null : constant Node_Id := - Make_Qualified_Expression (Sloc (N), -Subtype_Mark => - New_Occurrence_Of -(Underlying_Type (Ctyp), Sloc (N)), -Expression => Make_Null (Sloc (N))); - -Convert_Null : constant Node_Id := - Unchecked_Convert_To - (Ctyp, Qual_Null); - - begin -Analyze_And_Resolve (Convert_Null, Ctyp); -Add_Association - (Component => Component, - Expr => Convert_Null, - Assoc_List => New_Assoc_List); - end; - - -- Otherwise the component type is non-private - - else - Expr := Make_Null (Sloc (N)); - Set_Etype (Expr, Ctyp); - - Add_Association - (Component => Component, -Expr =>
[Ada] Simplify detection of record components with default initialization
When detecting record components with default initialization we did two iterations over the component list; now we do only one. Also, there was no need to explicitly guard against an empty list where the subsequent iteration with First/Next/Present works fine. Cleanup related to expansion of aggregates in GNATprove mode; behaviour is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_aggr.adb (Has_Default_Init_Comps): Simplify. diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -8897,46 +8897,41 @@ package body Exp_Aggr is function Has_Default_Init_Comps (N : Node_Id) return Boolean is - Comps : constant List_Id := Component_Associations (N); - C : Node_Id; + Assoc : Node_Id; Expr : Node_Id; + -- Component association and expression, respectively begin pragma Assert (Nkind (N) in N_Aggregate | N_Extension_Aggregate); - if No (Comps) then - return False; - end if; - if Has_Self_Reference (N) then return True; end if; - -- Check if any direct component has default initialized components + Assoc := First (Component_Associations (N)); + while Present (Assoc) loop + -- Each component association has either a box or an expression - C := First (Comps); - while Present (C) loop - if Box_Present (C) then -return True; - end if; + pragma Assert (Box_Present (Assoc) xor Present (Expression (Assoc))); - Next (C); - end loop; + -- Check if any direct component has default initialized components - -- Recursive call in case of aggregate expression + if Box_Present (Assoc) then +return True; - C := First (Comps); - while Present (C) loop - Expr := Expression (C); + -- Recursive call in case of aggregate expression - if Present (Expr) - and then Nkind (Expr) in N_Aggregate | N_Extension_Aggregate - and then Has_Default_Init_Comps (Expr) - then -return True; + else +Expr := Expression (Assoc); + +if Nkind (Expr) in N_Aggregate | N_Extension_Aggregate + and then Has_Default_Init_Comps (Expr) +then 
+ return True; +end if; end if; - Next (C); + Next (Assoc); end loop; return False;
[Ada] Simplify detection of delayed aggregates
Replace IF with a single RETURN statement. Cleanup related to expansion of aggregates in GNATprove mode; semantics is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_aggr.adb (Is_Delayed_Aggregate): Simplify.diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb --- a/gcc/ada/exp_aggr.adb +++ b/gcc/ada/exp_aggr.adb @@ -8978,11 +8978,8 @@ package body Exp_Aggr is Kind := Nkind (Node); end if; - if Kind not in N_Aggregate | N_Extension_Aggregate then - return False; - else - return Expansion_Delayed (Node); - end if; + return Kind in N_Aggregate | N_Extension_Aggregate +and then Expansion_Delayed (Node); end Is_Delayed_Aggregate;
[Ada] Rewrite extended names in derived class-wide expressions
When building a derived class-wide pre- or postcondition we are mapping references to inherited formals and subprograms. Originally we only did it for simple names; recently we fixed this mapping to also work for operator symbols; with this patch we also do this for extended names. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_util.adb (Build_Class_Wide_Expression): Replace entities of both simple and extended names. diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb --- a/gcc/ada/exp_util.adb +++ b/gcc/ada/exp_util.adb @@ -1293,7 +1293,7 @@ package body Exp_Util is Adjust_Inherited_Pragma_Sloc (N); end if; - if Nkind (N) in N_Identifier | N_Operator_Symbol + if Nkind (N) in N_Identifier | N_Expanded_Name | N_Operator_Symbol and then Present (Entity (N)) and then (Is_Formal (Entity (N)) or else Is_Subprogram (Entity (N)))
[Ada] Reorder subprogram spec and bodies in alphabetical order
Required by the style guide and by future changes in this function. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_ch4.adb (Analyze_Membership_Op): Reorder subprogram spec and bodies in alphabetical order.diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb --- a/gcc/ada/sem_ch4.adb +++ b/gcc/ada/sem_ch4.adb @@ -2956,47 +2956,16 @@ package body Sem_Ch4 is I_F : Interp_Index; T_F : Entity_Id; + procedure Analyze_Set_Membership; + -- If a set of alternatives is present, analyze each and find the + -- common type to which they must all resolve. + procedure Try_One_Interp (T1 : Entity_Id); -- Routine to try one proposed interpretation. Note that the context -- of the operation plays no role in resolving the arguments, so that -- if there is more than one interpretation of the operands that is -- compatible with a membership test, the operation is ambiguous. - - -- Try_One_Interp -- - - - procedure Try_One_Interp (T1 : Entity_Id) is - begin - if Has_Compatible_Type (R, T1) then -if Found - and then Base_Type (T1) /= Base_Type (T_F) -then - It := Disambiguate (L, I_F, Index, Any_Type); - - if It = No_Interp then - Ambiguous_Operands (N); - Set_Etype (L, Any_Type); - return; - - else - T_F := It.Typ; - end if; - -else - Found := True; - T_F := T1; - I_F := Index; -end if; - -Set_Etype (L, T_F); - end if; - end Try_One_Interp; - - procedure Analyze_Set_Membership; - -- If a set of alternatives is present, analyze each and find the - -- common type to which they must all resolve. 
- -- Analyze_Set_Membership -- @@ -3095,6 +3064,37 @@ package body Sem_Ch4 is end if; end Analyze_Set_Membership; + + -- Try_One_Interp -- + + + procedure Try_One_Interp (T1 : Entity_Id) is + begin + if Has_Compatible_Type (R, T1) then +if Found + and then Base_Type (T1) /= Base_Type (T_F) +then + It := Disambiguate (L, I_F, Index, Any_Type); + + if It = No_Interp then + Ambiguous_Operands (N); + Set_Etype (L, Any_Type); + return; + + else + T_F := It.Typ; + end if; + +else + Found := True; + T_F := T1; + I_F := Index; +end if; + +Set_Etype (L, T_F); + end if; + end Try_One_Interp; + Op : Node_Id; -- Start of processing for Analyze_Membership_Op
[Ada] Import binder globals as constant
The various __gl_XYZ binder globals prevent some link-time optimizations when imported as mutable. Work around this by turning them into constants. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * libgnarl/s-intman__android.adb, libgnarl/s-intman__lynxos.adb, libgnarl/s-intman__posix.adb, libgnarl/s-intman__qnx.adb, libgnarl/s-intman__solaris.adb, libgnarl/s-intman__susv3.adb, libgnarl/s-taprob.adb, libgnarl/s-taprop__hpux-dce.adb, libgnarl/s-taprop__linux.adb, libgnarl/s-taprop__mingw.adb, libgnarl/s-taprop__posix.adb, libgnarl/s-taprop__qnx.adb, libgnarl/s-taprop__solaris.adb, libgnarl/s-taprop__vxworks.adb, libgnarl/s-taskin.adb, libgnarl/s-tasque.adb, libgnarl/s-tpoben.adb, libgnat/a-calend.adb, libgnat/a-excach.adb, libgnat/a-except.adb, libgnat/a-tags.adb, libgnat/a-textio.adb, libgnat/a-witeio.adb, libgnat/a-ztexio.adb, libgnat/g-binenv.adb, libgnat/s-parame.adb, libgnat/s-parame__vxworks.adb, libgnat/s-stratt.adb, libgnat/s-trasym__dwarf.adb: Mark imported binder globals as constant.diff --git a/gcc/ada/libgnarl/s-intman__android.adb b/gcc/ada/libgnarl/s-intman__android.adb --- a/gcc/ada/libgnarl/s-intman__android.adb +++ b/gcc/ada/libgnarl/s-intman__android.adb @@ -68,7 +68,7 @@ package body System.Interrupt_Management is Exception_Interrupts : constant Interrupt_List := (SIGFPE, SIGILL, SIGSEGV, SIGBUS); - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-intman__lynxos.adb b/gcc/ada/libgnarl/s-intman__lynxos.adb --- a/gcc/ada/libgnarl/s-intman__lynxos.adb +++ b/gcc/ada/libgnarl/s-intman__lynxos.adb @@ -68,7 +68,7 @@ package body System.Interrupt_Management is Exception_Interrupts : constant Interrupt_List := (SIGFPE, SIGILL, SIGSEGV, SIGBUS); - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, 
Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-intman__posix.adb b/gcc/ada/libgnarl/s-intman__posix.adb --- a/gcc/ada/libgnarl/s-intman__posix.adb +++ b/gcc/ada/libgnarl/s-intman__posix.adb @@ -68,7 +68,7 @@ package body System.Interrupt_Management is Exception_Interrupts : constant Interrupt_List := (SIGFPE, SIGILL, SIGSEGV, SIGBUS); - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-intman__qnx.adb b/gcc/ada/libgnarl/s-intman__qnx.adb --- a/gcc/ada/libgnarl/s-intman__qnx.adb +++ b/gcc/ada/libgnarl/s-intman__qnx.adb @@ -68,7 +68,7 @@ package body System.Interrupt_Management is Exception_Interrupts : constant Interrupt_List := (SIGFPE, SIGILL, SIGSEGV, SIGBUS); - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-intman__solaris.adb b/gcc/ada/libgnarl/s-intman__solaris.adb --- a/gcc/ada/libgnarl/s-intman__solaris.adb +++ b/gcc/ada/libgnarl/s-intman__solaris.adb @@ -47,7 +47,7 @@ package body System.Interrupt_Management is Exception_Interrupts : constant Interrupt_List := (SIGFPE, SIGILL, SIGSEGV, SIGBUS); - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-intman__susv3.adb b/gcc/ada/libgnarl/s-intman__susv3.adb --- a/gcc/ada/libgnarl/s-intman__susv3.adb +++ b/gcc/ada/libgnarl/s-intman__susv3.adb @@ -56,7 +56,7 @@ package body System.Interrupt_Management is use Interfaces.C; use System.OS_Interface; - Unreserve_All_Interrupts : Interfaces.C.int; + Unreserve_All_Interrupts : constant Interfaces.C.int; pragma Import (C, Unreserve_All_Interrupts, 
"__gl_unreserve_all_interrupts"); diff --git a/gcc/ada/libgnarl/s-taprob.adb b/gcc/ada/libgnarl/s-taprob.adb --- a/gcc/ada/libgnarl/s-taprob.adb +++ b/gcc/ada/libgnarl/s-taprob.adb @@ -47,7 +47,7 @@ package body System.Tasking.Protected_Objects is -- Local Data -- - Locking_Policy : Character; + Locking_Policy : constant Character; pragma Import (C, Locking_Policy, "__gl_locking_policy"); - diff --git a/gcc/ada/libgnarl/s-taprop__hpux-dce.adb b/gcc/ada/libgnarl/s-taprop__hpux-dce.adb --- a/gcc/ada/libgnarl/s-taprop__hpux-dce.adb +++ b/gcc/ada/libgnarl/s-taprop__hpux-dce.adb @@ -87,10 +87,10 @@ package body System.Task_Primitives.Operations is Unbl
[Ada] RTEMS: use hardware interrupts instead of signals for interrupt handling
RTEMS supports attaching interrupt handlers to hardware interrupt vectors, which is superior to the current approach of attaching handlers to signals. Direct attachment of handlers removes the execution overhead of converting hardware interrupts to signals and their subsequent propagation to the interrupt manager. It also removes the limitation of the number of hardware interrupts that can be supported under the signals model, as RTEMS is limited to 32 signals. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * Makefile.rtl (VxWorks): Rename s-inmaop__vxworks.adb to s-inmaop__hwint.adb. (RTEMS): Use s-inmaop__hwint.adb, s-intman__rtems.adb/s, s-taprop__rtems.adb. * libgnarl/a-intnam__rtems.ads: Remove signals definitions and replace with Hardware_Interrupts. * libgnarl/s-inmaop__vxworks.adb: Rename as... * libgnarl/s-inmaop__hwint.adb: ... this. * libgnarl/s-interr__hwint.adb: Remove unnecessary comments. * libgnarl/s-intman__rtems.ads, libgnarl/s-intman__rtems.adb: New files. * libgnarl/s-osinte__rtems.adb: Add RTEMS API bindings. (Binary_Semaphore_Create, Binary_Semaphore_Delete, Binary_Semaphore_Obtain, Binary_Semaphore_Release, Binary_Semaphore_Flush, Interrupt_Connect, Interrupt_Number_To_Vector): New functions. * libgnarl/s-osinte__rtems.ads (Num_HW_Interrupts, Signal): Removed. (NSIG, Interrupt_Range): New. (Binary_Semaphore_Create, Binary_Semaphore_Delete, Binary_Semaphore_Obtain, Binary_Semaphore_Release, Binary_Semaphore_Flush, Interrupt_Connect, Interrupt_Number_To_Vector): Remove Import pragma. * libgnarl/s-taprop__rtems.adb: New file. patch.diff.gz Description: application/gzip
[Ada] Fix internal error on fixed-point divide, multiply and scaling
This fixes a couple of long-standing oversights in the fixed-point multiply implementation that were recently copied into the divide implementation and thus made more visible: when computing the operand size for compile-time known values, the negative case must be taken into account and comparisons with powers of 2 must be strict. The patch also performs some refactoring. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_fixd.adb (Get_Size_For_Value): New function returning a size suitable for a non-negative integer value. (Get_Type_For_Size): New function returning a standard type suitable for a size. (Build_Divide): Call both functions to compute the result type, but make sure to pass a non-negative value to the first. (Build_Multiply): Likewise. (Do_Multiply_Fixed_Universal): Minor consistency tweak. (Integer_Literal): Call both functions to compute the type.diff --git a/gcc/ada/exp_fixd.adb b/gcc/ada/exp_fixd.adb --- a/gcc/ada/exp_fixd.adb +++ b/gcc/ada/exp_fixd.adb @@ -190,6 +190,15 @@ package body Exp_Fixd is -- The expression returned is neither analyzed nor resolved. The Etype -- of the result is properly set (to Universal_Real). + function Get_Size_For_Value (V : Uint) return Pos; + -- Given a non-negative universal integer value, return the size of a small + -- signed integer type covering -V .. V, or Pos'Max if no such type exists. + + function Get_Type_For_Size (Siz : Pos; Force : Boolean) return Entity_Id; + -- Return the smallest signed integer type containing at least Siz bits. + -- If no such type exists, return Empty if Force is False or the largest + -- signed integer type if Force is True. 
+ function Integer_Literal (N: Node_Id; V: Uint; @@ -324,7 +333,6 @@ package body Exp_Fixd is Right_Type : constant Entity_Id := Base_Type (Etype (R)); Left_Size : Int; Right_Size : Int; - Rsize : Int; Result_Type : Entity_Id; Rnode : Node_Id; @@ -354,20 +362,17 @@ package body Exp_Fixd is -- the effective size of an operand is the RM_Size of the operand. -- But a special case arises with operands whose size is known at -- compile time. In this case, we can use the actual value of the - -- operand to get its size if it would fit in signed 8/16/32 bits. + -- operand to get a size if it would fit in a small signed integer. Left_Size := UI_To_Int (RM_Size (Left_Type)); if Compile_Time_Known_Value (L) then declare - Val : constant Uint := Expr_Value (L); + Siz : constant Int := + Get_Size_For_Value (UI_Abs (Expr_Value (L))); begin - if Val < Uint_2 ** 7 then - Left_Size := 8; - elsif Val < Uint_2 ** 15 then - Left_Size := 16; - elsif Val < Uint_2 ** 31 then - Left_Size := 32; + if Siz < Left_Size then + Left_Size := Siz; end if; end; end if; @@ -376,35 +381,19 @@ package body Exp_Fixd is if Compile_Time_Known_Value (R) then declare - Val : constant Uint := Expr_Value (R); + Siz : constant Int := + Get_Size_For_Value (UI_Abs (Expr_Value (R))); begin - if Val <= Int'(2 ** 7) then - Right_Size := 8; - elsif Val <= Int'(2 ** 15) then - Right_Size := 16; + if Siz < Right_Size then + Right_Size := Siz; end if; end; end if; -- Do the operation using the longer of the two sizes - Rsize := Int'Max (Left_Size, Right_Size); - - if Rsize <= 8 then -Result_Type := Standard_Integer_8; - - elsif Rsize <= 16 then -Result_Type := Standard_Integer_16; - - elsif Rsize <= 32 then -Result_Type := Standard_Integer_32; - - elsif Rsize <= 64 or else System_Max_Integer_Size < 128 then -Result_Type := Standard_Integer_64; - - else -Result_Type := Standard_Integer_128; - end if; + Result_Type := + Get_Type_For_Size (Int'Max (Left_Size, Right_Size), Force => True); Rnode := Make_Op_Divide (Loc, 
@@ -664,7 +653,6 @@ package body Exp_Fixd is Right_Type : constant Entity_Id := Etype (R); Left_Size : Int; Right_Size : Int; - Rsize : Int; Result_Type : Entity_Id; Rnode : Node_Id; @@ -697,20 +685,17 @@ package body Exp_Fixd is -- the effective size of an operand is the RM_Size of the operand. -- But a special case arises with operands whose size is known at
[Ada] Find an interpretation for membership test with a singleton value
When resolving type Color is (Blue, Orange); function Get_Color return Color is begin return Blue; end Get_Color; function Get_Color return String is begin return "Blue"; end Get_Color; Test : Boolean := Get_Color in Blue; we did not try all the possible interpretations of Get_Color but only the latest. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_ch4.adb (Analyze_Membership_Op): Finds interpretation for the case of a membership test with a singleton value in case of overloading.diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb --- a/gcc/ada/sem_ch4.adb +++ b/gcc/ada/sem_ch4.adb @@ -2960,6 +2960,13 @@ package body Sem_Ch4 is -- If a set of alternatives is present, analyze each and find the -- common type to which they must all resolve. + procedure Find_Interpretation; + function Find_Interpretation return Boolean; + -- Routine and wrapper to find a matching interpretation in case + -- of overloading. The wrapper returns True iff a matching + -- interpretation is found. Beware, in absence of overloading, + -- using this function will break gnat's bootstrapping. + procedure Try_One_Interp (T1 : Entity_Id); -- Routine to try one proposed interpretation. 
Note that the context -- of the operation plays no role in resolving the arguments, so that @@ -3064,6 +3071,26 @@ package body Sem_Ch4 is end if; end Analyze_Set_Membership; + - + -- Find_Interpretation -- + - + + procedure Find_Interpretation is + begin + Get_First_Interp (L, Index, It); + while Present (It.Typ) loop +Try_One_Interp (It.Typ); +Get_Next_Interp (Index, It); + end loop; + end Find_Interpretation; + + function Find_Interpretation return Boolean is + begin + Find_Interpretation; + + return Found; + end Find_Interpretation; + -- Try_One_Interp -- @@ -3119,11 +3146,7 @@ package body Sem_Ch4 is Try_One_Interp (Etype (L)); else -Get_First_Interp (L, Index, It); -while Present (It.Typ) loop - Try_One_Interp (It.Typ); - Get_Next_Interp (Index, It); -end loop; +Find_Interpretation; end if; -- If not a range, it can be a subtype mark, or else it is a degenerate @@ -3139,13 +3162,14 @@ package body Sem_Ch4 is Find_Type (R); Check_Fully_Declared (Entity (R), R); - elsif Ada_Version >= Ada_2012 - and then Has_Compatible_Type (R, Etype (L)) + elsif Ada_Version >= Ada_2012 and then + ((Is_Overloaded (L) and then Find_Interpretation) or else + (not Is_Overloaded (L) and then Has_Compatible_Type (R, Etype (L then if Nkind (N) = N_In then - Op := Make_Op_Eq (Loc, Left_Opnd => L, Right_Opnd => R); + Op := Make_Op_Eq (Loc, Left_Opnd => L, Right_Opnd => R); else - Op := Make_Op_Ne (Loc, Left_Opnd => L, Right_Opnd => R); + Op := Make_Op_Ne (Loc, Left_Opnd => L, Right_Opnd => R); end if; if Is_Record_Or_Limited_Type (Etype (L)) then
[PATCH] middle-end/102682 - avoid invalid subreg on the LHS
The following avoids generating (insn 6 5 7 2 (set (subreg:OI (concatn/v:TI [ (reg:DI 92 [ buffer ]) (reg:DI 93 [ buffer+8 ]) ]) 0) (subreg:OI (reg/v:V8SI 85 [ __x ]) 0)) "t.ii":76:21 74 {*movoi_internal_avx} (nil)) via store_bit_field_1 when we try to store excess data into a register allocated temporary. The case was supposed to /* Use the subreg machinery either to narrow OP0 to the required words... but the check ensured only a register-aligned but not a large enough piece. The following adds the missing check which ends up decomposing the set to (insn 6 5 7 (set (subreg:DI (reg/v:TI 84 [ buffer ]) 0) (subreg:DI (reg/v:V8SI 85 [ __x ]) 0)) "t.ii":76:21 -1 (nil)) (insn 7 6 0 (set (subreg:DI (reg/v:TI 84 [ buffer ]) 8) (subreg:DI (reg/v:V8SI 85 [ __x ]) 8)) "t.ii":76:21 -1 (nil)) Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk? Thanks, Richard. 2021-10-11 Richard Biener PR middle-end/102682 * expmed.c (store_bit_field_1): Ensure a LHS subreg would not create a paradoxical subreg. --- gcc/expmed.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/expmed.c b/gcc/expmed.c index 59734d4841c..bbdd0e71d20 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -806,7 +806,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, } } else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum) - && multiple_p (bitsize, regsize * BITS_PER_UNIT)) + && multiple_p (bitsize, regsize * BITS_PER_UNIT) + && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize)) { sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), regnum * regsize); -- 2.31.1
[Ada] Remove constant arguments
All these arguments were identified programmatically as being always used with the same value (often the default one). As such, they can be omitted. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * ali.adb (Get_Name): Ignore_Spaces is always False. * bindo-graphs.adb (Set_Is_Existing_Source_Target_Relation): Val is always True. * cstand.adb (New_Standard_Entity): New_Node_Kind is always N_Defining_Identifier. * exp_ch3.adb (Predef_Stream_Attr_Spec): For_Body is always False. * exp_dist.adb (Add_Parameter_To_NVList): RACW_Ctrl is always False. * gnatls.adb (Add_Directories): Prepend is always False. * sem_ch10.adb, sem_ch10.ads (Load_Needed_Body): Do_Analyze is always True. * sem_ch3.adb, sem_ch3.ads (Process_Range_Expr_In_Decl): R_Check_Off is always False. * sem_elab.adb (Info_Variable_Reference): Info_Msg is always False, In_SPARK is always True. (Set_Is_Traversed_Body, Set_Is_Saved_Construct, Set_Is_Saved_Relation): Val is always True. * treepr.adb (Visit_Descendant): No_Indent is always False. (Print_Node): Fmt does not need such a big scope. patch.diff.gz Description: application/gzip
[Ada] Simplify membership tests with N_Generic_Declaration
Use collective N_Generic_Declaration subtype instead of its members N_Generic_Subprogram_Declaration and N_Generic_Package_Declaration where reasonable. Code cleanup related to handling of Global contracts in generic units; semantics is unaffected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_ch10.adb, sem_prag.adb, sem_util.adb: Use N_Generic_Declaration in membership tests.diff --git a/gcc/ada/sem_ch10.adb b/gcc/ada/sem_ch10.adb --- a/gcc/ada/sem_ch10.adb +++ b/gcc/ada/sem_ch10.adb @@ -4162,8 +4162,7 @@ package body Sem_Ch10 is end if; if Ekind (P_Name) = E_Generic_Package -and then Nkind (Lib_Unit) not in N_Generic_Subprogram_Declaration - | N_Generic_Package_Declaration +and then Nkind (Lib_Unit) not in N_Generic_Declaration | N_Generic_Renaming_Declaration then Error_Msg_N @@ -6193,9 +6192,7 @@ package body Sem_Ch10 is ("subprogram not allowed in `LIMITED WITH` clause", N); return; - when N_Generic_Package_Declaration -| N_Generic_Subprogram_Declaration - => + when N_Generic_Declaration => Error_Msg_N ("generic not allowed in `LIMITED WITH` clause", N); return; diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -6678,9 +6678,7 @@ package body Sem_Prag is then Pragma_Misplaced; - elsif (Nkind (Parent_Node) = N_Generic_Package_Declaration - or else Nkind (Parent_Node) = - N_Generic_Subprogram_Declaration) + elsif Nkind (Parent_Node) in N_Generic_Declaration and then Plist = Generic_Formal_Declarations (Parent_Node) then Pragma_Misplaced; diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -8030,8 +8030,7 @@ package body Sem_Util is if Present (Spec_Id) and then Nkind (Unit_Declaration_Node (Spec_Id)) in - N_Generic_Package_Declaration | - N_Generic_Subprogram_Declaration + N_Generic_Declaration then return Par; end if; @@ -8055,9 +8054,7 @@ package body Sem_Util is begin Par := Parent (N); while Present (Par) loop - if 
Nkind (Par) in N_Generic_Package_Declaration - | N_Generic_Subprogram_Declaration - then + if Nkind (Par) in N_Generic_Declaration then return Par; elsif Nkind (Par) in N_Package_Body | N_Subprogram_Body then @@ -8066,9 +8063,7 @@ package body Sem_Util is if Present (Spec_Id) then Spec_Decl := Unit_Declaration_Node (Spec_Id); - if Nkind (Spec_Decl) in N_Generic_Package_Declaration - | N_Generic_Subprogram_Declaration - then + if Nkind (Spec_Decl) in N_Generic_Declaration then return Spec_Decl; end if; end if; @@ -17891,9 +17886,7 @@ package body Sem_Util is -- a generic body modifies the Ekind of its spec to allow for recursive -- calls. - return -Nkind (Spec_Decl) in N_Generic_Package_Declaration - | N_Generic_Subprogram_Declaration; + return Nkind (Spec_Decl) in N_Generic_Declaration; end Is_Generic_Declaration_Or_Body; ---
[Ada] RTEMS: use regular RTEMS API for minimum stack size calculation
Use _POSIX_Threads_Minimum_stack_size instead of ada_pthread_minimum_stack_size so the runtime does not require the RTEMS kernel to be configured to have Ada support. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * libgnat/s-parame__rtems.adb: use _POSIX_Threads_Minimum_stack_size instead of ada_pthread_minimum_stack_size.diff --git a/gcc/ada/libgnat/s-parame__rtems.adb b/gcc/ada/libgnat/s-parame__rtems.adb --- a/gcc/ada/libgnat/s-parame__rtems.adb +++ b/gcc/ada/libgnat/s-parame__rtems.adb @@ -35,10 +35,6 @@ with Interfaces.C; package body System.Parameters is - function ada_pthread_minimum_stack_size return Interfaces.C.size_t; - pragma Import (C, ada_pthread_minimum_stack_size, - "_ada_pthread_minimum_stack_size"); - - -- Adjust_Storage_Size -- - @@ -61,8 +57,15 @@ package body System.Parameters is function Default_Stack_Size return Size_Type is + Default_Stack_Size : constant Integer +with Import, Convention => C, + External_Name => "__gl_default_stack_size"; begin - return Size_Type (ada_pthread_minimum_stack_size); + if Default_Stack_Size = -1 then + return 32 * 1024; + else + return Size_Type (Default_Stack_Size); + end if; end Default_Stack_Size; @@ -70,9 +73,11 @@ package body System.Parameters is function Minimum_Stack_Size return Size_Type is - + POSIX_Threads_Minimum_stack_size : constant Interfaces.C.size_t +with Import, Convention => C, + External_Name => "_POSIX_Threads_Minimum_stack_size"; begin - return Size_Type (ada_pthread_minimum_stack_size); + return Size_Type (POSIX_Threads_Minimum_stack_size); end Minimum_Stack_Size; end System.Parameters;
[Ada] Incorrect Dynamic_Predicate results for static arguments
In determining at run time whether a statically-known discrete value satisfies the predicate of a subtype where both - a Dynamic_Predicate aspect specification applies (directly or indirectly) to a subtype; and - at least one other predicate aspect specification (that is, either a Static_Predicate aspect specification, a GNAT-defined Predicate aspect specification, or a second Dynamic_Predicate aspect specification) applies (directly or indirectly) to that same subtype, sometimes only the "last" Dynamic_Predicate aspect's condition was checked; the other predicate aspects were incorrectly ignored. This could result in a subtype membership test incorrectly yielding a result of True. This error is corrected. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_ch6.adb (Can_Fold_Predicate_Call): Do not attempt folding if there is more than one predicate involved. Recall that predicate aspect specifications are additive, not overriding, and that there are three different predicate aspects (Dynamic_Predicate, Static_Predicate, and the GNAT-defined Predicate aspect). These various ways of introducing multiple predicates are all checked for. A new nested function, Augments_Other_Dynamic_Predicate, is introduced. 
* sem_ch4.adb (Analyze_Indexed_Component_Form.Process_Function_Call): When determining whether a name like "X (Some_Discrete_Type)" might be interpreted as a slice, the answer should be "no" if the type/subtype name denotes the current instance of type/subtype.diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb --- a/gcc/ada/exp_ch6.adb +++ b/gcc/ada/exp_ch6.adb @@ -3143,6 +3143,13 @@ package body Exp_Ch6 is function Can_Fold_Predicate_Call (P : Entity_Id) return Boolean is Actual : Node_Id; + function Augments_Other_Dynamic_Predicate (DP_Aspect_Spec : Node_Id) + return Boolean; + -- Given a Dynamic_Predicate aspect aspecification for a + -- discrete type, returns True iff another DP specification + -- applies (indirectly, via a subtype type or a derived type) + -- to the same entity that this aspect spec applies to. + function May_Fold (N : Node_Id) return Traverse_Result; -- The predicate expression is foldable if it only contains operators -- and literals. During this check, we also replace occurrences of @@ -3150,6 +3157,36 @@ package body Exp_Ch6 is -- value of the actual. This is done on a copy of the analyzed -- expression for the predicate. 
+ -- + -- Augments_Other_Dynamic_Predicate -- + -- + + function Augments_Other_Dynamic_Predicate (DP_Aspect_Spec : Node_Id) + return Boolean + is +Aspect_Bearer : Entity_Id := Entity (DP_Aspect_Spec); + begin +loop + Aspect_Bearer := Nearest_Ancestor (Aspect_Bearer); + + if not Present (Aspect_Bearer) then + return False; + end if; + + declare + Aspect_Spec : constant Node_Id := +Find_Aspect (Aspect_Bearer, Aspect_Dynamic_Predicate); + begin + if Present (Aspect_Spec) +and then Aspect_Spec /= DP_Aspect_Spec + then + -- Found another Dynamic_Predicate aspect spec + return True; + end if; + end; +end loop; + end Augments_Other_Dynamic_Predicate; + -- -- May_Fold -- -- @@ -3192,7 +3229,7 @@ package body Exp_Ch6 is function Try_Fold is new Traverse_Func (May_Fold); - -- Other lLocal variables + -- Other Local variables Subt : constant Entity_Id := Etype (First_Entity (P)); Aspect : Node_Id; @@ -3220,6 +3257,11 @@ package body Exp_Ch6 is or else Nkind (Actual) /= N_Integer_Literal or else not Has_Dynamic_Predicate_Aspect (Subt) or else No (Aspect) + + -- Do not fold if multiple applicable predicate aspects + or else Present (Find_Aspect (Subt, Aspect_Static_Predicate)) + or else Present (Find_Aspect (Subt, Aspect_Predicate)) + or else Augments_Other_Dynamic_Predicate (Aspect) or else CodePeer_Mode then return False; diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb --- a/gcc/ada/sem_ch4.adb +++ b/gcc/ada/sem_ch4.adb @@ -2534,6 +2534,7 @@ package body Sem_Ch4 is and then Is_Entity_Name (Actual) and then Is_Type (Entity (Actual)) and then Is_Discrete_Type (Entity (Actual)) + and the
[Ada] Warn about conversion with any predefined time types
We already had a warning for unchecked conversions that involve the private type Ada.Calendar.Time, whose representation might differ between releases and targets of the compiler. Now this warning is extended to Ada.Real_Time.Time and Ada.Real_Time.Time_Span, which is similarly non-portable. Previously the warning message referred to Time with no quotes; now all the type names are in quotes, both because that's how we refer to entity names in messages and because it is actually hard to omit the quotes with the current API for error reporting. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_ch13.adb (Validate_Unchecked_Conversion): Simplify code for detecting conversions with Ada.Calendar.Time type and extend it to similar types in the Ada.Real_Time package.diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb --- a/gcc/ada/sem_ch13.adb +++ b/gcc/ada/sem_ch13.adb @@ -17335,8 +17335,32 @@ package body Sem_Ch13 is is Source : Entity_Id; Target : Entity_Id; + + procedure Warn_Nonportable (RE : RE_Id); + -- Warn if either source or target of the conversion is a predefined + -- private type, whose representation might differ between releases and + -- targets of the compiler. + + -- + -- Warn_Nonportable -- + -- + + procedure Warn_Nonportable (RE : RE_Id) is + begin + if Is_RTE (Source, RE) or else Is_RTE (Target, RE) then +pragma Assert (Is_Private_Type (RTE (RE))); +Error_Msg_NE + ("?z?representation of & values may change between " + & "'G'N'A'T versions", N, RTE (RE)); + end if; + end Warn_Nonportable; + + -- Local variables + Vnode : Node_Id; + -- Start of processing for Validate_Unchecked_Conversion + begin -- Obtain source and target types. Note that we call Ancestor_Subtype -- here because the processing for generic instantiation always makes @@ -17353,6 +17377,18 @@ package body Sem_Ch13 is return; end if; + -- Warn if one of the operands is a private type declared in + -- Ada.Calendar or Ada.Real_Time. 
Do not emit a warning when compiling + -- GNAT-related sources. + + if Warn_On_Unchecked_Conversion +and then not In_Predefined_Unit (N) + then + Warn_Nonportable (RO_CA_Time); + Warn_Nonportable (RO_RT_Time); + Warn_Nonportable (RE_Time_Span); + end if; + -- If we are dealing with private types, then do the check on their -- fully declared counterparts if the full declarations have been -- encountered (they don't have to be visible, but they must exist). @@ -17399,32 +17435,6 @@ package body Sem_Ch13 is end if; end if; - -- Warn if one of the operands is Ada.Calendar.Time. Do not emit a - -- warning when compiling GNAT-related sources. - - if Warn_On_Unchecked_Conversion -and then not In_Predefined_Unit (N) -and then RTU_Loaded (Ada_Calendar) -and then (Chars (Source) = Name_Time -or else - Chars (Target) = Name_Time) - then - -- If Ada.Calendar is loaded and the name of one of the operands is - -- Time, there is a good chance that this is Ada.Calendar.Time. - - declare -Calendar_Time : constant Entity_Id := Full_View (RTE (RO_CA_Time)); - begin -pragma Assert (Present (Calendar_Time)); - -if Source = Calendar_Time or else Target = Calendar_Time then - Error_Msg_N - ("?z?representation of 'Time values may change between " - & "'G'N'A'T versions", N); -end if; - end; - end if; - -- Make entry in unchecked conversion table for later processing by -- Validate_Unchecked_Conversions, which will check sizes and alignments -- (using values set by the back end where possible). This is only done
[Ada] Valid postconditions incorrectly rejected.
For users, 'Old attribute references are only allowed within postcondition expressions. Internally, the FE may build trees that transiently (before some subsequent transformation) violate these rules; this is ok, but these violations were being incorrectly flagged in some cases. Fix this problem. The customer's example for this ticket also demonstrates a second problem. Exp_Util.Insert_Actions was willing to take an action (e.g., the constraint check for an array indexing expression) that contains a reference to the loop parameter of a N_Iterated_Component/Element_Association and insert it in the tree somewhere above that node, so that the reference ends up outside of the scope of the declaration it refers to. This leads to a bugbox failure (gigi is understandably unhappy with the resulting malformed tree). Fix this too. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * sem_attr.adb (Analyze_Attribute_Old_Result): Permit an attribute reference inside a compiler-generated _Postconditions procedure. In this case, Subp_Decl is assigned the declaration of the enclosing subprogram. * exp_util.adb (Insert_Actions): When climbing up the tree looking for an insertion point, do not climb past an N_Iterated_Component/Element_Association, since this could result in inserting a reference to a loop parameter at a location outside of the scope of that loop parameter. 
On the other hand, be careful to preserve existing behavior in the case of an N_Component_Association node.diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb --- a/gcc/ada/exp_util.adb +++ b/gcc/ada/exp_util.adb @@ -7619,8 +7619,18 @@ package body Exp_Util is | N_Iterated_Component_Association | N_Iterated_Element_Association => - if Nkind (Parent (P)) = N_Aggregate - and then Present (Loop_Actions (P)) + if Nkind (Parent (P)) in N_Aggregate | N_Delta_Aggregate + + -- We must not climb up out of an N_Iterated_xxx_Association + -- because the actions might contain references to the loop + -- parameter. But it turns out that setting the Loop_Actions + -- attribute in the case of an N_Component_Association + -- when the attribute was not already set can lead to + -- (as yet not understood) bugboxes (gcc failures that are + -- presumably due to malformed trees). So we don't do that. + + and then (Nkind (P) /= N_Component_Association +or else Present (Loop_Actions (P))) then if Is_Empty_List (Loop_Actions (P)) then Set_Loop_Actions (P, Ins_Actions); diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb --- a/gcc/ada/sem_attr.adb +++ b/gcc/ada/sem_attr.adb @@ -1413,6 +1413,15 @@ package body Sem_Attr is return; end if; + -- 'Old attribute reference ok in a _Postconditions procedure + + elsif Nkind (Prag) = N_Subprogram_Body + and then not Comes_From_Source (Prag) + and then Nkind (Corresponding_Spec (Prag)) = N_Defining_Identifier + and then Chars (Corresponding_Spec (Prag)) = Name_uPostconditions + then +null; + -- Otherwise the placement of the attribute is illegal else @@ -1424,6 +1433,15 @@ package body Sem_Attr is if Nkind (Prag) = N_Aspect_Specification then Subp_Decl := Parent (Prag); + elsif Nkind (Prag) = N_Subprogram_Body then +declare + Enclosing_Scope : constant Node_Id := + Scope (Corresponding_Spec (Prag)); +begin + pragma Assert (Postconditions_Proc (Enclosing_Scope) + = Corresponding_Spec (Prag)); + Subp_Decl := Parent (Parent (Enclosing_Scope)); 
+end; else Subp_Decl := Find_Related_Declaration_Or_Body (Prag); end if;
[Ada] Runtime transition: System.Threads
Rewrite the former System.Threads implementation for AE653 to work on the new Light runtime for VxWorks7r2Cert. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * libgnat/s-thread.ads: Fix comments. Remove unused package imports. (Thread_Body_Exception_Exit): Remove Exception_Occurrence parameter. (ATSD): Declare type locally. * libgnat/s-thread__ae653.adb: Fix comments. Remove unused package imports. Remove package references to Stack_Limit checking. (Install_Handler): Remove. (Set_Sec_Stack): Likewise. (Thread_Body_Enter): Remove calls to Install_Handler and Stack_Limit checking. (Thread_Body_Exception_Exit): Remove Exception_Occurrence parameter. (Init_RTS): Call local Get_Sec_Stack. Remove call to Install_Handler. Remove references to accessors for Get_Sec_Stack and Set_Sec_Stack. Remove OS check. (Set_Sec_Stack): Remove. diff --git a/gcc/ada/libgnat/s-thread.ads b/gcc/ada/libgnat/s-thread.ads --- a/gcc/ada/libgnat/s-thread.ads +++ b/gcc/ada/libgnat/s-thread.ads @@ -34,16 +34,13 @@ -- This package is currently implemented for: ---VxWorks AE653 rts-cert ---VxWorks AE653 rts-full (not rts-kernel) +--VxWorks7r2Cert Light -with Ada.Exceptions; with Ada.Unchecked_Conversion; with Interfaces.C; with System.Secondary_Stack; -with System.Soft_Links; package System.Threads is @@ -81,12 +78,15 @@ package System.Threads is procedure Thread_Body_Leave; -- Leave thread body (normally), see above for details - procedure Thread_Body_Exceptional_Exit - (EO : Ada.Exceptions.Exception_Occurrence); + procedure Thread_Body_Exceptional_Exit; -- Leave thread body (abnormally on exception), see above for details private - type ATSD is new System.Soft_Links.TSD; + type ATSD is record + Sec_Stack_Ptr : SST.SS_Stack_Ptr; + -- Pointer of the allocated secondary stack + + end record; end System.Threads; diff --git a/gcc/ada/libgnat/s-thread__ae653.adb b/gcc/ada/libgnat/s-thread__ae653.adb --- a/gcc/ada/libgnat/s-thread__ae653.adb +++ b/gcc/ada/libgnat/s-thread__ae653.adb @@ 
-29,22 +29,19 @@ -- -- -- --- This is the VxWorks 653 version of this package +-- This is the VxWorks7r2Cert Light version of this package pragma Restrictions (No_Tasking); --- The VxWorks 653 version of this package is intended only for programs --- which do not use Ada tasking. This restriction ensures that this --- will be checked by the binder. +-- The VxWorks7r2Cert Light version of this package is intended only +-- for programs which do not use Ada tasking. This restriction ensures +-- that this will be checked by the binder. with System.Storage_Elements; use System.Storage_Elements; -with System.OS_Versions; use System.OS_Versions; package body System.Threads is use Interfaces.C; - package SSL renames System.Soft_Links; - Main_ATSD : aliased ATSD; -- TSD for environment task @@ -52,21 +49,7 @@ package body System.Threads is pragma Thread_Local_Storage (Current_ATSD); -- pragma TLS needed since TaskVarAdd no longer available - -- Assume guard pages for Helix APEX partitions, but leave - -- checking mechanism in for now, in case of surprises. ??? - Stack_Limit : Address; - pragma Import (C, Stack_Limit, "__gnat_stack_limit"); - - type Set_Stack_Limit_Proc_Acc is access procedure; - pragma Convention (C, Set_Stack_Limit_Proc_Acc); - - Set_Stack_Limit_Hook : Set_Stack_Limit_Proc_Acc; - pragma Import (C, Set_Stack_Limit_Hook, "__gnat_set_stack_limit_hook"); - -- Procedure to be called when a task is created to set stack limit if - -- limit checking is used. - -- VxWorks specific API - ERROR : constant STATUS := Interfaces.C.int (-1); OK: constant STATUS := Interfaces.C.int (0); @@ -85,13 +68,8 @@ package body System.Threads is -- It installs System.Threads versions of certain operations of the -- run-time lib. 
- procedure Install_Handler; - pragma Import (C, Install_Handler, "__gnat_install_handler"); - function Get_Sec_Stack return SST.SS_Stack_Ptr; - procedure Set_Sec_Stack (Stack : SST.SS_Stack_Ptr); - --- -- Thread_Body_Enter -- --- @@ -108,27 +86,14 @@ package body System.Threads is ATSD.Sec_Stack_Ptr := Sec_Stack_Ptr; SST.SS_Init (ATSD.Sec_Stack_Ptr); Current_ATSD := Process_ATSD_Address; - Install_Handler; - - -- Assume guard pages for Helix/Vx7, but leave in for now ??? - -- Initialize stack limit if needed. - if Current_ATSD /= Main_ATSD'Address -and then Set_Stack_Limit_Hook /= null - then - Set_Stack_Limit_Hook.all; - en
[Ada] Remove redundant guard in expansion of dispatching calls
Routines Make_Predefined_Primitive_Specs and Predefined_Primitive_Bodies, which create predefined primitives for derived tagged types, are only called when restriction No_Dispatching_Calls is inactive. There is no need to recheck this restriction when creating individual primitive operations related to tasking. Code cleanup related to handling of dispatching equality in SPARK. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * exp_ch3.adb (Make_Predefined_Primitive_Specs, Predefined_Primitive_Bodies): Remove guard with restriction No_Dispatching_Calls.diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb --- a/gcc/ada/exp_ch3.adb +++ b/gcc/ada/exp_ch3.adb @@ -10611,11 +10611,9 @@ package body Exp_Ch3 is --Disp_Requeue --Disp_Timed_Select - -- Disable the generation of these bodies if No_Dispatching_Calls, - -- Ravenscar or ZFP is active. + -- Disable the generation of these bodies if Ravenscar or ZFP is active if Ada_Version >= Ada_2005 -and then not Restriction_Active (No_Dispatching_Calls) and then not Restriction_Active (No_Select_Statements) and then RTE_Available (RE_Select_Specific_Data) then @@ -11094,8 +11092,7 @@ package body Exp_Ch3 is -- The interface versions will have null bodies - -- Disable the generation of these bodies if No_Dispatching_Calls, - -- Ravenscar or ZFP is active. + -- Disable the generation of these bodies if Ravenscar or ZFP is active -- In VM targets we define these primitives in all root tagged types -- that are not interface types. Done because in VM targets we don't @@ -4,7 +1,6 @@ package body Exp_Ch3 is or else (not Tagged_Type_Expansion and then Tag_Typ = Root_Type (Tag_Typ))) -and then not Restriction_Active (No_Dispatching_Calls) and then not Restriction_Active (No_Select_Statements) and then RTE_Available (RE_Select_Specific_Data) then
[Ada] Fix for atomic wrongly rejected on object of discriminated type
The reason is that the automatic alignment promotion is not yet performed in the case where the nominal subtype is of variable size. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * gcc-interface/decl.c (promote_object_alignment): Add GNU_SIZE parameter and use it for the size of the object if not null. (gnat_to_gnu_entity) : Perform the automatic alignment promotion for objects whose nominal subtype is of variable size. (gnat_to_gnu_field): Adjust call to promote_object_alignment.diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -239,7 +239,7 @@ static tree validate_size (Uint, tree, Entity_Id, enum tree_code, bool, bool, const char *, const char *); static void set_rm_size (Uint, tree, Entity_Id); static unsigned int validate_alignment (Uint, Entity_Id, unsigned int); -static unsigned int promote_object_alignment (tree, Entity_Id); +static unsigned int promote_object_alignment (tree, tree, Entity_Id); static void check_ok_for_atomic_type (tree, Entity_Id, bool); static tree create_field_decl_from (tree, tree, tree, tree, tree, vec); @@ -897,7 +897,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) or a reference to another object, and the size of its type is a constant, set the alignment to the smallest one which is not smaller than the size, with an appropriate cap. 
*/ - if (!gnu_size && align == 0 + if (!Known_Esize (gnat_entity) + && !Known_Alignment (gnat_entity) && (Is_Full_Access (gnat_entity) || (!Optimize_Alignment_Space (gnat_entity) && kind != E_Exception @@ -908,8 +909,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) && !imported_p && No (gnat_renamed_obj) && No (Address_Clause (gnat_entity - && TREE_CODE (TYPE_SIZE (gnu_type)) == INTEGER_CST) - align = promote_object_alignment (gnu_type, gnat_entity); + && (TREE_CODE (TYPE_SIZE (gnu_type)) == INTEGER_CST || gnu_size)) + align = promote_object_alignment (gnu_type, gnu_size, gnat_entity); /* If the object is set to have atomic components, find the component type and validate it. @@ -7322,7 +7323,7 @@ gnat_to_gnu_field (Entity_Id gnat_field, tree gnu_record_type, int packed, if (Is_Full_Access (gnat_field)) { const unsigned int align - = promote_object_alignment (gnu_field_type, gnat_field); + = promote_object_alignment (gnu_field_type, NULL_TREE, gnat_field); if (align > 0) gnu_field_type = maybe_pad_type (gnu_field_type, NULL_TREE, align, gnat_field, @@ -9393,11 +9394,11 @@ validate_alignment (Uint alignment, Entity_Id gnat_entity, unsigned int align) return align; } -/* Promote the alignment of GNU_TYPE corresponding to GNAT_ENTITY. Return - a positive value on success or zero on failure. */ +/* Promote the alignment of GNU_TYPE for an object with GNU_SIZE corresponding + to GNAT_ENTITY. Return a positive value on success or zero on failure. */ static unsigned int -promote_object_alignment (tree gnu_type, Entity_Id gnat_entity) +promote_object_alignment (tree gnu_type, tree gnu_size, Entity_Id gnat_entity) { unsigned int align, size_cap, align_cap; @@ -9418,14 +9419,17 @@ promote_object_alignment (tree gnu_type, Entity_Id gnat_entity) align_cap = get_mode_alignment (ptr_mode); } + if (!gnu_size) +gnu_size = TYPE_SIZE (gnu_type); + /* Do the promotion within the above limits. 
*/ - if (!tree_fits_uhwi_p (TYPE_SIZE (gnu_type)) - || compare_tree_int (TYPE_SIZE (gnu_type), size_cap) > 0) + if (!tree_fits_uhwi_p (gnu_size) + || compare_tree_int (gnu_size, size_cap) > 0) align = 0; - else if (compare_tree_int (TYPE_SIZE (gnu_type), align_cap) > 0) + else if (compare_tree_int (gnu_size, align_cap) > 0) align = align_cap; else -align = ceil_pow2 (tree_to_uhwi (TYPE_SIZE (gnu_type))); +align = ceil_pow2 (tree_to_uhwi (gnu_size)); /* But make sure not to under-align the object. */ if (align <= TYPE_ALIGN (gnu_type))
[Ada] Tweak the warning about missing local raises
This prevents the warning from being given when there may still be regular exception handlers in the code, although some of them have been turned into local raises, by querying the predicate that determines whether such regular handlers are removed or not in the front-end. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * gcc-interface/trans.c (gnat_to_gnu) : Give the warning only if No_Exception_Propagation is active. : Likewise. : Likewise.diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -7872,21 +7872,24 @@ gnat_to_gnu (Node_Id gnat_node) case N_Pop_Constraint_Error_Label: gnat_temp = gnu_constraint_error_label_stack.pop (); if (Present (gnat_temp) - && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))) + && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)) + && No_Exception_Propagation_Active ()) Warn_If_No_Local_Raise (gnat_temp); break; case N_Pop_Storage_Error_Label: gnat_temp = gnu_storage_error_label_stack.pop (); if (Present (gnat_temp) - && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))) + && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)) + && No_Exception_Propagation_Active ()) Warn_If_No_Local_Raise (gnat_temp); break; case N_Pop_Program_Error_Label: gnat_temp = gnu_program_error_label_stack.pop (); if (Present (gnat_temp) - && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))) + && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)) + && No_Exception_Propagation_Active ()) Warn_If_No_Local_Raise (gnat_temp); break;
[Ada] Fix problematic import of type-generic GCC atomic builtin
This implements the support for most type-generic GCC atomic builtins. Tested on x86_64-pc-linux-gnu, committed on trunk gcc/ada/ * gcc-interface/gigi.h (resolve_atomic_size): Declare. (list_third): New inline function. * gcc-interface/decl.c (type_for_atomic_builtin_p): New function. (resolve_atomic_builtin): Likewise. (gnat_to_gnu_subprog_type): Perform type resolution for most of type-generic GCC atomic builtins and give an error for the rest. * gcc-interface/utils2.c (resolve_atomic_size): Make public.diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -241,6 +241,8 @@ static void set_rm_size (Uint, tree, Entity_Id); static unsigned int validate_alignment (Uint, Entity_Id, unsigned int); static unsigned int promote_object_alignment (tree, tree, Entity_Id); static void check_ok_for_atomic_type (tree, Entity_Id, bool); +static bool type_for_atomic_builtin_p (tree); +static tree resolve_atomic_builtin (enum built_in_function, tree); static tree create_field_decl_from (tree, tree, tree, tree, tree, vec); static tree create_rep_part (tree, tree, tree); @@ -6312,14 +6314,106 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition, the checker is expected to post diagnostics in this case. 
*/ if (gnu_builtin_decl) { - const intrin_binding_t inb - = { gnat_subprog, gnu_type, TREE_TYPE (gnu_builtin_decl) }; - - if (!intrin_profiles_compatible_p (&inb)) - post_error - ("??profile of& doesn''t match the builtin it binds!", - gnat_subprog); - return gnu_builtin_decl; + if (fndecl_built_in_p (gnu_builtin_decl, BUILT_IN_NORMAL)) + { + const enum built_in_function fncode + = DECL_FUNCTION_CODE (gnu_builtin_decl); + + switch (fncode) + { + case BUILT_IN_SYNC_FETCH_AND_ADD_N: + case BUILT_IN_SYNC_FETCH_AND_SUB_N: + case BUILT_IN_SYNC_FETCH_AND_OR_N: + case BUILT_IN_SYNC_FETCH_AND_AND_N: + case BUILT_IN_SYNC_FETCH_AND_XOR_N: + case BUILT_IN_SYNC_FETCH_AND_NAND_N: + case BUILT_IN_SYNC_ADD_AND_FETCH_N: + case BUILT_IN_SYNC_SUB_AND_FETCH_N: + case BUILT_IN_SYNC_OR_AND_FETCH_N: + case BUILT_IN_SYNC_AND_AND_FETCH_N: + case BUILT_IN_SYNC_XOR_AND_FETCH_N: + case BUILT_IN_SYNC_NAND_AND_FETCH_N: + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N: + case BUILT_IN_SYNC_LOCK_TEST_AND_SET_N: + case BUILT_IN_ATOMIC_EXCHANGE_N: + case BUILT_IN_ATOMIC_LOAD_N: + case BUILT_IN_ATOMIC_ADD_FETCH_N: + case BUILT_IN_ATOMIC_SUB_FETCH_N: + case BUILT_IN_ATOMIC_AND_FETCH_N: + case BUILT_IN_ATOMIC_NAND_FETCH_N: + case BUILT_IN_ATOMIC_XOR_FETCH_N: + case BUILT_IN_ATOMIC_OR_FETCH_N: + case BUILT_IN_ATOMIC_FETCH_ADD_N: + case BUILT_IN_ATOMIC_FETCH_SUB_N: + case BUILT_IN_ATOMIC_FETCH_AND_N: + case BUILT_IN_ATOMIC_FETCH_NAND_N: + case BUILT_IN_ATOMIC_FETCH_XOR_N: + case BUILT_IN_ATOMIC_FETCH_OR_N: + /* This is a generic builtin overloaded on its return + type, so do type resolution based on it. 
*/ + if (!VOID_TYPE_P (gnu_return_type) + && type_for_atomic_builtin_p (gnu_return_type)) + gnu_builtin_decl + = resolve_atomic_builtin (fncode, gnu_return_type); + else + { + post_error + ("??cannot import type-generic 'G'C'C builtin!", + gnat_subprog); + post_error + ("\\?use a supported result type", + gnat_subprog); + gnu_builtin_decl = NULL_TREE; + } + break; + + case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N: + /* This is a generic builtin overloaded on its third + parameter type, so do type resolution based on it. */ + if (list_length (gnu_param_type_list) >= 4 + && type_for_atomic_builtin_p + (list_third (gnu_param_type_list))) + gnu_builtin_decl + = resolve_atomic_builtin + (fncode, list_third (gnu_param_type_list)); + else + { + post_error + ("??cannot import type-generic 'G'C'C builtin!", + gnat_subprog); + post_error + ("\\?use a supported third parameter type", + gnat_subprog); + gnu_builtin_decl = NULL_TREE; + } + break; + + case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N: + case BUILT_IN_SYNC_LOCK_RELEASE_N: + case BUILT_IN_ATOMIC_STORE_N: + post_error + ("??unsupported type-generic 'G'C'C builtin!", + gnat_subprog); + gnu_builtin_decl = NULL_TREE; + break; + + default: + break; + } + } + + if (gnu_builtin_decl) + { + const intrin_binding_t inb + = { gnat_subprog, gnu_type, TREE_TYPE (gnu_builtin_decl) }; + + if (!intrin_profiles_compatible_p (&inb)) + post_error + ("??profile of& doesn''t match the builtin it binds!", + gnat_subprog); + + return gnu_builtin_decl; + } } /* Inability to find
[PATCH 1/2] OpenMP: Handle reference-typed struct members
This patch fixes the baseptrs-3.C test case introduced in the patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-October/580729.html The problematic case concerns OpenMP mapping clauses containing struct members of reference type, e.g. "mystruct.myref.myptr[:N]". To be able to access the array slice through the reference in the middle, we need to perform an attach action for that reference, since it is represented internally as a pointer. I don't think the spec allows for this case explicitly. The closest clause is (OpenMP 5.0, "2.19.7.1 map Clause"): "If the type of a list item is a reference to a type T then the reference in the device data environment is initialized to refer to the object in the device data environment that corresponds to the object referenced by the list item. If mapping occurs, it occurs as though the object were mapped through a pointer with an array section of type T and length one." The patch as is allows the mapping to work with just "mystruct.myref.myptr[:N]", without an explicit "mystruct.myref" mapping also (because, would that refer to the hidden pointer used by the reference, or the automatically-dereferenced data itself?). An attach/detach operation is thus synthesised for the reference. Tested with offloading to NVPTX and bootstrapped. OK (pending previously-posted series?). Julian Brown 2021-10-11 Julian Brown gcc/cp/ * semantics.c (finish_omp_clauses): Handle reference-typed members. gcc/ * gimplify.c (build_struct_group): Arrange for attach/detach nodes to be created for reference-typed struct members for OpenMP. Only create firstprivate_pointer/firstprivate_reference nodes for innermost struct accesses, those with an optionally-indirected DECL_P base. (omp_build_struct_sibling_lists): Handle two-element chain for inner struct component returned from build_struct_group. libgomp/ * testsuite/libgomp.c++/baseptrs-3.C: Remove XFAILs and extend test. 
--- gcc/cp/semantics.c | 4 + gcc/gimplify.c | 56 +-- libgomp/testsuite/libgomp.c++/baseptrs-3.C | 109 +++-- 3 files changed, 154 insertions(+), 15 deletions(-) diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index a50ec0ad883..bb8577d0d36 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -7862,6 +7862,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) STRIP_NOPS (t); if (TREE_CODE (t) == POINTER_PLUS_EXPR) t = TREE_OPERAND (t, 0); + if (REFERENCE_REF_P (t)) + t = TREE_OPERAND (t, 0); } } while (TREE_CODE (t) == COMPONENT_REF); @@ -7961,6 +7963,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) { t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); indir_component_ref_p = true; + if (REFERENCE_REF_P (t)) + t = TREE_OPERAND (t, 0); STRIP_NOPS (t); if (TREE_CODE (t) == POINTER_PLUS_EXPR) t = TREE_OPERAND (t, 0); diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 3d444d1836f..d187dfe1ef2 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -10249,7 +10249,10 @@ build_struct_group (enum omp_region_type region_type, enum tree_code code, /* FIXME: If we're not mapping the base pointer in some other clause on this directive, I think we want to create ALLOC/RELEASE here -- i.e. not early-exit. 
*/ - if (openmp && attach_detach) + if (openmp + && attach_detach + && !(TREE_CODE (TREE_TYPE (ocd)) == REFERENCE_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (ocd))) != POINTER_TYPE)) return NULL; #ifdef NOISY_SIBLING_LISTS @@ -10317,9 +10320,32 @@ build_struct_group (enum omp_region_type region_type, enum tree_code code, tree noind = strip_indirections (base); - if (!openmp + if (openmp + && TREE_CODE (TREE_TYPE (noind)) == REFERENCE_TYPE && (region_type & ORT_TARGET) && TREE_CODE (noind) == COMPONENT_REF) + { + tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_TO); + OMP_CLAUSE_DECL (c2) = unshare_expr (base); + OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (TREE_TYPE (noind)); + + tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c3) = unshare_expr (noind); + OMP_CLAUSE_SIZE (c3) = size_zero_node; + + OMP_CLAUSE_CHAIN (c2) = c3; + OMP_CLAUSE_CHAIN (c3) = NULL_TREE; + + *inner = c2;
[PATCH 2/2] OpenACC: Make deep-copy-arrayofstruct.c a libgomp/runtime test
I noticed that the test in question now compiles properly, and in fact runs properly too. Thus it's more useful as a runtime test than a passing compilation test that otherwise doesn't do much. This patch moves it to libgomp. Tested with offloading to NVPTX and bootstrapped. (I can probably self-approve as a testsuite-only change, but the patch depends on previously-posted series). Julian 2021-10-11 Julian Brown gcc/testsuite/ * libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c: Move test from here. libgomp/ * testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c: Move test to here. --- .../libgomp.oacc-c-c++-common}/deep-copy-arrayofstruct.c| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename {gcc/testsuite/c-c++-common/goacc => libgomp/testsuite/libgomp.oacc-c-c++-common}/deep-copy-arrayofstruct.c (98%) diff --git a/gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c similarity index 98% rename from gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c rename to libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c index 4247607b61c..a11c64749cc 100644 --- a/gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do run } */ #include #include -- 2.29.2
Re: [PATCH 06/13] arm: Fix mve_vmvnq_n_ argument mode
Christophe Lyon via Gcc-patches writes: > The vmvnq_n* intrinsics have [u]int[16|32]_t arguments, so use > iterator instead of HI in mve_vmvnq_n_. > > 2021-09-03 Christophe Lyon > > gcc/ > * config/arm/mve.md (mve_vmvnq_n_): Use V_elem mode > for operand 1. > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index e393518ea88..14d17060290 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -617,7 +617,7 @@ (define_insn "mve_vcvtaq_" > (define_insn "mve_vmvnq_n_" >[ > (set (match_operand:MVE_5 0 "s_register_operand" "=w") > - (unspec:MVE_5 [(match_operand:HI 1 "immediate_operand" "i")] > + (unspec:MVE_5 [(match_operand: 1 "immediate_operand" "i")] >VMVNQ_N)) >] >"TARGET_HAVE_MVE" I agree this is correct, but there's also the issue that the predicate is too broad. At the moment it allows any immediate, so things like: #include <arm_mve.h> int32x4_t foo(void) { return vmvnq_n_s32(0x12345678); } are accepted by the compiler and only rejected by the assembler. Not your bug to fix, just saying :-) Thanks, Richard
Re: [PATCH] options: Fix variable tracking option processing.
On Mon, Oct 11, 2021 at 3:21 PM Martin Liška wrote: > > On 10/11/21 15:05, Richard Biener wrote: > >> + if (!opts_set->x_flag_var_tracking) > >> +opts->x_flag_var_tracking = optimize >= 1; > > That's still not equivalent to the old code for -fvar-tracking-uninit which > > sets opts->x_flag_var_tracking to 1 and the old code checked that > > for AUTOINIT_VALUE but you override it here for -O0. > > > > Do you mean the newly added code: > > + if (!opts_set->x_flag_var_tracking) > > +opts->x_flag_var_tracking = optimize >= 1; > > > that should be equivalent to: > > - if (flag_var_tracking == AUTODETECT_VALUE) > > -flag_var_tracking = optimize >= 1; > > > ? Or do I miss something? Yes. I think to be equivalent it would need to be if (!opts_set->x_flag_var_tracking_uninit && !opts_set->x_flag_var_tracking) opts->x_flag_var_tracking = optimize >= 1; see how in the old code the order of the tests makes a difference because we test flag_* we also set. Please double-check the change with regard to that. Btw, I'd be more comfortable when the move of the code would be independent of the adjustment to not rely on AUTODETECT_VALUE. Can we do the latter change first (IIRC the former one failed already)? Richard. > > Thanks, > Martin
Re: [PATCH v3 1/6] rs6000: Support SSE4.1 "round" intrinsics
On Fri, Oct 08, 2021 at 05:31:11PM -0500, Segher Boessenkool wrote: > On Fri, Oct 08, 2021 at 02:27:28PM -0500, Paul A. Clarke wrote: > > On Fri, Oct 08, 2021 at 12:39:15PM -0500, Segher Boessenkool wrote: > > I see. Thanks for the reference. If I understand correctly, volatile > > prevents some optimizations based on the defined inputs/outputs, but > > the asm could still be subject to reordering. > > "asm volatile" means there is a side effect in the asm. This means that > it has to be executed on the real machine the same as on the abstract > machine, with the side effects in the same order. > > It can still be reordered, modulo those restrictions. It can be merged > with an identical asm as well. And the compiler can split this into two > identical asms on two paths. It seems odd to me that the compiler can make any assumptions about the side-effect(s). How does it know that a side-effect does not alter computation (as it indeed does in this case), such that reordering is still correct (which it wouldn't be in this case)? > In this case you might want a side effect (the instruction writes to > the FPSCR after all). But you need this to be tied to the FP code that > you want the flags to be changed for, and to the restore of the flags, > and finally you need to prevent other FP code from being scheduled in > between. > > You need more for that than just volatile, and the solution may well > make volatile not wanted: tying the insns together somehow will > naturally make the flags restored to a sane situation again, so the > whole group can be removed if you want, etc. > > > In this particular case, I don't think it's an issue with respect to > > reordering. The code in question is: > > + __asm__ __volatile__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > > The output (__fpscr_save) is a source for the following assignment, > > so the order should be respected, no?
> > Other FP code can be interleaved, and then do the wrong thing. > > > With respect to volatile, I worry about removing it, because I do > > indeed need that instruction to execute in order to clear the FPSCR > > exception enable bits. That side-effect is not otherwise known to the > > compiler. > > Yes. But as said above, volatile isn't enough to get this to behave > correctly. > > The easiest way out is to write this all in one piece of (inline) asm. Ugh. I really don't want to go there, not just because it's work, but I think this is a paradigm that should work without needing to drop fully into asm. Is there something unique about using an "asm" statement versus using, say, a builtin like __builtin_mtfsf or a hypothetical __builtin_mffsce? Very similar methods are used in glibc today. Are those broken? Would creating a __builtin_mffsce be another solution? Would adding memory barriers between the FPSCR manipulations and the code which is bracketed by them be sufficient? PC
Re: [PATCH] gcov: make profile merging smarter
On 10/5/21 12:04, Richard Biener wrote: On Mon, Oct 4, 2021 at 1:32 PM Martin Liška wrote: On 10/4/21 13:16, Richard Biener wrote: I meant in merge_one_data do not check ->stamp or ->checksum but instead rely on the counter merging code to detect mismatches (there's read_mismatch and read_error). There's multiple things we can do when we run into those: - when we did not actually merge any counter yet we could issue the warning as before and drop the old data on the floor - when we _did_ merge some counters already we could hard-error (I suppose we can't roll-back merging that took place already) - we could do the merging two-stage, first see whether the data matches and only if it did perform the merging I've got your point, you are basically suggesting a fine grained merging (function based). Huh, I don't like it much as it's typically a mistake in the build setup that 2 objects (with a different checksum) want to emit profile to the same .gcda file. I agree, it's usually a mistake. My patch handles the obvious situation where an object file is built exactly the same way (so no e.g. -O0 and -O2). Yeah, but then the two profiles may not be related at all ... Well, it's quite common case that one object file is then linked into multiple binaries (e.g. util.o in a project). We collect also sum_max: Sum of individual run max values. which helps handling such a situation. Note that all of the changes (including yours) have user-visible effects and the behavior is somewhat unobvious. Not merging when the object was re-built is indeed the most obvious behavior so I'm not sure it's a good idea. A new env variable to say whether to simply keep the _old_ data when merging in new data isn't possible would be another "fix" I guess? Even for a situation when checksum matches, but the timestamp is different? Sure, we can provide env. variables that can tweak the behavior. I suppose another distinguishing factor might be the name of the executable.
Well, at compile time, we don't know name of a final executable. But yeah, in the end it's a fishy area ... So I guess your originally posted patch might be the best way to go - can you try to amend the documentation as for the behavior with respect to re-compiling and profile merging? I suppose that if you re-compile just a single .o you currently merge into all the other .o file counters but _not_ into the newly compiled old counters. Yes, I can update the documentation. That would make coverage off as well for incremental re-compiling? Yes. I only can find @item Run the program on a representative workload to generate the arc profile information. This may be repeated any number of times. You can run concurrent instances of your program, and provided that the file system supports locking, the data files will be correctly updated. Unless a strict ISO C dialect option is in effect, @code{fork} calls are detected and correctly handled without double counting. but that's under -coverage, not sure if there's a better place to amend. Note I see there's -fprofile-dir which eventually can be used to "fix" the SPEC issue as well? We would have to provide a different option value of -fprofile-dir for both binaries. That's something we can't easily do in a SPEC config file. Let me update the documentation bits. Martin Richard. Cheers, Martin
Re: [PATCH 07/13] arm: Implement MVE predicates as vectors of booleans
Christophe Lyon via Gcc-patches writes: > This patch implements support for vectors of booleans to support MVE > predicates, instead of HImode. Since the ABI mandates pred16_t (aka > uint16_t) to represent predicates in intrinsics prototypes, we > introduce a new "predicate" type qualifier so that we can map relevant > builtins HImode arguments and return value to the appropriate vector > of booleans (VxBI). > > We have to update test_vector_ops_duplicate, because it iterates using > an offset in bytes, where we would need to iterate in bits: we stop > iterating when we reach the end of the vector of booleans. > > 2021-09-01 Christophe Lyon > > gcc/ > PR target/100757 > PR target/101325 > * config/arm/arm-builtins.c (arm_type_qualifiers): Add > qualifier_predicate. > (arm_init_simd_builtin_types): Add new simd types. > (arm_init_builtin): Map predicate vectors arguments to HImode. > (arm_expand_builtin_args): Move HImode predicate arguments to VxBI > rtx. Move return value to HImode rtx. > * config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes. > * config/arm/arm-simd-builtin-types.def (Pred1x16_t, > Pred2x8_t,Pred4x4_t): New. > * simplify-rtx.c (test_vector_ops_duplicate): Avoid going past the > end of the test vector. > > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index 3a9ff8f26b8..771759f0cdd 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -92,7 +92,9 @@ enum arm_type_qualifiers >qualifier_lane_pair_index = 0x1000, >/* Lane indices selected in quadtuplets - must be within range of previous > argument = a vector. */ > - qualifier_lane_quadtup_index = 0x2000 > + qualifier_lane_quadtup_index = 0x2000, > + /* MVE vector predicates. 
*/ > + qualifier_predicate = 0x4000 > }; > > /* The qualifier_internal allows generation of a unary builtin from > @@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void) >arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node; >arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node; > > + if (TARGET_HAVE_MVE) > +{ > + arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node; > + arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node; > + arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node; > +} > + >for (i = 0; i < nelts; i++) > { >tree eltype = arm_simd_types[i].eltype; > @@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, > arm_builtin_datum *d, >if (qualifiers & qualifier_map_mode) > op_mode = d->mode; > > + /* MVE Predicates use HImode as mandated by the ABI: pred16_t is > unsigned > + short. */ > + if (qualifiers & qualifier_predicate) > + op_mode = HImode; > + >/* For pointers, we want a pointer to the basic type >of the vector. */ >if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) > @@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode > map_mode, int fcode, > case ARG_BUILTIN_COPY_TO_REG: > if (POINTER_TYPE_P (TREE_TYPE (arg[argc]))) > op[argc] = convert_memory_address (Pmode, op[argc]); > + > + /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates. 
> */ > + if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL) > + op[argc] = gen_lowpart (mode[argc], op[argc]); > + > /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ > if (!(*insn_data[icode].operand[opno].predicate) > (op[argc], mode[argc])) > @@ -3229,6 +3248,13 @@ constant_arg: >else > emit_insn (insn); > > + if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL) > +{ > + rtx HItarget = gen_reg_rtx (HImode); > + emit_move_insn (HItarget, gen_lowpart (HImode, target)); > + return HItarget; > +} > + >return target; > } > > diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def > index a5e74ba3943..b414a709a62 100644 > --- a/gcc/config/arm/arm-modes.def > +++ b/gcc/config/arm/arm-modes.def > @@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */ > VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */ > VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */ > > +/* Predicates for MVE. */ > +VECTOR_BOOL_MODE (V16BI, 16, 2); > +VECTOR_BOOL_MODE (V8BI, 8, 2); > +VECTOR_BOOL_MODE (V4BI, 4, 2); > + > /* Fraction and accumulator vector modes. */ > VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ > VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ > diff --git a/gcc/config/arm/arm-simd-builtin-types.def > b/gcc/config/arm/arm-simd-builtin-types.def > index c19a1b6e3eb..d3987985b4c 100644 > --- a/gcc/config/arm/arm-simd-builtin-types.def > +++ b/gcc/config/arm/arm-simd-builtin-types.def > @@ -51,3 +51,7 @@ >ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20) >EN
Re: [Patch 4/7, Arm. GCC] Implement target feature macros for PACBTI.
On 08/10/2021 13:18, Tejas Belagod via Gcc-patches wrote: Hi, This patch implements target feature macros when PACBTI is enabled through the -march option or -mbranch-protection. Tested on arm-none-eabi. OK for trunk? 2021-10-04 Tejas Belagod gcc/ChangeLog: * config/arm/arm-c.c (arm_cpu_builtins): Define __ARM_FEATURE_BTI_DEFAULT and __ARM_FEATURE_PAC_DEFAULT. gcc/testsuite/ChangeLog: * gcc.target/arm/acle/pacbti-m-predef-2.c: New test. * gcc.target/arm/acle/pacbti-m-predef-4.c: New test. * gcc.target/arm/acle/pacbti-m-predef-5.c: New test. I presume the specification for this is ACLE - please say so rather than making me guess. + cpp_undef (pfile, "__ARM_FEATURE_BTI_DEFAULT"); + cpp_undef (pfile, "__ARM_FEATURE_PAC_DEFAULT"); + if (TARGET_HAVE_PACBTI) +{ + builtin_define_with_int_value ("__ARM_FEATURE_BTI_DEFAULT", +arm_enable_pacbti & 0x1); My reading of the ACLE specification would suggest this shouldn't be defined if it would have a value of 0, but that's not what this code does. I think it would be better to move this outside the TARGET_HAVE_PACBTI and use the def_or_undef approach. + builtin_define_with_int_value ("__ARM_FEATURE_PAC_DEFAULT", +arm_enable_pacbti >> 1); This one is less clear, could the value ever be zero? I guess exactly one of a-key and b-key must be defined and each has a separate bit. +} + + Not more than one blank line at the end of a block. diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c Given what I've said above, I think you need to also test that __ARM_FEATURE_BTI_DEFAULT is defined before testing the value (and emitting #error if it isn't). R.
Re: [PATCH 08/13] arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates
Christophe Lyon via Gcc-patches writes: > We make use of qualifier_predicate to describe MVE builtins > prototypes, restricting to auto-vectorizable vcmp* and vpsel builtins, > as they are exercised by the tests added earlier in the series. > > Special handling is needed for mve_vpselq because it has a v2di > variant, which has no natural VPR.P0 representation: we keep HImode > for it. > > The vector_compare expansion code is updated to use the right VxBI > mode instead of HI for the result. > > New mov patterns are introduced to handle the new modes. > > 2021-09-01 Christophe Lyon > > gcc/ > PR target/100757 > PR target/101325 > * config/arm/arm-builtins.c (BINOP_PRED_UNONE_UNONE_QUALIFIERS) > (BINOP_PRED_NONE_NONE_QUALIFIERS) > (TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS) > (TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS): New. > * config/arm/arm.c (arm_hard_regno_mode_ok): Handle new VxBI > modes. > (arm_mode_to_pred_mode): New. > (arm_expand_vector_compare): Use the right VxBI mode instead of > HI. > (arm_expand_vcond): Likewise. > * config/arm/arm_mve_builtins.def (vcmpneq_, vcmphiq_, vcmpcsq_) > (vcmpltq_, vcmpleq_, vcmpgtq_, vcmpgeq_, vcmpeqq_, vcmpneq_f) > (vcmpltq_f, vcmpleq_f, vcmpgtq_f, vcmpgeq_f, vcmpeqq_f, vpselq_u) > (vpselq_s, vpselq_f): Use new predicated qualifiers. > * config/arm/iterators.md (MVE_7): New mode iterator. > (MVE_VPRED, MVE_vpred): New attribute iterators. > * config/arm/mve.md (@mve_vcmpq_) > (@mve_vcmpq_f, @mve_vpselq_) > (@mve_vpselq_f): Use MVE_VPRED instead of HI. > (@mve_vpselq_v2di): Define separately. > (mov): New expander for VxBI modes. > (mve_mov): New insn for VxBI modes. 
> > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index 771759f0cdd..6e3638869f1 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -469,6 +469,12 @@ > arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define BINOP_UNONE_UNONE_UNONE_QUALIFIERS \ >(arm_binop_unone_unone_unone_qualifiers) > > +static enum arm_type_qualifiers > +arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned }; > +#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \ > + (arm_binop_pred_unone_unone_qualifiers) > + > static enum arm_type_qualifiers > arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_none, qualifier_immediate }; > @@ -487,6 +493,12 @@ > arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define BINOP_UNONE_NONE_NONE_QUALIFIERS \ >(arm_binop_unone_none_none_qualifiers) > > +static enum arm_type_qualifiers > +arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_predicate, qualifier_none, qualifier_none }; > +#define BINOP_PRED_NONE_NONE_QUALIFIERS \ > + (arm_binop_pred_none_none_qualifiers) > + > static enum arm_type_qualifiers > arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_none }; > @@ -558,6 +570,12 @@ > arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \ >(arm_ternop_none_none_none_unone_qualifiers) > > +static enum arm_type_qualifiers > +arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate }; > +#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \ > + (arm_ternop_none_none_none_pred_qualifiers) > + > static enum arm_type_qualifiers > arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_none, 
qualifier_immediate, > qualifier_unsigned }; > @@ -577,6 +595,13 @@ > arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define TERNOP_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \ >(arm_ternop_unone_unone_unone_unone_qualifiers) > > +static enum arm_type_qualifiers > +arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, > +qualifier_predicate }; > +#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \ > + (arm_ternop_unone_unone_unone_pred_qualifiers) > + > static enum arm_type_qualifiers > arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 1222cb0d0fe..5f6637d9a5f 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -25304,7 +25304,7 @@ arm_hard_regno_mode_ok (unsigned int regno, > machine_mode mode) > return false; > >if (IS_VPR_REGNUM (regno)) > -return mode == HImode; > +return mode == HImode || mode == V16BImode || mode == V8BImode || mode > == V4BImod
Re: [PATCH] gcov: make profile merging smarter
On 10/11/21 15:49, Martin Liška wrote: Let me update the documentation bits. There's the updated patch. May I install the patch now? Thanks, MartinFrom fdeb81a960faa19f75316e279a79c231da212f99 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 9 Sep 2021 13:02:24 +0200 Subject: [PATCH] gcov: make profile merging smarter Support merging of profiles that are built from a different .o files but belong to the same source file. Moreover, a checksum is verified during profile merging and so we can safely combine such profile. PR gcov-profile/90364 gcc/ChangeLog: * coverage.c (build_info): Emit checksum to the global variable. (build_info_type): Add new field for checksum. (coverage_obj_finish): Pass object_checksum. (coverage_init): Use 0 as checksum for .gcno files. * gcov-dump.c (dump_gcov_file): Dump also new checksum field. * gcov.c (read_graph_file): Read also checksum. * doc/invoke.texi: Document the behaviour change. libgcc/ChangeLog: * libgcov-driver.c (merge_one_data): Skip timestamp and verify checksums. (write_one_data): Write also checksum. * libgcov-util.c (read_gcda_file): Read also checksum field. * libgcov.h (struct gcov_info): Add new field. diff --git a/gcc/coverage.c b/gcc/coverage.c index 10d7f8366cb..4467f1eaa5c 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -129,16 +129,7 @@ static const char *const ctr_names[GCOV_COUNTERS] = { #undef DEF_GCOV_COUNTER /* Forward declarations. */ -static void read_counts_file (void); static tree build_var (tree, tree, int); -static void build_fn_info_type (tree, unsigned, tree); -static void build_info_type (tree, tree); -static tree build_fn_info (const struct coverage_data *, tree, tree); -static tree build_info (tree, tree); -static bool coverage_obj_init (void); -static vec *coverage_obj_fn -(vec *, tree, struct coverage_data const *); -static void coverage_obj_finish (vec *); /* Return the type node for gcov_type. 
*/ @@ -218,6 +209,9 @@ read_counts_file (void) tag = gcov_read_unsigned (); bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag); + /* Read checksum. */ + gcov_read_unsigned (); + counts_hash = new hash_table (10); while ((tag = gcov_read_unsigned ())) { @@ -935,6 +929,12 @@ build_info_type (tree type, tree fn_info_ptr_type) DECL_CHAIN (field) = fields; fields = field; + /* Checksum. */ + field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, + get_gcov_unsigned_t ()); + DECL_CHAIN (field) = fields; + fields = field; + /* Filename */ field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, build_pointer_type (build_qualified_type @@ -977,7 +977,7 @@ build_info_type (tree type, tree fn_info_ptr_type) function info objects. */ static tree -build_info (tree info_type, tree fn_ary) +build_info (tree info_type, tree fn_ary, unsigned object_checksum) { tree info_fields = TYPE_FIELDS (info_type); tree merge_fn_type, n_funcs; @@ -996,13 +996,19 @@ build_info (tree info_type, tree fn_ary) /* next -- NULL */ CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node); info_fields = DECL_CHAIN (info_fields); - + /* stamp */ CONSTRUCTOR_APPEND_ELT (v1, info_fields, build_int_cstu (TREE_TYPE (info_fields), bbg_file_stamp)); info_fields = DECL_CHAIN (info_fields); + /* Checksum. */ + CONSTRUCTOR_APPEND_ELT (v1, info_fields, + build_int_cstu (TREE_TYPE (info_fields), + object_checksum)); + info_fields = DECL_CHAIN (info_fields); + /* Filename */ da_file_name_len = strlen (da_file_name); filename_string = build_string (da_file_name_len + 1, da_file_name); @@ -1214,7 +1220,8 @@ coverage_obj_fn (vec *ctor, tree fn, function objects from CTOR. Generate the gcov_info initializer. 
*/ static void -coverage_obj_finish (vec *ctor) +coverage_obj_finish (vec *ctor, + unsigned object_checksum) { unsigned n_functions = vec_safe_length (ctor); tree fn_info_ary_type = build_array_type @@ -1231,7 +1238,7 @@ coverage_obj_finish (vec *ctor) varpool_node::finalize_decl (fn_info_ary); DECL_INITIAL (gcov_info_var) -= build_info (TREE_TYPE (gcov_info_var), fn_info_ary); += build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum); varpool_node::finalize_decl (gcov_info_var); } @@ -1300,7 +1307,6 @@ coverage_init (const char *filename) strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX); bbg_file_stamp = local_tick; - if (flag_auto_profile) read_autofdo_file (); else if (flag_branch_probabilities) @@ -1328,6 +1334,8 @@ coverage_init (const char *filename) gcov_write_unsigned (GCOV_NOTE_MAGIC); gcov_write_unsigned (GCOV_VERSION); gcov_write_unsigned (bbg_file_stamp); + /* Use an arbitrary checksum */ + gcov_write_unsigned (0); gcov_write_string (getpwd ()); /* Do not support has_unexecuted_blocks for Ada. */ @@ -1353,14 +1361,24 @@ coverage_finish (void) cannot uniquely stam
Re: [PATCH 09/13] arm: Fix vcond_mask expander for MVE (PR target/100757)
Christophe Lyon via Gcc-patches writes: > From: Christophe Lyon > > The problem in this PR is that we call VPSEL with a mask of vector > type instead of HImode. This happens because operand 3 in vcond_mask > is the pre-computed vector comparison and has vector type. > > This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE, > returning the appropriate VxBI mode when targeting MVE. In turn, this > implies implementing vec_cmp, > vec_cmpu and vcond_mask_, and we can > move vec_cmp, vec_cmpu and > vcond_mask_ back to neon.md since they are not > used by MVE anymore. The new * patterns listed above are > implemented in mve.md since they are only valid for MVE. However this > may make maintenance/comparison more painful than having all of them > in vec-common.md. > > In the process, we can get rid of the recently added vcond_mve > parameter of arm_expand_vector_compare. > > Compared to neon.md's vcond_mask_ before my "arm: > Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH > iterator added in r12-835 (to have V4HF/V8HF support), as well as the > (! || flag_unsafe_math_optimizations) condition which > was not present before r12-834 although SF modes were enabled by VDQW > (I think this was a bug). > > Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no > longer need to generate vpsel with vectors of 0 and 1: the masks are > now merged via scalar 'ands' instructions operating on 16-bit masks > after converting the boolean vectors. > > In addition, this patch fixes a problem in arm_expand_vcond() where > the result would be a vector of 0 or 1 instead of operand 1 or 2. 
> > Reducing the number of iterations in pr100757-3.c from 32 to 8, we > generate the code below: > > float a[32]; > float fn1(int d) { > float c = 4.0f; > for (int b = 0; b < 8; b++) > if (a[b] != 2.0f) > c = 5.0f; > return c; > } > > fn1: > ldr r3, .L3+48 > vldr.64 d4, .L3 // q2=(2.0,2.0,2.0,2.0) > vldr.64 d5, .L3+8 > vldrw.32q0, [r3] // q0=a(0..3) > addsr3, r3, #16 > vcmp.f32eq, q0, q2 // cmp a(0..3) == (2.0,2.0,2.0,2.0) > vldrw.32q1, [r3] // q1=a(4..7) > vmrs r3, P0 > vcmp.f32eq, q1, q2 // cmp a(4..7) == (2.0,2.0,2.0,2.0) > vmrsr2, P0 @ movhi > andsr3, r3, r2 // r3=select(a(0..3]) & select(a(4..7)) > vldr.64 d4, .L3+16 // q2=(5.0,5.0,5.0,5.0) > vldr.64 d5, .L3+24 > vmsr P0, r3 > vldr.64 d6, .L3+32 // q3=(4.0,4.0,4.0,4.0) > vldr.64 d7, .L3+40 > vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0) > vmov.32 r2, q3[1]// keep the scalar max > vmov.32 r0, q3[3] > vmov.32 r3, q3[2] > vmov.f32s11, s12 > vmovs15, r2 > vmovs14, r3 > vmaxnm.f32 s15, s11, s15 > vmaxnm.f32 s15, s15, s14 > vmovs14, r0 > vmaxnm.f32 s15, s15, s14 > vmovr0, s15 > bx lr > .L4: > .align 3 > .L3: > .word 1073741824 // 2.0f > .word 1073741824 > .word 1073741824 > .word 1073741824 > .word 1084227584 // 5.0f > .word 1084227584 > .word 1084227584 > .word 1084227584 > .word 1082130432 // 4.0f > .word 1082130432 > .word 1082130432 > .word 1082130432 > > 2021-09-02 Christophe Lyon > > PR target/100757 > gcc/ > * config/arm/arm-protos.h (arm_get_mask_mode): New prototype. > (arm_expand_vector_compare): Update prototype. > * config/arm/arm.c (TARGET_VECTORIZE_GET_MASK_MODE): New. > (arm_vector_mode_supported_p): Add support for VxBI modes. > (arm_expand_vector_compare): Remove useless generation of vpsel. > (arm_expand_vcond): Fix select operands. > (arm_get_mask_mode): New. > * config/arm/mve.md (vec_cmp): New. > (vec_cmpu): New. > (vcond_mask_): New. > * config/arm/vec-common.md (vec_cmp) > (vec_cmpu): Move to ... > * config/arm/neon.md (vec_cmp) > (vec_cmpu): ... here > and disable for MVE. 
> > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 9b1f61394ad..9e3d71e0c29 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -201,6 +201,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, > tree, rtx, tree); > extern bool arm_pad_reg_upward (machine_mode, tree, int); > #endif > extern int arm_apply_result_size (void); > +extern opt_machine_mode arm_get_mask_mode (machine_mode mode); > > #endif /* RTX_CODE */ > > @@ -372,7 +373,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, > rtx, rtx, rtx, rtx, > extern bool arm_fusion_enabled_p (tune_params::fuse_ops); > extern bool arm_valid_symbolic_address_p (rtx); > extern bool arm_validize_comparison (rtx *, rtx *, rtx *)
Re: [PATCH 10/13] arm: Convert remaining MVE vcmp builtins to predicate qualifiers
Christophe Lyon via Gcc-patches writes: > This is mostly a mechanical change, only tested by the intrinsics > expansion tests. > > 2021-09-02 Christophe Lyon > > gcc/ > PR target/100757 > PR target/101325 > * config/arm/arm-builtins.c (BINOP_UNONE_NONE_NONE_QUALIFIERS): > Delete. > (TERNOP_UNONE_NONE_NONE_UNONE_QUALIFIERS): Change to ... > (TERNOP_PRED_NONE_NONE_PRED_QUALIFIERS): ... this. > (TERNOP_PRED_UNONE_UNONE_PRED_QUALIFIERS): New. > * config/arm/arm_mve_builtins.def (vcmp*q_n_, vcmp*q_m_f): Use new > predicated qualifiers. > * config/arm/mve.md (mve_vcmpq_n_) > (mve_vcmp*q_m_f): Use MVE_VPRED instead of HI. OK, thanks. Richard > > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index 6e3638869f1..b3455d87d4f 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -487,12 +487,6 @@ > arm_binop_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define BINOP_NONE_NONE_UNONE_QUALIFIERS \ >(arm_binop_none_none_unone_qualifiers) > > -static enum arm_type_qualifiers > -arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_unsigned, qualifier_none, qualifier_none }; > -#define BINOP_UNONE_NONE_NONE_QUALIFIERS \ > - (arm_binop_unone_none_none_qualifiers) > - > static enum arm_type_qualifiers > arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_predicate, qualifier_none, qualifier_none }; > @@ -553,10 +547,10 @@ > arm_ternop_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] >(arm_ternop_unone_unone_imm_unone_qualifiers) > > static enum arm_type_qualifiers > -arm_ternop_unone_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_unsigned, qualifier_none, qualifier_none, qualifier_unsigned > }; > -#define TERNOP_UNONE_NONE_NONE_UNONE_QUALIFIERS \ > - (arm_ternop_unone_none_none_unone_qualifiers) > +arm_ternop_pred_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_predicate, qualifier_none, qualifier_none, > 
qualifier_predicate }; > +#define TERNOP_PRED_NONE_NONE_PRED_QUALIFIERS \ > + (arm_ternop_pred_none_none_pred_qualifiers) > > static enum arm_type_qualifiers > arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] > @@ -602,6 +596,13 @@ > arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \ >(arm_ternop_unone_unone_unone_pred_qualifiers) > > +static enum arm_type_qualifiers > +arm_ternop_pred_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned, > +qualifier_predicate }; > +#define TERNOP_PRED_UNONE_UNONE_PRED_QUALIFIERS \ > + (arm_ternop_pred_unone_unone_pred_qualifiers) > + > static enum arm_type_qualifiers > arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; > diff --git a/gcc/config/arm/arm_mve_builtins.def > b/gcc/config/arm/arm_mve_builtins.def > index 58a05e61bd9..91ed2073918 100644 > --- a/gcc/config/arm/arm_mve_builtins.def > +++ b/gcc/config/arm/arm_mve_builtins.def > @@ -118,9 +118,9 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, > v4si) > VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) > @@ -142,17 +142,17 @@ VAR3 (BINOP_UNONE_UNONE_NONE, vbrsrq_n_u, v16qi, v8hi, > v4si) > VAR3 
(BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si) > VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si) > -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si) > +VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si) > VAR3 (BINOP_PRED_NONE_N
[PATCH] middle-end/101480 - overloaded global new/delete
The following fixes the issue of ignoring side-effects on memory from overloaded global new/delete operators by not marking them as effectively 'const' apart from other explicitly specified side-effects. This will cause FAIL: g++.dg/warn/Warray-bounds-16.C -std=gnu++1? (test for excess errors) because we now no longer statically see the initialization loop never executes because the call to operator new can now clobber 'a.m'. This seems to be an issue with the warning code and/or ranger so I'm leaving this FAIL to be addressed as followup. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2021-10-11 Richard Biener PR middle-end/101480 * gimple.c (gimple_call_fnspec): Do not mark operator new/delete as const. * g++.dg/torture/pr10148.C: New testcase. --- gcc/gimple.c | 4 +- gcc/testsuite/g++.dg/torture/pr10148.C | 52 ++ 2 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr10148.C diff --git a/gcc/gimple.c b/gcc/gimple.c index bed7ff9e71c..cc7a88e822b 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -1549,12 +1549,12 @@ gimple_call_fnspec (const gcall *stmt) && DECL_IS_OPERATOR_DELETE_P (fndecl) && DECL_IS_REPLACEABLE_OPERATOR (fndecl) && gimple_call_from_new_or_delete (stmt)) -return ".co "; +return ". o "; /* Similarly operator new can be treated as malloc. 
*/ if (fndecl && DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl) && gimple_call_from_new_or_delete (stmt)) -return "mC"; +return "m "; return ""; } diff --git a/gcc/testsuite/g++.dg/torture/pr10148.C b/gcc/testsuite/g++.dg/torture/pr10148.C new file mode 100644 index 000..ed278f9f8d8 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr10148.C @@ -0,0 +1,52 @@ +/* { dg-do run } */ + +#include +#include + +static bool flag = false; + +class C +{ + bool prev; + +public: + C() : prev(flag) + { +flag = true; + } + + ~C() { +flag = prev; + } +}; + +void* operator new(size_t size) +{ + assert(flag); + return malloc(size); +} + +void operator delete(void *p) +{ + free(p); +} + +void g(int* p) +{ + delete p; +} + +void f() +{ + int* p; + { +C c; +p = new int; + } + g(p); +} + +int main(int, char**) +{ + f(); +} -- 2.31.1
Re: [PATCH 12/13] arm: Convert more load/store MVE builtins to predicate qualifiers
Christophe Lyon via Gcc-patches writes: > This patch covers a few builtins where we do not use the > iterator and thus we cannot use . > > However this introduces a problem for the v2di instructions, because > there is not predicate for this case. For instance, changing > STRSBS_P_QUALIFIERS breaks mve_vstrdq_scatter_base_p_v2di. > Similarly, this patch introduces problems with: > mve_vldrdq_gather_base_z_v2di > mve_vldrdq_gather_base_wb_z_v2di > mve_vldrdq_gather_base_nowb_z_v2di > mve_vstrdq_scatter_base_wb_p_v2di I think we should treat HImode as the predicate mode for V2DI, alongside the others, even though it isn't a vector boolean mode. Like you say, it'll end up being too much of a special case otherwise. Thanks, Richard > 2021-09-02 Christophe Lyon > > gcc/ > PR target/100757 > PR target/101325 > * config/arm/arm-builtins.c (STRSBS_P_QUALIFIERS): Use predicate > qualifier. > (STRSBU_P_QUALIFIERS): Likewise. > (LDRGBS_Z_QUALIFIERS): Likewise. > (LDRGBU_Z_QUALIFIERS): Likewise. > (LDRGBWBXU_Z_QUALIFIERS): Likewise. > (LDRGBWBS_Z_QUALIFIERS): Likewise. > (LDRGBWBU_Z_QUALIFIERS): Likewise. > (STRSBWBS_P_QUALIFIERS): Likewise. > (STRSBWBU_P_QUALIFIERS): Likewise. > * config/arm/mve.md: Use VxBI instead of HI. 
> > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index 06ff9d2278a..e58580bb828 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -738,13 +738,13 @@ arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > static enum arm_type_qualifiers > arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_void, qualifier_unsigned, qualifier_immediate, > - qualifier_none, qualifier_unsigned}; > + qualifier_none, qualifier_predicate}; > #define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers) > > static enum arm_type_qualifiers > arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_void, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned, qualifier_unsigned}; > + qualifier_unsigned, qualifier_predicate}; > #define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers) > > static enum arm_type_qualifiers > @@ -780,13 +780,13 @@ arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > static enum arm_type_qualifiers > arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned}; > + qualifier_predicate}; > #define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers) > > static enum arm_type_qualifiers > arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned}; > + qualifier_predicate}; > #define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers) > > static enum arm_type_qualifiers > @@ -826,7 +826,7 @@ arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > static enum arm_type_qualifiers > arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned}; > + qualifier_predicate}; > #define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) > > static enum arm_type_qualifiers > @@ -842,13 +842,13 @@ arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > static enum arm_type_qualifiers > 
arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned}; > + qualifier_predicate}; > #define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) > > static enum arm_type_qualifiers > arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, > - qualifier_unsigned}; > + qualifier_predicate}; > #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) > > static enum arm_type_qualifiers > @@ -864,13 +864,13 @@ arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > static enum arm_type_qualifiers > arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_const, > - qualifier_none, qualifier_unsigned}; > + qualifier_none, qualifier_predicate}; > #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) > > static enum arm_type_qualifiers > arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_const, > - qualifier_unsigned, qualifier_unsigned}; > + qualifier_unsigned, qualifier_predicate}; > #define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) > > static enum arm_type_qualifiers > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 2f36d47c800..241195909da 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -7294,7 +7294,7 @@ (define_insn "mve_vstrwq_scatter_base_p_v4si" > [(match_operand:V4SI 0 "s_register_operand" "w") >
Re: [PATCH 13/13] arm: Convert more MVE/CDE builtins to predicate qualifiers
Christophe Lyon via Gcc-patches writes: > This patch covers a few non-load/store builtins where we do not use > the iterator and thus we cannot use . > > We need to update the expected code in cde-mve-full-assembly.c because > we now use mve_movv16qi instead of movhi to generate the vmsr > instruction. > > 2021-09-02 Christophe Lyon > > gcc/ > PR target/100757 > PR target/101325 > * config/arm/arm-builtins.c (CX_UNARY_UNONE_QUALIFIERS): Use > predicate. > (CX_BINARY_UNONE_QUALIFIERS): Likewise. > (CX_TERNARY_UNONE_QUALIFIERS): Likewise. > (TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS): Delete. > (QUADOP_NONE_NONE_NONE_NONE_UNONE_QUALIFIERS): Delete. > (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE_QUALIFIERS): Delete. > * config/arm/arm_mve_builtins.def: Use predicated qualifiers. > * config/arm/mve.md: Use VxBI instead of HI. > > gcc/testsuite/ > * gcc.target/arm/acle/cde-mve-full-assembly.c: Remove expected '@ > movhi'. OK, thanks. Richard > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index e58580bb828..d725458f1ad 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -344,7 +344,7 @@ static enum arm_type_qualifiers > arm_cx_unary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_immediate, qualifier_none, >qualifier_unsigned_immediate, > - qualifier_unsigned }; > + qualifier_predicate }; > #define CX_UNARY_UNONE_QUALIFIERS (arm_cx_unary_unone_qualifiers) > > /* T (immediate, T, T, unsigned immediate). */ > @@ -353,7 +353,7 @@ arm_cx_binary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_immediate, >qualifier_none, qualifier_none, >qualifier_unsigned_immediate, > - qualifier_unsigned }; > + qualifier_predicate }; > #define CX_BINARY_UNONE_QUALIFIERS (arm_cx_binary_unone_qualifiers) > > /* T (immediate, T, T, T, unsigned immediate). 
*/ > @@ -362,7 +362,7 @@ arm_cx_ternary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_immediate, >qualifier_none, qualifier_none, qualifier_none, >qualifier_unsigned_immediate, > - qualifier_unsigned }; > + qualifier_predicate }; > #define CX_TERNARY_UNONE_QUALIFIERS (arm_cx_ternary_unone_qualifiers) > > /* The first argument (return type) of a store should be void type, > @@ -558,12 +558,6 @@ > arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define TERNOP_NONE_NONE_NONE_IMM_QUALIFIERS \ >(arm_ternop_none_none_none_imm_qualifiers) > > -static enum arm_type_qualifiers > -arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned }; > -#define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \ > - (arm_ternop_none_none_none_unone_qualifiers) > - > static enum arm_type_qualifiers > arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate }; > @@ -616,13 +610,6 @@ > arm_quadop_unone_unone_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define QUADOP_UNONE_UNONE_NONE_NONE_PRED_QUALIFIERS \ >(arm_quadop_unone_unone_none_none_pred_qualifiers) > > -static enum arm_type_qualifiers > -arm_quadop_none_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_none, qualifier_none, qualifier_none, qualifier_none, > -qualifier_unsigned }; > -#define QUADOP_NONE_NONE_NONE_NONE_UNONE_QUALIFIERS \ > - (arm_quadop_none_none_none_none_unone_qualifiers) > - > static enum arm_type_qualifiers > arm_quadop_none_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_none, qualifier_none, qualifier_none, qualifier_none, > @@ -637,13 +624,6 @@ > arm_quadop_none_none_none_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define QUADOP_NONE_NONE_NONE_IMM_PRED_QUALIFIERS \ >(arm_quadop_none_none_none_imm_pred_qualifiers) > > -static enum arm_type_qualifiers > 
-arm_quadop_unone_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, > -qualifier_unsigned, qualifier_unsigned }; > -#define QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \ > - (arm_quadop_unone_unone_unone_unone_unone_qualifiers) > - > static enum arm_type_qualifiers > arm_quadop_unone_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] >= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, > diff --git a/gcc/config/arm/arm_mve_builtins.def > b/gcc/config/arm/arm_mve_builtins.def > index bb79edf83ca..0fb53d866ec 100644 > --- a/gcc/config/arm/arm_mve_builtins.def > +++ b/gcc/config/arm/arm_mve_builtins.def > @@ -87,8 +87,8 @@ VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, > v4si, v2di) > VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di) > VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_
[PATCH] middle-end/102683 - fix .DEFERRED_INIT expansion
This avoids using an integer type for which we don't have an appropriate mode when expanding .DEFERRED_INIT to a non-memory entity. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Will push after it finishes. Richard. 2021-10-11 Richard Biener PR middle-end/102683 * internal-fn.c (expand_DEFERRED_INIT): Check for mode availability before building an integer type for storage purposes. --- gcc/internal-fn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 6bc256832f7..b3638192fb9 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3074,7 +3074,9 @@ expand_DEFERRED_INIT (internal_fn, gcall *stmt) tree init; if (tree_fits_uhwi_p (var_size) && (init_type == AUTO_INIT_PATTERN - || !is_gimple_reg_type (var_type))) + || !is_gimple_reg_type (var_type)) + && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT, + 0).exists ()) { unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size); unsigned char *buf = (unsigned char *) xmalloc (total_bytes); -- 2.31.1
Re: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang
*ping* From: Paul Keir Sent: 03 September 2021 11:31 To: gcc-patches@gcc.gnu.org Cc: libstd...@gcc.gnu.org Subject: Re: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang *ping* From: Paul Keir Sent: 20 August 2021 21:17 To: gcc-patches@gcc.gnu.org Cc: libstd...@gcc.gnu.org Subject: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang Hi, The current compare_three_way implementation makes provision for constant evaluation contexts (avoiding reinterpret_cast etc.), but the approach fails with Clang; when it compares two const volatile void pointers: "comparison between unequal pointers to void has unspecified result". I include a fix and test. Could someone commit the attached patch for me? Thanks, Paul Please consider the environment and think before you print. The University of the West of Scotland is a registered Scottish charity. Charity number SC002520. This e-mail and any attachment is for authorised use by the intended recipient(s) only. It may contain proprietary material, confidential information and/or be subject to legal privilege. It should not be copied, disclosed to, retained or used by, any other party. If you are not an intended recipient then please promptly delete this e-mail and any attachment and all copies and inform the sender. Please note that any views or opinions presented in this email are solely those of the author and do not necessarily represent those of the University of the West of Scotland. As a public body, the University of the West of Scotland may be required to make available emails as well as other written forms of information as a result of a request made under the Freedom of Information (Scotland) Act 2002.
[PATCH] libgomp: alloc* test fixes [PR102628, PR102668]
Hi! As reported, the alloc-9.c test and alloc-{1,2,3}.F* and alloc-11.f90 tests fail on powerpc64-linux with -m32. The reason why it fails just there is that malloc doesn't guarantee there 128-bit alignment (historically glibc guaranteed 2 * sizeof (void *) alignment from malloc). There are two separate issues. One is a thinko on my side. In this part of alloc-9.c test (copied to alloc-11.f90), we have 2 allocators, a with pool size 1024B and alignment 16B and default fallback and a2 with pool size 512B and alignment 32B and a as fallback allocator. We start at no allocations in both at line 194 and do: p = (int *) omp_alloc (sizeof (int), a2); // This succeeds in a2 and needs 4+overhead bytes (which includes the 32B alignment) p = (int *) omp_realloc (p, 420, a, a2); // This allocates 420 bytes+overhead in a, with 16B alignment and deallocates the above q = (int *) omp_alloc (sizeof (int), a); // This allocates 4+overhead bytes in a, with 16B alignment q = (int *) omp_realloc (q, 420, a2, a); // This allocates 420+overhead in a2 with 32B alignment q = (int *) omp_realloc (q, 768, a2, a2); // This attempts to reallocate, but as there are elevated alignment // requirements doesn't try to just realloc (even if it wanted to try that // a2 is almost full, with 512-420-overhead bytes left in it), so it // tries to alloc in a2, but there is no space left in the pool, falls // back to a, which already has 420+overhead bytes allocated in it and // 1024-420-overhead bytes left and so fails too and fails to default // non-pool allocator that allocates it, but doesn't guarantee alignment // higher than malloc guarantees. // But, the test expected 16B alignment. So, I've slightly lowered the allocation sizes in that part of the test 420->320 and 768 -> 568, so that the last test still fails to allocate in a2 (568 > 512-320-overhead) but succeeds in a as fallback, which was the intent of the test. 
Another thing is that alloc-1.F90 seems to be a transcription of libgomp.c-c++-common/alloc-1.c into Fortran, but alloc-1.c had: q = (int *) omp_alloc (768, a2); if ((((uintptr_t) q) % 16) != 0) abort (); q[0] = 7; q[767 / sizeof (int)] = 8; r = (int *) omp_alloc (512, a2); if ((((uintptr_t) r) % __alignof (int)) != 0) abort (); there but Fortran has: cq = omp_alloc (768_c_size_t, a2) if (mod (transfer (cq, intptr), 16_c_intptr_t) /= 0) stop 12 call c_f_pointer (cq, q, [768 / c_sizeof (i)]) q(1) = 7 q(768 / c_sizeof (i)) = 8 cr = omp_alloc (512_c_size_t, a2) if (mod (transfer (cr, intptr), 16_c_intptr_t) /= 0) stop 13 I'm changing the latter to 4_c_intptr_t because other spots in the testcase do that, Fortran sadly doesn't have c_alignof, but strictly speaking it isn't correct, __alignof (int) could be on some architectures smaller than 4. So probably alloc-1.F90 etc. should also have ! { dg-additional-sources alloc-7.c } ! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid for Fortran but not for C" } and use get__alignof_int. Tested on powerpc64-linux with -m32 and -m64, will commit after full bootstrap/regtest on x86_64-linux and i686-linux. 2021-10-11 Jakub Jelinek PR libgomp/102628 PR libgomp/102668 * testsuite/libgomp.c-c++-common/alloc-9.c (main): Decrease allocation sizes from 420 to 320 and from 768 to 568. * testsuite/libgomp.fortran/alloc-11.f90: Likewise. * testsuite/libgomp.fortran/alloc-1.F90: Change expected alignment for cr from 16 to 4. 
--- libgomp/testsuite/libgomp.c-c++-common/alloc-9.c.jj 2021-10-01 10:32:03.030954011 +0200 +++ libgomp/testsuite/libgomp.c-c++-common/alloc-9.c2021-10-11 15:34:07.719040377 +0200 @@ -195,25 +195,25 @@ main () if uintptr_t) p) % 32) != 0) abort (); p[0] = 85; - p = (int *) omp_realloc (p, 420, a, a2); + p = (int *) omp_realloc (p, 320, a, a2); if uintptr_t) p) % 16) != 0 || p[0] != 85) abort (); p[0] = 5; - p[419 / sizeof (int)] = 6; + p[319 / sizeof (int)] = 6; q = (int *) omp_alloc (sizeof (int), a); if uintptr_t) q) % 16) != 0) abort (); q[0] = 43; - q = (int *) omp_realloc (q, 420, a2, a); + q = (int *) omp_realloc (q, 320, a2, a); if uintptr_t) q) % 32) != 0 || q[0] != 43) abort (); q[0] = 44; - q[419 / sizeof (int)] = 8; - q = (int *) omp_realloc (q, 768, a2, a2); + q[319 / sizeof (int)] = 8; + q = (int *) omp_realloc (q, 568, a2, a2); if uintptr_t) q) % 16) != 0 || q[0] != 44) abort (); q[0] = 7; - q[767 / sizeof (int)] = 8; + q[567 / sizeof (int)] = 8; omp_free (p, omp_null_allocator); omp_free (q, a2); omp_destroy_allocator (a2); --- libgomp/testsuite/libgomp.fortran/alloc-11.f90.jj 2021-10-04 10:16:11.013138378 +0200 +++ libgomp/testsuite/libgomp.fortran/alloc-11.f90 2021-10-11 15:51:08.938495429 +0200 @@ -230,32 +230,32 @@ program main if (mod (TRANSFER (p, iptr), 32) /= 0)
Re: [PATCH 07/11] OpenMP: Fix non-zero attach/detach bias for struct dereferences
On Fri, 1 Oct 2021 10:09:05 -0700 Julian Brown wrote: > libgomp/ > * testsuite/libgomp.c++/baseptrs-3.C: Add test (XFAILed for > now). This XFAILed test is addressed in the followup patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581342.html Cheers, Julian
Re: [PATCH 12/13] arm: Convert more load/store MVE builtins to predicate qualifiers
Richard Sandiford via Gcc-patches writes: > Christophe Lyon via Gcc-patches writes: >> This patch covers a few builtins where we do not use the >> iterator and thus we cannot use . >> >> However this introduces a problem for the v2di instructions, because >> there is not predicate for this case. For instance, changing >> STRSBS_P_QUALIFIERS breaks mve_vstrdq_scatter_base_p_v2di. >> Similarly, this patch introduces problems with: >> mve_vldrdq_gather_base_z_v2di >> mve_vldrdq_gather_base_wb_z_v2di >> mve_vldrdq_gather_base_nowb_z_v2di >> mve_vstrdq_scatter_base_wb_p_v2di > > I think we should treat HImode as the predicate mode for V2DI, > alongside the others, even though it isn't a vector boolean mode. > Like you say, it'll end up being too much of a special case otherwise. Actually: couldn't we have a V8BI too? Sorry, I was getting confused with something else and thought that that wouldn't be possible for some reason. V8BI might not be used as much as the other boolean modes, but it still seems like conceptually the right thing to do. Thanks, Richard > > Thanks, > Richard > >> 2021-09-02 Christophe Lyon >> >> gcc/ >> PR target/100757 >> PR target/101325 >> * config/arm/arm-builtins.c (STRSBS_P_QUALIFIERS): Use predicate >> qualifier. >> (STRSBU_P_QUALIFIERS): Likewise. >> (LDRGBS_Z_QUALIFIERS): Likewise. >> (LDRGBU_Z_QUALIFIERS): Likewise. >> (LDRGBWBXU_Z_QUALIFIERS): Likewise. >> (LDRGBWBS_Z_QUALIFIERS): Likewise. >> (LDRGBWBU_Z_QUALIFIERS): Likewise. >> (STRSBWBS_P_QUALIFIERS): Likewise. >> (STRSBWBU_P_QUALIFIERS): Likewise. >> * config/arm/mve.md: Use VxBI instead of HI. 
>> >> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c >> index 06ff9d2278a..e58580bb828 100644 >> --- a/gcc/config/arm/arm-builtins.c >> +++ b/gcc/config/arm/arm-builtins.c >> @@ -738,13 +738,13 @@ arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >> static enum arm_type_qualifiers >> arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_void, qualifier_unsigned, qualifier_immediate, >> - qualifier_none, qualifier_unsigned}; >> + qualifier_none, qualifier_predicate}; >> #define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers) >> >> static enum arm_type_qualifiers >> arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_void, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned, qualifier_unsigned}; >> + qualifier_unsigned, qualifier_predicate}; >> #define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers) >> >> static enum arm_type_qualifiers >> @@ -780,13 +780,13 @@ arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] >> static enum arm_type_qualifiers >> arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_none, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned}; >> + qualifier_predicate}; >> #define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers) >> >> static enum arm_type_qualifiers >> arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned}; >> + qualifier_predicate}; >> #define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers) >> >> static enum arm_type_qualifiers >> @@ -826,7 +826,7 @@ arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] >> static enum arm_type_qualifiers >> arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned}; >> + qualifier_predicate}; >> #define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) >> >> static enum arm_type_qualifiers >> @@ -842,13 +842,13 @@ arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] >> static enum 
arm_type_qualifiers >> arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_none, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned}; >> + qualifier_predicate}; >> #define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) >> >> static enum arm_type_qualifiers >> arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, >> - qualifier_unsigned}; >> + qualifier_predicate}; >> #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) >> >> static enum arm_type_qualifiers >> @@ -864,13 +864,13 @@ arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] >> static enum arm_type_qualifiers >> arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_unsigned, qualifier_unsigned, qualifier_const, >> - qualifier_none, qualifier_unsigned}; >> + qualifier_none, qualifier_predicate}; >> #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) >> >> static enum arm_type_qualifiers >> arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] >>= { qualifier_unsigned, qualifier_unsigned, qualifier_const, >> - qualifier_unsigned, qualifier_unsigned}; >> + qualifier_unsi
Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
Prathamesh Kulkarni writes: > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford > wrote: >> >> Thanks for looking at this. >> >> Prathamesh Kulkarni writes: >> > Hi, >> > As mentioned in PR, for the following test-case: >> > >> > typedef unsigned char uint8_t; >> > >> > static inline uint8_t >> > x264_clip_uint8(uint8_t x) >> > { >> > uint8_t t = -x; >> > uint8_t t1 = x & ~63; >> > return (t1 != 0) ? t : x; >> > } >> > >> > void >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n) >> > { >> > for (int x = 0; x < n*16; x++) >> > dst[x] = x264_clip_uint8(src[x]); >> > } >> > >> > -O3 -mcpu=generic+sve generates following code for the inner loop: >> > >> > .L3: >> > ld1bz0.b, p0/z, [x1, x2] >> > movprfx z2, z0 >> > and z2.b, z2.b, #0xc0 >> > movprfx z1, z0 >> > neg z1.b, p1/m, z0.b >> > cmpeq p2.b, p1/z, z2.b, #0 >> > sel z0.b, p2, z0.b, z1.b >> > st1bz0.b, p0, [x0, x2] >> > add x2, x2, x4 >> > whilelo p0.b, w2, w3 >> > b.any .L3 >> > >> > The sel is redundant since we could conditionally negate z0 based on >> > the predicate >> > comparing z2 with 0. >> > >> > As suggested in the PR, the attached patch, introduces a new >> > conditional internal function .COND_NEG, and in gimple-isel replaces >> > the following sequence: >> >op2 = -op1 >> >op0 = A cmp B >> >lhs = op0 ? op1 : op2 >> > >> > with: >> >op0 = A inverted_cmp B >> >lhs = .COND_NEG (op0, op1, op1). >> > >> > lhs = .COD_NEG (op0, op1, op1) >> > implies >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false. >> > >> > With patch, it generates the following code-gen: >> > .L3: >> > ld1bz0.b, p0/z, [x1, x2] >> > movprfx z1, z0 >> > and z1.b, z1.b, #0xc0 >> > cmpne p1.b, p2/z, z1.b, #0 >> > neg z0.b, p1/m, z0.b >> > st1bz0.b, p0, [x0, x2] >> > add x2, x2, x4 >> > whilelo p0.b, w2, w3 >> > b.any .L3 >> > >> > While it seems to work for this test-case, I am not entirely sure if >> > the patch is correct. Does it look in the right direction ? 
>> >> For binary ops we use match.pd rather than isel: >> >> (for uncond_op (UNCOND_BINARY) >> cond_op (COND_BINARY) >> (simplify >> (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3) >> (with { tree op_type = TREE_TYPE (@4); } >>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), >> op_type) >> && is_truth_type_for (op_type, TREE_TYPE (@0))) >> (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3)) >> (simplify >> (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3))) >> (with { tree op_type = TREE_TYPE (@4); } >>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), >> op_type) >> && is_truth_type_for (op_type, TREE_TYPE (@0))) >> (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1))) >> >> I think it'd be good to do the same here, using new (UN)COND_UNARY >> iterators. (The iterators will only have one value to start with, >> but other unary ops could get the same treatment in future.) > Thanks for the suggestions. > The attached patch adds a pattern to match.pd to replace: > cond = a cmp b > r = cond ? x : -x > with: > cond = a inverted_cmp b > r = cond ? -x : x > > Code-gen with patch for inner loop: > .L3: > ld1bz0.b, p0/z, [x1, x2] > movprfx z1, z0 > and z1.b, z1.b, #0xc0 > cmpne p1.b, p2/z, z1.b, #0 > neg z0.b, p1/m, z0.b > st1bz0.b, p0, [x0, x2] > add x2, x2, x4 > whilelo p0.b, w2, w3 > b.any .L3 > > Does it look OK ? > I didn't add it under (UN)COND_UNARY since it inverts the comparison, > which we might not want to do for other unary ops ? I think we should follow the structure of the current binary and ternary patterns: cope with unary operations in either arm of the vec_cond and use bit_not for the case in which the unary operation is in the “false” arm of the vec_cond. The bit_not will be folded away if the comparison can be inverted, but it will be left in-place if the comparison can't be inverted (as for some FP comparisons). 
Thanks, Richard > > Also, I am not sure, how to test if target supports conditional > internal function ? > I tried to use: > (for cmp (tcc_comparison) > icmp (inverted_tcc_comparison) > (simplify > (vec_cond (cmp@2 @0 @1) @3 (negate @3)) >(with { auto op_type = TREE_TYPE (@2); } > (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type) > && is_truth_type_for (op_type, TREE_TYPE (@0))) > (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3) > > but both the conditions seem to fail. > > Thanks, > Prathamesh > > >> >> Richard >> >> >> > >> > Thanks, >> > Prathamesh >> > >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc >> > index 38e90933c3e..5b0dd3c1993 100644 >> > --- a/
Re: [PATCH v4] attribs: Implement -Wno-attributes=vendor::attr [PR101940]
Ping. On Tue, Sep 28, 2021 at 04:20:46PM -0400, Marek Polacek wrote: > On Thu, Sep 23, 2021 at 02:25:16PM -0400, Jason Merrill wrote: > > On 9/20/21 18:59, Marek Polacek via Gcc-patches wrote: > > > +void > > > +handle_ignored_attributes_option (vec *v) > > > +{ > > > + if (v == nullptr) > > > +return; > > > + > > > + for (auto opt : v) > > > +{ > > > + if (strcmp (opt, "clang") == 0) > > > + { > > > + // TODO > > > + continue; > > > + } > > > > If this doesn't work yet, let's not accept it at all for now. > > Ok. > > > > + char *q = strstr (opt, "::"); > > > + /* We don't accept '::attr'. */ > > > + if (q == nullptr || q == opt) > > > + { > > > + error ("wrong argument to ignored attributes"); > > > + inform (input_location, "valid format is %, %, " > > > + "or %"); > > > > ...or even mention it. Users can ignore clang:: instead, it doesn't matter > > to us if clang attributes are misspelled. > > Removed. > > > > + continue; > > > + } > > > + /* Cut off the vendor part. */ > > > + *q = '\0'; > > > + char *vendor = opt; > > > + char *attr = q + 2; > > > + /* Verify that they look valid. */ > > > + auto valid_p = [](const char *s) { > > > + for (; *s != '\0'; ++s) > > > + if (!ISALNUM (*s) && *s != '_') > > > + return false; > > > + return true; > > > + }; > > > + if (!valid_p (vendor) || !valid_p (attr)) > > > + { > > > + error ("wrong argument to ignored attributes"); > > > + continue; > > > + } > > > + /* Turn "__attr__" into "attr" so that we have a canonical form of > > > + attribute names. Likewise for vendor. */ > > > + auto strip = [](char *&s) { > > > + const size_t l = strlen (s); > > > + if (l > 4 && s[0] == '_' && s[1] == '_' > > > + && s[l - 1] == '_' && s[l - 2] == '_') > > > + { > > > + s[l - 2] = '\0'; > > > + s += 2; > > > + } > > > + }; > > > + strip (attr); > > > + strip (vendor); > > > + /* If we've already seen this vendor::attr, ignore it. Attempting > > > to > > > + register it twice would lead to a crash. 
*/ > > > + if (lookup_scoped_attribute_spec (get_identifier (vendor), > > > + get_identifier (attr))) > > > + continue; > > > + /* In the "vendor::" case, we should ignore *any* attribute coming > > > + from this attribute namespace. */ > > > + const bool ignored_ns = attr[0] == '\0'; > > > > Maybe set attr to nullptr instead of declaring ignored_ns? > > > > > + /* Create a table with extra attributes which we will register. > > > + We can't free it here, so squirrel away the pointers. */ > > > + attribute_spec *table = new attribute_spec[2]; > > > + ignored_attributes_table.safe_push (table); > > > + table[0] = { ignored_ns ? nullptr : attr, 0, 0, false, false, > > > > ...so this can just use attr. > > I also need ignored_ns... > > > > +false, false, nullptr, nullptr }; > > > + table[1] = { nullptr, 0, 0, false, false, false, false, nullptr, > > > nullptr }; > > > + register_scoped_attributes (table, vendor, ignored_ns); > > ...here, but I tweaked this a bit to get rid of the bool. > > > > +} > > > +} > > > + > > > +/* Free data we might have allocated when adding extra attributes. */ > > > + > > > +void > > > +free_attr_data () > > > +{ > > > + for (auto x : ignored_attributes_table) > > > +delete[] x; > > > +} > > > > You probably also want to zero out ignored_attributes_table at this point. > > Done. > > > > /* Initialize attribute tables, and make some sanity checks if checking > > > is > > > enabled. */ > > > @@ -252,6 +353,9 @@ init_attributes (void) > > > /* Put all the GNU attributes into the "gnu" namespace. 
*/ > > > register_scoped_attributes (attribute_tables[i], "gnu"); > > > + vec *ignored = (vec *) flag_ignored_attributes; > > > + handle_ignored_attributes_option (ignored); > > > + > > > invoke_plugin_callbacks (PLUGIN_ATTRIBUTES, NULL); > > > attributes_initialized = true; > > > } > > > @@ -456,6 +560,19 @@ diag_attr_exclusions (tree last_decl, tree node, > > > tree attrname, > > > return found; > > > } > > > +/* Return true iff we should not complain about unknown attributes > > > + coming from the attribute namespace NS. This is the case for > > > + the -Wno-attributes=ns:: command-line option. */ > > > + > > > +static bool > > > +attr_namespace_ignored_p (tree ns) > > > +{ > > > + if (ns == NULL_TREE) > > > +return false; > > > + scoped_attributes *r = find_attribute_namespace (IDENTIFIER_POINTER > > > (ns)); > > > + return r && r->ignored_p; > > > +} > > > + > > > /* Process the attributes listed in ATTRIBUTES and install them in > > > *NODE, > > > which is either a DECL (including a TYPE_DECL) or a TYPE. If a DECL, > > > it should be modified in place; if a TYPE, a copy should be created > > > @@ -556,7 +673,8 @@ decl_a
Re: [PATCH] attribs: Allow optional second arg for attr deprecated [PR102049]
Any thoughts? On Thu, Sep 23, 2021 at 12:16:36PM -0400, Marek Polacek via Gcc-patches wrote: > Clang implements something we don't have: > > __attribute__((deprecated("message", "replacement"))); > > which seems pretty neat so I wrote this patch to add it to gcc. > > It doesn't allow the optional second argument in the standard [[]] > form so as not to clash with possible future standard additions. > > I had hoped we could print a nice fix-it replacement hint, but that > won't be possible until warn_deprecated_use gets something better than > input_location. > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? > > PR c++/102049 > > gcc/c-family/ChangeLog: > > * c-attribs.c (c_common_attribute_table): Increase max_len for > deprecated. > (handle_deprecated_attribute): Allow an optional second argument > in the GNU form of attribute deprecated. > > gcc/c/ChangeLog: > > * c-parser.c (c_parser_std_attribute): Give a diagnostic when > the standard form of an attribute deprecated has a second argument. > > gcc/ChangeLog: > > * doc/extend.texi: Document attribute deprecated with an > optional second argument. > * tree.c (warn_deprecated_use): Print the replacement argument, > if any. > > gcc/testsuite/ChangeLog: > > * gcc.dg/c2x-attr-deprecated-3.c: Adjust dg-error. > * c-c++-common/Wdeprecated-arg-1.c: New test. > --- > gcc/c-family/c-attribs.c | 17 - > gcc/c/c-parser.c | 8 ++ > gcc/doc/extend.texi | 24 ++ > .../c-c++-common/Wdeprecated-arg-1.c | 21 > gcc/testsuite/gcc.dg/c2x-attr-deprecated-3.c | 2 +- > gcc/tree.c| 25 +++ > 6 files changed, 90 insertions(+), 7 deletions(-) > create mode 100644 gcc/testsuite/c-c++-common/Wdeprecated-arg-1.c > > diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c > index 007b928c54b..ef857a9ae2c 100644 > --- a/gcc/c-family/c-attribs.c > +++ b/gcc/c-family/c-attribs.c > @@ -409,7 +409,7 @@ const struct attribute_spec c_common_attribute_table[] = > to prevent its usage in source code. 
*/ >{ "no vops",0, 0, true, false, false, false, > handle_novops_attribute, NULL }, > - { "deprecated", 0, 1, false, false, false, false, > + { "deprecated", 0, 2, false, false, false, false, > handle_deprecated_attribute, NULL }, >{ "unavailable",0, 1, false, false, false, false, > handle_unavailable_attribute, NULL }, > @@ -4107,6 +4107,21 @@ handle_deprecated_attribute (tree *node, tree name, >error ("deprecated message is not a string"); >*no_add_attrs = true; > } > + else if (TREE_CHAIN (args) != NULL_TREE) > +{ > + /* We allow an optional second argument in the GNU form of > + attribute deprecated, which specifies the replacement. */ > + if (flags & ATTR_FLAG_CXX11) > + { > + error ("replacement argument only allowed in GNU attributes"); > + *no_add_attrs = true; > + } > + else if (TREE_CODE (TREE_VALUE (TREE_CHAIN (args))) != STRING_CST) > + { > + error ("replacement argument is not a string"); > + *no_add_attrs = true; > + } > +} > >if (DECL_P (*node)) > { > diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c > index fa29d2c15fc..2b47f01d166 100644 > --- a/gcc/c/c-parser.c > +++ b/gcc/c/c-parser.c > @@ -4952,6 +4952,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) > TREE_VALUE (attribute) > = c_parser_attribute_arguments (parser, takes_identifier, > require_string, false); > + if (c_parser_next_token_is (parser, CPP_COMMA) > + && strcmp (IDENTIFIER_POINTER (name), "deprecated") == 0) > + { > + error_at (open_loc, "replacement argument only allowed in " > + "GNU attributes"); > + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL); > + return error_mark_node; > + } >} > else >c_parser_balanced_token_sequence (parser); > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 9501a60f20e..7d399f4b2bc 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -2860,6 +2860,7 @@ StrongAlias (allocate, alloc); > > @item deprecated > @itemx deprecated (@var{msg}) > +@itemx deprecated (@var{msg}, @var{replacement}) > @cindex 
@code{deprecated} function attribute > The @code{deprecated} attribute results in a warning if the function > is used anywhere in the source file. This is useful when identifying > @@ -2880,6 +2881,13 @@ results in a warning on line 3 but not line 2. The > optional @var{msg} > argument, which must be a string, is printed in the warning if
Re: [PATCH] rs6000/test: Adjust some cases due to O2 vect [PR102658]
Hi! On Mon, Oct 11, 2021 at 10:47:00AM +0800, Kewen.Lin wrote: > As PR102658 shows, commit r12-4240 enables vectorization at O2, > some cases need to be adjusted accordingly for rs6000 port. > > - For target specific test cases, this adds -fno-tree-vectorize > to retain original test points, otherwise vectorization can > make some expected scalar instructions gone or generate some > unexpected instructions for vector construction. Ah good choice. > - For generic test cases, it follows the existing suggested > practice with necessary target/xfail selector. Not such a great choice. Many of those tests do not make sense with vectorisation enabled. This should have been thought about, in some cases resulting in not running the test with vectorisation enabled, and in some cases duplicating the test, once with and once without vectorisation. But you are just following established practice, so :-) > - struct A1 a = { 0, { 1 } }; // { dg-warning "\\\[-Wstringop-overflow" "" > { target { i?86-*-* x86_64-*-* } } } > + struct A1 a = { 0, { 1 } }; // { dg-warning "\\\[-Wstringop-overflow" "" > { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } I don't know if powerpc*-*-* is the correct choice in all these cases. Sometimes it might have to be powerpc*-*-linux* or similar. We'll find out :-) (An xfail causes XPASS if the test does *not* fail). > +/* Now O2 enables vectorization by default, which generates unexpected float > + conversion for vector construction, so simply disable it. */ It is good to see these comments. I love puzzles, but not in the testsuite! :-) Okay for trunk. Thanks! Segher
Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set
Hi! On Thu, Aug 26, 2021 at 09:19:30AM +0800, HAO CHEN GUI wrote: > gcc/ > ?? * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin): > ?? Modify the VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP, > ?? VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP expansions. Something mangles your email, giving all those question marks. It is hard to review like this. Don't send patches as replies in threads please. It is impossible to keep track of such things. > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -12159,6 +12159,11 @@ rs6000_gimple_fold_builtin > (gimple_stmt_iterator *gsi) > return true; > /* flavors of vec_min.?? */ > case VSX_BUILTIN_XVMINDP: > +?? case ALTIVEC_BUILTIN_VMINFP: > +?? if (!flag_finite_math_only || flag_signed_zeros) > +?? return false; > +?? /* Fall through to MIN_EXPR.?? */ > +?? gcc_fallthrough (); > case P8V_BUILTIN_VMINSD: > case P8V_BUILTIN_VMINUD: > case ALTIVEC_BUILTIN_VMINSB: Yeah I would rather not review this like this :-) Segher
Re: [RFC] ldist: Recognize rawmemchr loop patterns
On Fri, Sep 17, 2021 at 10:08:27AM +0200, Richard Biener wrote: > On Mon, Sep 13, 2021 at 4:53 PM Stefan Schulze Frielinghaus > wrote: > > > > On Mon, Sep 06, 2021 at 11:56:21AM +0200, Richard Biener wrote: > > > On Fri, Sep 3, 2021 at 10:01 AM Stefan Schulze Frielinghaus > > > wrote: > > > > > > > > On Fri, Aug 20, 2021 at 12:35:58PM +0200, Richard Biener wrote: > > > > [...] > > > > > > > > > > > > > > + /* Handle strlen like loops. */ > > > > > > > + if (store_dr == NULL > > > > > > > + && integer_zerop (pattern) > > > > > > > + && TREE_CODE (reduction_iv.base) == INTEGER_CST > > > > > > > + && TREE_CODE (reduction_iv.step) == INTEGER_CST > > > > > > > + && integer_onep (reduction_iv.step) > > > > > > > + && (types_compatible_p (TREE_TYPE (reduction_var), > > > > > > > size_type_node) > > > > > > > + || TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var > > > > > > > +{ > > > > > > > > > > > > > > I wonder what goes wrong with a larger or smaller wrapping IV > > > > > > > type? > > > > > > > The iteration > > > > > > > only stops when you load a NUL and the increments just wrap along > > > > > > > (you're > > > > > > > using the pointer IVs to compute the strlen result). Can't you > > > > > > > simply truncate? > > > > > > > > > > > > I think truncation is enough as long as no overflow occurs in > > > > > > strlen or > > > > > > strlen_using_rawmemchr. > > > > > > > > > > > > > For larger than size_type_node (actually larger than > > > > > > > ptr_type_node would matter > > > > > > > I guess), the argument is that since pointer wrapping would be > > > > > > > undefined anyway > > > > > > > the IV cannot wrap either. Now, the correct check here would > > > > > > > IMHO be > > > > > > > > > > > > > > TYPE_PRECISION (TREE_TYPE (reduction_var)) < TYPE_PRECISION > > > > > > > (ptr_type_node) > > > > > > >|| TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (pointer-iv-var)) > > > > > > > > > > > > > > ? 
> > > > > > > > > > > > Regarding the implementation which makes use of rawmemchr: > > > > > > > > > > > > We can count at most PTRDIFF_MAX many bytes without an overflow. > > > > > > Thus, > > > > > > the maximal length we can determine of a string where each > > > > > > character has > > > > > > size S is PTRDIFF_MAX / S without an overflow. Since an overflow > > > > > > for > > > > > > ptrdiff type is undefined we have to make sure that if an overflow > > > > > > occurs, then an overflow occurs for reduction variable, too, and > > > > > > that > > > > > > this is undefined, too. However, I'm not sure anymore whether we > > > > > > want > > > > > > to respect overflows in all cases. If TYPE_PRECISION > > > > > > (ptr_type_node) > > > > > > equals TYPE_PRECISION (ptrdiff_type_node) and an overflow occurs, > > > > > > then > > > > > > this would mean that a single string consumes more than half of the > > > > > > virtual addressable memory. At least for architectures where > > > > > > TYPE_PRECISION (ptrdiff_type_node) == 64 holds, I think it is > > > > > > reasonable > > > > > > to neglect the case where computing pointer difference may overflow. > > > > > > Otherwise we are talking about strings with lenghts of multiple > > > > > > pebibytes. For other architectures we might have to be more precise > > > > > > and make sure that reduction variable overflows first and that this > > > > > > is > > > > > > undefined. 
> > > > > > > > > > > > Thus a conservative condition would be (I assumed that the size of > > > > > > any > > > > > > integral type is a power of two which I'm not sure if this really > > > > > > holds; > > > > > > IIRC the C standard requires only that the alignment is a power of > > > > > > two > > > > > > but not necessarily the size so I might need to change this): > > > > > > > > > > > > /* Compute precision (reduction_var) < (precision (ptrdiff_type) - > > > > > > 1 - log2 (sizeof (load_type)) > > > > > >or in other words return true if reduction variable overflows > > > > > > first > > > > > >and false otherwise. */ > > > > > > > > > > > > static bool > > > > > > reduction_var_overflows_first (tree reduction_var, tree load_type) > > > > > > { > > > > > > unsigned precision_ptrdiff = TYPE_PRECISION (ptrdiff_type_node); > > > > > > unsigned precision_reduction_var = TYPE_PRECISION (TREE_TYPE > > > > > > (reduction_var)); > > > > > > unsigned size_exponent = wi::exact_log2 (wi::to_wide > > > > > > (TYPE_SIZE_UNIT (load_type))); > > > > > > return wi::ltu_p (precision_reduction_var, precision_ptrdiff - 1 > > > > > > - size_exponent); > > > > > > } > > > > > > > > > > > > TYPE_PRECISION (ptrdiff_type_node) == 64 > > > > > > || (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var)) > > > > > > && reduction_var_overflows_first (reduction_var, load_type) > > > > > > > > > > > > Regarding the implementation which makes use of strlen: > > > > > > > > > > > > I'm not sure what it means if strlen is called
Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set
On Mon, Oct 11, 2021 at 10:55:36AM -0500, Segher Boessenkool wrote: > On Thu, Aug 26, 2021 at 09:19:30AM +0800, HAO CHEN GUI wrote: > > gcc/ > > ?? * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin): > > ?? Modify the VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP, > > ?? VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP expansions. > > Something mangles your email, giving all those question marks. It is > hard to review like this. These were non-breaking spaces (u+00a0). Probably caused by format=flowed, the great destroyer of patches. Segher
Re: *PING* [PATCH] doc: improve -fsanitize=undefined description
Diane Meirowitz via Gcc-patches writes: > Please review my patch. It is tiny. Thank you. Thanks for the patch and sorry for the very slow response. I've now pushed this to master and all active branches. Thanks, Richard > Diane > > On 9/15/21, 5:02 PM, "Diane Meirowitz" wrote: > > > doc: improve -fsanitize=undefined description > > gcc/ChangeLog: > * doc/invoke.texi: add link to UndefinedBehaviorSanitizer > documentation, > mention UBSAN_OPTIONS, similar to what is done for > AddressSanitizer. > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 78cfc100ac2..f022885edf8 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -15200,7 +15200,8 @@ The option cannot be combined with > @option{-fsanitize=thread}. > @opindex fsanitize=undefined > Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector. > Various computations are instrumented to detect undefined behavior > -at runtime. Current suboptions are: > +at runtime. See > @uref{https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html} for more > details. The run-time behavior can be influenced using the > +@env{UBSAN_OPTIONS} environment variable. Current suboptions are: > > @table @gcctabopt
Re: [PATCH] rs6000/test: Adjust some cases due to O2 vect [PR102658]
On 10/11/21 9:30 AM, Segher Boessenkool wrote: Hi! On Mon, Oct 11, 2021 at 10:47:00AM +0800, Kewen.Lin wrote: As PR102658 shows, commit r12-4240 enables vectorization at O2, some cases need to be adjusted accordingly for rs6000 port. - For target specific test cases, this adds -fno-tree-vectorize to retain original test points, otherwise vectorization can make some expected scalar instructions gone or generate some unexpected instructions for vector construction. Ah good choice. - For generic test cases, it follows the existing suggested practice with necessary target/xfail selector. Not such a great choice. Many of those tests do not make sense with vectorisation enabled. This should have been thought about, in some cases resulting in not running the test with vectorisation enabled, and in some cases duplicating the test, once with and once without vectorisation. The tests detect bugs that are present both with and without vectorization, so they should pass both ways. That they don't tells us that the warnings need work (they were written with an assumption that doesn't hold anymore). We need to track that work somehow, but simply xfailing them without making a record of what underlying problem the xfails correspond to isn't the best way. In my experience, what works well is opening a bug for each distinct limitation (if one doesn't already exist) and adding a reference to it as a comment to the xfail. But you are just following established practice, so :-) - struct A1 a = { 0, { 1 } }; // { dg-warning "\\\[-Wstringop-overflow" "" { target { i?86-*-* x86_64-*-* } } } + struct A1 a = { 0, { 1 } }; // { dg-warning "\\\[-Wstringop-overflow" "" { target { i?86-*-* x86_64-*-* powerpc*-*-* } } } As I mentioned in the bug, when adding xfails for regressions please be sure to reference the bug that tracks the underlying root cause. There may be multiple problems, and we need to identify what it is in each instance. 
As the author of the tests I can help with that but not if I'm not in the loop on these changes (it would seem prudent to get the author's thoughts on such sweeping changes to their work). I discussed one of these failures with Hongtao in detail at the time autovectorization was being enabled and made the same request then but I didn't realize the problem was so pervasive. In addition, the target-specific conditionals in the xfails are going to be difficult to maintain. It might be okay for one or two in a single test but for so many we need a better solution than that. If autovectorization is only enabled for a subset of targets then a solution might be to add a new DejaGnu test for it and conditionalize the xfails on it. Martin I don't know if powerpc*-*-* is the correct choice in all these cases. Sometimes it might have to be powerpc*-*-linux* or similar. We'll find out :-) (An xfail causes XPASS if the test does *not* fail). +/* Now O2 enables vectorization by default, which generates unexpected float + conversion for vector construction, so simply disable it. */ It is good to see these comments. I love puzzles, but not in the testsuite! :-) Okay for trunk. Thanks! Segher
Re: [PATCH 1/5]AArch64 sve: combine inverted masks into NOTs
Tamar Christina writes: > Hi, > > Sending a new version of the patch because I noticed the pattern was > overriding the nor pattern. > > A second pattern is needed to capture the nor case as combine will match the > longest sequence first. So without this pattern we end up de-optimizing nor > and instead emit two nots. I did not find a better way to do this. Hmm, that's unfortunate. But yeah, I don't know of a better way of avoiding it either. There's a risk we might need a pattern with the operands swapped as well (so that the (not (reg …)) comes first) but it would be better to avoid that using a new canonicalisation rule if necessary. > Note: This patch series is working incrementally towards generating the most > efficient code for this and other loops in small steps. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-sve.md (*fcm_bic_combine, > *fcm_nor_combine, *fcmuo_bic_combine, > *fcmuo_nor_combine): New. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/pred-not-gen.c-1: New test. > * gcc.target/aarch64/sve/pred-not-gen.c-2: New test. > * gcc.target/aarch64/sve/pred-not-gen.c-3: New test. > * gcc.target/aarch64/sve/pred-not-gen.c-4: New test. OK, thanks. Richard > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > 359fe0e457096cf4042a774789a5c241420703d3..8fe4c721313e70592d2cf0acbfbe2f07b070b51a > 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -8126,6 +8126,160 @@ (define_insn_and_split "*fcmuo_and_combine" > UNSPEC_COND_FCMUO))] > ) > > +;; Similar to *fcm_and_combine, but for BIC rather than AND. > +;; In this case, we still need a separate NOT/BIC operation, but predicating > +;; the comparison on the BIC operand removes the need for a PTRUE. 
> +(define_insn_and_split "*fcm_bic_combine" > + [(set (match_operand: 0 "register_operand" "=Upa") > + (and: > + (and: > + (not: > + (unspec: > + [(match_operand: 1) > +(const_int SVE_KNOWN_PTRUE) > +(match_operand:SVE_FULL_F 2 "register_operand" "w") > +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" > "wDz")] > + SVE_COND_FP_CMP_I0)) > + (match_operand: 4 "register_operand" "Upa")) > + (match_dup: 1))) > + (clobber (match_scratch: 5 "=&Upl"))] > + "TARGET_SVE" > + "#" > + "&& 1" > + [(set (match_dup 5) > + (unspec: > + [(match_dup 4) > + (const_int SVE_MAYBE_NOT_PTRUE) > + (match_dup 2) > + (match_dup 3)] > + SVE_COND_FP_CMP_I0)) > + (set (match_dup 0) > + (and: > + (not: > + (match_dup 5)) > + (match_dup 4)))] > +{ > + if (can_create_pseudo_p ()) > +operands[5] = gen_reg_rtx (mode); > +} > +) > + > +;; Make sure that we expand to a nor when the operand 4 of > +;; *fcm_bic_combine is a not. > +(define_insn_and_split "*fcm_nor_combine" > + [(set (match_operand: 0 "register_operand" "=Upa") > + (and: > + (and: > + (not: > + (unspec: > + [(match_operand: 1) > +(const_int SVE_KNOWN_PTRUE) > +(match_operand:SVE_FULL_F 2 "register_operand" "w") > +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" > "wDz")] > + SVE_COND_FP_CMP_I0)) > + (not: > + (match_operand: 4 "register_operand" "Upa"))) > + (match_dup: 1))) > + (clobber (match_scratch: 5 "=&Upl"))] > + "TARGET_SVE" > + "#" > + "&& 1" > + [(set (match_dup 5) > + (unspec: > + [(match_dup 1) > + (const_int SVE_KNOWN_PTRUE) > + (match_dup 2) > + (match_dup 3)] > + SVE_COND_FP_CMP_I0)) > + (set (match_dup 0) > + (and: > + (and: > + (not: > + (match_dup 5)) > + (not: > + (match_dup 4))) > + (match_dup 1)))] > +{ > + if (can_create_pseudo_p ()) > +operands[5] = gen_reg_rtx (mode); > +} > +) > + > +(define_insn_and_split "*fcmuo_bic_combine" > + [(set (match_operand: 0 "register_operand" "=Upa") > + (and: > + (and: > + (not: > + (unspec: > + [(match_operand: 1) > +(const_int SVE_KNOWN_PTRUE) > 
+(match_operand:SVE_FULL_F 2 "register_operand" "w") > +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" > "wDz")] > + UNSPEC_COND_FCMUO)) > + (match_operand: 4 "register_operand" "Upa")) > + (match_dup: 1))) > + (clobber (match_scratch: 5 "=&Upl"))] > + "TARGET_SVE" > + "#" > + "&& 1" > + [(set (match_dup 5) > + (unspec: > + [(match_dup 4) > +
Re: *PING* [PATCH] doc: improve -fsanitize=undefined description
Richard, Thank you! Diane > On Oct 11, 2021, at 12:15 PM, Richard Sandiford > wrote: > > Diane Meirowitz via Gcc-patches writes: >> Please review my patch. It is tiny. Thank you. > > Thanks for the patch and sorry for the very slow response. > I've now pushed this to master and all active branches. > > Thanks, > Richard > >> Diane >> >> On 9/15/21, 5:02 PM, "Diane Meirowitz" wrote: >> >> >>doc: improve -fsanitize=undefined description >> >>gcc/ChangeLog: >>* doc/invoke.texi: add link to UndefinedBehaviorSanitizer >> documentation, >>mention UBSAN_OPTIONS, similar to what is done for >> AddressSanitizer. >> >>diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >>index 78cfc100ac2..f022885edf8 100644 >>--- a/gcc/doc/invoke.texi >>+++ b/gcc/doc/invoke.texi >>@@ -15200,7 +15200,8 @@ The option cannot be combined with >> @option{-fsanitize=thread}. >>@opindex fsanitize=undefined >>Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector. >>Various computations are instrumented to detect undefined behavior >>-at runtime. Current suboptions are: >>+at runtime. See >> @uref{https://urldefense.com/v3/__https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html__;!!ACWV5N9M2RV99hQ!dN9bn2CtYUf6VFb3UO4i75Zzr6HpusAphlsd6n2QvKtF42rMqfoQhVZ3ZGKy-lviZid-$ >> } for more details. The run-time behavior can be influenced using the >>+@env{UBSAN_OPTIONS} environment variable. Current suboptions are: >> >>@table @gcctabopt
Re: [PATCH v3 1/6] rs6000: Support SSE4.1 "round" intrinsics
On Mon, Oct 11, 2021 at 08:46:17AM -0500, Paul A. Clarke wrote: > On Fri, Oct 08, 2021 at 05:31:11PM -0500, Segher Boessenkool wrote: > > "asm volatile" means there is a side effect in the asm. This means that > > it has to be executed on the real machine the same as on the abstract > > machine, with the side effects in the same order. > > > > It can still be reordered, modulo those restrictions. It can be merged > > with an identical asm as well. And the compiler can split this into two > > identical asms on two paths. > > It seems odd to me that the compiler can make any assumptions about > the side-effect(s). How does it know that a side-effect does not alter > computation (as it indeed does in this case), such that reordering is > a still correct (which it wouldn't be in this case)? Because by definition side effects do not change the computation (where "computation" means "the outputs of the asm")! And if you are talking about changing future computations, as floating point control flags can be used for: this falls outside of the C abstract machine, other than fe[gs]etround etc. > > > With respect to volatile, I worry about removing it, because I do > > > indeed need that instruction to execute in order to clear the FPSCR > > > exception enable bits. That side-effect is not otherwise known to the > > > compiler. > > > > Yes. But as said above, volatile isn't enough to get this to behave > > correctly. > > > > The easiest way out is to write this all in one piece of (inline) asm. > > Ugh. I really don't want to go there, not just because it's work, but > I think this is a paradigm that should work without needing to drop > fully into asm. Yes. Let's say GCC still has some challenges here :-( > Is there something unique about using an "asm" statement versus using, > say, a builtin like __builtin_mtfsf or a hypothetical __builtin_mffsce? Nope. > Very similar methods are used in glibc today. Are those broken? Maybe. If you get a real (i.e. 
not inline) function call there, that can save you often. > Would creating a __builtin_mffsce be another solution? Yes. And not a bad idea in the first place. > Would adding memory barriers between the FPSCR manipulations and the > code which is bracketed by them be sufficient? No, what you want to order is not memory accesses, but FP computations relative to the insns that change the FP control bits. If *both* of those change memory you can artificially order them with that. But most FP computations do not access memory. Segher
Re: [PATCH 2/5]AArch64 sve: combine nested if predicates
Tamar Christina writes: >> > Note: This patch series is working incrementally towards generating the >> most >> > efficient code for this and other loops in small steps. >> >> It looks like this could be done in the vectoriser via an extension of the >> scalar_cond_masked_set mechanism. We have: >> >> mask__54.13_59 = vect_a_15.9_55 > vect_b_17.12_58; >> vec_mask_and_60 = loop_mask_32 & mask__54.13_59; >> … >> mask__30.17_67 = vect_a_15.9_55 > vect_cst__66; >> mask__29.18_68 = mask__54.13_59 & mask__30.17_67; >> vec_mask_and_69 = loop_mask_32 & mask__29.18_68; >> >> When vectorising mask__29.18_68, we could test whether each side of the >> "&" is already in scalar_cond_masked_set and AND in the loop mask if so, like >> we do in vectorizable_condition. We could then separately record that the & >> result includes the loop mask. > > Whenever a mask is being generated from a BIT_AND we mask the operands of > the and instead and then just AND the result. > > This allows us to be able to CSE the masks and generate the right combination. > However because re-assoc will try to re-order the masks in the & we have to > now > perform a small local CSE on the vectorized loop if vectorization is > successful. > > Note: This patch series is working incrementally towards generating the most > efficient code for this and other loops in small steps. > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and no > issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN upon > successful vectorization. > * tree-vect-stmts.c (prepare_load_store_mask): When combining two masks > mask the operands instead of the combined operation. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/pred-combine-and.c: New test. 
> > --- inline copy of patch --- > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c > b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c > new file mode 100644 > index > ..d395b7f84bb15b588493611df5a47549726ac24a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c > @@ -0,0 +1,18 @@ > +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ > +/* { dg-options "-O3 --save-temps" } */ > + > +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * > restrict y, float c, int n) > +{ > +for (int i = 0; i < n; i++) { > +float a = x[i]; > +float b = y[i]; > +if (a > b) { > +z0[i] = a + b; > +if (a > c) { > +z1[i] = a - b; > +} > +} > +} > +} > + > +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, > z[0-9]+\.s, z[0-9]+\.s} 2 } } */ > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index > 4e0b2adf1dc2404bc345af30cfeb9c819084894e..717a25f46aa72534eebeb382c92b9145d7d44d04 > 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -1799,6 +1799,19 @@ prepare_load_store_mask (tree mask_type, tree > loop_mask, tree vec_mask, > return vec_mask; > >gcc_assert (TREE_TYPE (loop_mask) == mask_type); > + > + /* Check if the mask is a combination of two different masks. */ > + gimple *def_stmt = SSA_NAME_DEF_STMT (vec_mask); > + if (is_gimple_assign (def_stmt) > + && gimple_assign_rhs_code (def_stmt) == BIT_AND_EXPR) > +{ > + tree lhs1 = gimple_assign_rhs1 (def_stmt); > + tree lhs2 = gimple_assign_rhs2 (def_stmt); > + > + vec_mask = prepare_load_store_mask (mask_type, loop_mask, lhs1, gsi); > + loop_mask = prepare_load_store_mask (mask_type, loop_mask, lhs2, gsi); > +} > + I think this is doing something different from what I suggested above. I was thinking that we should do this when vectorising the AND itself (mask__29.18_68 in the example above), using scalar_cond_masked_set to check whether either side is or is going to be ANDed with the loop mask. 
That way we never generate more loop masks than we need to, whereas the above version could. Thanks, Richard
[PATCH] libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS
This enables lightweight checks for the __glibcxx_requires_valid_range and __glibcxx_requires_string_len macros when _GLIBCXX_ASSERTIONS is defined. By using __builtin_object_size we can check whether the end of the range is part of the same object as the start of the range, and detect problems like in PR 89927. libstdc++-v3/ChangeLog: * include/debug/debug.h (__valid_range_p, __valid_range_n): New inline functions using __builtin_object_size to check ranges delimited by pointers. [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use __valid_range_p. [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use __valid_range_n. The first patch allows us to detect bugs like string("foo", "bar"), like in PR 89927. Debug mode cannot currently detect this. The new check uses the compiler built-in to detect when the two arguments are not part of the same object. This assumes we're optimizing and the compiler knows the values of the pointers. If it doesn't, then the function just returns true and should inline to nothing. I would like to also enable that for Debug Mode, otherwise we have checks that work for _GLIBCXX_ASSERTIONS but not for _GLIBCXX_DEBUG. I tried to make that work with the second patch attached to this mail, but it doesn't abort for the example in PR 89927. I think putting the checks inside the "real" debug checking functions is too many levels of inlining and the compiler "forgets" the pointer values. I think the first patch is worth committing. It should add no overhead for optimized builds, and diagnoses some bugs that we do not diagnose today. I'm less sure about the second, since it doesn't actually help. Maybe the second one should wait for Siddhesh's __builtin_dynamic_object_size to land on trunk. 
Taking this idea further, we could do something similar for __glibcxx_requires_string, which is currently almost useless (it only checks if the pointer is null) but could be changed to use __valid_range_n(_String, char_traits<...>::length(_String)) so that we can diagnose non-null terminated strings (because the length that char-traits would give us would be larger than the size that __builtin_object_size would give us). Thoughts? commit b008cc08c6b05e32c896ed6e5a3e289ccf8f3c91 Author: Jonathan Wakely Date: Mon Oct 11 15:58:43 2021 libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS This enables lightweight checks for the __glibcxx_requires_valid_range and __glibcxx_requires_string_len macros when _GLIBCXX_ASSERTIONS is defined. By using __builtin_object_size we can check whether the end of the range is part of the same object as the start of the range, and detect problems like in PR 89927. libstdc++-v3/ChangeLog: * include/debug/debug.h (__valid_range_p, __valid_range_n): New inline functions using __builtin_object_size to check ranges delimited by pointers. [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use __valid_range_p. [_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use __valid_range_n. 
diff --git a/libstdc++-v3/include/debug/debug.h b/libstdc++-v3/include/debug/debug.h index 116f2f023e2..1db5aa34c55 100644 --- a/libstdc++-v3/include/debug/debug.h +++ b/libstdc++-v3/include/debug/debug.h @@ -59,12 +59,46 @@ namespace __gnu_debug template struct _Safe_iterator; + +#ifdef _GLIBCXX_ASSERTIONS + template +__attribute__((__always_inline__)) +_GLIBCXX14_CONSTEXPR inline bool +__valid_range_p(_Tp* __first, _Tp* __last) _GLIBCXX_NOEXCEPT +{ + __UINTPTR_TYPE__ __f = (__UINTPTR_TYPE__)__first; + __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last; + if (const std::size_t __sz = __builtin_object_size(__first, 3)) + return __f <= __l && (__l - __f) <= __sz; + return true; +} + +#ifndef _GLIBCXX_DEBUG + // __glibcxx_requires_valid_range uses this overload for non-pointers. + template +__attribute__((__always_inline__)) +_GLIBCXX14_CONSTEXPR inline bool +__valid_range_p(_Tp, _Tp) _GLIBCXX_NOEXCEPT +{ return true; } +#endif + + template +_GLIBCXX14_CONSTEXPR __attribute__((__always_inline__)) +inline bool +__valid_range_n(_Tp* __first, std::size_t __n) _GLIBCXX_NOEXCEPT +{ + if (const std::size_t __sz = __builtin_object_size(__first, 3)) + return __n <= __sz; + return true; +} +#endif } #ifndef _GLIBCXX_DEBUG # define __glibcxx_requires_cond(_Cond,_Msg) -# define __glibcxx_requires_valid_range(_First,_Last) +# define __glibcxx_requires_valid_range(_First,_Last) \ + __glibcxx_assert(__gnu_debug::__valid_range_p(_First, _Last)) # define __glibcxx_requires_can_increment(_First,_Size) # define __glibcxx_requires_can_increment_range(_First1,_Last1,_First2) # define __glibcxx_requires_can_decrement_range(_First1,_Last1,_First2) @@ -79,7 +113,8 @@ namesp