[PATCH] AVX512F: Add missing macro for mask(z?)_scalef_s[sd] [PR 105339]
Hi, Add missing macro under O0 and adjust macro format for scalf intrinsics. Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}. Ok for master and backport to GCC 9/10/11? gcc/ChangeLog: PR target/105339 * config/i386/avx512fintrin.h (_mm512_scalef_round_pd): Add parentheses for parameters and djust format. (_mm512_mask_scalef_round_pd): Ditto. (_mm512_maskz_scalef_round_pd): Ditto. (_mm512_scalef_round_ps): Ditto. (_mm512_mask_scalef_round_ps): Ditto. (_mm512_maskz_scalef_round_ps): Ditto. (_mm_scalef_round_sd): Use _mm_undefined_pd. (_mm_scalef_round_ss): Use _mm_undefined_ps. (_mm_mask_scalef_round_sd): New macro. (_mm_mask_scalef_round_ss): Ditto. (_mm_maskz_scalef_round_sd): Ditto. (_mm_maskz_scalef_round_ss): Ditto. --- gcc/config/i386/avx512fintrin.h | 76 - 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 29511fd2831..6dc69ff0234 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) (__mmask8) __U, __R); } #else -#define _mm512_scalef_round_pd(A, B, C)\ -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) - -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) - -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) +#define _mm512_scalef_round_pd(A, B, C) \ + ((__m512d) \ + __builtin_ia32_scalefpd512_mask((A), (B), \ + (__v8df) _mm512_undefined_pd(), \ + -1, (C))) + +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) + +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ + ((__m512d) \ + __builtin_ia32_scalefpd512_mask((A), (B), \ + (__v8df) _mm512_setzero_pd(),\ + (U), (C))) + +#define 
_mm512_scalef_round_ps(A, B, C) \ + ((__m512)\ + __builtin_ia32_scalefps512_mask((A), (B), \ + (__v16sf) _mm512_undefined_ps(), \ + -1, (C))) + +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) + +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ + ((__m512)\ + __builtin_ia32_scalefps512_mask((A), (B), \ + (__v16sf) _mm512_setzero_ps(), \ + (U), (C))) + +#define _mm_scalef_round_sd(A, B, C) \ + ((__m128d) \ + __builtin_ia32_scalefsd_mask_round ((A), (B), \ + (__v2df) _mm_undefined_pd (),\ + -1, (C))) -#define _mm512_scalef_round_ps(A, B, C)\ -(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) +#define _mm_scalef_round_ss(A, B, C) \ + ((__m128)\ + __builtin_ia32_scalefss_mask_round ((A), (B), \ + (__v4sf) _mm_undefined_ps (),\ + -1, (C))) -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ -(__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \ + ((__m128d) \ + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ -(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \ + ((__m128)\ + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
Re: [PATCH] AVX512F: Add missing macro for mask(z?)_scalef_s[sd] [PR 105339]
On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches wrote: > > Hi, > > Add missing macro under O0 and adjust macro format for scalf > intrinsics. > Please add the corresponding intrinsic test in sse-14.c. > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}. > > Ok for master and backport to GCC 9/10/11? > > gcc/ChangeLog: > > PR target/105339 > * config/i386/avx512fintrin.h (_mm512_scalef_round_pd): > Add parentheses for parameters and djust format. > (_mm512_mask_scalef_round_pd): Ditto. > (_mm512_maskz_scalef_round_pd): Ditto. > (_mm512_scalef_round_ps): Ditto. > (_mm512_mask_scalef_round_ps): Ditto. > (_mm512_maskz_scalef_round_ps): Ditto. > (_mm_scalef_round_sd): Use _mm_undefined_pd. > (_mm_scalef_round_ss): Use _mm_undefined_ps. > (_mm_mask_scalef_round_sd): New macro. > (_mm_mask_scalef_round_ss): Ditto. > (_mm_maskz_scalef_round_sd): Ditto. > (_mm_maskz_scalef_round_ss): Ditto. > --- > gcc/config/i386/avx512fintrin.h | 76 - > 1 file changed, 56 insertions(+), 20 deletions(-) > > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h > index 29511fd2831..6dc69ff0234 100644 > --- a/gcc/config/i386/avx512fintrin.h > +++ b/gcc/config/i386/avx512fintrin.h > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, > __m128 __B, const int __R) > (__mmask8) __U, __R); > } > #else > -#define _mm512_scalef_round_pd(A, B, C)\ > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, > (__v8df)_mm512_undefined_pd(), -1, C) > - > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) > - > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, > (__v8df)_mm512_setzero_pd(), U, C) > +#define _mm512_scalef_round_pd(A, B, C) > \ > + ((__m512d) \ > + __builtin_ia32_scalefpd512_mask((A), (B), \ > + (__v8df) _mm512_undefined_pd(), \ > + -1, (C))) > + > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ > + ((__m512d) 
__builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) > + > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > + ((__m512d) \ > + __builtin_ia32_scalefpd512_mask((A), (B), \ > + (__v8df) _mm512_setzero_pd(),\ > + (U), (C))) > + > +#define _mm512_scalef_round_ps(A, B, C) > \ > + ((__m512)\ > + __builtin_ia32_scalefps512_mask((A), (B), \ > + (__v16sf) _mm512_undefined_ps(), \ > + -1, (C))) > + > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ > + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) > + > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ > + ((__m512)\ > + __builtin_ia32_scalefps512_mask((A), (B), \ > + (__v16sf) _mm512_setzero_ps(), \ > + (U), (C))) > + > +#define _mm_scalef_round_sd(A, B, C) \ > + ((__m128d) \ > + __builtin_ia32_scalefsd_mask_round ((A), (B), \ > + (__v2df) _mm_undefined_pd (),\ > + -1, (C))) > > -#define _mm512_scalef_round_ps(A, B, C)\ > -(__m512)__builtin_ia32_scalefps512_mask(A, B, > (__v16sf)_mm512_undefined_ps(), -1, C) > +#define _mm_scalef_round_ss(A, B, C) \ > + ((__m128)\ > + __builtin_ia32_scalefss_mask_round ((A), (B), \ > + (__v4sf) _mm_undefined_ps (),\ > + -1, (C))) > > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ > -(__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) > +#define _mm_mask_scalef_round_sd(W, U, A, B, C) > \ > + ((__m128d) \ > + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) > > -#define _mm512_maskz_scalef_round_ps(U, A,
[PATCH][v4] rtl-optimization/105231 - distribute_notes and REG_EH_REGION
I was still unhappy with the previous patch and indeed, re-thinking all the special casing I put in there I found a hole with respect to externally throwing stmts which I totally forgot about and which might end up in must-not-throw regions after the previous patch. Fortunately all of the complicated situations only arise with non-call exceptions and when there's a REG_EH_REGION note to distribute. So first of all the new patch makes that explicit and does not affect the path without non-call EH (apart from the new assert in distribute_notes). It also does not affect the non-call EH path when there is no REG_EH_REGION on any of the insns. I resisted trying to be clever with lp_nr == 0 or INT_MIN (the nothrow notes that we could in theory just drop), but I put in an extra check in case we have a REG_EH_REGION note on an insn that cannot ever throw (and drop those at distribute_notes time). The patch preserves the main part of the very original patch, that we only ever place the REG_EH_REGION on i3. The split precondition should make sure that i2 never throws (but no assert since the trigger-happy may_trap_p might be confused by some "optimization" done on the split part later). The hunk at the start of try_combine makes sure that we only have a single REG_EH_REGION note to distribute which ends up on i3 which accumulates all possibly throwing side-effects thanks to the split precondition. Bootstrapped and tested on x86_64-unknown-linux-gnu. OK for trunk? Thanks, Richard. --- The following mitigates a problem in combine distribute_notes which places an original REG_EH_REGION based on only may_trap_p which is good to test whether a non-call insn can possibly throw but not whether it actually does or whether we care. That's something we decided at RTL expansion time where we possibly still know the insn evaluates to a constant. In fact, the REG_EH_REGION note with lp > 0 can only come from the original i3 and an assert is added to that effect.
That means we only need to retain the note on i3 or, if that cannot trap, drop it but we should never move it to i2. The following places constraints on the insns to combine with non-call exceptions since we cannot handle the case where we have more than one EH side-effect in the IL. The patch also makes sure we can accumulate that on i3 and do not split a possible exception raising part of it to i2. As a special case we do not place any restriction on all externally throwing insns when there is no REG_EH_REGION present. 2022-04-22 Richard Biener PR rtl-optimization/105231 * combine.cc (distribute_notes): Assert that a REG_EH_REGION with landing pad > 0 is from i3. Put any REG_EH_REGION note on i3 or drop it if the insn can not trap. (try_combine): Ensure that we can merge REG_EH_REGION notes with non-call exceptions. Ensure we are not splitting a trapping part of an insn with non-call exceptions when there is any REG_EH_REGION note to preserve. * gcc.dg/torture/pr105231.c: New testcase. --- gcc/combine.cc | 81 +++-- gcc/testsuite/gcc.dg/torture/pr105231.c | 15 + 2 files changed, 78 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr105231.c diff --git a/gcc/combine.cc b/gcc/combine.cc index 53dcac92abc..dede573982a 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -2569,6 +2569,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, rtx new_other_notes; int i; scalar_int_mode dest_mode, temp_mode; + bool nce_any_eh_region_note = false; /* Immediately return if any of I0,I1,I2 are the same insn (I3 can never be). */ @@ -2951,6 +2952,32 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, return 0; } + /* With non-call exceptions we can end up trying to combine multiple + stmts with possible EH side effects. Make sure we can combine + that to a single stmt which means there must be at most one insn + in the combination with an EH side effect. 
*/ + if (cfun->can_throw_non_call_exceptions) +{ + if (find_reg_note (i3, REG_EH_REGION, NULL_RTX) + || find_reg_note (i2, REG_EH_REGION, NULL_RTX) + || (i1 && find_reg_note (i1, REG_EH_REGION, NULL_RTX)) + || (i0 && find_reg_note (i0, REG_EH_REGION, NULL_RTX))) + { + nce_any_eh_region_note = true; + if (insn_could_throw_p (i3) + + insn_could_throw_p (i2) + + (i1 ? insn_could_throw_p (i1) : 0) + + (i0 ? insn_could_throw_p (i0) : 0) > 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Can't combine multiple insns with EH " +"side-effects\n"); + undo_all (); + return 0; + } + } +} + /* Record whether i2 and i3 are trivial moves. */ i2_was_move = is_just_move
Re: [PATCH] Add condition coverage profiling
On 22/04/2022 07:37, Sebastian Huber wrote: > > > On 17/04/2022 13:27, Jørgen Kvalsvik wrote: >>> In theory, would it be possible to print the state of the truth table with >>> the >>> information available in the gcda and gcno files? For example: >>> >>> Truth table for: (a && (b || c)) && d >>> >>> 0 | 1 | 2 | 3 || covered >>> --+---+---+---++ >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 0 | X | X | X || Y >>> 1 | 0 | 0 | X || N >>> 1 | 0 | 0 | X || N >>> 1 | 0 | 1 | 0 || N >>> 1 | 0 | 1 | 1 || N >>> 1 | 1 | X | 0 || Y >>> 1 | 1 | X | 0 || Y >>> 1 | 1 | X | 1 || Y >>> 1 | 1 | X | 1 || Y >> Maybe? We would at least need to store the masking tables too, which right >> now >> are implicitly stored as in the instrumentation. It's not too bad, but it >> probably means the two functions should return some richer structure, which >> in >> turn means a little bit of redesign. Computing the truth table itself >> shouldn't >> be difficult. > > Using the tool in the context of safety-critical application would normally > require also a tool qualification. For GCC, this is a bit unrealistic. It > would > help if the tool output can be verified. Being able to inspect the masking > tables could help a reviewer to check what the tool did for a sample set of > inputs. > It would be useful for the developer too. Recording the masking vectors isn't hard (they have to be computed after all), so maybe make it opt-in behind a flag?
[PATCH v2] fortran: Detect duplicate unlimited polymorphic types [PR103662]
Le 21/04/2022 à 23:14, Mikael Morin a écrit : Hello, this is a fix for PR103662, a TBAA issue with unlimited polymorphic types. I attached a draft patch to the PR which was accumulating all unlimited polymorphic symbols to a single namespace, avoiding duplicate symbols and thus eliminating the problem. After reviewing the code in more detail, I was afraid that some symbols could still end up in the local namespace, and that the problem would remain for them after all. Despite not being able to generate a testcase where it happened, I decided to produce a patch based on Jakub’s analysis in the PR audit trail, as that way supports duplicates by design. On top of Jakub’s patch, there are a couple more type registrations just in case (they handle duplicates so that’s fine), and the type comparison fix that he was too fortran-uncomfortable to do. The testcase had to be fixed as we found out in the PR audit trail. Regression tested on x86_64-pc-linux-gnu. OK for master? Mikael I have read Jakub’s analysis again, and it says the type registration is useless for unlimited polymorphic fake symbols, as they are all translated as ptr_type_node. So it can be dropped, which brings this v2 patch closer to Jakub’s original. Regression tested again. OK? From e53ecc979ec2a798626eb94c60d18b015d6f52e5 Mon Sep 17 00:00:00 2001 From: Mikael Morin Date: Wed, 20 Apr 2022 12:04:38 +0200 Subject: [PATCH v2] fortran: Detect duplicate unlimited polymorphic types [PR103662] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a type-based alias analysis issue with unlimited polymorphic class descriptors (types behind class(*)) causing data initialisation to be removed by optimization. The fortran front-end may create multiple declarations for types, for example if a type is redeclared in each program unit it is used in.
To avoid optimization seeing them as non-aliasing, a list of derived types is created at resolution time, and used at translation to set the same TYPE_CANONICAL type for each duplicate type declaration. This mechanism didn’t work for unlimited polymorphic descriptor types, as there is a short-circuit return skipping all the resolution handling for them, including the type registration. This change adds type registration at the short-circuit return, and updates type comparison to handle specifically unlimited polymorphic fake symbols, class descriptor types and virtual table types. The test, which exhibited mismatching dynamic types, had to be fixed as well. PR fortran/103662 gcc/fortran/ChangeLog: * interface.cc (gfc_compare_derived_types): Support comparing unlimited polymorphic fake symbols. Recursively compare class descriptor types and virtual table types. * resolve.cc (resolve_fl_derived): Add type to the types list on unlimited polymorphic short-circuit return. gcc/testsuite/ChangeLog: * gfortran.dg/unlimited_polymorphic_3.f03 (foo): Separate bind(c) and sequence checks to... (foo_bc, foo_sq): ... two different procedures. (main, foo*): Change type declarations so that type name, component name, and either bind(c) or sequence attribute match between the main type declarations and the procedure type declarations. (toplevel): Add optimization dump checks. 
Co-Authored-By: Jakub Jelinek --- gcc/fortran/interface.cc | 19 +-- gcc/fortran/resolve.cc| 5 +- .../gfortran.dg/unlimited_polymorphic_3.f03 | 56 +-- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc index 000a530cba4..7ed6e13711f 100644 --- a/gcc/fortran/interface.cc +++ b/gcc/fortran/interface.cc @@ -618,6 +618,14 @@ gfc_compare_derived_types (gfc_symbol *derived1, gfc_symbol *derived2) if (!derived1 || !derived2) gfc_internal_error ("gfc_compare_derived_types: invalid derived type"); + if (derived1->attr.unlimited_polymorphic + && derived2->attr.unlimited_polymorphic) +return true; + + if (derived1->attr.unlimited_polymorphic + != derived2->attr.unlimited_polymorphic) +return false; + /* Compare UNION types specially. */ if (derived1->attr.flavor == FL_UNION || derived2->attr.flavor == FL_UNION) return compare_union_types (derived1, derived2); @@ -630,10 +638,11 @@ gfc_compare_derived_types (gfc_symbol *derived1, gfc_symbol *derived2) && strcmp (derived1->module, derived2->module) == 0) return true; - /* Compare type via the rules of the standard. Both types must have - the SEQUENCE or BIND(C) attribute to be equal. STRUCTUREs are special - because they can be anonymous; therefore two structures with different - names may be equal. */ + /* Compare type via the rules of the standard. Both types must have the + SEQUENCE or BIND(C) attribute to be equal. We also compare types + recursively if they are class descriptors types or virtual tables types. +
*PING* [PATCH 0/4] Use pointer arithmetic for array references [PR102043]
Ping for the four patches starting at https://gcc.gnu.org/pipermail/fortran/2022-April/057759.html : https://gcc.gnu.org/pipermail/fortran/2022-April/057757.html https://gcc.gnu.org/pipermail/fortran/2022-April/057760.html https://gcc.gnu.org/pipermail/fortran/2022-April/057758.html https://gcc.gnu.org/pipermail/fortran/2022-April/057761.html Richi accepted the general direction and the middle-end interaction. I need a fortran frontend ack as well.
[PATCH] i386: Fix up ix86_gimplify_va_arg [PR105331]
Hi! On the following testcase we emit a bogus 'va_arg_tmp.5' may be used uninitialized warning. The reason is that when gimplifying the addr = &temp; statement, the va_arg_tmp temporary var for which we emit ADDR_EXPR is not TREE_ADDRESSABLE, prepare_gimple_addressable emits some extra code to initialize the newly addressable var from its previous value, but it is a new variable which hasn't been initialized yet and will be later, so we end up initializing it with uninitialized SSA_NAME: va_arg_tmp.6 = va_arg_tmp.5_14(D); addr.2_16 = &va_arg_tmp.6; _17 = MEM[(double *)sse_addr.4_13]; MEM[(double * {ref-all})addr.2_16] = _17; and with -O1 we actually don't DSE it before the warning is emitted. If we make the temp TREE_ADDRESSABLE before the gimplification, then this prepare_gimple_addressable path isn't taken and we effectively omit the first statement above and so the bogus warning is gone. I went through other backends and didn't find another instance of this problem. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-04-22 Jakub Jelinek PR target/105331 * config/i386/i386.cc (ix86_gimplify_va_arg): Mark va_arg_tmp temporary TREE_ADDRESSABLE before trying to gimplify ADDR_EXPR of it. * gcc.dg/pr105331.c: New test. 
--- gcc/config/i386/i386.cc.jj 2022-04-12 09:20:07.52842 +0200 +++ gcc/config/i386/i386.cc 2022-04-21 12:03:32.201951522 +0200 @@ -4891,6 +4891,7 @@ ix86_gimplify_va_arg (tree valist, tree { int i, prev_size = 0; tree temp = create_tmp_var (type, "va_arg_tmp"); + TREE_ADDRESSABLE (temp) = 1; /* addr = &temp; */ t = build1 (ADDR_EXPR, build_pointer_type (type), temp); --- gcc/testsuite/gcc.dg/pr105331.c.jj 2022-04-21 12:09:34.398906718 +0200 +++ gcc/testsuite/gcc.dg/pr105331.c 2022-04-21 12:09:07.304283903 +0200 @@ -0,0 +1,11 @@ +/* PR target/105331 */ +/* { dg-do compile } */ +/* { dg-options "-O -Wuninitialized" } */ + +#include + +int +foo (va_list *va) +{ + return va_arg (*va, double _Complex);/* { dg-bogus "may be used uninitialized" } */ +} Jakub
[PATCH] rtlanal: Fix up replace_rtx [PR105333]
Hi! The following testcase FAILs, because replace_rtx replaces a REG with CONST_WIDE_INT inside of a SUBREG, which is an invalid transformation because a SUBREG relies on SUBREG_REG having non-VOIDmode but CONST_WIDE_INT has VOIDmode. replace_rtx already has code to deal with it, but it was doing it only for CONST_INTs. The following patch does it also for VOIDmode CONST_DOUBLE or CONST_WIDE_INT. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-04-22 Jakub Jelinek PR rtl-optimization/105333 * rtlanal.cc (replace_rtx): Use simplify_subreg or simplify_unary_operation if CONST_SCALAR_INT_P rather than just CONST_INT_P. * gcc.dg/pr105333.c: New test. --- gcc/rtlanal.cc.jj 2022-02-08 20:08:03.912540713 +0100 +++ gcc/rtlanal.cc 2022-04-21 15:45:23.219769629 +0200 @@ -3390,7 +3390,7 @@ replace_rtx (rtx x, rtx from, rtx to, bo { rtx new_rtx = replace_rtx (SUBREG_REG (x), from, to, all_regs); - if (CONST_INT_P (new_rtx)) + if (CONST_SCALAR_INT_P (new_rtx)) { x = simplify_subreg (GET_MODE (x), new_rtx, GET_MODE (SUBREG_REG (x)), @@ -3406,7 +3406,7 @@ replace_rtx (rtx x, rtx from, rtx to, bo { rtx new_rtx = replace_rtx (XEXP (x, 0), from, to, all_regs); - if (CONST_INT_P (new_rtx)) + if (CONST_SCALAR_INT_P (new_rtx)) { x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x), new_rtx, GET_MODE (XEXP (x, 0))); --- gcc/testsuite/gcc.dg/pr105333.c.jj 2022-04-21 15:48:08.310468926 +0200 +++ gcc/testsuite/gcc.dg/pr105333.c 2022-04-21 15:47:49.578729973 +0200 @@ -0,0 +1,21 @@ +/* PR rtl-optimization/105333 */ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-Og -fno-tree-coalesce-vars -fno-tree-fre" } */ + +int g; +short s; + +static inline unsigned short +bar (short a, __int128 b) +{ + b ^= (unsigned long) -a; + __builtin_strncpy ((void *) &s, (void *) &a, 1); + b *= 14; + return b; +} + +void +foo (void) +{ + g *= (__int128) bar (1, 1); +} Jakub
Re: [PATCH] rtlanal: Fix up replace_rtx [PR105333]
On Fri, 22 Apr 2022, Jakub Jelinek wrote: > Hi! > > The following testcase FAILs, because replace_rtx replaces a REG with > CONST_WIDE_INT inside of a SUBREG, which is an invalid transformation > because a SUBREG relies on SUBREG_REG having non-VOIDmode but > CONST_WIDE_INT has VOIDmode. > > replace_rtx already has code to deal with it, but it was doing > it only for CONST_INTs. The following patch does it also for > VOIDmode CONST_DOUBLE or CONST_WIDE_INT. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. > 2022-04-22 Jakub Jelinek > > PR rtl-optimization/105333 > * rtlanal.cc (replace_rtx): Use simplify_subreg or > simplify_unary_operation if CONST_SCALAR_INT_P rather than just > CONST_INT_P. > > * gcc.dg/pr105333.c: New test. > > --- gcc/rtlanal.cc.jj 2022-02-08 20:08:03.912540713 +0100 > +++ gcc/rtlanal.cc2022-04-21 15:45:23.219769629 +0200 > @@ -3390,7 +3390,7 @@ replace_rtx (rtx x, rtx from, rtx to, bo > { >rtx new_rtx = replace_rtx (SUBREG_REG (x), from, to, all_regs); > > - if (CONST_INT_P (new_rtx)) > + if (CONST_SCALAR_INT_P (new_rtx)) > { > x = simplify_subreg (GET_MODE (x), new_rtx, > GET_MODE (SUBREG_REG (x)), > @@ -3406,7 +3406,7 @@ replace_rtx (rtx x, rtx from, rtx to, bo > { >rtx new_rtx = replace_rtx (XEXP (x, 0), from, to, all_regs); > > - if (CONST_INT_P (new_rtx)) > + if (CONST_SCALAR_INT_P (new_rtx)) > { > x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x), > new_rtx, GET_MODE (XEXP (x, 0))); > --- gcc/testsuite/gcc.dg/pr105333.c.jj2022-04-21 15:48:08.310468926 > +0200 > +++ gcc/testsuite/gcc.dg/pr105333.c 2022-04-21 15:47:49.578729973 +0200 > @@ -0,0 +1,21 @@ > +/* PR rtl-optimization/105333 */ > +/* { dg-do compile { target int128 } } */ > +/* { dg-options "-Og -fno-tree-coalesce-vars -fno-tree-fre" } */ > + > +int g; > +short s; > + > +static inline unsigned short > +bar (short a, __int128 b) > +{ > + b ^= (unsigned long) -a; > + __builtin_strncpy ((void *) &s, (void *) &a, 1); > + b *= 14; > + return 
b; > +} > + > +void > +foo (void) > +{ > + g *= (__int128) bar (1, 1); > +} > > Jakub > > -- Richard Biener SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)
Re: [PATCH] AVX512F: Add missing macro for mask(z?)_scalef_s[sd] [PR 105339]
> Please add the corresponding intrinsic test in sse-14.c Sorry for forgetting this part. Updated patch. Thanks. Hongtao Liu via Gcc-patches 于2022年4月22日周五 16:49写道: > > On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches > wrote: > > > > Hi, > > > > Add missing macro under O0 and adjust macro format for scalf > > intrinsics. > > > Please add the corresponding intrinsic test in sse-14.c. > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}. > > > > Ok for master and backport to GCC 9/10/11? > > > > gcc/ChangeLog: > > > > PR target/105339 > > * config/i386/avx512fintrin.h (_mm512_scalef_round_pd): > > Add parentheses for parameters and djust format. > > (_mm512_mask_scalef_round_pd): Ditto. > > (_mm512_maskz_scalef_round_pd): Ditto. > > (_mm512_scalef_round_ps): Ditto. > > (_mm512_mask_scalef_round_ps): Ditto. > > (_mm512_maskz_scalef_round_ps): Ditto. > > (_mm_scalef_round_sd): Use _mm_undefined_pd. > > (_mm_scalef_round_ss): Use _mm_undefined_ps. > > (_mm_mask_scalef_round_sd): New macro. > > (_mm_mask_scalef_round_ss): Ditto. > > (_mm_maskz_scalef_round_sd): Ditto. > > (_mm_maskz_scalef_round_ss): Ditto. 
> > --- > > gcc/config/i386/avx512fintrin.h | 76 - > > 1 file changed, 56 insertions(+), 20 deletions(-) > > > > diff --git a/gcc/config/i386/avx512fintrin.h > > b/gcc/config/i386/avx512fintrin.h > > index 29511fd2831..6dc69ff0234 100644 > > --- a/gcc/config/i386/avx512fintrin.h > > +++ b/gcc/config/i386/avx512fintrin.h > > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 > > __A, __m128 __B, const int __R) > > (__mmask8) __U, __R); > > } > > #else > > -#define _mm512_scalef_round_pd(A, B, C)\ > > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, > > (__v8df)_mm512_undefined_pd(), -1, C) > > - > > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ > > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) > > - > > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > > -(__m512d)__builtin_ia32_scalefpd512_mask(A, B, > > (__v8df)_mm512_setzero_pd(), U, C) > > +#define _mm512_scalef_round_pd(A, B, C) > > \ > > + ((__m512d) \ > > + __builtin_ia32_scalefpd512_mask((A), (B), \ > > + (__v8df) _mm512_undefined_pd(), \ > > + -1, (C))) > > + > > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ > > + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) > > + > > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > > + ((__m512d) \ > > + __builtin_ia32_scalefpd512_mask((A), (B), \ > > + (__v8df) _mm512_setzero_pd(),\ > > + (U), (C))) > > + > > +#define _mm512_scalef_round_ps(A, B, C) > > \ > > + ((__m512)\ > > + __builtin_ia32_scalefps512_mask((A), (B), \ > > + (__v16sf) _mm512_undefined_ps(), \ > > + -1, (C))) > > + > > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ > > + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) > > + > > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ > > + ((__m512)\ > > + __builtin_ia32_scalefps512_mask((A), (B), \ > > + (__v16sf) _mm512_setzero_ps(), \ > > + (U), (C))) > > + > > +#define _mm_scalef_round_sd(A, B, C) \ > > + ((__m128d) \ > > + 
__builtin_ia32_scalefsd_mask_round ((A), (B), \ > > + (__v2df) _mm_undefined_pd (),\ > > + -1, (C))) > > > > -#define _mm512_scalef_round_ps(A, B, C)\ > > -(__m512)__builtin_ia32_scalefps512_mask(A, B, > > (__v16sf)_mm512_undefined_ps(), -1, C) > > +#define _mm_scalef_round_ss(A, B, C) \ > > + ((__m128)\ > > + __builtin_ia32_scalefss_mask_round ((A), (B), \ > > + (__v4sf) _mm_undefined_ps (),\ > > + -1, (C))) > > > > -#define _mm512_mask_
Re: *PING* [PATCH 0/4] Use pointer arithmetic for array references [PR102043]
Hi Mikael, Ping for the four patches starting at https://gcc.gnu.org/pipermail/fortran/2022-April/057759.html : https://gcc.gnu.org/pipermail/fortran/2022-April/057757.html https://gcc.gnu.org/pipermail/fortran/2022-April/057760.html https://gcc.gnu.org/pipermail/fortran/2022-April/057758.html https://gcc.gnu.org/pipermail/fortran/2022-April/057761.html Richi accepted the general direction and the middle-end interaction. I need a fortran frontend ack as well. Looks good to me. Thanks a lot for taking this on! This would have been a serious regression if released with gcc 12. Best regards Thomas
Re: [PATCH] fold, simplify-rtx: Punt on non-representable floating point constants [PR104522]
> On Apr 20, 2022, at 5:38 AM, Richard Biener > wrote: > > On Tue, Apr 19, 2022 at 11:36 PM Qing Zhao wrote: >> >> >> >>> On Apr 14, 2022, at 1:53 AM, Richard Biener >>> wrote: >>> >>> On Wed, Apr 13, 2022 at 5:22 PM Qing Zhao wrote: Hi, Richard, Thanks a lot for taking a look at this issue (and Sorry that I haven’t fixed this one yet, I was distracted by other tasks then just forgot this one….) > On Apr 13, 2022, at 3:41 AM, Richard Biener > wrote: > > On Tue, Feb 15, 2022 at 5:31 PM Qing Zhao via Gcc-patches > wrote: >> >> >> >>> On Feb 15, 2022, at 3:58 AM, Jakub Jelinek wrote: >>> >>> Hi! >>> >>> For IBM double double I've added in PR95450 and PR99648 verification >>> that >>> when we at the tree/GIMPLE or RTL level interpret target bytes as a >>> REAL_CST >>> or CONST_DOUBLE constant, we try to encode it back to target bytes and >>> verify it is the same. >>> This is because our real.c support isn't able to represent all valid >>> values >>> of IBM double double which has variable precision. >>> In PR104522, it has been noted that we have similar problem with the >>> Intel/Motorola extended XFmode formats, our internal representation >>> isn't >>> able to record pseudo denormals, pseudo infinities, pseudo NaNs and >>> unnormal >>> values. >>> So, the following patch is an attempt to extend that verification to all >>> floats. >>> Unfortunately, it wasn't that straightforward, because the >>> __builtin_clear_padding code exactly for the XFmode long doubles needs >>> to >>> discover what bits are padding and does that by interpreting memory of >>> all 1s. That is actually a valid supported value, a qNaN with negative >>> sign with all mantissa bits set, but the verification includes also the >>> padding bits (exactly what __builtin_clear_padding wants to figure out) >>> and so fails the comparison check and so we ICE. 
>>> The patch fixes that case by moving that verification from >>> native_interpret_real to its caller, so that clear_padding_type can >>> call native_interpret_real and avoid that extra check. >>> >>> With this, the only thing that regresses in the testsuite is >>> +FAIL: gcc.target/i386/auto-init-4.c scan-assembler-times >>> long\\t-16843010 5 >>> because it decides to use a pattern that has non-zero bits in the >>> padding >>> bits of the long double, so the simplify-rtx.cc change prevents folding >>> a SUBREG into a constant. We emit (the testcase is -O0 but we emit >>> worse >>> code at all opt levels) something like: >>> movabsq $-72340172838076674, %rax >>> movabsq $-72340172838076674, %rdx >>> movq%rax, -48(%rbp) >>> movq%rdx, -40(%rbp) >>> fldt-48(%rbp) >>> fstpt -32(%rbp) >>> instead of >>> fldt.LC2(%rip) >>> fstpt -32(%rbp) >>> ... >>> .LC2: >>> .long -16843010 >>> .long -16843010 >>> .long 65278 >>> .long 0 >>> Note, neither of those sequences actually stores the padding bits, fstpt >>> simply doesn't touch them. >>> For vars with clear_padding_real_needs_padding_p types that are >>> allocated >>> to memory at expansion time, I'd say much better would be to do the >>> stores >>> using integral modes rather than XFmode, so do that: >>> movabsq $-72340172838076674, %rax >>>movq%rax, -32(%rbp) >>>movq%rax, -24(%rbp) >>> directly. That is the only way to ensure the padding bits are >>> initialized >>> (or expand __builtin_clear_padding, but then you initialize separately >>> the >>> value bits and padding bits). >>> >>> Bootstrapped/regtested on x86_64-linux and i686-linux, though as >>> mentioned >>> above, the gcc.target/i386/auto-init-4.c case is unresolved. >> >> Thanks, I will try to fix this testing case in a later patch. > > I've looked at this FAIL now and really wonder whether "pattern init" as > implemented makes any sense for non-integral types. 
> We end up with > initializing a register (SSA name) with > > VIEW_CONVERT_EXPR(0xfefefefefefefefefefefefefefefefe) > > as we go building a TImode constant (we verified we have a TImode SET!) > but then > >/* Pun the LHS to make sure its type has constant size > unless it is an SSA name where that's already known. */ >if (TREE_CODE (lhs) != SSA_NAME) > lhs = build1 (VIEW_CONVERT_EXPR, itype, lhs); >else > init = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), init); > ... >expand_assignment (lhs, init, false); > > and generally registers do not have any padding. This wei
Re: [PATCH] fold, simplify-rtx: Punt on non-representable floating point constants [PR104522]
> On Apr 21, 2022, at 2:09 AM, Richard Biener > wrote: > > On Wed, Apr 20, 2022 at 6:02 PM Qing Zhao wrote: >> >> >> >>> On Apr 20, 2022, at 5:38 AM, Richard Biener >>> wrote: >>> >>> On Tue, Apr 19, 2022 at 11:36 PM Qing Zhao wrote: > On Apr 14, 2022, at 1:53 AM, Richard Biener > wrote: > > On Wed, Apr 13, 2022 at 5:22 PM Qing Zhao wrote: >> >> Hi, Richard, >> >> Thanks a lot for taking a look at this issue (and Sorry that I haven’t >> fixed this one yet, I was distracted by other tasks then just forgot >> this one….) >> >>> On Apr 13, 2022, at 3:41 AM, Richard Biener >>> wrote: >>> >>> On Tue, Feb 15, 2022 at 5:31 PM Qing Zhao via Gcc-patches >>> wrote: > On Feb 15, 2022, at 3:58 AM, Jakub Jelinek wrote: > > Hi! > > For IBM double double I've added in PR95450 and PR99648 verification > that > when we at the tree/GIMPLE or RTL level interpret target bytes as a > REAL_CST > or CONST_DOUBLE constant, we try to encode it back to target bytes and > verify it is the same. > This is because our real.c support isn't able to represent all valid > values > of IBM double double which has variable precision. > In PR104522, it has been noted that we have similar problem with the > Intel/Motorola extended XFmode formats, our internal representation > isn't > able to record pseudo denormals, pseudo infinities, pseudo NaNs and > unnormal > values. > So, the following patch is an attempt to extend that verification to > all > floats. > Unfortunately, it wasn't that straightforward, because the > __builtin_clear_padding code exactly for the XFmode long doubles > needs to > discover what bits are padding and does that by interpreting memory of > all 1s. That is actually a valid supported value, a qNaN with > negative > sign with all mantissa bits set, but the verification includes also > the > padding bits (exactly what __builtin_clear_padding wants to figure > out) > and so fails the comparison check and so we ICE. 
> The patch fixes that case by moving that verification from > native_interpret_real to its caller, so that clear_padding_type can > call native_interpret_real and avoid that extra check. > > With this, the only thing that regresses in the testsuite is > +FAIL: gcc.target/i386/auto-init-4.c scan-assembler-times > long\\t-16843010 5 > because it decides to use a pattern that has non-zero bits in the > padding > bits of the long double, so the simplify-rtx.cc change prevents > folding > a SUBREG into a constant. We emit (the testcase is -O0 but we emit > worse > code at all opt levels) something like: >movabsq $-72340172838076674, %rax >movabsq $-72340172838076674, %rdx >movq%rax, -48(%rbp) >movq%rdx, -40(%rbp) >fldt-48(%rbp) >fstpt -32(%rbp) > instead of >fldt.LC2(%rip) >fstpt -32(%rbp) > ... > .LC2: >.long -16843010 >.long -16843010 >.long 65278 >.long 0 > Note, neither of those sequences actually stores the padding bits, > fstpt > simply doesn't touch them. > For vars with clear_padding_real_needs_padding_p types that are > allocated > to memory at expansion time, I'd say much better would be to do the > stores > using integral modes rather than XFmode, so do that: >movabsq $-72340172838076674, %rax > movq%rax, -32(%rbp) > movq%rax, -24(%rbp) > directly. That is the only way to ensure the padding bits are > initialized > (or expand __builtin_clear_padding, but then you initialize > separately the > value bits and padding bits). > > Bootstrapped/regtested on x86_64-linux and i686-linux, though as > mentioned > above, the gcc.target/i386/auto-init-4.c case is unresolved. Thanks, I will try to fix this testing case in a later patch. >>> >>> I've looked at this FAIL now and really wonder whether "pattern init" as >>> implemented makes any sense for non-integral types. 
>>> We end up with >>> initializing a register (SSA name) with >>> >>> VIEW_CONVERT_EXPR(0xfefefefefefefefefefefefefefefefe) >>> >>> as we go building a TImode constant (we verified we have a TImode SET!) >>> but then >>> >>> /* Pun the LHS to make sure its type has constant siz
Re: [PATCH][v4] rtl-optimization/105231 - distribute_notes and REG_EH_REGION
Hi! On Fri, Apr 22, 2022 at 10:53:30AM +0200, Richard Biener wrote: > I was still unhappy with the previous patch and indeed, re-thinking > all the special casing I put in there I found a hole with respect > to externally throwing stmts which I totally forgot about and which > might end up in must-not-throw regions after the previous patch. > > Fortunately all of the complicated situations only arise with > non-call exceptions and when there's a REG_EH_REGION note to > distribute. So first of all the new patch makes that explicit > and does not affect the not non-call EH path (apart from the > new assert in distribute_notes). It also does not affect the > non-call EH path when there is no REG_EH_REGION on any of the > insns. > > I resisted trying to be clever with lp_nr == 0 or INT_MIN > (the nothrow notes that we could in theory just drop), but I > put in an extra check in case we have a REG_EH_REGION note > on an insn that cannot ever throw (and drop those at > distribute_notes time). > > The patch preserves the main part of the very original patch, > that we only ever place the REG_EH_REGION on i3. The > split precondition should make sure that i2 never throws > (but no assert since the trigger happy may_trap_p might > be confused by some "optimization" done on the split part later). > The hunk at the start of try_combine makes sure that we only > have a single REG_EH_REGION note to distribute which ends up > on i3 which accumulates all possibly throwing side-effects > thanks to the split precondition. Right, that is foolproof :-) > + bool nce_any_eh_region_note = false; Name this "has_non_call_exception" please? > + /* With non-call exceptions we can end up trying to combine multiple > + stmts with possible EH side effects. Make sure we can combine > + that to a single stmt which means there must be at most one insn > + in the combination with an EH side effect. */ s/stmt/insn/g Or, what else does "statement" mean here? 
> + if (cfun->can_throw_non_call_exceptions) > +{ > + if (find_reg_note (i3, REG_EH_REGION, NULL_RTX) > + || find_reg_note (i2, REG_EH_REGION, NULL_RTX) > + || (i1 && find_reg_note (i1, REG_EH_REGION, NULL_RTX)) > + || (i0 && find_reg_note (i0, REG_EH_REGION, NULL_RTX))) > + { > + nce_any_eh_region_note = true; > + if (insn_could_throw_p (i3) > + + insn_could_throw_p (i2) > + + (i1 ? insn_could_throw_p (i1) : 0) > + + (i0 ? insn_could_throw_p (i0) : 0) > 1) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, "Can't combine multiple insns with EH " > + "side-effects\n"); > + undo_all (); > + return 0; > + } > + } > +} Nice :-) > + /* We should not split a possibly trapping part when we > + care about non-call EH and have REG_EH_REGION notes > + to distribute. */ > + && (!cfun->can_throw_non_call_exceptions > + || !nce_any_eh_region_note > + || !may_trap_p (*split))) I'm still not a fan of manual De Morgan, it is too easy to make a mistake in it, or in reading it. It is better as && (!(cfun->can_throw_non_call_exceptions && nce_any_eh_region_note && may_trap_p (*split)))) imo. But, this kind of thing is not new of course, and combine is one of the worst "complicated expressions are great!" offenders around ;-) > + { > + /* This handling needs to be kept in sync with the > +prerequisite checking in try_combine. */ > + int lp_nr = INTVAL (XEXP (note, 0)); Please spell out "landing pad", in the comment if not in the code. > + /* A REG_EH_REGION note transfering control can only ever come > +from i3. */ > + if (lp_nr > 0) > + gcc_assert (from_insn == i3); > + /* We are making sure there is a single effective REG_EH_REGION > +note and it's valid to put it on i3. */ > + if (!insn_could_throw_p (from_insn)) > + /* Deal with stray notes on insns that can never throw. */ > + ; "Throw away stray notes" etc.?
> +/* { dg-do compile } */ > +/* { dg-require-effective-target int32plus } */ > +/* { dg-require-effective-target dfp } */ > +/* { dg-additional-options "-fsanitize-coverage=trace-pc > -fnon-call-exceptions --param=max-cse-insns=1 -frounding-math" } */ > +/* { dg-additional-options "-mstack-arg-probe" { target x86_64-*-* i?86-*-* > } } */ > + > +void baz (int *); > +void bar (double, double, _Decimal64); > + > +void > +foo (void) > +{ > + int s __attribute__((cleanup (baz))); > + bar (0xfffe, 0xebf3fff2fbebaf7f, 0xff); > +} Why the int32plus? It needs 64-bit integers, and the size of "int" does not matter at all afaics? Maybe you want lp64? Okay for trunk with the naming and comment stuff looked at. Thank you! Segher
Re: [PATCH] rtlanal: Fix up replace_rtx [PR105333]
> 2022-04-22 Jakub Jelinek > > PR rtl-optimization/105333 > * rtlanal.cc (replace_rtx): Use simplify_subreg or > simplify_unary_operation if CONST_SCALAR_INT_P rather than just > CONST_INT_P. > > * gcc.dg/pr105333.c: New test. OK, thanks. -- Eric Botcazou
[COMMITTED] docs: Correct "This functions" to "These functions"
2022-04-22 Paul A. Clarke gcc * doc/extend.texi: Correct "This" to "These". --- Committed as trivial/obvious. gcc/doc/extend.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e10b10bc1f14..931e5ae3769f 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -13525,7 +13525,7 @@ exceptions handling functions @code{fegetround}, @code{feclearexcept} and @code{feraiseexcept}. They may not be available for all targets, and because they need close interaction with libc internal values, they may not be available for all target libcs, but in all cases they will gracefully fallback to libc -calls. This built-in functions appear both with and without the +calls. These built-in functions appear both with and without the @code{__builtin_} prefix. @deftypefn {Built-in Function} void *__builtin_alloca (size_t size) -- 2.27.0
[PATCH] c++: partial ordering with dependent NTTP type [PR105289]
Here ever since r11-6483-ge2e2f3f2c9400f we're rejecting and crashing (respectively) on two testcases that we used to accept in C++17 mode. Both testcases declare partial specializations for which the primary template contains an NTTP with dependent type, but the correctness of these partial specializations is unclear according to PR86193. This patch restores the previous C++17 behavior for such partial specializations by restricting the r11-6483 change to just ordinary deduction as opposed to deduction for sake of partial ordering. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk/11? PR c++/105289 PR c++/86193 gcc/cp/ChangeLog: * pt.cc (unify) : Restrict the r11-6483 change to just ordinary deduction for function templates. When substituting into the NTTP type the second time, use the original type not the substituted type. Remove now unnecessary level check. gcc/testsuite/ChangeLog: * g++.dg/template/partial5.C: Revert r11-6483 change. * g++.dg/template/partial-specialization11.C: New test. * g++.dg/template/partial-specialization12.C: New test. --- gcc/cp/pt.cc | 25 --- .../template/partial-specialization11.C | 10 .../template/partial-specialization12.C | 12 + gcc/testsuite/g++.dg/template/partial5.C | 2 +- 4 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization11.C create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization12.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index dde62ee052d..52bd130b7e7 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -24287,8 +24287,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, /* We haven't deduced the type of this parameter yet. */ if (cxx_dialect >= cxx17 /* We deduce from array bounds in try_array_deduction. */ - && !(strict & UNIFY_ALLOW_INTEGER) - && TEMPLATE_PARM_LEVEL (parm) <= TMPL_ARGS_DEPTH (targs)) + && !(strict & UNIFY_ALLOW_INTEGER)) { /* Deduce it from the non-type argument. 
As above, ignore top-level quals here too. */ @@ -24296,13 +24295,21 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, RECUR_AND_CHECK_FAILURE (tparms, targs, tparm, atype, UNIFY_ALLOW_NONE, explain_p); - /* Now check whether the type of this parameter is still -dependent, and give up if so. */ - ++processing_template_decl; - tparm = tsubst (tparm, targs, tf_none, NULL_TREE); - --processing_template_decl; - if (uses_template_parms (tparm)) - return unify_success (explain_p); + if (!processing_template_decl + && TPARMS_PRIMARY_TEMPLATE (tparms) + && DECL_FUNCTION_TEMPLATE_P (TPARMS_PRIMARY_TEMPLATE + (tparms))) + { + /* If the NTTP's type uses still-undeduced template +parameters, then don't unify it now. This gives +type_unification_real a chance to retry deduction +with default template arguments substituted in. */ + ++processing_template_decl; + tparm = tsubst (TREE_TYPE (parm), targs, tf_none, NULL_TREE); + --processing_template_decl; + if (uses_template_parms (tparm)) + return unify_success (explain_p); + } } else /* Try again later. 
*/ diff --git a/gcc/testsuite/g++.dg/template/partial-specialization11.C b/gcc/testsuite/g++.dg/template/partial-specialization11.C new file mode 100644 index 000..20da407d422 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/partial-specialization11.C @@ -0,0 +1,10 @@ +// PR c++/105289 + +template struct value_type; + +template::type V> +struct push_front_vlist; + +template +struct push_front_vlist { }; +// { dg-error "not more specialized" "PR86193" { target c++14_down } .-1 } diff --git a/gcc/testsuite/g++.dg/template/partial-specialization12.C b/gcc/testsuite/g++.dg/template/partial-specialization12.C new file mode 100644 index 000..d70f7592790 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/partial-specialization12.C @@ -0,0 +1,12 @@ +// PR c++/105289 +// { dg-do compile { target c++11 } } + +template +struct value_type; + +template ::type Element> +struct push_front_vlist; + +template class XList, class T, T Arg, T... Vs> +struct push_front_vlist, Arg> { }; +// { dg-error "not more specialized" "PR86193" { target c++14_down } .-1 } diff --git a/gcc/testsuite/g++.dg/template/partial5.C b/gcc/testsuite
PING [PATCH] Fix 'modff' reference in extend.texi
On Mon, Apr 11, 2022 at 11:23:48AM -0500, Paul A. Clarke via Gcc-patches wrote: > In commit a2a919aa501e3 (2003), built-ins for modf and modff were added. > In extend.texi, section "Other Builtins", "modf" was added to the paragraph > "There are also built-in versions of the ISO C99 functions [...]" and > "modf" was also added to the paragraph "The ISO C90 functions [...]". > "modff" was not added to either paragraph. > > Based on the context clues about where "modfl" and other similar function > pairs like "powf/powl" appear, I believe the reference to "modf" in the > first paragraph (C99) should instead be "modff". > > 2022-04-11 Paul A. Clarke > > gcc > * doc/extend.texi (Other Builtins): Correct reference to 'modff'. > --- > gcc/doc/extend.texi | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index e10b10bc1f14..05c99f4284a6 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -13460,7 +13460,7 @@ There are also built-in versions of the ISO C99 > functions > @code{expl}, @code{fabsf}, @code{fabsl}, @code{floorf}, @code{floorl}, > @code{fmodf}, @code{fmodl}, @code{frexpf}, @code{frexpl}, @code{ldexpf}, > @code{ldexpl}, @code{log10f}, @code{log10l}, @code{logf}, @code{logl}, > -@code{modfl}, @code{modf}, @code{powf}, @code{powl}, @code{sinf}, > +@code{modfl}, @code{modff}, @code{powf}, @code{powl}, @code{sinf}, > @code{sinhf}, @code{sinhl}, @code{sinl}, @code{sqrtf}, @code{sqrtl}, > @code{tanf}, @code{tanhf}, @code{tanhl} and @code{tanl} > that are recognized in any mode since ISO C90 reserves these names for > --
[PATCH] c++: partial ordering with dependent NTTP type [PR105289]
Here ever since r11-6483-ge2e2f3f2c9400f we're rejecting and crashing (respectively) on two testcases that we used to accept in C++17 mode. Both testcases declare partial specializations for which the primary template contains an NTTP with dependent type, but the correctness of these partial specializations is unclear according to PR86193. This patch restores the previous C++17 behavior for such partial specializations by restricting the r11-6483 change to just ordinary deduction as opposed to deduction for sake of partial ordering. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk/11? PR c++/105289 PR c++/86193 gcc/cp/ChangeLog: * pt.cc (unify) : Restrict the r11-6483 change to just ordinary deduction for function templates. When substituting into the NTTP type the second time, use the original type not the substituted type. Remove now unnecessary level check. gcc/testsuite/ChangeLog: * g++.dg/template/partial5.C: Revert r11-6483 change. * g++.dg/template/partial-specialization11.C: New test. * g++.dg/template/partial-specialization12.C: New test. --- gcc/cp/pt.cc | 25 --- .../template/partial-specialization11.C | 10 .../template/partial-specialization12.C | 12 + gcc/testsuite/g++.dg/template/partial5.C | 2 +- 4 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization11.C create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization12.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index dde62ee052d..52bd130b7e7 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -24287,8 +24287,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, /* We haven't deduced the type of this parameter yet. */ if (cxx_dialect >= cxx17 /* We deduce from array bounds in try_array_deduction. */ - && !(strict & UNIFY_ALLOW_INTEGER) - && TEMPLATE_PARM_LEVEL (parm) <= TMPL_ARGS_DEPTH (targs)) + && !(strict & UNIFY_ALLOW_INTEGER)) { /* Deduce it from the non-type argument. 
As above, ignore top-level quals here too. */ @@ -24296,13 +24295,21 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, RECUR_AND_CHECK_FAILURE (tparms, targs, tparm, atype, UNIFY_ALLOW_NONE, explain_p); - /* Now check whether the type of this parameter is still -dependent, and give up if so. */ - ++processing_template_decl; - tparm = tsubst (tparm, targs, tf_none, NULL_TREE); - --processing_template_decl; - if (uses_template_parms (tparm)) - return unify_success (explain_p); + if (!processing_template_decl + && TPARMS_PRIMARY_TEMPLATE (tparms) + && DECL_FUNCTION_TEMPLATE_P (TPARMS_PRIMARY_TEMPLATE + (tparms))) + { + /* If the NTTP's type uses still-undeduced template +parameters, then don't unify it now. This gives +type_unification_real a chance to retry deduction +with default template arguments substituted in. */ + ++processing_template_decl; + tparm = tsubst (TREE_TYPE (parm), targs, tf_none, NULL_TREE); + --processing_template_decl; + if (uses_template_parms (tparm)) + return unify_success (explain_p); + } } else /* Try again later. 
*/ diff --git a/gcc/testsuite/g++.dg/template/partial-specialization11.C b/gcc/testsuite/g++.dg/template/partial-specialization11.C new file mode 100644 index 000..20da407d422 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/partial-specialization11.C @@ -0,0 +1,10 @@ +// PR c++/105289 + +template struct value_type; + +template::type V> +struct push_front_vlist; + +template +struct push_front_vlist { }; +// { dg-error "not more specialized" "PR86193" { target c++14_down } .-1 } diff --git a/gcc/testsuite/g++.dg/template/partial-specialization12.C b/gcc/testsuite/g++.dg/template/partial-specialization12.C new file mode 100644 index 000..d70f7592790 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/partial-specialization12.C @@ -0,0 +1,12 @@ +// PR c++/105289 +// { dg-do compile { target c++11 } } + +template +struct value_type; + +template ::type Element> +struct push_front_vlist; + +template class XList, class T, T Arg, T... Vs> +struct push_front_vlist, Arg> { }; +// { dg-error "not more specialized" "PR86193" { target c++14_down } .-1 } diff --git a/gcc/testsuite/g++.dg/template/partial5.C b/gcc/testsuite
[PATCH] c++: crash with requires-expr and -Wsequence-point [PR105304]
Here we're crashing from verify_sequence_points for this requires-expr condition because it contains a templated CAST_EXPR with empty operand, and verify_tree doesn't ignore this empty operand only because the manual tail recursion that it performs for unary expression trees skips the NULL test. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for 10/11/trunk? PR c++/105304 gcc/c-family/ChangeLog: * c-common.cc (verify_tree) [restart]: Move up to before the NULL test. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-requires30.C: New test. --- gcc/c-family/c-common.cc | 2 +- gcc/testsuite/g++.dg/cpp2a/concepts-requires30.C | 10 ++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-requires30.C diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc index 70f55f3a346..bb0544eeaea 100644 --- a/gcc/c-family/c-common.cc +++ b/gcc/c-family/c-common.cc @@ -2009,12 +2009,12 @@ verify_tree (tree x, struct tlist **pbefore_sp, struct tlist **pno_sp, enum tree_code code; enum tree_code_class cl; + restart: /* X may be NULL if it is the operand of an empty statement expression ({ }). */ if (x == NULL) return; - restart: code = TREE_CODE (x); cl = TREE_CODE_CLASS (code); diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-requires30.C b/gcc/testsuite/g++.dg/cpp2a/concepts-requires30.C new file mode 100644 index 000..f500af3f616 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-requires30.C @@ -0,0 +1,10 @@ +// PR c++/105304 +// { dg-do compile { target c++20 } } +// { dg-additional-options "-Wall -Wsequence-point" } + +struct A { }; + +int main() { + if (requires { A(); }) +; +} -- 2.36.0.rc2.10.g1ac7422e39
Re: [PATCH] c++: partial ordering with dependent NTTP type [PR105289]
Whoops, this patch is identical to https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593502.html that I sent about an hour ago, sorry for the noise. On Fri, Apr 22, 2022 at 2:33 PM Patrick Palka wrote: > > Here ever since r11-6483-ge2e2f3f2c9400f we're rejecting and crashing > (respectively) on two testcases that we used to accept in C++17 mode. > Both testcases declare partial specializations for which the primary > template contains an NTTP with dependent type, but the correctness of > these partial specializations is unclear according to PR86193. > > This patch restores the previous C++17 behavior for such partial > specializations by restricting the r11-6483 change to just ordinary > deduction as opposed to deduction for sake of partial ordering. > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for > trunk/11? > > PR c++/105289 > PR c++/86193 > > gcc/cp/ChangeLog: > > * pt.cc (unify) : Restrict the > r11-6483 change to just ordinary deduction for function > templates. When substituting into the NTTP type the second > time, use the original type not the substituted type. Remove > now unnecessary level check. > > gcc/testsuite/ChangeLog: > > * g++.dg/template/partial5.C: Revert r11-6483 change. > * g++.dg/template/partial-specialization11.C: New test. > * g++.dg/template/partial-specialization12.C: New test. 
> --- > gcc/cp/pt.cc | 25 --- > .../template/partial-specialization11.C | 10 > .../template/partial-specialization12.C | 12 + > gcc/testsuite/g++.dg/template/partial5.C | 2 +- > 4 files changed, 39 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization11.C > create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization12.C > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc > index dde62ee052d..52bd130b7e7 100644 > --- a/gcc/cp/pt.cc > +++ b/gcc/cp/pt.cc > @@ -24287,8 +24287,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, > int strict, > /* We haven't deduced the type of this parameter yet. */ > if (cxx_dialect >= cxx17 > /* We deduce from array bounds in try_array_deduction. */ > - && !(strict & UNIFY_ALLOW_INTEGER) > - && TEMPLATE_PARM_LEVEL (parm) <= TMPL_ARGS_DEPTH (targs)) > + && !(strict & UNIFY_ALLOW_INTEGER)) > { > /* Deduce it from the non-type argument. As above, ignore > top-level quals here too. */ > @@ -24296,13 +24295,21 @@ unify (tree tparms, tree targs, tree parm, tree > arg, int strict, > RECUR_AND_CHECK_FAILURE (tparms, targs, >tparm, atype, >UNIFY_ALLOW_NONE, explain_p); > - /* Now check whether the type of this parameter is still > -dependent, and give up if so. */ > - ++processing_template_decl; > - tparm = tsubst (tparm, targs, tf_none, NULL_TREE); > - --processing_template_decl; > - if (uses_template_parms (tparm)) > - return unify_success (explain_p); > + if (!processing_template_decl > + && TPARMS_PRIMARY_TEMPLATE (tparms) > + && DECL_FUNCTION_TEMPLATE_P (TPARMS_PRIMARY_TEMPLATE > + (tparms))) > + { > + /* If the NTTP's type uses still-undeduced template > +parameters, then don't unify it now. This gives > +type_unification_real a chance to retry deduction > +with default template arguments substituted in. 
*/ > + ++processing_template_decl; > + tparm = tsubst (TREE_TYPE (parm), targs, tf_none, > NULL_TREE); > + --processing_template_decl; > + if (uses_template_parms (tparm)) > + return unify_success (explain_p); > + } > } > else > /* Try again later. */ > diff --git a/gcc/testsuite/g++.dg/template/partial-specialization11.C > b/gcc/testsuite/g++.dg/template/partial-specialization11.C > new file mode 100644 > index 000..20da407d422 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/template/partial-specialization11.C > @@ -0,0 +1,10 @@ > +// PR c++/105289 > + > +template struct value_type; > + > +template::type V> > +struct push_front_vlist; > + > +template > +struct push_front_vlist { }; > +// { dg-error "not more specialized" "PR86193" { target c++14_down } .-1 } > diff --git a/gcc/testsuite/g++.dg/template/partial-specialization12.C > b/gcc/testsuite/g++.dg/template/partial-specialization12.C > new file mode 100644 > index 000..d70f7592790 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/template/part
Re: [PATCH] c++: partial ordering with dependent NTTP type [PR105289]
On Fri, 22 Apr 2022, Patrick Palka wrote: > Here ever since r11-6483-ge2e2f3f2c9400f we're rejecting and crashing > (respectively) on two testcases that we used to accept in C++17 mode. > Both testcases declare partial specializations for which the primary > template contains an NTTP with dependent type, but the correctness of > these partial specializations is unclear according to PR86193. > > This patch restores the previous C++17 behavior for such partial > specializations by restricting the r11-6483 change to just ordinary > deduction as opposed to deduction for sake of partial ordering. Note that if we're okay with rejecting such partial specializations even in C++17 mode (and thus deeming PR105289 to be ICE-on-invalid instead of ICE-on-valid), then the fix for the reported ICE is just: diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index dde62ee052d..6d65f6ad3cf 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -24299,7 +24299,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, /* Now check whether the type of this parameter is still dependent, and give up if so. */ ++processing_template_decl; - tparm = tsubst (tparm, targs, tf_none, NULL_TREE); + tparm = tsubst (TREE_TYPE (parm), targs, tf_none, NULL_TREE); --processing_template_decl; if (uses_template_parms (tparm)) return unify_success (explain_p); i.e. we need to substitute into the original NTTP type, not into the already substituted NTTP type. > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for > trunk/11? > > PR c++/105289 > PR c++/86193 > > gcc/cp/ChangeLog: > > * pt.cc (unify) : Restrict the > r11-6483 change to just ordinary deduction for function > templates. When substituting into the NTTP type the second > time, use the original type not the substituted type. Remove > now unnecessary level check. > > gcc/testsuite/ChangeLog: > > * g++.dg/template/partial5.C: Revert r11-6483 change. > * g++.dg/template/partial-specialization11.C: New test. 
> * g++.dg/template/partial-specialization12.C: New test. > --- > gcc/cp/pt.cc | 25 --- > .../template/partial-specialization11.C | 10 > .../template/partial-specialization12.C | 12 + > gcc/testsuite/g++.dg/template/partial5.C | 2 +- > 4 files changed, 39 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization11.C > create mode 100644 gcc/testsuite/g++.dg/template/partial-specialization12.C > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc > index dde62ee052d..52bd130b7e7 100644 > --- a/gcc/cp/pt.cc > +++ b/gcc/cp/pt.cc > @@ -24287,8 +24287,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, > int strict, > /* We haven't deduced the type of this parameter yet. */ > if (cxx_dialect >= cxx17 > /* We deduce from array bounds in try_array_deduction. */ > - && !(strict & UNIFY_ALLOW_INTEGER) > - && TEMPLATE_PARM_LEVEL (parm) <= TMPL_ARGS_DEPTH (targs)) > + && !(strict & UNIFY_ALLOW_INTEGER)) > { > /* Deduce it from the non-type argument. As above, ignore >top-level quals here too. */ > @@ -24296,13 +24295,21 @@ unify (tree tparms, tree targs, tree parm, tree > arg, int strict, > RECUR_AND_CHECK_FAILURE (tparms, targs, > tparm, atype, > UNIFY_ALLOW_NONE, explain_p); > - /* Now check whether the type of this parameter is still > - dependent, and give up if so. */ > - ++processing_template_decl; > - tparm = tsubst (tparm, targs, tf_none, NULL_TREE); > - --processing_template_decl; > - if (uses_template_parms (tparm)) > - return unify_success (explain_p); > + if (!processing_template_decl > + && TPARMS_PRIMARY_TEMPLATE (tparms) > + && DECL_FUNCTION_TEMPLATE_P (TPARMS_PRIMARY_TEMPLATE > +(tparms))) > + { > + /* If the NTTP's type uses still-undeduced template > + parameters, then don't unify it now. This gives > + type_unification_real a chance to retry deduction > + with default template arguments substituted in. 
*/ > + ++processing_template_decl; > + tparm = tsubst (TREE_TYPE (parm), targs, tf_none, NULL_TREE); > + --processing_template_decl; > + if (uses_template_parms (tparm)) > + return unify_success (explain_p); > + } > } > else > /* Try again later. */ > diff --git a/gcc/testsuite/g++.dg/template/partial-specialization1
[PATCH] rs6000: Fix pack for soft-float (PR105334)
For PR103623 I fixed unpack, but pack is broken as well, as reported in PR105334. Fixing that is a bit more code, but it is pretty simple code nonetheless. Committing to trunk. Segher 2022-04-22 Segher Boessenkool PR target/105334 * config/rs6000/rs6000.md (pack for FMOVE128): New expander. (pack for FMOVE128): Rename and split the insn_and_split to... (pack_hard for FMOVE128): ... this... (pack_soft for FMOVE128): ... and this. --- gcc/config/rs6000/rs6000.md | 45 +++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index a39b95f7dffa..64049a6e521c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -14602,13 +14602,26 @@ (define_insn_and_split "unpack_nodm" } [(set_attr "type" "fp,fpstore,store")]) -(define_insn_and_split "pack" +(define_expand "pack" + [(use (match_operand:FMOVE128 0 "register_operand")) + (use (match_operand: 1 "register_operand")) + (use (match_operand: 2 "register_operand"))] + "FLOAT128_2REG_P (mode)" +{ + if (TARGET_HARD_FLOAT) +emit_insn (gen_pack_hard (operands[0], operands[1], operands[2])); + else +emit_insn (gen_pack_soft (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn_and_split "pack_hard" [(set (match_operand:FMOVE128 0 "register_operand" "=&d") (unspec:FMOVE128 [(match_operand: 1 "register_operand" "d") (match_operand: 2 "register_operand" "d")] UNSPEC_PACK_128BIT))] - "FLOAT128_2REG_P (mode)" + "FLOAT128_2REG_P (mode) && TARGET_HARD_FLOAT" "#" "&& reload_completed" [(set (match_dup 3) (match_dup 1)) @@ -14626,6 +14639,34 @@ (define_insn_and_split "pack" [(set_attr "type" "fp") (set_attr "length" "8")]) +(define_insn_and_split "pack_soft" + [(set (match_operand:FMOVE128 0 "register_operand" "=&r") + (unspec:FMOVE128 +[(match_operand: 1 "register_operand" "r") + (match_operand: 2 "register_operand" "r")] +UNSPEC_PACK_128BIT))] + "FLOAT128_2REG_P (mode) && TARGET_SOFT_FLOAT" + "#" + "&& reload_completed" + [(set 
(match_dup 3) (match_dup 1)) + (set (match_dup 4) (match_dup 2))] +{ + unsigned dest_hi = REGNO (operands[0]); + unsigned dest_lo = dest_hi + (TARGET_POWERPC64 ? 1 : 2); + + gcc_assert (!IN_RANGE (REGNO (operands[1]), dest_hi, dest_lo)); + gcc_assert (!IN_RANGE (REGNO (operands[2]), dest_hi, dest_lo)); + + operands[3] = gen_rtx_REG (mode, dest_hi); + operands[4] = gen_rtx_REG (mode, dest_lo); +} + [(set_attr "type" "integer") + (set (attr "length") + (if_then_else +(match_test "TARGET_POWERPC64") +(const_string "8") +(const_string "16")))]) + (define_insn "unpack" [(set (match_operand:DI 0 "register_operand" "=wa,wa") (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa") -- 1.8.3.1
Re: [PATCH] libstdc++: Make atomic notify_one and notify_all non-const
Committed to trunk, backported to releases/gcc-11. On Fri, Feb 11, 2022 at 12:22 PM Jonathan Wakely wrote: > On Fri, 11 Feb 2022 at 17:40, Thomas Rodgers via Libstdc++ > wrote: > > > > > > PR102994 "atomics: std::atomic::wait is not marked const" raises the > > issue that the current libstdc++ implementation marks the notify members > > const; the implementation strategy used by libstdc++, as well as libc++ > > and the Microsoft STL, do not require the atomic to be mutable (it is > hard > > to conceive of a desirable implementation approach that would require > it). > > The original paper proposing the wait/notify functionality for atomics > > (p1185) also had these members marked const for the first three > revisions, > > but that was changed without explanation in r3 and subsequent revisions > of > > the paper. > > > > After raising the issue to the authors of p1185 and the author of the > > libc++ implementation, the consensus seems to be "meh, it's harmless" so > > there seems little appetite for an LWG issue to revisit the subject. > > > > This patch changes the libstdc++ implementation to be in agreement with > > the standard by removing const from those notify_one/notify_all members. > > > > libstdc++-v3/ChangeLog: > > Might as well add a "PR libstdc++/102994" here so the bug gets updated > automatically. > > OK for trunk with that change. > > > * include/bits/atomic_base.h (atomic_flag::notify_one, > > notify_all): Remove const qualification. > > (__atomic_base::notify_one, notify_all): Likewise. > > * include/std/atomic (atomic::notify_one, notify_all): > > Likewise. > > (atomic::notify_one, notify_all): Likewise. > > (atomic::notify_one, notify_all): Likewise. > > (atomic_notify_one, atomic_notify_all): Likewise. > > * testsuite/29_atomics/atomic/wait_notify/102994.cc: Adjust test > > to account for change in notify_one/notify_all signature. > > > > Tested x86_64-pc-linux-gnu. > >
[PATCH] c++: __builtin_shufflevector with value-dep expr [PR105353]
Here we issue an error from c_build_shufflevector while parsing a template because it got a TEMPLATE_PARM_INDEX, but this function expects INTEGER_CSTs (except the first two arguments). It checks if any of the arguments are type-dependent, if so, we leave the processing for later, but it should also check value-dependency for the 3rd+ arguments, so as to avoid the problem above. This is not a regression -- __builtin_shufflevector was introduced in GCC 12, but it looks safe enough. Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? PR c++/105353 gcc/cp/ChangeLog: * typeck.cc (build_x_shufflevector): Use instantiation_dependent_expression_p except for the first two arguments. gcc/testsuite/ChangeLog: * g++.dg/ext/builtin-shufflevector-3.C: New test. --- gcc/cp/typeck.cc | 4 +++- .../g++.dg/ext/builtin-shufflevector-3.C | 23 +++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 26a7cb4b50d..0da6f2485d0 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -6315,7 +6315,9 @@ build_x_shufflevector (location_t loc, vec *args, if (processing_template_decl) { for (unsigned i = 0; i < args->length (); ++i) - if (type_dependent_expression_p ((*args)[i])) + if (i <= 1 + ? 
type_dependent_expression_p ((*args)[i]) + : instantiation_dependent_expression_p ((*args)[i])) { tree exp = build_min_nt_call_vec (NULL, args); CALL_EXPR_IFN (exp) = IFN_SHUFFLEVECTOR; diff --git a/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C b/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C new file mode 100644 index 000..0f3cbbee563 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C @@ -0,0 +1,23 @@ +// PR c++/105353 +// { dg-do compile { target c++17 } } +// { dg-additional-options "-Wno-psabi" } + +typedef unsigned char Simd128U8VectT __attribute__((__vector_size__(16))); + +template +static inline Simd128U8VectT ShufFunc(Simd128U8VectT vect) noexcept { +if constexpr(unsigned(ShuffleIndex) >= 16) +return Simd128U8VectT { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +else if constexpr(ShuffleIndex == 0) +return vect; +else +return __builtin_shufflevector(vect, vect, ShuffleIndex, ShuffleIndex + 1, +ShuffleIndex + 2, ShuffleIndex + 3, ShuffleIndex + 4, ShuffleIndex + 5, +ShuffleIndex + 6, ShuffleIndex + 7, ShuffleIndex + 8, ShuffleIndex + 9, +ShuffleIndex + 10, ShuffleIndex + 11, ShuffleIndex + 12, ShuffleIndex + 13, +ShuffleIndex + 14, ShuffleIndex + 15); +} + +auto func1(Simd128U8VectT vect) noexcept { +return ShufFunc<5>(vect); +} base-commit: 7c21556daf385fe9ece37319f574776dd7d8ab1c -- 2.35.1
Re: [PATCH] c++: __builtin_shufflevector with value-dep expr [PR105353]
> Am 23.04.2022 um 01:58 schrieb Marek Polacek via Gcc-patches > : > > Here we issue an error from c_build_shufflevector while parsing a template > because it got a TEMPLATE_PARM_INDEX, but this function expects INTEGER_CSTs > (except the first two arguments). It checks if any of the arguments are > type-dependent, if so, we leave the processing for later, but it should > also check value-dependency for the 3rd+ arguments, so as to avoid the > problem above. > > This is not a regression -- __builtin_shufflevector was introduced in > GCC 12, but it looks safe enough. > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? Fine with me. Richard >PR c++/105353 > > gcc/cp/ChangeLog: > >* typeck.cc (build_x_shufflevector): Use >instantiation_dependent_expression_p except for the first two >arguments. > > gcc/testsuite/ChangeLog: > >* g++.dg/ext/builtin-shufflevector-3.C: New test. > --- > gcc/cp/typeck.cc | 4 +++- > .../g++.dg/ext/builtin-shufflevector-3.C | 23 +++ > 2 files changed, 26 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C > > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc > index 26a7cb4b50d..0da6f2485d0 100644 > --- a/gcc/cp/typeck.cc > +++ b/gcc/cp/typeck.cc > @@ -6315,7 +6315,9 @@ build_x_shufflevector (location_t loc, vec > *args, > if (processing_template_decl) > { > for (unsigned i = 0; i < args->length (); ++i) > -if (type_dependent_expression_p ((*args)[i])) > +if (i <= 1 > +? 
type_dependent_expression_p ((*args)[i]) > +: instantiation_dependent_expression_p ((*args)[i])) > { >tree exp = build_min_nt_call_vec (NULL, args); >CALL_EXPR_IFN (exp) = IFN_SHUFFLEVECTOR; > diff --git a/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C > b/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C > new file mode 100644 > index 000..0f3cbbee563 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/ext/builtin-shufflevector-3.C > @@ -0,0 +1,23 @@ > +// PR c++/105353 > +// { dg-do compile { target c++17 } } > +// { dg-additional-options "-Wno-psabi" } > + > +typedef unsigned char Simd128U8VectT __attribute__((__vector_size__(16))); > + > +template > +static inline Simd128U8VectT ShufFunc(Simd128U8VectT vect) noexcept { > +if constexpr(unsigned(ShuffleIndex) >= 16) > +return Simd128U8VectT { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, > 0 }; > +else if constexpr(ShuffleIndex == 0) > +return vect; > +else > +return __builtin_shufflevector(vect, vect, ShuffleIndex, > ShuffleIndex + 1, > +ShuffleIndex + 2, ShuffleIndex + 3, ShuffleIndex + 4, > ShuffleIndex + 5, > +ShuffleIndex + 6, ShuffleIndex + 7, ShuffleIndex + 8, > ShuffleIndex + 9, > +ShuffleIndex + 10, ShuffleIndex + 11, ShuffleIndex + 12, > ShuffleIndex + 13, > +ShuffleIndex + 14, ShuffleIndex + 15); > +} > + > +auto func1(Simd128U8VectT vect) noexcept { > +return ShufFunc<5>(vect); > +} > > base-commit: 7c21556daf385fe9ece37319f574776dd7d8ab1c > -- > 2.35.1 >
[PATCH] i386: Improve ix86_expand_int_movcc [PR105338]
Hi! The following testcase regressed on x86_64 on the trunk; due to some GIMPLE pass changes (r12-7687) we end up with an *.optimized dump difference of: @@ -8,14 +8,14 @@ int foo (int i) [local count: 1073741824]: if (i_2(D) != 0) -goto ; [35.00%] +goto ; [35.00%] else -goto ; [65.00%] +goto ; [65.00%] - [local count: 697932184]: + [local count: 375809640]: [local count: 1073741824]: - # iftmp.0_1 = PHI <5(2), i_2(D)(3)> + # iftmp.0_1 = PHI <5(3), i_2(D)(2)> return iftmp.0_1; } and similarly for the other functions. That is functionally equivalent and there is no canonical form for those. The reason for i_2(D) in the PHI argument as opposed to 0 is the uncprop pass, that is in many cases beneficial for expansion as we don't need to load the value into some pseudo in one of the if blocks. Now, for the 11.x ordering we have the pseudo = i insn in the extended basic block (it comes first) and so fwprop1 undoes what uncprop does by propagating constant 0 there. But for the 12.x ordering, the extended basic block contains pseudo = 5 and pseudo = i is in the other bb and so fwprop1 doesn't change it. During the ce1 pass, we attempt to emit a conditional move and we have very nice code for the cases where both last operands of ?: are constant, and yet another for !TARGET_CMOVE if at least one of them is. The following patch will undo the uncprop behavior during ix86_expand_int_movcc, but just for those spots that can benefit from both or at least one operand being constant, leaving the pure cmov case as is (because then it is useful not to have to load a constant into a pseudo as it already is in one). We can do that in the op0 == op1 ? op0 : op3 or op0 != op1 ? op2 : op0 cases if op1 is a CONST_INT by pretending it is op0 == op1 ? op1 : op3 or op0 != op1 ? op2 : op1 Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-04-23 Jakub Jelinek PR target/105338 * config/i386/i386-expand.cc (ix86_expand_int_movcc): Handle op0 == cst1 ? 
op0 : op3 like op0 == cst1 ? cst1 : op3 for the non-cmov cases. * gcc.target/i386/pr105338.c: New test. --- gcc/config/i386/i386-expand.cc.jj 2022-04-13 15:42:39.0 +0200 +++ gcc/config/i386/i386-expand.cc 2022-04-22 14:18:27.347135185 +0200 @@ -3136,6 +3136,8 @@ ix86_expand_int_movcc (rtx operands[]) bool sign_bit_compare_p = false; rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); + rtx op2 = operands[2]; + rtx op3 = operands[3]; if (GET_MODE (op0) == TImode || (GET_MODE (op0) == DImode @@ -3153,17 +3155,29 @@ ix86_expand_int_movcc (rtx operands[]) || (op1 == constm1_rtx && (code == GT || code == LE))) sign_bit_compare_p = true; + /* op0 == op1 ? op0 : op3 is equivalent to op0 == op1 ? op1 : op3, + but if op1 is a constant, the latter form allows more optimizations, + either through the last 2 ops being constant handling, or the one + constant and one variable cases. On the other side, for cmov the + former might be better as we don't need to load the constant into + another register. */ + if (code == EQ && CONST_INT_P (op1) && rtx_equal_p (op0, op2)) +op2 = op1; + /* Similarly for op0 != op1 ? op2 : op0 and op0 != op1 ? op2 : op1. */ + else if (code == NE && CONST_INT_P (op1) && rtx_equal_p (op0, op3)) +op3 = op1; + /* Don't attempt mode expansion here -- if we had to expand 5 or 6 HImode insns, we'd be swallowed in word prefix ops. */ if ((mode != HImode || TARGET_FAST_PREFIX) && (mode != (TARGET_64BIT ? 
TImode : DImode)) - && CONST_INT_P (operands[2]) - && CONST_INT_P (operands[3])) + && CONST_INT_P (op2) + && CONST_INT_P (op3)) { rtx out = operands[0]; - HOST_WIDE_INT ct = INTVAL (operands[2]); - HOST_WIDE_INT cf = INTVAL (operands[3]); + HOST_WIDE_INT ct = INTVAL (op2); + HOST_WIDE_INT cf = INTVAL (op3); HOST_WIDE_INT diff; diff = ct - cf; @@ -3559,6 +3573,9 @@ ix86_expand_int_movcc (rtx operands[]) if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) return false; + operands[2] = op2; + operands[3] = op3; + /* If one of the two operands is an interesting constant, load a constant with the above and mask it in with a logical operation. */ --- gcc/testsuite/gcc.target/i386/pr105338.c.jj 2022-04-22 16:14:35.827045371 +0200 +++ gcc/testsuite/gcc.target/i386/pr105338.c2022-04-22 16:20:43.579913630 +0200 @@ -0,0 +1,26 @@ +/* PR target/105338 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-ipa-icf -masm=att" } */ +/* { dg-final { scan-assembler-times "\tnegl\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tsbbl\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tandl\t" 3 } } */ + +int +foo (int i) +{ + return i ? 5 : 0; +} + +int +bar (int b) +{ + return !!b * 5; +} + +int +baz (int b) +{ + if (!b) +re
Re: [PATCH] c++: __builtin_shufflevector with value-dep expr [PR105353]
On Fri, Apr 22, 2022 at 07:57:34PM -0400, Marek Polacek via Gcc-patches wrote: > Here we issue an error from c_build_shufflevector while parsing a template > because it got a TEMPLATE_PARM_INDEX, but this function expects INTEGER_CSTs > (except the first two arguments). It checks if any of the arguments are > type-dependent, if so, we leave the processing for later, but it should > also check value-dependency for the 3rd+ arguments, so as to avoid the > problem above. > > This is not a regression -- __builtin_shufflevector was introduced in > GCC 12, but it looks safe enough. > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? > > PR c++/105353 > > gcc/cp/ChangeLog: > > * typeck.cc (build_x_shufflevector): Use > instantiation_dependent_expression_p except for the first two > arguments. > > gcc/testsuite/ChangeLog: > > * g++.dg/ext/builtin-shufflevector-3.C: New test. LGTM. Jakub