[PATCH] middle-end: Fix unexpected warnings for RISC-V port.
From: zhongjuzhe gcc/ChangeLog: * tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Simply initialize const_vf to 0. --- gcc/tree-vect-loop-manip.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 86d2264054a..c3a24e6c39a 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1977,8 +1977,9 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, } else ni_minus_gap = niters; - - unsigned HOST_WIDE_INT const_vf; + + /* To silence some unexpected warnings, simply initialize to 0. */ + unsigned HOST_WIDE_INT const_vf = 0; if (vf.is_constant (&const_vf) && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { -- 2.36.1
Re: [PATCH] middle-end: Fix unexpected warnings for RISC-V port.
On Tue, 23 Aug 2022, juzhe.zh...@rivai.ai wrote: > From: zhongjuzhe OK. > gcc/ChangeLog: > > * tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Simply > initialize const_vf to 0. > > --- > gcc/tree-vect-loop-manip.cc | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index 86d2264054a..c3a24e6c39a 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -1977,8 +1977,9 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, > tree niters, > } >else > ni_minus_gap = niters; > - > - unsigned HOST_WIDE_INT const_vf; > + > + /* To silence some unexpected warnings, simply initialize to 0. */ > + unsigned HOST_WIDE_INT const_vf = 0; >if (vf.is_constant (&const_vf) >&& !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) > { > -- Richard Biener SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)
Re: [PATCH] Enhance final_value_replacement_loop to handle bitop with an invariant induction.[PR105735]
On Thu, Aug 18, 2022 at 8:48 AM Kong, Lingling via Gcc-patches wrote: > > Hi, > > This patch is for pr105735/pr101991. It will enable below optimization: > { > - long unsigned int bit; > - > - [local count: 32534376]: > - > - [local count: 1041207449]: > - # tmp_10 = PHI > - # bit_12 = PHI > - tmp_7 = bit2_6(D) & tmp_10; > - bit_8 = bit_12 + 1; > - if (bit_8 != 32) > -goto ; [96.97%] > - else > -goto ; [3.03%] > - > - [local count: 1009658865]: > - goto ; [100.00%] > - > - [local count: 32534376]: > - # tmp_11 = PHI > - return tmp_11; > + tmp_11 = tmp_4(D) & bit2_6(D); > + return tmp_11; > > } > > Ok for master ? > > gcc/ChangeLog: > > PR middle-end/105735 > * match.pd (bitop_with_inv_p): New match. > * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare. > (analyze_and_compute_bitop_with_inv_effect): New function. > (final_value_replacement_loop): Enhanced to handle bitop > with inv induction. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr105735-1.c: New test. > * gcc.target/i386/pr105735-2.c: New test. > --- > gcc/match.pd | 4 + > gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++ > gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++ > gcc/tree-scalar-evolution.cc | 59 +++ > 4 files changed, 179 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c > > diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -8050,6 +8050,10 @@ and, > (bit_not >(nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3 > > +(for bit_op (bit_and bit_ior bit_xor) > + (match (bitop_with_inv_p @0 @1) > + (bit_op:c @0 @1))) > + That's a single-stmt match, you shouldn't use match.pd matching for this. Instead just do if (is_gimple_assign (stmt) && ((code = gimple_assign_rhs_code (stmt)), true) && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR)) .. and pick gimple_assign_rhs{1,2} (stmt) as the operands. The :c in bit_op:c is redundant btw. - while the name suggests "with invariant" you don't actually check for that. But again, given canonicalization rules the invariant will be rhs2 so above add && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST for example. > /* n - (((n > C1) ? n : C1) & -C2) -> n & C1 for unsigned case. > n - (((n > C1) ? n : C1) & -C2) -> (n <= C1) ? n : (n & C1) for signed > case. */ (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c > b/gcc/testsuite/gcc.target/i386/pr105735-1.c > new file mode 100644 > index 000..8d2123ed351 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c > @@ -0,0 +1,88 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ > +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" > +} } */ > + > +unsigned long long > +__attribute__((noipa)) > +foo (unsigned long long tmp, unsigned long long bit2) { you probably need dg-require-effective-target longlong, but is it necessary to use long long for the testcases in the first place? The IV seems to be unused, if it should match the variables bit size use sizeof (type) * 8 > + for (int bit = 0; bit < 64; bit++) > +tmp &= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo1 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 63; bit >= 0; bit -=3) > +tmp &= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo2 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 64; bit++) > +tmp |= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo3 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 63; bit >= 0; bit -=3) > +tmp |= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo4 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 64; bit++) > +tmp ^= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +foo5 (unsigned long long tmp, unsigned long long bit2) { > + for (int bit = 0; bit < 63; bit++) > +tmp ^= bit2; > + return tmp; > +} > + > +unsigned long long > +__attribute__((noipa)) > +f (unsigned long long tmp, long long bit, unsigned long long bit2) { > + unsigned long long res = tmp; > + for (long long i = 0; i < bit; i++) > +res &= bit2; > + return res; > +} > + > +unsigned long long > +__attribute__((noipa)) > +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) { > + unsigned long long res = tmp; > + for (long long i = 0; i < bit; i++) > +res |= bit2; > + return res; > +} > + > +unsigned long long >
Patch ping (Re: [PATCH] Implement __builtin_issignaling)
On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote: > I'm OK with the rest of the patch if Joseph doesn't have comments > on the actual issignaling lowerings (which I didn't review for > correctness due to lack of knowledge). I'd like to ping this patch. Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html with https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html incremental additions. gcc/testsuite/gcc.dg/torture/builtin-issignaling-1.c's testcase f4-f7 functions (i.e. functions which just return __builtin_issignaling of the argument which is float, double, long double and _Float128 on x86_64-linux) with -O2 are in assembly: .p2align 4 .globl f4 .type f4, @function f4: .LFB0: .cfi_startproc movd%xmm0, %eax xorl$4194304, %eax andl$2147483647, %eax cmpl$2143289344, %eax seta%al movzbl %al, %eax ret .cfi_endproc .LFE0: .size f4, .-f4 .p2align 4 .globl f5 .type f5, @function f5: .LFB1: .cfi_startproc movabsq $9221120237041090560, %rdx movq%xmm0, %rax btcq$51, %rax btrq$63, %rax cmpq%rax, %rdx setb%al movzbl %al, %eax ret .cfi_endproc .LFE1: .size f5, .-f5 .p2align 4 .globl f6 .type f6, @function f6: .LFB2: .cfi_startproc fldt8(%rsp) fstpt -24(%rsp) movl-20(%rsp), %edx movl-24(%rsp), %eax movl%edx, %ecx negl%eax orl -24(%rsp), %eax notl%edx xorl$1073741824, %ecx shrl$31, %eax orl %ecx, %eax movzwl -16(%rsp), %ecx cmpl$-1073741824, %eax movl%ecx, %esi seta%al notl%ecx andl$32767, %esi movzbl %al, %eax negl%esi testw $32767, %cx sete%cl andl%esi, %edx movzbl %cl, %ecx shrl$31, %edx andl%ecx, %eax orl %edx, %eax ret .cfi_endproc .LFE2: .size f6, .-f6 .p2align 4 .globl f7 .type f7, @function f7: .LFB3: .cfi_startproc movaps %xmm0, -24(%rsp) movq-24(%rsp), %rsi movq-16(%rsp), %rdi movq%rsi, %rdx movq%rdi, %rax negq%rdx btcq$47, %rax orq %rsi, %rdx shrq$63, %rdx orq %rdx, %rax movabsq $9223231299366420480, %rdx btrq$63, %rax cmpq%rax, %rdx setb%al movzbl %al, %eax ret .cfi_endproc .LFE3: .size f7, .-f7 Thanks Jakub
Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple
On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++ wrote: > > P2321R2 adds new conditionally explicit constructors to std::tuple which > we'll concisely implement in a subsequent patch using explicit(bool), like > in our C++20 std::pair implementation. But before we can do that, this > patch first adds members to _TupleConstraints that test for constructibility > and convertibility separately; we'll use the first in the new constructors' > constraints, and the second in their explicit specifier. > > In passing, this patch also redefines the existing predicates > __is_ex/implicitly_constructible in terms of these new members. This > seems to reduce compile time and memory usage by about 10% for large Nice. > tuples when using the relevant constructors constrained by > _Explicit/_ImplicitCtor (since we no longer have to redundantly expand > and process is_constructible<_Types, _UTypes>... twice for each pair of > such constructors). In order to retain maximal short circuiting, do > this only when constexpr if is available. Would we get similar benefits for C++11 and C++14 by doing: return __and_<__and_...>, __and_...> >::value; This is slightly more work in total, but if we have __and_ and __and_> then the A and B instantiations will be cached and can be reused. > Tested on x86_64-pc-linux-gnu, does this look OK for trunk? Yes, thanks.
Re: [PATCH] RISC-V: Add runtime invariant support
On Mon, Aug 22, 2022 at 8:15 AM Richard Biener via Gcc-patches wrote: > > On Sat, 20 Aug 2022, Andrew Pinski wrote: > > > On Sat, Aug 20, 2022 at 3:34 PM Andreas Schwab > > wrote: > > > > > > This breaks bootstrap: > > > > > > ../../gcc/tree-vect-loop-manip.cc: In function 'void > > > vect_gen_vector_loop_niters(loop_vec_info, tree, tree_node**, > > > tree_node**, bool)': > > > ../../gcc/tree-vect-loop-manip.cc:1981:26: error: 'const_vf' may be used > > > uninitialized [-Werror=maybe-uninitialized] > > > 1981 | unsigned HOST_WIDE_INT const_vf; > > > | ^~~~ > > > cc1plus: all warnings being treated as errors > > > make[3]: *** [Makefile:1146: tree-vect-loop-manip.o] Error 1 > > > make[2]: *** [Makefile:4977: all-stage2-gcc] Error 2 > > > make[1]: *** [Makefile:30363: stage2-bubble] Error 2 > > > make: *** [Makefile:1065: all] Error 2 > > > > > > This looks like a real uninitialized variable issue. > > I even can't tell if the paths that lead to using const_vf will be > > always set so how we expect GCC to do the same. > > The code that uses const_vf was added with r11-5820-cdcbef3c3310, > > CCing the author there. > > The key is > > tree log_vf = NULL_TREE; > ... > unsigned HOST_WIDE_INT const_vf; > if (vf.is_constant (&const_vf) > && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) > { > ... > log_vf = build_int_cst (type, exact_log2 (const_vf)); > ... > } > ... > if (stmts != NULL && log_vf) > { > ... use const_vf ... > > so it's uninit analysis little mind that is confused. There is code > that's supposed to handle the situation (setting flag under condition, > testing that flag instead of condition) but maybe it's too twisted > here. One could refector this as > > bool const_vf_p = vf.is_constant (&const_vf); > if (const_vf_p > && ...) > ... >if (stmts != NULL && const_vf_p) > ... > > and hope uninits mind is good enough to see log_vf is not used > uninitialized. > > I can also look into why uninit doesn't get it, but preprocessed > source would be handy then. Btw, the riscv speciality is gcc/config/riscv/riscv.h:#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 with --param logical-op-non-short-circuit=1 the diagnostic does not occur. > > Richard.
[PATCH] Add newline when checking path profitability.
It looks like we're missing a newline for cases where we don't print anything. OK? gcc/ChangeLog: * tree-ssa-threadbackward.cc (possibly_profitable_path_p): Always add newline. (profitable_path_p): Same. --- gcc/tree-ssa-threadbackward.cc | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc index 3218ad931ef..9725f50e639 100644 --- a/gcc/tree-ssa-threadbackward.cc +++ b/gcc/tree-ssa-threadbackward.cc @@ -719,7 +719,11 @@ back_threader_profitability::possibly_profitable_path_p gimple *stmt = gsi_stmt (gsi); if (gimple_call_internal_p (stmt, IFN_UNIQUE) || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)) - return false; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fputc ('\n', dump_file); + return false; + } /* Do not count empty statements and labels. */ if (gimple_code (stmt) != GIMPLE_NOP && !is_gimple_debug (stmt)) @@ -821,6 +825,8 @@ back_threader_profitability::possibly_profitable_path_p && (m_n_insns * param_fsm_scale_path_stmts >= param_max_jump_thread_duplication_stmts)); + if (dump_file && (dump_flags & TDF_DETAILS)) +fputc ('\n', dump_file); return true; } @@ -947,6 +953,8 @@ back_threader_profitability::profitable_path_p (const vec &m_path, "non-empty latch\n"); return false; } + if (dump_file && (dump_flags & TDF_DETAILS)) +fputc ('\n', dump_file); return true; } -- 2.37.1
[COMMITTED] Copy range from op2 in foperator_equal::op1_range.
Like the integer version, when op1 == op2 is known to be true the ranges are also equal. gcc/ChangeLog: * range-op-float.cc (foperator_equal::op1_range): Set range to range of op2. --- gcc/range-op-float.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 4fbd96a7479..ad2fae578d2 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -252,7 +252,8 @@ foperator_equal::op1_range (frange &r, tree type, switch (get_bool_state (r, lhs, type)) { case BRS_TRUE: - r.set_varying (type); + // If it's true, the result is the same as OP2. + r = op2; // The TRUE side of op1 == op2 implies op1 is !NAN. r.set_nan (fp_prop::NO); break; -- 2.37.1
[PATCH] Add set/get functions for negative infinity in real.*
For the frange implementation with endpoints I'm about to contribute, we need to set REAL_VALUE_TYPEs with negative infinity. The support is already there in real.cc, but it is awkward to get at. One could call real_inf() and then negate the value, but I've added the ability to pass the sign argument like many of the existing real.* functions. I've declared the functions in such a way to avoid changes to the existing code base: // Unchanged function returning true for either +-INF. bool real_isinf (const REAL_VALUE_TYPE *r); // New overload to be able to specify the sign. bool real_isinf (const REAL_VALUE_TYPE *r, int sign); // Replacement function for setting INF, defaults to +INF. void real_inf (REAL_VALUE_TYPE *, int sign = 0); Tested on x86-64 Linux. OK? gcc/ChangeLog: * real.cc (real_isinf): New overload. (real_inf): Add sign argument. * real.h (real_isinf): New overload. (real_inf): Add sign argument. --- gcc/real.cc | 14 +++--- gcc/real.h | 5 - 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/gcc/real.cc b/gcc/real.cc index 4e63b1449c5..f570ca8e85b 100644 --- a/gcc/real.cc +++ b/gcc/real.cc @@ -1234,6 +1234,14 @@ real_isinf (const REAL_VALUE_TYPE *r) return (r->cl == rvc_inf); } +/* Determine whether a floating-point value X is infinite with SIGN. */ + +bool +real_isinf (const REAL_VALUE_TYPE *r, int sign) +{ + return real_isinf (r) && r->sign == sign; +} + /* Determine whether a floating-point value X is a NaN. */ bool @@ -2484,12 +2492,12 @@ dconst_sqrt2_ptr (void) return &value; } -/* Fills R with +Inf. */ +/* Fills R with Inf with SIGN. */ void -real_inf (REAL_VALUE_TYPE *r) +real_inf (REAL_VALUE_TYPE *r, int sign) { - get_inf (r, 0); + get_inf (r, sign); } /* Fills R with a NaN whose significand is described by STR. If QUIET, diff --git a/gcc/real.h b/gcc/real.h index 845ef29e3a4..76360b603fb 100644 --- a/gcc/real.h +++ b/gcc/real.h @@ -277,6 +277,9 @@ extern bool real_compare (int, const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *) /* Determine whether a floating-point value X is infinite. */ extern bool real_isinf (const REAL_VALUE_TYPE *); +/* Determine whether a floating-point value X is infinite with SIGN. */ +extern bool real_isinf (const REAL_VALUE_TYPE *, int sign); + /* Determine whether a floating-point value X is a NaN. */ extern bool real_isnan (const REAL_VALUE_TYPE *); @@ -331,7 +334,7 @@ extern long real_to_target (long *, const REAL_VALUE_TYPE *, format_helper); extern void real_from_target (REAL_VALUE_TYPE *, const long *, format_helper); -extern void real_inf (REAL_VALUE_TYPE *); +extern void real_inf (REAL_VALUE_TYPE *, int sign = 0); extern bool real_nan (REAL_VALUE_TYPE *, const char *, int, format_helper); -- 2.37.1
[committed] gfortran.dg/gomp/depend-4.f90: Minor fix
Not really crucial as it is a compile-time test, but there was an off-by-one typo: !$omp depobj(elem(5)) depend(in: daaa(2)) !$omp depobj(elem(6)) depend(in: daap(2)) - !$omp depobj(elem(6)) depend(in: doaa(2)) + !$omp depobj(elem(7)) depend(in: doaa(2)) !$omp depobj(elem(8)) depend(in: doaaa(2)) Committed as https://gcc.gnu.org/r13-2151-g6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4 Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 commit 6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4 Author: Tobias Burnus Date: Tue Aug 23 12:31:43 2022 +0200 gfortran.dg/gomp/depend-4.f90: Minor fix gcc/testsuite/ * gfortran.dg/gomp/depend-4.f90: Fix array index use for depobj var + update scan-tree-dump-times. --- gcc/testsuite/gfortran.dg/gomp/depend-4.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/gomp/depend-4.f90 b/gcc/testsuite/gfortran.dg/gomp/depend-4.f90 index f6cf2fd2dd4..edc30fe7291 100644 --- a/gcc/testsuite/gfortran.dg/gomp/depend-4.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/depend-4.f90 @@ -52,7 +52,7 @@ subroutine foo(dss, dsp, dsa, daa, daaa, daap, doss, dosp, dosa, doaa, doaaa, do !$omp depobj(elem(4)) depend(in: daa(2)) !$omp depobj(elem(5)) depend(in: daaa(2)) !$omp depobj(elem(6)) depend(in: daap(2)) - !$omp depobj(elem(6)) depend(in: doaa(2)) + !$omp depobj(elem(7)) depend(in: doaa(2)) !$omp depobj(elem(8)) depend(in: doaaa(2)) !$omp depobj(elem(9)) depend(in: doaap(2)) @@ -199,7 +199,7 @@ end ! { dg-final { scan-tree-dump-times "&elem\\\[3\\\] = &\\(\\*daa\\)\\\[1\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[4\\\] = &\\(\\*\\(integer.kind=4.\\\[0:\\\] \\* restrict\\) daaa->data\\)\\\[daaa->offset \\+ 2\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = \\(integer.kind=4. \\*\\) \\(daap->data \\+ .sizetype. \\(\\(daap->offset \\+ daap->dim\\\[0\\\].stride \\* 2\\) \\* daap->span\\)\\);" 1 "original" } } -! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } } +! { dg-final { scan-tree-dump-times "&elem\\\[6\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[7\\\] = &\\(\\*\\(integer.kind=4.\\\[0:\\\] \\* restrict\\) doaaa->data\\)\\\[doaaa->offset \\+ 2\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[8\\\] = \\(integer.kind=4. \\*\\) \\(doaap->data \\+ .sizetype. \\(\\(doaap->offset \\+ doaap->dim\\\[0\\\].stride \\* 2\\) \\* doaap->span\\)\\);" 1 "original" } }
Re: [committed] gfortran.dg/gomp/depend-4.f90: Minor fix
And the same for depend-6.f90, which was obviously copied from depend-4.f90. Committed as https://gcc.gnu.org/r13-2152-gf05e3b2c63f3307ba405900f1a80c25b2e87b0a3 On 23.08.22 12:34, Tobias Burnus wrote: Not really crucial as it is a compile-time test, but there was an off-by-one typo: !$omp depobj(elem(5)) depend(in: daaa(2)) !$omp depobj(elem(6)) depend(in: daap(2)) - !$omp depobj(elem(6)) depend(in: doaa(2)) + !$omp depobj(elem(7)) depend(in: doaa(2)) !$omp depobj(elem(8)) depend(in: doaaa(2)) Committed as https://gcc.gnu.org/r13-2151-g6b2a584ed5bed1b851ee7b4668ac705f20cbb2c4 Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 commit f05e3b2c63f3307ba405900f1a80c25b2e87b0a3 Author: Tobias Burnus Date: Tue Aug 23 13:14:19 2022 +0200 gfortran.dg/gomp/depend-6.f90: Minor fix Exactly the same as previous commit for depend-4.f90, r13-2151. gcc/testsuite/ * gfortran.dg/gomp/depend-6.f90: Fix array index use for depobj var + update scan-tree-dump-times. --- gcc/testsuite/gfortran.dg/gomp/depend-6.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/gomp/depend-6.f90 b/gcc/testsuite/gfortran.dg/gomp/depend-6.f90 index b6c1afee127..fbac14b1b94 100644 --- a/gcc/testsuite/gfortran.dg/gomp/depend-6.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/depend-6.f90 @@ -52,7 +52,7 @@ subroutine foo(dss, dsp, dsa, daa, daaa, daap, doss, dosp, dosa, doaa, doaaa, do !$omp depobj(elem(4)) depend(in: daa(2)) !$omp depobj(elem(5)) depend(in: daaa(2)) !$omp depobj(elem(6)) depend(in: daap(2)) - !$omp depobj(elem(6)) depend(in: doaa(2)) + !$omp depobj(elem(7)) depend(in: doaa(2)) !$omp depobj(elem(8)) depend(in: doaaa(2)) !$omp depobj(elem(9)) depend(in: doaap(2)) @@ -199,7 +199,7 @@ end ! { dg-final { scan-tree-dump-times "&elem\\\[3\\\] = &\\(\\*daa\\)\\\[1\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[4\\\] = &\\(\\*\\(void \\*\\\[0:\\\] \\* restrict\\) daaa->data\\)\\\[daaa->offset \\+ 2\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = \\(void \\* \\*\\) \\(daap->data \\+ .sizetype. \\(\\(daap->offset \\+ daap->dim\\\[0\\\].stride \\* 2\\) \\* daap->span\\)\\);" 1 "original" } } -! { dg-final { scan-tree-dump-times "&elem\\\[5\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } } +! { dg-final { scan-tree-dump-times "&elem\\\[6\\\] = &\\(\\*doaa\\)\\\[1\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[7\\\] = &\\(\\*\\(void \\*\\\[0:\\\] \\* restrict\\) doaaa->data\\)\\\[doaaa->offset \\+ 2\\\];" 1 "original" } } ! { dg-final { scan-tree-dump-times "&elem\\\[8\\\] = \\(void \\* \\*\\) \\(doaap->data \\+ .sizetype. \\(\\(doaap->offset \\+ doaap->dim\\\[0\\\].stride \\* 2\\) \\* doaap->span\\)\\);" 1 "original" } }
[PATCH] Add support for floating point endpoints to frange.
The current implementation of frange is just a type with some bits to represent NAN and INF. We can do better and represent endpoints to ultimately solve longstanding PRs such as PR24021. This patch adds these endpoints. In follow-up patches I will add support for relational operators using them, as well as a bare bones PLUS_EXPR range-op-float entry to solve the PR. I have chosen to use REAL_VALUE_TYPEs for the endpoints, since that's what we use underneath the trees. This will be somewhat analogous to our eventual use of wide-ints in the irange. No sense going through added levels of indirection if we can avoid it. That, plus real.* already has a nice API for dealing with floats. With this patch, ranges will be closed float point intervals, which make the implementation simpler, since we don't have to keep track of open/closed intervals. This is conservative enough for use in the ranger world, as we'd rather err on the side of more elements in a range, than less. For example, even though we cannot precisely represent the open interval (3.0, 5.0) with this approach, it is perfectably reasonable to represent it as [3.0, 5.0] since the closed interval is a super set of the open one. In the VRP/ranger world, it is always better to err on the side of more information in a range, than not. After all, when we don't know anything about a range, we just use VARYING which is a fancy term for a range spanning the entire domain. Since REAL_VALUE_TYPEs have properly defined infinity and NAN semantics, all the math can be made to work: [-INF, 3.0] !NAN=> Numbers <= 3.0 (NAN cannot happen) [3.0, +INF] => Numbers >= 3.0 (NAN is possible) [-INF, +INF]=> VARYING (NAN is possible) [-INF, +INF] !NAN => Entire domain. NAN cannot happen. Also, since REAL_VALUE_TYPEs can represent the minimum and maximum representable values of a TYPE_MODE, we can disambiguate between them and negative and positive infinity (see get_max_float in real.cc). This also makes the math all work. For example, suppose we know nothing about x and y (VARYING). On the TRUE side of x > y, we can deduce that: (a) x cannot be NAN (b) y cannot be NAN (c) y cannot be +INF. (c) means that we can drop the upper bound of "y" from +INF to the maximum representable value for its type. Having endpoints with different representation for infinity and the maximum representable values, means we can drop the +-INF properties we currently have in the frange. I will do this as a follow-up patch, as well as contributing more detailed relational operators. I am still tweaking some regressed tests because of the usual premature threading and VRP in the presence of smarter ranges. I figured I'd post now to give others a chance to comment in the meantime. gcc/ChangeLog: * value-range-pretty-print.cc (vrange_printer::visit): Adapt for endpoints. * value-range.cc (frange::set): Same. (frange::normalize_kind): Same. (frange::union_): Same. (frange::intersect): Same. (frange::operator=): Same. (frange::verify_range): Same. (frange::contains_p): New. (frange::singleton_p): New. (frange_float): New. (real_max_representable): New. (real_min_representable): New. (range_tests_floats): New. (range_tests): Call range_tests_floats. * value-range.h (frange::lower_bound): New. (frange::upper_bound): New. (vrp_val_min): Use real_inf with a sign argument. (frange::frange): New. (frange::set_varying): Adapt for endpoints. (frange::set_undefined): Adapt for endpoints. --- gcc/value-range-pretty-print.cc | 12 +- gcc/value-range.cc | 259 ++-- gcc/value-range.h | 53 ++- 3 files changed, 312 insertions(+), 12 deletions(-) diff --git a/gcc/value-range-pretty-print.cc b/gcc/value-range-pretty-print.cc index cbf50d3d854..1a026c22f99 100644 --- a/gcc/value-range-pretty-print.cc +++ b/gcc/value-range-pretty-print.cc @@ -122,19 +122,29 @@ vrange_printer::print_irange_bitmasks (const irange &r) const void vrange_printer::visit (const frange &r) const { + tree type = r.type (); + pp_string (pp, "[frange] "); if (r.undefined_p ()) { pp_string (pp, "UNDEFINED"); return; } - dump_generic_node (pp, r.type (), 0, TDF_NONE, false); + dump_generic_node (pp, type, 0, TDF_NONE, false); pp_string (pp, " "); if (r.varying_p ()) { pp_string (pp, "VARYING"); return; } + pp_character (pp, '['); + dump_generic_node (pp, +build_real (type, r.lower_bound ()), 0, TDF_NONE, false); + pp_string (pp, ", "); + dump_generic_node (pp, +build_real (type, r.upper_bound ()), 0, TDF_NONE, false); + pp_string (pp, "] "); + print_frange_prop ("NAN", r.get_nan ()); print_frange_prop ("INF", r.get_inf ())
[PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]
sched1: Fix -fcompare-debug issue in schedule_region [PR105586] In schedule_region(), a basic block that does not contain any real insns is not scheduled and the dfa state at the entry of the bb is not copied to the fallthru basic block. However a DEBUG insn is treated as a real insn, and if a bb contains non-real insns and a DEBUG insn, it's dfa state is copied to the fallthru bb. This was resulting in -fcompare-debug failure as the incoming dfa state of the fallthru block is different with -g. We should always copy the dfa state of a bb to it's fallthru bb even if the bb does not contain real insns. 2022-08-22 Surya Kumari Jangala gcc/ PR rtl-optimization/105586 * sched-rgn.cc (schedule_region): Always copy dfa state to fallthru block. gcc/testsuite/ PR rtl-optimization/105586 * gcc.target/powerpc/pr105586.c: New test. diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc index 0dc2a8f2851..6c9202c0b2b 100644 --- a/gcc/sched-rgn.cc +++ b/gcc/sched-rgn.cc @@ -3082,6 +3082,24 @@ free_bb_state_array (void) bb_state = NULL; } +static void +save_state_for_fallthru_edge (basic_block last_bb, state_t state) +{ + edge f = find_fallthru_edge (last_bb->succs); + if (f + && (!f->probability.initialized_p () + || (f->probability.to_reg_br_prob_base () * 100 + / REG_BR_PROB_BASE + >= param_sched_state_edge_prob_cutoff))) + { +memcpy (bb_state[f->dest->index], state, + dfa_state_size); +if (sched_verbose >= 5) + fprintf (sched_dump, "saving state for edge %d->%d\n", + f->src->index, f->dest->index); + } +} + /* Schedule a region. A region is either an inner loop, a loop-free subroutine, or a single basic block. Each bb in the region is scheduled after its flow predecessors. */ @@ -3155,6 +3173,7 @@ schedule_region (int rgn) if (no_real_insns_p (head, tail)) { gcc_assert (first_bb == last_bb); + save_state_for_fallthru_edge (last_bb, bb_state[first_bb->index]); continue; } @@ -3173,26 +3192,13 @@ schedule_region (int rgn) curr_bb = first_bb; if (dbg_cnt (sched_block)) { - edge f; int saved_last_basic_block = last_basic_block_for_fn (cfun); schedule_block (&curr_bb, bb_state[first_bb->index]); gcc_assert (EBB_FIRST_BB (bb) == first_bb); sched_rgn_n_insns += sched_n_insns; realloc_bb_state_array (saved_last_basic_block); - f = find_fallthru_edge (last_bb->succs); - if (f - && (!f->probability.initialized_p () - || (f->probability.to_reg_br_prob_base () * 100 - / REG_BR_PROB_BASE - >= param_sched_state_edge_prob_cutoff))) - { - memcpy (bb_state[f->dest->index], curr_state, - dfa_state_size); - if (sched_verbose >= 5) - fprintf (sched_dump, "saving state for edge %d->%d\n", -f->src->index, f->dest->index); - } + save_state_for_fallthru_edge (last_bb, curr_state); } else { diff --git a/gcc/testsuite/gcc.target/powerpc/pr105586.c b/gcc/testsuite/gcc.target/powerpc/pr105586.c new file mode 100644 index 000..bd397f58bc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr105586.c @@ -0,0 +1,19 @@ +/* { dg-options "-mdejagnu-tune=power4 -O2 -fcompare-debug -fno-if-conversion -fno-guess-branch-probability" } */ + +extern int bar(int i); + +typedef unsigned long u64; +int g; + +__int128 h; + +void +foo(int a, int b) { + int i; + char u8_1 = g, u8_3 = a; + u64 u64_1 = bar(0), u64_3 = u8_3 * u64_1; + __int128 u128_1 = h ^ __builtin_expect(i, 0); + u64 u64_4 = u64_1 << ((__builtin_sub_overflow_p(0, u8_1, (u64)0) ^ u128_1) & 8); + u64 u64_r = b + u64_3 + u64_4; + bar((short)u64_r + u8_3); +}
Re: [PATCH 2/3] libstdc++: Implement std::pair/tuple/misc enhancements from P2321R2
On Tue, 23 Aug 2022 at 02:36, Patrick Palka via Libstdc++ wrote: > --- a/libstdc++-v3/include/bits/stl_pair.h > +++ b/libstdc++-v3/include/bits/stl_pair.h > @@ -212,6 +212,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > swap(second, __p.second); >} > > +#if __cplusplus > 202002L > + /// Swap the first members and then the second members. > + constexpr void > + swap(const pair& __p) const > + noexcept(__and_<__is_nothrow_swappable, > + __is_nothrow_swappable>::value) This could use __and_v (which is just __and_::value today, but could theoretically be optimized to use a requires expression and avoid instantiating __and_ one day). Is consistency with the C++11 overload more important? I *hope* we won't need to make many changes to these noexcept-specifiers, so the maintenance burden of using __ad_::value in one and __and_v in the other shouldn't be too high. > @@ -710,6 +792,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > noexcept(noexcept(__x.swap(__y))) > { __x.swap(__y); } > > +#if __cplusplus > 202002L > + template > +requires is_swappable::value && is_swappable::value is_swappable_v instead of ::value here ... this is already using a requires-clause and so is substantially different to the old overload anyway. > + >// tuple swap >_GLIBCXX20_CONSTEXPR >void >swap(tuple& __in) >noexcept(__and_<__is_nothrow_swappable<_Elements>...>::value) >{ _Inherited::_M_swap(__in); } > + > +#if __cplusplus > 202002L > + constexpr void > + swap(const tuple& __in) const > + noexcept(__and_<__is_nothrow_swappable...>::value) __and_v ? >_GLIBCXX20_CONSTEXPR >void >swap(tuple& __in) >noexcept(__and_<__is_nothrow_swappable<_T1>, > __is_nothrow_swappable<_T2>>::value) >{ _Inherited::_M_swap(__in); } > + > +#if __cplusplus > 202002L > + constexpr void > + swap(const tuple& __in) const > + noexcept(__and_<__is_nothrow_swappable, > + __is_nothrow_swappable>::value) __and_v ? Thanks for doing this, those changes looked tedious to implement and test! If you agree with the suggestions to use _v variable templates, this is OK for trunk with those changes. I am willing to be persuaded to not use the variable templates if there's a good reason I've missed.
Re: Ping [PATCH V2] libcpp: Optimize #pragma once with a hash table [PR58770]
On 8/22/22 13:39, Paul Hollinsky wrote: On Mon, Aug 22, 2022 at 09:19:29AM -0400, Nathan Sidwell wrote: On 8/19/22 16:27, Paul Hollinsky wrote: Hi all, Would love some feedback on this patch! Thanks, Paul On Mon, Aug 01, 2022 at 05:18:40AM +, Paul Hollinsky wrote: Rather than traversing the all_files linked list for every include, this factors out the quick idempotency checks (modification time and size) to be the keys in a hash table so we can find matching files quickly. I'm probably confused, but why is the existing idempotency logic insufficiently optimal? Can it be improved directly? The idempotency logic breaks down to a couple integer comparisons, I doubt it can be made much faster. Usually a file will be skipped after the very first idempotency check. The reason it's suboptimal right now is that it's O(n^2) behavior -- for each include that's seen we follow the entire linked list and do those integeger comparisons. Traversing a list is O(N) -- why are you encountering N^2 here? (Are you summing over all includes?) The cost of hashing, performing a lookup against the hash table, and then traversing the list of results (any hash collisions) is much lower than traversing the full list of includes. I suspect that has something to do with cache locality, or lack thereof, when accessing each member of the linked list. It's likely also just far fewer instructions executed. I don't see why a hash table cannot be applied to all idempotent files, not just #import/pragma once. WRT using modification time as a key. I don't see how that provides sufficient additional randomization to file size? Groups of headers are often installed with the same mtime. This is a fair point, and I was going to say something here about the codebases being compiled, but git also doesn't preserve mtime. The real reason mtime is because that's what was done in the previous implementation, and I felt it best not to mess with. But, if there are better attributes that could be used without adding much overhead we could use those instead. I was personally worried that reading the file to perform any sort of hash would be expensive. doing anything other than compare the pathname is going to mean going to disk. That's likely cached by the OS from the first read. I suppose one could encounter horrible LRU behaviour, but you're probably going to lose in that case anyway. Hm, why not just key on the inode/drive number? that way one even avoids the inode read (oh, I think stat isn't that fine grained is it) I suppose one could have two hashtables -- one keyed by pathname, which one could check before the inode, which would deal with the majority of non-symlink-farm cases. Then one keyed by inode/m_time or whatever to deal with symlinks. nathan In any case, thank you very much for the feedback! --Paul The hash table value type is a linked list, in case more than one file matches the quick check. The table is only built if a once-only file is seen, so include guard performance is not affecte My laptop would previously complete Ricardo's benchmark from the PR in ~1.1s using #pragma once, and ~0.35s using include guards. After this change, both benchmarks now complete in ~0.35s. I did have to randomize the modification dates on the benchmark headers so the files did not all end up in the same hash table list, but that would likely not come up outside of the contrived benchmark. I bootstrapped and ran the testsuite on x86_64 Darwin, as well as ppc64le and aarch64 Linux. libcpp/ChangeLog: PR preprocessor/58770 * internal.h: Add hash table for #pragma once * files.cc: Optimize #pragma once with the hash table Signed-off-by: Paul Hollinsky --- libcpp/files.cc | 116 +++--- libcpp/internal.h | 3 ++ 2 files changed, 112 insertions(+), 7 deletions(-) diff --git a/libcpp/files.cc b/libcpp/files.cc index 24208f7b0f8..d4ffd77578e 100644 --- a/libcpp/files.cc +++ b/libcpp/files.cc @@ -167,6 +167,33 @@ struct file_hash_entry_pool struct cpp_file_hash_entry pool[FILE_HASH_POOL_SIZE]; }; +/* A set of attributes designed to quickly identify obviously different files + in a hashtable. Just in case there are collisions, we still maintain a + list. These sub-lists can then be checked for #pragma once rather than + interating through all_files. */ +struct file_quick_idempotency_attrs +{ + file_quick_idempotency_attrs(const _cpp_file *f) +: mtime(f->st.st_mtime), size(f->st.st_size) {} + + time_t mtime; + off_t size; + + static hashval_t hash (/* _cpp_file* */ const void *p); +}; + +/* Sub-list of very similar files kept in a hashtable to check for #pragma + once. */ +struct file_sublist +{ + _cpp_file *f; + file_sublist *next; + + static int eq (/* _cpp_file* */ const void *p, +/* file_sublist* */ const void *q); + static void del (/* file_sublist* */ void *p); +}; +
[PATCH] Speedup path discovery in predicate::use_cannot_happen
The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that made compute_control_dep_chain start from function entry rather than the immediate dominator of the source block of the edge with the undefined value on the PHI node. Reverting at that point does not reveal any testsuite FAIL, in particular the added testcase still passes. The following adjusts this to the other function that computes predicates that hold on the PHI incoming edges with undefined values, predicate::init_from_phi_def, which starts at the immediate dominator of the PHI. That's much less likely to run into the CFG walking limit. Bootstrapped and tested on x86_64-unknown-linux-gnu. Aldy - you did this change, do you remember anything here? In fact the whole thing that's now called predicate::use_cannot_happen seems to be redundant - the two testcases attributed to its history do not fail when that's disabled, nor did they fail when it was introduced. In principle whats now called predicate::superset_of should cover this (but different implementation limits might apply). OK? * gimple-predicate-analysis.cc (predicate::use_cannot_happen): Start the compute_control_dep_chain walk from the immediate dominator of the PHI. --- gcc/gimple-predicate-analysis.cc | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc index a4291657d0b..ea81daacd4f 100644 --- a/gcc/gimple-predicate-analysis.cc +++ b/gcc/gimple-predicate-analysis.cc @@ -1293,6 +1293,12 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds) use_guard = &phi_use_guard_intersection; } + basic_block phi_bb = gimple_bb (phi); + /* Find the closest dominating bb to be the control dependence root. */ + basic_block cd_root = get_immediate_dominator (CDI_DOMINATORS, phi_bb); + if (!cd_root) +return false; + /* Look for the control dependencies of all the interesting operands and build guard predicates describing them. */ unsigned n = gimple_phi_num_args (phi); @@ -1308,7 +1314,7 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds) unsigned num_calls = 0; /* Build the control dependency chain for the PHI argument... */ - if (!compute_control_dep_chain (ENTRY_BLOCK_PTR_FOR_FN (cfun), + if (!compute_control_dep_chain (cd_root, e->src, dep_chains, &num_chains, cur_chain, &num_calls)) { -- 2.35.3
[PATCH] New uninit testcase
I've reduced the following which doesn't seem covered in a good enough way in the testsuite. Pushed. * gcc.dg/uninit-pred-10.c: New testcase. --- gcc/testsuite/gcc.dg/uninit-pred-10.c | 36 +++ 1 file changed, 36 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/uninit-pred-10.c diff --git a/gcc/testsuite/gcc.dg/uninit-pred-10.c b/gcc/testsuite/gcc.dg/uninit-pred-10.c new file mode 100644 index 000..468b4d3ad4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/uninit-pred-10.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wuninitialized" } */ + +enum demangle_component_type { + DEMANGLE_COMPONENT_NAME, + DEMANGLE_COMPONENT_REFERENCE +}; +struct demangle_component { + enum demangle_component_type type; +} d_print_comp_inner_mod_inner, *d_print_comp_inner_dc; +struct d_print_mod { + struct d_print_mod *next; +}; +struct d_print_info { + int templates; + struct d_print_mod *modifiers; +}; +void d_print_comp_inner(struct d_print_info *dpi) +{ + int saved_templates, need_template_restore = 0; + switch (d_print_comp_inner_dc->type) { + case DEMANGLE_COMPONENT_NAME: +goto modifier; + case DEMANGLE_COMPONENT_REFERENCE: +saved_templates = dpi->templates; +need_template_restore = 1; + modifier: +struct d_print_mod dpm; +dpm.next = dpi->modifiers; +d_print_comp_inner_mod_inner = *d_print_comp_inner_dc; +d_print_comp_inner(dpi); +dpi->modifiers = dpm.next; +if (need_template_restore) + dpi->templates = saved_templates; /* { dg-bogus "uninitialized" } */ + } +} -- 2.35.3
Re: [PATCH] Add newline when checking path profitability.
On Tue, Aug 23, 2022 at 12:12 PM Aldy Hernandez wrote: > > It looks like we're missing a newline for cases where we don't print > anything. > > OK? OK > gcc/ChangeLog: > > * tree-ssa-threadbackward.cc (possibly_profitable_path_p): Always > add newline. > (profitable_path_p): Same. > --- > gcc/tree-ssa-threadbackward.cc | 10 +- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc > index 3218ad931ef..9725f50e639 100644 > --- a/gcc/tree-ssa-threadbackward.cc > +++ b/gcc/tree-ssa-threadbackward.cc > @@ -719,7 +719,11 @@ back_threader_profitability::possibly_profitable_path_p > gimple *stmt = gsi_stmt (gsi); > if (gimple_call_internal_p (stmt, IFN_UNIQUE) > || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)) > - return false; > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fputc ('\n', dump_file); > + return false; > + } > /* Do not count empty statements and labels. */ > if (gimple_code (stmt) != GIMPLE_NOP > && !is_gimple_debug (stmt)) > @@ -821,6 +825,8 @@ back_threader_profitability::possibly_profitable_path_p > && (m_n_insns * param_fsm_scale_path_stmts > >= param_max_jump_thread_duplication_stmts)); > > + if (dump_file && (dump_flags & TDF_DETAILS)) > +fputc ('\n', dump_file); >return true; > } > > @@ -947,6 +953,8 @@ back_threader_profitability::profitable_path_p (const > vec &m_path, > "non-empty latch\n"); >return false; > } > + if (dump_file && (dump_flags & TDF_DETAILS)) > +fputc ('\n', dump_file); >return true; > } > > -- > 2.37.1 >
Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]
On 8/23/22 6:49 AM, Surya Kumari Jangala wrote: > sched1: Fix -fcompare-debug issue in schedule_region [PR105586] > > In schedule_region(), a basic block that does not contain any real insns > is not scheduled and the dfa state at the entry of the bb is not copied > to the fallthru basic block. However a DEBUG insn is treated as a real > insn, and if a bb contains non-real insns and a DEBUG insn, it's dfa > state is copied to the fallthru bb. This was resulting in > -fcompare-debug failure as the incoming dfa state of the fallthru block > is different with -g. We should always copy the dfa state of a bb to > it's fallthru bb even if the bb does not contain real insns. > > 2022-08-22 Surya Kumari Jangala > > gcc/ > PR rtl-optimization/105586 > * sched-rgn.cc (schedule_region): Always copy dfa state to > fallthru block. It looks good to me, but I cannot approve it. That said, you're missing a ChangeLog entry for your new helper function. The ChangeLog mentions what changed, not why it changed. Maybe something like the following? gcc/ PR rtl-optimization/105586 * sched-rgn.cc (save_state_for_fallthru_edge): New function. (schedule_region): Use it for all blocks. Peter
[PATCH] tree-optimization/106722 - uninit analysis with long def -> use path
The following applies similar measures as r13-2133-ge66cf626c72d58 to the computation of the use predicate when the path from PHI def to use is too long and we run into compute_control_dep_chain limits. It also moves the preprocessor define limits internal. This resolves the reduced testcase but not the original one. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. PR tree-optimization/106722 * gimple-predicate-analysis.h (MAX_NUM_CHAINS, MAX_CHAIN_LEN, MAX_POSTDOM_CHECK, MAX_SWITCH_CASES): Move ... * gimple-predicate-analysis.cc: ... here and document. (simple_control_dep_chain): New function, factored from predicate::use_cannot_happen. (predicate::use_cannot_happen): Adjust. (predicate::predicate): Use simple_control_dep_chain as fallback. * g++.dg/uninit-pr106722-1.C: New testcase. --- gcc/gimple-predicate-analysis.cc | 68 ++-- gcc/gimple-predicate-analysis.h | 4 -- gcc/testsuite/g++.dg/uninit-pr106722-1.C | 65 ++ 3 files changed, 117 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/g++.dg/uninit-pr106722-1.C diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc index e8e2dbf7034..a4291657d0b 100644 --- a/gcc/gimple-predicate-analysis.cc +++ b/gcc/gimple-predicate-analysis.cc @@ -46,6 +46,19 @@ #define DEBUG_PREDICATE_ANALYZER 1 +/* In our predicate normal form we have MAX_NUM_CHAINS or predicates + and in those MAX_CHAIN_LEN (inverted) and predicates. */ +#define MAX_NUM_CHAINS 8 +#define MAX_CHAIN_LEN 5 + +/* When enumerating paths between two blocks this limits the number of + post-dominator skips between two edges possibly defining a predicate. */ +#define MAX_POSTDOM_CHECK 8 + +/* The limit for the number of switch cases when we do the linear search + for the case corresponding to an edge. */ +#define MAX_SWITCH_CASES 40 + /* Return true if, when BB1 is postdominating BB2, BB1 is a loop exit. */ static bool @@ -1034,6 +1047,36 @@ is_degenerate_phi (gimple *phi, pred_info *pred) return true; } +/* If compute_control_dep_chain bailed out due to limits this routine + tries to build a partial sparse path using dominators. Returns + path edges whose predicates are always true when reaching E. */ + +static void +simple_control_dep_chain (vec& chain, basic_block from, basic_block to) +{ + if (!dominated_by_p (CDI_DOMINATORS, to, from)) +return; + + basic_block src = to; + while (src != from +&& chain.length () <= MAX_CHAIN_LEN) +{ + basic_block dest = src; + src = get_immediate_dominator (CDI_DOMINATORS, src); + edge pred_e; + if (single_pred_p (dest) + && (pred_e = find_edge (src, dest))) + chain.safe_push (pred_e); +} +} + +static void +simple_control_dep_chain (vec& chain, basic_block from, edge to) +{ + chain.safe_push (to); + simple_control_dep_chain (chain, from, to->src); +} + /* Recursively compute the control dependence chains (paths of edges) from the dependent basic block, DEP_BB, up to the dominating basic block, DOM_BB (the head node of a chain should be dominated by it), @@ -1273,20 +1316,8 @@ predicate::use_cannot_happen (gphi *phi, unsigned opnds) /* If compute_control_dep_chain bailed out due to limits build a partial sparse path using dominators. Collect only edges whose predicates are always true when reaching E. */ - cur_chain.truncate (0); - cur_chain.quick_push (e); - basic_block src = e->src; - while (src->index != ENTRY_BLOCK -&& cur_chain.length () <= MAX_CHAIN_LEN) - { - basic_block dest = src; - src = get_immediate_dominator (CDI_DOMINATORS, src); - edge pred_e; - if (single_pred_p (dest) - && (pred_e = find_edge (src, dest))) - cur_chain.quick_push (pred_e); - } - dep_chains[0] = cur_chain.copy (); + simple_control_dep_chain (dep_chains[0], + ENTRY_BLOCK_PTR_FOR_FN (cfun), e); num_chains++; } @@ -1737,8 +1768,13 @@ predicate::predicate (basic_block def_bb, basic_block use_bb, func_t &eval) auto_vec dep_chains[MAX_NUM_CHAINS]; auto_vec cur_chain; - compute_control_dep_chain (cd_root, use_bb, dep_chains, &num_chains, -cur_chain, &num_calls); + if (!compute_control_dep_chain (cd_root, use_bb, dep_chains, &num_chains, + cur_chain, &num_calls)) +{ + gcc_assert (num_chains == 0); + simple_control_dep_chain (dep_chains[0], cd_root, use_bb); + num_chains++; +} if (DEBUG_PREDICATE_ANALYZER && dump_file) { diff --git a/gcc/gimple-predicate-analysis.h b/gcc/gimple-predicate-analysis.h index 204cdbccfc7..77672291c36 100644 --- a/gcc/gimple-predicate-
Re: [PATCH] sched1: Fix -fcompare-debug issue in schedule_region [PR105586]
Hi! On Tue, Aug 23, 2022 at 07:55:22AM -0500, Peter Bergner wrote: > It looks good to me, but I cannot approve it. Same here (both statements). > That said, you're missing > a ChangeLog entry for your new helper function. The ChangeLog mentions > what changed, not why it changed. And that is correct! Changelogs should not say that, that isn't their purpose (in GCC), not what they are used for. Explanations like that go in the email and/or the commit message. The main remaining usefulness of changelogs is to spot unintended commmits. > Maybe something like the following? > > gcc/ > PR rtl-optimization/105586 > * sched-rgn.cc (save_state_for_fallthru_edge): New function. > (schedule_region): Use it for all blocks. That looks perfect, it doesn't say "why" either :-) Segher
Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple
On Tue, 23 Aug 2022, Jonathan Wakely wrote: > On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++ > wrote: > > > > P2321R2 adds new conditionally explicit constructors to std::tuple which > > we'll concisely implement in a subsequent patch using explicit(bool), like > > in our C++20 std::pair implementation. But before we can do that, this > > patch first adds members to _TupleConstraints that test for constructibility > > and convertibility separately; we'll use the first in the new constructors' > > constraints, and the second in their explicit specifier. > > > > In passing, this patch also redefines the existing predicates > > __is_ex/implicitly_constructible in terms of these new members. This > > seems to reduce compile time and memory usage by about 10% for large > > Nice. > > > tuples when using the relevant constructors constrained by > > _Explicit/_ImplicitCtor (since we no longer have to redundantly expand > > and process is_constructible<_Types, _UTypes>... twice for each pair of > > such constructors). In order to retain maximal short circuiting, do > > this only when constexpr if is available. > > Would we get similar benefits for C++11 and C++14 by doing: > >return __and_<__and_...>, > __and_...> > >::value; > > This is slightly more work in total, but if we have __and_ and > __and_> then the A and B instantiations will be cached and > can be reused. Good idea, it seems we get pretty much the same 10% improvement for C++11/14 with this approach. I reckon we might as well then define __convertible and __constructible as alias templates instead of as variable templates and use them unconditionally in __is_im/explicitly_constructible for benefit of all language versions. Using constexpr if instead of the outer __and_ seems to yield a marginal improvement at best and __and_v is currently just __and_::value, so it doesn't seem worth it to have different definitions for C++17 at least for now. What do you think about the following? -- >8 -- libstdc++-v3/ChangeLog: * include/std/tuple (_TupleConstraints::__convertible): Define. (_TupleConstraints::__constructible): Likewise. (_TupleConstraints::__is_explicitly_constructible): Redefine this in terms of __convertible and __constructible. (_TupleConstraints::__is_implicitly_constructible): Likewise. --- libstdc++-v3/include/std/tuple | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple index 6d0060a191c..f8f48ccc370 100644 --- a/libstdc++-v3/include/std/tuple +++ b/libstdc++-v3/include/std/tuple @@ -553,15 +553,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template struct _TupleConstraints { + template + using __constructible = __and_...>; + + template + using __convertible = __and_...>; + // Constraint for a non-explicit constructor. // True iff each Ti in _Types... can be constructed from Ui in _UTypes... // and every Ui is implicitly convertible to Ti. template static constexpr bool __is_implicitly_constructible() { - return __and_..., - is_convertible<_UTypes, _Types>... - >::value; + return __and_<__constructible<_UTypes...>, + __convertible<_UTypes...>>::value; } // Constraint for a non-explicit constructor. @@ -570,9 +575,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template static constexpr bool __is_explicitly_constructible() { - return __and_..., - __not_<__and_...>> - >::value; + return __and_<__constructible<_UTypes...>, + __not_<__convertible<_UTypes...>>>::value; } static constexpr bool __is_implicitly_default_constructible() -- 2.37.2.382.g795ea8776b
Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions
Hi! On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote: > --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c > @@ -1,7 +1,8 @@ > /* { dg-do compile { target { powerpc*-*-* } } } */ > -/* { dg-require-effective-target lp64 } */ > -/* { dg-require-effective-target powerpc_p9vector_ok } */ > /* { dg-options "-mdejagnu-cpu=power9" } */ > +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && > ilp32 } } } */ You can add this always. It is default on 64-bit systems, but it is simpler to just always add it: /* { dg-additional-options "-mpowerpc64" } */ Or are there subtargets that will error on this? > +/* { dg-require-effective-target has_arch_ppc64 } */ This is redundant, the previous line makes this always pass. Segher
Re: [PATCH v3] rs6000: Rework ELFv2 support for -fpatchable-function-entry* [PR99888]
On Fri, Aug 19, 2022 at 10:40:10AM +0800, Kewen.Lin wrote: > Since you proposed to update the documentation, I'm thinking if we can > reconsider Fangrui's proposal in the PR which Alan seconded: Put preceding > nops before GEP and succeeding nops after LEP. Previously I had the concern > that the nops inserted doesn't respect to a same function entry, it looks > inconsistent to the documentation, and you also noted that "The nops have > to be consecutive". If we want to update the documentation, could we reword > it for PowerPC ELFv2 ABI? > > What's your opinion? I'm not sure what the question is, sorry. If you want different semantics for ELFv2 (which might well be useful), we need some new command line option for that. I suggested here to just describe in the existing doc what is done for global and local entry points on ELFv2. Segher
LoongArch: add model attribute
Another attempt to make kernel module happy. One remaining issue: the patch cannot diagnostic some insane thing like int x __attribute__((model("normal"))); int x __attribute__((model("extreme"))); It seems incredibly difficult to diagnose such thing: I can't figure out any solution w/o invasion into the frontend. IA-64 model() only accepts small so this is not a problem for them, and M32R model() also cannot diagnose such error. Any suggestion is welcomed. -- >8 -- A linker script and/or a section attribute may locate some object specially, so we need to handle the code model for such objects differently than the -mcmodel setting. This happens when the Linux kernel loads a module with per-CPU variables. Add an attribute to override the code model for a specific variable. gcc/ChangeLog: * config/loongarch/loongarch-protos.h (loongarch_symbol_type): Add SYMBOL_PCREL32 and SYMBOL_PCREL64. * config/loongarch/loongarch.cc (loongarch_attribute_table): New attribute table. (TARGET_ATTRIBUTE_TABLE): Define the target hook. (loongarch_handle_model_attribute): New static function. (loongarch_classify_symbol): Return SYMBOL_PCREL{32,64} for SYMBOL_REF_DECL with model attribute. (loongarch_use_anchors_for_symbol_p): New static function. (TARGET_USE_ANCHORS_FOR_SYMBOL_P): Define the target hook. * doc/extend.texi (Variable Attributes): Document new LoongArch specific attribute. gcc/testsuite/ChangeLog: * gcc.target/loongarch/attr-model-test.c: New test. * gcc.target/loongarch/attr-model-1.c: New test. * gcc.target/loongarch/attr-model-2.c: New test. * gcc.target/loongarch/attr-model-diag.c: New test. --- gcc/config/loongarch/loongarch-protos.h | 8 + gcc/config/loongarch/loongarch.cc | 175 +- gcc/doc/extend.texi | 16 ++ .../gcc.target/loongarch/attr-model-1.c | 6 + .../gcc.target/loongarch/attr-model-2.c | 6 + .../gcc.target/loongarch/attr-model-diag.c| 7 + .../gcc.target/loongarch/attr-model-test.c| 25 +++ 7 files changed, 236 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-2.c create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-diag.c create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-test.c diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index cadaad7519c..4e925aa3876 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -30,6 +30,12 @@ along with GCC; see the file COPYING3. If not see SYMBOL_PCREL The symbol's value will be loaded directly from data section. + SYMBOL_PCREL32 + SYMBOL_PCREL64 + Like SYMBOL_PCREL, but the PC-relative offset of the symbol is + assumed to be in the +/- 2GiB or +/- 8 EiB range, instead of the + code model specification. + SYMBOL_TLS A thread-local symbol. @@ -42,6 +48,8 @@ along with GCC; see the file COPYING3. If not see enum loongarch_symbol_type { SYMBOL_GOT_DISP, SYMBOL_PCREL, + SYMBOL_PCREL32, + SYMBOL_PCREL64, SYMBOL_TLS, SYMBOL_TLS_IE, SYMBOL_TLS_LE, diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 207ac2762c6..3917c53dd27 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1643,6 +1643,32 @@ loongarch_classify_symbol (const_rtx x) && !loongarch_symbol_binds_local_p (x)) return SYMBOL_GOT_DISP; + if (SYMBOL_REF_P (x)) +{ + tree t = SYMBOL_REF_DECL (x); + if (!t) + return SYMBOL_PCREL; + + t = lookup_attribute ("model", DECL_ATTRIBUTES (t)); + if (!t) + return SYMBOL_PCREL; + + t = TREE_VALUE (TREE_VALUE (t)); + + /* loongarch_handle_model_attribute should reject other values. */ + gcc_assert (TREE_CODE (t) == STRING_CST); + + const char *model = TREE_STRING_POINTER (t); + if (strcmp (model, "normal") == 0) + return SYMBOL_PCREL32; + if (strcmp (model, "extreme") == 0) + return SYMBOL_PCREL64; + + /* loongarch_handle_model_attribute should reject unknown model +name. */ + gcc_unreachable (); +} + return SYMBOL_PCREL; } @@ -1696,6 +1722,8 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) case SYMBOL_TLSGD: case SYMBOL_TLSLDM: case SYMBOL_PCREL: +case SYMBOL_PCREL32: +case SYMBOL_PCREL64: /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); @@ -1721,7 +1749,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) return 3; -case SYMBOL_PCREL: +case SYMBOL
Re: [PATCH 1/3] libstdc++: Separate construct/convertibility tests for std::tuple
On Tue, 23 Aug 2022 at 14:44, Patrick Palka wrote: > > On Tue, 23 Aug 2022, Jonathan Wakely wrote: > > > On Tue, 23 Aug 2022 at 02:35, Patrick Palka via Libstdc++ > > wrote: > > > > > > P2321R2 adds new conditionally explicit constructors to std::tuple which > > > we'll concisely implement in a subsequent patch using explicit(bool), like > > > in our C++20 std::pair implementation. But before we can do that, this > > > patch first adds members to _TupleConstraints that test for > > > constructibility > > > and convertibility separately; we'll use the first in the new > > > constructors' > > > constraints, and the second in their explicit specifier. > > > > > > In passing, this patch also redefines the existing predicates > > > __is_ex/implicitly_constructible in terms of these new members. This > > > seems to reduce compile time and memory usage by about 10% for large > > > > Nice. > > > > > tuples when using the relevant constructors constrained by > > > _Explicit/_ImplicitCtor (since we no longer have to redundantly expand > > > and process is_constructible<_Types, _UTypes>... twice for each pair of > > > such constructors). In order to retain maximal short circuiting, do > > > this only when constexpr if is available. > > > > Would we get similar benefits for C++11 and C++14 by doing: > > > >return __and_<__and_...>, > > __and_...> > > >::value; > > > > This is slightly more work in total, but if we have __and_ and > > __and_> then the A and B instantiations will be cached and > > can be reused. > > Good idea, it seems we get pretty much the same 10% improvement for > C++11/14 with this approach. I reckon we might as well then define > __convertible and __constructible as alias templates instead of as > variable templates and use them unconditionally in > __is_im/explicitly_constructible for benefit of all language versions. I had a similar thought after hitting send. > Using constexpr if instead of the outer __and_ seems to yield a marginal > improvement at best and __and_v is currently just __and_::value, so it > doesn't seem worth it to have different definitions for C++17 at least > for now. > > What do you think about the following? OK for trunk - thanks.
Re: [PATCH 2/3] libstdc++: Implement std::pair/tuple/misc enhancements from P2321R2
On Tue, 23 Aug 2022, Jonathan Wakely wrote: > On Tue, 23 Aug 2022 at 02:36, Patrick Palka via Libstdc++ > wrote: > > --- a/libstdc++-v3/include/bits/stl_pair.h > > +++ b/libstdc++-v3/include/bits/stl_pair.h > > @@ -212,6 +212,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > swap(second, __p.second); > >} > > > > +#if __cplusplus > 202002L > > + /// Swap the first members and then the second members. > > + constexpr void > > + swap(const pair& __p) const > > + noexcept(__and_<__is_nothrow_swappable, > > + __is_nothrow_swappable>::value) > > This could use __and_v (which is just __and_::value today, but could > theoretically be optimized to use a requires expression and avoid > instantiating __and_ one day). > > Is consistency with the C++11 overload more important? I *hope* we > won't need to make many changes to these noexcept-specifiers, so the > maintenance burden of using __ad_::value in one and __and_v in the > other shouldn't be too high. Makes sense. > > > @@ -710,6 +792,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > noexcept(noexcept(__x.swap(__y))) > > { __x.swap(__y); } > > > > +#if __cplusplus > 202002L > > + template > > +requires is_swappable::value && is_swappable > _T2>::value > > is_swappable_v instead of ::value here ... this is already using a > requires-clause and so is substantially different to the old overload > anyway. > > > > > + > >// tuple swap > >_GLIBCXX20_CONSTEXPR > >void > >swap(tuple& __in) > >noexcept(__and_<__is_nothrow_swappable<_Elements>...>::value) > >{ _Inherited::_M_swap(__in); } > > + > > +#if __cplusplus > 202002L > > + constexpr void > > + swap(const tuple& __in) const > > + noexcept(__and_<__is_nothrow_swappable...>::value) > > __and_v ? > > > > > >_GLIBCXX20_CONSTEXPR > >void > >swap(tuple& __in) > >noexcept(__and_<__is_nothrow_swappable<_T1>, > > __is_nothrow_swappable<_T2>>::value) > >{ _Inherited::_M_swap(__in); } > > + > > +#if __cplusplus > 202002L > > + constexpr void > > + swap(const tuple& __in) const > > + noexcept(__and_<__is_nothrow_swappable, > > + __is_nothrow_swappable>::value) > > __and_v ? > > > Thanks for doing this, those changes looked tedious to implement and test! > > If you agree with the suggestions to use _v variable templates, this > is OK for trunk with those changes. I am willing to be persuaded to > not use the variable templates if there's a good reason I've missed. Agreed on all points! Thanks a lot.
Re: [PATCH v2] ipa-visibility: Optimize TLS access [PR99619]
Ping^3. On Fri, 5 Aug 2022, Alexander Monakov wrote: > Ping^2. > > On Wed, 20 Jul 2022, Alexander Monakov wrote: > > > > > Ping. > > > > On Thu, 7 Jul 2022, Alexander Monakov via Gcc-patches wrote: > > > > > From: Artem Klimov > > > > > > Fix PR99619, which asks to optimize TLS model based on visibility. > > > The fix is implemented as an IPA optimization: this allows to take > > > optimized visibility status into account (as well as avoid modifying > > > all language frontends). > > > > > > 2022-04-17 Artem Klimov > > > > > > gcc/ChangeLog: > > > > > > * ipa-visibility.cc (function_and_variable_visibility): Promote > > > TLS access model afer visibility optimizations. > > > * varasm.cc (have_optimized_refs): New helper. > > > (optimize_dyn_tls_for_decl_p): New helper. Use it ... > > > (decl_default_tls_model): ... here in place of 'optimize' check. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.dg/tls/vis-attr-gd.c: New test. > > > * gcc.dg/tls/vis-attr-hidden-gd.c: New test. > > > * gcc.dg/tls/vis-attr-hidden.c: New test. > > > * gcc.dg/tls/vis-flag-hidden-gd.c: New test. > > > * gcc.dg/tls/vis-flag-hidden.c: New test. > > > * gcc.dg/tls/vis-pragma-hidden-gd.c: New test. > > > * gcc.dg/tls/vis-pragma-hidden.c: New test. > > > > > > Co-Authored-By: Alexander Monakov > > > Signed-off-by: Artem Klimov > > > --- > > > > > > v2: run the new loop in ipa-visibility only in the whole-program IPA pass; > > > in decl_default_tls_model, check if any referring function is > > > optimized > > > when 'optimize == 0' (when running in LTO mode) > > > > > > > > > Note for reviewers: I noticed there's a place which tries to avoid TLS > > > promotion, but the comment seems wrong and I could not find a testcase. > > > I'd suggest we remove it. The compiler can only promote general-dynamic > > > to local-dynamic and initial-exec to local-exec. The comment refers to > > > promoting x-dynamic to y-exec, but that cannot happen AFAICT: > > > https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=8e1ba78f1b8eedd6c65c6f0e6d6d09a801de5d3d > > > > > > > > > gcc/ipa-visibility.cc | 19 +++ > > > gcc/testsuite/gcc.dg/tls/vis-attr-gd.c| 12 +++ > > > gcc/testsuite/gcc.dg/tls/vis-attr-hidden-gd.c | 13 > > > gcc/testsuite/gcc.dg/tls/vis-attr-hidden.c| 12 +++ > > > gcc/testsuite/gcc.dg/tls/vis-flag-hidden-gd.c | 13 > > > gcc/testsuite/gcc.dg/tls/vis-flag-hidden.c| 12 +++ > > > .../gcc.dg/tls/vis-pragma-hidden-gd.c | 17 ++ > > > gcc/testsuite/gcc.dg/tls/vis-pragma-hidden.c | 16 ++ > > > gcc/varasm.cc | 32 ++- > > > 9 files changed, 145 insertions(+), 1 deletion(-) > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-gd.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-hidden-gd.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-attr-hidden.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-flag-hidden-gd.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-flag-hidden.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-pragma-hidden-gd.c > > > create mode 100644 gcc/testsuite/gcc.dg/tls/vis-pragma-hidden.c > > > > > > diff --git a/gcc/ipa-visibility.cc b/gcc/ipa-visibility.cc > > > index 8a27e7bcd..3ed2b7cf6 100644 > > > --- a/gcc/ipa-visibility.cc > > > +++ b/gcc/ipa-visibility.cc > > > @@ -873,6 +873,25 @@ function_and_variable_visibility (bool whole_program) > > > } > > > } > > > > > > + if (symtab->state >= IPA_SSA) > > > +{ > > > + FOR_EACH_VARIABLE (vnode) > > > + { > > > + tree decl = vnode->decl; > > > + > > > + /* Upgrade TLS access model based on optimized visibility status, > > > + unless it was specified explicitly or no references remain. */ > > > + if (DECL_THREAD_LOCAL_P (decl) > > > + && !lookup_attribute ("tls_model", DECL_ATTRIBUTES (decl)) > > > + && vnode->ref_list.referring.length ()) > > > + { > > > + enum tls_model new_model = decl_default_tls_model (decl); > > > + gcc_checking_assert (new_model >= decl_tls_model (decl)); > > > + set_decl_tls_model (decl, new_model); > > > + } > > > + } > > > +} > > > + > > >if (dump_file) > > > { > > >fprintf (dump_file, "\nMarking local functions:"); > > > diff --git a/gcc/varasm.cc b/gcc/varasm.cc > > > index 4db8506b1..de149e82c 100644 > > > --- a/gcc/varasm.cc > > > +++ b/gcc/varasm.cc > > > @@ -6679,6 +6679,36 @@ init_varasm_once (void) > > > #endif > > > } > > > > > > +/* Determine whether SYMBOL is used in any optimized function. */ > > > + > > > +static bool > > > +have_optimized_refs (struct symtab_node *symbol) > > > +{ > > > + struct ipa_ref *ref; > > > + > > > + for (int i = 0; symbol->iterate_referring (i, ref); i++) > > > +{ > > > + cgraph_node *cnode = dyn_cast (ref->referring); > > > + > > > + if (
Re: [PATCH] Speedup path discovery in predicate::use_cannot_happen
On Tue, Aug 23, 2022 at 2:16 PM Richard Biener wrote: > > The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that > made compute_control_dep_chain start from function entry rather > than the immediate dominator of the source block of the edge with > the undefined value on the PHI node. Reverting at that point > does not reveal any testsuite FAIL, in particular the added > testcase still passes. The following adjusts this to the other > function that computes predicates that hold on the PHI incoming > edges with undefined values, predicate::init_from_phi_def, which > starts at the immediate dominator of the PHI. That's much less > likely to run into the CFG walking limit. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > Aldy - you did this change, do you remember anything here? In > fact the whole thing that's now called predicate::use_cannot_happen > seems to be redundant - the two testcases attributed to its history > do not fail when that's disabled, nor did they fail when it was > introduced. In principle whats now called predicate::superset_of > should cover this (but different implementation limits might apply). OMG, I'm drawing a complete blank here. I have no recollection of this. I'm tempted to say either my account was hacked or that old code was all wrong ;-). Sorry. Aldy
[committed] libstdc++: Fix visit(v) for non-void visitors [PR106589]
Tested powerpc64le-linux, pushed to trunk. Backport to gcc-12 needed. -- >8 -- The optimization for the common case of std::visit forgot to handle the edge case of passing zero variants to a non-void visitor and converting the result to void. libstdc++-v3/ChangeLog: PR libstdc++/106589 * include/std/variant (__do_visit): Handle is_void for zero argument case. * testsuite/20_util/variant/visit_r.cc: Check std::visit(v). --- libstdc++-v3/include/std/variant | 7 ++- libstdc++-v3/testsuite/20_util/variant/visit_r.cc | 8 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant index f8f15665433..c234b54421e 100644 --- a/libstdc++-v3/include/std/variant +++ b/libstdc++-v3/include/std/variant @@ -1728,7 +1728,12 @@ namespace __variant { // Get the silly case of visiting no variants out of the way first. if constexpr (sizeof...(_Variants) == 0) - return std::forward<_Visitor>(__visitor)(); + { + if constexpr (is_void_v<_Result_type>) + return (void) std::forward<_Visitor>(__visitor)(); + else + return std::forward<_Visitor>(__visitor)(); + } else { constexpr size_t __max = 11; // "These go to eleven." diff --git a/libstdc++-v3/testsuite/20_util/variant/visit_r.cc b/libstdc++-v3/testsuite/20_util/variant/visit_r.cc index 712459f25e3..c77b259c386 100644 --- a/libstdc++-v3/testsuite/20_util/variant/visit_r.cc +++ b/libstdc++-v3/testsuite/20_util/variant/visit_r.cc @@ -54,10 +54,18 @@ void test02() std::visit(Visitor(), v); } +void test03() +{ + // PR libstdc++/106589 - visit rejects lambdas that do not return void + auto visitor = []{ return 0; }; + std::visit(visitor); + std::visit(static_cast(visitor)); +} int main() { test01(); test02(); + test03(); } -- 2.37.2
New template for 'gcc' made available
Hello, gentle maintainer. This is a message from the Translation Project robot. (If you have any questions, send them to .) A new POT file for textual domain 'gcc' has been made available to the language teams for translation. It is archived as: https://translationproject.org/POT-files/gcc-12.2.0.pot Whenever you have a new distribution with a new version number ready, containing a newer POT file, please send the URL of that distribution tarball to the address below. The tarball may be just a pretest or a snapshot, it does not even have to compile. It is just used by the translators when they need some extra translation context. Below is the URL which has been provided to the translators of your package. Please inform the translation coordinator, at the address at the bottom, if this information is not current: https://gcc.gnu.org/pub/gcc/releases/gcc-12.2.0/gcc-12.2.0.tar.xz Translated PO files will later be automatically e-mailed to you. Thank you for all your work, The Translation Project robot, in the name of your translation coordinator.
[PATCH v4] RISC-V: Add support for inlining subword atomic operations
From: Patrick O'Neill RISC-V has no support for subword atomic operations; code currently generates libatomic library calls. This patch changes the default behavior to inline subword atomic calls (using the same logic as the existing library call). Behavior can be specified using the -minline-atomics and -mno-inline-atomics command line flags. gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm. This will need to stay for backwards compatibility and the -mno-inline-atomics flag. 2022-04-19 Patrick O'Neill PR target/104338 * riscv-protos.h: Add helper function stubs. * riscv.cc: Add helper functions for subword masking. * riscv.opt: Add command-line flag. * sync.md: Add masking logic and inline asm for fetch_and_op, fetch_and_nand, CAS, and exchange ops. * invoke.texi: Add blurb regarding command-line flag. * inline-atomics-1.c: New test. * inline-atomics-2.c: Likewise. * inline-atomics-3.c: Likewise. * inline-atomics-4.c: Likewise. * inline-atomics-5.c: Likewise. * inline-atomics-6.c: Likewise. * inline-atomics-7.c: Likewise. * inline-atomics-8.c: Likewise. * atomic.c: Add reference to duplicate logic. Signed-off-by: Patrick O'Neill Signed-off-by: Palmer Dabbelt --- This is pretty much the same as the v3, I just rebased it onto trunk and adjusted the documentation to mention libatomic. I think there's still room to do a more optimized implemnetation here, but we still need to sort out the memory model implications. Given that this is causing a headache for the distro folks I think it's best to just take the simple version now rather than waiting for something better -- we can always do something better, it's just probably going to miss the next release. This alone is pretty safe: we're just inlining the same code sequences that libatomic generates, and since we provide static libatomic that doesn't add to any future binary compatibility issues. This has no new failures on trunk. I also backported it to gcc-10 (as that's what my buildroot is using, which itself is a problem I need to fix) and it can build/boot a simple glibc-based userspace fine. OK for trunk? --- gcc/config/riscv/riscv-protos.h | 2 + gcc/config/riscv/riscv.cc | 52 ++ gcc/config/riscv/riscv.opt| 4 + gcc/config/riscv/sync.md | 318 ++ gcc/doc/invoke.texi | 8 + .../gcc.target/riscv/inline-atomics-1.c | 18 + .../gcc.target/riscv/inline-atomics-2.c | 19 + .../gcc.target/riscv/inline-atomics-3.c | 569 ++ .../gcc.target/riscv/inline-atomics-4.c | 566 + .../gcc.target/riscv/inline-atomics-5.c | 87 +++ .../gcc.target/riscv/inline-atomics-6.c | 87 +++ .../gcc.target/riscv/inline-atomics-7.c | 69 +++ .../gcc.target/riscv/inline-atomics-8.c | 69 +++ libgcc/config/riscv/atomic.c | 2 + 14 files changed, 1870 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 20c2381c21a..14f3c8f0d4e 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx); extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); +extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *); +extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *); /* Routines implemented in riscv-c.cc. */ void riscv_cpu_cpp_builtins (cpp_reader *); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 9d70974c893..1d8730d80e9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5791,6 +5791,58 @@ riscv_init_libfuncs (void) set_optab_libfunc (unord_optab, HFmode, NULL); } +/* Helper function for extracting a subword from memory. */ + +void +riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, + rtx *not_mask) +{ + /* Align the memory addess to a word. */ + rtx addr = force_reg (Pmode, XEXP (mem, 0)); + + rtx aligned_addr = gen_reg_rtx (Pmode); + emit_move_insn (aligned_addr, gen_rtx_AND (Pmo
[PATCH] i386: avoid zero extension for crc32q
The crc32q instruction takes 64-bit operands, but ignores high 32 bits of the destination operand, and zero-extends the result from 32 bits. Let's model this in the RTL pattern to avoid zero-extension when the _mm_crc32_u64 intrinsic is used with a 32-bit type. PR target/106453 gcc/ChangeLog: * config/i386/i386.md (sse4_2_crc32di): Model that only low 32 bits of operand 0 are consumed, and the result is zero-extended to 64 bits. gcc/testsuite/ChangeLog: * gcc.target/i386/pr106453.c: New test. --- gcc/config/i386/i386.md | 6 +++--- gcc/testsuite/gcc.target/i386/pr106453.c | 13 + 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr106453.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 58fcc382f..b5760bb23 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -23823,10 +23823,10 @@ (define_insn "sse4_2_crc32di" [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI - [(match_operand:DI 1 "register_operand" "0") + (zero_extend:DI (unspec:SI + [(match_operand:SI 1 "register_operand" "0") (match_operand:DI 2 "nonimmediate_operand" "rm")] - UNSPEC_CRC32))] + UNSPEC_CRC32)))] "TARGET_64BIT && TARGET_CRC32" "crc32{q}\t{%2, %0|%0, %2}" [(set_attr "type" "sselog1") diff --git a/gcc/testsuite/gcc.target/i386/pr106453.c b/gcc/testsuite/gcc.target/i386/pr106453.c new file mode 100644 index 0..bab5b1cb2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106453.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-msse4.2 -O2 -fdump-rtl-final" } */ +/* { dg-final { scan-rtl-dump-not "zero_extendsidi" "final" } } */ + +#include +#include + +uint32_t f(uint32_t c, uint64_t *p, size_t n) +{ +for (size_t i = 0; i < n; i++) +c = _mm_crc32_u64(c, p[i]); +return c; +} -- 2.35.1
Re: [PATCH] libstdc++: Optimize operator+(string/char*, string/char*) equally
On Mon, 22 Aug 2022 at 19:15, Will Hawkins wrote: > > Until now operator+(char*, string) and operator+(string, char*) had > different performance characteristics. The former required a single > memory allocation and the latter required two. This patch makes the > performance equal. If you don't have a GCC copyright assignment on file with the FSF then please follow the procedure described at https://gcc.gnu.org/dco.html > libstdc++-v3/ChangeLog: > * libstdc++-v3/include/bits/basic_string.h (operator+(string, char*)): > Remove naive implementation. > * libstdc++-v3/include/bits/basic_string.tcc (operator+(string, > char*)): > Add single-allocation implementation. > --- > libstdc++-v3/include/bits/basic_string.h | 7 +-- > libstdc++-v3/include/bits/basic_string.tcc | 21 + > 2 files changed, 22 insertions(+), 6 deletions(-) > > diff --git a/libstdc++-v3/include/bits/basic_string.h > b/libstdc++-v3/include/bits/basic_string.h > index b04fba95678..bc048fc0689 100644 > --- a/libstdc++-v3/include/bits/basic_string.h > +++ b/libstdc++-v3/include/bits/basic_string.h > @@ -3523,12 +3523,7 @@ _GLIBCXX_END_NAMESPACE_CXX11 > _GLIBCXX20_CONSTEXPR > inline basic_string<_CharT, _Traits, _Alloc> Please remove the 'inline' specifier here, since you're moving the definition into the non-inline .tcc file. There's a separate discussion to be had about whether these operator+ overloads *should* be inline. But for the purposes of this change, we want these two operator+ overloads to be consistent, and so they should both be non-inline. > operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, > - const _CharT* __rhs) > -{ > - basic_string<_CharT, _Traits, _Alloc> __str(__lhs); > - __str.append(__rhs); > - return __str; > -} > + const _CharT* __rhs); > >/** > * @brief Concatenate string and character. > diff --git a/libstdc++-v3/include/bits/basic_string.tcc > b/libstdc++-v3/include/bits/basic_string.tcc > index 4563c61429a..95ba8e503e9 100644 > --- a/libstdc++-v3/include/bits/basic_string.tcc > +++ b/libstdc++-v3/include/bits/basic_string.tcc > @@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >return __str; > } > > + template > +_GLIBCXX20_CONSTEXPR > +basic_string<_CharT, _Traits, _Alloc> > +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, > + const _CharT* __rhs) > +{ > + __glibcxx_requires_string(__rhs); > + typedef basic_string<_CharT, _Traits, _Alloc> __string_type; > + typedef typename __string_type::size_type __size_type; > + typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template > + rebind<_CharT>::other _Char_alloc_type; > + typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits; > + const __size_type __len = _Traits::length(__rhs); > + __string_type __str(_Alloc_traits::_S_select_on_copy( > + __lhs.get_allocator())); > + __str.reserve(__len + __lhs.size()); > + __str.append(__lhs); > + __str.append(__rhs, __len); > + return __str; > +} > + >template > _GLIBCXX_STRING_CONSTEXPR > typename basic_string<_CharT, _Traits, _Alloc>::size_type > -- > 2.34.1 >
[PATCH v4] c++: Implement -Wself-move warning [PR81159]
On Sat, Aug 20, 2022 at 05:31:52PM -0400, Jason Merrill wrote: > On 8/19/22 15:34, Marek Polacek wrote: > > On Thu, Aug 18, 2022 at 08:33:47PM -0400, Jason Merrill wrote: > > > On 8/18/22 13:19, Marek Polacek wrote: > > > > On Mon, Aug 15, 2022 at 03:54:05PM -0400, Jason Merrill wrote: > > > > > On 8/9/22 09:37, Marek Polacek wrote: > > > > > > + /* We're looking for *std::move ((T &) &arg), or > > > > > > + *std::move ((T &) (T *) r) if the argument it a > > > > > > reference. */ > > > > > > + if (!REFERENCE_REF_P (rhs) > > > > > > + || TREE_CODE (TREE_OPERAND (rhs, 0)) != CALL_EXPR) > > > > > > +return; > > > > > > + tree fn = TREE_OPERAND (rhs, 0); > > > > > > + if (!is_std_move_p (fn)) > > > > > > +return; > > > > > > + tree arg = CALL_EXPR_ARG (fn, 0); > > > > > > + if (TREE_CODE (arg) != NOP_EXPR) > > > > > > +return; > > > > > > + /* Strip the (T &). */ > > > > > > + arg = TREE_OPERAND (arg, 0); > > > > > > + /* Strip the (T *) or &. */ > > > > > > + arg = TREE_OPERAND (arg, 0); > > > > > > > > > > Are you sure these are the only two expressions that can make it > > > > > here? What > > > > > if the argument to move is *Tptr? > > > > > > > > Not 100% sure but I couldn't find any other form. For *Tptr we get > > > > *std::move ((int * &) &Tptr) > > > > > > That likes like what you'd get when the argument is Tptr, not when it's > > > *Tptr. And indeed that's what I see in the testcase: > > > > > > > + Tptr = std::move (Tptr); // { dg-warning "moving a variable to > > > > itself" } > > > > > > is missing the * > > > > Duh, sorry. The previous patch didn't handle the *Tptr case. Further > > poking > > revealed that we need special care to handle (*(Tptr)) and **Tptr etc. So > > in > > this patch I'm stripping all *s and V_C_Es. Sigh. > > Ah, I was just thinking that the old patch needed more checking of > TREE_CODEs. But I suppose it's also good to diagnose these cases as well. Probably. I notice that clang++ doesn't detect it, but that doesn't mean we can't. > > + if (cp_tree_equal (lhs, arg)) > > +{ > > + auto_diagnostic_group d; > > + if (warning_at (loc, OPT_Wself_move, "moving a variable to itself")) > > Maybe only say "variable" when the original operand is in fact a variable? Fixed by the print_var_p thingie I introduced. I saw that clang++ points out the type so I'm printing it too in this patch. > > +// Test various other arguments to std::move. > > +template > > +void > > +testargs (T *Tptr, T **Tpptr, T& Tref, T&& Trref, const T *Tcptr) > > +{ > > + Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" } > > + *Tptr = std::move (*Tptr); // { dg-warning "moving a variable to itself" > > } > > + *Tptr = std::move (*(Tptr)); // { dg-warning "moving a variable to > > itself" } > > + *(Tptr) = std::move (*Tptr); // { dg-warning "moving a variable to > > itself" } > > + *(Tptr + 1) = std::move (*Tptr + 1); > > + *(Tptr + 1) = std::move (*Tptr + 2); > > Do you mean to have *(Tptr + 1) on both sides? The difference in * operand > is surprising. Definitely not intended, good spotting! Fixed (we emit a warning for the +1 case but not the +2 case -- good). Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? -- >8 -- About 5 years ago we got a request to implement -Wself-move, which warns about useless moves like this: int x; x = std::move (x); This patch implements that warning. PR c++/81159 gcc/c-family/ChangeLog: * c.opt (Wself-move): New option. gcc/cp/ChangeLog: * typeck.cc (maybe_warn_self_move): New. (cp_build_modify_expr): Call maybe_warn_self_move. gcc/ChangeLog: * doc/invoke.texi: Document -Wself-move. gcc/testsuite/ChangeLog: * g++.dg/warn/Wself-move1.C: New test. --- gcc/c-family/c.opt | 4 + gcc/cp/typeck.cc| 61 +++- gcc/doc/invoke.texi | 23 - gcc/testsuite/g++.dg/warn/Wself-move1.C | 125 4 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/warn/Wself-move1.C diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index dfdebd596ef..f776efd39d8 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1229,6 +1229,10 @@ Wselector ObjC ObjC++ Var(warn_selector) Warning Warn if a selector has multiple methods. +Wself-move +C++ ObjC++ Var(warn_self_move) Warning LangEnabledBy(C++ ObjC++, Wall) +Warn when a value is moved to itself with std::move. + Wsequence-point C ObjC C++ ObjC++ Var(warn_sequence_point) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn about possible violations of sequence point rules. diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 992ebfd99fb..194c403bd2b 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -8897,7 +8897,64 @@ cp_build_c_cast (location_t loc, tree type, tree expr, return error_mark_node; } - + +/* Warn wh
New German PO file for 'gcc' (version 12.2.0)
Hello, gentle maintainer. This is a message from the Translation Project robot. A revised PO file for textual domain 'gcc' has been submitted by the German team of translators. The file is available at: https://translationproject.org/latest/gcc/de.po (This file, 'gcc-12.2.0.de.po', has just now been sent to you in a separate email.) All other PO files for your package are available in: https://translationproject.org/latest/gcc/ Please consider including all of these in your next release, whether official or a pretest. Whenever you have a new distribution with a new version number ready, containing a newer POT file, please send the URL of that distribution tarball to the address below. The tarball may be just a pretest or a snapshot, it does not even have to compile. It is just used by the translators when they need some extra translation context. The following HTML page has been updated: https://translationproject.org/domain/gcc.html If any question arises, please contact the translation coordinator. Thank you for all your work, The Translation Project robot, in the name of your translation coordinator.
Re: Patch ping (Re: [PATCH] Implement __builtin_issignaling)
On Tue, 23 Aug 2022, Jakub Jelinek via Gcc-patches wrote: > On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote: > > I'm OK with the rest of the patch if Joseph doesn't have comments > > on the actual issignaling lowerings (which I didn't review for > > correctness due to lack of knowledge). > > I'd like to ping this patch. > Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html > with > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html > incremental additions. One testsuite comment: > +#ifdef __SIZEOF_FLOAT128__ > + if (f7 (w) || !f7 (__builtin_nansf128 ("0x123")) || f7 (42.0Q) || f7 > (__builtin_nanf128 ("0x234")) > + || f7 (__builtin_inff128 ()) || f7 (-__builtin_inff128 ()) || f7 > (-42.0Q) || f7 (-0.0Q) || f7 (0.0Q)) > +__builtin_abort (); __SIZEOF_FLOAT128__ is a target-specific macro for two targets. I think it's better to model things on the existing _FloatN and _FloatNx tests, so have such a test for each such type, preferably with most of the test implementation type-generic (see e.g. floatn-builtin.h) and then with the individual tests including e.g. /* { dg-add-options float128 } */ /* { dg-require-effective-target float128_runtime } */ to enable and test for the relevant support. That would mean _Float128 gets tested wherever supported - and also that the support is properly tested for _Float16, which doesn't look like it's covered by the tests in this patch at all at present. -- Joseph S. Myers jos...@codesourcery.com
[PATCH] v2: Implement __builtin_issignaling
On Tue, Aug 23, 2022 at 06:23:23PM +, Joseph Myers wrote: > On Tue, 23 Aug 2022, Jakub Jelinek via Gcc-patches wrote: > > > On Tue, Aug 16, 2022 at 11:41:06AM +, Richard Biener wrote: > > > I'm OK with the rest of the patch if Joseph doesn't have comments > > > on the actual issignaling lowerings (which I didn't review for > > > correctness due to lack of knowledge). > > > > I'd like to ping this patch. > > Patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599697.html > > with > > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599794.html > > https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599800.html > > incremental additions. > > One testsuite comment: > > > +#ifdef __SIZEOF_FLOAT128__ > > + if (f7 (w) || !f7 (__builtin_nansf128 ("0x123")) || f7 (42.0Q) || f7 > > (__builtin_nanf128 ("0x234")) > > + || f7 (__builtin_inff128 ()) || f7 (-__builtin_inff128 ()) || f7 > > (-42.0Q) || f7 (-0.0Q) || f7 (0.0Q)) > > +__builtin_abort (); > > __SIZEOF_FLOAT128__ is a target-specific macro for two targets. I think > it's better to model things on the existing _FloatN and _FloatNx tests, so > have such a test for each such type, preferably with most of the test > implementation type-generic (see e.g. floatn-builtin.h) and then with the > individual tests including e.g. > > /* { dg-add-options float128 } */ > /* { dg-require-effective-target float128_runtime } */ > > to enable and test for the relevant support. That would mean _Float128 > gets tested wherever supported - and also that the support is properly > tested for _Float16, which doesn't look like it's covered by the tests in > this patch at all at present. Ok, implemented in the updated patch. Tested with make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} dg-torture.exp=*builtin-issignaling* i386.exp=*builtin-issignaling*' ok if it passes full bootstrap/regtest? 2022-08-23 Jakub Jelinek gcc/ * builtins.def (BUILT_IN_ISSIGNALING): New built-in. * builtins.cc (expand_builtin_issignaling): New function. (expand_builtin_signbit): Don't overwrite target. (expand_builtin): Handle BUILT_IN_ISSIGNALING. (fold_builtin_classify): Likewise. (fold_builtin_1): Likewise. * optabs.def (issignaling_optab): New. * fold-const-call.cc (fold_const_call_ss): Handle BUILT_IN_ISSIGNALING. * config/i386/i386.md (issignalingxf2): New expander. * doc/extend.texi (__builtin_issignaling): Document. (__builtin_isinf, __builtin_isnan): Clarify behavior with -ffinite-math-only. * doc/md.texi (issignaling2): Likewise. gcc/c-family/ * c-common.cc (check_builtin_function_arguments): Handle BUILT_IN_ISSIGNALING. gcc/c/ * c-typeck.cc (convert_arguments): Handle BUILT_IN_ISSIGNALING. gcc/fortran/ * f95-lang.cc (gfc_init_builtin_functions): Initialize BUILT_IN_ISSIGNALING. gcc/testsuite/ * gcc.dg/torture/builtin-issignaling-1.c: New test. * gcc.dg/torture/builtin-issignaling-2.c: New test. * gcc.dg/torture/float16-builtin-issignaling-1.c: New test. * gcc.dg/torture/float32-builtin-issignaling-1.c: New test. * gcc.dg/torture/float32x-builtin-issignaling-1.c: New test. * gcc.dg/torture/float64-builtin-issignaling-1.c: New test. * gcc.dg/torture/float64x-builtin-issignaling-1.c: New test. * gcc.dg/torture/float128-builtin-issignaling-1.c: New test. * gcc.dg/torture/float128x-builtin-issignaling-1.c: New test. * gcc.target/i386/builtin-issignaling-1.c: New test. --- gcc/builtins.def.jj 2022-08-17 16:58:47.666820640 +0200 +++ gcc/builtins.def2022-08-23 20:59:35.286564045 +0200 @@ -908,6 +908,7 @@ DEF_GCC_BUILTIN(BUILT_IN_ISLESS, DEF_GCC_BUILTIN(BUILT_IN_ISLESSEQUAL, "islessequal", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) DEF_GCC_BUILTIN(BUILT_IN_ISLESSGREATER, "islessgreater", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) DEF_GCC_BUILTIN(BUILT_IN_ISUNORDERED, "isunordered", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) +DEF_GCC_BUILTIN(BUILT_IN_ISSIGNALING, "issignaling", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF) DEF_LIB_BUILTIN(BUILT_IN_LABS, "labs", BT_FN_LONG_LONG, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_C99_BUILTIN(BUILT_IN_LLABS, "llabs", BT_FN_LONGLONG_LONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GCC_BUILTIN(BUILT_IN_LONGJMP, "longjmp", BT_FN_VOID_PTR_INT, ATTR_NORETURN_NOTHROW_LIST) --- gcc/builtins.cc.jj 2022-08-17 16:58:47.637821012 +0200 +++ gcc/builtins.cc 2022-08-23 20:59:35.287564032 +0200 @@ -123,6 +123,7 @@ static rtx expand_builtin_fegetround (tr static rtx expand_builtin_feclear_feraise_except (tree, rtx, machine_mode, optab); static rtx expand_builtin_cexpi (tree, rtx); +static rtx expand_builtin_issignaling (tree, rtx)
[PATCH] Fortran: improve error recovery while simplifying size of bad array [PR103694]
Dear all, the simplification of the size of an array with a bad decl should not be successful. Improve the error recovery for such a situation. The patch is nearly obvious. I therefore intend to commit it in the next few days unless someone comes up with a better solution. Regtested on x86_64-pc-linux-gnu. The PR is marked as a 12/13 regression. Will therefore commit to both. Thanks, Harald From d306c0b171e502e3c87b92b6bc63b532f734e754 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Tue, 23 Aug 2022 22:16:14 +0200 Subject: [PATCH] Fortran: improve error recovery while simplifying size of bad array [PR103694] gcc/fortran/ChangeLog: PR fortran/103694 * simplify.cc (simplify_size): The size expression of an array cannot be simplified if an error occurs while resolving the array spec. gcc/testsuite/ChangeLog: PR fortran/103694 * gfortran.dg/pr103694.f90: New test. --- gcc/fortran/simplify.cc| 5 +++-- gcc/testsuite/gfortran.dg/pr103694.f90 | 11 +++ 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr103694.f90 diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc index f992c31e5d7..bc178d54891 100644 --- a/gcc/fortran/simplify.cc +++ b/gcc/fortran/simplify.cc @@ -7536,8 +7536,9 @@ simplify_size (gfc_expr *array, gfc_expr *dim, int k) } for (ref = array->ref; ref; ref = ref->next) -if (ref->type == REF_ARRAY && ref->u.ar.as) - gfc_resolve_array_spec (ref->u.ar.as, 0); +if (ref->type == REF_ARRAY && ref->u.ar.as + && !gfc_resolve_array_spec (ref->u.ar.as, 0)) + return NULL; if (dim == NULL) { diff --git a/gcc/testsuite/gfortran.dg/pr103694.f90 b/gcc/testsuite/gfortran.dg/pr103694.f90 new file mode 100644 index 000..3ed8b2088da --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr103694.f90 @@ -0,0 +1,11 @@ +! { dg-do compile } +! PR fortran/103694 - ICE in gfc_conv_expr_op +! Contributed by G.Steinmetz + +subroutine s + type t + integer :: a(2) + end type + type(t) :: x((0.)/0) + integer :: n = size(x(1)%a) ! { dg-error "does not reduce to a constant expression" } +end -- 2.35.3
[PATCH] x86: Replace vmovdqu with movdqu in BF16 XMM ABI tests
I am checking in this as an obvious fix. H.J. --- Since XMM BF16 tests only require SSE2, replace vmovdqu with movdqu in BF16 XMM ABI tests to support SSE2 machines without AVX. Tested on x86-64 machines with and without AVX. * gcc.target/x86_64/abi/bf16/asm-support.S: Replace vmovdqu with movdqu. --- .../gcc.target/x86_64/abi/bf16/asm-support.S | 36 +-- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S index a8165d86317..7559aa910c4 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support.S @@ -20,22 +20,22 @@ snapshot: movq%r13, r13(%rip) movq%r14, r14(%rip) movq%r15, r15(%rip) - vmovdqu %xmm0, xmm_regs+0(%rip) - vmovdqu %xmm1, xmm_regs+16(%rip) - vmovdqu %xmm2, xmm_regs+32(%rip) - vmovdqu %xmm3, xmm_regs+48(%rip) - vmovdqu %xmm4, xmm_regs+64(%rip) - vmovdqu %xmm5, xmm_regs+80(%rip) - vmovdqu %xmm6, xmm_regs+96(%rip) - vmovdqu %xmm7, xmm_regs+112(%rip) - vmovdqu %xmm8, xmm_regs+128(%rip) - vmovdqu %xmm9, xmm_regs+144(%rip) - vmovdqu %xmm10, xmm_regs+160(%rip) - vmovdqu %xmm11, xmm_regs+176(%rip) - vmovdqu %xmm12, xmm_regs+192(%rip) - vmovdqu %xmm13, xmm_regs+208(%rip) - vmovdqu %xmm14, xmm_regs+224(%rip) - vmovdqu %xmm15, xmm_regs+240(%rip) + movdqu %xmm0, xmm_regs+0(%rip) + movdqu %xmm1, xmm_regs+16(%rip) + movdqu %xmm2, xmm_regs+32(%rip) + movdqu %xmm3, xmm_regs+48(%rip) + movdqu %xmm4, xmm_regs+64(%rip) + movdqu %xmm5, xmm_regs+80(%rip) + movdqu %xmm6, xmm_regs+96(%rip) + movdqu %xmm7, xmm_regs+112(%rip) + movdqu %xmm8, xmm_regs+128(%rip) + movdqu %xmm9, xmm_regs+144(%rip) + movdqu %xmm10, xmm_regs+160(%rip) + movdqu %xmm11, xmm_regs+176(%rip) + movdqu %xmm12, xmm_regs+192(%rip) + movdqu %xmm13, xmm_regs+208(%rip) + movdqu %xmm14, xmm_regs+224(%rip) + movdqu %xmm15, xmm_regs+240(%rip) jmp *callthis(%rip) .LFE3: .size snapshot, .-snapshot @@ -50,8 +50,8 @@ snapshot_ret: addq$8, %rsp movq%rax, rax(%rip) movq%rdx, rdx(%rip) - vmovdqu %xmm0, xmm_regs+0(%rip) - vmovdqu %xmm1, xmm_regs+16(%rip) + movdqu %xmm0, xmm_regs+0(%rip) + movdqu %xmm1, xmm_regs+16(%rip) fstpt x87_regs(%rip) fstpt x87_regs+16(%rip) fldtx87_regs+16(%rip) -- 2.37.2
Re: [PATCH] Improve converting between 128-bit modes that use the same format
Please do not send new patches as replies to other patches. On Thu, Aug 18, 2022 at 05:48:29PM -0400, Michael Meissner wrote: > mprove converting between 128-bit modes that use the same format. You are missing some characters? But this is an edited version of the subject anyway. Just don't do that (neither the copying or the editing), it just confuses things. Please factor this patch into more pieces, pieces that can be reviewed more easily, pieces that change one thing only. As is you are just rewriting the lot, and it is not an improvement at all this way. No doubt there are many good pieces in it, but mixed with a non-trivial amount of bad pieces I cannot approve it. It also isn't clear at all what you want to do; piece by piece it is easier to explain. > -; Iterators for converting to/from TFmode > -(define_mode_iterator IFKF [IF KF]) Yes, IFmode and KFmode have almost nothing in common. Good to see this go. It would be even better if we would not use rs6000_expand_float128_convert when not needed, either, and all this would be just gone after expand. > +(define_expand "extendkfif2" > + [(set (match_operand:IF 0 "gpc_reg_operand") > + (float_extend:IF (match_operand:KF 1 "gpc_reg_operand")))] > + "TARGET_FLOAT128_TYPE" > +{ > + rs6000_expand_float128_convert (operands[0], operands[1], false); > + DONE; > +}) This does not belong here. It really shouldn't *exist* at all: this is *not* a float_extend! It is not converting to a wider mode (as required!), but not even to a mode of higher precision: both IFmode and KFmode can represent (finite, normal) numbers the other can not. But it certainly does not belong here in the middle of no-op moves. > +(define_expand "trunckfif2" > + [(set (match_operand:IF 0 "gpc_reg_operand") > + (float_truncate:IF (match_operand:KF 1 "gpc_reg_operand")))] > + "TARGET_FLOAT128_TYPE" > +{ > + rs6000_expand_float128_convert (operands[0], operands[1], false); > + DONE; > +}) I also would expect IBM128 instead of just IF. This would simplify a lot. Why do you not use that, is there a reason? > +;; Convert between KFmode and TFmode when -mabi=ieeelongdouble > +(define_insn_and_split "*extendkftf2_internal" Same for IEEE128. And this isn't a conversion at all (it's a no-op move), please don't confuse things by saying it is. > + [(set_attr "type" "two") > + (set_attr "num_insns" "2")]) Btw, that really should never be needed. insn_type "two" already means exactly that. > +(define_insn_and_split "*extendtfif2_internal" > + [(set (match_operand:IF 0 "gpc_reg_operand" "=d,&d") > + (float_extend:IF > + (match_operand:TF 1 "input_operand" "0,d")))] > + "FLOAT128_IBM_P (TFmode)" > + "#" > + "&& reload_completed" Why would this ever need reload_completed? It is a no-op move! Segher
Re: [PATCH v4] c++: Implement -Wself-move warning [PR81159]
On 8/23/22 09:39, Marek Polacek wrote: On Sat, Aug 20, 2022 at 05:31:52PM -0400, Jason Merrill wrote: On 8/19/22 15:34, Marek Polacek wrote: On Thu, Aug 18, 2022 at 08:33:47PM -0400, Jason Merrill wrote: On 8/18/22 13:19, Marek Polacek wrote: On Mon, Aug 15, 2022 at 03:54:05PM -0400, Jason Merrill wrote: On 8/9/22 09:37, Marek Polacek wrote: + /* We're looking for *std::move ((T &) &arg), or + *std::move ((T &) (T *) r) if the argument it a reference. */ + if (!REFERENCE_REF_P (rhs) + || TREE_CODE (TREE_OPERAND (rhs, 0)) != CALL_EXPR) +return; + tree fn = TREE_OPERAND (rhs, 0); + if (!is_std_move_p (fn)) +return; + tree arg = CALL_EXPR_ARG (fn, 0); + if (TREE_CODE (arg) != NOP_EXPR) +return; + /* Strip the (T &). */ + arg = TREE_OPERAND (arg, 0); + /* Strip the (T *) or &. */ + arg = TREE_OPERAND (arg, 0); Are you sure these are the only two expressions that can make it here? What if the argument to move is *Tptr? Not 100% sure but I couldn't find any other form. For *Tptr we get *std::move ((int * &) &Tptr) That likes like what you'd get when the argument is Tptr, not when it's *Tptr. And indeed that's what I see in the testcase: + Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" } is missing the * Duh, sorry. The previous patch didn't handle the *Tptr case. Further poking revealed that we need special care to handle (*(Tptr)) and **Tptr etc. So in this patch I'm stripping all *s and V_C_Es. Sigh. Ah, I was just thinking that the old patch needed more checking of TREE_CODEs. But I suppose it's also good to diagnose these cases as well. Probably. I notice that clang++ doesn't detect it, but that doesn't mean we can't. + if (cp_tree_equal (lhs, arg)) +{ + auto_diagnostic_group d; + if (warning_at (loc, OPT_Wself_move, "moving a variable to itself")) Maybe only say "variable" when the original operand is in fact a variable? Fixed by the print_var_p thingie I introduced. I saw that clang++ points out the type so I'm printing it too in this patch. +// Test various other arguments to std::move. +template +void +testargs (T *Tptr, T **Tpptr, T& Tref, T&& Trref, const T *Tcptr) +{ + Tptr = std::move (Tptr); // { dg-warning "moving a variable to itself" } + *Tptr = std::move (*Tptr); // { dg-warning "moving a variable to itself" } + *Tptr = std::move (*(Tptr)); // { dg-warning "moving a variable to itself" } + *(Tptr) = std::move (*Tptr); // { dg-warning "moving a variable to itself" } + *(Tptr + 1) = std::move (*Tptr + 1); + *(Tptr + 1) = std::move (*Tptr + 2); Do you mean to have *(Tptr + 1) on both sides? The difference in * operand is surprising. Definitely not intended, good spotting! Fixed (we emit a warning for the +1 case but not the +2 case -- good). Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? -- >8 -- About 5 years ago we got a request to implement -Wself-move, which warns about useless moves like this: int x; x = std::move (x); This patch implements that warning. PR c++/81159 gcc/c-family/ChangeLog: * c.opt (Wself-move): New option. gcc/cp/ChangeLog: * typeck.cc (maybe_warn_self_move): New. (cp_build_modify_expr): Call maybe_warn_self_move. gcc/ChangeLog: * doc/invoke.texi: Document -Wself-move. gcc/testsuite/ChangeLog: * g++.dg/warn/Wself-move1.C: New test. --- gcc/c-family/c.opt | 4 + gcc/cp/typeck.cc| 61 +++- gcc/doc/invoke.texi | 23 - gcc/testsuite/g++.dg/warn/Wself-move1.C | 125 4 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/warn/Wself-move1.C diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index dfdebd596ef..f776efd39d8 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1229,6 +1229,10 @@ Wselector ObjC ObjC++ Var(warn_selector) Warning Warn if a selector has multiple methods. +Wself-move +C++ ObjC++ Var(warn_self_move) Warning LangEnabledBy(C++ ObjC++, Wall) +Warn when a value is moved to itself with std::move. + Wsequence-point C ObjC C++ ObjC++ Var(warn_sequence_point) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn about possible violations of sequence point rules. diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 992ebfd99fb..194c403bd2b 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -8897,7 +8897,64 @@ cp_build_c_cast (location_t loc, tree type, tree expr, return error_mark_node; } - + +/* Warn when a value is moved to itself with std::move. LHS is the target, + RHS may be the std::move call, and LOC is the location of the whole + assignment. */ + +static void +maybe_warn_self_move (location_t loc, tree lhs, tree rhs) +{ + if (!warn_self_move) +return; + + /* C++98 doesn't know move. */ + if (cxx_dialect < cxx11) +return; + + if (processing_templa
Re: [PATCH] c++: Quash bogus -Wredundant-move warning
On 8/22/22 17:10, Marek Polacek wrote: This patch fixes a pretty stoopid thinko. When I added code to warn about pessimizing std::move in initializations like T t{std::move(T())}; I also added code to unwrap the expression from { }. But when we have return {std::move(t)}; we cannot warn about a redundant std::move because the implicit move wouldn't happen for "return {t};" because the expression isn't just a name. However, we still want to warn about return {std::move(T())}; so let's not disable the -Wpessimizing-move warning. Tests added for both cases. Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? OK. gcc/cp/ChangeLog: * typeck.cc (maybe_warn_pessimizing_move): Don't warn about redundant std::move when the expression was wrapped in { }. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/Wpessimizing-move10.C: New test. * g++.dg/cpp0x/Wredundant-move12.C: New test. --- gcc/cp/typeck.cc | 13 +-- .../g++.dg/cpp0x/Wpessimizing-move10.C| 30 .../g++.dg/cpp0x/Wredundant-move12.C | 36 +++ 3 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 992ebfd99fb..7fde65adaa4 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -10397,11 +10397,15 @@ maybe_warn_pessimizing_move (tree expr, tree type, bool return_p) if (!CLASS_TYPE_P (type)) return; + bool wrapped_p = false; /* A a = std::move (A()); */ if (TREE_CODE (expr) == TREE_LIST) { if (list_length (expr) == 1) - expr = TREE_VALUE (expr); + { + expr = TREE_VALUE (expr); + wrapped_p = true; + } else return; } @@ -10410,7 +10414,10 @@ maybe_warn_pessimizing_move (tree expr, tree type, bool return_p) else if (TREE_CODE (expr) == CONSTRUCTOR) { if (CONSTRUCTOR_NELTS (expr) == 1) - expr = CONSTRUCTOR_ELT (expr, 0)->value; + { + expr = CONSTRUCTOR_ELT (expr, 0)->value; + wrapped_p = true; + } else return; } @@ -10458,6 +10465,8 @@ maybe_warn_pessimizing_move (tree expr, tree type, bool return_p) /* Warn if the move is redundant. It is redundant when we would do maybe-rvalue overload resolution even without std::move. */ else if (warn_redundant_move + /* This doesn't apply for return {std::move (t)};. */ + && !wrapped_p && !warning_suppressed_p (expr, OPT_Wredundant_move) && (moved = treat_lvalue_as_rvalue_p (arg, /*return*/true))) { diff --git a/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C b/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C new file mode 100644 index 000..77314141da3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move10.C @@ -0,0 +1,30 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-Wpessimizing-move" } + +// Define std::move. +namespace std { + template +struct remove_reference +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&> +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&&> +{ typedef _Tp type; }; + + template +constexpr typename std::remove_reference<_Tp>::type&& +move(_Tp&& __t) noexcept +{ return static_cast::type&&>(__t); } +} + +struct S { }; + +S +f () +{ + return {std::move(S())}; // { dg-warning "moving a temporary object prevents copy elision" } +} diff --git a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C new file mode 100644 index 000..192d981ba5e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move12.C @@ -0,0 +1,36 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-Wredundant-move" } + +// Define std::move. +namespace std { + template +struct remove_reference +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&> +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&&> +{ typedef _Tp type; }; + + template +constexpr typename std::remove_reference<_Tp>::type&& +move(_Tp&& __t) noexcept +{ return static_cast::type&&>(__t); } +} + +struct S1 { + S1(); + S1(S1 const &) = delete; + S1(S1 &&); + S1 operator =(S1 const &) = delete; + S1 operator =(S1 &&); +}; + +struct S2 { S2(S1); }; +S2 f() { + S1 s; + return { std::move(s) }; // { dg-bogus "redundant move" } +} base-commit: cc4fa7a210b638d6a46f14dab17f2361389d18e1
Re: [PATCH] preprocessor: Implement C++23 P2437R1 - Support for #warning [PR106646]
On 8/22/22 02:53, Jakub Jelinek wrote: Hi! On Thu, Aug 18, 2022 at 11:02:44PM +, Joseph Myers wrote: ISO C2x standardizes the existing #warning extension. Arrange accordingly for it not to be diagnosed with -std=c2x -pedantic, but to be diagnosed with -Wc11-c2x-compat. And here is the corresponding C++ version. Don't pedwarn about this for C++23/GNU++23 and tweak the diagnostics for C++ otherwise, + testsuite coverage. The diagnostic wording is similar e.g. to the #elifdef diagnostics. So far lightly tested, ok for trunk if it passes full bootstrap/regtest? OK. 2022-08-22 Jakub Jelinek PR c++/106646 * init.cc: Implement C++23 P2437R1 - Support for #warning. (lang_defaults): Set warning_directive for GNUCXX23 and CXX23. * directives.cc (directive_diagnostics): Use different wording of #warning pedwarn for C++. * g++.dg/cpp/warning-1.C: New test. * g++.dg/cpp/warning-2.C: New test. * g++.dg/cpp/warning-3.C: New test. --- libcpp/init.cc.jj 2022-08-20 10:25:17.613071845 +0200 +++ libcpp/init.cc 2022-08-22 11:30:57.642622570 +0200 @@ -123,8 +123,8 @@ static const struct lang_flags lang_defa /* CXX17*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0 }, /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, /* CXX20*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1 }, - /* CXX23*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1 }, + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, + /* CXX23*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; --- libcpp/directives.cc.jj 2022-08-19 16:00:05.295386974 +0200 +++ libcpp/directives.cc2022-08-22 11:30:03.239357642 +0200 @@ -388,8 +388,14 @@ directive_diagnostics (cpp_reader *pfile else if (dir == &dtable[T_WARNING]) { if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, warning_directive)) - cpp_error (pfile, CPP_DL_PEDWARN, - "#%s before C2X is a GCC extension", dir->name); + { + if (CPP_OPTION (pfile, cplusplus)) + cpp_error (pfile, CPP_DL_PEDWARN, + "#%s before C++23 is a GCC extension", dir->name); + else + cpp_error (pfile, CPP_DL_PEDWARN, + "#%s before C2X is a GCC extension", dir->name); + } else if (CPP_OPTION (pfile, cpp_warn_c11_c2x_compat) > 0) cpp_warning (pfile, CPP_W_C11_C2X_COMPAT, "#%s before C2X is a GCC extension", dir->name); --- gcc/testsuite/g++.dg/cpp/warning-1.C.jj 2022-08-22 11:39:41.284547323 +0200 +++ gcc/testsuite/g++.dg/cpp/warning-1.C2022-08-22 11:44:03.925988332 +0200 @@ -0,0 +1,6 @@ +// P2437R1 - Support for #warning +// { dg-do preprocess } +// { dg-options "-pedantic-errors" } + +#warning example text /* { dg-warning "example text" } */ +// { dg-error "#warning before C\\\+\\\+23 is a GCC extension" "pedantic" { target c++20_down } .-1 } --- gcc/testsuite/g++.dg/cpp/warning-2.C.jj 2022-08-22 11:39:44.116509055 +0200 +++ gcc/testsuite/g++.dg/cpp/warning-2.C2022-08-22 11:44:14.933839041 +0200 @@ -0,0 +1,6 @@ +// P2437R1 - Support for #warning +// { dg-do preprocess } +// { dg-options "-pedantic" } + +#warning example text /* { dg-warning "example text" } */ +// { dg-warning "#warning before C\\\+\\\+23 is a GCC extension" "pedantic" { target c++20_down } .-1 } --- gcc/testsuite/g++.dg/cpp/warning-3.C.jj 2022-08-22 11:39:47.020469826 +0200 +++ gcc/testsuite/g++.dg/cpp/warning-3.C2022-08-22 11:42:23.640348405 +0200 @@ -0,0 +1,6 @@ +// P2437R1 - Support for #warning +// { dg-do preprocess } +// { dg-options "" } + +#warning example text /* { dg-warning "example text" } */ +// { dg-bogus "#warning before C\\\+\\\+23 is a GCC extension" "" { target *-*-* } .-1 } Jakub
Re: [PATCH V4] rs6000: Optimize cmp on rotated 16bits constant
Hi! On Mon, Jul 25, 2022 at 09:29:22PM +0800, Jiufu Guo wrote: > When checking eq/neq with a constant which has only 16bits, it can be > optimized to check the rotated data. By this, the constant building > is optimized. "ne", not "neq". > gcc/ChangeLog: > > * config/rs6000/rs6000-protos.h (rotate_from_leading_zeros_const): > New decl. "New declaration." or just "New". Also, don't break lines early please, especially if that means ending a line in a colon, which then looks as if you forgot to write something there. > * config/rs6000/rs6000.cc (rotate_from_leading_zeros_const): New > define for checking simply rotated constant. "New." or "New definition." or such. > +/* Check if C can be rotated from an immediate which contains leading > + zeros at least CLZ. "Which starts (as 64 bit integer) with at least CLZ bits zero" or such. > + /* xx0..0xx: rotate enough bits firstly, then check case a. */ > + const int rot_bits = HOST_BITS_PER_WIDE_INT - clz + 1; > + unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1)); > + tz = ctz_hwi (rc); > + if (clz_hwi (rc) + tz >= clz) > +return tz + rot_bits; This could use some more explanation. > +(define_code_iterator eqne [eq ne]) > +;; "i == C" ==> "rotl(i,N) == rotl(C,N)" > +(define_insn_and_split "*rotate_on_cmpdi" > + [(set (pc) > + (if_then_else (eqne (match_operand:DI 1 "gpc_reg_operand" "r") Wrong indentation. The ( should be in the same column as the preceding ( it matches. Setting "pc" to either 0 or 1 is never correct. > + "TARGET_POWERPC64 && !reload_completed && can_create_pseudo_p () reload_completed in splitters is almost always wrong. It isn't any better if it is in the insn condition of a define_insn_and_split :-) > + && num_insns_constant (operands[2], DImode) > 1 > + && (rotate_from_leading_zeros_const (~UINTVAL (operands[2]), 49) > 0 > + || rotate_from_leading_zeros_const (UINTVAL (operands[2]), 48) > 0)" There must be a better way to describe this. > + if (rot < 0) > + { > + sgn = true; > + rot = rotate_from_leading_zeros_const (~C, 49); > + } Bad indentation. > + rtx cmp = ne ? gen_rtx_NE (CCmode, cc, const0_rtx) > +: gen_rtx_EQ (CCmode, cc, const0_rtx); rtx cmp = gen_rtx_ (...); (and define a code_attr EQNE to just output the uppercase EQ or NE). Why is this doing a conditional branch at all? Unpredictable conditional branches are extremely costly. > +/* { dg-require-effective-target lp64 } */ arch_ppc64 > +/* { dg-final { scan-assembler-times "cmpldi" 10 } } */ > +/* { dg-final { scan-assembler-times "cmpdi" 4 } } */ > +/* { dg-final { scan-assembler-times "rotldi" 14 } } */ Please use \m and \M . Thanks, Segher
Re: Ping^2: 2 libcpp patches
On Tue, Aug 16, 2022 at 9:50 AM Lewis Hyatt wrote: > > On Wed, Jul 20, 2022 at 8:56 PM Lewis Hyatt wrote: > > > > Hello- > > > > May I please ping these two preprocessor patches? > > > > For PR103902: > > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596704.html > > > > For PR55971: > > https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596820.html > > > > Thanks! > > Hello- > > I would very much appreciate feedback on these two patches please? > > For the first patch, I think it is a worthwhile goal to fix all the > places where libcpp fails to support UTF-8 correctly, and this is one > of two remaining ones that I'm aware of. I can fix the other case > (handling of #pragma push_macro) once this one is in place. > > The second patch is about libcpp not allowing raw strings containing > newlines in preprocessor directives, which is a nearly decade-old > glitch that I think is also worth addressing. > > Thanks! > > -Lewis Hello- I CCed Joseph on this ping last week, but he suggested asking other maintainers to take a look, so I hope it's OK I am doing that now? Jakub, I thought it could make sense to ask you please, since I saw you working on lex.cc recently as well, and also since you commented on PR55971 nine years ago :). With an uptick in development of libcpp to support new C++23 features, I think the chances that my patches will eventually conflict with those are increasing, so it would be great to get them reviewed. As of current master branch, they still apply and regtest fine. Thanks very much. -Lewis
tree.cc: Fix optimization of DFP default initialization
When an object of decimal floating-point type is default-initialized, GCC is inconsistent about whether it is given the all-zero-bits representation (zero with the least quantum exponent) or whether it acts like a conversion of integer 0 to the DFP type (zero with quantum exponent 0). In particular, the representation stored in memory can have all zero bits, but optimization of access to the same object based on its known constant value can then produce zero with quantum exponent 0 instead. C2x leaves the quantum exponent for default initialization implementation-defined, but that doesn't allow such inconsistency in the interpretation of a single object. All zero bits seems most appropriate; change build_real to special-case dconst0 the same way other constants are special-cased and ensure that the correct zero for the type is generated. Bootstrapped with no regressions for x86_64-pc-linux-gnu. OK to commit? gcc/ * tree.cc (build_real): Give DFP dconst0 the minimum quantum exponent for the type. gcc/testsuite/ * gcc.dg/torture/dfp-default-init-1.c, gcc.dg/torture/dfp-default-init-2.c, gcc.dg/torture/dfp-default-init-3.c: New tests. diff --git a/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c new file mode 100644 index 000..f893ddb52b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c @@ -0,0 +1,113 @@ +/* Test that default-initialized DFP values consistently have the least quantum + exponent. */ +/* { dg-do run } */ +/* { dg-require-effective-target dfp } */ + +extern void exit (int); +extern void abort (void); +void *memset (void *, int, __SIZE_TYPE__); +int memcmp (const void *, const void *, __SIZE_TYPE__); + +#ifndef TYPE +#define TYPE _Decimal32 +#endif + +#ifndef ZEROFP +#define ZEROFP 0e-101DF +#endif + +TYPE zero_int = 0; +TYPE zero_fp = ZEROFP; +TYPE default_init; +TYPE zero_bytes; +TYPE x; + +struct s { TYPE a, b; }; +struct s s_default_init; +struct s s_empty_init = {}; +struct s s_first_int = { 0 }; +struct s s_both_int = { 0, 0 }; +struct s sx; + +const TYPE a_default_init[10]; +const TYPE a_empty_init[10] = {}; +const TYPE a_first_int[10] = { 0 }; +const TYPE a_two_int[10] = { 0, 0 }; + +#define CHECK_ZERO_BYTES(expr) \ + do \ +{ \ + if (memcmp (expr, &zero_bytes, sizeof zero_bytes) != 0) \ + abort (); \ + TYPE tmp = *expr;\ + if (memcmp (&tmp, &zero_bytes, sizeof zero_bytes) != 0) \ + abort (); \ +} \ + while (0) + +#define CHECK_INT_BYTES(expr) \ + do \ +{ \ + if (memcmp (expr, &zero_int, sizeof zero_int) != 0) \ + abort (); \ + TYPE tmp = *expr;\ + if (memcmp (&tmp, &zero_int, sizeof zero_int) != 0) \ + abort (); \ +} \ + while (0) + +int +main (void) +{ + memset (&zero_bytes, 0, sizeof zero_bytes); + if (memcmp (&zero_bytes, &zero_int, sizeof zero_int) == 0) +abort (); + CHECK_ZERO_BYTES (&zero_fp); + CHECK_ZERO_BYTES (&default_init); + CHECK_ZERO_BYTES (&s_default_init.a); + CHECK_ZERO_BYTES (&s_default_init.b); + CHECK_ZERO_BYTES (&s_empty_init.a); + CHECK_ZERO_BYTES (&s_empty_init.b); + CHECK_INT_BYTES (&s_first_int.a); + CHECK_ZERO_BYTES (&s_first_int.b); + CHECK_INT_BYTES (&s_both_int.a); + CHECK_INT_BYTES (&s_both_int.b); + CHECK_ZERO_BYTES (&a_default_init[0]); + CHECK_ZERO_BYTES (&a_default_init[1]); + CHECK_ZERO_BYTES (&a_default_init[2]); + CHECK_ZERO_BYTES (&a_default_init[9]); + CHECK_ZERO_BYTES (&a_empty_init[0]); + CHECK_ZERO_BYTES (&a_empty_init[1]); + CHECK_ZERO_BYTES (&a_empty_init[2]); + CHECK_ZERO_BYTES (&a_empty_init[9]); + CHECK_INT_BYTES (&a_first_int[0]); + CHECK_ZERO_BYTES (&a_first_int[1]); + CHECK_ZERO_BYTES (&a_first_int[2]); + CHECK_ZERO_BYTES (&a_first_int[9]); + CHECK_INT_BYTES (&a_two_int[0]); + CHECK_INT_BYTES (&a_two_int[1]); + CHECK_ZERO_BYTES (&a_two_int[2]); + CHECK_ZERO_BYTES (&a_two_int[9]); + struct s s2 = {}; + CHECK_ZERO_BYTES (&s2.a); + CHECK_ZERO_BYTES (&s2.b); + struct s s3 = { 0 }; + CHECK_INT_BYTES (&s3.a); + CHECK_ZERO_BYTES (&s3.b); + struct s s4 = { 0, 0 }; + CHECK_INT_BYTES (&s4.a); + CHECK_INT_BYTES (&s4.b); + struct s s5 = { 0 }; + sx = s5; + CHECK_INT_BYTES (&sx.a); + CHECK_ZERO_BYTES (&sx.b); + x =
Re: Re: [PATCH] RISC-V: Add runtime invariant support
I tried #define LOGICAL_OP_NON_SHORT_CIRCUIT 1 in RISC-V port. The warning is still there. Are you considering this patch:https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600120.html to solve this issue ? Or you are trying another solution to fix this ? juzhe.zh...@rivai.ai From: Richard Biener Date: 2022-08-23 17:34 To: Richard Biener CC: Andrew Pinski; GCC Patches; Andrew Waterman; Andreas Schwab; Kito Cheng; juzhe.zhong Subject: Re: [PATCH] RISC-V: Add runtime invariant support On Mon, Aug 22, 2022 at 8:15 AM Richard Biener via Gcc-patches wrote: > > On Sat, 20 Aug 2022, Andrew Pinski wrote: > > > On Sat, Aug 20, 2022 at 3:34 PM Andreas Schwab > > wrote: > > > > > > This breaks bootstrap: > > > > > > ../../gcc/tree-vect-loop-manip.cc: In function 'void > > > vect_gen_vector_loop_niters(loop_vec_info, tree, tree_node**, > > > tree_node**, bool)': > > > ../../gcc/tree-vect-loop-manip.cc:1981:26: error: 'const_vf' may be used > > > uninitialized [-Werror=maybe-uninitialized] > > > 1981 | unsigned HOST_WIDE_INT const_vf; > > > | ^~~~ > > > cc1plus: all warnings being treated as errors > > > make[3]: *** [Makefile:1146: tree-vect-loop-manip.o] Error 1 > > > make[2]: *** [Makefile:4977: all-stage2-gcc] Error 2 > > > make[1]: *** [Makefile:30363: stage2-bubble] Error 2 > > > make: *** [Makefile:1065: all] Error 2 > > > > > > This looks like a real uninitialized variable issue. > > I even can't tell if the paths that lead to using const_vf will be > > always set so how we expect GCC to do the same. > > The code that uses const_vf was added with r11-5820-cdcbef3c3310, > > CCing the author there. > > The key is > > tree log_vf = NULL_TREE; > ... > unsigned HOST_WIDE_INT const_vf; > if (vf.is_constant (&const_vf) > && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) > { > ... > log_vf = build_int_cst (type, exact_log2 (const_vf)); > ... > } > ... > if (stmts != NULL && log_vf) > { > ... use const_vf ... > > so it's uninit analysis little mind that is confused. There is code > that's supposed to handle the situation (setting flag under condition, > testing that flag instead of condition) but maybe it's too twisted > here. One could refector this as > > bool const_vf_p = vf.is_constant (&const_vf); > if (const_vf_p > && ...) > ... >if (stmts != NULL && const_vf_p) > ... > > and hope uninits mind is good enough to see log_vf is not used > uninitialized. > > I can also look into why uninit doesn't get it, but preprocessed > source would be handy then. Btw, the riscv speciality is gcc/config/riscv/riscv.h:#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 with --param logical-op-non-short-circuit=1 the diagnostic does not occur. > > Richard.
[PATCH] Don't gimple fold ymm-version vblendvpd/vblendvps/vpblendvb w/o TARGET_AVX2
Since 256-bit vector integer comparison is under TARGET_AVX2, and gimple folding for vblendvpd/vblendvps/vpblendvb relies on that. Restrict gimple fold condition to TARGET_AVX2. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/106704 * config/i386/i386-builtin.def (BDESC): Add CODE_FOR_avx_blendvpd256/CODE_FOR_avx_blendvps256 to corresponding builtins. * config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_BLENDVPD256 w/o TARGET_AVX2. gcc/testsuite/ChangeLog: * gcc.target/i386/pr106704.c: New test. --- gcc/config/i386/i386-builtin.def | 4 ++-- gcc/config/i386/i386.cc | 12 +--- gcc/testsuite/gcc.target/i386/pr106704.c | 16 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr106704.c diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index acb7e8ca64b..f9c7abde2cf 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1036,8 +1036,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpe BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF) +BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF) +BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index e27c87f8c83..c4d0e36e9c0 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18452,6 +18452,15 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) } break; +case IX86_BUILTIN_PBLENDVB256: +case IX86_BUILTIN_BLENDVPS256: +case IX86_BUILTIN_BLENDVPD256: + /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower +to scalar operations and not combined back. */ + if (!TARGET_AVX2) + break; + + /* FALLTHRU. */ case IX86_BUILTIN_BLENDVPD: /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2, w/o sse4.2, it's veclowered to scalar operations and @@ -18460,10 +18469,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) break; /* FALLTHRU. */ case IX86_BUILTIN_PBLENDVB128: -case IX86_BUILTIN_PBLENDVB256: case IX86_BUILTIN_BLENDVPS: -case IX86_BUILTIN_BLENDVPS256: -case IX86_BUILTIN_BLENDVPD256: gcc_assert (n_args == 3); arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); diff --git a/gcc/testsuite/gcc.target/i386/pr106704.c b/gcc/testsuite/gcc.target/i386/pr106704.c new file mode 100644 index 000..44e052a4caa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106704.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx -O2 -mno-avx2" } */ +/* { dg-final { scan-assembler-times {vblendvps[ \t]+%ymm[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vblendvpd[ \t]+%ymm[0-9]+} 1 } } */ + +#include + +__m256 bend_stuff( __m256 a, __m256 b, __m256 mask) +{ + return _mm256_blendv_ps(a, b, mask); +} + +__m256d bend_stuff1( __m256d a, __m256d b, __m256d mask) +{ + return _mm256_blendv_pd(a, b, mask); +} -- 2.18.1
Ping: [PATCH v4] rs6000: Fix incorrect RTL for Power LE when removing the UNSPECS [PR106069]
Hi Segher, I'd like to resend and ping for this patch. Thanks. From 23bffdacdf0eb1140c7a3571e6158797f4818d57 Mon Sep 17 00:00:00 2001 From: Xionghu Luo Date: Thu, 4 Aug 2022 03:44:58 + Subject: [PATCH v4] rs6000: Fix incorrect RTL for Power LE when removing the UNSPECS [PR106069] v4: Update per comments. v3: rename altivec_vmrghb_direct_le to altivec_vmrglb_direct_le to match the actual output ASM vmrglb. Likewise for all similar xxx_direct_le patterns. v2: Split the direct pattern to be and le with same RTL but different insn. The native RTL expression for vec_mrghw should be same for BE and LE as they are register and endian-independent. So both BE and LE need generate exactly same RTL with index [0 4 1 5] when expanding vec_mrghw with vec_select and vec_concat. (set (reg:V4SI 141) (vec_select:V4SI (vec_concat:V8SI (subreg:V4SI (reg:V16QI 139) 0) (subreg:V4SI (reg:V16QI 140) 0)) [const_int 0 4 1 5])) Then combine pass could do the nested vec_select optimization in simplify-rtx.c:simplify_binary_operation_1 also on both BE and LE: 21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel [0 4 1 5]) 24: {r151:SI=vec_select(r150:V4SI,parallel [const_int 3]);} => 21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel) 24: {r151:SI=vec_select(r146:V4SI,parallel [const_int 1]);} The endianness check need only once at ASM generation finally. ASM would be better due to nested vec_select simplified to simple scalar load. Regression tested pass for Power8{LE,BE}{32,64} and Power{9,10}LE{32,64} Linux. gcc/ChangeLog: PR target/106069 * config/rs6000/altivec.md (altivec_vmrghb_direct): Remove. (altivec_vmrghb_direct_be): New pattern for BE. (altivec_vmrghb_direct_le): New pattern for LE. (altivec_vmrghh_direct): Remove. (altivec_vmrghh_direct_be): New pattern for BE. (altivec_vmrghh_direct_le): New pattern for LE. (altivec_vmrghw_direct_): Remove. (altivec_vmrghw_direct__be): New pattern for BE. (altivec_vmrghw_direct__le): New pattern for LE. (altivec_vmrglb_direct): Remove. (altivec_vmrglb_direct_be): New pattern for BE. (altivec_vmrglb_direct_le): New pattern for LE. (altivec_vmrglh_direct): Remove. (altivec_vmrglh_direct_be): New pattern for BE. (altivec_vmrglh_direct_le): New pattern for LE. (altivec_vmrglw_direct_): Remove. (altivec_vmrglw_direct__be): New pattern for BE. (altivec_vmrglw_direct__le): New pattern for LE. * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Adjust. * config/rs6000/vsx.md: Likewise. gcc/testsuite/ChangeLog: PR target/106069 * g++.target/powerpc/pr106069.C: New test. Signed-off-by: Xionghu Luo --- gcc/config/rs6000/altivec.md| 222 ++-- gcc/config/rs6000/rs6000.cc | 24 +-- gcc/config/rs6000/vsx.md| 28 +-- gcc/testsuite/g++.target/powerpc/pr106069.C | 118 +++ 4 files changed, 307 insertions(+), 85 deletions(-) create mode 100644 gcc/testsuite/g++.target/powerpc/pr106069.C diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2c4940f2e21..c6a381908cb 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1144,15 +1144,16 @@ (define_expand "altivec_vmrghb" (use (match_operand:V16QI 2 "register_operand"))] "TARGET_ALTIVEC" { - rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct - : gen_altivec_vmrglb_direct; - if (!BYTES_BIG_ENDIAN) -std::swap (operands[1], operands[2]); - emit_insn (fun (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) +emit_insn ( + gen_altivec_vmrghb_direct_be (operands[0], operands[1], operands[2])); + else +emit_insn ( + gen_altivec_vmrglb_direct_le (operands[0], operands[2], operands[1])); DONE; }) -(define_insn "altivec_vmrghb_direct" +(define_insn "altivec_vmrghb_direct_be" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_select:V16QI (vec_concat:V32QI @@ -1166,7 +1167,25 @@ (define_insn "altivec_vmrghb_direct" (const_int 5) (const_int 21) (const_int 6) (const_int 22) (const_int 7) (const_int 23)])))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" + "vmrghb %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghb_direct_le" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 2 "register_operand" "v") + (match_operand:V16QI 1 "register_operand" "v")) + (parallel [(const_int 8) (const_int 24) +(const_int 9) (const_int 25) +(const_int 10) (cons
Re: [PATCH] MIPS: improve -march=native arch detection
YunQiang Su 于2022年8月2日周二 19:11写道: > > If we cannot get info from options and cpuinfo, we try to get from: > 1. getauxval(AT_BASE_PLATFORM), introduced since Linux 5.7 > 2. _MIPS_ARCH from host compiler. > > This can fix the wrong loader usage on r5/r6 platform with > -march=native. > Is it treat as minor fixes? > gcc/ChangeLog: > * config/mips/driver-native.cc (host_detect_local_cpu): > try getauxval(AT_BASE_PLATFORM) and _MIPS_ARCH, too. > --- > gcc/config/mips/driver-native.cc | 22 +++--- > 1 file changed, 19 insertions(+), 3 deletions(-) > > diff --git a/gcc/config/mips/driver-native.cc > b/gcc/config/mips/driver-native.cc > index 47627f85ce1..9aa7044c0b8 100644 > --- a/gcc/config/mips/driver-native.cc > +++ b/gcc/config/mips/driver-native.cc > @@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see > > #define IN_TARGET_CODE 1 > > +#include > #include "config.h" > #include "system.h" > #include "coretypes.h" > @@ -46,15 +47,15 @@ host_detect_local_cpu (int argc, const char **argv) >bool arch; > >if (argc < 1) > -return NULL; > +goto fallback_cpu; > >arch = strcmp (argv[0], "arch") == 0; >if (!arch && strcmp (argv[0], "tune")) > -return NULL; > +goto fallback_cpu; > >f = fopen ("/proc/cpuinfo", "r"); >if (f == NULL) > -return NULL; > +goto fallback_cpu; > >while (fgets (buf, sizeof (buf), f) != NULL) > if (startswith (buf, "cpu model")) > @@ -84,8 +85,23 @@ host_detect_local_cpu (int argc, const char **argv) > >fclose (f); > > +fallback_cpu: > +/*FIXME: how about other OSes, like FreeBSD? */ > +#ifdef __linux__ > + /*Note: getauxval may return NULL as: > + * AT_BASE_PLATFORM is supported since Linux 5.7 > + * Or from older version of qemu-user > + * */ > + if (cpu == NULL) > +cpu = (const char *) getauxval (AT_BASE_PLATFORM); > +#endif > + >if (cpu == NULL) > +#if defined (_MIPS_ARCH) > +cpu = _MIPS_ARCH; > +#else > return NULL; > +#endif > >return concat ("-m", argv[0], "=", cpu, NULL); > } > -- > 2.30.2 >
Re: tree.cc: Fix optimization of DFP default initialization
> Am 24.08.2022 um 01:54 schrieb Joseph Myers : > > When an object of decimal floating-point type is default-initialized, > GCC is inconsistent about whether it is given the all-zero-bits > representation (zero with the least quantum exponent) or whether it > acts like a conversion of integer 0 to the DFP type (zero with quantum > exponent 0). In particular, the representation stored in memory can > have all zero bits, but optimization of access to the same object > based on its known constant value can then produce zero with quantum > exponent 0 instead. > > C2x leaves the quantum exponent for default initialization > implementation-defined, but that doesn't allow such inconsistency in > the interpretation of a single object. All zero bits seems most > appropriate; change build_real to special-case dconst0 the same way > other constants are special-cased and ensure that the correct zero for > the type is generated. > > Bootstrapped with no regressions for x86_64-pc-linux-gnu. OK to commit? Ok Thanks, Richard > gcc/ >* tree.cc (build_real): Give DFP dconst0 the minimum quantum >exponent for the type. > > gcc/testsuite/ >* gcc.dg/torture/dfp-default-init-1.c, >gcc.dg/torture/dfp-default-init-2.c, >gcc.dg/torture/dfp-default-init-3.c: New tests. > > diff --git a/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c > b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c > new file mode 100644 > index 000..f893ddb52b9 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/torture/dfp-default-init-1.c > @@ -0,0 +1,113 @@ > +/* Test that default-initialized DFP values consistently have the least > quantum > + exponent. */ > +/* { dg-do run } */ > +/* { dg-require-effective-target dfp } */ > + > +extern void exit (int); > +extern void abort (void); > +void *memset (void *, int, __SIZE_TYPE__); > +int memcmp (const void *, const void *, __SIZE_TYPE__); > + > +#ifndef TYPE > +#define TYPE _Decimal32 > +#endif > + > +#ifndef ZEROFP > +#define ZEROFP 0e-101DF > +#endif > + > +TYPE zero_int = 0; > +TYPE zero_fp = ZEROFP; > +TYPE default_init; > +TYPE zero_bytes; > +TYPE x; > + > +struct s { TYPE a, b; }; > +struct s s_default_init; > +struct s s_empty_init = {}; > +struct s s_first_int = { 0 }; > +struct s s_both_int = { 0, 0 }; > +struct s sx; > + > +const TYPE a_default_init[10]; > +const TYPE a_empty_init[10] = {}; > +const TYPE a_first_int[10] = { 0 }; > +const TYPE a_two_int[10] = { 0, 0 }; > + > +#define CHECK_ZERO_BYTES(expr)\ > + do\ > +{\ > + if (memcmp (expr, &zero_bytes, sizeof zero_bytes) != 0)\ > +abort ();\ > + TYPE tmp = *expr;\ > + if (memcmp (&tmp, &zero_bytes, sizeof zero_bytes) != 0)\ > +abort ();\ > +}\ > + while (0) > + > +#define CHECK_INT_BYTES(expr)\ > + do\ > +{\ > + if (memcmp (expr, &zero_int, sizeof zero_int) != 0)\ > +abort ();\ > + TYPE tmp = *expr;\ > + if (memcmp (&tmp, &zero_int, sizeof zero_int) != 0)\ > +abort ();\ > +}\ > + while (0) > + > +int > +main (void) > +{ > + memset (&zero_bytes, 0, sizeof zero_bytes); > + if (memcmp (&zero_bytes, &zero_int, sizeof zero_int) == 0) > +abort (); > + CHECK_ZERO_BYTES (&zero_fp); > + CHECK_ZERO_BYTES (&default_init); > + CHECK_ZERO_BYTES (&s_default_init.a); > + CHECK_ZERO_BYTES (&s_default_init.b); > + CHECK_ZERO_BYTES (&s_empty_init.a); > + CHECK_ZERO_BYTES (&s_empty_init.b); > + CHECK_INT_BYTES (&s_first_int.a); > + CHECK_ZERO_BYTES (&s_first_int.b); > + CHECK_INT_BYTES (&s_both_int.a); > + CHECK_INT_BYTES (&s_both_int.b); > + CHECK_ZERO_BYTES (&a_default_init[0]); > + CHECK_ZERO_BYTES (&a_default_init[1]); > + CHECK_ZERO_BYTES (&a_default_init[2]); > + CHECK_ZERO_BYTES (&a_default_init[9]); > + CHECK_ZERO_BYTES (&a_empty_init[0]); > + CHECK_ZERO_BYTES (&a_empty_init[1]); > + CHECK_ZERO_BYTES (&a_empty_init[2]); > + CHECK_ZERO_BYTES (&a_empty_init[9]); > + CHECK_INT_BYTES (&a_first_int[0]); > + CHECK_ZERO_BYTES (&a_first_int[1]); > + CHECK_ZERO_BYTES (&a_first_int[2]); > + CHECK_ZERO_BYTES (&a_first_int[9]); > + CHECK_INT_BYTES (&a_two_int[0]); > + CHECK_INT_BYTES (&a_two_int[1]); > + CHECK_ZERO_BYTES (&a_two_int[2]); > + CHECK_ZERO_BYTES (&a_two_int[9]); > + struct s s2 = {}; > + CHECK_ZERO_BYTES (&s2.a); > + CHECK_ZERO_BYTES (&s2.b); > + struct s s3 = { 0 }; > + CHECK_INT_BYTES (&s3.a); > + CHECK_ZERO_BYTES (&s3.b); > + struct s s4 = { 0, 0 }; > + CHECK_INT_BYTES (&s4.a); > + CHECK_INT_BYTES (&s4.b); > + struct s s5 = { 0 }; > + sx = s5; > + CHECK_INT_BYTES (&sx.a); > + CHECK_ZERO_BYTES (&sx.b); > + x = default_
Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions
Hi Segher, On 23/8/2022 下午 10:26, Segher Boessenkool wrote: > Hi! > > On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote: >> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c >> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c >> @@ -1,7 +1,8 @@ >> /* { dg-do compile { target { powerpc*-*-* } } } */ >> -/* { dg-require-effective-target lp64 } */ >> -/* { dg-require-effective-target powerpc_p9vector_ok } */ >> /* { dg-options "-mdejagnu-cpu=power9" } */ >> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && >> ilp32 } } } */ > > You can add this always. It is default on 64-bit systems, but it is > simpler to just always add it: > /* { dg-additional-options "-mpowerpc64" } */ > > Or are there subtargets that will error on this? Yes, AIX fails if TARGET_POWERPC64 is set and TARGET_64BIT is not set. So I add "-mpowerpc64" for Linux 32-bit environment. if (TARGET_POWERPC64 && ! TARGET_64BIT) \ { \ error ("%<-maix64%> required: 64-bit computation with 32-bit addressing not yet supported"); \ } Thanks a lot Gui Haochen
Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions
on 2022/8/24 13:11, HAO CHEN GUI wrote: > Hi Segher, > > On 23/8/2022 下午 10:26, Segher Boessenkool wrote: >> Hi! >> >> On Fri, Aug 19, 2022 at 10:35:54AM +0800, HAO CHEN GUI wrote: >>> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c >>> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c >>> @@ -1,7 +1,8 @@ >>> /* { dg-do compile { target { powerpc*-*-* } } } */ >>> -/* { dg-require-effective-target lp64 } */ >>> -/* { dg-require-effective-target powerpc_p9vector_ok } */ >>> /* { dg-options "-mdejagnu-cpu=power9" } */ >>> +/* { dg-additional-options "-mpowerpc64" { target { powerpc*-*-linux* && >>> ilp32 } } } */ >> >> You can add this always. It is default on 64-bit systems, but it is >> simpler to just always add it: >> /* { dg-additional-options "-mpowerpc64" } */ >> >> Or are there subtargets that will error on this? > Yes, AIX fails if TARGET_POWERPC64 is set and TARGET_64BIT is not set. > So I add "-mpowerpc64" for Linux 32-bit environment. > > if (TARGET_POWERPC64 && ! TARGET_64BIT) \ > { \ > error ("%<-maix64%> required: 64-bit computation with 32-bit addressing > not yet supported"); \ > } > Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all the time, but still having that has_arch_ppc64 effective target on aix? I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a problem (turning into an UNSUPPORTED then)? BR, Kewen
Re: [PATCH] MIPS: improve -march=native arch detection
On Tue, 2022-08-02 at 11:10 +, YunQiang Su wrote: > If we cannot get info from options and cpuinfo, we try to get from: > 1. getauxval(AT_BASE_PLATFORM), introduced since Linux 5.7 > 2. _MIPS_ARCH from host compiler. > > This can fix the wrong loader usage on r5/r6 platform with > -march=native. /* snip */ > if (argc < 1) > - return NULL; > + goto fallback_cpu; I don't think this should be changed, if argc < 1 it means the spec (disambiguation: the thing printed by "gcc -dumpspecs", hard coded in gnu-user.h) is wrong. It cannot happen with the built-in spec, but if a user specifies a bad custom spec with "-spec", we shouldn't be tricked. > arch = strcmp (argv[0], "arch") == 0; > if (!arch && strcmp (argv[0], "tune")) > - return NULL; > + goto fallback_cpu; Likewise. > f = fopen ("/proc/cpuinfo", "r"); > if (f == NULL) > - return NULL; > + goto fallback_cpu; OK. > +fallback_cpu: > +/*FIXME: how about other OSes, like FreeBSD? */ https://reviews.freebsd.org/D12743 added elf_aux_info as a counterpart of getauxinfo, but it looks like FreeBSD does not have AT_BASE_PLATFORM. > +#ifdef __linux__ > + /*Note: getauxval may return NULL as: > + * AT_BASE_PLATFORM is supported since Linux 5.7 > + * Or from older version of qemu-user > + * */ > + if (cpu == NULL) > + cpu = (const char *) getauxval (AT_BASE_PLATFORM); getauxval is added in Glibc-2.16 so it will fail to build on hosts with old glibc or other libc implementation. Check if getauxval and AT_BASE_PLATFORM are available (in gcc/configure.ac) instead of an inaccurate "#ifdef __linux__". > +#endif > + > if (cpu == NULL) > +#if defined (_MIPS_ARCH) > + cpu = _MIPS_ARCH; > +#else Ok. > return NULL; > +#endif > > return concat ("-m", argv[0], "=", cpu, NULL); > }
[PATCH v2] libstdc++: Optimize operator+(string/char*,string/char*)
A revision of the original patch -- based on the feedback from Jonathan -- that removes the `inline` specifier is attached.
[PATCH] libstdc++: Optimize operator+(string/char*, string/char*) equally
From: Will Hawkins Until now operator+(char*, string) and operator+(string, char*) had different performance characteristics. The former required a single memory allocation and the latter required two. This patch makes the performance equal. libstdc++-v3/ChangeLog: * libstdc++-v3/include/bits/basic_string.h (operator+(string, char*)): Remove naive implementation. * libstdc++-v3/include/bits/basic_string.tcc (operator+(string, char*)): Add single-allocation implementation. Signed-off-by: Will Hawkins --- libstdc++-v3/include/bits/basic_string.h | 9 ++--- libstdc++-v3/include/bits/basic_string.tcc | 21 + 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index b04fba95678..fa6738925bb 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -3521,14 +3521,9 @@ _GLIBCXX_END_NAMESPACE_CXX11 */ template _GLIBCXX20_CONSTEXPR -inline basic_string<_CharT, _Traits, _Alloc> +basic_string<_CharT, _Traits, _Alloc> operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, - const _CharT* __rhs) -{ - basic_string<_CharT, _Traits, _Alloc> __str(__lhs); - __str.append(__rhs); - return __str; -} + const _CharT* __rhs); /** * @brief Concatenate string and character. diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index 4563c61429a..95ba8e503e9 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __str; } + template +_GLIBCXX20_CONSTEXPR +basic_string<_CharT, _Traits, _Alloc> +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, + const _CharT* __rhs) +{ + __glibcxx_requires_string(__rhs); + typedef basic_string<_CharT, _Traits, _Alloc> __string_type; + typedef typename __string_type::size_type __size_type; + typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template + rebind<_CharT>::other _Char_alloc_type; + typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits; + const __size_type __len = _Traits::length(__rhs); + __string_type __str(_Alloc_traits::_S_select_on_copy( + __lhs.get_allocator())); + __str.reserve(__len + __lhs.size()); + __str.append(__lhs); + __str.append(__rhs, __len); + return __str; +} + template _GLIBCXX_STRING_CONSTEXPR typename basic_string<_CharT, _Traits, _Alloc>::size_type -- 2.34.1
Re: [PATCH] libstdc++: Optimize operator+(string/char*, string/char*) equally
On Tue, Aug 23, 2022 at 12:33 PM Jonathan Wakely wrote: > > On Mon, 22 Aug 2022 at 19:15, Will Hawkins wrote: > > > > Until now operator+(char*, string) and operator+(string, char*) had > > different performance characteristics. The former required a single > > memory allocation and the latter required two. This patch makes the > > performance equal. > > If you don't have a GCC copyright assignment on file with the FSF then > please follow the procedure described at https://gcc.gnu.org/dco.html Thank you. > > > > libstdc++-v3/ChangeLog: > > * libstdc++-v3/include/bits/basic_string.h (operator+(string, > > char*)): > > Remove naive implementation. > > * libstdc++-v3/include/bits/basic_string.tcc (operator+(string, > > char*)): > > Add single-allocation implementation. > > --- > > libstdc++-v3/include/bits/basic_string.h | 7 +-- > > libstdc++-v3/include/bits/basic_string.tcc | 21 + > > 2 files changed, 22 insertions(+), 6 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/basic_string.h > > b/libstdc++-v3/include/bits/basic_string.h > > index b04fba95678..bc048fc0689 100644 > > --- a/libstdc++-v3/include/bits/basic_string.h > > +++ b/libstdc++-v3/include/bits/basic_string.h > > @@ -3523,12 +3523,7 @@ _GLIBCXX_END_NAMESPACE_CXX11 > > _GLIBCXX20_CONSTEXPR > > inline basic_string<_CharT, _Traits, _Alloc> > > Please remove the 'inline' specifier here, since you're moving the > definition into the non-inline .tcc file. > > There's a separate discussion to be had about whether these operator+ > overloads *should* be inline. But for the purposes of this change, we > want these two operator+ overloads to be consistent, and so they > should both be non-inline. Thank you for the feedback. I sent out a v2 of the patch. Again, I hope that I followed the proper procedure by having my mailer put the patch in reply to my previous message. Thank you again! Will > > > operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, > > - const _CharT* __rhs) > > -{ > > - basic_string<_CharT, _Traits, _Alloc> __str(__lhs); > > - __str.append(__rhs); > > - return __str; > > -} > > + const _CharT* __rhs); > > > >/** > > * @brief Concatenate string and character. > > diff --git a/libstdc++-v3/include/bits/basic_string.tcc > > b/libstdc++-v3/include/bits/basic_string.tcc > > index 4563c61429a..95ba8e503e9 100644 > > --- a/libstdc++-v3/include/bits/basic_string.tcc > > +++ b/libstdc++-v3/include/bits/basic_string.tcc > > @@ -640,6 +640,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >return __str; > > } > > > > + template > > +_GLIBCXX20_CONSTEXPR > > +basic_string<_CharT, _Traits, _Alloc> > > +operator+(const basic_string<_CharT, _Traits, _Alloc>& __lhs, > > + const _CharT* __rhs) > > +{ > > + __glibcxx_requires_string(__rhs); > > + typedef basic_string<_CharT, _Traits, _Alloc> __string_type; > > + typedef typename __string_type::size_type __size_type; > > + typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template > > + rebind<_CharT>::other _Char_alloc_type; > > + typedef __gnu_cxx::__alloc_traits<_Char_alloc_type> _Alloc_traits; > > + const __size_type __len = _Traits::length(__rhs); > > + __string_type __str(_Alloc_traits::_S_select_on_copy( > > + __lhs.get_allocator())); > > + __str.reserve(__len + __lhs.size()); > > + __str.append(__lhs); > > + __str.append(__rhs, __len); > > + return __str; > > +} > > + > >template > > _GLIBCXX_STRING_CONSTEXPR > > typename basic_string<_CharT, _Traits, _Alloc>::size_type > > -- > > 2.34.1 > > >
Re: [PATCH, rs6000] Change insn condition from TARGET_64BIT to TARGET_POWERPC64 for VSX scalar extract/insert instructions
Hi Kewen, On 24/8/2022 下午 1:24, Kewen.Lin wrote: > Could you try to test with dg-options "-mdejagnu-cpu=power9 -mpowerpc64" all > the time, but still > having that has_arch_ppc64 effective target on aix? > > I'd expect has_arch_ppc64 check to fail on aix 32bit, the error will not be a > problem (turning > into an UNSUPPORTED then)? Good point. I will get an AIX to test it. Thanks Gui Haochen
Re: [PATCH] Speedup path discovery in predicate::use_cannot_happen
On Tue, 23 Aug 2022, Aldy Hernandez wrote: > On Tue, Aug 23, 2022 at 2:16 PM Richard Biener wrote: > > > > The following reverts a hunk from r8-5789-g11ef0b22d68cd1 that > > made compute_control_dep_chain start from function entry rather > > than the immediate dominator of the source block of the edge with > > the undefined value on the PHI node. Reverting at that point > > does not reveal any testsuite FAIL, in particular the added > > testcase still passes. The following adjusts this to the other > > function that computes predicates that hold on the PHI incoming > > edges with undefined values, predicate::init_from_phi_def, which > > starts at the immediate dominator of the PHI. That's much less > > likely to run into the CFG walking limit. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > Aldy - you did this change, do you remember anything here? In > > fact the whole thing that's now called predicate::use_cannot_happen > > seems to be redundant - the two testcases attributed to its history > > do not fail when that's disabled, nor did they fail when it was > > introduced. In principle whats now called predicate::superset_of > > should cover this (but different implementation limits might apply). > > OMG, I'm drawing a complete blank here. I have no recollection of > this. I'm tempted to say either my account was hacked or that old > code was all wrong ;-). Eh. Pushed then. I'm going to baby-step changes so we can figure out things when later bisecting regressions ... Thanks, Richard.
Re: LoongArch: add model attribute
在 2022/8/23 下午10:51, Xi Ruoyao 写道: diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index cadaad7519c..4e925aa3876 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -30,6 +30,12 @@ along with GCC; see the file COPYING3. If not see SYMBOL_PCREL The symbol's value will be loaded directly from data section. + SYMBOL_PCREL32 + SYMBOL_PCREL64 + Like SYMBOL_PCREL, but the PC-relative offset of the symbol is + assumed to be in the ± 2GiB or ± 8 EiB range, instead of the + code model specification. + SYMBOL_TLS A thread-local symbol. I don't think it's necessary to add SYMBOL_PCREL32, SYMBOL_PCREL can represent relative pc operation in normal mode.
Re: [commited PATCH v1] LoongArch: Add new code model 'medium'.
Pushed to r13-2165. 在 2022/8/20 下午5:18, Huacai Chen 写道: Hi, Lulu, I think there is a typo in your subject line. Huacai On Sat, Aug 20, 2022 at 5:05 PM Lulu Cheng wrote: The function jump instruction in normal mode is 'bl', so the scope of the function jump is +-128MB. Now we've added support for 'medium' mode, this mode is to complete the function jump through two instructions: pcalau12i + jirl So in this mode the function jump range is increased to +-2GB. Compared with 'normal' mode, 'medium' mode only affects the jump range of functions. gcc/ChangeLog: * config/loongarch/genopts/loongarch-strings: Support code model medium. * config/loongarch/genopts/loongarch.opt.in: Likewise. * config/loongarch/loongarch-def.c: Likewise. * config/loongarch/loongarch-def.h (CMODEL_LARGE): Likewise. (CMODEL_EXTREME): Likewise. (N_CMODEL_TYPES): Likewise. (CMODEL_MEDIUM): Likewise. * config/loongarch/loongarch-opts.cc: Likewise. * config/loongarch/loongarch-opts.h (TARGET_CMODEL_MEDIUM): Likewise. * config/loongarch/loongarch-str.h (STR_CMODEL_MEDIUM): Likewise. * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): Tls symbol Loading support medium mode. (loongarch_legitimize_call_address): When medium mode, make a symbolic jump with two instructions. (loongarch_option_override_internal): Support medium. * config/loongarch/loongarch.md (@pcalau12i): New template. (@sibcall_internal_1): New function call templates added to support medium mode. (@sibcall_value_internal_1): Likewise. (@sibcall_value_multiple_internal_1): Likewise. (@call_internal_1): Likewise. (@call_value_internal_1): Likewise. (@call_value_multiple_internal_1): Likewise. * config/loongarch/loongarch.opt: Support medium. * config/loongarch/predicates.md: Add processing about medium mode. * doc/invoke.texi: Document for '-mcmodel=medium'. gcc/testsuite/ChangeLog: * gcc.target/loongarch/func-call-medium-1.c: New test. * gcc.target/loongarch/func-call-medium-2.c: New test. * gcc.target/loongarch/func-call-medium-3.c: New test. * gcc.target/loongarch/func-call-medium-4.c: New test. * gcc.target/loongarch/func-call-medium-5.c: New test. * gcc.target/loongarch/func-call-medium-6.c: New test. * gcc.target/loongarch/func-call-medium-7.c: New test. * gcc.target/loongarch/func-call-medium-8.c: New test. * gcc.target/loongarch/tls-gd-noplt.c: Add compile parameter '-mexplicit-relocs'. --- .../loongarch/genopts/loongarch-strings | 1 + gcc/config/loongarch/genopts/loongarch.opt.in | 3 + gcc/config/loongarch/loongarch-def.c | 1 + gcc/config/loongarch/loongarch-def.h | 7 +- gcc/config/loongarch/loongarch-opts.cc| 15 ++- gcc/config/loongarch/loongarch-opts.h | 1 + gcc/config/loongarch/loongarch-str.h | 1 + gcc/config/loongarch/loongarch.cc | 123 + gcc/config/loongarch/loongarch.md | 125 +- gcc/config/loongarch/loongarch.opt| 3 + gcc/config/loongarch/predicates.md| 15 ++- gcc/doc/invoke.texi | 3 + .../gcc.target/loongarch/func-call-medium-1.c | 41 ++ .../gcc.target/loongarch/func-call-medium-2.c | 41 ++ .../gcc.target/loongarch/func-call-medium-3.c | 41 ++ .../gcc.target/loongarch/func-call-medium-4.c | 41 ++ .../gcc.target/loongarch/func-call-medium-5.c | 42 ++ .../gcc.target/loongarch/func-call-medium-6.c | 42 ++ .../gcc.target/loongarch/func-call-medium-7.c | 43 ++ .../gcc.target/loongarch/func-call-medium-8.c | 42 ++ .../gcc.target/loongarch/tls-gd-noplt.c | 4 +- 21 files changed, 595 insertions(+), 40 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings index cb88ed56b68..44ebb7ab10b 100644 --- a/gcc/config/loongarch/genopts/loongarch-strings +++ b/gcc/config/loongarch/genopts/loongarch-strings @@ -54,5 +54,6 @@ OPTSTR_CMODEL cmodel STR_CMODEL_NORMAL normal STR_CMODEL_TINY