Re: [PATCH v3] [aarch64] Add CPU support for Ampere Computing's eMAG.
On 11/22/18 8:54 AM, Andrew Pinski wrote: > One small comment. > > On Tue, Nov 20, 2018 at 10:01 AM Christoph Muellner > wrote: >> >> Tested with "make check" and no regressions found. >> >> This patch depends on the struct xgene1_prefetch_tune, >> which has been acknowledged already: >> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html >> >> *** gcc/ChangeLog *** >> >> 2018-xx-xx Christoph Muellner >> >> * config/aarch64/aarch64-cores.def: Define emag. >> * config/aarch64/aarch64-tune.md: Regenerated with emag. >> * config/aarch64/aarch64.c (emag_tunings): New struct. >> * doc/invoke.texi: Document mtune value. >> >> Signed-off-by: Christoph Muellner >> --- >> gcc/config/aarch64/aarch64-cores.def | 3 +++ >> gcc/config/aarch64/aarch64-tune.md | 2 +- >> gcc/config/aarch64/aarch64.c | 25 + >> gcc/doc/invoke.texi | 2 +- >> 4 files changed, 30 insertions(+), 2 deletions(-) >> >> diff --git a/gcc/config/aarch64/aarch64-cores.def >> b/gcc/config/aarch64/aarch64-cores.def >> index 1f3ac56..68cca00 100644 >> --- a/gcc/config/aarch64/aarch64-cores.def >> +++ b/gcc/config/aarch64/aarch64-cores.def >> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88", thunderxt88, thunderx, >> 8A, AARCH64_FL_FOR_ARCH >> AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, >> 0x0a2, -1) >> AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, >> AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, >> 0x0a3, -1) >> >> +/* Ampere Computing cores. */ >> +AARCH64_CORE("emag",emag, xgene1,8A, AARCH64_FL_FOR_ARCH8 >> | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) > > I think you should add a comment to say why this order is required > like above for thunderxt88p1. Ok, will do. Thanks, Christoph > > Thanks, > Andrew Pinski > >> + >> /* APM ('P') cores. */ >> AARCH64_CORE("xgene1", xgene1,xgene1,8A, >> AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) >> >> diff --git a/gcc/config/aarch64/aarch64-tune.md >> b/gcc/config/aarch64/aarch64-tune.md >> index fade1d4..2fc7f03 100644 >> --- a/gcc/config/aarch64/aarch64-tune.md >> +++ b/gcc/config/aarch64/aarch64-tune.md >> @@ -1,5 +1,5 @@ >> ;; -*- buffer-read-only: t -*- >> ;; Generated automatically by gentune.sh from aarch64-cores.def >> (define_attr "tune" >> - >> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" >> + >> "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" >> (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index f7f88a9..995aafe 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings = >>&xgene1_prefetch_tune >> }; >> >> +static const struct tune_params emag_tunings = >> +{ >> + &xgene1_extra_costs, >> + &xgene1_addrcost_table, >> + &xgene1_regmove_cost, >> + &xgene1_vector_cost, >> + &generic_branch_cost, >> + &xgene1_approx_modes, >> + 6, /* memmov_cost */ >> + 4, /* issue_rate */ >> + AARCH64_FUSE_NOTHING, /* fusible_ops */ >> + "16",/* function_align. */ >> + "16",/* jump_align. */ >> + "16",/* loop_align. */ >> + 2, /* int_reassoc_width. */ >> + 4, /* fp_reassoc_width. */ >> + 1, /* vec_reassoc_width. */ >> + 2, /* min_div_recip_mul_sf. */ >> + 2, /* min_div_recip_mul_df. */ >> + 17, /* max_case_values. */ >> + tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ >> + (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */ >> + &xgene1_prefetch_tune >> +}; >> + >> static const struct tune_params qdf24xx_tunings = >> { >>&qdf24xx_extra_costs, >> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >> index e016dce..ac81fb2 100644 >> --- a/gcc/doc/invoke.texi >> +++ b/gcc/doc/invoke.texi >> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which >> GCC should tune the >> performance of the code. Permissible values for this option are: >> @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55}, >> @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-
Re: C++ PATCH to implement P1094R2, Nested inline namespaces
On Tue, Nov 20, 2018 at 04:59:46PM -0500, Jason Merrill wrote: > On 11/19/18 5:12 PM, Marek Polacek wrote: >> > + /* Don't forget that the innermost namespace might have been >> > + marked as inline. */ >> > + is_inline |= nested_inline_p; >> This looks wrong: an inline namespace does not make its nested namespaces >> inline as well. >A nested namespace definition cannot be inline. This is supposed to handle >cases such as >namespace A::B::inline C { ... } >because after 'C' we don't see :: so it breaks and we call push_namespace >outside the for loop. So I still don't see a bug; do you have a test that >I got wrong? The way I read the question is "what does namespace A::inline B::C::D {...} do?". C and D are not inline. For what it's worth, I had an earlier very incomplete stab at it, haven't looked how complete it really was; I know that it didn't handle diagnostics as well as yours, and I have no recollection of whether it handles the cases like the above. See attached. diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 82b8ef8..cc5427a 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -12729,6 +12729,7 @@ cp_parser_declaration (cp_parser* parser) || (token2.type == CPP_OPEN_SQUARE && cp_lexer_peek_nth_token (parser->lexer, 3)->type == CPP_OPEN_SQUARE) + || token2.keyword == RID_INLINE /* An unnamed namespace definition. */ || token2.type == CPP_OPEN_BRACE || token2.keyword == RID_ATTRIBUTE)) @@ -18533,10 +18534,24 @@ cp_parser_namespace_definition (cp_parser* parser) /* Parse any specified attributes before the identifier. */ tree attribs = cp_parser_attributes_opt (parser); + bool is_maybe_nested_inline = is_inline; + for (;;) { identifier = NULL_TREE; + if (cxx_dialect > cxx17) + { + is_maybe_nested_inline = + cp_lexer_next_token_is_keyword (parser->lexer, RID_INLINE); + + if (is_maybe_nested_inline) + { + maybe_warn_cpp0x (CPP0X_INLINE_NAMESPACES); + cp_lexer_consume_token (parser->lexer); + } + } + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)) { identifier = cp_parser_identifier (parser); @@ -18555,7 +18570,8 @@ cp_parser_namespace_definition (cp_parser* parser) /* Nested namespace names can create new namespaces (unlike other qualified-ids). */ - if (int count = identifier ? push_namespace (identifier) : 0) + if (int count = identifier ? + push_namespace (identifier, is_maybe_nested_inline) : 0) nested_definition_count += count; else cp_parser_error (parser, "nested namespace name required"); @@ -18573,7 +18589,8 @@ cp_parser_namespace_definition (cp_parser* parser) "a nested namespace definition cannot be inline"); /* Start the namespace. */ - nested_definition_count += push_namespace (identifier, is_inline); + nested_definition_count += +push_namespace (identifier, is_maybe_nested_inline); bool has_visibility = handle_namespace_attrs (current_namespace, attribs);
Re: [PATCH] Fix -fstack-protector* on darwin/mingw etc. (PR target/85644)
On Wed, Nov 21, 2018 at 11:21 PM Jakub Jelinek wrote: > > Hi! > > As I wrote in the PR, before PR81708 commits, > while i386 defaulted to SSP_TLS rather than SSP_GLOBAL on everything but > Android, the -mstack-protector-guard= switch controlled pretty much > whether the i386.md special stack protector patterns are used (if tls) > or whether generic code is used (global). These special stack protector > patterns did one thing if TARGET_THREAD_SSP_OFFSET macro was defined > (only defined on glibc targets) - code like: > movq%fs:40, %rax > movq%rax, -8(%rbp) > xorl%eax, %eax > in the prologue and > movq-8(%rbp), %rdx > xorq%fs:40, %rdx > je .L4 > in the epilogue. If TARGET_THREAD_SSP_OFFSET macro wasn't defined, it would > do instead: > movq.refptr.__stack_chk_guard(%rip), %rax > movq(%rax), %rcx > movq%rcx, -8(%rbp) > xorl%ecx, %ecx > and > movq.refptr.__stack_chk_guard(%rip), %rdx > movq-8(%rbp), %rcx > xorq(%rdx), %rcx > je .L4 > (this is taken from 7.x cross to mingw). > Finally, for Android or when -mstack-protector-guard=global was used, it > emitted: > movq__stack_chk_guard(%rip), %rax > movq%rax, -8(%rbp) > and > movq__stack_chk_guard(%rip), %rdx > cmpq%rdx, %rcx > je .L4 > Note, apart from OS specific details, those =global sequences are similar > to the =tls ones when TARGET_THREAD_SSP_OFFSET is not defined, the main > difference is that the =tls ones are more secure as they clear registers > containing the guard as quickly as possible. The PR81708 changes dropped > the non-tls special stack_protector_* patterns from i386.md and now =tls > implies really tls, but the default remained, so mingw32 or darwin still > default to tls and just use 0 offset by default. > > So, this patch changes the default for mingw32, darwin and everything else > except gnu-user*.h to be =global, and just forces those special i386.md > more secure patterns unconditionally (slightly changing the generated code > on Android, but it is one extra insn in prologue and one fewer in the > epilogue). > > With this patch -mstack-protector-guard=tls is really for tls and =global > for pure var access and user can override the defaults on non-glibc targets, > but they should get a default that works there. > > Bootstrapped/regtested on x86_64-linux and i686-linux, plus tested with a > cross to mingw, ok for trunk? > > 2018-11-21 Jakub Jelinek > > PR target/85644 > PR target/86832 > * config/i386/i386.c (ix86_option_override_internal): Default > ix86_stack_protector_guard to SSP_TLS only if TARGET_THREAD_SSP_OFFSET > is defined. > * config/i386/i386.md (stack_protect_set, stack_protect_set_, > stack_protect_test, stack_protect_test_): Use empty condition > instead of TARGET_SSP_TLS_GUARD. OK for mainline and backports, with a bit of bikeshedding below. Thanks, Uros. > --- gcc/config/i386/i386.c.jj 2018-11-20 21:39:00.905577452 +0100 > +++ gcc/config/i386/i386.c 2018-11-21 18:02:49.448049161 +0100 > @@ -4557,8 +4557,13 @@ ix86_option_override_internal (bool main > >/* Handle stack protector */ >if (!opts_set->x_ix86_stack_protector_guard) > -opts->x_ix86_stack_protector_guard > - = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS; > +{ > + opts->x_ix86_stack_protector_guard = SSP_GLOBAL; > +#ifdef TARGET_THREAD_SSP_OFFSET > + if (!TARGET_HAS_BIONIC) > + opts->x_ix86_stack_protector_guard = SSP_TLS; > +#endif > +} How about: --cut here-- /* Handle stack protector */ if (!opts_set->x_ix86_stack_protector_guard) { #ifdef TARGET_THREAD_SSP_OFFSET if (!TARGET_HAS_BIONIC) opts->x_ix86_stack_protector_guard = SSP_TLS; else #endif opts->x_ix86_stack_protector_guard = SSP_GLOBAL; } --cut here-- > #ifdef TARGET_THREAD_SSP_OFFSET >ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; > --- gcc/config/i386/i386.md.jj 2018-11-21 11:45:12.090721862 +0100 > +++ gcc/config/i386/i386.md 2018-11-21 18:03:46.166119350 +0100 > @@ -19010,7 +19010,7 @@ (define_insn "*prefetch_prefetchwt1" > (define_expand "stack_protect_set" >[(match_operand 0 "memory_operand") > (match_operand 1 "memory_operand")] > - "TARGET_SSP_TLS_GUARD" > + "" > { >rtx (*insn)(rtx, rtx); > > @@ -19028,7 +19028,7 @@ (define_insn "stack_protect_set_" > UNSPEC_SP_SET)) > (set (match_scratch:PTR 2 "=&r") (const_int 0)) > (clobber (reg:CC FLAGS_REG))] > - "TARGET_SSP_TLS_GUARD" > + "" >"mov{}\t{%1, %2|%2, %1}\;mov{}\t{%2, %0|%0, > %2}\;xor{l}\t%k2, %k2" >[(set_attr "type" "multi")]) > > @@ -19036,7 +19036,7 @@ (define_expand "stack_protect_test" >[(match_operand 0 "memory_operand") > (match_operand 1 "memory_operand") > (match_operan
Re: [PATCH] x86: Add -march=cascadelake
On Thu, Nov 22, 2018 at 8:08 AM Wei Xiao wrote: > > Jakub, > > Thanks for the comments! > I have addressed them as attached. > > Wei > > gcc/ > * common/config/i386/i386-common.c (processor_names): Add cascadelake. > (processor_alias_table): Add cascadelake. > * config.gcc: Add -march=cascadelake. > * config/i386/driver-i386.c > (host_detect_local_cpu): Detect cascadelake. > * config/i386/i386-c.c (ix86_target_macros_internal): Handle > cascadelake. > * config/i386/i386.c (ix86_cost): Add m_CASCADELAKE. > (processor_cost_table): Add cascadelake. > (get_builtin_code_for_version): Handle cascadelake. > (fold_builtin_cpu): Ditto. > * config/i386/i386.h (TARGET_CASCADELAKE, PROCESSOR_CASCADELAKE): New. > (PTA_CASCADELAKE): Ditto. > * doc/invoke.texi: Add -march=cascadelake. > gcc/testsuite/ > * g++.target/i386/mv16.C: Handle new march. > * gcc.target/i386/funcspec-56.inc" Ditto. > libgcc/ > * config/i386/cpuinfo.h: Add INTEL_COREI7_CASCADELAKE. Updated patch is OK for mainline. Thanks, Uros.
Re: [C++ PATCH] Improve locations of id-expressions and operator "" (PR c++/87386, take 3)
In file included from ../../gcc/c-family/c-common.h:26:0, from ../../gcc/cp/cp-tree.h:40, from ../../gcc/cp/parser.c:25: ../../gcc/cp/parser.c: In function 'cp_expr cp_parser_operator(cp_parser*, location_t)': ../../gcc/tree.h:383:33: error: invalid cast from type 'cp_expr' to type 'tree_userdef_literal*' #define TREE_CHECK(T, CODE) (T) ^ ./tree-check.h:234:34: note: in expansion of macro 'TREE_CHECK' #define USERDEF_LITERAL_CHECK(t) TREE_CHECK (t, USERDEF_LITERAL) ^ ../../gcc/c-family/c-common.h:1231:36: note: in expansion of macro 'USERDEF_LITERAL_CHECK' (((struct tree_userdef_literal *)USERDEF_LITERAL_CHECK (NODE))->value) ^ ../../gcc/cp/parser.c:15309:20: note: in expansion of macro 'USERDEF_LITERAL_VALUE' string_tree = USERDEF_LITERAL_VALUE (str); ^ ../../gcc/tree.h:383:33: error: invalid cast from type 'cp_expr' to type 'tree_userdef_literal*' #define TREE_CHECK(T, CODE) (T) ^ ./tree-check.h:234:34: note: in expansion of macro 'TREE_CHECK' #define USERDEF_LITERAL_CHECK(t) TREE_CHECK (t, USERDEF_LITERAL) ^ ../../gcc/c-family/c-common.h:1228:36: note: in expansion of macro 'USERDEF_LITERAL_CHECK' (((struct tree_userdef_literal *)USERDEF_LITERAL_CHECK (NODE))->suffix_id) ^ ../../gcc/cp/parser.c:15310:11: note: in expansion of macro 'USERDEF_LITERAL_SUFFIX_ID' id = USERDEF_LITERAL_SUFFIX_ID (str); ^ Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Re: [PATCH][RFC] Extend locations where to seach for Fortran pre-include.
On 11/20/18 7:11 PM, Joseph Myers wrote: > On Tue, 20 Nov 2018, Jakub Jelinek wrote: > >> hardcoding /usr/include looks just very wrong here. That should always be >> dependent on the configured prefix or better be relative from the driver, >> gcc should be relocatable. Or at least come from configure. It should e.g. >> honor the sysroot stuff etc. >> >> That said, I think you need somebody familiar with the driver, perhaps >> Joseph? Hi Joseph. > > I'd sort of expect structures like those in cppdefault.[ch] to describe > the relevant Fortran directories and their properties (such as being > sysrooted or not - and if sysrooted, I suppose you'll want to make sure > SYSROOT_HEADERS_SUFFIX_SPEC is properly applied). We probably forgot to mention that the search mechanism happens in GCC driver. It's because we want to include the file (via a Fortran -fpre-include) conditionally only when found. > > If this preinclude doesn't pass through the C preprocessor, directories in > which it is searched for will need multilib or multiarch suffixes. No, it's not C preprocessor. > (Multilib suffixes on include directories for C are more or less an > implementation detail of how fixed headers are arranged in the case where > sysroot headers suffixes are used; they aren't really expected to be a > stable interface such that third-party software might install anything > using them, but I'm not sure if this preinclude is meant to come from > external software or be installed by GCC. It will come from glibc-devel package, and it's expected to be installed in: usr/include/finclude/ for 64-bit header and /usr/include/finclude/32/ for the 32-bit header. Multiarch suffixes, for systems > using Debian/Ubuntu-style multiarch directory arrangements, *are* intended > as a stable interface. And multilib *OS* suffixes > (-print-multi-os-directory) are a stable interface, but only really > suitable for libraries, not headers, because they are paths relative to > lib/ such as ../lib64.) If I understand correctly, Jakub wanted to have it working with -m32 being used for 64-bit GCC compiler. So yes, multi os for header files. So the question is what can we really do in the GCC driver? Martin
Re: [C++ PATCH] Improve locations of id-expressions and operator "" (PR c++/87386, take 3)
On Thu, Nov 22, 2018 at 09:53:13AM +0100, Andreas Schwab wrote: > In file included from ../../gcc/c-family/c-common.h:26:0, > from ../../gcc/cp/cp-tree.h:40, > from ../../gcc/cp/parser.c:25: > ../../gcc/cp/parser.c: In function 'cp_expr cp_parser_operator(cp_parser*, > location_t)': > ../../gcc/tree.h:383:33: error: invalid cast from type 'cp_expr' to type > 'tree_userdef_literal*' > #define TREE_CHECK(T, CODE) (T) > ^ Is that with --enable-checking=release or something similar? Built just fine for me with normal --enable-checking=yes,rtl,extra. Jakub
[PATCH, libfortran] PR 88137 Initialize backtrace state once
>From backtrace.h for backtrace_create_state: Calling this function allocates resources that can not be freed. There is no backtrace_free_state function. The state is used to cache information that is expensive to recompute. Programs are expected to call this function at most once and to save the return value for all later calls to backtrace functions. So instead of calling backtrace_create_state every time we wish to show a backtrace, do it once and store the result in a static variable. libbacktrace allows multiple threads to access the state, so no need to use TLS. Regtested on x86_64-pc-linux-gnu, Ok for trunk/8/7? libgfortran/ChangeLog: 2018-11-22 Janne Blomqvist PR libfortran/88137 * runtime/backtrace.c (show_backtrace): Make lbstate a static variable, initialize once. --- libgfortran/runtime/backtrace.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libgfortran/runtime/backtrace.c b/libgfortran/runtime/backtrace.c index e0c277044b6..3fc973a5e6d 100644 --- a/libgfortran/runtime/backtrace.c +++ b/libgfortran/runtime/backtrace.c @@ -146,11 +146,15 @@ full_callback (void *data, uintptr_t pc, const char *filename, void show_backtrace (bool in_signal_handler) { - struct backtrace_state *lbstate; + /* Note that libbacktrace allows the state to be accessed from + multiple threads, so we don't need to use a TLS variable for the + state here. */ + static struct backtrace_state *lbstate; struct mystate state = { 0, false, in_signal_handler }; - - lbstate = backtrace_create_state (NULL, __gthread_active_p (), - error_callback, NULL); + + if (!lbstate) +lbstate = backtrace_create_state (NULL, __gthread_active_p (), + error_callback, NULL); if (lbstate == NULL) return; -- 2.17.1
Re: [PATCH, libstdc++] Implement P0415 More constexpr for std::complex.
On 20/11/18 17:58 -0500, Ed Smith-Rowland wrote: On 11/19/18 6:13 AM, Jonathan Wakely wrote: On 16/11/18 19:39 -0500, Ed Smith-Rowland wrote: @@ -322,67 +323,43 @@ //@{ /// Return new complex value @a x plus @a y. template - inline complex<_Tp> + inline _GLIBCXX20_CONSTEXPR complex<_Tp> operator+(const complex<_Tp>& __x, const complex<_Tp>& __y) - { - complex<_Tp> __r = __x; - __r += __y; - return __r; - } + { return complex<_Tp>(__x.real() + __y.real(), __x.imag() + __y.imag()); } Is this change (and all the similar ones) really needed? Doesn't the fact that all the constructors and member operators of std::complex mean that the original definition is also valid in a constexpr function? These changes are rolled back. Sorry. @@ -1163,50 +1143,43 @@ #endif template - complex& + _GLIBCXX20_CONSTEXPR complex& operator=(const complex<_Tp>& __z) { - __real__ _M_value = __z.real(); - __imag__ _M_value = __z.imag(); + _M_value = __z.__rep(); These changes look OK, but I wonder if we shouldn't ask the compiler to make it possible to use __real__ and __imag__ in constexpr functions instead. I assume it doesn't, and that's why you made this change. But if it Just Worked, and the other changes I commented on above are also unnecessary, then this patch would *mostly* just be adding _GLIBCXX20_CONSTEXPR which is OK for stage 3 (as it doesn't affect any dialects except C++2a). Yes, this is the issue. I agree that constexpr _real__, __imag__would be better. Do you have any idea where this change would be? I grepped around a little and couldn't figure it out. if you don't I'll look more. No idea, sorry. Actually, looking at constexpr.c it looks like the old way ought to work... OK, plain assignment works but not the others. Interesting. @@ -1872,7 +1831,7 @@ { return _Tp(); } template - inline typename __gnu_cxx::__promote<_Tp>::__type + _GLIBCXX_CONSTEXPR inline typename __gnu_cxx::__promote<_Tp>::__type This should be _GLIBCXX20_CONSTEXPR. Done. Index: testsuite/26_numerics/complex/comparison_operators/more_constexpr.cc === --- testsuite/26_numerics/complex/comparison_operators/more_constexpr.cc (nonexistent) +++ testsuite/26_numerics/complex/comparison_operators/more_constexpr.cc (working copy) @@ -0,0 +1,51 @@ +// { dg-do compile { target c++2a } } All the tests with { target c++2a} should also have: // { dg-options "-std=gnu++2a" } Because otherwise they are skipped by default, and only get run when RUNTESTFLAGS explicitly includes something like --target_board=unix/-std=gnu++2a The dg-options needs to come first, or it doesn't apply before the check for { target c++2a }. Thank you, done. OK for trunk, thanks. Updated patch attached. I'd like to understand why __real__ _M_value += __z.real(); doesn't work though. Yes, I agree it should. If you don't figure it out please file a bug requesting that it works, so somebody else might look into it.
Re: [C++ PATCH] Improve locations of id-expressions and operator "" (PR c++/87386, take 3)
On Nov 22 2018, Jakub Jelinek wrote: > Is that with --enable-checking=release Yes. Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Re: Improve relocation
On 26/10/18 14:02 +0200, Marc Glisse wrote: Index: libstdc++-v3/include/bits/stl_iterator.h === --- libstdc++-v3/include/bits/stl_iterator.h(revision 265522) +++ libstdc++-v3/include/bits/stl_iterator.h(working copy) @@ -59,20 +59,24 @@ #ifndef _STL_ITERATOR_H #define _STL_ITERATOR_H 1 #include #include #include #include #if __cplusplus > 201402L I think this should be >= 201103L, no? OK for trunk (with that #if changed, if appropriate). Thanks.
Re: [C++ PATCH] Improve locations of id-expressions and operator "" (PR c++/87386, take 3)
On Thu, Nov 22, 2018 at 10:21:53AM +0100, Andreas Schwab wrote: > On Nov 22 2018, Jakub Jelinek wrote: > > > Is that with --enable-checking=release > > Yes. Ok, reproduced myself, fixed thusly, committed to unbreak it. cp_expr has operator tree () const { return m_value; } tree & operator* () { return m_value; } tree operator* () const { return m_value; } tree & operator-> () { return m_value; } tree operator-> () const { return m_value; } so it does the right thing most of the time, like with: TREE_CHECK (NODE, something)->something_else even if #define TREE_CHECK(NODE) NODE But in this case we have: #define USERDEF_LITERAL_VALUE(NODE) \ (((struct tree_userdef_literal *)USERDEF_LITERAL_CHECK (NODE))->value) and so it works only if TREE_CHECK expands to something that forces the cast to tree. 2018-11-22 Jakub Jelinek PR c++/87386 * parser.c (cp_parser_operator): Use str.get_value () instead of just str in USERDEF_LITERAL_VALUE and USERDEF_LITERAL_SUFFIX_ID arguments. --- gcc/cp/parser.c.jj 2018-11-21 23:40:10.999140630 +0100 +++ gcc/cp/parser.c 2018-11-22 10:19:47.384240264 +0100 @@ -15306,8 +15306,8 @@ cp_parser_operator (cp_parser* parser, l return error_mark_node; else if (TREE_CODE (str) == USERDEF_LITERAL) { - string_tree = USERDEF_LITERAL_VALUE (str); - id = USERDEF_LITERAL_SUFFIX_ID (str); + string_tree = USERDEF_LITERAL_VALUE (str.get_value ()); + id = USERDEF_LITERAL_SUFFIX_ID (str.get_value ()); end_loc = str.get_location (); } else Jakub
Re: [PATCH] Alternate fix for PR87229, fix PR88112
On Wed, 21 Nov 2018, Richard Biener wrote: > > My previous fix for PR87229 was too aggressive. The following > simply teaches the LTO streamer to deal with CALL_EXPRs, support > for which was already in place. I've amended it with two > missing pieces, streaming of CALL_EXPR_BY_DESCRIPTOR and CALL_EXPR_IFN. > > LTO bootstrapped and tested on x86_64-unknown-linux-gnu with Ada enabled. > > Any objections? As said elsewhere I'm looking for sth that is > reasonably safe for GCC 8 as well given the PR is a regression there. Now applied. Richard. > Richard. > > 2018-11-21 Richard Biener > > PR lto/87229 > PR lto/88112 > * lto-streamer-out.c (lto_is_streamable): Allow CALL_EXPRs > which can appear in size expressions. > * tree-streamer-in.c (unpack_ts_base_value_fields): Stream > CALL_EXPR_BY_DESCRIPTOR. > (streamer_read_tree_bitfields): Stream CALL_EXPR_IFN. > * tree-streamer-out.c (pack_ts_base_value_fields): Stream > CALL_EXPR_BY_DESCRIPTOR. > (streamer_write_tree_bitfields): Stream CALL_EXPR_IFN. > > Revert > PR lto/87229 > * tree.c (free_lang_data_in_one_sizepos): Free non-gimple-val > sizepos values. > > > Index: gcc/lto-streamer-out.c > === > --- gcc/lto-streamer-out.c(revision 266308) > +++ gcc/lto-streamer-out.c(working copy) > @@ -306,7 +306,6 @@ lto_is_streamable (tree expr) > name version in lto_output_tree_ref (see output_ssa_names). */ >return !is_lang_specific (expr) >&& code != SSA_NAME > - && code != CALL_EXPR >&& code != LANG_TYPE >&& code != MODIFY_EXPR >&& code != INIT_EXPR > Index: gcc/tree-streamer-in.c > === > --- gcc/tree-streamer-in.c(revision 266308) > +++ gcc/tree-streamer-in.c(working copy) > @@ -158,6 +158,11 @@ unpack_ts_base_value_fields (struct bitp >SSA_NAME_IS_DEFAULT_DEF (expr) = (unsigned) bp_unpack_value (bp, 1); >bp_unpack_value (bp, 8); > } > + else if (TREE_CODE (expr) == CALL_EXPR) > +{ > + CALL_EXPR_BY_DESCRIPTOR (expr) = (unsigned) bp_unpack_value (bp, 1); > + bp_unpack_value (bp, 8); > +} >else > bp_unpack_value (bp, 9); > } > @@ -521,6 +526,8 @@ streamer_read_tree_bitfields (struct lto > MR_DEPENDENCE_BASE (expr) > = (unsigned)bp_unpack_value (&bp, sizeof (short) * 8); > } > + else if (code == CALL_EXPR) > + CALL_EXPR_IFN (expr) = bp_unpack_enum (&bp, internal_fn, IFN_LAST); > } > >if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) > Index: gcc/tree-streamer-out.c > === > --- gcc/tree-streamer-out.c (revision 266308) > +++ gcc/tree-streamer-out.c (working copy) > @@ -129,6 +129,11 @@ pack_ts_base_value_fields (struct bitpac >bp_pack_value (bp, SSA_NAME_IS_DEFAULT_DEF (expr), 1); >bp_pack_value (bp, 0, 8); > } > + else if (TREE_CODE (expr) == CALL_EXPR) > +{ > + bp_pack_value (bp, CALL_EXPR_BY_DESCRIPTOR (expr), 1); > + bp_pack_value (bp, 0, 8); > +} >else > bp_pack_value (bp, 0, 9); > } > @@ -457,6 +462,8 @@ streamer_write_tree_bitfields (struct ou > if (MR_DEPENDENCE_CLIQUE (expr) != 0) > bp_pack_value (&bp, MR_DEPENDENCE_BASE (expr), sizeof (short) * 8); > } > + else if (code == CALL_EXPR) > + bp_pack_enum (&bp, internal_fn, IFN_LAST, CALL_EXPR_IFN (expr)); > } > >if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) > Index: gcc/tree.c > === > --- gcc/tree.c(revision 266308) > +++ gcc/tree.c(working copy) > @@ -5254,13 +5254,6 @@ free_lang_data_in_one_sizepos (tree *exp >tree expr = *expr_p; >if (CONTAINS_PLACEHOLDER_P (expr)) > *expr_p = build0 (PLACEHOLDER_EXPR, TREE_TYPE (expr)); > - /* ??? We have to reset all non-GIMPLE sizepos because those eventually > - refer to trees we cannot stream. See for example PR87229 which > - shows an example with non-gimplified abstract origins in C++. > - Note this should only happen for abstract copies so setting sizes > - to NULL is OK (but we cannot easily assert this). */ > - else if (expr && !is_gimple_val (expr)) > -*expr_p = NULL_TREE; > } > > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
[PATCH] Add -fpad-source option
Hi! In the -fdec-include thread, Mark raised privately that some compilers (verified on fortran.godbolt.org for ifort only myself) do not pad fixed-form lines to the line length like gfortran does. Usually it makes no difference, but if a string constant is continued, it does (including when it appears in include string). We don't pad only with -ffixed-line-length-{0,none}, but that has other effects, like not ignoring characters in column 73+, which is needed for certain sources, e.g. if an include file is written with fixed as well as free form compatible line continuations. ifort apparently has a -pad-source option which defaults to -nopad-source. This patch introduces similar option, just with a different default (the one that we default to). What is the behavior of DEC fortran, Sun/Oracle fortran and others? If not padding is common to the DEC fortran derived compilers, perhaps -fdec might imply -fno-pad-source ? Ok for trunk if it passes bootstrap/regtest (so far just checked with make check-gfortran RUNTESTFLAGS='dg.exp=pad_source*' )? 2018-11-22 Jakub Jelinek * lang.opt (fpad-source): New option. * scanner.c (load_line): Don't pad fixed form lines if !flag_pad_source. * invoke.texi (-fno-pad-source): Document. * gfortran.dg/pad_source_1.f: New test. * gfortran.dg/pad_source_2.f: New test. * gfortran.dg/pad_source_3.f: New test. * gfortran.dg/pad_source_4.f: New test. * gfortran.dg/pad_source_5.f: New test. --- gcc/fortran/lang.opt.jj 2018-11-21 09:06:39.937688966 +0100 +++ gcc/fortran/lang.opt2018-11-22 09:23:43.090852471 +0100 @@ -536,6 +536,10 @@ ffixed-line-length- Fortran RejectNegative Joined UInteger Var(flag_fixed_line_length) Init(72) -ffixed-line-length-Use n as character line width in fixed mode. +fpad-source +Fortran Var(flag_pad_source) Init(1) +Pad shorter fixed form lines to line width with spaces. + ffpe-trap= Fortran RejectNegative JoinedOrMissing -ffpe-trap=[...] Stop on following floating point exceptions. --- gcc/fortran/scanner.c.jj2018-11-21 09:06:39.946688820 +0100 +++ gcc/fortran/scanner.c 2018-11-22 09:33:51.861790813 +0100 @@ -1924,6 +1924,7 @@ next_char: /* Pad lines to the selected line length in fixed form. */ if (gfc_current_form == FORM_FIXED && flag_fixed_line_length != 0 + && flag_pad_source && !preprocessor_flag && c != EOF) { --- gcc/fortran/invoke.texi.jj 2018-11-21 21:06:01.869189443 +0100 +++ gcc/fortran/invoke.texi 2018-11-22 10:07:55.285011379 +0100 @@ -121,7 +121,7 @@ by type. Explanations are in the follow -fdec -fdec-structure -fdec-intrinsic-ints -fdec-static -fdec-math @gol -fdec-include -fdefault-double-8 -fdefault-integer-8 -fdefault-real-8 @gol -fdefault-real-10 -fdefault-real-16 -fdollar-ok -ffixed-line-length-@var{n} @gol --ffixed-line-length-none -ffree-form -ffree-line-length-@var{n} @gol +-ffixed-line-length-none -fpad-source -ffree-form -ffree-line-length-@var{n} @gol -ffree-line-length-none -fimplicit-none -finteger-4-integer-8 @gol -fmax-identifier-length -fmodule-private -ffixed-form -fno-range-check @gol -fopenacc -fopenmp -freal-4-real-10 -freal-4-real-16 -freal-4-real-8 @gol @@ -321,8 +321,9 @@ declared as @code{PUBLIC}. @opindex @code{ffixed-line-length-}@var{n} @cindex file format, fixed Set column after which characters are ignored in typical fixed-form -lines in the source file, and through which spaces are assumed (as -if padded to that length) after the ends of short fixed-form lines. +lines in the source file, and, unless @code{-fno-pad-source}, through which +spaces are assumed (as if padded to that length) after the ends of short +fixed-form lines. Popular values for @var{n} include 72 (the standard and the default), 80 (card image), and 132 (corresponding @@ -333,6 +334,15 @@ to them to fill out the line. @option{-ffixed-line-length-0} means the same thing as @option{-ffixed-line-length-none}. +@item -fno-pad-source +@opindex @code{fpad-source} +By default fixed-form lines have spaces assumed (as if padded to that length) +after the ends of short fixed-form lines. This is not done either if +@option{-ffixed-line-length-0}, @option{-ffixed-line-length-none} or +if @option{-fno-pad-source} option is used. With any of those options +continued character constants never have implicit spaces appended +to them to fill out the line. + @item -ffree-line-length-@var{n} @opindex @code{ffree-line-length-}@var{n} @cindex file format, free --- gcc/testsuite/gfortran.dg/pad_source_1.f.jj 2018-11-22 09:43:45.952971238 +0100 +++ gcc/testsuite/gfortran.dg/pad_source_1.f2018-11-22 09:48:35.933177267 +0100 @@ -0,0 +1,8 @@ +c { dg-do run } +c { dg-skip-if "non-standard options" { *-*-* } { "-ffixed-line-length*" "-f*pad-source" } } + character(80) a + a = 'abc + +def' + if (a(:61) .ne. 'abc') stop 1 + if (a(62:) .ne. 'def') stop
Re: Stream TREE_TYPE of TYPE_DECLs again
* g++.dg/lto/odr-2_0.C: Remove extra brace diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C b/gcc/testsuite/g++.dg/lto/odr-2_0.C index 222fa2c1db..3ebb49efa2 100644 --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C @@ -1,5 +1,5 @@ // { dg-lto-do link } -// { dg-lto-options { { -O0 -flto } } +// { dg-lto-options { -O0 -flto } } enum a {} b; // { dg-lto-warning "6: type 'a' violates the C\\+\\+ One Definition Rule" } int main(void) -- 2.19.1 -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Re: PR fortran/87919 patch for -fno-dec-structure
On Mon, Nov 12, 2018 at 03:52:42PM -0500, Fritz Reese wrote: > On Mon, Nov 12, 2018 at 3:42 PM Jakub Jelinek wrote: > > Ok, so I'll ack it for trunk now, but please give the other Fortran > > maintainers one day to disagree before committing. > > For the release branches, I'd wait two weeks or so before backporting it. > > > > Roger that. I'll happily give it some time. Thanks for looking it over. I think more than enough time passed, do you plan to commit to trunk now? Note, small adjustment will be needed for the addition of flag_dec_include in set_dec_flags. Jakub
Re: [PATCH] x86: Add -march=cascadelake
On 11/22/18 8:07 AM, Wei Xiao wrote: > Jakub, > > Thanks for the comments! > I have addressed them as attached. Hi. Can you please add the new march into: - gcc/doc/extend.texi:20530 - gcc/testsuite/gcc.target/i386/builtin_target.c - test it here Thanks, Martin
Re: [PATCH] x86: Add -march=cascadelake
On Thu, Nov 22, 2018 at 12:48 PM Martin Liška wrote: > > On 11/22/18 8:07 AM, Wei Xiao wrote: > > Jakub, > > > > Thanks for the comments! > > I have addressed them as attached. > > Hi. > > Can you please add the new march into: > > - gcc/doc/extend.texi:20530 > - gcc/testsuite/gcc.target/i386/builtin_target.c - test it here IIRC, family and model numbers are not yet known for latest processors. Uros.
Re: [PATCH] x86: Add -march=cascadelake
On Thu, Nov 22, 2018 at 1:03 PM Uros Bizjak wrote: > > > Thanks for the comments! > > > I have addressed them as attached. > > > > Hi. > > > > Can you please add the new march into: > > > > - gcc/doc/extend.texi:20530 > > - gcc/testsuite/gcc.target/i386/builtin_target.c - test it here > > IIRC, family and model numbers are not yet known for latest processors. However, something can be found at [1]. [1] https://en.wikichip.org/wiki/intel/cpuid Uros.
Re: [PATCH] add simple attribute introspection
On Sat, 10 Nov 2018 at 00:43, Martin Sebor wrote: > > On 11/09/2018 12:59 PM, Jeff Law wrote: > > On 10/31/18 10:27 AM, Martin Sebor wrote: > >> Ping: https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01473.html > >> > >> With the C++ bits approved I'm still looking for a review/approval > >> of the remaining changes: the C front end and the shared c-family > >> handling of the new built-in. > > I thought I acked those with just a couple minor doc nits: > > I don't see a formal approval for the rest in my Inbox or in > the archive. > > >> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > >> index 8ffb0cd..dcf4747 100644 > >> --- a/gcc/doc/extend.texi > >> +++ b/gcc/doc/extend.texi > >> @@ -2649,8 +2649,9 @@ explicit @code{externally_visible} attributes > > are still necessary. > >> @cindex @code{flatten} function attribute > >> Generally, inlining into a function is limited. For a function > > marked with > >> this attribute, every call inside this function is inlined, if possible. > >> -Whether the function itself is considered for inlining depends on its > > size and > >> -the current inlining parameters. > >> +Functions declared with attribute @code{noinline} and similar are not > >> +inlined. Whether the function itself is considered for inlining depends > >> +on its size and the current inlining parameters. > > Guessing this was from another doc patch that I think has already been > > approved > > Yes. It shouldn't be in the latest patch at the link above. > > >> @@ -11726,6 +11728,33 @@ check its compatibility with @var{size}. > >> > >> @end deftypefn > >> > >> +@deftypefn {Built-in Function} bool __builtin_has_attribute > > (@var{type-or-expression}, @var{attribute}) > >> +The @code{__builtin_has_attribute} function evaluates to an integer > > constant > >> +expression equal to @code{true} if the symbol or type referenced by > >> +the @var{type-or-expression} argument has been declared with > >> +the @var{attribute} referenced by the second argument. Neither argument > >> +is valuated. The @var{type-or-expression} argument is subject to the > > same > > s/valuated/evaluated/ ? > > This should also be fixed in the latest patch at the link above. > > > Did the implementation change significantly requiring another review > > iteration? > > I don't think it changed too significantly between the last two > revisions but I don't have a record of anyone having approved > the C FE and the middle-end bits. (Sorry if I missed it.) Other > than this response from you all I see in the archive is this: > >https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00606.html > > Please let me if the last revision is okay to commit. > Hi, It seems you committed this yesterday as r266335, and I have noticed new failures: on both aarch64 and arm: FAIL: c-c++-common/builtin-has-attribute-3.c -Wc++-compat (test for excess errors) gcc.log says: Excess errors: /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:11:25: error: alignment for 'faligned_1' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:12:25: error: alignment for 'faligned_2' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:39:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:40:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:47:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:48:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:50:3: error: size of array 'Assert' is negative /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: size of array 'Assert' is negative /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:53:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:56:3: error: size of array 'Assert' is negative /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:58:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: alignment for '__builtin_has_attribute_tmp.' must be at least 4 /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: size of array 'Assert' is negative on arm only: gcc.dg/builtin-has-attribute.c (test for excess errors) gdb.log says: Excess errors: /gcc/testsuite/gcc.dg/builtin-has-attribute.c:12:15: error: size of array 'Assert' is negative Christophe > Thanks > Martin
[PATCH 1/2][libbacktrace] Handle realloc returning NULL if size == 0
Hi, If realloc is called with size 0, realloc can return NULL. When this happens in the backtrace_vector_release in alloc.c, the error callback is called, which should not be the case. Fix this by testing for size == 0 before calling the error callback. Build and tested on x86_64, with mmap.c replaced by alloc.c to ensure that backtrace_vector_release in alloc.c is tested. OK for trunk if bootstrap and reg-test on x86_64 succeeds? Thanks, - Tom [libbacktrace] Handle realloc returning NULL if size == 0 2018-11-22 Tom de Vries * Makefile.am (check_PROGRAMS): Add unittest. * Makefile.in: Regenerate. * alloc.c (backtrace_vector_release): Handle realloc returning NULL if * size == 0. * unittest.c: New file. --- libbacktrace/Makefile.am | 5 +++ libbacktrace/Makefile.in | 25 ++--- libbacktrace/alloc.c | 2 +- libbacktrace/unittest.c | 92 4 files changed, 119 insertions(+), 5 deletions(-) diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am index 3c1bd49dd7b..a2111ee7f67 100644 --- a/libbacktrace/Makefile.am +++ b/libbacktrace/Makefile.am @@ -90,6 +90,11 @@ TESTS = $(check_PROGRAMS) if NATIVE +unittest_SOURCES = unittest.c testlib.c +unittest_LDADD = libbacktrace.la + +check_PROGRAMS += unittest + btest_SOURCES = btest.c testlib.c btest_CFLAGS = $(AM_CFLAGS) -g -O btest_LDADD = libbacktrace.la diff --git a/libbacktrace/Makefile.in b/libbacktrace/Makefile.in index 60a9d887dba..2d62ce20b9a 100644 --- a/libbacktrace/Makefile.in +++ b/libbacktrace/Makefile.in @@ -121,7 +121,7 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) -@NATIVE_TRUE@am__append_1 = btest stest ztest edtest +@NATIVE_TRUE@am__append_1 = unittest btest stest ztest edtest @HAVE_ZLIB_TRUE@@NATIVE_TRUE@am__append_2 = -lz @HAVE_PTHREAD_TRUE@@NATIVE_TRUE@am__append_3 = ttest @HAVE_OBJCOPY_DEBUGLINK_TRUE@@NATIVE_TRUE@am__append_4 = dtest @@ -158,8 +158,8 @@ AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = -@NATIVE_TRUE@am__EXEEXT_1 = btest$(EXEEXT) stest$(EXEEXT) \ -@NATIVE_TRUE@ ztest$(EXEEXT) edtest$(EXEEXT) +@NATIVE_TRUE@am__EXEEXT_1 = unittest$(EXEEXT) btest$(EXEEXT) \ +@NATIVE_TRUE@ stest$(EXEEXT) ztest$(EXEEXT) edtest$(EXEEXT) @HAVE_PTHREAD_TRUE@@NATIVE_TRUE@am__EXEEXT_2 = ttest$(EXEEXT) @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@am__EXEEXT_3 = \ @HAVE_COMPRESSED_DEBUG_TRUE@@NATIVE_TRUE@ ctestg$(EXEEXT) \ @@ -202,6 +202,10 @@ ttest_OBJECTS = $(am_ttest_OBJECTS) ttest_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(ttest_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ +@NATIVE_TRUE@am_unittest_OBJECTS = unittest.$(OBJEXT) \ +@NATIVE_TRUE@ testlib.$(OBJEXT) +unittest_OBJECTS = $(am_unittest_OBJECTS) +@NATIVE_TRUE@unittest_DEPENDENCIES = libbacktrace.la @NATIVE_TRUE@am_ztest_OBJECTS = ztest-ztest.$(OBJEXT) \ @NATIVE_TRUE@ ztest-testlib.$(OBJEXT) ztest_OBJECTS = $(am_ztest_OBJECTS) @@ -246,7 +250,7 @@ am__v_CCLD_1 = SOURCES = $(libbacktrace_la_SOURCES) $(EXTRA_libbacktrace_la_SOURCES) \ $(btest_SOURCES) $(ctesta_SOURCES) $(ctestg_SOURCES) \ $(edtest_SOURCES) $(stest_SOURCES) $(ttest_SOURCES) \ - $(ztest_SOURCES) + $(unittest_SOURCES) $(ztest_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -655,6 +659,8 @@ libbacktrace_la_LIBADD = \ libbacktrace_la_DEPENDENCIES = $(libbacktrace_la_LIBADD) TESTS = $(check_PROGRAMS) $(am__append_4) +@NATIVE_TRUE@unittest_SOURCES = unittest.c testlib.c +@NATIVE_TRUE@unittest_LDADD = libbacktrace.la @NATIVE_TRUE@btest_SOURCES = btest.c testlib.c @NATIVE_TRUE@btest_CFLAGS = $(AM_CFLAGS) -g -O @NATIVE_TRUE@btest_LDADD = libbacktrace.la @@ -800,6 +806,10 @@ ttest$(EXEEXT): $(ttest_OBJECTS) $(ttest_DEPENDENCIES) $(EXTRA_ttest_DEPENDENCIE @rm -f ttest$(EXEEXT) $(AM_V_CCLD)$(ttest_LINK) $(ttest_OBJECTS) $(ttest_LDADD) $(LIBS) +unittest$(EXEEXT): $(unittest_OBJECTS) $(unittest_DEPENDENCIES) $(EXTRA_unittest_DEPENDENCIES) + @rm -f unittest$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(unittest_OBJECTS) $(unittest_LDADD) $(LIBS) + ztest$(EXEEXT): $(ztest_OBJECTS) $(ztest_DEPENDENCIES) $(EXTRA_ztest_DEPENDENCIES) @rm -f ztest$(EXEEXT) $(AM_V_CCLD)$(ztest_LINK) $(ztest_OBJECTS) $(ztest_LDADD) $(LIBS) @@ -1088,6 +1098,13 @@ recheck: all $(check_PROGRAMS) am__force_recheck=am--force-recheck \ TEST_LOGS="$$log_list"; \ exit $$? +unittest.log: unittest$(EXEEXT) + @p='unittest$(EXEEXT)'; \ + b='unittest'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $
[PATCH 2/2][libbacktrace] Don't point to released memory in backtrace_vector_release
Hi, When backtrace_vector_release is called with vec.size == 0, it releases the memory pointed at by vec.base. In case of the backtrace_vector_release in alloc.c, vec.base may then be set to NULL, but this is not guaranteed. Set vec.base set to NULL if vec.size == 0 to ensure we don't point to released memory. OK for trunk if bootstrap and reg-test on x86_64 succeeds? Thanks, - Tom [libbacktrace] Don't point to released memory in backtrace_vector_release 2018-11-22 Tom de Vries * alloc.c (backtrace_vector_release): Set base to NULL if size == 0. * mmap.c (backtrace_vector_release): Same. * unittest.c (test1): Add check. --- libbacktrace/alloc.c| 2 ++ libbacktrace/mmap.c | 2 ++ libbacktrace/unittest.c | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libbacktrace/alloc.c b/libbacktrace/alloc.c index 2f7ad956088..fb1e754788b 100644 --- a/libbacktrace/alloc.c +++ b/libbacktrace/alloc.c @@ -152,5 +152,7 @@ backtrace_vector_release (struct backtrace_state *state ATTRIBUTE_UNUSED, return 0; } vec->alc = 0; + if (vec->size == 0) +vec->base = NULL; return 1; } diff --git a/libbacktrace/mmap.c b/libbacktrace/mmap.c index 32fcba62399..9f896a1bb99 100644 --- a/libbacktrace/mmap.c +++ b/libbacktrace/mmap.c @@ -321,5 +321,7 @@ backtrace_vector_release (struct backtrace_state *state, backtrace_free (state, (char *) vec->base + aligned, alc, error_callback, data); vec->alc = 0; + if (vec->size == 0) +vec->base = NULL; return 1; } diff --git a/libbacktrace/unittest.c b/libbacktrace/unittest.c index 576aa080935..6c07aff91ee 100644 --- a/libbacktrace/unittest.c +++ b/libbacktrace/unittest.c @@ -58,6 +58,7 @@ test1 (void) { int res; int failed; + void *prev; struct backtrace_vector vec; @@ -68,8 +69,9 @@ test1 (void) vec.size = 0; count = 0; + prev = vec.base; res = backtrace_vector_release (state, &vec, error_callback, NULL); - failed = res != 1 || count != 0; + failed = res != 1 || count != 0 || vec.base != NULL; printf ("%s: unittest backtrace_vector_release size == 0\n", failed ? "FAIL": "PASS");
Re: [RS6000] num_insns_constant ICE
On Tue, Nov 20, 2018 at 06:07:41PM +1030, Alan Modra wrote: [snip] > alternative. movdi_internal32 used "GHF" in an alternative (and > always selected 64 byte insn length) so I replaced that with "F". Huh, no, the insn didn't have length attributes. > The FMOVE128 version of mov_softfloat also had "GHF" in an > alternative but from what I see in rs6000_emit_move, I believe TFmode > insns loading constants will be splt. This isn't true either. The split happens depending on altivec, and in fact when the split occurs we get poor code. > So this insn doesn't need to > handle FP constants (it also doesn't need an iterator as only TFmode > will be selected - not fixed with this patch). Which means this is only half true. By removing "F", I effectively forced all soft-float long double constants to memory. The patch has been revised and broken into a number of pieces. I'll post tomorrow. -- Alan Modra Australia Development Lab, IBM
Re: [PATCH] add simple attribute introspection
Hi Christophe, > On Sat, 10 Nov 2018 at 00:43, Martin Sebor wrote: >> >> On 11/09/2018 12:59 PM, Jeff Law wrote: >> > On 10/31/18 10:27 AM, Martin Sebor wrote: >> >> Ping: https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01473.html >> >> >> >> With the C++ bits approved I'm still looking for a review/approval >> >> of the remaining changes: the C front end and the shared c-family >> >> handling of the new built-in. >> > I thought I acked those with just a couple minor doc nits: >> >> I don't see a formal approval for the rest in my Inbox or in >> the archive. >> >> >> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi >> >> index 8ffb0cd..dcf4747 100644 >> >> --- a/gcc/doc/extend.texi >> >> +++ b/gcc/doc/extend.texi >> >> @@ -2649,8 +2649,9 @@ explicit @code{externally_visible} attributes >> > are still necessary. >> >> @cindex @code{flatten} function attribute >> >> Generally, inlining into a function is limited. For a function >> > marked with >> >> this attribute, every call inside this function is inlined, if possible. >> >> -Whether the function itself is considered for inlining depends on its >> > size and >> >> -the current inlining parameters. >> >> +Functions declared with attribute @code{noinline} and similar are not >> >> +inlined. Whether the function itself is considered for inlining depends >> >> +on its size and the current inlining parameters. >> > Guessing this was from another doc patch that I think has already been >> > approved >> >> Yes. It shouldn't be in the latest patch at the link above. >> >> >> @@ -11726,6 +11728,33 @@ check its compatibility with @var{size}. >> >> >> >> @end deftypefn >> >> >> >> +@deftypefn {Built-in Function} bool __builtin_has_attribute >> > (@var{type-or-expression}, @var{attribute}) >> >> +The @code{__builtin_has_attribute} function evaluates to an integer >> > constant >> >> +expression equal to @code{true} if the symbol or type referenced by >> >> +the @var{type-or-expression} argument has been declared with >> >> +the @var{attribute} referenced by the second argument. Neither argument >> >> +is valuated. The @var{type-or-expression} argument is subject to the >> > same >> > s/valuated/evaluated/ ? >> >> This should also be fixed in the latest patch at the link above. >> >> > Did the implementation change significantly requiring another review >> > iteration? >> >> I don't think it changed too significantly between the last two >> revisions but I don't have a record of anyone having approved >> the C FE and the middle-end bits. (Sorry if I missed it.) Other >> than this response from you all I see in the archive is this: >> >>https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00606.html >> >> Please let me if the last revision is okay to commit. >> > > Hi, > > It seems you committed this yesterday as r266335, and I have noticed > new failures: > on both aarch64 and arm: > FAIL: c-c++-common/builtin-has-attribute-3.c -Wc++-compat (test for > excess errors) > > gcc.log says: > Excess errors: > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:11:25: error: > alignment for 'faligned_1' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:12:25: error: > alignment for 'faligned_2' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:39:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:40:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:47:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:48:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:50:3: error: > size of array 'Assert' is negative > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: > size of array 'Assert' is negative > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:53:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:56:3: error: > size of array 'Assert' is negative > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:58:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: > size of array 'Assert' is negative > > on arm only: > gcc.dg/builtin-has-attribute.c (test for excess errors) > gdb.log says: > Excess errors: > /gcc/testsuite/gcc.dg/builtin-has-attribute.c:12:15: error: size of > array 'Assert' is negative I'm seeing the same on sparc-sun-solaris2.
Re: [PATCH v2 1/3] Allow memory operands for PTWRITE
On Wed, Nov 21, 2018 at 4:38 PM H.J. Lu wrote: > > On Wed, Nov 21, 2018 at 6:48 AM Richard Biener > wrote: > > > > On Tue, Nov 20, 2018 at 7:36 PM Andi Kleen wrote: > > > > > > On Tue, Nov 20, 2018 at 11:53:15AM +0100, Richard Biener wrote: > > > > On Fri, Nov 16, 2018 at 8:07 AM Uros Bizjak wrote: > > > > > > > > > > On Fri, Nov 16, 2018 at 4:57 AM Andi Kleen > > > > > wrote: > > > > > > > > > > > > From: Andi Kleen > > > > > > > > > > > > The earlier PTWRITE builtin definition was unnecessarily > > > > > > restrictive, > > > > > > only allowing register input to PTWRITE. The instruction actually > > > > > > supports memory operands too, so allow that too. > > > > > > > > > > > > gcc/: > > > > > > > > > > > > 2018-11-15 Andi Kleen > > > > > > > > > > > > * config/i386/i386.md: Allow memory operands to ptwrite. > > > > > > > > > > OK. > > > > > > > > Btw, I wonder why the ptwrite builtin is in SPECIAL_ARGS2 > > > > commented as /* Add all special builtins with variable number of > > > > operands. */? > > > > > > i think i put it in the same place as a similar builtin. AFAIK > > > those others don't have variable arguments either, so the comment > > > may be wrong? > > > > No idea... > > > > > > > > > > On the GIMPLE level this builtin also has quite some (bad) effects on > > > > alias analysis and any related optimization (vectorization, etc.). > > > > I'll have > > > > to see where the instrumenting pass now resides. > > > > > > It's fairly late now. > > > > OK, saw that. > > > > > Any suggestions for improvements? At some point I removed the edges > > > like the old MPX builtins to minimize memory usage, but that was > > > removed during an earlier review cycle. > > > > I guess it's fine now - it will have an effect on TER, limiting its ability > > a bit, but otherwise the builtin only lives up to RTL expansion where > > it becomes the UNSPEC_VOLATILE. As said, instrumenting on > > RTL would be an improvement, I think HJ might be able to help with that. > > > > What are the issues? AFAIU inserting ptwrite only for values where the location allows a variable to be infered from debug location lists _and_ properly extend the location range to cover the ptwrite instruction itself if the value dies otherwise (like for stores). See the thread about the instrumentation pass which currently is implemented on GIMPLE. Richard. > > -- > H.J.
Re: Fix PR37916 (unnecessary spilling)
Hi, On Wed, 21 Nov 2018, Richard Biener wrote: > then this leads to wrong debug info showing a value for 'a' that never > existed. var-tracking was specifically created so that the base-vars of SSA names aren't necessary for debug info anymore ... > when you build this with -O -g -fno-var-tracking-assignments (VTA seems > to hide the issue, probably due to not enough instructions in the end) ... which makes this an non-test. Sure, if you cripple debug info generation you get ... crippled debug info. > and the fact that var-tracking looks at REG_DECL for > debug info location generation. With VTA we get > > # DEBUG BEGIN_STMT > # DEBUG D#3 => a_1(D) + 1 > # DEBUG a => D#3 > # DEBUG BEGIN_STMT > a_11 = d_7(D) + 4; > # DEBUG D#2 => D#3 + b_3(D) > # DEBUG a => D#2 > # DEBUG BEGIN_STMT > a_12 = a_11 + c_5(D); > # DEBUG D#1 => D#2 + c_5(D) > # DEBUG a => D#1 > # DEBUG BEGIN_STMT > a_13 = a_12 + b_3(D); > # DEBUG a => D#1 + d_7(D) > # DEBUG BEGIN_STMT > a_9 = a_13 + a_1(D); > # DEBUG a => a_9 This seems to be exactly correct and reflecting the original code, i.e. VTA works as designed even with "invented" base variables. > So your patch has to be much more careful to never change the LHS of > stmts that are adjusted (which I think reassoc already does). I can't do much more in reassoc than what I implemented, it's constrained to cases where we have a known LHS. If you think that's still too much then the problem needs to remain unfixed in reassoc, and it would need TER changes. An easy way would be to disable TERin also of SSA names having no base var (essentially on the ground that it's the same base var then), but that probably disables TER too much, though I haven't tried. In addition: I'd always happily trade less correct debug info (where the impact of the incorrectness is fairly trivial) with no stack accesses in inner loops; i.e. a tradeoff. Ciao, Michael.
Re: Fix PR37916 (unnecessary spilling)
On Thu, Nov 22, 2018 at 01:30:03PM +, Michael Matz wrote: > On Wed, 21 Nov 2018, Richard Biener wrote: > > > then this leads to wrong debug info showing a value for 'a' that never > > existed. > > var-tracking was specifically created so that the base-vars of SSA names > aren't necessary for debug info anymore ... Not to my knowledge. The original var-tracking used that information almost exclusively (whatever ends up in REG_EXPR/MEM_EXPR and that is based on the base vars of SSA_NAMEs). VTA doesn't need that for vars for which there are debug stmts, but otherwise it is still used. Jakub
[PATCH] PR libstdc++/87520 Always pass type-punned type_info reference
The implementations of std::make_shared for -frtti and -fno-rtti are not compatible, because they pass different arguments to _Sp_counted_ptr_inplace::_M_get_deleter and so can't interoperate. Either the argument doesn't match the expected value, and so the shared_ptr::_M_ptr member is never set, or the type-punned reference is treated as a real std::type_info object and gets dereferenced. This patch removes the differences between -frtti and -fno-rtti, so that typeid is never used, and the type-punned reference is used in both cases. For backwards compatibility with existing code that passes typeid(_Sp_make_shared_tag) that still needs to be handled, but only after checking that the argument is not the type-punned reference (so it's safe to treat as a real std::type_info object). The reference is bound to an object of literal type, so that it doesn't need a guard variable to make its initialization thread-safe. This patch also fixes 87520 by ensuring that the type-punned reference is bound to "a region of storage of suitable size and alignment to contain an object of the reference's type" (as per the proposed resolution of Core DR 453). If all objects are built with the fixed version of GCC then -frtti and -fno-rtti can be mixed freely and std::make_shared will work correctly. If some objects are built with unfixed GCC versions then problems can still arise, depending on which template instantiations are kept by the linker. PR libstdc++/85930 PR libstdc++/87520 * include/bits/shared_ptr_base.h (_Sp_make_shared_tag::_S_ti) [__cpp_rtti]: Define even when RTTI is enabled. Use array of sizeof(type_info) so that type-punned reference binds to an object of the correct size as well as correct alignment. (_Sp_counted_ptr_inplace::_M_get_deleter) [__cpp_rtti]: Check for _S_ti() reference even when RTTI is enabled. (__shared_ptr(_Sp_make_shared_tag, const _Alloc&, _Args&&...)) [__cpp_rtti]: Pass _S_ti() instead of typeid(_Sp_make_shared_tag). Tested x86_64-linux, committed to trunk. Backport to gcc-8-branch will follow soon. commit c3eb187b5da75f0df004e49f3ecad7be906a0231 Author: Jonathan Wakely Date: Thu Oct 4 16:16:57 2018 +0100 PR libstdc++/87520 Always pass type-punned type_info reference The implementations of std::make_shared for -frtti and -fno-rtti are not compatible, because they pass different arguments to _Sp_counted_ptr_inplace::_M_get_deleter and so can't interoperate. Either the argument doesn't match the expected value, and so the shared_ptr::_M_ptr member is never set, or the type-punned reference is treated as a real std::type_info object and gets dereferenced. This patch removes the differences between -frtti and -fno-rtti, so that typeid is never used, and the type-punned reference is used in both cases. For backwards compatibility with existing code that passes typeid(_Sp_make_shared_tag) that still needs to be handled, but only after checking that the argument is not the type-punned reference (so it's safe to treat as a real std::type_info object). The reference is bound to an object of literal type, so that it doesn't need a guard variable to make its initialization thread-safe. This patch also fixes 87520 by ensuring that the type-punned reference is bound to "a region of storage of suitable size and alignment to contain an object of the reference's type" (as per the proposed resolution of Core DR 453). If all objects are built with the fixed version of GCC then -frtti and -fno-rtti can be mixed freely and std::make_shared will work correctly. If some objects are built with unfixed GCC versions then problems can still arise, depending on which template instantiations are kept by the linker. PR libstdc++/85930 PR libstdc++/87520 * include/bits/shared_ptr_base.h (_Sp_make_shared_tag::_S_ti) [__cpp_rtti]: Define even when RTTI is enabled. Use array of sizeof(type_info) so that type-punned reference binds to an object of the correct size as well as correct alignment. (_Sp_counted_ptr_inplace::_M_get_deleter) [__cpp_rtti]: Check for _S_ti() reference even when RTTI is enabled. (__shared_ptr(_Sp_make_shared_tag, const _Alloc&, _Args&&...)) [__cpp_rtti]: Pass _S_ti() instead of typeid(_Sp_make_shared_tag). diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h index 870aeb9bfda..46ff4a7cf29 100644 --- a/libstdc++-v3/include/bits/shared_ptr_base.h +++ b/libstdc++-v3/include/bits/shared_ptr_base.h @@ -500,7 +500,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION struct _Sp_make_shared_tag { -#if !__cpp_rtti private: template friend class __shared_ptr; @@ -510,10 +509,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION static
[PATCH] Fix option values for -march.
Hi. The patch makes clear we'll not diverge number of elements in processor_names and the corresponding enum. Plus I fixed -march=znver2 native as valid options that were not listed. Patch survives tests and bootstrap on x86_64-linux-gnu. Ready for trunk? Martin gcc/ChangeLog: 2018-11-22 Martin Liska * common/config/i386/i386-common.c (processor_names): Add static assert and add missing "znver2". (ix86_get_valid_option_values): Add checking assert for null values and add "native" value if feasible. * config/i386/i386.h: Do not declare size of processor_names. --- gcc/common/config/i386/i386-common.c | 26 ++ gcc/config/i386/i386.h | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 1017147599c..3deac94a879 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1478,7 +1478,7 @@ i386_except_unwind_info (struct gcc_options *opts) #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack /* This table must be in sync with enum processor_type in i386.h. */ -const char *const processor_names[PROCESSOR_max] = +const char *const processor_names[] = { "generic", "i386", @@ -1516,9 +1516,14 @@ const char *const processor_names[PROCESSOR_max] = "bdver4", "btver1", "btver2", - "znver1" + "znver1", + "znver2" }; +/* Guarantee that the array is aligned with henum processor_type. */ +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) + == PROCESSOR_max)); + const pta processor_alias_table[] = { {"i386", PROCESSOR_I386, CPU_NONE, 0}, @@ -1734,11 +1739,24 @@ ix86_get_valid_option_values (int option_code, { case OPT_march_: for (unsigned i = 0; i < pta_size; i++) - v.safe_push (processor_alias_table[i].name); + { + const char *name = processor_alias_table[i].name; + gcc_checking_assert (name != NULL); + v.safe_push (name); + } +#ifdef HAVE_LOCAL_CPU_DETECT + /* Add also "native" as possible value. */ + v.safe_push ("native"); +#endif + break; case OPT_mtune_: for (unsigned i = 0; i < PROCESSOR_max; i++) - v.safe_push (processor_names[i]); + { + const char *name = processor_names[i]; + gcc_checking_assert (name != NULL); + v.safe_push (name); + } break; default: break; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 813c86dbdfa..b9e726e3d24 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2279,7 +2279,7 @@ enum processor_type }; #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) -extern const char *const processor_names[PROCESSOR_max]; +extern const char *const processor_names[]; #include "wide-int-bitmask.h"
Re: [PATCH] Fix option values for -march.
On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: > The patch makes clear we'll not diverge number of elements in > processor_names and the corresponding enum. Plus I fixed > -march=znver2 native as valid options that were not listed. > > Patch survives tests and bootstrap on x86_64-linux-gnu. > > Ready for trunk? > Martin > > gcc/ChangeLog: > > 2018-11-22 Martin Liska > > * common/config/i386/i386-common.c (processor_names): Add > static assert and add missing "znver2". > (ix86_get_valid_option_values): Add checking assert for null > values and add "native" value if feasible. > * config/i386/i386.h: Do not declare size of processor_names. > --- > gcc/common/config/i386/i386-common.c | 26 ++ > gcc/config/i386/i386.h | 2 +- > 2 files changed, 23 insertions(+), 5 deletions(-) +/* Guarantee that the array is aligned with henum processor_type. */ +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) + == PROCESSOR_max)); Please use ARRAY_SIZE macro here. +#ifdef HAVE_LOCAL_CPU_DETECT + /* Add also "native" as possible value. */ + v.safe_push ("native"); +#endif "native" is processed by the driver and this option is never passed to cc1. Uros.
Re: [maintainer-scipts] Add a bugzilla script
On 11/21/18 1:43 AM, Martin Sebor wrote: > On 11/20/2018 03:08 AM, Martin Liška wrote: >> Hi. >> >> It's the script that I used to identify potentially resolvable bugs. That's >> done >> by parsing of comments and seeking for trunk/branch commits. Sample output >> looks >> as follows: >> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88084 branches: trunk >> fail: work: >> basic_string_view::copy doesn't use Traits::copy >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88083 branches: trunk >> fail: work: >> ICE in find_constant_pool_ref_1, at >> config/s390/s390.c:8231 >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88077 branches: trunk >> fail: 8.2.0 work: 9.0 >> [8 Regression] ICE: c:378 since r256989 >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88073 branches: trunk >> fail: work: >> [7/8 Regression] Internal compiler error compiling WHERE >> construct with -O or -O2 >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88071 branches: trunk >> fail: 8.2.0, 9.0 work: 7.3.0 >> [8 Regression] ICE: verify_gimple failed (error: dead >> STMT in EH table) >> >> Plus there are generated bugzilla list so that one can go easily through. >> Would you be interested in putting that into maintainer scripts? > > It would be nice to add comments explaining how to use it. Maybe > an example. (FWIW, I document my own scripts using the man page > format at the top.) > > Martin Hi. I updated that and also reorganized imports (pointed by David). I'm going to install it soon. Martin #!/usr/bin/env python3 # The script is used for finding PRs that have a SVN revision that # mentiones the PR and are not closed. It's done by iterating all # comments of a PR and finding SVN commit entries. """ Sample output of the script: Bugzilla URL page size: 50 HINT: bugs with following comment are ignored: Can the bug be marked as resolved? Bug URL SVN commits known-to-fail known-to-work Bug summary https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88129 trunk Two blockage insns are emited in the function epilogue https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88122 trunk [9 Regression] g++ ICE: internal compiler error: Segmentation fault https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88084 trunk basic_string_view::copy doesn't use Traits::copy https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88083 trunk ICE in find_constant_pool_ref_1, at config/s390/s390.c:8231 ... Bugzilla lists: https://gcc.gnu.org/bugzilla/buglist.cgi?bug_id=88129,88122,88084,88083,88074,88073,88071,88070,88051,88018,87985,87955,87926,87917,87913,87898,87895,87874,87871,87855,87853,87826,87824,87819,87818,87799,87793,87789,87788,87787,87754,87725,87674,87665,87649,87647,87645,87625,87611,87610,87598,87593,87582,87566,87556,87547,87544,87541,87537,87528 https://gcc.gnu.org/bugzilla/buglist.cgi?bug_id=87486 """ import argparse import json import requests base_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/' statuses = ['UNCONFIRMED', 'ASSIGNED', 'SUSPENDED', 'NEW', 'WAITING', 'REOPENED'] regex = '(.*\[)([0-9\./]*)( [rR]egression])(.*)' closure_question = 'Can the bug be marked as resolved?' start_page = 20 url_page_size = 50 def get_branches_by_comments(comments): versions = set() for c in comments: text = c['text'] if 'URL: https://gcc.gnu.org/viewcvs' in text: version = 'trunk' for l in text.split('\n'): if 'branches/gcc-' in l: parts = l.strip().split('/') parts = parts[1].split('-') assert len(parts) == 3 versions.add(parts[1]) versions.add(version) return versions def get_bugs(api_key, query): u = base_url + 'bug' r = requests.get(u, params = query) return r.json()['bugs'] def search(api_key): chunk = 1000 ids = [] print('%-53s%-30s%-40s%-40s%-60s' % ('Bug URL', 'SVN commits', 'known-to-fail', 'known-to-work', 'Bug summary')) for
Re: [PATCH v2 2/3] Add a pass to automatically add ptwrite instrumentation
On Tue, Nov 20, 2018 at 7:27 PM Andi Kleen wrote: > > On Tue, Nov 20, 2018 at 01:04:19PM +0100, Richard Biener wrote: > > Since your builtin clobbers memory > > Hmm, maybe we could get rid of that, but then how to avoid > the optimizer moving it around over function calls etc.? > The instrumentation should still be useful when the program > crashes, so we don't want to delay logging too much. You can't avoid moving it, yes. But it can be even moved now, effectively detaching it from the "interesting" $pc ranges we have debug location lists for? > > Maybe even instead pass it a number of bytes so it models how atomics work. > > How could that reject float? Why does it need to reject floats? Note you are already rejecting floats in the instrumentation pass. > Mode seems better for now. > > Eventually might support float/double through memory, but not in the > first version. Why does movq %xmm0, %rax; ptwrite %rax not work? > > > >NEXT_PASS (pass_tsan_O0); > > >NEXT_PASS (pass_sanopt); > > > + NEXT_PASS (pass_vartrace); > > > > I'd move it one lower, after pass_cleanup_eh. Further enhancement > > would make it a > > RTL pass ... > > It's after pass_nrv now. > > > So in reality the restriction is on the size of the object, correct? > > The instruction accepts 32 or 64bit memory or register. > > In principle everything could be logged through this, but i was trying > to limit the cases to integers and pointers for now to simplify > the problem. > > Right now the backend fails up when something else than 4 or 8 bytes > is passed. Fair enough, the instrumentation would need to pad out smaller values and/or split larger values. I think 1 and 2 byte values would be interesting so you can support char and shorts. Eventually 16byte values for __int128 or vectors. > > > > > +{ > > > + if (!supported_type (t)) > > > > You handle some nested cases below via recursion, > > like a.b.c (but not a[i][j]). But then this check will > > fire. I think it would be better to restructure the > > function to look at the outermost level for whether > > the op is of supported type, thus we can log it > > at all and then get all the way down to the base via > > sth like > > > > if (!supported_type (t)) > > return false; > > enum attrstate s = ; > > do > > { > >s = supported_op (t, s); > >if (s == force_off) > > return false; > > } > > while (handled_component_p (t) && (t = TREE_OPERAND (t, 0))) > > > > Now t is either an SSA_NAME, a DECL (you fail to handle PARM_DECL > > Incoming arguments and returns are handled separately. > > > and RESULT_DECL below) or a [TARGET_]MEM_REF. To get rid > > of non-pointer indirections do then > > > > t = get_base_address (t); > > if (DECL_P (t) && is_local (t)) > > > > > > because... > > > > > +return false; > > > + > > > + enum attrstate s = supported_op (t, neutral); > > > + if (s == force_off) > > > +return false; > > > + if (s == force_on) > > > +force = true; > > > + > > > + switch (TREE_CODE (t)) > > > +{ > > > +case VAR_DECL: > > > + if (DECL_ARTIFICIAL (t)) > > > + return false; > > > + if (is_local (t)) > > > + return true; > > > + return s == force_on || force; > > > + > > > +case ARRAY_REF: > > > + t = TREE_OPERAND (t, 0); > > > + s = supported_op (t, s); > > > + if (s == force_off) > > > + return false; > > > + return supported_type (TREE_TYPE (t)); > > > > Your supported_type is said to take a DECL. And you > > already asked for this type - it's the type of the original t > > (well, the type of this type given TREE_TYPE (t) is an array type). > > But you'll reject a[i][j] where the type of this type is an array type as > > well. > > Just to be clear, after your changes above I only need > to handle VAR_DECL and SSA_NAME here then, correct? Yes (and PARM_DECL and RESULT_DECL). > So one of the reasons I handled ARRAY_REF this way is to > trace the index as a local if needed. If I can assume > it was always in a MEM with an own ASSIGN earlier if the local > was a user visible that wouldn't be needed (and also some other similar > code elsewhere) If the index lives in memory it has a corresponding load. For SSA uses see my comment about instrumenting them at all (together with the suggestion on how to handle them in an easier way). > > But when I look at a simple test case like vartrace-6 > > void > f (void) > { > int i; > for (i = 0; i < 10; i++) >f2 (); > } > > i appears to be a SSA name only that is referenced everywhere without > MEM. And if the user wants to track the value of i I would need > to explicitely handle all these cases. Do I miss something here? You handle those in different places I think. > I'm starting to think i should perhaps drop locals support to simplify > everything? But that might limit usability for debugging somewhat. I think you confuse "locals" a bit. In GIMPLE an automatic
Re: Fix PR37916 (unnecessary spilling)
On Thu, Nov 22, 2018 at 2:34 PM Jakub Jelinek wrote: > > On Thu, Nov 22, 2018 at 01:30:03PM +, Michael Matz wrote: > > On Wed, 21 Nov 2018, Richard Biener wrote: > > > > > then this leads to wrong debug info showing a value for 'a' that never > > > existed. > > > > var-tracking was specifically created so that the base-vars of SSA names > > aren't necessary for debug info anymore ... > > Not to my knowledge. The original var-tracking used that information > almost exclusively (whatever ends up in REG_EXPR/MEM_EXPR and that is based > on the base vars of SSA_NAMEs). > VTA doesn't need that for vars for which there are debug stmts, but > otherwise it is still used. Also it still uses it even for vars for which there are debug stmts, no? We just happen to be lucky for the too simple testcase that only the correct values survive for the very few $pc addresses? Richard. > Jakub
Re: [PATCH] Fix option values for -march.
On Thu, Nov 22, 2018 at 2:51 PM Uros Bizjak wrote: > > The patch makes clear we'll not diverge number of elements in > > processor_names and the corresponding enum. Plus I fixed > > -march=znver2 native as valid options that were not listed. > > > > Patch survives tests and bootstrap on x86_64-linux-gnu. > > > > Ready for trunk? > > Martin > > > > gcc/ChangeLog: > > > > 2018-11-22 Martin Liska > > > > * common/config/i386/i386-common.c (processor_names): Add > > static assert and add missing "znver2". > > (ix86_get_valid_option_values): Add checking assert for null > > values and add "native" value if feasible. > > * config/i386/i386.h: Do not declare size of processor_names. > > --- > > gcc/common/config/i386/i386-common.c | 26 ++ > > gcc/config/i386/i386.h | 2 +- > > 2 files changed, 23 insertions(+), 5 deletions(-) > > +/* Guarantee that the array is aligned with henum processor_type. */ Typo above. > +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) > + == PROCESSOR_max)); > > Please use ARRAY_SIZE macro here. BTW: There is another similar table in i386.c, processor_cost_table. Can we add STATIC_ASSERT for this table, too? Uros.
Re: [PATCH] Fix option values for -march.
On 11/22/18 2:51 PM, Uros Bizjak wrote: > On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: > >> The patch makes clear we'll not diverge number of elements in >> processor_names and the corresponding enum. Plus I fixed >> -march=znver2 native as valid options that were not listed. >> >> Patch survives tests and bootstrap on x86_64-linux-gnu. >> >> Ready for trunk? >> Martin >> >> gcc/ChangeLog: >> >> 2018-11-22 Martin Liska >> >> * common/config/i386/i386-common.c (processor_names): Add >> static assert and add missing "znver2". >> (ix86_get_valid_option_values): Add checking assert for null >> values and add "native" value if feasible. >> * config/i386/i386.h: Do not declare size of processor_names. >> --- >> gcc/common/config/i386/i386-common.c | 26 ++ >> gcc/config/i386/i386.h | 2 +- >> 2 files changed, 23 insertions(+), 5 deletions(-) > > +/* Guarantee that the array is aligned with henum processor_type. */ > +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) > + == PROCESSOR_max)); > > Please use ARRAY_SIZE macro here. Fixed, it's definitely nicer! > > +#ifdef HAVE_LOCAL_CPU_DETECT > + /* Add also "native" as possible value. */ > + v.safe_push ("native"); > +#endif > > "native" is processed by the driver and this option is never passed to cc1. But it's a target common hook that is used both from driver and cc1: $ ./xgcc -B. --completion=-march=nat -march=native (Starting program: /dev/shm/objdir/gcc/xgcc -B. --completion=-march=nat) $ ./xgcc -B. --help=target | grep native ... i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm intel geode k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 generic native (Starting program: /dev/shm/objdir/gcc/cc1 -quiet -iprefix /dev/shm/objdir/gcc/../lib64/gcc/x86_64-pc-linux-gnu/9.0.0/ -isystem ./include -isystem ./include-fixed help-dummy -quiet -dumpbase help-dummy -mtune=generic -march=x86-64 -auxbase help-dummy --help=target -o /tmp/cc5NWl4E.s) So should be fine. Martin > > Uros. > >From 7c89f9bde4d61f96005ea94055b86bc1b8e652bf Mon Sep 17 00:00:00 2001 From: marxin Date: Thu, 22 Nov 2018 13:21:44 +0100 Subject: [PATCH] Fix option values for -march. gcc/ChangeLog: 2018-11-22 Martin Liska * common/config/i386/i386-common.c (processor_names): Add static assert and add missing "znver2". (ix86_get_valid_option_values): Add checking assert for null values and add "native" value if feasible. * config/i386/i386.h: Do not declare size of processor_names. --- gcc/common/config/i386/i386-common.c | 25 + gcc/config/i386/i386.h | 2 +- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 1017147599c..d1fb7a7464b 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1478,7 +1478,7 @@ i386_except_unwind_info (struct gcc_options *opts) #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack /* This table must be in sync with enum processor_type in i386.h. */ -const char *const processor_names[PROCESSOR_max] = +const char *const processor_names[] = { "generic", "i386", @@ -1516,9 +1516,13 @@ const char *const processor_names[PROCESSOR_max] = "bdver4", "btver1", "btver2", - "znver1" + "znver1", + "znver2" }; +/* Guarantee that the array is aligned with henum processor_type. */ +STATIC_ASSERT (ARRAY_SIZE (processor_names) == PROCESSOR_max); + const pta processor_alias_table[] = { {"i386", PROCESSOR_I386, CPU_NONE, 0}, @@ -1734,11 +1738,24 @@ ix86_get_valid_option_values (int option_code, { case OPT_march_: for (unsigned i = 0; i < pta_size; i++) - v.safe_push (processor_alias_table[i].name); + { + const char *name = processor_alias_table[i].name; + gcc_checking_assert (name != NULL); + v.safe_push (name); + } +#ifdef HAVE_LOCAL_CPU_DETECT + /* Add also "native" as possible value. */ + v.safe_push ("native"); +#endif + break; case OPT_mtune_: for (unsigned i = 0; i < PROCESSOR_max; i++) - v.safe_push (processor_names[i]); + { + const char *name = processor_names[i]; + gcc_checking_assert (name != NULL); + v.safe_push (name); + } break; default:
Re: [PATCH] Fix option values for -march.
On Thu, Nov 22, 2018 at 3:00 PM Martin Liška wrote: > > On 11/22/18 2:51 PM, Uros Bizjak wrote: > > On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: > > > >> The patch makes clear we'll not diverge number of elements in > >> processor_names and the corresponding enum. Plus I fixed > >> -march=znver2 native as valid options that were not listed. > >> > >> Patch survives tests and bootstrap on x86_64-linux-gnu. > >> > >> Ready for trunk? > >> Martin > >> > >> gcc/ChangeLog: > >> > >> 2018-11-22 Martin Liska > >> > >> * common/config/i386/i386-common.c (processor_names): Add > >> static assert and add missing "znver2". > >> (ix86_get_valid_option_values): Add checking assert for null > >> values and add "native" value if feasible. > >> * config/i386/i386.h: Do not declare size of processor_names. > >> --- > >> gcc/common/config/i386/i386-common.c | 26 ++ > >> gcc/config/i386/i386.h | 2 +- > >> 2 files changed, 23 insertions(+), 5 deletions(-) > > > > +/* Guarantee that the array is aligned with henum processor_type. */ > > +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) > > + == PROCESSOR_max)); > > > > Please use ARRAY_SIZE macro here. > > Fixed, it's definitely nicer! > > > > > +#ifdef HAVE_LOCAL_CPU_DETECT > > + /* Add also "native" as possible value. */ > > + v.safe_push ("native"); > > +#endif > > > > "native" is processed by the driver and this option is never passed to cc1. > > But it's a target common hook that is used both from driver and cc1: > > $ ./xgcc -B. --completion=-march=nat > -march=native > > > (Starting program: /dev/shm/objdir/gcc/xgcc -B. --completion=-march=nat) > > $ ./xgcc -B. --help=target | grep native > ... > i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 > samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m > pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere > sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell > skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom > silvermont slm goldmont goldmont-plus tremont knl knm intel geode k6 k6-2 > k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 eden-x2 nano > nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron > opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 > bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 generic native > > > > (Starting program: /dev/shm/objdir/gcc/cc1 -quiet -iprefix > /dev/shm/objdir/gcc/../lib64/gcc/x86_64-pc-linux-gnu/9.0.0/ -isystem > ./include -isystem ./include-fixed help-dummy -quiet -dumpbase help-dummy > -mtune=generic -march=x86-64 -auxbase help-dummy --help=target -o > /tmp/cc5NWl4E.s) > > So should be fine. Is -march=native accepted or rejected by cc1? It should be rejected. Uros.
[PATCH] Fix PR88148
The following fixes the missing VN elimination in loop->nb_iterations which causes ICEs later on. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2018-11-22 Richard Biener PR tree-optimization/88148 * tree-ssa-loop-niter.c (simplify_replace_tree): Get optional valueization callback parameter and handle it. * tree-ssa-loop-niter.h (simplify_replace_tree): Export. * tree-ssa-sccvn.c (process_bb): Eliminate in loop niter trees. * gfortran.dg/pr88148.f90: New testcase. >From 8eb9fcc2eb090fff909b21c7ded50bfd1304fcc5 Mon Sep 17 00:00:00 2001 From: Richard Guenther Date: Thu, 22 Nov 2018 12:11:09 +0100 Subject: [PATCH] fix-pr88148 diff --git a/gcc/testsuite/gfortran.dg/pr88148.f90 b/gcc/testsuite/gfortran.dg/pr88148.f90 new file mode 100644 index 000..cd4cd897c61 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr88148.f90 @@ -0,0 +1,705 @@ +! { dg-do compile } +! { dg-options "-O -fno-tree-fre -fno-tree-sra -ftree-loop-vectorize" } +! { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } + +module lfk_prec + integer, parameter :: dp=kind(1.d0) +end module lfk_prec + +!*** + +SUBROUTINE kernel(tk) +!*** +! * +!KERNEL executes 24 samples of Fortran computation * +! TK(1) - total cpu time to execute only the 24 kernels. * +! TK(2) - total Flops executed by the 24 Kernels * +!*** +! * +! L. L. N. L. F O R T R A N K E R N E L S: M F L O P S * +! * +! These kernels measure Fortran numerical computation rates for a * +! spectrum of CPU-limited computational structures. Mathematical * +! through-put is measured in units of millions of floating-point * +! operations executed per Second, called Mega-Flops/Sec. * +! * +! This program measures a realistic CPU performance range for the * +! Fortran programming system on a given day. The CPU performance * +! rates depend strongly on the maturity of the Fortran compiler's * +! ability to translate Fortran code into efficient machine code. * +! [ The CPU hardware capability apart from compiler maturity (or * +! availability), could be measured (or simulated) by programming the * +! kernels in assembly or machine code directly. These measurements * +! can also serve as a framework for tracking the maturation of the * +! Fortran compiler during system development.] * +! * +! Fonzi's Law: There is not now and there never will be a language * +! in which it is the least bit difficult to write * +! bad programs. * +!F.H.MCMAHON 1972 * +!*** + +! l1 := param-dimension governs the size of most 1-d arrays +! l2 := param-dimension governs the size of most 2-d arrays + +! Loop := multiple pass control to execute kernel long enough to ti +!me. +! n := DO loop control for each kernel. Controls are set in subr. +! SIZES + +! ** +use lfk_prec +implicit double precision (a-h,o-z) +!IBM IMPLICIT REAL*8 (A-H,O-Z) + +REAL(kind=dp), INTENT(inout):: tk +INTEGER :: test !!,AND + +COMMON/alpha/mk,ik,im,ml,il,mruns,nruns,jr,iovec,npfs(8,3,47) +COMMON/beta/tic,times(8,3,47),see(5,3,8,3),terrs(8,3,47),csums(8,3 & +,47),fopn(8,3,47),dos(8,3,47) + +COMMON/spaces/ion,j5,k2,k3,loop1,laps,loop,m,kr,lp,n13h,ibuf,nx,l, & +npass,nfail,n,n1,n2,n13,n213,n813,n14,n16,n416,n21,nt1,nt2,last,idebug & +,mpy,loop2,mucho,mpylim,intbuf(16) + +COMMON/spacer/a11,a12,a13,a21,a22,a23,a31,a32,a33,ar,br,c0,cr,di,dk & +,dm22,dm23,dm24,dm25,dm26,dm27,dm28,dn,e3,e6,expmax,flx,q,qa,r,ri & +,s,scale,sig,stb5,t,xnc,xnei,xnm + +COMMON/space0/time(47),csum(47),ww(47),wt(47),ticks,fr(9),terr1(47 & +),sumw(7),start,skale(47),bias(47),ws(95),total(47),flopn(47),iq(7 & +),npf,npfs1(47) + +COMMON/spacei/wtp(3),mul(3),ispan(47,3),ipass(47,3) + +! ** + + +INTEGER :: e,f,zone +COMMON/ispace/e(96),f(96),ix(1001),ir(1001),zone(300) + +COMMON/space1/u(1001),v(1001),w(1001),x(1001),y(1001),z(1001),g(1001) & +,du1(101),du2(10
Re: [PATCH] Fix option values for -march.
On 11/22/18 3:04 PM, Uros Bizjak wrote: > On Thu, Nov 22, 2018 at 3:00 PM Martin Liška wrote: >> >> On 11/22/18 2:51 PM, Uros Bizjak wrote: >>> On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: >>> The patch makes clear we'll not diverge number of elements in processor_names and the corresponding enum. Plus I fixed -march=znver2 native as valid options that were not listed. Patch survives tests and bootstrap on x86_64-linux-gnu. Ready for trunk? Martin gcc/ChangeLog: 2018-11-22 Martin Liska * common/config/i386/i386-common.c (processor_names): Add static assert and add missing "znver2". (ix86_get_valid_option_values): Add checking assert for null values and add "native" value if feasible. * config/i386/i386.h: Do not declare size of processor_names. --- gcc/common/config/i386/i386-common.c | 26 ++ gcc/config/i386/i386.h | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) >>> >>> +/* Guarantee that the array is aligned with henum processor_type. */ >>> +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) >>> + == PROCESSOR_max)); >>> >>> Please use ARRAY_SIZE macro here. >> >> Fixed, it's definitely nicer! >> >>> >>> +#ifdef HAVE_LOCAL_CPU_DETECT >>> + /* Add also "native" as possible value. */ >>> + v.safe_push ("native"); >>> +#endif >>> >>> "native" is processed by the driver and this option is never passed to cc1. >> >> But it's a target common hook that is used both from driver and cc1: >> >> $ ./xgcc -B. --completion=-march=nat >> -march=native >> >> >> (Starting program: /dev/shm/objdir/gcc/xgcc -B. --completion=-march=nat) >> >> $ ./xgcc -B. --help=target | grep native >> ... >> i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 >> samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m >> pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere >> sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell >> skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom >> silvermont slm goldmont goldmont-plus tremont knl knm intel geode k6 k6-2 >> k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 eden-x2 nano >> nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron >> opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 >> bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 generic native >> >> >> >> (Starting program: /dev/shm/objdir/gcc/cc1 -quiet -iprefix >> /dev/shm/objdir/gcc/../lib64/gcc/x86_64-pc-linux-gnu/9.0.0/ -isystem >> ./include -isystem ./include-fixed help-dummy -quiet -dumpbase help-dummy >> -mtune=generic -march=x86-64 -auxbase help-dummy --help=target -o >> /tmp/cc5NWl4E.s) >> >> So should be fine. > > Is -march=native accepted or rejected by cc1? It should be rejected. It's rejected, but we provide bad hints: $ ./cc1 -march=native /tmp/arch.c cc1: error: bad value (‘native’) for ‘-march=’ switch cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 native It's quite bad because we can't distinguish whether bad -march= value was passed to driver, or directly into a FE: ./cc1 -march=native2 /tmp/arch.c cc1: error: bad value (‘native2’) for ‘-march=’ switch cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 native; did you mean ‘native’? ./xgcc -B. -march=native2 /tmp/arch.c cc1: error: bad value (‘native2’) for ‘-march=’ switch cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdve
Re: [PATCH] Fix option values for -march.
On Thu, Nov 22, 2018 at 3:22 PM Martin Liška wrote: > > On 11/22/18 3:04 PM, Uros Bizjak wrote: > > On Thu, Nov 22, 2018 at 3:00 PM Martin Liška wrote: > >> > >> On 11/22/18 2:51 PM, Uros Bizjak wrote: > >>> On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: > >>> > The patch makes clear we'll not diverge number of elements in > processor_names and the corresponding enum. Plus I fixed > -march=znver2 native as valid options that were not listed. > > Patch survives tests and bootstrap on x86_64-linux-gnu. > > Ready for trunk? > Martin > > gcc/ChangeLog: > > 2018-11-22 Martin Liska > > * common/config/i386/i386-common.c (processor_names): Add > static assert and add missing "znver2". > (ix86_get_valid_option_values): Add checking assert for null > values and add "native" value if feasible. > * config/i386/i386.h: Do not declare size of processor_names. > --- > gcc/common/config/i386/i386-common.c | 26 ++ > gcc/config/i386/i386.h | 2 +- > 2 files changed, 23 insertions(+), 5 deletions(-) > >>> > >>> +/* Guarantee that the array is aligned with henum processor_type. */ > >>> +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) > >>> + == PROCESSOR_max)); > >>> > >>> Please use ARRAY_SIZE macro here. > >> > >> Fixed, it's definitely nicer! > >> > >>> > >>> +#ifdef HAVE_LOCAL_CPU_DETECT > >>> + /* Add also "native" as possible value. */ > >>> + v.safe_push ("native"); > >>> +#endif > >>> > >>> "native" is processed by the driver and this option is never passed to > >>> cc1. > >> > >> But it's a target common hook that is used both from driver and cc1: > >> > >> $ ./xgcc -B. --completion=-march=nat > >> -march=native > >> > >> > >> (Starting program: /dev/shm/objdir/gcc/xgcc -B. --completion=-march=nat) > >> > >> $ ./xgcc -B. --help=target | grep native > >> ... > >> i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 > >> samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 > >> pentium3m pentium-m pentium4 pentium4m prescott nocona core2 nehalem > >> corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell > >> core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client > >> icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont > >> knl knm intel geode k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp > >> athlon-mp x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 > >> eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 > >> athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 > >> btver1 btver2 generic native > >> > >> > >> > >> (Starting program: /dev/shm/objdir/gcc/cc1 -quiet -iprefix > >> /dev/shm/objdir/gcc/../lib64/gcc/x86_64-pc-linux-gnu/9.0.0/ -isystem > >> ./include -isystem ./include-fixed help-dummy -quiet -dumpbase help-dummy > >> -mtune=generic -march=x86-64 -auxbase help-dummy --help=target -o > >> /tmp/cc5NWl4E.s) > >> > >> So should be fine. > > > > Is -march=native accepted or rejected by cc1? It should be rejected. > > It's rejected, but we provide bad hints: > > $ ./cc1 -march=native /tmp/arch.c > cc1: error: bad value (‘native’) for ‘-march=’ switch > cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem > corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 > broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server > bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm x86-64 > eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 > opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona > bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 native > > It's quite bad because we can't distinguish whether bad -march= value was > passed to driver, or directly into a FE: > > ./cc1 -march=native2 /tmp/arch.c > cc1: error: bad value (‘native2’) for ‘-march=’ switch > cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem > corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 > broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server > bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm x86-64 > eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 > opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona > bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 native; did you mean > ‘native’? > > ./xgcc -B. -march=native2 /tmp/arch.c > cc1: error: bad value (‘native2’) for ‘-march=’ switch > cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem > corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 > broadwell skylake skylake-avx512 cannonlake ice
Re: [PATCH][RFC] Extend locations where to seach for Fortran pre-include.
On Thu, 22 Nov 2018, Martin Liška wrote: > > (Multilib suffixes on include directories for C are more or less an > > implementation detail of how fixed headers are arranged in the case where > > sysroot headers suffixes are used; they aren't really expected to be a > > stable interface such that third-party software might install anything > > using them, but I'm not sure if this preinclude is meant to come from > > external software or be installed by GCC. > > It will come from glibc-devel package, and it's expected to be installed in: > usr/include/finclude/ for 64-bit header > and /usr/include/finclude/32/ for the 32-bit header. So, to be clear, is that /finclude/$($CC $CFLAGS $CPPFLAGS -print-multi-directory) ? (Where glibc would be what uses the $CC $CFLAGS $CPPFLAGS -print-multi-directory to determine where to install the file.) If so, you need to make sure that all of those pieces are properly used. * The sysroot and headers suffix in the case of a sysrooted toolchain. (Sysroot headers suffixes are for e.g. the case of a toolchain with both glibc and uClibc multilibs, so needing multiple sets of headers. Most toolchains with multiple sysroots using the same libc only need a single set of headers and don't use sysroot headers suffixes, only sysroot suffixes (non-headers), with appropriate arrangements being made for all the per-multilib headers, such as gnu/lib-names-*.h and gnu/stubs-*.h, to be copied into the common include directory.) * The native system header directory (which is /include not /usr/include for GNU Hurd, for example; see config.gcc). * Then finclude with the multilib (non-OS) suffix. And you need to consider what's right for non-sysrooted toolchains. If native, the above is right, but without the sysroot-related components. But what about a non-sysrooted cross toolchain? Certainly using the native directory would be wrong there. Also, what's right in the multiarch directory arrangements case - should it be //finclude instead? Would one of the Debian / Ubuntu GCC maintainers like to comment? Are there corresponding versions with /usr/local/include (LOCAL_INCLUDE_DIR, in general), before those with NATIVE_SYSTEM_HEADER_DIR? Even in the driver, the list of directories in cppdefault.c should at least serve as a guide to which directories you want to search and which get sysroots added (of course, the C++-specific ones are irrelevant here). -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH v2 1/3] Allow memory operands for PTWRITE
On Thu, Nov 22, 2018 at 5:24 AM Richard Biener wrote: > > On Wed, Nov 21, 2018 at 4:38 PM H.J. Lu wrote: > > > > On Wed, Nov 21, 2018 at 6:48 AM Richard Biener > > wrote: > > > > > > On Tue, Nov 20, 2018 at 7:36 PM Andi Kleen wrote: > > > > > > > > On Tue, Nov 20, 2018 at 11:53:15AM +0100, Richard Biener wrote: > > > > > On Fri, Nov 16, 2018 at 8:07 AM Uros Bizjak wrote: > > > > > > > > > > > > On Fri, Nov 16, 2018 at 4:57 AM Andi Kleen > > > > > > wrote: > > > > > > > > > > > > > > From: Andi Kleen > > > > > > > > > > > > > > The earlier PTWRITE builtin definition was unnecessarily > > > > > > > restrictive, > > > > > > > only allowing register input to PTWRITE. The instruction actually > > > > > > > supports memory operands too, so allow that too. > > > > > > > > > > > > > > gcc/: > > > > > > > > > > > > > > 2018-11-15 Andi Kleen > > > > > > > > > > > > > > * config/i386/i386.md: Allow memory operands to ptwrite. > > > > > > > > > > > > OK. > > > > > > > > > > Btw, I wonder why the ptwrite builtin is in SPECIAL_ARGS2 > > > > > commented as /* Add all special builtins with variable number of > > > > > operands. */? > > > > > > > > i think i put it in the same place as a similar builtin. AFAIK > > > > those others don't have variable arguments either, so the comment > > > > may be wrong? > > > > > > No idea... > > > > > > > > > > > > > On the GIMPLE level this builtin also has quite some (bad) effects on > > > > > alias analysis and any related optimization (vectorization, etc.). > > > > > I'll have > > > > > to see where the instrumenting pass now resides. > > > > > > > > It's fairly late now. > > > > > > OK, saw that. > > > > > > > Any suggestions for improvements? At some point I removed the edges > > > > like the old MPX builtins to minimize memory usage, but that was > > > > removed during an earlier review cycle. > > > > > > I guess it's fine now - it will have an effect on TER, limiting its > > > ability > > > a bit, but otherwise the builtin only lives up to RTL expansion where > > > it becomes the UNSPEC_VOLATILE. As said, instrumenting on > > > RTL would be an improvement, I think HJ might be able to help with that. > > > > > > > What are the issues? > > AFAIU inserting ptwrite only for values where the location allows a > variable to be infered from debug location lists _and_ properly > extend the location range to cover the ptwrite instruction itself if > the value dies otherwise (like for stores). > > See the thread about the instrumentation pass which currently > is implemented on GIMPLE. What are the issues for instrumenting as an RTL pass? -- H.J.
Re: [PATCH] add simple attribute introspection
On Thu, 22 Nov 2018 at 14:09, Rainer Orth wrote: > > Hi Christophe, > > > On Sat, 10 Nov 2018 at 00:43, Martin Sebor wrote: > >> > >> On 11/09/2018 12:59 PM, Jeff Law wrote: > >> > On 10/31/18 10:27 AM, Martin Sebor wrote: > >> >> Ping: https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01473.html > >> >> > >> >> With the C++ bits approved I'm still looking for a review/approval > >> >> of the remaining changes: the C front end and the shared c-family > >> >> handling of the new built-in. > >> > I thought I acked those with just a couple minor doc nits: > >> > >> I don't see a formal approval for the rest in my Inbox or in > >> the archive. > >> > >> >> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > >> >> index 8ffb0cd..dcf4747 100644 > >> >> --- a/gcc/doc/extend.texi > >> >> +++ b/gcc/doc/extend.texi > >> >> @@ -2649,8 +2649,9 @@ explicit @code{externally_visible} attributes > >> > are still necessary. > >> >> @cindex @code{flatten} function attribute > >> >> Generally, inlining into a function is limited. For a function > >> > marked with > >> >> this attribute, every call inside this function is inlined, if > >> >> possible. > >> >> -Whether the function itself is considered for inlining depends on its > >> > size and > >> >> -the current inlining parameters. > >> >> +Functions declared with attribute @code{noinline} and similar are not > >> >> +inlined. Whether the function itself is considered for inlining > >> >> depends > >> >> +on its size and the current inlining parameters. > >> > Guessing this was from another doc patch that I think has already been > >> > approved > >> > >> Yes. It shouldn't be in the latest patch at the link above. > >> > >> >> @@ -11726,6 +11728,33 @@ check its compatibility with @var{size}. > >> >> > >> >> @end deftypefn > >> >> > >> >> +@deftypefn {Built-in Function} bool __builtin_has_attribute > >> > (@var{type-or-expression}, @var{attribute}) > >> >> +The @code{__builtin_has_attribute} function evaluates to an integer > >> > constant > >> >> +expression equal to @code{true} if the symbol or type referenced by > >> >> +the @var{type-or-expression} argument has been declared with > >> >> +the @var{attribute} referenced by the second argument. Neither > >> >> argument > >> >> +is valuated. The @var{type-or-expression} argument is subject to the > >> > same > >> > s/valuated/evaluated/ ? > >> > >> This should also be fixed in the latest patch at the link above. > >> > >> > Did the implementation change significantly requiring another review > >> > iteration? > >> > >> I don't think it changed too significantly between the last two > >> revisions but I don't have a record of anyone having approved > >> the C FE and the middle-end bits. (Sorry if I missed it.) Other > >> than this response from you all I see in the archive is this: > >> > >>https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00606.html > >> > >> Please let me if the last revision is okay to commit. > >> > > > > Hi, > > > > It seems you committed this yesterday as r266335, and I have noticed > > new failures: > > on both aarch64 and arm: > > FAIL: c-c++-common/builtin-has-attribute-3.c -Wc++-compat (test for > > excess errors) > > > > gcc.log says: > > Excess errors: > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:11:25: error: > > alignment for 'faligned_1' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:12:25: error: > > alignment for 'faligned_2' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:39:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:40:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:47:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:48:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:50:3: error: > > size of array 'Assert' is negative > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:52:3: error: > > size of array 'Assert' is negative > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:53:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:56:3: error: > > size of array 'Assert' is negative > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:58:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: > > alignment for '__builtin_has_attribute_tmp.' must be at least 4 > > /gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:59:3: error: >
Re: [PATCH, ARM, ping3] PR85434: Prevent spilling of stack protector guard's address on ARM
Thanks Kyrill. Committed the attached patch. Best regards, Thomas On Wed, 21 Nov 2018 at 16:06, Kyrill Tkachov wrote: > > Hi Thomas, > > Sorry for the delay. > > On 16/11/18 14:56, Thomas Preudhomme wrote: > > Ping? > > > > Best regards, > > > > Thomas > > > > On Sat, 10 Nov 2018 at 15:07, Thomas Preudhomme > > wrote: > >> Thanks Kyrill. > >> > >> Updated patch in attachment. Best regards, > >> > >> Thomas > >> On Thu, 8 Nov 2018 at 15:53, Kyrill Tkachov > >> wrote: > >>> Hi Thomas, > >>> > >>> On 08/11/18 09:52, Thomas Preudhomme wrote: > Ping? > > Best regards, > > Thomas > > On Thu, 1 Nov 2018 at 16:03, Thomas Preudhomme > wrote: > > Ping? > > > > Best regards, > > > > Thomas > > On Fri, 26 Oct 2018 at 22:41, Thomas Preudhomme > > wrote: > >> Hi, > >> > >> Please find updated patch to fix PR85434: spilling of stack protector > >> guard's address on ARM. Quite a few changes have been made to the ARM > >> part since last round of review so I think it makes more sense to > >> review it anew. Ran bootstrap + regression testsuite + glibc build + > >> glibc regression testsuite for Arm and Thumb-2 and bootstrap + > >> regression testsuite for Thumb-1. GCC's regression testsuite was run > >> in 3 configurations in all those cases: > >> > >> - default configuration (no RUNTESTFLAGS) > >> - with -fstack-protector-all > >> - with -fPIC -fstack-protector-all (to exercise both codepath in stack > >> protector's split code) > >> > >> None of this show any regression beyond some new scan fail with > >> -fstack-protector-all or -fPIC due to unexpected code sequence for the > >> testcases concerned and some guality swing due to less optimization > >> with new stack protector on. > >> > >> Patch description and ChangeLog below. > >> > >> In case of high register pressure in PIC mode, address of the stack > >> protector's guard can be spilled on ARM targets as shown in PR85434, > >> thus allowing an attacker to control what the canary would be compared > >> against. ARM does lack stack_protect_set and stack_protect_test insn > >> patterns, defining them does not help as the address is expanded > >> regularly and the patterns only deal with the copy and test of the > >> guard with the canary. > >> > >> This problem does not occur for x86 targets because the PIC access and > >> the test can be done in the same instruction. Aarch64 is exempt too > >> because PIC access insn pattern are mov of UNSPEC which prevents it > >> from > >> the second access in the epilogue being CSEd in cse_local pass with the > >> first access in the prologue. > >> > >> The approach followed here is to create new "combined" set and test > >> standard pattern names that take the unexpanded guard and do the set or > >> test. This allows the target to use an opaque pattern (eg. using > >> UNSPEC) > >> to hide the individual instructions being generated to the compiler and > >> split the pattern into generic load, compare and branch instruction > >> after register allocator, therefore avoiding any spilling. This is here > >> implemented for the ARM targets. For targets not implementing these new > >> standard pattern names, the existing stack_protect_set and > >> stack_protect_test pattern names are used. > >> > >> To be able to split PIC access after register allocation, the functions > >> had to be augmented to force a new PIC register load and to control > >> which register it loads into. This is because sharing the PIC register > >> between prologue and epilogue could lead to spilling due to CSE again > >> which an attacker could use to control what the canary gets compared > >> against. > >> > >> ChangeLog entries are as follows: > >> > >> *** gcc/ChangeLog *** > >> > >> 2018-10-26 Thomas Preud'homme > >> > >> * target-insns.def (stack_protect_combined_set): Define new standard > >> pattern name. > >> (stack_protect_combined_test): Likewise. > >> * cfgexpand.c (stack_protect_prologue): Try new > >> stack_protect_combined_set pattern first. > >> * function.c (stack_protect_epilogue): Try new > >> stack_protect_combined_test pattern first. > >> * config/arm/arm.c (require_pic_register): Add pic_reg and compute_now > >> parameters to control which register to use as PIC register and force > >> reloading PIC register respectively. Insert in the stream of insns if > >> possible. > >> (legitimize_pic_address): Expose above new parameters in prototype and > >> adapt recursive calls accordingly. Use pic_reg if non null instead of > >> cached one. > >> (arm_load_pic_register): Add pic_reg parameter and use it if non null. > >> (arm_legitimize_address): Adapt to new legiti
Re: [PATCH] Fix option values for -march.
On 11/22/18 3:28 PM, Uros Bizjak wrote: > On Thu, Nov 22, 2018 at 3:22 PM Martin Liška wrote: >> >> On 11/22/18 3:04 PM, Uros Bizjak wrote: >>> On Thu, Nov 22, 2018 at 3:00 PM Martin Liška wrote: On 11/22/18 2:51 PM, Uros Bizjak wrote: > On Thu, Nov 22, 2018 at 2:43 PM Martin Liška wrote: > >> The patch makes clear we'll not diverge number of elements in >> processor_names and the corresponding enum. Plus I fixed >> -march=znver2 native as valid options that were not listed. >> >> Patch survives tests and bootstrap on x86_64-linux-gnu. >> >> Ready for trunk? >> Martin >> >> gcc/ChangeLog: >> >> 2018-11-22 Martin Liska >> >> * common/config/i386/i386-common.c (processor_names): Add >> static assert and add missing "znver2". >> (ix86_get_valid_option_values): Add checking assert for null >> values and add "native" value if feasible. >> * config/i386/i386.h: Do not declare size of processor_names. >> --- >> gcc/common/config/i386/i386-common.c | 26 ++ >> gcc/config/i386/i386.h | 2 +- >> 2 files changed, 23 insertions(+), 5 deletions(-) > > +/* Guarantee that the array is aligned with henum processor_type. */ > +STATIC_ASSERT ((sizeof (processor_names) / sizeof (processor_names[0]) > + == PROCESSOR_max)); > > Please use ARRAY_SIZE macro here. Fixed, it's definitely nicer! > > +#ifdef HAVE_LOCAL_CPU_DETECT > + /* Add also "native" as possible value. */ > + v.safe_push ("native"); > +#endif > > "native" is processed by the driver and this option is never passed to > cc1. But it's a target common hook that is used both from driver and cc1: $ ./xgcc -B. --completion=-march=nat -march=native (Starting program: /dev/shm/objdir/gcc/xgcc -B. --completion=-march=nat) $ ./xgcc -B. --help=target | grep native ... i386 i486 i586 pentium lakemont pentium-mmx winchip-c6 winchip2 c3 samuel-2 c3-2 nehemiah c7 esther i686 pentiumpro pentium2 pentium3 pentium3m pentium-m pentium4 pentium4m prescott nocona core2 nehalem corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont knl knm intel geode k6 k6-2 k6-3 athlon athlon-tbird athlon-4 athlon-xp athlon-mp x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 generic native (Starting program: /dev/shm/objdir/gcc/cc1 -quiet -iprefix /dev/shm/objdir/gcc/../lib64/gcc/x86_64-pc-linux-gnu/9.0.0/ -isystem ./include -isystem ./include-fixed help-dummy -quiet -dumpbase help-dummy -mtune=generic -march=x86-64 -auxbase help-dummy --help=target -o /tmp/cc5NWl4E.s) So should be fine. >>> >>> Is -march=native accepted or rejected by cc1? It should be rejected. >> >> It's rejected, but we provide bad hints: >> >> $ ./cc1 -march=native /tmp/arch.c >> cc1: error: bad value (‘native’) for ‘-march=’ switch >> cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem >> corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell >> core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client >> icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont >> knl knm x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 >> nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx >> amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 >> native >> >> It's quite bad because we can't distinguish whether bad -march= value was >> passed to driver, or directly into a FE: >> >> ./cc1 -march=native2 /tmp/arch.c >> cc1: error: bad value (‘native2’) for ‘-march=’ switch >> cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem >> corei7 westmere sandybridge corei7-avx ivybridge core-avx-i haswell >> core-avx2 broadwell skylake skylake-avx512 cannonlake icelake-client >> icelake-server bonnell atom silvermont slm goldmont goldmont-plus tremont >> knl knm x86-64 eden-x2 nano nano-1000 nano-2000 nano-3000 nano-x2 eden-x4 >> nano-x4 k8 k8-sse3 opteron opteron-sse3 athlon64 athlon64-sse3 athlon-fx >> amdfam10 barcelona bdver1 bdver2 bdver3 bdver4 znver1 znver2 btver1 btver2 >> native; did you mean ‘native’? >> >> ./xgcc -B. -march=native2 /tmp/arch.c >> cc1: error: bad value (‘native2’) for ‘-march=’ switch >> cc1: note: valid arguments to ‘-march=’ switch are: nocona core2 nehalem >> corei7 westmere sandybridge corei7-avx ivybridge
Re: [RFC, RFT PATCH, mingw]: Do not cancel vzeroupper when XMM registers live across call
On Wed, Nov 21, 2018 at 9:26 AM Uros Bizjak wrote: > Before vzeroupper gets emitted before function call, the compiler > checks if if there are live call-saved SSE registers at the insertion > point. This functionality is intended to handle Windows ABI, so we > don't clear upper parts of the XMM registers that live across the > call. > > However, the called function saves only lower 128bit part of the XMM > register, so it seems that wider modes have to be saved and restored > by the caller function anyway. If this is the case, we don't have to > cancel vzeroupper insertion before the call. > > Attached patch removes this cancellation, since all other ABIs clobber > all XMM registers. > > 2018-21-11 Uros Bizjak > > * config/i386/i386.c (ix86_avx_emit_vzeroupper): Remove. > (ix86_emit_mode_set) : Emit vzeroupper here. > > The patch is untested, since I have no Windows target here. Daniel, > can you please review the above assumptions and test the patch on > Windows target? It is evident from the generated asm and the compiler source that only lower 128 bits of xmm registers are saved. Now committed to mainline SVN. Uros.
Re: [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach)
On 20 November 2018 22:54:49 CET, Julian Brown wrote: > >Previously posted upstream: >https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00826.html As said in https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00861.html + bool array_only_p = true; + /* Disallow duplicate bare variable references and multiple +subarrays of the same array here, but allow multiple components of +the same (e.g. derived-type) variable. For the latter, duplicate +components are detected elsewhere. */ + if (openacc && n->expr && n->expr->expr_type == EXPR_VARIABLE) + for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next) + if (ref->type != REF_ARRAY) + array_only_p = false; Looks like you could break here when setting array_only_p to false. + if (array_only_p) + { + if (n->sym->mark) + gfc_error ("Symbol %qs present on multiple clauses at %L", + n->sym->name, &n->where); + else + n->sym->mark = 1; + } } + if (ptr && (region_type & ORT_ACC) != 0) + { + /* Turning a GOMP_MAP_ALWAYS_POINTER clause into a +GOMP_MAP_ATTACH clause after we have detected a case +that needs a GOMP_MAP_STRUCT mapping adding. As said: s/adding/added/ i think.
Re: [PATCH, ARM, ping3] PR85434: Prevent spilling of stack protector guard's address on ARM
I'm talking about the PIC access to the guard's variable. See for example the pr85434.c testcase contributed with this patch when compiled for aarch64 with -Os -fpic -march=armv8-a -fstack-protector-strong: (insn 227 226 228 33 (set (reg:DI 90) (high:DI (symbol_ref:DI ("_GLOBAL_OFFSET_TABLE_" "/data/dev/checkouts/private/linaro/gcc/gcc/testsuite/gcc.target/arm/pr85434.c":148:1 -1 (nil)) (insn 228 227 229 33 (set (reg/f:DI 244) (unspec:DI [ (mem/u/c:DI (lo_sum:DI (reg:DI 90) (symbol_ref:DI ("__stack_chk_guard") [flags 0xc0] )) [0 S8 A8]) ] UNSPEC_GOTSMALLPIC28K)) "/data/dev/checkouts/private/linaro/gcc/gcc/testsuite/gcc.target/arm/pr85434.c":148:1 -1 (expr_list:REG_EQUAL (symbol_ref:DI ("__stack_chk_guard") [flags 0xc0] ) (nil))) (insn 229 228 230 33 (parallel [ (set (reg:DI 245) (unspec:DI [ (mem/v/f/c:DI (plus:DI (reg/f:DI 85 virtual-stack-vars) (const_int -8 [0xfff8])) [4 D.3715+0 S8 A64]) (mem/v/f/c:DI (reg/f:DI 244) [4 __stack_chk_guard+0 S8 A64]) ] UNSPEC_SP_TEST)) (clobber (scratch:DI)) ]) "/data/dev/checkouts/private/linaro/gcc/gcc/testsuite/gcc.target/arm/pr85434.c":148:1 -1 (nil)) The unspec in insn 228 is not CSEd in my experiment despite the same instruction happening in the prologue to set the canary. In arm backend it was but the PIC access is of the form (mem (reg) (unspec offset)), ie the outermost rtx in the source is not an unspec. Best regards, Thomas On Wed, 21 Nov 2018 at 17:54, Segher Boessenkool wrote: > > On Fri, Nov 16, 2018 at 02:56:46PM +, Thomas Preudhomme wrote: > > In case of high register pressure in PIC mode, address of the stack > > protector's guard can be spilled on ARM targets as shown in PR85434, > > thus allowing an attacker to control what the canary would be compared > > against. ARM does lack stack_protect_set and stack_protect_test insn > > patterns, defining them does not help as the address is expanded > > regularly and the patterns only deal with the copy and test of the > > guard with the canary. > > > > This problem does not occur for x86 targets because the PIC access and > > the test can be done in the same instruction. Aarch64 is exempt too > > because PIC access insn pattern are mov of UNSPEC which prevents it from > > the second access in the epilogue being CSEd in cse_local pass with the > > first access in the prologue. > > The unspecs are not CSEd because they are *different* unspecs (UNSPEC_SP_SET > vs. UNSPEC_SP_TEST; they have different args too, different number of args > even). Two the same unspecs can be CSEd just fine. > > > Segher
Re: Stream TREE_TYPE of TYPE_DECLs again
> * g++.dg/lto/odr-2_0.C: Remove extra brace > > diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C > b/gcc/testsuite/g++.dg/lto/odr-2_0.C > index 222fa2c1db..3ebb49efa2 100644 > --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C > +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C > @@ -1,5 +1,5 @@ > // { dg-lto-do link } > -// { dg-lto-options { { -O0 -flto } } > +// { dg-lto-options { -O0 -flto } } Doesn't this make the testcase to be run twice, once with -O0 and second time with -flto rather than running it once with -O0 -flto? Honza > enum a {} b; // { dg-lto-warning "6: type 'a' violates the C\\+\\+ One > Definition Rule" } > int > main(void) > -- > 2.19.1 > > -- > Andreas Schwab, SUSE Labs, sch...@suse.de > GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 > "And now for something completely different."
Re: Stream TREE_TYPE of TYPE_DECLs again
On Nov 22 2018, Jan Hubicka wrote: >> * g++.dg/lto/odr-2_0.C: Remove extra brace >> >> diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C >> b/gcc/testsuite/g++.dg/lto/odr-2_0.C >> index 222fa2c1db..3ebb49efa2 100644 >> --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C >> +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C >> @@ -1,5 +1,5 @@ >> // { dg-lto-do link } >> -// { dg-lto-options { { -O0 -flto } } >> +// { dg-lto-options { -O0 -flto } } > > Doesn't this make the testcase to be run twice, once with -O0 and second > time with -flto rather than running it once with -O0 -flto? It matches what odr-3_0.C does. Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Re: [PATCH 2/3][GCC][AARCH64] Add new -mbranch-protection option to combine pointer signing and BTI
On 11/12/18 6:24 PM, Sudakshina Das wrote: > Hi Sam > > On 02/11/18 17:31, Sam Tebbs wrote: >> Hi all, >> >> The -mbranch-protection option combines the functionality of >> -msign-return-address and the BTI features new in Armv8.5 to better reflect >> their relationship. This new option therefore supersedes and deprecates the >> existing -msign-return-address option. >> >> -mbranch-protection=[none|standard|] - Turns on different types of >> branch >> protection available where: >> >>* "none": Turn of all types of branch protection >>* "standard" : Turns on all the types of protection to their >> respective >> standard levels. >>* can be "+" separated protection types: >> >> * "bti" : Branch Target Identification Mechanism. >> * "pac-ret{+leaf+b-key}": Return Address Signing. The default return >>address signing is enabled by signing functions that save the return >>address to memory (non-leaf functions will practically always do this) >>using the a-key. The optional tuning arguments allow the user to >>extend the scope of return address signing to include leaf functions >>and to change the key to b-key. The tuning arguments must proceed the >>protection type "pac-ret". >> >> Thus -mbranch-protection=standard -> -mbranch-protection=bti+pac-ret. >> >> Its mapping to -msign-return-address is as follows: >> >>* -mbranch-protection=none -> -msign-return-address=none >>* -mbranch-protection=standard -> -msign-return-address=leaf >>* -mbranch-protection=pac-ret -> -msign-return-address=non-leaf >>* -mbranch-protection=pac-ret+leaf -> -msign-return-address=all >> >> This patch implements the option's skeleton and the "none", "standard" and >> "pac-ret" types (along with its "leaf" subtype). >> >> The previous patch in this series is here: >> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00103.html >> >> Bootstrapped successfully and tested on aarch64-none-elf with no regressions. >> >> OK for trunk? >> > Thank for doing this. I am not a maintainer so you will need a > maintainer's approval. Only nit, that I would add is that it would > be good to have more test coverage, specially for the new parsing > functions that have been added and the errors that are added. > > Example checking a few valid and invalid combinations of the options > like: > -mbranch-protection=pac-ret -mbranch-protection=none //disables > everything > -mbranch-protection=leaf //errors out > -mbranch-protection=none+pac-ret //errors out > ... etc > Also instead of removing all the old deprecated options, you can keep > one (or a copy of one) to check for the deprecated warning. Hi Sudi, Thanks for the feedback, I've incorporated your suggestions into the attached patch and the updated changelog. gcc/ChangeLog: 2018-11-22 Sam Tebbs * config/aarch64/aarch64.c (BRANCH_PROTEC_STR_MAX, aarch64_parse_branch_protection, struct aarch64_branch_protec_type, aarch64_handle_no_branch_protection, aarch64_handle_standard_branch_protection, aarch64_validate_mbranch_protection, aarch64_handle_pac_ret_protection, aarch64_handle_attr_branch_protection, accepted_branch_protection_string, aarch64_pac_ret_subtypes, aarch64_branch_protec_types, aarch64_handle_pac_ret_leaf): Define. (aarch64_override_options_after_change_1): Add check for accepted_branch_protection_string. (aarch64_override_options): Add check for accepted_branch_protection_string. (aarch64_option_save): Save accepted_branch_protection_string. (aarch64_option_restore): Save accepted_branch_protection_string. * config/aarch64/aarch64.c (aarch64_attributes): Add branch-protection. * config/aarch64/aarch64.opt: Add mbranch-protection. Deprecate msign-return-address. * doc/invoke.texi: Add mbranch-protection. gcc/testsuite/ChangeLog: 2018-11-22 Sam Tebbs * (gcc.target/aarch64/return_address_sign_1.c, gcc.target/aarch64/return_address_sign_2.c, gcc.target/aarch64/return_address_sign_3.c (__attribute__)): Change option to -mbranch-protection. * gcc.target/aarch64/(branch-protection-option.c, branch-protection-option-2.c, branch-protection-attr.c, branch-protection-attr-2.c): New file. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0d89ba27e4a7a02903d6cb3de6c19b097cb84d16..c938cdf93f255949969a7fe7a4f8a2c18bfa6115 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -183,6 +183,12 @@ bool aarch64_pcrelative_literal_loads; /* Global flag for whether frame pointer is enabled. */ bool aarch64_use_frame_pointer; +#define BRANCH_PROTEC_STR_MAX 255 +char *accepted_branch_protection_string = NULL; + +static enum aarch64_parse_opt_result +aarch64_parse_branch_protectio
Re: [PATCH v2 1/3] Allow memory operands for PTWRITE
> > See the thread about the instrumentation pass which currently > > is implemented on GIMPLE. > > What are the issues for instrumenting as an RTL pass? It just needs to be completely rewritten and might be more complicated. And it might be more difficult to avoid redundant instrumentation without SSA. It might also be more fragile as RTL changes by target, although right now it's only used on x86, so that wouldn't be a major concern. -Andi
LRA patch for PR87718
The following patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87718 The patch adds a special treatment for moves with a hard register in register cost and class calculation. The patch was bootstrapped and tested on x86-64 and ppc64. I found two testsuite regressions because of the patch. The expected generated code for PR82361 test is too specific. GCC with the patch generates the same quality code but with a different hard register on x86-64. So I just changed the test for PR82361. Another test is for ppc64. I think the expected generated code for this test is wrong. I'll submit a changed test for a discussion later. Although I spent much time on the solution and I think it is the right one, the patch is in very sensitive area of RA and may affect expected code generation for many targets. I am ready to work on the new regressions as soon as they are found. The patch was committed as rev. 260385. Index: ChangeLog === --- ChangeLog (revision 266384) +++ ChangeLog (working copy) @@ -1,3 +1,10 @@ +2018-11-22 Vladimir Makarov + + PR rtl-optimization/87718 + * ira-costs.c: Remove trailing white-spaces. + (record_operand_costs): Add a special treatment for moves + involving a hard register. + 2018-11-22 Uros Bizjak * config/i386/i386.c (ix86_avx_emit_vzeroupper): Remove. Index: ira-costs.c === --- ira-costs.c (revision 266155) +++ ira-costs.c (working copy) @@ -1257,7 +1257,7 @@ record_address_regs (machine_mode mode, add_cost = (move_in_cost[i][rclass] * scale) / 2; if (INT_MAX - add_cost < pp_costs[k]) pp_costs[k] = INT_MAX; - else + else pp_costs[k] += add_cost; } } @@ -1283,10 +1283,100 @@ record_operand_costs (rtx_insn *insn, en { const char *constraints[MAX_RECOG_OPERANDS]; machine_mode modes[MAX_RECOG_OPERANDS]; - rtx ops[MAX_RECOG_OPERANDS]; rtx set; int i; + if ((set = single_set (insn)) != NULL_RTX + /* In rare cases the single set insn might have less 2 operands + as the source can be a fixed special reg. */ + && recog_data.n_operands > 1 + && recog_data.operand[0] == SET_DEST (set) + && recog_data.operand[1] == SET_SRC (set)) +{ + int regno, other_regno; + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + if (GET_CODE (dest) == SUBREG + && known_eq (GET_MODE_SIZE (GET_MODE (dest)), + GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest) + dest = SUBREG_REG (dest); + if (GET_CODE (src) == SUBREG + && known_eq (GET_MODE_SIZE (GET_MODE (src)), + GET_MODE_SIZE (GET_MODE (SUBREG_REG (src) + src = SUBREG_REG (src); + if (REG_P (src) && REG_P (dest) + && (((regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER + && (other_regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER) + || ((regno = REGNO (dest)) >= FIRST_PSEUDO_REGISTER + && (other_regno = REGNO (src)) < FIRST_PSEUDO_REGISTER))) + { + machine_mode mode = GET_MODE (SET_SRC (set)); + cost_classes_t cost_classes_ptr = regno_cost_classes[regno]; + enum reg_class *cost_classes = cost_classes_ptr->classes; + reg_class_t rclass, hard_reg_class, pref_class; + int cost, k; + bool dead_p = find_regno_note (insn, REG_DEAD, REGNO (src)); + + hard_reg_class = REGNO_REG_CLASS (other_regno); + i = regno == (int) REGNO (src) ? 1 : 0; + for (k = cost_classes_ptr->num - 1; k >= 0; k--) + { + rclass = cost_classes[k]; + cost = ((i == 0 + ? ira_register_move_cost[mode][hard_reg_class][rclass] + : ira_register_move_cost[mode][rclass][hard_reg_class]) + * frequency); + op_costs[i]->cost[k] = cost; + /* If we have assigned a class to this allocno in our + first pass, add a cost to this alternative + corresponding to what we would add if this allocno + were not in the appropriate class. */ + if (pref) + { + if ((pref_class = pref[COST_INDEX (regno)]) == NO_REGS) + op_costs[i]->cost[k] + += ((i == 0 ? ira_memory_move_cost[mode][rclass][0] : 0) + + (i == 1 ? ira_memory_move_cost[mode][rclass][1] : 0) + * frequency); + else if (ira_reg_class_intersect[pref_class][rclass] + == NO_REGS) + op_costs[i]->cost[k] + += (ira_register_move_cost[mode][pref_class][rclass] + * frequency); + } + /* If this insn is a single set copying operand 1 to + operand 0 and one operand is an allocno with the + other a hard reg or an allocno that prefers a hard + register that is in its own register class then we + may want to adjust the cost of that register class to + -1. + + Avoid the adjustment if the source does not die to + avoid stressing of register allocator by preferencing + two colliding registers into single class. */ + if (dead_p + && TEST_HARD_REG_BIT (reg_class_contents[rclass], other_regno)
Re: Stream TREE_TYPE of TYPE_DECLs again
On November 22, 2018 5:30:14 PM GMT+01:00, Jan Hubicka wrote: >> * g++.dg/lto/odr-2_0.C: Remove extra brace >> >> diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C >b/gcc/testsuite/g++.dg/lto/odr-2_0.C >> index 222fa2c1db..3ebb49efa2 100644 >> --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C >> +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C >> @@ -1,5 +1,5 @@ >> // { dg-lto-do link } >> -// { dg-lto-options { { -O0 -flto } } >> +// { dg-lto-options { -O0 -flto } } > >Doesn't this make the testcase to be run twice, once with -O0 and >second >time with -flto rather than running it once with -O0 -flto? Yes. >Honza > >> enum a {} b; // { dg-lto-warning "6: type 'a' violates the C\\+\\+ >One Definition Rule" } >> int >> main(void) >> -- >> 2.19.1 >> >> -- >> Andreas Schwab, SUSE Labs, sch...@suse.de >> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA >B9D7 >> "And now for something completely different."
Re: Improve relocation
On Thu, 22 Nov 2018, Jonathan Wakely wrote: On 26/10/18 14:02 +0200, Marc Glisse wrote: Index: libstdc++-v3/include/bits/stl_iterator.h === --- libstdc++-v3/include/bits/stl_iterator.h(revision 265522) +++ libstdc++-v3/include/bits/stl_iterator.h(working copy) @@ -59,20 +59,24 @@ #ifndef _STL_ITERATOR_H #define _STL_ITERATOR_H 1 #include #include #include #include #if __cplusplus > 201402L I think this should be >= 201103L, no? Ah, yes, I guess I started from a copy-paste and got interrupted before updating it :-( OK for trunk (with that #if changed, if appropriate). Thanks. Re-tested and committed. Do you have an opinion on the following item, which may be a bug in the current code? "For __relocate_a_1, I should also test if copying, ++ and != are noexcept, but I wanted to ask first because there might be restrictions on what iterators are allowed to do, even if I didn't see them." -- Marc Glisse
Re: Stream TREE_TYPE of TYPE_DECLs again
> On November 22, 2018 5:30:14 PM GMT+01:00, Jan Hubicka wrote: > >>* g++.dg/lto/odr-2_0.C: Remove extra brace > >> > >> diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C > >b/gcc/testsuite/g++.dg/lto/odr-2_0.C > >> index 222fa2c1db..3ebb49efa2 100644 > >> --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C > >> +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C > >> @@ -1,5 +1,5 @@ > >> // { dg-lto-do link } > >> -// { dg-lto-options { { -O0 -flto } } > >> +// { dg-lto-options { -O0 -flto } } > > > >Doesn't this make the testcase to be run twice, once with -O0 and > >second > >time with -flto rather than running it once with -O0 -flto? > > Yes. Actually it would be useful to have ODR tested with optimization on because streaming is somewhat optimization level specific. I will rework the testcases today so they do not need dg-lto-options Honza
Re: [PATCH 1/2][libbacktrace] Handle realloc returning NULL if size == 0
On Thu, 22 Nov 2018, Tom de Vries wrote: > Hi, > > If realloc is called with size 0, realloc can return NULL. Note that, as of C17, realloc with size 0 is marked as an obsolescent feature (because of inconsistencies between implementations regarding whether the old object is deallocated). So it would be advisable for code intended to be portable to avoid calling realloc with size 0 at all. -- Joseph S. Myers jos...@codesourcery.com
Re: Improve relocation
On 22/11/18 19:10 +0100, Marc Glisse wrote: On Thu, 22 Nov 2018, Jonathan Wakely wrote: On 26/10/18 14:02 +0200, Marc Glisse wrote: Index: libstdc++-v3/include/bits/stl_iterator.h === --- libstdc++-v3/include/bits/stl_iterator.h(revision 265522) +++ libstdc++-v3/include/bits/stl_iterator.h(working copy) @@ -59,20 +59,24 @@ #ifndef _STL_ITERATOR_H #define _STL_ITERATOR_H 1 #include #include #include #include #if __cplusplus > 201402L I think this should be >= 201103L, no? Ah, yes, I guess I started from a copy-paste and got interrupted before updating it :-( OK for trunk (with that #if changed, if appropriate). Thanks. Re-tested and committed. Do you have an opinion on the following item, which may be a bug in the current code? "For __relocate_a_1, I should also test if copying, ++ and != are noexcept, but I wanted to ask first because there might be restrictions on what iterators are allowed to do, even if I didn't see them." I decided to postpone thinking about that :-) In general iterators can throw on those operations. But for container iterators copy construction of "a returned iterator" never throws. I think that means that copying a singular iterator is allowed to throw, but no container member functions ever return singular iterators. Increment and decrement could throw, but for our implementation they don't (and so far __relocate_a_1 is only used with pointers, or vector or deque iterators, right?) Since we know we're using valid iterator ranges (because we choose the begin and end iterators of the ranges being relocated) it should also be safe to assume we never increment a past-the-end iterator or anything else that might give an iterator a reasom to throw. Is that good enough, or do you want to make __relocate_a_1 general enough to work with arbitrary iterators?
Re: [PATCH v3] Add sinh(atanh(x)) and cosh(atanh(x)) optimizations
Hi, Sorry, but I am a little confused. On Wed, Nov 14, 2018 at 11:28 AM Wilco Dijkstra wrote: > > Hi, > > > > Indeed. After plotting the graph of both functions, it is very clear > > that this check isn't required. Sorry about that. > > It wouldn't be clear from the graph, you need to check that +0.0, -0.0, > out of range values, infinities, NaNs give the same answer before/after > your transformation. If so, then you don't need anything extra except > for unsafe-math-optimizations and no-math-errno (given errno handling > is changed). I checked it. They are all the same on x86_64: https://pastebin.com/e63FxDAy I even forced to call the glibc sinh and atanh, but use the sqrtsd instruction. But I do agree that there may be an arch that sets an errno for sinh or cosh but not for sqrt, implying in a unexpected behavior. Is the no-math-errno really necessary? > > > There can be NaNs and Infinities. For NaNs, take any input that is > > outside the [-1, 1] line. > > For Infinities, take x = -1, or x = 1. I think these must be 'honored' > > as to ensure compatibility with the original expression. > > The question is whether you get the same answer for these, not whether > you can end up with an infinity or NaN. The idea is that we optimize based > on the assumption there are no infinities or NaNs. FP operations can still > produce infinities or NaNs, the compiler just doesn't need to worry about > treating them correctly, and it's the programmer's reponsibility to ensure > they are not generated. > > > so I must check for > > !HONOR_SIGNED_ZEROS (type) && HONOR_NANS (type) && HONOR_INFINITIES (type) > > that is correct? Also, is it safe to remove the !finite_math_only with > > this, as now it is stated that the type supports infinity and NaNs? > > No that doesn't look quite right. First check whether the transformation > handles zero/inf/NaN correctly, if so you don't need any of this. > It does handle these correctly, as far as I am concerned. At least in IEEE 754. Richard Biener asked to add !flag_siged_zeros for documentation reasons but this is already covered by unsafe-math. I also checked the flags from the optimization page, but only unsafe math seems to be applicable. https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html So maybe just check for unsafe-math? > > > However, I am not sure if it is OK to remove unsafe-math-optimizations > > even if it enables > > finite_math_only because of the 2 ULP error. As stated in the first > > iteration, the user can be > > using a very precise math library that yields 0 ULP. > > Well 0 ULP would be an impossibility. Unsafe math seems reasonable since > it does behave slightly differently (including in terms of exception flags > set). > It's unfortunate GCC doesn't have clear definition of IEEE conformance > modes like various other compilers. > > Wilco > > >
Re: Improve relocation
On Thu, 22 Nov 2018, Jonathan Wakely wrote: "For __relocate_a_1, I should also test if copying, ++ and != are noexcept, but I wanted to ask first because there might be restrictions on what iterators are allowed to do, even if I didn't see them." I decided to postpone thinking about that :-) In general iterators can throw on those operations. But for container iterators copy construction of "a returned iterator" never throws. I think that means that copying a singular iterator is allowed to throw, but no container member functions ever return singular iterators. Increment and decrement could throw, but for our implementation they don't (and so far __relocate_a_1 is only used with pointers, or vector or deque iterators, right?) Since we know we're using valid iterator ranges (because we choose the begin and end iterators of the ranges being relocated) it should also be safe to assume we never increment a past-the-end iterator or anything else that might give an iterator a reasom to throw. Indeed, I think currently we only use relocate with vector iterators, so it is not urgent to fix this. Is that good enough, or do you want to make __relocate_a_1 general enough to work with arbitrary iterators? Eventually yes, but now that I think about it, as long as relocation is a private implementation detail, all uses are likely to be with standard iterators that we control, so it may not matter. So now I agree with the "postpone" decision, thanks. -- Marc Glisse
Re: Stream TREE_TYPE of TYPE_DECLs again
On Thu, 22 Nov 2018 at 19:14, Jan Hubicka wrote: > > > On November 22, 2018 5:30:14 PM GMT+01:00, Jan Hubicka > > wrote: > > >>* g++.dg/lto/odr-2_0.C: Remove extra brace > > >> > > >> diff --git a/gcc/testsuite/g++.dg/lto/odr-2_0.C > > >b/gcc/testsuite/g++.dg/lto/odr-2_0.C > > >> index 222fa2c1db..3ebb49efa2 100644 > > >> --- a/gcc/testsuite/g++.dg/lto/odr-2_0.C > > >> +++ b/gcc/testsuite/g++.dg/lto/odr-2_0.C > > >> @@ -1,5 +1,5 @@ > > >> // { dg-lto-do link } > > >> -// { dg-lto-options { { -O0 -flto } } > > >> +// { dg-lto-options { -O0 -flto } } > > > > > >Doesn't this make the testcase to be run twice, once with -O0 and > > >second > > >time with -flto rather than running it once with -O0 -flto? > > > > Yes. > > Actually it would be useful to have ODR tested with optimization on > because streaming is somewhat optimization level specific. I will rework > the testcases today so they do not need dg-lto-options > At least the extra { or missing } causes Tcl errors: ERROR: tcl error sourcing /gcc/testsuite/g++.dg/lto/lto.exp. ERROR: unmatched open brace in list while executing "foreach op $tmp { set cmd [lindex $op 0] verbose "cmd is $cmd" if { [string match "dg-skip-if" $cmd] || [string match "dg-require-*" $cmd] } { ..." (procedure "lto-get-options-main" line 26) > Honza
Re: [PATCH] Add -fpad-source option
On Thu, Nov 22, 2018 at 11:00:13AM +0100, Jakub Jelinek wrote: > Ok for trunk if it passes bootstrap/regtest (so far just checked with > make check-gfortran RUNTESTFLAGS='dg.exp=pad_source*' > )? Note, it passed bootstrap/regtest on x86_64-linux. > 2018-11-22 Jakub Jelinek > > * lang.opt (fpad-source): New option. > * scanner.c (load_line): Don't pad fixed form lines if > !flag_pad_source. > * invoke.texi (-fno-pad-source): Document. > > * gfortran.dg/pad_source_1.f: New test. > * gfortran.dg/pad_source_2.f: New test. > * gfortran.dg/pad_source_3.f: New test. > * gfortran.dg/pad_source_4.f: New test. > * gfortran.dg/pad_source_5.f: New test. Jakub
RFA: patch for test PR70669 test
Today I committed a patch https://gcc.gnu.org/ml/gcc-patches/2018-11/msg01945.html But it makes gcc.target/powerpc/pr70669.c to fail. Here is the patch to fix the failure. The expected code assumes that variable r should get a general reg. I suspect the expectation is wrong. There are three possible choices for r: a general reg, a vsx reg, and memory. The cost of move of vsx to/from mem is 4, the cost of move of general reg of this mode to/from mem is 8, and the cost of move of general reg to/from vsx is 12. So approximately if r is assigned memory we have cost 8 * 2 (asm) + 4 * 2 (plus) = 24 (when we use memory at *q), if r is assigned to vsx the cost is 12*2(asm) + 4(r=*q) = 28, and if r is assigned a general reg the cost is 12 * 2 (plus) + 8 (r = *q) = 32. ira-costs should choose mem although it chose a general reg before my patch (it is a big topic why it chose a general reg. In brief, it is because some inherent drawbacks of the current cost calculation algorithm). The patch I recently submitted solves some drawbacks and memory is chosen for r. To generate the expected code with r in a general reg, I just decrease the number of getting r value into vsx reg by changing r + r onto -r. Is the following patch ok for trunk? Index: testsuite/ChangeLog === --- testsuite/ChangeLog (revision 266318) +++ testsuite/ChangeLog (working copy) @@ -1,3 +1,8 @@ +2018-11-22 Vladimir Makarov + + * gcc.target/powerpc/pr70669.c: Use unary minus instead of + addition. + 2018-11-20 Jan Hubicka PR ipa/87706 Index: testsuite/gcc.target/powerpc/pr70669.c === --- testsuite/gcc.target/powerpc/pr70669.c (revision 266318) +++ testsuite/gcc.target/powerpc/pr70669.c (working copy) @@ -13,7 +13,7 @@ void foo (TYPE *p, TYPE *q) #ifndef NO_ASM __asm__ (" # %0" : "+r" (r)); #endif - *p = r + r; + *p = -r; } /* { dg-final { scan-assembler "mfvsrd"} } */
[PATCH] Clean up stack-protector-guard handling in ix86_option_override_internal
Hi! While adjusting the PR85644 patch, I've noticed that while pretty much the whole ix86_option_override_internal works only with opts_set->x_ and opts->x_, the stack protector guard code was accessing global_options_set.x_ or using the macros that expand to global_options.x_ . While it is probably not breaking anything because the target attribute doesn't (at least so far) allow any of these options, it looks inconsistent with the rest. So, the following patch adjusts it to do what the rest of ix86_option_override_internal is doing. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-11-22 Jakub Jelinek * config/i386/i386.c (ix86_option_override_internal): For stack_protector_guard related options, use opts_set->x_ instead of global_options_set. and prefix options with opts->x_ . Move defaults for offset and reg into else block. --- gcc/config/i386/i386.c.jj 2018-11-22 10:44:32.767690142 +0100 +++ gcc/config/i386/i386.c 2018-11-22 11:35:08.294512244 +0100 @@ -4568,14 +4568,10 @@ ix86_option_override_internal (bool main opts->x_ix86_stack_protector_guard = SSP_GLOBAL; } -#ifdef TARGET_THREAD_SSP_OFFSET - ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; -#endif - - if (global_options_set.x_ix86_stack_protector_guard_offset_str) + if (opts_set->x_ix86_stack_protector_guard_offset_str) { char *endp; - const char *str = ix86_stack_protector_guard_offset_str; + const char *str = opts->x_ix86_stack_protector_guard_offset_str; errno = 0; int64_t offset; @@ -4595,20 +4591,16 @@ ix86_option_override_internal (bool main error ("%qs is not a valid offset " "in -mstack-protector-guard-offset=", str); - ix86_stack_protector_guard_offset = offset; + opts->x_ix86_stack_protector_guard_offset = offset; } +#ifdef TARGET_THREAD_SSP_OFFSET + else +opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; +#endif - ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; - - /* The kernel uses a different segment register for performance - reasons; a system call would not have to trash the userspace - segment register, which would be expensive. */ - if (ix86_cmodel == CM_KERNEL) -ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; - - if (global_options_set.x_ix86_stack_protector_guard_reg_str) + if (opts_set->x_ix86_stack_protector_guard_reg_str) { - const char *str = ix86_stack_protector_guard_reg_str; + const char *str = opts->x_ix86_stack_protector_guard_reg_str; addr_space_t seg = ADDR_SPACE_GENERIC; /* Discard optional register prefix. */ @@ -4626,9 +4618,19 @@ ix86_option_override_internal (bool main if (seg == ADDR_SPACE_GENERIC) error ("%qs is not a valid base register " "in -mstack-protector-guard-reg=", - ix86_stack_protector_guard_reg_str); + opts->x_ix86_stack_protector_guard_reg_str); + + opts->x_ix86_stack_protector_guard_reg = seg; +} + else +{ + opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; - ix86_stack_protector_guard_reg = seg; + /* The kernel uses a different segment register for performance +reasons; a system call would not have to trash the userspace +segment register, which would be expensive. */ + if (opts->x_ix86_cmodel == CM_KERNEL) + opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; } /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ Jakub
[PATCH] Small formatting cleanup in i386.c
Hi! And while working on the previously posted patch, I've noticed a ton of lines with = at the end of line rather than at the start of next one (I think = at the end of line is fine only for array initializers). Bootstrapped/regtested on x86_64-linux, ok for trunk? 2018-11-22 Jakub Jelinek * config/i386/i386.c (ix86_option_override_internal, ix86_can_inline_p, classify_argument, construct_container, ix86_expand_prologue, ix86_expand_split_stack_prologue, ix86_expand_carry_flag_compare, expand_set_or_movmem_via_loop, expand_setmem_epilogue_via_loop, promote_duplicated_reg, ix86_expand_set_or_movmem, ix86_init_builtins_va_builtins_abi): Formatting fixes. --- gcc/config/i386/i386.c.jj 2018-11-22 11:35:08.294512244 +0100 +++ gcc/config/i386/i386.c 2018-11-22 11:34:09.868478428 +0100 @@ -4655,8 +4655,8 @@ ix86_option_override_internal (bool main = build_target_option_node (opts); if (opts->x_flag_cf_protection != CF_NONE) -opts->x_flag_cf_protection = - (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); +opts->x_flag_cf_protection + = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, @@ -5472,10 +5472,10 @@ ix86_can_inline_p (tree caller, tree cal struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); bool ret = false; - bool always_inline = - (DECL_DISREGARD_INLINE_LIMITS (callee) - && lookup_attribute ("always_inline", - DECL_ATTRIBUTES (callee))); + bool always_inline += (DECL_DISREGARD_INLINE_LIMITS (callee) + && lookup_attribute ("always_inline", + DECL_ATTRIBUTES (callee))); cgraph_node *callee_node = cgraph_node::get (callee); /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 @@ -7372,8 +7372,8 @@ static int classify_argument (machine_mode mode, const_tree type, enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) { - HOST_WIDE_INT bytes = -(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + HOST_WIDE_INT bytes += mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); /* Variable sized entities are always passed/returned in memory. */ @@ -7429,9 +7429,8 @@ classify_argument (machine_mode mode, co i < ((int_bit_position (field) + (bit_offset % 64)) + tree_to_shwi (DECL_SIZE (field)) + 63) / 8 / 8; i++) - classes[i] = - merge_classes (X86_64_INTEGER_CLASS, -classes[i]); + classes[i] + = merge_classes (X86_64_INTEGER_CLASS, classes[i]); } else { @@ -7468,8 +7467,8 @@ classify_argument (machine_mode mode, co pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; for (i = 0; i < num && (i + pos) < words; i++) - classes[i + pos] = - merge_classes (subclasses[i], classes[i + pos]); + classes[i + pos] + = merge_classes (subclasses[i], classes[i + pos]); } } } @@ -7824,8 +7823,8 @@ construct_container (machine_mode mode, static bool issued_x87_ret_error; machine_mode tmpmode; - int bytes = -(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + int bytes += mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); enum x86_64_reg_class regclass[MAX_CLASSES]; int n; int i; @@ -13366,8 +13365,8 @@ ix86_expand_prologue (void) && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE && !sse_registers_saved) { - HOST_WIDE_INT sse_size = - frame.sse_reg_save_offset - frame.reg_save_offset; + HOST_WIDE_INT sse_size + = frame.sse_reg_save_offset - frame.reg_save_offset; gcc_assert (int_registers_saved); @@ -14648,8 +14647,8 @@ ix86_expand_split_stack_prologue (void) if (split_stack_fn_large == NULL_RTX) { - split_stack_fn_large = - gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); + split_stack_fn_large + = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; } if (ix86_cmodel == CM_LARGE_PIC) @@ -22728,8 +22727,8 @@ ix86_expand_setcc (rtx dest, enum rtx_c
[PATCH] Fix tree-ssa/phi-opt-11.c on s390x (PR testsuite/85368)
Hi! This test apparently FAILs on s390x-linux, which is an effective target of both logical_op_short_circuit and branch_cost. The test has /* { dg-additional-options "-mbranch-cost=2" { target branch_cost } } */ and that option effectively makes the target ! logical_op_short_circuit, but the effective target just reflects the default setting. So, I think it should expect 0 ifs rather than 2 if -mbranch-cost=2 is used and 2 when that option isn't used. Tested on x86_64-linux (which is a branch_cost but not logical_op_short_circuit effective target), Andreas, does this fix it on s390x-linux (I've only looked at the optimized dump using a cross-compiler)? Ok for trunk? 2018-11-22 Jakub Jelinek PR testsuite/85368 * gcc.dg/tree-ssa/phi-opt-11.c: For branch_cost targets, expect 0 ifs rather than 0 or 2 depending on logical_op_short_circuit. --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-11.c.jj 2018-01-17 12:03:37.768893116 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-11.c 2018-11-22 16:33:15.54194 +0100 @@ -23,5 +23,5 @@ int h(int a, int b, int c, int d) return a; } -/* { dg-final { scan-tree-dump-times "if" 0 "optimized" { target { ! logical_op_short_circuit } } } } */ -/* { dg-final { scan-tree-dump-times "if" 2 "optimized" { target logical_op_short_circuit } } } */ +/* { dg-final { scan-tree-dump-times "if" 0 "optimized" { target { { ! logical_op_short_circuit } || branch_cost } } } } */ +/* { dg-final { scan-tree-dump-times "if" 2 "optimized" { target { logical_op_short_circuit && { ! branch_cost } } } } } */ Jakub
[committed] Add testcase for already fixed PR tree-optimization/85794
Hi! This testcase has been fixed by the PR85793 fix r260317. I've committed following test as obvious so that we can close the PR. 2018-11-22 Jakub Jelinek PR tree-optimization/85794 * gcc.dg/vect/O3-pr85794.c: New test. --- gcc/testsuite/gcc.dg/vect/O3-pr85794.c.jj 2018-11-22 17:49:10.898893803 +0100 +++ gcc/testsuite/gcc.dg/vect/O3-pr85794.c 2018-11-22 17:47:04.261981744 +0100 @@ -0,0 +1,12 @@ +/* { dg-do compile } */ + +int a, b, *c, d; +int *f[6]; + +void +foo (void) +{ + for (b = 1; b >= 0; b--) +for (d = 0; d <= 3; d++) + a |= f[b * 5] != c; +} Jakub
[PATCH] Avoid duplicate -Warray-bounds warnings (PR tree-optimization/86614)
Hi! On the following testcases, we warn twice, once in the FE array bounds warning code and once later on. The FE array bounds warning code sets TREE_NO_WARNING on the corresponding MEM_REF, so it is easy to avoid the duplicate warning later. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2018-11-22 Jakub Jelinek PR tree-optimization/86614 * gimple-ssa-warn-restrict.c (maybe_diag_offset_bounds): Return early if TREE_NO_WARNING is set on ref.ref. * c-c++-common/Warray-bounds-2.c (wrap_strncpy_dstarray_diff_neg, call_strncpy_dstarray_diff_neg): Don't expect late -Warray-bounds warnings, just early ones from FE. Remove dg-prune-output. * c-c++-common/Warray-bounds-6.c: New test. --- gcc/gimple-ssa-warn-restrict.c.jj 2018-10-19 10:59:08.279393367 +0200 +++ gcc/gimple-ssa-warn-restrict.c 2018-11-22 18:13:33.813739648 +0100 @@ -1582,6 +1582,9 @@ maybe_diag_offset_bounds (location_t loc if (!warn_array_bounds) return false; + if (ref.ref && TREE_NO_WARNING (ref.ref)) +return false; + offset_int ooboff[] = { ref.offrange[0], ref.offrange[1] }; tree oobref = ref.offset_out_of_bounds (strict, ooboff); if (!oobref) --- gcc/testsuite/c-c++-common/Warray-bounds-2.c.jj 2018-07-23 09:46:57.352997850 +0200 +++ gcc/testsuite/c-c++-common/Warray-bounds-2.c2018-11-22 18:29:49.911602501 +0100 @@ -201,18 +201,16 @@ void call_strncpy_dst_diff_max (const ch static void wrap_strncpy_dstarray_diff_neg (char *d, const char *s, ptrdiff_t i, size_t n) { - strncpy (d + i, s, n); /* { dg-warning "offset -\[0-9\]+ is out of the bounds \\\[0, 90] of object .ar10. with type .(struct )?Array ?\\\[2]." "strncpy" } */ -} + strncpy (d + i, s, n); /* { dg-bogus "offset -\[0-9\]+ is out of the bounds \\\[0, 90] of object .ar10. with type .(struct )?Array ?\\\[2]." "strncpy" } */ +} /* { dg-warning "array subscript -1 is outside array bounds" "" { target *-*-* } .-1 } */ void call_strncpy_dstarray_diff_neg (const char *s, size_t n) { - struct Array ar10[2];/* { dg-message ".ar10. declared here" } */ - sink (&ar10); + struct Array ar10[2];/* { dg-bogus ".ar10. declared here" } */ + sink (&ar10); /* { dg-message "while referencing" "" { target *-*-* } .-1 } */ int off = (char*)ar10[1].a17 - (char*)ar10 + 1; wrap_strncpy_dstarray_diff_neg (ar10[1].a17, s, -off, n); sink (&ar10); } - -/* { dg-prune-output "outside array bounds" } */ --- gcc/testsuite/c-c++-common/Warray-bounds-6.c.jj 2018-11-22 18:26:00.286397043 +0100 +++ gcc/testsuite/c-c++-common/Warray-bounds-6.c2018-11-22 18:31:55.479527492 +0100 @@ -0,0 +1,18 @@ +/* PR tree-optimization/86614 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -Warray-bounds" } */ + +extern char *strncpy (char *, const char *, __SIZE_TYPE__); + +void sink (void *); + +struct A { char b[17]; } a[2]; + +void g (const char *s, unsigned n) +{ + int i = (char *)a[1].b - (char *)a + 1; + char *d = a[1].b; + /* Ensure the same bug is not diagnosed more than once. */ + strncpy (d + i, s, n); /* { dg-warning "array subscript \[0-9]+ is outside array bounds of" } */ + /* { dg-bogus "offset \[0-9]+ is out of the bounds \\\[0, \[0-9]+\\\] of object 'a' with type" "" { target *-*-* } .-1 } */ +} Jakub
Re: Stream TREE_TYPE of TYPE_DECLs again
Hi, I have tweaked the testcases as follows. They now work with -O2 and additionally test better for locations of individual warnings. The output is as follows: ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:5:3: warning: type ‘struct YYSTYPE’ violates the C++ One Definition Rule [-Wodr] 5 | } YYSTYPE; // { dg-lto-message "violates the C\\+\\+ One Definition Rule" 2 } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:1:16: note: a different type is defined in another translation unit 1 | typedef struct YYSTYPE { // { dg-lto-message "type" 2 } |^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:4:7: note: the first difference of corresponding definitions is field ‘a’ 4 | int a; // { dg-lto-message "the first difference of corresponding definitions is field 'a'" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:1:16: note: a type with different number of fields is defined in another translation unit 1 | typedef struct YYSTYPE { // { dg-lto-message "type" 2 } |^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:9:7: warning: type ‘union yyalloc’ violates the C++ One Definition Rule [-Wodr] 9 | union yyalloc { // { dg-lto-warning "7: type 'union yyalloc' violates the C\\+\\+ One Definition Rule" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:6:7: note: a different type is defined in another translation unit 6 | union yyalloc { // { dg-lto-message "type" 2 } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:11:11: note: the first difference of corresponding definitions is field ‘yyvs’ 11 | YYSTYPE yyvs; // { dg-lto-message "the first difference of corresponding definitions is field 'yyvs'" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:11:11: note: a field of same name but different type is defined in another translation unit 11 | YYSTYPE yyvs; // { dg-lto-message "field of same name but different type is defined in another translation unit" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:5:3: note: type ‘struct YYSTYPE’ itself violates the C++ One Definition Rule 5 | } YYSTYPE; // { dg-lto-message "violates the C\\+\\+ One Definition Rule" 2 } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:13:16: warning: ‘a’ violates the C++ One Definition Rule [-Wodr] 13 | extern yyalloc a; // { dg-lto-warning "16: 'a' violates the C\\+\\+ One Definition Rule" } |^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:6:7: note: type ‘union yyalloc’ itself violates the C++ One Definition Rule 6 | union yyalloc { // { dg-lto-message "type" 2 } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_0.C:9:7: note: the incompatible type is defined here 9 | union yyalloc { // { dg-lto-warning "7: type 'union yyalloc' violates the C\\+\\+ One Definition Rule" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:15:9: note: ‘a’ was previously declared here 15 | yyalloc a; // { dg-lto-message "'a' was previously declared here" } | ^ ../../gcc/testsuite/g++.dg/lto/odr-3_1.C:15:9: note: code may be misoptimized unless -fno-strict-aliasing is used which I hope is informative enough. One problem is that I not know how to pattern match warning and note or multiple notes pointing to single location, so I have added somewhat generic dg-message patterns. Help would be apprechiated. I think ODR warnings should be in reasonable shape and it would be nice to have more testsuite coverage. I find it very time consuming to write the pattern matching, hope I will improve with time :) The reason for verbosity is that they are often very cryptic to analyze when difference comes from some ifdefs or other things not obvious in the sources. Honza * g++.dg/lto/odr-2_0.C: Drop dg-lto-options. * g++.dg/lto/odr-3_0.C: Likewise; harden for optimizing compilatoin. Index: g++.dg/lto/odr-2_0.C === --- g++.dg/lto/odr-2_0.C(revision 266382) +++ g++.dg/lto/odr-2_0.C(working copy) @@ -1,5 +1,4 @@ // { dg-lto-do link } -// { dg-lto-options { -O0 -flto } } enum a {} b; // { dg-lto-warning "6: type 'a' violates the C\\+\\+ One Definition Rule" } int main(void) Index: g++.dg/lto/odr-3_0.C === --- g++.dg/lto/odr-3_0.C(revision 266382) +++ g++.dg/lto/odr-3_0.C(working copy) @@ -1,12 +1,32 @@ // { dg-lto-do link } -// { dg-lto-options { -O0 -flto } } typedef struct { int a; // { dg-lto-message "the first difference of corresponding definitions is field 'a'" } -} YYSTYPE; // { dg-lto-warning "3: warning: type ‘struct YYSTYPE’ violates the C\\+\\+ One Definition Rule" } -union yyalloc { // { dg-lto-warning "7: type ‘union yyalloc’ violates the C\\+\\+ One Definition Rule" } +} YYSTYPE; // { dg-lto-message "violates the C\\+\\+ One Definition Rule" 2 } + // Here we get warning and a n
Re: [PATCH v2 2/3] Add a pass to automatically add ptwrite instrumentation
On Thu, Nov 22, 2018 at 02:53:11PM +0100, Richard Biener wrote: > > the optimizer moving it around over function calls etc.? > > The instrumentation should still be useful when the program > > crashes, so we don't want to delay logging too much. > > You can't avoid moving it, yes. But it can be even moved now, effectively > detaching it from the "interesting" $pc ranges we have debug location lists > for? > > > > Maybe even instead pass it a number of bytes so it models how atomics > > > work. > > > > How could that reject float? > > Why does it need to reject floats? Note you are already rejecting floats > in the instrumentation pass. The backend doesn't know how to generate the code for ptwrite %xmm0. So it would either need to learn about all these conversions, or reject them in the hook so that the middle end eventually can generate them. Or maybe hard code the knowledge in the middle end? > > Mode seems better for now. > > > > Eventually might support float/double through memory, but not in the > > first version. > > Why does movq %xmm0, %rax; ptwrite %rax not work? It works of course, the question is just who is in charge of figuring out that the movq needs to be generated (and that it's not a round, but a bit cast) > Fair enough, the instrumentation would need to pad out smaller values > and/or split larger values. I think 1 and 2 byte values would be interesting > so you can support char and shorts. Eventually 16byte values for __int128 > or vectors. Right. And for copies/clears too. But I was hoping to just get the basics solid and then do these more advanced features later. > > > btw, I wonder whether the vartrace attribute should have an argument like > > > vartrace(locals,reads,...)? > > > > I was hoping this could be delayed until actually needed. It'll need > > some changes because I don't want to do the full parsing for every decl > > all the time, so would need to store a bitmap of options somewhere > > in tree. > > Hmm, indeed. I wonder if you need the function attribute at all then? Maybe I really would like an opt-in per function. I think that's an important use case. Just instrument a few functions that contribute to the bug you're debugging to cut down the bandwidth. The idea was that __attribute__((vartrace)) for a function would log everything in that function, including locals. I could see a use case to opt-in into a function without locals (mainly because local tracking is more likely to cause trace overflows if there is a lot of execution). But I think I can do without that in v1 might add it later. > That is, on types and decls you'd interpret it as if locals tracing is on > then locals of type are traced but otherwise locals of type are not traced? When the opt-in is on the type or the variable then the variable should be tracked, even if it is an local (or maybe even a cast) The locals check is mainly for the command line. > That is, if I just want to trace variable i and do > > int i __attribute__((vartrace)); > > then what options do I enable to make that work and how can I avoid > tracing anything else? Similar for Just enable -mptwrite (or nothing if it's implied with -mcpu=...) > > typedef int traced_int __attribute_((vartrace)); Same. > > ? I guess we'd need -fvar-trace=locals to get the described effect for > the type attribute and then -fvar-trace=locals,all to have _all_ locals > traced? Or -fvar-trace=locals,only-marked? ... or forgo with the idea > of marking types? You want a command line override to not trace if the attribute is set? -mno-ptwrite would work. Could also add something separate in -fvartrace (perhaps implied in =off) but not sure if it's worth it. I suppose it could make sense if someone wants to use the _ptwrite builtin separately still. I'll add it to =off. > so there's no 'i' anymore. If you enable debug-info you Hmm I guess that's ok for now. I suppose it'll work with -Og. > and instrumenting late. (but ptwrite is only > interesting for optimized code, no?) It's very interesting for non optimized code too. In fact that would be a common use case during debugging. -Andi
[RFH] testcases for lto-stream-out dumps
Hi, the following testcase is supposed to check that "stream_me_once" is streamed just once from WPA to ltrans and thus I want to scan dump .000i.0.lto-stream-out I think however that dejagnu is confused by the extra .0 in filename (which distinguishes the partition number and is intended) and I get FAIL: gcc.dg/lto/type-merge-1 c_lto_type-merge-1_0.o-c_lto_type-merge-1_1.o link, -O0 -flto -flto-partition=none -fuse-linker-plugin ERROR: /aux/hubicka/trunk4/gcc/testsuite/gcc.dg/lto/type-merge-1_0.c: error executing dg-final: bad level "6" Any idea what to do here? I found discussion at https://patchwork.ozlabs.org/patch/330161/ but I am still unsure how to fix this. Honza Index: gcc.dg/lto/type-merge-1_0.c === --- gcc.dg/lto/type-merge-1_0.c (nonexistent) +++ gcc.dg/lto/type-merge-1_0.c (working copy) @@ -0,0 +1,9 @@ +/* { dg-lto-do link } */ +/* { dg-extra-ld-options {"-fdump-ipa-lto-stream-out -fdump-noaddr"} } */ +enum a {aa,bb,cc}; +struct stream_me_once {enum a *ptr[5];} b; +void set(void) +{ + b.ptr[0] = 0; +} +/* { dg-final { scan-wpa-ipa-dump-times "record_type . stream_me_once" 1 "0.lto-stream-out" } } */ Index: gcc.dg/lto/type-merge-1_1.c === --- gcc.dg/lto/type-merge-1_1.c (nonexistent) +++ gcc.dg/lto/type-merge-1_1.c (working copy) @@ -0,0 +1,10 @@ +#include +enum a ; +struct stream_me_once {enum a *ptr[5];} b; +void set (void); +int +main(void) +{ + set(); + return (size_t)b.ptr[0]; +}
[PATCH, i386]: Simplify ix86_check_avx_upper_register
2018-11-22 Uros Bizjak * config/i386/i386.c (ix86_check_avx_upper_register): Return true for all SSE registers with mode bitsize > 128. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros. Index: config/i386/i386.c === --- config/i386/i386.c (revision 266382) +++ config/i386/i386.c (working copy) @@ -18856,12 +18856,7 @@ static bool ix86_check_avx_upper_register (const_rtx exp) { - if (SUBREG_P (exp)) -exp = SUBREG_REG (exp); - - return (REG_P (exp) - && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)) - || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp; + return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; } /* Return needed mode for entity in optimize_mode_switching pass. */
Re: [PATCH] Small formatting cleanup in i386.c
On Thu, Nov 22, 2018 at 8:51 PM Jakub Jelinek wrote: > > Hi! > > And while working on the previously posted patch, I've noticed a ton of > lines with = at the end of line rather than at the start of next one > (I think = at the end of line is fine only for array initializers). > > Bootstrapped/regtested on x86_64-linux, ok for trunk? > > 2018-11-22 Jakub Jelinek > > * config/i386/i386.c (ix86_option_override_internal, > ix86_can_inline_p, classify_argument, construct_container, > ix86_expand_prologue, ix86_expand_split_stack_prologue, > ix86_expand_carry_flag_compare, expand_set_or_movmem_via_loop, > expand_setmem_epilogue_via_loop, promote_duplicated_reg, > ix86_expand_set_or_movmem, ix86_init_builtins_va_builtins_abi): > Formatting fixes. LGTM. Thanks, Uros. > --- gcc/config/i386/i386.c.jj 2018-11-22 11:35:08.294512244 +0100 > +++ gcc/config/i386/i386.c 2018-11-22 11:34:09.868478428 +0100 > @@ -4655,8 +4655,8 @@ ix86_option_override_internal (bool main >= build_target_option_node (opts); > >if (opts->x_flag_cf_protection != CF_NONE) > -opts->x_flag_cf_protection = > - (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); > +opts->x_flag_cf_protection > + = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); > >if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) > maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, > @@ -5472,10 +5472,10 @@ ix86_can_inline_p (tree caller, tree cal >struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); >struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); >bool ret = false; > - bool always_inline = > - (DECL_DISREGARD_INLINE_LIMITS (callee) > - && lookup_attribute ("always_inline", > - DECL_ATTRIBUTES (callee))); > + bool always_inline > += (DECL_DISREGARD_INLINE_LIMITS (callee) > + && lookup_attribute ("always_inline", > + DECL_ATTRIBUTES (callee))); > >cgraph_node *callee_node = cgraph_node::get (callee); >/* Callee's isa options should be a subset of the caller's, i.e. a SSE4 > @@ -7372,8 +7372,8 @@ static int > classify_argument (machine_mode mode, const_tree type, >enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) > { > - HOST_WIDE_INT bytes = > -(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE > (mode); > + HOST_WIDE_INT bytes > += mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE > (mode); >int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); > >/* Variable sized entities are always passed/returned in memory. */ > @@ -7429,9 +7429,8 @@ classify_argument (machine_mode mode, co >i < ((int_bit_position (field) + (bit_offset % 64)) > + tree_to_shwi (DECL_SIZE (field)) > + 63) / 8 / 8; i++) > - classes[i] = > - merge_classes (X86_64_INTEGER_CLASS, > -classes[i]); > + classes[i] > + = merge_classes (X86_64_INTEGER_CLASS, classes[i]); > } > else > { > @@ -7468,8 +7467,8 @@ classify_argument (machine_mode mode, co > pos = (int_bit_position (field) > + (bit_offset % 64)) / 8 / 8; > for (i = 0; i < num && (i + pos) < words; i++) > - classes[i + pos] = > - merge_classes (subclasses[i], classes[i + pos]); > + classes[i + pos] > + = merge_classes (subclasses[i], classes[i + pos]); > } > } > } > @@ -7824,8 +7823,8 @@ construct_container (machine_mode mode, >static bool issued_x87_ret_error; > >machine_mode tmpmode; > - int bytes = > -(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE > (mode); > + int bytes > += mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE > (mode); >enum x86_64_reg_class regclass[MAX_CLASSES]; >int n; >int i; > @@ -13366,8 +13365,8 @@ ix86_expand_prologue (void) >&& frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE >&& !sse_registers_saved) > { > - HOST_WIDE_INT sse_size = > - frame.sse_reg_save_offset - frame.reg_save_offset; > + HOST_WIDE_INT sse_size > + = frame.sse_reg_save_offset - frame.reg_save_offset; > >gcc_assert (int_registers_saved); > > @@ -14648,8 +14647,8 @@ ix86_expand_split_stack_prologue (void) > > if (split_stack_fn_large == NULL_RTX) > { > - split_stack_fn_large = > - gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); > + spli
Re: [PATCH] Clean up stack-protector-guard handling in ix86_option_override_internal
On Thu, Nov 22, 2018 at 8:47 PM Jakub Jelinek wrote: > > Hi! > > While adjusting the PR85644 patch, I've noticed that while pretty much > the whole ix86_option_override_internal works only with opts_set->x_ > and opts->x_, the stack protector guard code was accessing > global_options_set.x_ or using the macros that expand to > global_options.x_ . While it is probably not breaking anything because > the target attribute doesn't (at least so far) allow any of these options, > it looks inconsistent with the rest. > > So, the following patch adjusts it to do what the rest of > ix86_option_override_internal is doing. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2018-11-22 Jakub Jelinek > > * config/i386/i386.c (ix86_option_override_internal): For > stack_protector_guard related options, use opts_set->x_ instead > of global_options_set. and prefix options with opts->x_ . Move > defaults for offset and reg into else block. OK. Thanks, Uros. > --- gcc/config/i386/i386.c.jj 2018-11-22 10:44:32.767690142 +0100 > +++ gcc/config/i386/i386.c 2018-11-22 11:35:08.294512244 +0100 > @@ -4568,14 +4568,10 @@ ix86_option_override_internal (bool main > opts->x_ix86_stack_protector_guard = SSP_GLOBAL; > } > > -#ifdef TARGET_THREAD_SSP_OFFSET > - ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; > -#endif > - > - if (global_options_set.x_ix86_stack_protector_guard_offset_str) > + if (opts_set->x_ix86_stack_protector_guard_offset_str) > { >char *endp; > - const char *str = ix86_stack_protector_guard_offset_str; > + const char *str = opts->x_ix86_stack_protector_guard_offset_str; > >errno = 0; >int64_t offset; > @@ -4595,20 +4591,16 @@ ix86_option_override_internal (bool main > error ("%qs is not a valid offset " >"in -mstack-protector-guard-offset=", str); > > - ix86_stack_protector_guard_offset = offset; > + opts->x_ix86_stack_protector_guard_offset = offset; > } > +#ifdef TARGET_THREAD_SSP_OFFSET > + else > +opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; > +#endif > > - ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; > - > - /* The kernel uses a different segment register for performance > - reasons; a system call would not have to trash the userspace > - segment register, which would be expensive. */ > - if (ix86_cmodel == CM_KERNEL) > -ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; > - > - if (global_options_set.x_ix86_stack_protector_guard_reg_str) > + if (opts_set->x_ix86_stack_protector_guard_reg_str) > { > - const char *str = ix86_stack_protector_guard_reg_str; > + const char *str = opts->x_ix86_stack_protector_guard_reg_str; >addr_space_t seg = ADDR_SPACE_GENERIC; > >/* Discard optional register prefix. */ > @@ -4626,9 +4618,19 @@ ix86_option_override_internal (bool main >if (seg == ADDR_SPACE_GENERIC) > error ("%qs is not a valid base register " >"in -mstack-protector-guard-reg=", > - ix86_stack_protector_guard_reg_str); > + opts->x_ix86_stack_protector_guard_reg_str); > + > + opts->x_ix86_stack_protector_guard_reg = seg; > +} > + else > +{ > + opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; > > - ix86_stack_protector_guard_reg = seg; > + /* The kernel uses a different segment register for performance > +reasons; a system call would not have to trash the userspace > +segment register, which would be expensive. */ > + if (opts->x_ix86_cmodel == CM_KERNEL) > + opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; > } > >/* Handle -mmemcpy-strategy= and -mmemset-strategy= */ > > Jakub
Remove sorting while register odr types
Hi, late in stage4 of GCC 8 Martin introduced code that sorts odr types so they get registered independently on the hashing order since we was getting different sets of warnings depending on the target in some of testcases. This should be solved now by recursing to subtypes while register ODR types and also by breaking up the SCCs in majority of cases. Note that one SCC still can contain more than one type when they are entangled via TYPE_CONTEXT, so the order of warings can be target specific, but they should be the same. This loop shows up as mesurable factor of WPA due to slow location lokup for large translation units. This patch removes it. Bootstrapped/regtested x86_64-linux, will commit it shortly. Honza * lto.c (cmp_type_location): Remove. (lto_read_decls): Do not allocate odr_types. Index: lto.c === --- lto.c (revision 266382) +++ lto.c (working copy) @@ -1712,36 +1712,6 @@ unify_scc (struct data_in *data_in, unsi } -/* Compare types based on source file location. */ - -static int -cmp_type_location (const void *p1_, const void *p2_) -{ - tree *p1 = (tree*)(const_cast(p1_)); - tree *p2 = (tree*)(const_cast(p2_)); - if (*p1 == *p2) -return 0; - - tree tname1 = TYPE_NAME (*p1); - tree tname2 = TYPE_NAME (*p2); - expanded_location xloc1 = expand_location (DECL_SOURCE_LOCATION (tname1)); - expanded_location xloc2 = expand_location (DECL_SOURCE_LOCATION (tname2)); - - const char *f1 = lbasename (xloc1.file); - const char *f2 = lbasename (xloc2.file); - int r = strcmp (f1, f2); - if (r == 0) -{ - int l1 = xloc1.line; - int l2 = xloc2.line; - if (l1 != l2) - return l1 - l2; - return xloc1.column - xloc2.column; -} - else -return r; -} - /* Read all the symbols from buffer DATA, using descriptors in DECL_DATA. RESOLUTIONS is the set of symbols picked by the linker (read from the resolution file when the linker plugin is being used). */ @@ -1758,7 +1728,6 @@ lto_read_decls (struct lto_file_decl_dat unsigned int i; const uint32_t *data_ptr, *data_end; uint32_t num_decl_states; - auto_vec odr_types; lto_input_block ib_main ((const char *) data + main_offset, header->main_size, decl_data->mode_table); @@ -1828,7 +1797,7 @@ lto_read_decls (struct lto_file_decl_dat if (!TYPE_CANONICAL (t)) gimple_register_canonical_type (t); if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t)) - odr_types.safe_push (t); + register_odr_type (t); } /* Link shared INTEGER_CSTs into TYPE_CACHED_VALUEs of its type which is also member of this SCC. */ @@ -1890,15 +1859,6 @@ lto_read_decls (struct lto_file_decl_dat *slot = state; } - /* Sort types for the file before registering in ODR machinery. */ - if (lto_location_cache::current_cache) -lto_location_cache::current_cache->apply_location_cache (); - odr_types.qsort (cmp_type_location); - - /* Register ODR types. */ - for (unsigned i = 0; i < odr_types.length (); i++) -register_odr_type (odr_types[i]); - if (data_ptr != data_end) internal_error ("bytecode stream: garbage at the end of symbols section");
Fix another ICE in ipa-devirt
Hi, this patch fixes ICE in ipa-devirt that was caused by simple typo, but I also noticed that the comparsion machinery is way too active on type varaints outputting main variants as mismatching when only variants differs by quallifiers. Fixed thus. Bootstrapped/regtested x86_64-linux, comitted. PR lto/88142 * ipa-devirt.c (type_variants_equivalent_p): Drop warn and warned parameters; do not warn here. (odr_subtypes_equivalent_p): Likewise. (warn_odr): Fix typo. (warn_types_mismatch): Do not output confused warnings on integer types. (odr_types_equivalent_p): Update. * g++.dg/lto/odr-5_0.C: New testcase. * g++.dg/lto/odr-5_1.C: New testcase. Index: ipa-devirt.c === --- ipa-devirt.c(revision 266382) +++ ipa-devirt.c(working copy) @@ -636,32 +636,17 @@ set_type_binfo (tree type, tree binfo) same type. */ static bool -type_variants_equivalent_p (tree t1, tree t2, bool warn, bool *warned) +type_variants_equivalent_p (tree t1, tree t2) { if (TYPE_QUALS (t1) != TYPE_QUALS (t2)) -{ - warn_odr (t1, t2, NULL, NULL, warn, warned, - G_("a type with different qualifiers is defined in another " - "translation unit")); - return false; -} +return false; if (comp_type_attributes (t1, t2) != 1) -{ - warn_odr (t1, t2, NULL, NULL, warn, warned, - G_("a type with different attributes " - "is defined in another translation unit")); - return false; -} +return false; if (COMPLETE_TYPE_P (t1) && COMPLETE_TYPE_P (t2) && TYPE_ALIGN (t1) != TYPE_ALIGN (t2)) -{ - warn_odr (t1, t2, NULL, NULL, warn, warned, - G_("a type with different alignment " - "is defined in another translation unit")); - return false; -} +return false; return true; } @@ -669,7 +654,7 @@ type_variants_equivalent_p (tree t1, tre /* Compare T1 and T2 based on name or structure. */ static bool -odr_subtypes_equivalent_p (tree t1, tree t2, bool warn, bool *warned, +odr_subtypes_equivalent_p (tree t1, tree t2, hash_set *visited, location_t loc1, location_t loc2) { @@ -698,7 +683,7 @@ odr_subtypes_equivalent_p (tree t1, tree return false; if (!types_same_for_odr (t1, t2)) return false; - if (!type_variants_equivalent_p (t1, t2, warn, warned)) + if (!type_variants_equivalent_p (t1, t2)) return false; /* Limit recursion: If subtypes are ODR types and we know that they are same, be happy. */ @@ -725,7 +710,7 @@ odr_subtypes_equivalent_p (tree t1, tree if (!odr_types_equivalent_p (TYPE_MAIN_VARIANT (t1), TYPE_MAIN_VARIANT (t2), false, NULL, visited, loc1, loc2)) return false; - if (!type_variants_equivalent_p (t1, t2, warn, warned)) + if (!type_variants_equivalent_p (t1, t2)) return false; return true; } @@ -1017,7 +1002,7 @@ warn_odr (tree t1, tree t2, tree st1, tr auto_diagnostic_group d; if (t1 != TYPE_MAIN_VARIANT (t1) - && TYPE_NAME (t1) != DECL_NAME (TYPE_MAIN_VARIANT (t1))) + && TYPE_NAME (t1) != TYPE_NAME (TYPE_MAIN_VARIANT (t1))) { if (!warning_at (DECL_SOURCE_LOCATION (TYPE_NAME (TYPE_MAIN_VARIANT (t1))), OPT_Wodr, "type %qT (typedef of %qT) violates the " @@ -1279,6 +1264,11 @@ warn_types_mismatch (tree t1, tree t2, l } if (types_odr_comparable (t1, t2) + /* We make assign integers mangled names to be able to handle +signed/unsigned chars. Accepting them here would however lead to +confussing message like +"type ‘const int’ itself violates the C++ One Definition Rule" */ + && TREE_CODE (t1) != INTEGER_TYPE && types_same_for_odr (t1, t2)) inform (loc_t1, "type %qT itself violates the C++ One Definition Rule", t1); @@ -1410,7 +1400,7 @@ odr_types_equivalent_p (tree t1, tree t2 } if (!odr_subtypes_equivalent_p (TREE_TYPE (t1), TREE_TYPE (t2), - warn, warned, visited, loc1, loc2)) + visited, loc1, loc2)) { warn_odr (t1, t2, NULL, NULL, warn, warned, G_("it is defined as a pointer to different type " @@ -1424,7 +1414,6 @@ odr_types_equivalent_p (tree t1, tree t2 if ((TREE_CODE (t1) == VECTOR_TYPE || TREE_CODE (t1) == COMPLEX_TYPE) && !odr_subtypes_equivalent_p (TREE_TYPE (t1), TREE_TYPE (t2), -warn, warned, visited, loc1, loc2)) { /* Probably specific enough. */ @@ -1444,7 +1433,7 @@ odr_types_equivalent_p (tree t1, tree t2 /* Array types are the same if the ele
[PATCH] [PR85569] skip constexpr target_expr constructor dummy type conversion
The testcase is the work-around testcase for the PR; even that had started failing. The problem was that, when unqualifying the type of a TARGET_EXPR, we'd create a variant of the type, then request the conversion of the TARGET_EXPR_INITIAL to that variant type. Though the types are different pointer-wise, they're the same_type_p, so the resulting modified expr compares cp_tree_equal to the original, which maybe_constant_value flags as an error. There's no reason to construct an alternate TARGET_EXPR or CONSTRUCTOR just because of an equivalent type, except for another spot that expected pointer equality that would no longer be satisfied. Without relaxing the assert in constexpr_call_hasher::equal, g++.robertl/eb73.C would trigger an assertion failure. Regstrapped on i686- and x86_64-linux-gnu. Ok to install? for gcc/cp/ChangeLog PR c++/85569 * constexpr.c (adjust_temp_type): Test for type equality with same_type_p. for gcc/testsuite PR c++/85569 * g++.dg/cpp1z/pr85569.C: New. --- gcc/cp/constexpr.c |4 + gcc/testsuite/g++.dg/cpp1z/pr85569.C | 93 ++ 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/pr85569.C diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 92fd2b2d9d59..bb5d1301b332 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -1060,7 +1060,7 @@ constexpr_call_hasher::equal (constexpr_call *lhs, constexpr_call *rhs) { tree lhs_arg = TREE_VALUE (lhs_bindings); tree rhs_arg = TREE_VALUE (rhs_bindings); - gcc_assert (TREE_TYPE (lhs_arg) == TREE_TYPE (rhs_arg)); + gcc_assert (same_type_p (TREE_TYPE (lhs_arg), TREE_TYPE (rhs_arg))); if (!cp_tree_equal (lhs_arg, rhs_arg)) return false; lhs_bindings = TREE_CHAIN (lhs_bindings); @@ -1276,7 +1276,7 @@ cxx_eval_builtin_function_call (const constexpr_ctx *ctx, tree t, tree fun, static tree adjust_temp_type (tree type, tree temp) { - if (TREE_TYPE (temp) == type) + if (TREE_TYPE (temp) == type || same_type_p (TREE_TYPE (temp), type)) return temp; /* Avoid wrapping an aggregate value in a NOP_EXPR. */ if (TREE_CODE (temp) == CONSTRUCTOR) diff --git a/gcc/testsuite/g++.dg/cpp1z/pr85569.C b/gcc/testsuite/g++.dg/cpp1z/pr85569.C new file mode 100644 index ..aec543041a0f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/pr85569.C @@ -0,0 +1,93 @@ +// { dg-do compile { target c++17 } } + +#include +#include + +#define LIFT_FWD(x) std::forward(x) + +template +inline +constexpr +auto +equal( + T &&t) +{ + return [t = std::forward(t)](const auto& obj) +-> decltype(obj == t) +{ + return obj == t; +}; +} + +template +struct is_tuple_invocable; + +template +struct is_tuple_invocable> +{ + using type = typename std::is_invocable::type; +}; + +template +inline +constexpr +auto +compose( + F&& f +) + noexcept +-> F +{ + return std::forward(f); +} + +namespace detail { + template + inline + constexpr + auto + compose( +std::true_type, +F&& f, +Tail&& tail, +T&& ... objs) + noexcept(noexcept(f(tail(std::forward(objs)... + -> decltype(f(tail(std::forward(objs)...))) + { +return f(tail(std::forward(objs)...)); + } +} +template +inline +constexpr +auto +compose( + F&& f, + Fs&&... fs) +{ + return [f = std::forward(f), tail = compose(std::forward(fs)...)] +(auto&& ... objs) +-> decltype(detail::compose(typename std::is_invocable(fs)...)), decltype(objs)...>::type{}, +f, +compose(std::forward(fs)...), +LIFT_FWD(objs)...)) + { +using tail_type = decltype(compose(std::forward(fs)...)); + +#ifndef NOT_VIA_TUPLE +using args_type = std::tuple; +constexpr auto unitail = typename is_tuple_invocable::type{}; +#else +constexpr auto unitail = typename std::is_invocable::type{}; +#endif + +return detail::compose(unitail, f, tail, LIFT_FWD(objs)...); + }; +} + +template +constexpr auto eq = equal(N); + +static_assert(compose(eq<3>, + std::plus<>{})(1,2), + "compose is constexpr"); -- Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo Be the change, be Free! FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe
[PATCH] [PR86397] set p_t_decl while canonicalizing eh specs for mangling
Mangling visits the base template function type, prior to template resolution, and on such types, exception specifications may contain unresolved noexcept expressions. nothrow_spec_p is called on them even whe exception specifications are not part of function types, and it rejects unresolved noexcept expressions if processing_template_decl is not set. Setting processing_template_decl while mangling what is indeed the (generic) type of a function template would solve this problem, but it would cause others: no_linkage_check, for example, returns early if processing_template_decl, and this regresses g++.dg/pr79091.C and g++.dg/abi/no-linkage-expr1.C. So, instead of setting processing_template_decl throughout, I'm introducing another flag, processing_template_function_signature, and using it to set processing_template_decl only while canonicalizing the exception spec, which is where nothrow_spec_p gets called. Regstrapped on i686- and x86_64-linux-gnu. Ok to install? for gcc/cp/ChangeLog PR c++/86397 * mangle.c (processing_template_function_signature): New var. (write_encoding): Set it while mangling generic template type. (canonicalize_for_substitution): Set processing_template_decl while handling exception specs if processing_template_function_signature. for gcc/testsuite/ChangeLog PR c++/86397 * g++.dg/cpp0x/pr86397-1.C: New. * g++.dg/cpp0x/pr86397-2.C: New. --- gcc/cp/mangle.c| 19 +++ gcc/testsuite/g++.dg/cpp0x/pr86397-1.C |4 gcc/testsuite/g++.dg/cpp0x/pr86397-2.C |4 3 files changed, 27 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr86397-1.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr86397-2.C diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c index 64415894bc57..a499ef97b3c6 100644 --- a/gcc/cp/mangle.c +++ b/gcc/cp/mangle.c @@ -170,6 +170,17 @@ integer_type_codes[itk_none] = '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0' }; +/* Exception specifications may carry unresolved noexcept expressions + before template substitution, but nothrow_spec_p will fail an + assert check if it comes across such a spec while + !processing_template_decl. So, when we start processing the + signature of a template function, that may contain a such + unresolved expressions, we set this variable. Then, as we process + the exception spec, we set processing_template_decl. We don't want + to set processing_template_decl throughout, becuase this affects + other relevant tests, such as no_linkage_check. */ +static int processing_template_function_signature; + static int decl_is_template_id (const tree, tree* const); /* Functions for handling substitutions. */ @@ -418,7 +429,11 @@ canonicalize_for_substitution (tree node) || TREE_CODE (node) == METHOD_TYPE) { node = build_ref_qualified_type (node, type_memfn_rqual (orig)); + if (processing_template_function_signature) + processing_template_decl++; tree r = canonical_eh_spec (TYPE_RAISES_EXCEPTIONS (orig)); + if (processing_template_function_signature) + processing_template_decl--; if (flag_noexcept_type) node = build_exception_variant (node, r); else @@ -836,6 +851,7 @@ write_encoding (const tree decl) write_bare_function_type -- otherwise, it will get confused about which artificial parameters to skip. */ d = NULL_TREE; + ++processing_template_function_signature; } else { @@ -846,6 +862,9 @@ write_encoding (const tree decl) write_bare_function_type (fn_type, mangle_return_type_p (decl), d); + + if (tmpl) + --processing_template_function_signature; } } diff --git a/gcc/testsuite/g++.dg/cpp0x/pr86397-1.C b/gcc/testsuite/g++.dg/cpp0x/pr86397-1.C new file mode 100644 index ..4f9f5fa7e4c8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/pr86397-1.C @@ -0,0 +1,4 @@ +// { dg-do compile { target c++11 } } +void e(); +template void f(int() noexcept(e)) {} +template void f(int()); // { dg-error "does not match" "" { target c++17 } } diff --git a/gcc/testsuite/g++.dg/cpp0x/pr86397-2.C b/gcc/testsuite/g++.dg/cpp0x/pr86397-2.C new file mode 100644 index ..fb43499526e8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/pr86397-2.C @@ -0,0 +1,4 @@ +// { dg-do compile { target c++11 } } +void e(); +template void f(int() noexcept(e)) {} +template void f(int() noexcept); -- Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo Be the change, be Free! FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe
Re: [PATCH] Add -fpad-source option
On 11/22/18 11:43 AM, Jakub Jelinek wrote: On Thu, Nov 22, 2018 at 11:00:13AM +0100, Jakub Jelinek wrote: Ok for trunk if it passes bootstrap/regtest (so far just checked with make check-gfortran RUNTESTFLAGS='dg.exp=pad_source*' )? Note, it passed bootstrap/regtest on x86_64-linux. OK, and thanks for the work. Jerry
Re: RFA: patch for test PR70669 test
Hi! On Thu, Nov 22, 2018 at 02:47:10PM -0500, Vladimir Makarov wrote: > Today I committed a patch > > https://gcc.gnu.org/ml/gcc-patches/2018-11/msg01945.html > > But it makes gcc.target/powerpc/pr70669.c to fail. Here is the patch > to fix the failure. The expected code assumes that variable r should > get a general reg. I suspect the expectation is wrong. There are three > possible choices for r: a general reg, a vsx reg, and memory. > > The cost of move of vsx to/from mem is 4, the cost of move of general > reg of this mode to/from mem is 8, and the cost of move of general reg > to/from vsx is 12. > > So approximately if r is assigned memory we have cost 8 * 2 (asm) + 4 > * 2 (plus) = 24 (when we use memory at *q), if r is assigned to vsx the > cost is 12*2(asm) + 4(r=*q) = 28, and if r is assigned a general reg the > cost is 12 * 2 (plus) + 8 (r = *q) = 32. > > ira-costs should choose mem although it chose a general reg before my > patch (it is a big topic why it chose a general reg. In brief, it is > because some inherent drawbacks of the current cost calculation > algorithm). The patch I recently submitted solves some drawbacks and > memory is chosen for r. That sounds all correct. Thanks for fixing this! > To generate the expected code with r in a general reg, I just decrease > the number of getting r value into vsx reg by changing r + r onto -r. > > Is the following patch ok for trunk? Yes, the test now again tests what it is meant to test (that you get m[tf]vsrd from reloads). It's fine for trunk, thanks! Segher 2018-11-22 Vladimir Makarov * gcc.target/powerpc/pr70669.c: Use unary minus instead of addition.
[doc, committed] Clarify docs for designated initializers of unions
I've checked in this patch for PR 53608. It's derived from the text suggested in the issue. -Sandra 2018-11-22 Sandra Loosemore Alan Coopersmith PR c/53608 gcc/ * doc/extend.texi (Designated Inits): Clarify handling of multiple initializers for unions. Index: gcc/doc/extend.texi === --- gcc/doc/extend.texi (revision 266400) +++ gcc/doc/extend.texi (working copy) @@ -2104,7 +2104,7 @@ Another syntax that has the same meaning struct point p = @{ y: yvalue, x: xvalue @}; @end smallexample -Omitted field members are implicitly initialized the same as objects +Omitted fields are implicitly initialized the same as for objects that have static storage duration. @cindex designators @@ -2162,11 +2162,13 @@ example, with the @samp{struct point} de struct point ptarray[10] = @{ [2].y = yv2, [2].x = xv2, [0].x = xv0 @}; @end smallexample -@noindent -If the same field is initialized multiple times, it has the value from -the last initialization. If any such overridden initialization has -side effect, it is unspecified whether the side effect happens or not. -Currently, GCC discards them and issues a warning. +If the same field is initialized multiple times, or overlapping +fields of a union are initialized, the value from the last +initialization is used. When a field of a union is itself a structure, +the entire structure from the last field initialized is used. If any previous +initializer has side effect, it is unspecified whether the side effect +happens or not. Currently, GCC discards the side-effecting +initializer expressions and issues a warning. @node Case Ranges @section Case Ranges
Re: [PATCH] Avoid duplicate -Warray-bounds warnings (PR tree-optimization/86614)
On Thu, 22 Nov 2018, Jakub Jelinek wrote: > Hi! > > On the following testcases, we warn twice, once in the FE array bounds > warning code and once later on. > The FE array bounds warning code sets TREE_NO_WARNING on the corresponding > MEM_REF, so it is easy to avoid the duplicate warning later. > > Bootstrapped/regtested on x86_64-linux, ok for trunk? OK. Richard. > 2018-11-22 Jakub Jelinek > > PR tree-optimization/86614 > * gimple-ssa-warn-restrict.c (maybe_diag_offset_bounds): Return early > if TREE_NO_WARNING is set on ref.ref. > > * c-c++-common/Warray-bounds-2.c (wrap_strncpy_dstarray_diff_neg, > call_strncpy_dstarray_diff_neg): Don't expect late -Warray-bounds > warnings, just early ones from FE. Remove dg-prune-output. > * c-c++-common/Warray-bounds-6.c: New test. > > --- gcc/gimple-ssa-warn-restrict.c.jj 2018-10-19 10:59:08.279393367 +0200 > +++ gcc/gimple-ssa-warn-restrict.c2018-11-22 18:13:33.813739648 +0100 > @@ -1582,6 +1582,9 @@ maybe_diag_offset_bounds (location_t loc >if (!warn_array_bounds) > return false; > > + if (ref.ref && TREE_NO_WARNING (ref.ref)) > +return false; > + >offset_int ooboff[] = { ref.offrange[0], ref.offrange[1] }; >tree oobref = ref.offset_out_of_bounds (strict, ooboff); >if (!oobref) > --- gcc/testsuite/c-c++-common/Warray-bounds-2.c.jj 2018-07-23 > 09:46:57.352997850 +0200 > +++ gcc/testsuite/c-c++-common/Warray-bounds-2.c 2018-11-22 > 18:29:49.911602501 +0100 > @@ -201,18 +201,16 @@ void call_strncpy_dst_diff_max (const ch > static void > wrap_strncpy_dstarray_diff_neg (char *d, const char *s, ptrdiff_t i, size_t > n) > { > - strncpy (d + i, s, n); /* { dg-warning "offset -\[0-9\]+ is out of the > bounds \\\[0, 90] of object .ar10. with type .(struct )?Array ?\\\[2]." > "strncpy" } */ > -} > + strncpy (d + i, s, n); /* { dg-bogus "offset -\[0-9\]+ is out of the > bounds \\\[0, 90] of object .ar10. with type .(struct )?Array ?\\\[2]." > "strncpy" } */ > +} /* { dg-warning "array subscript -1 is outside array > bounds" "" { target *-*-* } .-1 } */ > > void call_strncpy_dstarray_diff_neg (const char *s, size_t n) > { > - struct Array ar10[2];/* { dg-message ".ar10. declared here" } */ > - sink (&ar10); > + struct Array ar10[2];/* { dg-bogus ".ar10. declared here" } */ > + sink (&ar10); /* { dg-message "while referencing" "" { > target *-*-* } .-1 } */ > >int off = (char*)ar10[1].a17 - (char*)ar10 + 1; >wrap_strncpy_dstarray_diff_neg (ar10[1].a17, s, -off, n); > >sink (&ar10); > } > - > -/* { dg-prune-output "outside array bounds" } */ > --- gcc/testsuite/c-c++-common/Warray-bounds-6.c.jj 2018-11-22 > 18:26:00.286397043 +0100 > +++ gcc/testsuite/c-c++-common/Warray-bounds-6.c 2018-11-22 > 18:31:55.479527492 +0100 > @@ -0,0 +1,18 @@ > +/* PR tree-optimization/86614 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -Warray-bounds" } */ > + > +extern char *strncpy (char *, const char *, __SIZE_TYPE__); > + > +void sink (void *); > + > +struct A { char b[17]; } a[2]; > + > +void g (const char *s, unsigned n) > +{ > + int i = (char *)a[1].b - (char *)a + 1; > + char *d = a[1].b; > + /* Ensure the same bug is not diagnosed more than once. */ > + strncpy (d + i, s, n); /* { dg-warning "array subscript \[0-9]+ is > outside array bounds of" } */ > + /* { dg-bogus "offset \[0-9]+ is out of the > bounds \\\[0, \[0-9]+\\\] of object 'a' with type" "" { target *-*-* } .-1 } > */ > +} > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)