Re: [PATCH 11/25] Simplify vec_merge according to the mask.
Andrew Stubbs writes: > On 26/09/18 17:48, Richard Sandiford wrote: >> Andrew Stubbs writes: >>> + /* Nested vec_merge. */ >>> + rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1); >>> + ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0)); >>> + ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1)); >> >> Think the last two should simplify to op0 and op3, which I guess >> means recursing on the "return XEXP (x, op);" > > I thought about doing that, but I noticed that, for example, > simplify_gen_unary does not recurse into its operand. Is that an > omission, or is it expected that those operands will already have been > simplified? Ah, yeah, each operand should already fully be simplified. But then the only thing we testing here compared to: /* Simple vec_merge. */ ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); is that we *don't* recurse. It would be worth adding a comment to say that, since if we both thought about it, I guess whoever comes next will too. And the assumption that existing VEC_MERGEs are fully simplified means we should return null: if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) { if (!side_effects_p (XEXP (x, 1 - op))) return XEXP (x, op); --->here } On keeping the complexity down: if (side_effects_p (x)) return NULL_RTX; makes this quadratic for chains of unary operations. Is it really needed? The code after it simply recurses on operands and doesn't discard anything itself, so it looks like the VEC_MERGE call to side_effects_p would be enough. Richard
Re: [PATCH] Fix PR87440, extra lexical block in inline instances
On Wed, 26 Sep 2018, Jason Merrill wrote: > On Wed, Sep 26, 2018 at 9:35 AM, Richard Biener wrote: > > > > We do not create a DW_AT_lexical_block for the outermost block in > > functions but we do for DW_AT_inlined_subroutines. That makes > > debuginfo look like if there were two of each local, the outer > > one (from the abstract instance) optimized out (visible via > > info locals in gdb). > > > > The following elides the outermost block also from inline instances. > > It's a bit tricky to reliably track that block given we remove unused > > blocks here and there. The trick is to have the block in the abstract > > instance _not_ point to itself (given we do not output it it isn't > > the abstract origin for itself). > > > > Bootstrapped on x86_64-unkown-linux-gnu, testing in progress. > > > > Again with some scan-assembler testcase, guality cannot do 'info locals'. > > > > OK? > > > > Thanks, > > Richard. > > > > 2018-09-26 Richard Biener > > > > PR debug/87440 > > * dwarf2out.c (set_block_origin_self): Do not mark outermost > > block as we do not output that. > > (gen_inlined_subroutine_die): Elide the originally outermost > > block, matching what we do for concrete instances. > > (decls_for_scope): Add parameter specifying whether to recurse > > to subblocks. > > > > * gcc.dg/debug/dwarf2/inline4.c: New testcase. > > > > Index: gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c > > === > > --- gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c (nonexistent) > > +++ gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c (working copy) > > @@ -0,0 +1,17 @@ > > +/* Verify that the inline instance has no extra DW_TAG_lexical_block > > between > > + the DW_TAG_inlined_subroutine and the DW_TAG_variable for the local. */ > > +/* { dg-options "-O -gdwarf -dA" } */ > > +/* { dg-do compile } */ > > +/* { dg-final { scan-assembler > > "DW_TAG_inlined_subroutine\[^\\(\]*\\(\[^\\)\]*\\)\[^\\(\]*\\(DIE > > \\(0x\[0-9a-f\]*\\) DW_TAG_formal_parameter\[^\\(\]*\\(DIE > > \\(0x\[0-9a-f\]*\\) DW_TAG_variable" } } */ > > +/* { dg-final { scan-assembler-times "DW_TAG_inlined_subroutine" 2 } } */ > > + > > +static int foo (int i) > > +{ > > + volatile int j = i + 3; > > + return j - 2; > > +} > > +int main() > > +{ > > + volatile int z = foo (-1); > > + return z; > > +} > > Index: gcc/dwarf2out.c > > === > > --- gcc/dwarf2out.c (revision 264640) > > +++ gcc/dwarf2out.c (working copy) > > @@ -3867,7 +3867,7 @@ static void gen_subroutine_type_die (tre > > static void gen_typedef_die (tree, dw_die_ref); > > static void gen_type_die (tree, dw_die_ref); > > static void gen_block_die (tree, dw_die_ref); > > -static void decls_for_scope (tree, dw_die_ref); > > +static void decls_for_scope (tree, dw_die_ref, bool = true); > > static bool is_naming_typedef_decl (const_tree); > > static inline dw_die_ref get_context_die (tree); > > static void gen_namespace_die (tree, dw_die_ref); > > @@ -22389,7 +22389,13 @@ set_block_origin_self (tree stmt) > > { > >if (BLOCK_ABSTRACT_ORIGIN (stmt) == NULL_TREE) > > { > > - BLOCK_ABSTRACT_ORIGIN (stmt) = stmt; > > + /* We do not mark the outermost block as we are not outputting it. > > +This is then a reliable way of determining whether we should > > +do the same for an inline instance. */ > > + if (TREE_CODE (BLOCK_SUPERCONTEXT (stmt)) != FUNCTION_DECL) > > + BLOCK_ABSTRACT_ORIGIN (stmt) = stmt; > > + else > > + gcc_assert (DECL_INITIAL (BLOCK_SUPERCONTEXT (stmt)) == stmt); > > > >{ > > tree local_decl; > > @@ -24149,7 +24155,24 @@ gen_inlined_subroutine_die (tree stmt, d > > add_high_low_attributes (stmt, subr_die); > >add_call_src_coords_attributes (stmt, subr_die); > > > > - decls_for_scope (stmt, subr_die); > > + /* The inliner creates an extra BLOCK for the parameter setup, > > + we want to merge that with the actual outermost BLOCK of the > > +inlined function to avoid duplicate locals in consumers. Note > > +we specially mark that not as origin-self. > > +Do that by doing the recursion to subblocks on the single subblock > > +of STMT. */ > > + bool unwrap_one = false; > > + if (BLOCK_SUBBLOCKS (stmt) && !BLOCK_CHAIN (BLOCK_SUBBLOCKS (stmt))) > > + { > > + tree origin = block_ultimate_origin (BLOCK_SUBBLOCKS (stmt)); > > + if (origin > > + && TREE_CODE (origin) == BLOCK > > + && !BLOCK_ABSTRACT_ORIGIN (origin)) > > Can we look at BLOCK_SUPERCONTEXT here rather than above? Ah, yes. It could be simply + && BLOCK_SUPERCONTEXT (origin) == decl) I guess. I also noticed that the very same bug was noticed in the fixed PP37801 in comment #4 by you but the fix that was installed didn't fix
[PATCH] GCOV: introduce --json-format.
Hi. For some time we've been providing an intermediate format as output of GCOV tool. It's documented in our code that primary consumer of it is lcov. Apparently that's not true: https://github.com/linux-test-project/lcov/issues/38#issuecomment-371203750 So that I decided to come up with providing the very similar intermediate format in JSON format. It's much easier for consumers to work with. I'm planning to leave the legacy format for GCC 9.1 and I'll document that it's deprecated. We can then remove that in next release. The patch contains a small change to json.{h,cc}, hope David can approve that? Patch is tested on x86_64-linux-gnu. Thanks, Martin gcc/ChangeLog: 2018-09-26 Martin Liska * Makefile.in: Make gcov dependant on json.o. * doc/gcov.texi: Document new option --json-format. * gcov.c (struct function_info): Rename name to m_name and demangled_name to m_demangled_name. (function_info::function_info): Likewise. (function_info::~function_info): Likewise. (function_info::get_demangled_name): New function. (function_info::get_name): Likewise. (main): Handle newly added flag_json_format. (print_usage): Likewise. (process_args): Likewise. (output_intermediate_line): Fix GNU coding style. (output_intermediate_json_line): New function. (get_gcov_intermediate_filename): Append .json for flag_json_format. (output_intermediate_file): Use get_name function. (output_json_intermediate_file): New. (generate_results): Use get_name function, output json intermediate format. (read_graph_file): Do not get demangled_name name. (read_count_file): Use get_name function. (solve_flow_graph): Likewise. (add_line_counts): Likewise. (accumulate_line_counts): Handle also flag_json_format. (output_function_details): Use get_name function. (output_lines): Likewise. * json.cc (test_writing_literals): Test new json::literal contructor. * json.h (class literal): Add new constructor for boolean type. --- gcc/Makefile.in | 2 +- gcc/doc/gcov.texi | 12 ++- gcc/gcov.c| 242 -- gcc/json.cc | 3 + gcc/json.h| 3 + 5 files changed, 211 insertions(+), 51 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 4b7cec82382..4570c271a30 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2909,7 +2909,7 @@ s-iov: build/gcov-iov$(build_exeext) $(BASEVER) $(DEVPHASE) $(SHELL) $(srcdir)/../move-if-change tmp-gcov-iov.h gcov-iov.h $(STAMP) s-iov -GCOV_OBJS = gcov.o +GCOV_OBJS = gcov.o json.o gcov$(exeext): $(GCOV_OBJS) $(LIBDEPS) +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) $(GCOV_OBJS) \ hash-table.o ggc-none.o $(LIBS) -o $@ diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi index 3b1b38aebfa..0e021fd1148 100644 --- a/gcc/doc/gcov.texi +++ b/gcc/doc/gcov.texi @@ -138,6 +138,7 @@ gcov [@option{-v}|@option{--version}] [@option{-h}|@option{--help}] [@option{-t}|@option{--stdout}] [@option{-u}|@option{--unconditional-branches}] [@option{-x}|@option{--hash-filenames}] + [@option{-z}|@option{--json-format}] @var{files} @c man end @c man begin SEEALSO @@ -254,6 +255,9 @@ branch:35,nottaken lcount:36,1,0 @end smallexample +The intermediate format option is deprecated, please use @option{--json-format} +option. + @item -j @itemx --human-readable Write counts in human readable format (like 24.6k). @@ -354,6 +358,12 @@ where the @var{source-file} component is the final filename part and the @var{md5} component is calculated from the full mangled name that would have been used otherwise. +@item -z +@itemx --json-format +Output gcov file in an easy-to-parse JSON intermediate format. +The format itself is self-descriptive and output is a single +@file{.gcov.json} file per @file{.gcda} file. No source code is required. + @end table @command{gcov} should be run with the current directory the same as that @@ -842,7 +852,7 @@ some summary information. It is not recommended to access the coverage files directly. Consumers should use the intermediate format that is provided -by @command{gcov} tool via @option{--intermediate-format} option. +by @command{gcov} tool via @option{--json-format} option. @node Cross-profiling @section Data File Relocation to Support Cross-Profiling diff --git a/gcc/gcov.c b/gcc/gcov.c index e255e4e3922..39d9329d6d0 100644 --- a/gcc/gcov.c +++ b/gcc/gcov.c @@ -44,6 +44,7 @@ along with Gcov; see the file COPYING3. If not see #include "version.h" #include "demangle.h" #include "color-macros.h" +#include "json.h" #include @@ -221,6 +222,10 @@ line_info::has_block (block_info *needle) return std::find (blocks.begin (), blocks.end (), needle) != blocks.end (); } +/* Output demangled function names. */ + +static int flag_demangl
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
On Wed, 26 Sep 2018, Qing Zhao wrote: > > > On Sep 26, 2018, at 6:07 PM, Alexander Monakov wrote: > > > > On Wed, 26 Sep 2018, Qing Zhao wrote: > >> The request is for application developers who want to use gcc's online > >> patching feature. > >> > >> Today, developers can turn off inlining and deliver just the patched > >> routine. They > >> can also allow all inlining and deliver the patched routine and all the > >> routines > >> that the patched routine was inlined into. > >> > >> completely turning off inlining will sacrifice too much run-time > >> performance. completely > >> enable inlining, on the other hand, will have the potential issues with > >> code size, complexity and > >> debuggability for the online patching. > >> > >> the proposed option provides a compromised solution for the above issues. > >> and enable more > >> developers to utilize gcc’s online patching feature. > > > > From this explanation it sounds to me that what you really need is -Os-like > > behavior for IPA passes, without enabling -Os for gimple/rtl passes, as I > > mentioned in my previous email. Honza, how does that sound? > > I don’t think a -Os-like option will do the job. > > As Jeff already mentioned in a very previous email: > > “Presumably one of the cases where this capability is really helpful is > things like ksplice. If you have a function with global scope that has > been potentially inlined, then it's a lot harder track down those > inlining points at DTRT. > > We ran into this internally when looking at hot patching some of the > spinlock code in the kernel. It would have been real helpful if the > kernel had been compiled with this kind of option :-) > > So conceptually I can see value in this kind of option. > “ > > so, specially control inlining on static/global will be helpful to online > patch. But as Honza said this gets you only sofar. IIRC for our kernel livepatching we turn off most IPA passes because while we can "easily" figure what and where things were inlined spotting the effects of IPA analysis and transform is almost impossible. So there's two parts of the knob - one is to make the live-patch size not explode (do less inlining where it doesn't hurt performance - that eventually also applies to static functions called once inlining!). The other is to make it possible to conservatively compute the set of functions you have to replace (the set of functions that are affected by a patch). Having an option to _that_ effect might indeed be useful (to avoid people chasing all the flags they need to disable). So shouldn't this be a -fease-live-patching option rather that -finline-only-static which doesn't really capture the intention nor the effect? That is, -fease-live-patching would guarantee that if you source-patch function X then, if you replace all functions which debuginfo tells you X was inlined to, the result will be semantically equivalent with replacing the whole program? We might even add sth like -fpatch-symbol-list=FOO,BAR that outputs a list of symbols into BAR that are affected this way when functions FOO are changed (you run that on unpatched code of course). Or we add sth to the cgraph dumpfile that for each function lists the set of symbols it was affected by. Thanks, Richard.
[Committed] S/390: Implement speculation barrier
gcc/ChangeLog: 2018-09-27 Andreas Krebbel * config/s390/s390.md (PPA_TX_ABORT, PPA_OOO_BARRIER): New constant definitions. ("tx_assist"): Replace magic number with PPA_TX_ABORT. ("*ppa"): Enable pattern also for -march=zEC12 -mno-htm. ("speculation_barrier"): New expander definition. --- gcc/config/s390/s390.md | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 537ed35..1286d2c 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -410,6 +410,15 @@ (PFPO_RND_MODE_BFP 1) ]) +;; PPA constants + +; Immediate values which can be used as the third operand to the +; perform processor assist instruction + +(define_constants + [(PPA_TX_ABORT 1) + (PPA_OOO_BARRIER 15)]) + ; Immediate operands for tbegin and tbeginc (define_constants [(TBEGIN_MASK 65292)]) ; 0xff0c (define_constants [(TBEGINC_MASK 65288)]) ; 0xff08 @@ -11415,7 +11424,7 @@ (define_expand "tx_assist" [(unspec_volatile [(match_operand:SI 0 "register_operand" "") (reg:SI GPR0_REGNUM) -(const_int 1)] +(const_int PPA_TX_ABORT)] UNSPECV_PPA)] "TARGET_HTM" "") @@ -11425,7 +11434,7 @@ (match_operand:SI 1 "register_operand" "d") (match_operand 2 "const_int_operand" "I")] UNSPECV_PPA)] - "TARGET_HTM && INTVAL (operands[2]) < 16" + "(TARGET_ZEC12 || TARGET_HTM) && INTVAL (operands[2]) < 16" "ppa\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -11592,3 +11601,11 @@ "" "bcr\t7,%%r0" [(set_attr "op_type" "RR")]) + +(define_expand "speculation_barrier" + [(unspec_volatile [(reg:SI GPR0_REGNUM) +(reg:SI GPR0_REGNUM) +(const_int PPA_OOO_BARRIER)] + UNSPECV_PPA)] + "TARGET_ZEC12" + "") -- 2.7.4
Re: [PATCH, GCC/ARM] Fix PR87374: ICE with -mslow-flash-data and -mword-relocations
Hi Thomas, On 26/09/18 18:39, Thomas Preudhomme wrote: Hi, GCC ICEs under -mslow-flash-data and -mword-relocations because there is no way to load an address, both literal pools and MOVW/MOVT being forbidden. This patch gives an error message when both options are specified by the user and adds the according dg-skip-if directives for tests that use either of these options. ChangeLog entries are as follows: *** gcc/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * config/arm/arm.c (arm_option_check_internal): Disable the combined use of -mslow-flash-data and -mword-relocations. *** gcc/testsuite/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * gcc.target/arm/movdi_movt.c: Skip if both -mslow-flash-data and -mword-relocations would be passed when compiling the test. * gcc.target/arm/movsi_movt.c: Likewise. * gcc.target/arm/pr81863.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-1.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-2.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-3.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-4.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-5.c: Likewise. * gcc.target/arm/tls-disable-literal-pool.c: Likewise. Testing: Bootstrapped in Thumb-2 mode. No testsuite regression when targeting arm-none-eabi. Modified tests get skipped as expected when running the testsuite with -mslow-flash-data (pr81863.c) or -mword-relocations (all the others). Is this ok for trunk? I'd also appreciate guidance on whether this is worth a backport. It's a simple patch but on the other hand it only prevents some option combination, it does not fix anything so I have mixed feelings. In my opinion -mslow-flash-data is more of a tuning option rather than a security/ABI feature and therefore erroring out on its combination with -mword-relocations feels odd. I'm leaning more towards making -mword-relocations or any other option that really requires constant pools to bypass/disable the effects of -mslow-flash-data instead. For me, as a user, it would give a more friendly experience. That said, you have more familiarity with M-profile users. Would such users prefer a hard error when the -mslow-flash-data option has its effects bypassed? Maybe we could emit a warning rather than an error when this happens? Thanks, Kyrill Best regards, Thomas
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
Tamar Christina writes: > Hi All, > > Currently some target supports checks such as vect_int cache their > results in a manner that would cause them not to be rechecked when > running the same tests against a different variant in a multi variant > run. This causes tests to be skipped or run when they shouldn't be. > > there is already an existing caching mechanism in place that does the > caching correctly, but presumably these weren't used because some of > these tests originally only contained static data. e.g. only checked > if the target is aarch64*-*-* etc. > > This patch changes every function that needs to do any caching at all > to use check_cached_effective_target which will cache per variant > instead of globally. Thanks for doing this! > For those tests that already parameterize over et_index I have created > check_cached_effective_target_indexed to handle this common case by > creating a list containing the property name and the current value of > et_index. > > These changes result in a much simpler implementation for most tests > and a large reduction in lines for target-supports.exp. > > Regtested on > aarch64-none-elf > x86_64-pc-linux-gnu > powerpc64-unknown-linux-gnu > arm-none-eabi > > and no testsuite errors. Difference would depend on your site.exp. > On arm we get about 4500 new testcases and on aarch64 the low 10s. > On PowerPC and x86_64 no changes as expected since the default exp for these > just test the default configuration. Would be good to try --target_board unix{,-m32} on x86_64. > What this means for new target checks is that they should always use either > check_cached_effective_target or check_cached_effective_target_indexed if the > result of the check is to be cached. > > As an example the new vect_int looks like > > proc check_effective_target_vect_int { } { > return [check_cached_effective_target_indexed { > expr { > > }}] > } > > The debug information that was once there is now all hidden in > check_cached_effective_target, (called from > check_cached_effective_target_indexed) and so the only thing you are > required to do is give it a unique cache name and a condition. > > The condition doesn't need to be an if statement so simple boolean > expressions are enough here: > > [istarget i?86-*-*] || [istarget x86_64-*-*] > || ([istarget powerpc*-*-*] >&& ![istarget powerpc-*-linux*paired*]) > || ... > > The expr may not be required as check_cached_effective_target forces > evaluation, but I have left these here just to be sure (TCL semantics > is confusing at times.). It's required, since "eval" just runs a tcl script (which can give back an arbitrary string) while "expr" evaluates the string as an expression. This means that the caching (as written) should work for arbitrary strings, such as command-line flags or the default value of -march. > @@ -120,22 +121,41 @@ proc check_cached_effective_target { prop args } { > global et_prop_list > > set target [current_target_name] > -if {![info exists et_cache($prop,target)] > - || $et_cache($prop,target) != $target} { > +if {![info exists et_cache($prop,$target)]} { > verbose "check_cached_effective_target $prop: checking $target" 2 > - set et_cache($prop,target) $target > - set et_cache($prop,value) [uplevel eval $args] > + if {[string is true -strict $args] || [string is false -strict $args]} { > + set et_cache($prop,$target) $args > + } else { > + set et_cache($prop,$target) [uplevel eval $args] > + } Why are the checks for true and false needed? We shouldn't be using this function for something that's already true or false. > +# Implements a version of check_cached_effective_target that also takes > et_index > +# into account when creating the key for the cache. > +proc check_cached_effective_target_indexed { prop args } { > +global et_index > +set key "$et_index $prop" > +verbose "check_cached_effective_target_index $prop: returning $key" 2 > + > +# Force the evaluation at this level since check_cached_effective_target > +# may no longer be able to. > +if {[string is true -strict $args] || [string is false -strict $args]} { > + set value $args > +} else { > + set value [uplevel eval $args] > +} > +return [check_cached_effective_target $key $value] > +} This defeats the point of the cache, since we'll evaluate the expression every time before passing it to check_cached_effective_target. How about: return [check_cached_effective_target $key [list uplevel eval $args]] > @@ -149,11 +169,13 @@ proc clear_effective_target_cache { } { > global et_cache > global et_prop_list > > +set target [current_target_name] > if {[info exists et_prop_list]} { > verbose "clear_effective_target_cache: $et_prop_list" 2 > foreach prop $et_prop_list { > - unset et_cache($prop,value) > - unset et_cach
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
Richard Sandiford writes: > If instead you want to support "return" (would be nice IMO), you could > use something like: > > set code [catch {uplevel eval $args} result options] > if {$code != 0 && $code != 2} { > return -code $code $result > } > set et_cache($prop,$target) $result Sorry, drop the "options" argument to "catch". I was experimenting with whether -code $code or -options $options gave better backtraces for errors, but -code $code seemed better. Richard
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
On Wed, 26 Sep 2018, Qing Zhao wrote: > I don’t think a -Os-like option will do the job. > > As Jeff already mentioned in a very previous email: > > “Presumably one of the cases where this capability is really helpful is > things like ksplice. If you have a function with global scope that has > been potentially inlined, then it's a lot harder track down those > inlining points at DTRT. > > We ran into this internally when looking at hot patching some of the > spinlock code in the kernel. It would have been real helpful if the > kernel had been compiled with this kind of option :-) > > So conceptually I can see value in this kind of option. > “ > > so, specially control inlining on static/global will be helpful to online > patch. In addition to what Richard said, I don't follow this reasoning. The issue with spinlock functions is that many of them are defined in header files and get inlined everywhere, correct? But such functions are all 'static inline', so this patch wouldn't affect them at all. Alexander
[PATCH] ASAN: emit line information of stack variables.
Hi. I've noticed ASAN can inform user about location of stack variables when a stack violation is detected. Sample example: ... This frame has 3 object(s): [32, 36) 'counter' (line 3) <== Memory access at offset 36 overflows this variable [96, 100) 'size' (line 5) [160, 164) 'length' (line 6) ... I consider that handy so that I implemented that as well. Ready after it finishes tests? Martin gcc/ChangeLog: 2018-09-27 Martin Liska * asan.c (asan_emit_stack_protection): If a stack variable is located in a same file as current function, then emit line info into variable definition string. gcc/testsuite/ChangeLog: 2018-09-27 Martin Liska * c-c++-common/asan/pr64820.c: Add line number to scanned pattern. * c-c++-common/asan/use-after-return-1.c: Likewise. * g++.dg/asan/function-argument-1.C (main): Likewise. * g++.dg/asan/function-argument-2.C (main): Likewise. * g++.dg/asan/function-argument-3.C (main): Likewise. * g++.dg/asan/use-after-scope-1.C (main): Likewise. * g++.dg/asan/use-after-scope-2.C (main): Likewise. * g++.dg/asan/use-after-scope-types-1.C (main): Likewise. * g++.dg/asan/use-after-scope-types-2.C (main): Likewise. * g++.dg/asan/use-after-scope-types-3.C (main): Likewise. * g++.dg/asan/use-after-scope-types-4.C (main): Likewise. * g++.dg/asan/use-after-scope-types-5.C (main): Likewise. * gcc.dg/asan/pr78541.c (main): Likewise. * gcc.dg/asan/use-after-scope-1.c (main): Likewise. * gcc.dg/asan/use-after-scope-10.c (main): Likewise. * gcc.dg/asan/use-after-scope-2.c (main): Likewise. * gcc.dg/asan/use-after-scope-3.c (main): Likewise. * gcc.dg/asan/use-after-scope-5.c (main): Likewise. * gcc.dg/asan/use-after-scope-9.c (main): Likewise. --- gcc/asan.c| 22 +-- gcc/testsuite/c-c++-common/asan/pr64820.c | 2 +- .../c-c++-common/asan/use-after-return-1.c| 2 +- .../g++.dg/asan/function-argument-1.C | 2 +- .../g++.dg/asan/function-argument-2.C | 2 +- .../g++.dg/asan/function-argument-3.C | 2 +- gcc/testsuite/g++.dg/asan/use-after-scope-1.C | 2 +- gcc/testsuite/g++.dg/asan/use-after-scope-2.C | 2 +- .../g++.dg/asan/use-after-scope-types-1.C | 2 +- .../g++.dg/asan/use-after-scope-types-2.C | 2 +- .../g++.dg/asan/use-after-scope-types-3.C | 2 +- .../g++.dg/asan/use-after-scope-types-4.C | 2 +- .../g++.dg/asan/use-after-scope-types-5.C | 2 +- gcc/testsuite/gcc.dg/asan/pr78541.c | 2 +- gcc/testsuite/gcc.dg/asan/use-after-scope-1.c | 2 +- .../gcc.dg/asan/use-after-scope-10.c | 2 +- gcc/testsuite/gcc.dg/asan/use-after-scope-2.c | 2 +- gcc/testsuite/gcc.dg/asan/use-after-scope-3.c | 2 +- gcc/testsuite/gcc.dg/asan/use-after-scope-5.c | 2 +- gcc/testsuite/gcc.dg/asan/use-after-scope-9.c | 2 +- 20 files changed, 39 insertions(+), 21 deletions(-) diff --git a/gcc/asan.c b/gcc/asan.c index 235e219479d..653bedb385a 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1269,6 +1269,9 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, if (shadow_ptr_types[0] == NULL_TREE) asan_init_shadow_ptr_types (); + expanded_location cfun_xloc += expand_location (DECL_SOURCE_LOCATION (current_function_decl)); + /* First of all, prepare the description string. */ pretty_printer asan_pp; @@ -1281,15 +1284,30 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, pp_space (&asan_pp); pp_wide_integer (&asan_pp, offsets[l - 1] - offsets[l]); pp_space (&asan_pp); + + expanded_location xloc + = expand_location (DECL_SOURCE_LOCATION (decl)); + char location[32]; + + if (xloc.file == cfun_xloc.file) + sprintf (location, ":%d", xloc.line); + else + location[0] = '\0'; + if (DECL_P (decl) && DECL_NAME (decl)) { - pp_decimal_int (&asan_pp, IDENTIFIER_LENGTH (DECL_NAME (decl))); + unsigned l + = IDENTIFIER_LENGTH (DECL_NAME (decl)) + strlen (location); + pp_decimal_int (&asan_pp, l); pp_space (&asan_pp); pp_tree_identifier (&asan_pp, DECL_NAME (decl)); + pp_string (&asan_pp, location); } else pp_string (&asan_pp, "9 "); - pp_space (&asan_pp); + + if (l > 2) + pp_space (&asan_pp); } str_cst = asan_pp_string (&asan_pp); diff --git a/gcc/testsuite/c-c++-common/asan/pr64820.c b/gcc/testsuite/c-c++-common/asan/pr64820.c index 885a6621491..a00debf3588 100644 --- a/gcc/testsuite/c-c++-common/asan/pr64820.c +++ b/gcc/testsuite/c-c++-common/asan/pr64820.c @@ -28,4 +28,4 @@ int main(int argc, char **argv) { /* { dg-output "WRITE of size 1 at .* thread T0.*" } */ /* { dg-output "#0.*(Func2)?.*pr64820.(c:21)?.*" } */ /* { dg-output "is located in stack of thread T0 at offset.*" } */ -/* { dg-output "\'local\' <== Memory acces
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
> On Wed, 26 Sep 2018, Qing Zhao wrote: > > The request is for application developers who want to use gcc's online > >patching feature. > > > > Today, developers can turn off inlining and deliver just the patched > > routine. They > >can also allow all inlining and deliver the patched routine and all the > > routines > >that the patched routine was inlined into. > > > > completely turning off inlining will sacrifice too much run-time > > performance. completely > > enable inlining, on the other hand, will have the potential issues with > > code size, complexity and > > debuggability for the online patching. > > > > the proposed option provides a compromised solution for the above issues. > > and enable more > > developers to utilize gcc’s online patching feature. > > From this explanation it sounds to me that what you really need is -Os-like > behavior for IPA passes, without enabling -Os for gimple/rtl passes, as I > mentioned in my previous email. Honza, how does that sound? How -Os is related? We will still do things like inlining or cloning of functions if we expect code size to decrease (that may happen if arguments become dead) Honza > > > > If the original issue is that inlining duplicates code, wouldn't it be > > > better > > > solved by a switch that instructs inlining heuristics to inline as if for > > > -Os, > > > without enabling -Os for other passes? > > Alexander
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
> > Okay, I see. > > > > > If you make this to be INTERPOSABLE (which means it can be replaced by > > different > > implementation by linker and that is probably what we want for live > > patching) > > then also inliner, ipa-sra and other optimization will give up on these. > > do you suggest that to set the global function as AVAIL_INTERPOSABLE when > -finline-only-static > is present? then we should avoid all issues? It seems to be reasonable direction I think, because it is what really happens (well AVAIL_INTERPOSABLE still does not assume that the interposition will happen at runtime, but it is an approximation and we may introduce something like AVAIL_RUNTIME_INTERPOSABLE if there is need for better difference). I wonder if -finline-only-static is good name for the flag though, because it does a lot more than that. Maybe something like -flive-patching? How much is this all tied to one particular implementation of the feature? Honza > > Qing > > > > Honza > >> > >> > >>> For example comdat that was cloned by IPA-SRA. See can_be_local_p and > >>> comdat_can_be_unshared_p predicates. Similar problem happens to clones > >>> created > >>> by ipa-cp. > >>> > >>> I guess we want to disable localization and cloning in this case as well. > >>> I wonder what else. > >> > >> Yes, I think we should make -finline-only-static incompatible with cloning > >> and tree-sra too. > >> > >> Qing > >>> > >>> Honza >
Re: [PATCH] PR86957
On Mon, Sep 24, 2018 at 9:14 PM Indu Bhagat wrote: > > Done. Attached is updated patch. > > Patch is tested on x86_64 You obviously did _not_ properly test the patch since it causes a bunch of new testsuite failures: FAIL: g++.dg/pr60518.C -std=gnu++11 (test for excess errors) FAIL: g++.dg/pr60518.C -std=gnu++14 (test for excess errors) FAIL: g++.dg/pr60518.C -std=gnu++98 (test for excess errors) FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++11 (test for excess errors) FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++14 (test for excess errors) FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++98 (test for excess errors) FAIL: g++.dg/guality/pr55665.C -O2 -flto -fno-use-linker-plugin -flto-partition=none line 23 p == 40 FAIL: g++.dg/torture/pr59265.C -O0 (test for excess errors) FAIL: g++.dg/torture/pr59265.C -O1 (test for excess errors) FAIL: g++.dg/torture/pr59265.C -O2 (test for excess errors) FAIL: g++.dg/torture/pr59265.C -O2 -flto -fno-use-linker-plugin -flto-partition=none (test for excess errors) FAIL: g++.dg/torture/pr59265.C -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (test for excess errors) FAIL: g++.dg/torture/pr59265.C -O3 -g (test for excess errors) FAIL: g++.dg/torture/pr59265.C -Os (test for excess errors) FAIL: g++.dg/tree-prof/morefunc.C compilation, -fprofile-use -D_PROFILE_USE UNRESOLVED: g++.dg/tree-prof/morefunc.C execution,-fprofile-use -D_PROFILE_USE and more. Please get up to speed with GCC development and testing requirements! Richard. > Thanks > > > On 09/24/2018 09:37 AM, Martin Sebor wrote: > > I would suggest to use the term "remove" or "delete" instead of > > the informal "wipe out" when referring to removing files or their > > contents. > > > > Martin >
Re: [PATCH][GCC][AArch64] Add support for SVE stack clash probing [patch (2/7)]
Hi All, It turns out the testsuite didn't have a case in it which would cause a significant enough spill to enter the loop. After creating one I noticed a bug in the loop and fixed it. The loops are now .cfi_startproc mov x15, sp cntbx16, all, mul #11 add x16, x16, 304 .cfi_def_cfa_register 15 .SVLPSPL0: cmp x16, 61440 b.lt.SVLPEND0 sub sp, sp, 61440 str xzr, [sp, 0] subsx16, x16, 61440 b .SVLPSPL0 .SVLPEND0: sub sp, sp, x16 .cfi_escape 0xf,0xc,0x8f,0,0x92,0x2e,0,0x8,0x58,0x1e,0x23,0xb0,0x2,0x22 for a 64KB guard size. I'm also adding a new testcase that causes a large enough spill to enter the loop. Ok for trunk? Thanks, Tamar gcc/ 2018-09-27 Tamar Christina PR target/86486 * config/aarch64/aarch64-protos.h (aarch64_output_probe_sve_stack_clash): New. * config/aarch64/aarch64.c (aarch64_output_probe_sve_stack_clash, aarch64_uimm12_nearest_value): New. (aarch64_allocate_and_probe_stack_space): Add SVE specific section. * config/aarch64/aarch64.md (probe_sve_stack_clash): New. gcc/testsuite/ 2018-09-27 Tamar Christina PR target/86486 * gcc.target/aarch64/stack-check-prologue-16.c: New test * gcc.target/aarch64/stack-check-cfa-3.c: New test. * gcc.target/aarch64/sve/struct_vect_24.c: New test. * gcc.target/aarch64/sve/struct_vect_24_run.c: New test. The 09/26/2018 09:20, Tamar Christina wrote: > Hi Richard, > > I've added a new loop that should also exit early as described in my previous > email. > > An example sequence is: > > .cfi_startproc > mov x15, sp > cntbx16, all, mul #11 > add x16, x16, 304 > .cfi_def_cfa_register 15 > cmp x16, 61440 > b.lt.SVLPEND0 > .SVLPSPL0: > sub sp, sp, 61440 > str xzr, [sp, 0] > subsx16, x16, 61440 > b.hs.SVLPSPL0 > add x16, x16, 61440 > .SVLPEND0: > sub sp, sp, x16 > .cfi_escape > 0xf,0xc,0x8f,0,0x92,0x2e,0,0x8,0x58,0x1e,0x23,0xb0,0x2,0x22 > > for a 64KB guard size, and for a 4KB guard size > > .cfi_startproc > mov x15, sp > cntbx16, all, mul #11 > add x16, x16, 304 > .cfi_def_cfa_register 15 > cmp x16, 3072 > b.lt.SVLPEND0 > .SVLPSPL0: > sub sp, sp, 3072 > str xzr, [sp, 0] > subsx16, x16, 3072 > b.hs.SVLPSPL0 > add x16, x16, 3072 > .SVLPEND0: > sub sp, sp, x16 > .cfi_escape > 0xf,0xc,0x8f,0,0x92,0x2e,0,0x8,0x58,0x1e,0x23,0xb0,0x2,0x22 > > > This has about the same semantics as alloca, except we prioritize the common > case > where no probe is required. We also change the amount we adjust the stack and > the probing interval to be the nearest value to `guard size - abi buffer` that > fits in the 12-bit shifted immediate used by cmp. > > While this would mean we probe a bit more often than we require, in practice > the > amount of SVE vectors you'd need to spill is significant. Even more so to > enter the > loop more than once. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues in sve > testsuite. > Target was tested with stack clash on and off by default. > > Ok for trunk? > > Thanks, > Tamar > > gcc/ > 2018-09-26 Tamar Christina > > PR target/86486 > * config/aarch64/aarch64-protos.h > (aarch64_output_probe_sve_stack_clash): New. > * config/aarch64/aarch64.c (aarch64_output_probe_sve_stack_clash, > aarch64_uimm12_nearest_value): New. > (aarch64_allocate_and_probe_stack_space): Add SVE specific section. > * config/aarch64/aarch64.md (probe_sve_stack_clash): New. > > gcc/testsuite/ > 2018-09-26 Tamar Christina > > PR target/86486 > * gcc.target/aarch64/stack-check-prologue-16.c: New test > * gcc.target/aarch64/stack-check-cfa-3.c: New test. > > > The 09/20/2018 10:23, Tamar Christina wrote: > > Hi Richard, > > > > The 09/11/2018 16:20, Richard Sandiford wrote: > > > Tamar Christina writes: > > > >> > + > > > >> > + /* No probe leave. */ > > > >> > + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_end_lab); > > > >> > + return ""; > > > >> > > > >> With the CFA stuff and constant load, I think this works out as: > > > >> > > > >> - > > > >> # 12 insns > > > >>mov r15, base > > > >>mov adjustment, N > > > >> 1: > > > >>cmp adjustment, guard_size > > > >>b.lt2f > > > >>sub base, base, guard_size > > > >>str xzr, [base, limit] > > > >>sub adjustment, adjustment, guard_size > > > >>b 1b > > > >> 2: > > > >>sub base, base, adjustment > > > >>cmp adjustment, l
Re: [PATCH] PR86957
On 9/27/18 11:47 AM, Richard Biener wrote: > On Mon, Sep 24, 2018 at 9:14 PM Indu Bhagat wrote: >> >> Done. Attached is updated patch. >> >> Patch is tested on x86_64 > > You obviously did _not_ properly test the patch since it causes a > bunch of new testsuite > failures: > > FAIL: g++.dg/pr60518.C -std=gnu++11 (test for excess errors) > FAIL: g++.dg/pr60518.C -std=gnu++14 (test for excess errors) > FAIL: g++.dg/pr60518.C -std=gnu++98 (test for excess errors) > FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++11 (test for excess errors) > FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++14 (test for excess errors) > FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++98 (test for excess errors) > FAIL: g++.dg/guality/pr55665.C -O2 -flto -fno-use-linker-plugin > -flto-partition=none line 23 p == 40 > FAIL: g++.dg/torture/pr59265.C -O0 (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -O1 (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -O2 (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -O2 -flto -fno-use-linker-plugin > -flto-partition=none (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -O2 -flto -fuse-linker-plugin > -fno-fat-lto-objects (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -O3 -g (test for excess errors) > FAIL: g++.dg/torture/pr59265.C -Os (test for excess errors) > FAIL: g++.dg/tree-prof/morefunc.C compilation, -fprofile-use -D_PROFILE_USE > UNRESOLVED: g++.dg/tree-prof/morefunc.C execution,-fprofile-use > -D_PROFILE_USE > > and more. Please get up to speed with GCC development and testing > requirements! > > Richard. I'll cook patch for it in order to remove the failures as soon as possible. Martin > >> Thanks >> >> >> On 09/24/2018 09:37 AM, Martin Sebor wrote: >>> I would suggest to use the term "remove" or "delete" instead of >>> the informal "wipe out" when referring to removing files or their >>> contents. >>> >>> Martin >>
Re: [PATCH, GCC/ARM] Fix PR87374: ICE with -mslow-flash-data and -mword-relocations
On 27/09/2018 09:26, Kyrill Tkachov wrote: Hi Thomas, On 26/09/18 18:39, Thomas Preudhomme wrote: Hi, GCC ICEs under -mslow-flash-data and -mword-relocations because there is no way to load an address, both literal pools and MOVW/MOVT being forbidden. This patch gives an error message when both options are specified by the user and adds the according dg-skip-if directives for tests that use either of these options. ChangeLog entries are as follows: *** gcc/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * config/arm/arm.c (arm_option_check_internal): Disable the combined use of -mslow-flash-data and -mword-relocations. *** gcc/testsuite/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * gcc.target/arm/movdi_movt.c: Skip if both -mslow-flash-data and -mword-relocations would be passed when compiling the test. * gcc.target/arm/movsi_movt.c: Likewise. * gcc.target/arm/pr81863.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-1.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-2.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-3.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-4.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-5.c: Likewise. * gcc.target/arm/tls-disable-literal-pool.c: Likewise. Testing: Bootstrapped in Thumb-2 mode. No testsuite regression when targeting arm-none-eabi. Modified tests get skipped as expected when running the testsuite with -mslow-flash-data (pr81863.c) or -mword-relocations (all the others). Is this ok for trunk? I'd also appreciate guidance on whether this is worth a backport. It's a simple patch but on the other hand it only prevents some option combination, it does not fix anything so I have mixed feelings. In my opinion -mslow-flash-data is more of a tuning option rather than a security/ABI feature and therefore erroring out on its combination with -mword-relocations feels odd. I'm leaning more towards making -mword-relocations or any other option that really requires constant pools to bypass/disable the effects of -mslow-flash-data instead. For me, as a user, it would give a more friendly experience. That said, you have more familiarity with M-profile users. Would such users prefer a hard error when the -mslow-flash-data option has its effects bypassed? Maybe we could emit a warning rather than an error when this happens? -mslow-flash-data and -mword-relocations are contradictory in their expectations. mslow-flash-data is for not putting anything in the literal pool whereas mword-relocations is purely around the use of movw / movt instructions for word sized values. I wish we had called -mslow-flash-data something else (probably -mno-literal-pools). -mslow-flash-data is used primarily by M-profile users and -mword-relocations IIUC was a point fix for use in the Linux kernel for module loads at a time when not all module loaders in the linux kernel were fixed for the movw / movt relocations and armv7-a / thumb2 was in it's infancy . Thus they are used by different constituencies in general and I wouldn't see them used together by actual users. Considering the above, I would prefer a hard error rather than a warning as they are contradictory and I'd prefer that we error'd out. Further this bugzilla entry is probably created with fuzzing with a variety of options rather than from any real use case. Oh and yes, lets update invoke.texi while here. regards Ramana Thanks, Kyrill Best regards, Thomas
Re: [PATCH, GCC/ARM] Fix PR87374: ICE with -mslow-flash-data and -mword-relocations
On 27/09/18 11:13, Ramana Radhakrishnan wrote: On 27/09/2018 09:26, Kyrill Tkachov wrote: Hi Thomas, On 26/09/18 18:39, Thomas Preudhomme wrote: Hi, GCC ICEs under -mslow-flash-data and -mword-relocations because there is no way to load an address, both literal pools and MOVW/MOVT being forbidden. This patch gives an error message when both options are specified by the user and adds the according dg-skip-if directives for tests that use either of these options. ChangeLog entries are as follows: *** gcc/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * config/arm/arm.c (arm_option_check_internal): Disable the combined use of -mslow-flash-data and -mword-relocations. *** gcc/testsuite/ChangeLog *** 2018-09-25 Thomas Preud'homme PR target/87374 * gcc.target/arm/movdi_movt.c: Skip if both -mslow-flash-data and -mword-relocations would be passed when compiling the test. * gcc.target/arm/movsi_movt.c: Likewise. * gcc.target/arm/pr81863.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-1.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-2.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-3.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-4.c: Likewise. * gcc.target/arm/thumb2-slow-flash-data-5.c: Likewise. * gcc.target/arm/tls-disable-literal-pool.c: Likewise. Testing: Bootstrapped in Thumb-2 mode. No testsuite regression when targeting arm-none-eabi. Modified tests get skipped as expected when running the testsuite with -mslow-flash-data (pr81863.c) or -mword-relocations (all the others). Is this ok for trunk? I'd also appreciate guidance on whether this is worth a backport. It's a simple patch but on the other hand it only prevents some option combination, it does not fix anything so I have mixed feelings. In my opinion -mslow-flash-data is more of a tuning option rather than a security/ABI feature and therefore erroring out on its combination with -mword-relocations feels odd. I'm leaning more towards making -mword-relocations or any other option that really requires constant pools to bypass/disable the effects of -mslow-flash-data instead. -mslow-flash-data and -mword-relocations are contradictory in their expectations. mslow-flash-data is for not putting anything in the literal pool whereas mword-relocations is purely around the use of movw / movt instructions for word sized values. I wish we had called -mslow-flash-data something else (probably -mno-literal-pools). -mslow-flash-data is used primarily by M-profile users and -mword-relocations IIUC was a point fix for use in the Linux kernel for module loads at a time when not all module loaders in the linux kernel were fixed for the movw / movt relocations and armv7-a / thumb2 was in it's infancy :). Thus they are used by different constituencies in general and I wouldn't see them used together by actual users. Ah, thank you for the background Ramana. The naming of mslow-flash-data is confusing indeed. It sounds more like a tuning request rather than a hard requirement. Considering the above, I would prefer a hard error rather than a warning as they are contradictory and I'd prefer that we error'd out. Further this bugzilla entry is probably created with fuzzing with a variety of options rather than from any real use case. Yes, in that case erroring out is easier. Oh and yes, lets update invoke.texi while here. Yes. This deserves an entry in the documentation. Thanks, Kyrill regards Ramana For me, as a user, it would give a more friendly experience. That said, you have more familiarity with M-profile users. Would such users prefer a hard error when the -mslow-flash-data option has its effects bypassed? Maybe we could emit a warning rather than an error when this happens? Thanks, Kyrill Best regards, Thomas
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
Hi Richard, Thanks for the review! The 09/27/2018 09:40, Richard Sandiford wrote: > Tamar Christina writes: > > > > and no testsuite errors. Difference would depend on your site.exp. > > On arm we get about 4500 new testcases and on aarch64 the low 10s. > > On PowerPC and x86_64 no changes as expected since the default exp for these > > just test the default configuration. > > Would be good to try --target_board unix{,-m32} on x86_64. > Running now. > > What this means for new target checks is that they should always use either > > check_cached_effective_target or check_cached_effective_target_indexed if > > the > > result of the check is to be cached. > > > > As an example the new vect_int looks like > > > > proc check_effective_target_vect_int { } { > > return [check_cached_effective_target_indexed { > > expr { > > > > }}] > > } > > > > The debug information that was once there is now all hidden in > > check_cached_effective_target, (called from > > check_cached_effective_target_indexed) and so the only thing you are > > required to do is give it a unique cache name and a condition. > > > > The condition doesn't need to be an if statement so simple boolean > > expressions are enough here: > > > > [istarget i?86-*-*] || [istarget x86_64-*-*] > > || ([istarget powerpc*-*-*] > > && ![istarget powerpc-*-linux*paired*]) > > || ... > > > > The expr may not be required as check_cached_effective_target forces > > evaluation, but I have left these here just to be sure (TCL semantics > > is confusing at times.). > > It's required, since "eval" just runs a tcl script (which can give back > an arbitrary string) while "expr" evaluates the string as an expression. > > This means that the caching (as written) should work for arbitrary strings, > such as command-line flags or the default value of -march. > Ah ok, got it, > > @@ -120,22 +121,41 @@ proc check_cached_effective_target { prop args } { > > global et_prop_list > > > > set target [current_target_name] > > -if {![info exists et_cache($prop,target)] > > - || $et_cache($prop,target) != $target} { > > +if {![info exists et_cache($prop,$target)]} { > > verbose "check_cached_effective_target $prop: checking $target" 2 > > - set et_cache($prop,target) $target > > - set et_cache($prop,value) [uplevel eval $args] > > + if {[string is true -strict $args] || [string is false -strict $args]} { > > + set et_cache($prop,$target) $args > > + } else { > > + set et_cache($prop,$target) [uplevel eval $args] > > + } > > Why are the checks for true and false needed? We shouldn't be > using this function for something that's already true or false. > I had wanted to be a bit lenient here in accepting it if you have a function that already fully evaluated the expr before passing it to check_cached_effective_target. i.e. something like proc check_effective_target_vect_int { } { return [check_cached_effective_target_indexed [ expr { }]] } The error you would get if you do this is very confusing so I thought since it didn't matter much for the regexp only target triple tests that just accepting this would be fine. Should I drop it or keep it? Thanks, Tamar. > > +# Implements a version of check_cached_effective_target that also takes > > et_index > > +# into account when creating the key for the cache. > > +proc check_cached_effective_target_indexed { prop args } { > > +global et_index > > +set key "$et_index $prop" > > +verbose "check_cached_effective_target_index $prop: returning $key" 2 > > + > > +# Force the evaluation at this level since > > check_cached_effective_target > > +# may no longer be able to. > > +if {[string is true -strict $args] || [string is false -strict $args]} > > { > > + set value $args > > +} else { > > + set value [uplevel eval $args] > > +} > > +return [check_cached_effective_target $key $value] > > +} > > This defeats the point of the cache, since we'll evaluate the expression > every time before passing it to check_cached_effective_target. > > How about: > > return [check_cached_effective_target $key [list uplevel eval $args]] > > > @@ -149,11 +169,13 @@ proc clear_effective_target_cache { } { > > global et_cache > > global et_prop_list > > > > +set target [current_target_name] > > if {[info exists et_prop_list]} { > > verbose "clear_effective_target_cache: $et_prop_list" 2 > > foreach prop $et_prop_list { > > - unset et_cache($prop,value) > > - unset et_cache($prop,target) > > + if {[info exists et_cache($prop,$target)]} { > > + unset et_cache($prop,$target) > > +} > > Looks a bit odd that we now have to make this conditional. Maybe we > should maintain an et_prop_list for each target. I.e.: > > if {![info exists et_prop_list] > || [lsearch $et_prop_list $prop]
Re: [PR 87415] handle 1 bit bit fields in set_value_range_with_overflow()
On Wed, Sep 26, 2018 at 7:46 PM Aldy Hernandez wrote: > > As I've said in the PR... > > For a 1-bit signed field we are trying to subtract the following ranges: > > [0, 0] - VR_VARYING > > Mathematically these are: > > [MAX, MAX] - [MIN, MAX] > [0, 0] - [MIN, MAX] > => [0, 0] - [-1, 0] > > For ranges: [a, b] - [c, d] is [a - d, b - c], so combine_bounds() yields: > > [0, OVERFLOW] > > Then we adjust the result for overflows in > set_value_range_with_overflow() we transform the above into: > > [0, +INF] > > And since +INF is 0, we get [0, 0] which is incorrect. > > [0, 0] - [MIN, MAX] should have given [MIN, MAX]. > > 1-bit signed fields are weird in that 0 - (-MIN) is still -MIN. In any > other world, it is an impossible result. For instance, in an 8-bit > signed world, 0 - (-128) is invalid. I think that set_value_range_with_overflow saturates to the wrong end, possibly because wi::sub computes the wrong overflow? You can't really argue that 0 - (-128) is "invalid" in any sense since -128 might not be represented at runtime so you cannot use overflow semantics to restrict what is valid and what is not when doing the actual operations. > One option would be to treat signed bit fields as TYPE_OVERFLOW wraps, > since they seem to have wrapping properties, but I'm afraid such a heavy > handed approach would yield latent bugs across the compiler. What VRP > seems to currently be doing in set_and_canonicalize_value_range(), is > just special casing the wrapping of 1-bit fields: > >/* Wrong order for min and max, to swap them and the VR type we need > to adjust them. */ >if (tree_int_cst_lt (max, min)) > { >tree one, tmp; > >/* For one bit precision if max < min, then the swapped > range covers all values, so for VR_RANGE it is varying and > for VR_ANTI_RANGE empty range, so drop to varying as well. */ >if (TYPE_PRECISION (TREE_TYPE (min)) == 1) > { > set_value_range_to_varying (vr); > return; > } > ... > } > > I've done the same in set_value_range_with_overflow(). > > BTW, I believe this is a latent bug. Had the MINUS_EXPR code been > handed down [0,0] - [-1,0] instead of [0,0] - VR_VARYING, we would've > triggered it. > > Tested on x86-64 Linux. > > OK for trunk? Still OK. Thanks, Richard.
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Wed, Sep 26, 2018 at 8:11 PM H.J. Lu wrote: > > Add -mzero-caller-saved-regs=[skip|used|all] command-line option and > zero_caller_saved_regs("skip|used|all") function attribue: > > 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") > > Don't zero caller-saved integer registers upon function return. > > 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") > > Zero used caller-saved integer registers upon function return. > > 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") > > Zero all caller-saved integer registers upon function return. > > Tested on i686 and x86-64 with bootstrapping GCC trunk and > -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all > enabled by default. Can this be done in a target independet way? Richard. > gcc/ > > * config/i386/i386-opts.h (zero_caller_saved_regs): New enum. > * config/i386/i386-protos.h (ix86_split_simple_return_pop_internal): > Renamed to ... > (ix86_split_simple_return_internal): This. > * config/i386/i386.c (ix86_set_zero_caller_saved_regs_type): New > function. > (ix86_set_current_function): Call > ix86_set_zero_caller_saved_regs_type. > (ix86_expand_prologue): Replace gen_prologue_use with > gen_pro_epilogue_use. > (ix86_expand_epilogue): Replace gen_simple_return_pop_internal > with ix86_split_simple_return_internal. Replace > gen_simple_return_internal with ix86_split_simple_return_internal. > (ix86_find_live_outgoing_regs): New function. > (ix86_split_simple_return_pop_internal): Removed. > (ix86_split_simple_return_internal): New function. > (ix86_handle_fndecl_attribute): Support zero_caller_saved_regs > attribute. > (ix86_attribute_table): Add zero_caller_saved_regs. > * config/i386/i386.h (machine_function): Add > zero_caller_saved_regs_type and live_outgoing_regs. > (TARGET_POP_SCRATCH_REGISTER): New. > * config/i386/i386.md (UNSPEC_SIMPLE_RETURN): New UNSPEC. > (UNSPECV_PROLOGUE_USE): Renamed to ... > (UNSPECV_PRO_EPILOGUE_USE): This. > (prologue_use): Renamed to ... > (pro_epilogue_use): This. > (simple_return_internal): Changed to define_insn_and_split. > (simple_return_internal_1): New pattern. > (simple_return_pop_internal): Replace > ix86_split_simple_return_pop_internal with > ix86_split_simple_return_internal. Always call > ix86_split_simple_return_internal if epilogue_completed is > true. > (simple_return_pop_internal_1): New pattern. > (Epilogue deallocator to pop peepholes): Enabled only if > TARGET_POP_SCRATCH_REGISTER is true. > * config/i386/i386.opt (mzero-caller-saved-regs=): New option. > * doc/extend.texi: Document zero_caller_saved_regs attribute. > * doc/invoke.texi: Document -mzero-caller-saved-regs=. > > gcc/testsuite/ > > * gcc.target/i386/zero-scratch-regs-1.c: New test. > * gcc.target/i386/zero-scratch-regs-2.c: Likewise. > * gcc.target/i386/zero-scratch-regs-3.c: Likewise. > * gcc.target/i386/zero-scratch-regs-4.c: Likewise. > * gcc.target/i386/zero-scratch-regs-5.c: Likewise. > * gcc.target/i386/zero-scratch-regs-6.c: Likewise. > * gcc.target/i386/zero-scratch-regs-7.c: Likewise. > * gcc.target/i386/zero-scratch-regs-8.c: Likewise. > * gcc.target/i386/zero-scratch-regs-9.c: Likewise. > * gcc.target/i386/zero-scratch-regs-10.c: Likewise. > * gcc.target/i386/zero-scratch-regs-11.c: Likewise. > * gcc.target/i386/zero-scratch-regs-12.c: Likewise. > --- > gcc/config/i386/i386-opts.h | 7 + > gcc/config/i386/i386-protos.h | 2 +- > gcc/config/i386/i386.c| 245 -- > gcc/config/i386/i386.h| 13 + > gcc/config/i386/i386.md | 54 +++- > gcc/config/i386/i386.opt | 17 ++ > gcc/doc/extend.texi | 8 + > gcc/doc/invoke.texi | 12 +- > .../gcc.target/i386/zero-scratch-regs-1.c | 10 + > .../gcc.target/i386/zero-scratch-regs-10.c| 19 ++ > .../gcc.target/i386/zero-scratch-regs-11.c| 39 +++ > .../gcc.target/i386/zero-scratch-regs-12.c| 39 +++ > .../gcc.target/i386/zero-scratch-regs-2.c | 17 ++ > .../gcc.target/i386/zero-scratch-regs-3.c | 10 + > .../gcc.target/i386/zero-scratch-regs-4.c | 12 + > .../gcc.target/i386/zero-scratch-regs-5.c | 18 ++ > .../gcc.target/i386/zero-scratch-regs-6.c | 12 + > .../gcc.target/i386/zero-scratch-regs-7.c | 11 + > .../gcc.target/i386/zero-scratch-regs-8.c | 17 ++ > .../gcc.target/i386/zero-scratch-regs-9.c | 13 + > 20 files changed, 538 insertions(+), 37 deletions(-
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
On Thu, 27 Sep 2018, Jan Hubicka wrote: > > On Wed, 26 Sep 2018, Qing Zhao wrote: > > > The request is for application developers who want to use gcc's online > > >patching feature. > > > > > > Today, developers can turn off inlining and deliver just the patched > > > routine. They > > >can also allow all inlining and deliver the patched routine and all > > > the routines > > >that the patched routine was inlined into. > > > > > > completely turning off inlining will sacrifice too much run-time > > > performance. completely > > > enable inlining, on the other hand, will have the potential issues with > > > code size, complexity and > > > debuggability for the online patching. > > > > > > the proposed option provides a compromised solution for the above issues. > > > and enable more > > > developers to utilize gcc’s online patching feature. > > > > From this explanation it sounds to me that what you really need is -Os-like > > behavior for IPA passes, without enabling -Os for gimple/rtl passes, as I > > mentioned in my previous email. Honza, how does that sound? > > How -Os is related? We will still do things like inlining or cloning of > functions > if we expect code size to decrease (that may happen if arguments become dead) Yeah, I was suggesting -fno-inline-small-functions which would get you to do it the Linus-way (do-what-I-say) and only inline 'inline' declared functions. Richard.
Re: [PATCH][GCC][AArch64] Add support for SVE stack clash probing [patch (2/7)]
> It turns out the testsuite didn't have a case in it which would cause a > significant enough spill to enter the loop. After creating one I noticed a > bug > in the loop and fixed it. > > The loops are now > > .cfi_startproc > mov x15, sp > cntbx16, all, mul #11 > add x16, x16, 304 > .cfi_def_cfa_register 15 > .SVLPSPL0: > cmp x16, 61440 > b.lt.SVLPEND0 > sub sp, sp, 61440 > str xzr, [sp, 0] > subsx16, x16, 61440 (The code uses sub rather than subs here) > b .SVLPSPL0 > .SVLPEND0: > sub sp, sp, x16 > .cfi_escape > 0xf,0xc,0x8f,0,0x92,0x2e,0,0x8,0x58,0x1e,0x23,0xb0,0x2,0x22 > > for a 64KB guard size. That's OK with me. Like you say, the main goal was to make the common case of no probe as fast as possible. > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index > ef95fc829b83886e2ff00e4664e31af916e99b0c..e2d8734a8d5e513588e3b0318e9c67fdaebdf0d4 > 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -453,6 +453,7 @@ void aarch64_asm_output_labelref (FILE *, const char *); > void aarch64_cpu_cpp_builtins (cpp_reader *); > const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); > const char * aarch64_output_probe_stack_range (rtx, rtx); > +const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); > void aarch64_err_no_fpadvsimd (machine_mode); > void aarch64_expand_epilogue (bool); > void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > d4b13d48d852a70848fc7c51fd867e776efb5e55..245fd6832ec0afe27c42a242c901a2e13024f935 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -208,6 +208,7 @@ static bool aarch64_builtin_support_vector_misalignment > (machine_mode mode, > static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); > static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, > aarch64_addr_query_type); > +static HOST_WIDE_INT aarch64_uimm12_nearest_value (HOST_WIDE_INT val); > /* Major revision number of the ARM Architecture implemented by the target. > */ > unsigned aarch64_architecture_version; > @@ -3973,6 +3974,83 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) >return ""; > } > +/* Emit the probe loop for doing stack clash probes and stack adjustments for > + SVE. This emits probes from BASE to BASE - ADJUSTMENT based on a guard > size > + of GUARD_SIZE. When a probe is emitted it is done at MIN_PROBE_OFFSET > bytes > + from the current BASE at an interval of MIN_PROBE_OFFSET. By the end of > this MIN_PROBE_THRESHOLD in both cases (or rename the var to min_probe_offset, either's fine). Probably "at most MIN_PROBE..." given the round down. > + function BASE = BASE - ADJUSTMENT. */ > + > +const char * > +aarch64_output_probe_sve_stack_clash (rtx base, rtx adjustment, > + rtx min_probe_threshold, rtx guard_size) > +{ > + /* This function is not allowed to use any instruction generation function > + like gen_ and friends. If you do you'll likely ICE during CFG > validation, > + so instead emit the code you want using output_asm_insn. */ > + gcc_assert (flag_stack_clash_protection); > + gcc_assert (CONST_INT_P (min_probe_threshold) && CONST_INT_P (guard_size)); > + gcc_assert (INTVAL (guard_size) > INTVAL (min_probe_threshold)); > + > + /* The minimum required allocation before the residual requires probing. > */ > + HOST_WIDE_INT residual_probe_guard = INTVAL (min_probe_threshold); > + > + /* Clamp the value down to the nearest value that can be used with a cmp. > */ > + residual_probe_guard = aarch64_uimm12_nearest_value (residual_probe_guard); Maybe aarch64_clamp_to_uimm12_shift or aarch64_round_down_to_uimm12_shift would be better; nearest implies that "0x1ff0" should become "0x2000" rather than "0x1000". > + /* ADJUSTMENT == RESIDUAL_PROBE_GUARD. */ > + xops[0] = adjustment; > + xops[1] = probe_offset_value_rtx; > + output_asm_insn ("cmp\t%0, %1", xops); < rather than == (or just "Compare ...") > + /* Branch to end if not enough adjustment to probe. */ > + fputs ("\tb.lt\t", asm_out_file); > + assemble_name_raw (asm_out_file, loop_end_lab); > + fputc ('\n', asm_out_file); > + > + /* BASE = BASE - RESIDUAL_PROBE_GUARD. */ > + xops[0] = base; > + xops[1] = gen_int_mode (residual_probe_guard, Pmode); probe_offset_value_rtx > + HOST_WIDE_INT size; > + /* Handle the SVE non-constant case first. */ > + if (!poly_size.is_constant (&size)) > +{ > + Excess blank line. > + if (dump_file) > + { > + fprintf (dump_file, "Stack clash SVE prologue: "); > + print_dec (poly_size,
Re: [PATCH 2/4] Remove unused functions and fields.
On Wed, 26 Sep 2018 10:59:02 +0200 Martin Liška wrote: > On 9/25/18 3:34 PM, Jeff Law wrote: > > On 9/25/18 6:19 AM, Richard Biener wrote: > >> On Tue, Sep 25, 2018 at 9:09 AM Martin Liška > >> wrote: > >>> > >>> On 9/24/18 4:42 PM, Jeff Law wrote: > On 9/22/18 1:08 PM, marxin wrote: > > > > gcc/ChangeLog: > > > > 2018-09-24 Martin Liska > > > > * alias.c (set_dest_equal_p): Remove unused function. > > * config/i386/i386.c (def_builtin_pure2): Likewise. > > * diagnostic-show-locus.c (class layout): Remove > > unused field. > > (layout::layout): Likewise here. > > * dump-context.h (class temp_dump_context): Likewise. > > * dwarf2out.c (add_AT_fde_ref): Remove unused function. > > (add_AT_loclistsptr): Likewise. > > (add_AT_offset): Likewise. > > (get_AT_hi_pc): Likewise. > > (is_comdat_die): Likewise. > > (type_is_enum): Likewise. > > (ceiling): Likewise. > > (add_AT_vms_delta): Likewise. > > (is_class_die): Likewise. > > * edit-context.c (class line_event): Remove unused field. > > * graphite-sese-to-poly.c (tree_int_to_gmp): Remove > > unused function. > > * ipa-cp.c (ipa_get_vr_lat): Likewise. > > * lra-constraints.c (ok_for_index_p_nonstrict): Likewise. > > (ok_for_base_p_nonstrict): Likewise. > > * tree-chrec.c (is_not_constant_evolution): Likewise. > > (chrec_fold_poly_cst): Likewise. > > * tree-if-conv.c (has_pred_critical_p): Likewise. > > * tree-ssa-coalesce.c (print_exprs): Likewise. > > * tree-ssa-pre.c (bitmap_set_contains_expr): Likewise. > > * tree-ssa-uninit.c (is_and_or_or_p): Likewise. > > * tree-vrp.c (value_ranges_intersect_p): Likewise. > > (value_range_nonnegative_p): Likewise. > > > > gcc/cp/ChangeLog: > > > > 2018-09-24 Martin Liska > > > > * name-lookup.c (namespace_scope_ht_size): Remove > > unused function. > > * parser.c (cp_lexer_next_token_is_not_keyword): Likewise. > > > > gcc/fortran/ChangeLog: > > > > 2018-09-24 Martin Liska > > > > * trans.c (remove_suffix): Remove > > unused function. > > > > gcc/go/ChangeLog: > > > > 2018-09-24 Martin Liska > > > > * gofrontend/escape.cc (Gogo::analyze_escape): Remove > > usage of a parameter. > > (Gogo::assign_connectivity): Likewise. > > (class Escape_analysis_tag): Likewise. > > (Gogo::tag_function): Likewise. > > * gofrontend/expressions.cc (Call_expression::do_type): > > Likewise. > > * gofrontend/gogo.h (class Gogo): Likewise. > > * gofrontend/types.cc (class Call_multiple_result_type): > > Likewise. (Type::make_call_multiple_result_type): Likewise. > > * gofrontend/types.h (class Type): Likewise. > > * gofrontend/wb.cc (class Check_escape): Likewise. > > (Gogo::add_write_barriers): Likewise. > Note that some of these may be used inside conditional code. For > example the add_AT_vms_delta is used by ia64-vms. You could > argue that ia64-vms is a zombie and should be removed, but the > general concern WRT conditionally compiled code holds. > >>> > >>> Well, please let me install the patch as is and if somebody will > >>> need the functionality, I would be happy to put it back. > >> > >> You could use contrib/config-list.mk to build all supported target > >> configs. You shouldn't break builds for valid configs > >> deliberately > > Right. On a suitably large box it only takes a few hours. Fire > > it off at the end of the day and you've got results in the morning. > > > > So how about this, if it passes config-list.mk then it's OK. > > Obviously it's OK to retain any functions that are flagged as > > necessary by config-list.mk. > > > > Alternately find + grep is your friend, but I find config-list.mk > > is better. > > > > jeff > > > > Ok, next time I would be more careful about testing of other targets. > > Thanks, > Martin Although not perfect, but may i suggest to consider putting the attached contrib/unused_functions.py into our tree? Just for fun i'm attaching output for fortran, c, cp as per the "Examples" comment in the header of the script. sample line for cp: gcc/cp/cvt.o: Symbol 'cp_get_callee_fndecl(tree_node*)' declared extern but never referenced externally sample for c: gcc/c/c-decl.o: Symbol 'pending_invalid_xref_location' declared extern but never referenced externally gcc/c/c-decl.o: Symbol 'pending_invalid_xref' declared extern but never referenced externally I may find time to take care of the fortran spots later on, fwiw. As per the comment inline, one could add support for extracting archive-members. This would allow to optionally put the script near t
Re: [Patch, Aarch64] Testsuite patch to fix one of the regressions in PR 87433, gcc.target/aarch64/ashltidisi.c
Hi Steve, On 26/09/18 15:44, Steve Ellcey wrote: The patch for PR rtl-optimization/85160 which allowed combine to convert two instructions into two different instructions if they had a lower cost caused a couple of regressions on aarch64. This patch fixes one of them. After the above patch, the gcc.target/aarch64/ashltidisi.c test generated 3 asr instructions instead of 4. Given that the overall test now has two fewer instructions and appears to be superior to the original generated code, this patch just updates the test to reflect the newly generated code. Tested on aarch64, OK for checkin? Steve Ellcey sell...@cavium.com 2018-09-26 Steve Ellcey * gcc.target/aarch64/ashltidisi.c: Expect 3 asr instructions instead of 4. diff --git a/gcc/testsuite/gcc.target/aarch64/ashltidisi.c b/gcc/testsuite/gcc.target/aarch64/ashltidisi.c index 293a0f2..e2a0997 100644 --- a/gcc/testsuite/gcc.target/aarch64/ashltidisi.c +++ b/gcc/testsuite/gcc.target/aarch64/ashltidisi.c @@ -45,5 +45,5 @@ main (int argc, char **argv) return 0; } -/* { dg-final { scan-assembler-times "asr" 4 } } */ +/* { dg-final { scan-assembler-times "asr" 3 } } */ /* { dg-final { scan-assembler-not "extr\t" } } */ This looks obvious to me. Thanks, Kyrill
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
Tamar Christina writes: > The 09/27/2018 09:40, Richard Sandiford wrote: >> Tamar Christina writes: >> > @@ -120,22 +121,41 @@ proc check_cached_effective_target { prop args } { >> > global et_prop_list >> > >> > set target [current_target_name] >> > -if {![info exists et_cache($prop,target)] >> > - || $et_cache($prop,target) != $target} { >> > +if {![info exists et_cache($prop,$target)]} { >> >verbose "check_cached_effective_target $prop: checking $target" 2 >> > - set et_cache($prop,target) $target >> > - set et_cache($prop,value) [uplevel eval $args] >> > + if {[string is true -strict $args] || [string is false -strict $args]} { >> > + set et_cache($prop,$target) $args >> > + } else { >> > + set et_cache($prop,$target) [uplevel eval $args] >> > + } >> >> Why are the checks for true and false needed? We shouldn't be >> using this function for something that's already true or false. >> > > I had wanted to be a bit lenient here in accepting it if you have a > function that already fully evaluated the expr before passing it to > check_cached_effective_target. > > i.e. something like > > proc check_effective_target_vect_int { } { > return [check_cached_effective_target_indexed [ >expr { > > }]] > } > > The error you would get if you do this is very confusing so I thought > since it didn't matter much for the regexp only target triple tests > that just accepting this would be fine. Seems a good thing that that's a noisy failure; the function should make up its mind whether it wants to cache (use curly braces) or not (just return the expr directly). > Should I drop it or keep it? Think we should either drop it or make it into a more user-friendly error, e.g.: if {[string is true -strict $args] || [string is false -strict $args]} { error {check_cached_effective_target condition already evaluated; did you pass [...] instead of the expected {...}?} } else { set et_cache($prop,$target) [uplevel eval $args] } Thanks, Richard
Re: [PATCH 07/25] [pr82089] Don't sign-extend SFV 1 in BImode
On 26/09/18 17:25, Richard Sandiford wrote: OK, thanks. Committed, thanks. Andrew
Re: GCC options for kernel live-patching (Was: Add a new option to control inlining only on static functions)
Hi, (this message is a part of the thread originating with https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01018.html) On Thu, Sep 27 2018, Jan Hubicka wrote: >> > If you make this to be INTERPOSABLE (which means it can be replaced by >> > different >> > implementation by linker and that is probably what we want for live >> > patching) >> > then also inliner, ipa-sra and other optimization will give up on these. >> >> do you suggest that to set the global function as AVAIL_INTERPOSABLE when >> -finline-only-static >> is present? then we should avoid all issues? > > It seems to be reasonable direction I think, because it is what really happens > (well AVAIL_INTERPOSABLE still does not assume that the interposition will > happen at runtime, but it is an approximation and we may introduce something > like > AVAIL_RUNTIME_INTERPOSABLE if there is need for better difference). > I wonder if -finline-only-static is good name for the flag though, because it > does a lot more than that. Maybe something like -flive-patching? > How much is this all tied to one particular implementation of the feature? We have just had a quick discussion with two upstream maintainers of Linux kernel live-patching about this and the key points were: 1. SUSE live-patch creators (and I assume all that use the upstream live-patching method) use Martin Liska's (somewhat under-documented) -fdump-ipa-clones option and a utility he wrote (https://github.com/marxin/kgraft-analysis-tool) to deal with all kinds of inlining, IPA-CP and generally all IPA optimizations that internally create a clone. The tool tells them what happened and also lists all callers that need to be live-patched. 2. However, there is growing concern about other IPA analyses that do not create a clone but still affect code generation in other functions. Kernel developers have identified and disabled IPA-RA but there is more of them such as IPA-modref analysis, stack alignment propagation and possibly quite a few others which extract information from one function and use it a caller or perhaps even some almost-unrelated functions (such as detection of read-only and write-only static global variables). The kernel live-patching community would welcome if GCC had an option that could disable all such optimizations/analyses for which it cannot provide a list of all affected functions (i.e. which ones need to be live-patched if a particular function is). I assume this is orthogonal to the proposed -finline-only-static option, but the above approach seems superior in all respects. 3. The community would also like to be involved in these discussions, and therefore I am adding live-patch...@vger.kernel.org to CC. On a related note, they will also have a live-patching mini-summit at the Linux Plumbers conference in Vancouver in November where they plan to discuss what they would like GCC to provide. Thanks, Martin ... >> >> >> >>> For example comdat that was cloned by IPA-SRA. See can_be_local_p and >> >>> comdat_can_be_unshared_p predicates. Similar problem happens to clones >> >>> created >> >>> by ipa-cp. >> >>> >> >>> I guess we want to disable localization and cloning in this case as well. >> >>> I wonder what else. >> >> >> >> Yes, I think we should make -finline-only-static incompatible with >> >> cloning and tree-sra too. >> >> >> >> Qing >> >>> >> >>> Honza >>
Re: OpenCoarrays integration with gfortran
On Mon, Sep 24, 2018 at 12:58 PM Alastair McKinstry wrote: > > > On 23/09/2018 10:46, Toon Moene wrote: > > On 09/22/2018 01:23 AM, Jerry DeLisle wrote: > > > > I just installed opencoarrays on my system at home (Debian Testing): > > > > root@moene:~# apt-get install libcoarrays-openmpi-dev > > ... > > Setting up libcaf-openmpi-3:amd64 (2.2.0-3) ... > > Setting up libcoarrays-openmpi-dev:amd64 (2.2.0-3) ... > > Processing triggers for libc-bin (2.27-6) ... > > > > [ previously this led to apt errors, but not now. ] > > > > and moved my own installation of the OpenCoarrays-2.2.0.tar.gz out of > > the way: > > > > toon@moene:~$ ls -ld *pen* > > drwxr-xr-x 6 toon toon 4096 Aug 10 16:01 OpenCoarrays-2.2.0.opzij > > drwxr-xr-x 8 toon toon 4096 Sep 15 11:26 opencoarrays-build.opzij > > drwxr-xr-x 6 toon toon 4096 Sep 15 11:26 opencoarrays.opzij > > > > and recompiled my stuff: > > > > gfortran -g -fbacktrace -fcoarray=lib random-weather.f90 > > -L/usr/lib/x86_64-linux-gnu/open-coarrays/openmpi/lib -lcaf_mpi > > > > [ Yes, the location of the libs is quite experimental, but OK for the > > "Testing" variant of Debian ... ] > > > > I couldn't find cafrun, but mpirun works just fine: > > > > toon@moene:~/src$ echo ' &config /' | mpirun --oversubscribe --bind-to > > none -np 20 ./a.out > > Decomposition information on image7 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image6 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 11 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 15 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image1 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 13 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 12 is4 *5 slabs with 21 > > * 18 grid cells on this image. > > Decomposition information on image 20 is4 *5 slabs with 21 > > * 18 grid cells on this image. > > Decomposition information on image9 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 14 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 16 is4 *5 slabs with 21 > > * 18 grid cells on this image. > > Decomposition information on image 17 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 18 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image2 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image4 is4 *5 slabs with 21 > > * 18 grid cells on this image. > > Decomposition information on image5 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image3 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image8 is4 *5 slabs with 21 > > * 18 grid cells on this image. > > Decomposition information on image 10 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > Decomposition information on image 19 is4 *5 slabs with 23 > > * 18 grid cells on this image. > > > > ... etc. (see http://moene.org/~toon/random-weather.f90). > > > > I presume other Linux distributors will follow shortly (this *is* > > Debian Testing, which can be a bit testy at times - but I do trust my > > main business at home on it for over 15 years now). > > > > Kind regards, > > > Thanks, good to see it being tested (I'm the Debian/Ubuntu packager). > > caf /cafrun has been dropped (for the moment ? ) in favour of mpirun, > but I've added pkg-config caf packages so that becomes an option. > > $ pkg-config caf-mpich --libs > > -L/usr/lib/x86_64-linux-gnu/open-coarrays/mpich/lib -lcaf_mpich > -Wl,-z,relro -lmpich -lm -lbacktrace -lpthread -lrt > > (My thinking is that for libraries in particular, the user need not know > whether CAF is being used, and if lib foobar uses CAF, then adding a: > > Requires: caf > > into the pkg-config file gives you the correct linking transparently. > > The "strange" paths are due to Debians multiarch : it is possible to > include libraries for multiple architectures simultaneously. This works > ok with pkg-config and cmake , etc (which allow you to set > PKG_CONFIG_PATH and have multiple pkgconfig files for different libs > simultaneously) , but currently break wrappers such as caf / cafrun. > > I can add a new package for caf / cafrun but would rather not. (W e > currently don't do non-MPI CAF builds). > > There is currently pkg-config files 'caf-mpich' and 'caf-openmpi' for > t
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 3:52 AM, Richard Biener wrote: > On Wed, Sep 26, 2018 at 8:11 PM H.J. Lu wrote: >> >> Add -mzero-caller-saved-regs=[skip|used|all] command-line option and >> zero_caller_saved_regs("skip|used|all") function attribue: >> >> 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") >> >> Don't zero caller-saved integer registers upon function return. >> >> 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") >> >> Zero used caller-saved integer registers upon function return. >> >> 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") >> >> Zero all caller-saved integer registers upon function return. >> >> Tested on i686 and x86-64 with bootstrapping GCC trunk and >> -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all >> enabled by default. > > Can this be done in a target independet way? Find out the live outgoing integer registers can be done in a target independent way. But zeroing caller-saved registers should be done when epilogue is generated, which is target dependent. > Richard. > >> gcc/ >> >> * config/i386/i386-opts.h (zero_caller_saved_regs): New enum. >> * config/i386/i386-protos.h (ix86_split_simple_return_pop_internal): >> Renamed to ... >> (ix86_split_simple_return_internal): This. >> * config/i386/i386.c (ix86_set_zero_caller_saved_regs_type): New >> function. >> (ix86_set_current_function): Call >> ix86_set_zero_caller_saved_regs_type. >> (ix86_expand_prologue): Replace gen_prologue_use with >> gen_pro_epilogue_use. >> (ix86_expand_epilogue): Replace gen_simple_return_pop_internal >> with ix86_split_simple_return_internal. Replace >> gen_simple_return_internal with ix86_split_simple_return_internal. >> (ix86_find_live_outgoing_regs): New function. >> (ix86_split_simple_return_pop_internal): Removed. >> (ix86_split_simple_return_internal): New function. >> (ix86_handle_fndecl_attribute): Support zero_caller_saved_regs >> attribute. >> (ix86_attribute_table): Add zero_caller_saved_regs. >> * config/i386/i386.h (machine_function): Add >> zero_caller_saved_regs_type and live_outgoing_regs. >> (TARGET_POP_SCRATCH_REGISTER): New. >> * config/i386/i386.md (UNSPEC_SIMPLE_RETURN): New UNSPEC. >> (UNSPECV_PROLOGUE_USE): Renamed to ... >> (UNSPECV_PRO_EPILOGUE_USE): This. >> (prologue_use): Renamed to ... >> (pro_epilogue_use): This. >> (simple_return_internal): Changed to define_insn_and_split. >> (simple_return_internal_1): New pattern. >> (simple_return_pop_internal): Replace >> ix86_split_simple_return_pop_internal with >> ix86_split_simple_return_internal. Always call >> ix86_split_simple_return_internal if epilogue_completed is >> true. >> (simple_return_pop_internal_1): New pattern. >> (Epilogue deallocator to pop peepholes): Enabled only if >> TARGET_POP_SCRATCH_REGISTER is true. >> * config/i386/i386.opt (mzero-caller-saved-regs=): New option. >> * doc/extend.texi: Document zero_caller_saved_regs attribute. >> * doc/invoke.texi: Document -mzero-caller-saved-regs=. >> >> gcc/testsuite/ >> >> * gcc.target/i386/zero-scratch-regs-1.c: New test. >> * gcc.target/i386/zero-scratch-regs-2.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-3.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-4.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-5.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-6.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-7.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-8.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-9.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-10.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-11.c: Likewise. >> * gcc.target/i386/zero-scratch-regs-12.c: Likewise. -- H.J.
Re: [PATCH, AArch64 00/11] LSE atomics out-of-line
On 26/09/2018 06:03, rth7...@gmail.com wrote: From: Richard Henderson ARMv8.1 adds an (mandatory) Atomics extension, also known as the Large System Extension. Deploying this extension at the OS level has proved challenging. The following is the result of a conversation between myself, Alex Graf of SuSE, and Ramana Radhakrishnan of ARM, at last week's Linaro Connect in Vancouver. The current state of the world is that one could distribute two different copies of a given shared library and place the LSE-enabled version in /lib64/atomics/ and it will be selected over the /lib64/ version by ld.so when HWCAP_ATOMICS is present. Alex's main concern with this is that (1) he doesn't want to distribute two copies of every library, or determine what a resonable subset would be and (2) this solution does not work for executables, e.g. mysql. Ramana's main concern was to avoid the overhead of an indirect jump, especially in how that would affect the (non-)branch-prediction of the smallest implementations. Therefore, I've created small out-of-line helpers that are directly linked into every library or executable that requires them. There will be two direct branches, both of which will be well-predicted. In the process, I discovered a number of places within the code where the existing implementation could be improved. In particular: - the LSE patterns didn't use predicates or constraints that match the actual instructions, requiring unnecessary splitting. - the non-LSE compare-and-swap can use an extending compare to avoid requiring the input to have been previously extended. - TImode compare-and-swap was missing entirely. This brings aarch64 to parity with x86_64 wrt __sync_val_compare_and_swap. There is a final patch that enables the new option by default. I am not necessarily expecting this to be merged upstream, but for the operating system to decide what the default should be. It might be that this should be a configure option, so as to make that OS choice easier, but I've just now thought of that. ;-) I'm going to have to rely on Alex and/or Ramana to perform testing on a system that supports LSE. Thanks for this patchset - I'll give this a whirl in the next couple of days but don't expect results until Monday or so. I do have an additional concern that I forgot to mention in Vancouver - Thanks Wilco for reminding me that this now replaces a bunch of inline instructions with effectively a library call therefore clobbering a whole bunch of caller saved registers. In which case I see 2 options. - maybe we should consider a private interface and restrict the registers that these files are compiled with to minimise the number of caller saved registers we trash. - Alternatively we should consider an option to inline these at O2 or O3 as we may just be trading the performance improvements we get with using the lse atomics for additional stacking and unstacking of caller saved registers in the main functions... But anyway while we discuss that we'll have a look at testing and benchmarking this. regards Ramana r~ Richard Henderson (11): aarch64: Simplify LSE cas generation aarch64: Improve cas generation aarch64: Improve swp generation aarch64: Improve atomic-op lse generation aarch64: Emit LSE st instructions Add visibility to libfunc constructors Link static libgcc after shared libgcc for -shared-libgcc aarch64: Add out-of-line functions for LSE atomics aarch64: Implement -matomic-ool aarch64: Implement TImode compare-and-swap Enable -matomic-ool by default gcc/config/aarch64/aarch64-protos.h | 20 +- gcc/optabs-libfuncs.h | 2 + gcc/common/config/aarch64/aarch64-common.c| 6 +- gcc/config/aarch64/aarch64.c | 480 ++ gcc/gcc.c | 9 +- gcc/optabs-libfuncs.c | 26 +- .../atomic-comp-swap-release-acquire.c| 2 +- .../gcc.target/aarch64/atomic-inst-ldadd.c| 18 +- .../gcc.target/aarch64/atomic-inst-ldlogic.c | 54 +- .../gcc.target/aarch64/atomic-op-acq_rel.c| 2 +- .../gcc.target/aarch64/atomic-op-acquire.c| 2 +- .../gcc.target/aarch64/atomic-op-char.c | 2 +- .../gcc.target/aarch64/atomic-op-consume.c| 2 +- .../gcc.target/aarch64/atomic-op-imm.c| 2 +- .../gcc.target/aarch64/atomic-op-int.c| 2 +- .../gcc.target/aarch64/atomic-op-long.c | 2 +- .../gcc.target/aarch64/atomic-op-relaxed.c| 2 +- .../gcc.target/aarch64/atomic-op-release.c| 2 +- .../gcc.target/aarch64/atomic-op-seq_cst.c| 2 +- .../gcc.target/aarch64/atomic-op-short.c | 2 +- .../aarch64/atomic_cmp_exchange_zero_reg_1.c | 2 +- .../atomic_cmp_exchange_zero_strong_1.c | 2 +- .../gcc.target/aarch64/sync-comp-swap.c | 2 +- .../gcc.target/aarch64/sync-op-acquire.c | 2 +- .../g
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On 26/09/18 19:10, H.J. Lu wrote: Add -mzero-caller-saved-regs=[skip|used|all] command-line option and zero_caller_saved_regs("skip|used|all") function attribue: 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") Don't zero caller-saved integer registers upon function return. 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") Zero used caller-saved integer registers upon function return. 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") Zero all caller-saved integer registers upon function return. Tested on i686 and x86-64 with bootstrapping GCC trunk and -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all enabled by default. from this description and the documentation it's not clear to me what this tries to achieve. is it trying to prevent information leak? or some pcs hack the caller may rely on? if it's for information leak then i'd expect such attribute to be used on crypto code.. however i'd expect crypto code to use simd registers as well, so integer only cleaning needs explanation.
Re: [PATCH, AArch64 10/11] aarch64: Implement TImode compare-and-swap
Hi Richard, I don't have any comment on the overall aim of the patch series, but in this particular patch implementing casp it looks like you doesn't ensure the register pairs for casp are even-odd. This is the restriction in the Arm Arm decode for casp variants as if Rs<0> == '1' then UnallocatedEncoding(); if Rt<0> == '1' then UnallocatedEncoding(); Example testcase where this trips up: #include #include bool foo (__int128 *ptr) { register __int128 a __asm ("x3") = 0; __int128 b = 0; // This statement to ensure "a" is indeed put into x3,x4 asm ("" : "=r" (a) : "0" (a)); return __sync_bool_compare_and_swap (ptr, b, a); } $ build-aarch64-none-elf/install/bin/aarch64-none-elf-gcc \ $ -matomic-ool \ $ -march=armv8.4-a \ $ -O3 \ $ -o test \ $ -c ~/test.c /tmp/cc0dw2At.s: Assembler messages: /tmp/cc0dw2At.s:15: Error: reg pair must start from even reg at operand 1 -- `caspal x1,x2,x3,x4,[x0]' gnu-work [14:01:14] $
Re: [PATCH] PR86957
On 9/27/18 12:14 PM, Martin Liška wrote: > On 9/27/18 11:47 AM, Richard Biener wrote: >> On Mon, Sep 24, 2018 at 9:14 PM Indu Bhagat wrote: >>> >>> Done. Attached is updated patch. >>> >>> Patch is tested on x86_64 >> >> You obviously did _not_ properly test the patch since it causes a >> bunch of new testsuite >> failures: >> >> FAIL: g++.dg/pr60518.C -std=gnu++11 (test for excess errors) >> FAIL: g++.dg/pr60518.C -std=gnu++14 (test for excess errors) >> FAIL: g++.dg/pr60518.C -std=gnu++98 (test for excess errors) >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++11 (test for excess errors) >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++14 (test for excess errors) >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++98 (test for excess errors) >> FAIL: g++.dg/guality/pr55665.C -O2 -flto -fno-use-linker-plugin >> -flto-partition=none line 23 p == 40 >> FAIL: g++.dg/torture/pr59265.C -O0 (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -O1 (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -O2 (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -O2 -flto -fno-use-linker-plugin >> -flto-partition=none (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -O2 -flto -fuse-linker-plugin >> -fno-fat-lto-objects (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -O3 -g (test for excess errors) >> FAIL: g++.dg/torture/pr59265.C -Os (test for excess errors) >> FAIL: g++.dg/tree-prof/morefunc.C compilation, -fprofile-use -D_PROFILE_USE >> UNRESOLVED: g++.dg/tree-prof/morefunc.C execution,-fprofile-use >> -D_PROFILE_USE >> >> and more. Please get up to speed with GCC development and testing >> requirements! >> >> Richard. > > I'll cook patch for it in order to remove the failures as soon as possible. > > Martin > >> >>> Thanks >>> >>> >>> On 09/24/2018 09:37 AM, Martin Sebor wrote: I would suggest to use the term "remove" or "delete" instead of the informal "wipe out" when referring to removing files or their contents. Martin >>> > There's tested patch that I made. It's tested on x86_64-linux-gnu. Feel free to install it after approval. I'll be back on Monday. Martin >From 1757b63f13e0f18bab4be90001e1056069f8799a Mon Sep 17 00:00:00 2001 From: marxin Date: Thu, 27 Sep 2018 12:14:09 +0200 Subject: [PATCH] Fix fallout of -Wmissing-profile warning. gcc/ChangeLog: 2018-09-27 Martin Liska * coverage.c (get_coverage_counts): Revert the formatting of missing profile opt info. gcc/testsuite/ChangeLog: 2018-09-27 Martin Liska * g++.dg/pr60518.C: Add -Wno-missing-profile. * g++.dg/torture/pr59265.C: Likewise. * g++.dg/tree-prof/morefunc.C: Likewise. * g++.dg/tree-ssa/dom-invalid.C: Likewise. * gcc.dg/pr26570.c: Likewise. * gcc.dg/pr32773.c: Likewise. * gcc.dg/pr40209.c: Likewise. * gcc.dg/pr51957-1.c: Likewise. * gcc.dg/pr80747.c: Likewise. * gcc.target/aarch64/pr62262.c: Likewise. --- gcc/coverage.c | 10 -- gcc/testsuite/g++.dg/pr60518.C | 2 +- gcc/testsuite/g++.dg/torture/pr59265.C | 2 +- gcc/testsuite/g++.dg/tree-prof/morefunc.C | 2 +- gcc/testsuite/g++.dg/tree-ssa/dom-invalid.C | 2 +- gcc/testsuite/gcc.dg/pr26570.c | 2 +- gcc/testsuite/gcc.dg/pr32773.c | 4 ++-- gcc/testsuite/gcc.dg/pr40209.c | 2 +- gcc/testsuite/gcc.dg/pr51957-1.c| 2 +- gcc/testsuite/gcc.dg/pr80747.c | 2 +- gcc/testsuite/gcc.target/aarch64/pr62262.c | 2 +- 11 files changed, 15 insertions(+), 17 deletions(-) diff --git a/gcc/coverage.c b/gcc/coverage.c index 19c696c3493..6b45ce9085b 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -314,12 +314,10 @@ get_coverage_counts (unsigned counter, unsigned cfg_checksum, dump_user_location_t loc = dump_user_location_t::from_location_t (input_location); dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "file %s not found\n", - da_file_name); - dump_printf (MSG_OPTIMIZED_LOCATIONS, - (flag_guess_branch_prob - ? "execution counts estimated\n" - : "execution counts assumed to be zero\n")); + "file %s not found, %s\n", da_file_name, + (flag_guess_branch_prob +? "execution counts estimated" +: "execution counts assumed to be zero")); } } return NULL; diff --git a/gcc/testsuite/g++.dg/pr60518.C b/gcc/testsuite/g++.dg/pr60518.C index e3da48a2ec9..e4a80da29ee 100644 --- a/gcc/testsuite/g++.dg/pr60518.C +++ b/gcc/testsuite/g++.dg/pr60518.C @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options "-Os -fprofile-use" } +// { dg-options "-Os -fprofile-use -Wno-missing-profile" } int a; int fn1 () { return a == ',' || a == ';'; } diff --git a/gcc/testsuite/g++.dg/torture/pr59265.C b/gcc/testsuite/g++.dg/torture/pr59265.C index d48efe59395..f3dceb929d2 100644 --- a/gcc/testsuite/g++.dg/torture/pr59265.C +++ b/gcc/testsuite/g++.dg/torture/pr59265.C @@ -1,5 +1,5
Add Header to libstdc++
Changelog: For C++2a Support: Add header in libstdc++ top level (From C++2a Working Draft) If __cplusplus is greater than 201703L (indicates C++2a): To namespace std, add span class. The Definition of span used is https://en.cppreference.com/w/cpp/container/span. Notes: Current Static-Extent spans initialized with an ill-sized container will apply the span over the static extent, which may result in Undefined behavior if std::size(container) is less than the extent. This is consistent with the above definition, stating that initializing a static extent span with an ill-sized container is Undefined Behavior (same with ill-sized ranges). Changes: https://patch-diff.githubusercontent.com/raw/gcc-mirror/gcc/pull/31.patch
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 6:08 AM, Szabolcs Nagy wrote: > On 26/09/18 19:10, H.J. Lu wrote: >> >> Add -mzero-caller-saved-regs=[skip|used|all] command-line option and >> zero_caller_saved_regs("skip|used|all") function attribue: >> >> 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") >> >> Don't zero caller-saved integer registers upon function return. >> >> 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") >> >> Zero used caller-saved integer registers upon function return. >> >> 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") >> >> Zero all caller-saved integer registers upon function return. >> >> Tested on i686 and x86-64 with bootstrapping GCC trunk and >> -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all >> enabled by default. >> > > from this description and the documentation it's > not clear to me what this tries to achieve. > > is it trying to prevent information leak? > or some pcs hack the caller may rely on? > > if it's for information leak then i'd expect such > attribute to be used on crypto code.. however i'd > expect crypto code to use simd registers as well, > so integer only cleaning needs explanation. The target usage is in Linux kernel. -- H.J.
Re: [PATCH] Fix PR87440, extra lexical block in inline instances
On Thu, Sep 27, 2018 at 3:28 AM, Richard Biener wrote: > On Wed, 26 Sep 2018, Jason Merrill wrote: > >> On Wed, Sep 26, 2018 at 9:35 AM, Richard Biener wrote: >> > >> > We do not create a DW_AT_lexical_block for the outermost block in >> > functions but we do for DW_AT_inlined_subroutines. That makes >> > debuginfo look like if there were two of each local, the outer >> > one (from the abstract instance) optimized out (visible via >> > info locals in gdb). >> > >> > The following elides the outermost block also from inline instances. >> > It's a bit tricky to reliably track that block given we remove unused >> > blocks here and there. The trick is to have the block in the abstract >> > instance _not_ point to itself (given we do not output it it isn't >> > the abstract origin for itself). >> > >> > Bootstrapped on x86_64-unkown-linux-gnu, testing in progress. >> > >> > Again with some scan-assembler testcase, guality cannot do 'info locals'. >> > >> > OK? >> > >> > Thanks, >> > Richard. >> > >> > 2018-09-26 Richard Biener >> > >> > PR debug/87440 >> > * dwarf2out.c (set_block_origin_self): Do not mark outermost >> > block as we do not output that. >> > (gen_inlined_subroutine_die): Elide the originally outermost >> > block, matching what we do for concrete instances. >> > (decls_for_scope): Add parameter specifying whether to recurse >> > to subblocks. >> > >> > * gcc.dg/debug/dwarf2/inline4.c: New testcase. >> > >> > Index: gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c >> > === >> > --- gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c (nonexistent) >> > +++ gcc/testsuite/gcc.dg/debug/dwarf2/inline4.c (working copy) >> > @@ -0,0 +1,17 @@ >> > +/* Verify that the inline instance has no extra DW_TAG_lexical_block >> > between >> > + the DW_TAG_inlined_subroutine and the DW_TAG_variable for the local. >> > */ >> > +/* { dg-options "-O -gdwarf -dA" } */ >> > +/* { dg-do compile } */ >> > +/* { dg-final { scan-assembler >> > "DW_TAG_inlined_subroutine\[^\\(\]*\\(\[^\\)\]*\\)\[^\\(\]*\\(DIE >> > \\(0x\[0-9a-f\]*\\) DW_TAG_formal_parameter\[^\\(\]*\\(DIE >> > \\(0x\[0-9a-f\]*\\) DW_TAG_variable" } } */ >> > +/* { dg-final { scan-assembler-times "DW_TAG_inlined_subroutine" 2 } } */ >> > + >> > +static int foo (int i) >> > +{ >> > + volatile int j = i + 3; >> > + return j - 2; >> > +} >> > +int main() >> > +{ >> > + volatile int z = foo (-1); >> > + return z; >> > +} >> > Index: gcc/dwarf2out.c >> > === >> > --- gcc/dwarf2out.c (revision 264640) >> > +++ gcc/dwarf2out.c (working copy) >> > @@ -3867,7 +3867,7 @@ static void gen_subroutine_type_die (tre >> > static void gen_typedef_die (tree, dw_die_ref); >> > static void gen_type_die (tree, dw_die_ref); >> > static void gen_block_die (tree, dw_die_ref); >> > -static void decls_for_scope (tree, dw_die_ref); >> > +static void decls_for_scope (tree, dw_die_ref, bool = true); >> > static bool is_naming_typedef_decl (const_tree); >> > static inline dw_die_ref get_context_die (tree); >> > static void gen_namespace_die (tree, dw_die_ref); >> > @@ -22389,7 +22389,13 @@ set_block_origin_self (tree stmt) >> > { >> >if (BLOCK_ABSTRACT_ORIGIN (stmt) == NULL_TREE) >> > { >> > - BLOCK_ABSTRACT_ORIGIN (stmt) = stmt; >> > + /* We do not mark the outermost block as we are not outputting it. >> > +This is then a reliable way of determining whether we should >> > +do the same for an inline instance. */ >> > + if (TREE_CODE (BLOCK_SUPERCONTEXT (stmt)) != FUNCTION_DECL) >> > + BLOCK_ABSTRACT_ORIGIN (stmt) = stmt; >> > + else >> > + gcc_assert (DECL_INITIAL (BLOCK_SUPERCONTEXT (stmt)) == stmt); >> > >> >{ >> > tree local_decl; >> > @@ -24149,7 +24155,24 @@ gen_inlined_subroutine_die (tree stmt, d >> > add_high_low_attributes (stmt, subr_die); >> >add_call_src_coords_attributes (stmt, subr_die); >> > >> > - decls_for_scope (stmt, subr_die); >> > + /* The inliner creates an extra BLOCK for the parameter setup, >> > + we want to merge that with the actual outermost BLOCK of the >> > +inlined function to avoid duplicate locals in consumers. Note >> > +we specially mark that not as origin-self. >> > +Do that by doing the recursion to subblocks on the single subblock >> > +of STMT. */ >> > + bool unwrap_one = false; >> > + if (BLOCK_SUBBLOCKS (stmt) && !BLOCK_CHAIN (BLOCK_SUBBLOCKS (stmt))) >> > + { >> > + tree origin = block_ultimate_origin (BLOCK_SUBBLOCKS (stmt)); >> > + if (origin >> > + && TREE_CODE (origin) == BLOCK >> > + && !BLOCK_ABSTRACT_ORIGIN (origin)) >> >> Can we look at BLOCK_SUPERCONTEXT here rather than above? > > Ah, yes. It could be simply > > + && BLO
[PATCH][GCC][AARCH64]Introduce aarch64 atomic_{load,store}ti patterns
[PATCH][GCC][AARCH64] Introduce aarch64 atomic_{load,store}ti patterns In Armv8.4-a these patterns use the LDP/STP instructions that are guaranteed to be single-copy atomic, ensure correct memory ordering semantics by using the DMB instruction. We put the use of these inline expansions behind a command line flag since they do not satisfy the libatomic ABI and hence can't be used together with code already compiled using 16 byte atomics. This command line flag is -matomic-128bit-instructions. Given the introduction of a flag specified to break ABI compatibility with libatomic, it seems reasonable to introduce the load-exclusive/store-exclusive read-modify-write loop emulation of 128 bit atomic load and stores for older architectures behind this flag. We introduce the usual extension macros for the "at" extension marking the LDP/STP atomicity guarantees introduced in Armv8.4-a and use these to decide which to use when -matomic-128bit-instructions is provided on the command line. Tested with full bootstrap and make check on aarch64-none-linux-gnu. Ok for trunk? gcc/ChangeLog: 2018-09-27 Matthew Malcomson * config/aarch64/aarch64-protos.h (aarch64_split_atomic_ti_access): New prototype. * config/aarch64/aarch64.c (aarch64_split_atomic_ti_access): New. * config/aarch64/aarch64.h (AARCH64_FL_AT): New flag. (AARCH64_FL_PROFILE): Flag moved to accomodate above. (AARCH64_FL_FOR_ARCH8_4): Include AARCH64_FL_AT. (AARCH64_ISA_AT): New ISA flag. * config/aarch64/aarch64.opt (-matomic-128bit-instruction): New. * config/aarch64/atomics.md (atomic_load, atomic_store, @aarch64_load_exclusive {smaller registers}, @aarch64_load_exclusive {GPI registers}, @aarch64_store_exclusive): Use aarch_mm_needs_{acquire,release} instead of three part check. (atomic_loadti, aarch64_atomic_loadti_ldp, aarch64_atomic_loadti_basic atomic_storeti, aarch64_atomic_storeti_stp, aarch64_atomic_storeti_basic) New * config/aarch64/iterators.md (GPI_TI): New. * config/aarch64/predicates.md (aarch64_atomic_TImode_operand, aarch64_TImode_pair_operand): New. * doc/invoke.texi (-matomic-128bit-instructions): Document option. gcc/testsuite/ChangeLog: 2018-09-27 Matthew Malcomson * gcc.target/aarch64/atomic-load128.c: New test. * gcc.target/aarch64/atomic-store.x: Shared macro for below tests. * gcc.target/aarch64/atomic-store.c: Use atomic-store.x. * gcc.target/aarch64/atomic-store128.c: New test using atomic-store.x. ### Attachment also inlined for ease of reply### diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index caf1d2041f0cac8e3f975f8384a167a90dc638e5..578ea925fac9a7237af3a53e7ec642d0ba8e7b93 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -560,6 +560,8 @@ machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); rtx aarch64_load_tp (rtx); +void aarch64_split_atomic_ti_access (rtx op[], bool); + void aarch64_expand_compare_and_swap (rtx op[]); void aarch64_split_compare_and_swap (rtx op[]); void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index e5cdb1d54f4ee96140202ea21a9478438d208f45..c1e407b5a3f27aa7eea9c35e749fe597e79f3e65 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -158,9 +158,10 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ #define AARCH64_FL_RCPC8_4(1 << 20) /* Has ARMv8.4-a RCPC extensions. */ +#define AARCH64_FL_AT (1 << 21) /* Has ARMv8.4-a AT extensions. */ /* Statistical Profiling extensions. */ -#define AARCH64_FL_PROFILE(1 << 21) +#define AARCH64_FL_PROFILE(1 << 22) /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -179,7 +180,7 @@ extern unsigned aarch64_architecture_version; (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3) #define AARCH64_FL_FOR_ARCH8_4 \ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ - | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4) + | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_AT) /* Macros to test ISA flags. */ @@ -201,6 +202,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) #define AARCH64_ISA_F16FML(aarch64_isa_flags & AARCH64_FL_F16FML) #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) +#define AARCH64_ISA_AT(aarch64_isa_flags & AARCH64_FL_AT) /* Crypto is an optional extension to AdvSIMD. */ #defi
Re: [PATCH] GCOV: introduce --json-format.
On Thu, 2018-09-27 at 09:46 +0200, Martin Liška wrote: > Hi. > > For some time we've been providing an intermediate format as > output of GCOV tool. It's documented in our code that primary > consumer of it is lcov. Apparently that's not true: > https://github.com/linux-test-project/lcov/issues/38#issuecomment-371 > 203750 > > So that I decided to come up with providing the very similar > intermediate > format in JSON format. It's much easier for consumers to work with. > > I'm planning to leave the legacy format for GCC 9.1 and I'll document > that > it's deprecated. We can then remove that in next release. > > The patch contains a small change to json.{h,cc}, hope David can > approve that? > Patch is tested on x86_64-linux-gnu. I'm not officially a reviewer for the json stuff, but I can comment, at least. The changes to json.h/cc are fine by me, FWIW. Some high-level observations: * how big are the JSON files? One of the comments at my Cauldron talk on optimization records was a concern that the output could be very large. The JSON files compress really well, so maybe this patch should gzip on output? Though I don't know if it's appropriate for this case. iirc, gfortran's module code has an example of gzipping a pretty_printer buffer. * json::object::print doesn't preserve the insertion order of its name/value pairs; they're written out in whatever order the hashing leads to (maybe we should fix that though). The top-level JSON value in your file format is currently a JSON object containing "version" metadata etc. There's thus a chance that could be at the end, after the data. Perhaps the top-level JSON value should be an array instead (as a "tuple"), to guarantee that the versioning metadata occurs at the start? Or are consumers assumed to read the whole file into memory and traverse it, tree-like? * Similar to the optimization records code, this patch is building a tree of dynamically-allocated JSON objects in memory, and then printing it to a pretty_printer, and flushing the pp's buffer to a FILE *. This is simple and flexible, but I suspect that I may need to rewrite this for the optimization records case to avoid bloating the memory usage (e.g. to eliminate the in-memory JSON tree in favor of printing as we go). Would that concern apply here, or is the pattern OK? * FWIW I'm working on DejaGnu test coverage for JSON output, but it's ugly - a nasty shim between .exp and (optional) .py scripts for parsing the JSON and verifying properties of it, sending the results back in a form that DejaGnu can digest and integrate into the .sum/.log files. I hope to post it before stage 1 closes (I'd much prefer to have good test coverage in place before optimizing how this stuff gets written) [...snip...] > diff --git a/gcc/gcov.c b/gcc/gcov.c > index e255e4e3922..39d9329d6d0 100644 > --- a/gcc/gcov.c > +++ b/gcc/gcov.c [...snip...] > @@ -1346,6 +1481,15 @@ generate_results (const char *file_name) > } > } > > + json::object *root = new json::object (); [...snip...] > > - if (flag_gcov_file && flag_intermediate_format && !flag_use_stdout) > + if (flag_gcov_file && flag_json_format) > +root->dump (out); > + It looks like "root" gets leaked here (and thus all of the objects in the JSON tree also). I don't know if that's a problem, but an easy fix would be to make "root" be an on-stack object, rather than allocated on the heap - this ought to lead everything to be cleaned up when root's dtor runs. Dave
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Wed, Sep 26, 2018 at 11:10:29AM -0700, H.J. Lu wrote: > Add -mzero-caller-saved-regs=[skip|used|all] command-line option and > zero_caller_saved_regs("skip|used|all") function attribue: Minor nit, but could this be named -mzero-call-clobbered-regs? "Caller-saved" is a misnomer and inconsistent with other gcc usage. For example -fcall-used-[reg] documents it as "an allocable register that is clobbered by function calls". This is a piece of terminology I've been trying to get fixed in educational materials so it would be nice to avoid a new instance of it in gcc. Rich
[PATCH][GCC][AARCH64] Add even-pair register classes
Hello, LSE instructions like casp require even-odd pairs of registers. Other instructions that take a pair of registers don't have this restriction. As mentioned in the internals documentation, a limitation of forcing a register pair to start with an even register can't be enforced by defining an EVEN_REGS register class, as a pseudo register that spans multiple hard-registers is only counted as being in a register class if all registers it spans are in that class is only counted as being in a register class if all registers it spans are in that class. The canonical way to require even-odd pairs of registers to implement a TImode pseudo register as mentioned in the documentation is to limit *all* TImode registers to being even-odd by using the TARGET_HARD_REGNO_MODE_OK hook. We don't want to use this method as it would apply to all register pairs. (It is noteworthy that the current cost model in ira prefers to put values that span two registers in an even-odd pair anyway, so the use of TARGET_HARD_REGNO_MODE_OK would likely not cause much change in low register pressure situations) As a workaround we define two register classes that each define a set of non-consecutive even-odd register pairs. i.e. their bitfields are 11001100110011001100110011001100 0011001100110011001100110011 c.f. the bitfield for GENERAL_REGS which is. 1110 (note the last two registers are always masked out as we can't use the stack pointer). Requiring a TImode register to be allocated from one of these classes ensures the register will be allocated an even-odd pair. Using constraint letters Uep and Uex (for U and U). Full bootstrap and regtest done on aarch64-none-linux-gnu. Ok for trunk? gcc/ChangeLog: 2018-09-27 Matthew Malcomson * config/aarch64/aarch64.c (aarch64_class_max_nregs,aarch64_register_move_cost): Account for new register classes. * config/aarch64/aarch64.h (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add new register classes EVEN_PAIRS and EVEN_PAIRS_ALT. * config/aarch64/constraints.md (Uep, Uex): New register constraints corresponding to new register classes. * doc/md.texi: Document new register constraints. gcc/testsuite/ChangeLog: 2018-09-27 Matthew Malcomson * gcc.target/aarch64/even-pair-constraint.c: New test. ### Attachment also inlined for ease of reply### diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 56e48b8a553f0b38a9c39b4defa094bccbebc2c4..4538023971142dcd6d76c05fc54d1ad4eddaca8b 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -513,6 +513,8 @@ extern unsigned aarch64_architecture_version; enum reg_class { NO_REGS, + EVEN_PAIRS, + EVEN_PAIRS_ALT, TAILCALL_ADDR_REGS, GENERAL_REGS, STACK_REG, @@ -532,6 +534,8 @@ enum reg_class #define REG_CLASS_NAMES\ { \ "NO_REGS", \ + "EVEN_PAIRS" \ + "EVEN_PAIRS_ALT" \ "TAILCALL_ADDR_REGS",\ "GENERAL_REGS", \ "STACK_REG", \ @@ -548,6 +552,8 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ { 0x, 0x, 0x }, /* NO_REGS */ \ + { 0x, 0x, 0x }, /* EVEN_PAIRS */\ + { 0x0ccc, 0x, 0x }, /* EVEN_PAIRS_ALT */\ { 0x0004, 0x, 0x }, /* TAILCALL_ADDR_REGS */\ { 0x7fff, 0x, 0x0003 }, /* GENERAL_REGS */ \ { 0x8000, 0x, 0x }, /* STACK_REG */ \ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1e23b2336f6c3b895a20c6129a13e5606cf86f9f..356ced8583a5f543f4870caadcb6314123b9a4fe 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7549,6 +7549,8 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) switch (regclass) { case TAILCALL_ADDR_REGS: +case EVEN_PAIRS_ALT: +case EVEN_PAIRS: case POINTER_REGS: case GENERAL_REGS: case ALL_REGS: @@ -9709,11 +9711,14 @@ aarch64_register_move_cost (machine_mode mode, const struct cpu_regmove_cost *regmove_cost = aarch64_tune_params.regmove_cost; - /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ - if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS) + /* Caller save, even pairs, and pointer regs are equivalent to GENERAL_REGS. + */ + if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS || to == EVEN_PAIRS + || to == EVEN_PAIRS_ALT) to = GENERAL_REGS; - if (from =
Re: [PATCH 11/25] Simplify vec_merge according to the mask.
On 27/09/18 08:16, Richard Sandiford wrote: On keeping the complexity down: if (side_effects_p (x)) return NULL_RTX; makes this quadratic for chains of unary operations. Is it really needed? The code after it simply recurses on operands and doesn't discard anything itself, so it looks like the VEC_MERGE call to side_effects_p would be enough. The two calls do not check the same thing. The other one checks the other operand of a vec_merge, and this checks the current operand. I suppose it's safe to discard a VEC_MERGE when the chosen operand contains side effects, but I'm not so sure when the VEC_MERGE itself is an operand to an operator with side effects. I'm having a hard time inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but maybe a volatile MEM or ASM_OPERANDS could do? Conversely, I don't see that side-effects deep down in an expression should stop us transforming it as a high level. Is there an equivalent to side_effects_p that doesn't recurse? Should there be? Andrew
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
> > > > The error you would get if you do this is very confusing so I thought > > since it didn't matter much for the regexp only target triple tests > > that just accepting this would be fine. > > Seems a good thing that that's a noisy failure; the function should > make up its mind whether it wants to cache (use curly braces) or not > (just return the expr directly). > > > Should I drop it or keep it? > > Think we should either drop it or make it into a more user-friendly > error, e.g.: > > if {[string is true -strict $args] || [string is false -strict $args]} { > error {check_cached_effective_target condition already evaluated; > did you pass [...] instead of the expected {...}?} > } else { > set et_cache($prop,$target) [uplevel eval $args] > } > Done, I have also ran a regression test on x86_64 with unix{,-m32} and no fallouts, testsuite is clean. Attached updated patch with feedback processed. Ok for trunk? Thanks, Tamar > Thanks, > Richard -- diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c2d814cf8c692e5939ec71c43a50c5bd52554b2b..b70536503f87fe4e8b3bd27a0c25c5317f8bf9c8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -37,6 +37,7 @@ # and "// Go" for Go # If the tool is ObjC/ObjC++ then we overide the extension to .m/.mm to # allow for ObjC/ObjC++ specific flags. + proc check_compile {basename type contents args} { global tool verbose "check_compile tool: $tool for $basename" @@ -117,25 +118,36 @@ proc current_target_name { } { proc check_cached_effective_target { prop args } { global et_cache -global et_prop_list set target [current_target_name] -if {![info exists et_cache($prop,target)] - || $et_cache($prop,target) != $target} { +if {![info exists et_cache($prop,$target)]} { verbose "check_cached_effective_target $prop: checking $target" 2 - set et_cache($prop,target) $target - set et_cache($prop,value) [uplevel eval $args] - if {![info exists et_prop_list] - || [lsearch $et_prop_list $prop] < 0} { - lappend et_prop_list $prop + if {[string is true -strict $args] || [string is false -strict $args]} { + error {check_cached_effective_target condition already evaluated;\ + did you pass [...] instead of the expected {...}?} + } else { + set code [catch {uplevel eval $args} result] + if {$code != 0 && $code != 2} { + return -code $code $result + } + set et_cache($prop,$target) $result } - verbose "check_cached_effective_target cached list is now: $et_prop_list" 2 } -set value $et_cache($prop,value) +set value $et_cache($prop,$target) verbose "check_cached_effective_target $prop: returning $value for $target" 2 return $value } +# Implements a version of check_cached_effective_target that also takes et_index +# into account when creating the key for the cache. +proc check_cached_effective_target_indexed { prop args } { +global et_index +set key "$et_index $prop" +verbose "check_cached_effective_target_index $prop: returning $key" 2 + +return [check_cached_effective_target $key [list uplevel eval $args]] +} + # Clear effective-target cache. This is useful after testing # effective-target features and overriding TEST_ALWAYS_FLAGS and/or # ALWAYS_CXXFLAGS. @@ -147,16 +159,7 @@ proc check_cached_effective_target { prop args } { proc clear_effective_target_cache { } { global et_cache -global et_prop_list - -if {[info exists et_prop_list]} { - verbose "clear_effective_target_cache: $et_prop_list" 2 - foreach prop $et_prop_list { - unset et_cache($prop,value) - unset et_cache($prop,target) - } - unset et_prop_list -} +array unset et_cache } # Like check_compile, but delete the output file and return true if the @@ -380,12 +383,9 @@ proc check_visibility_available { what_kind } { # be determined. proc check_alias_available { } { -global alias_available_saved global tool -if [info exists alias_available_saved] { -verbose "check_alias_available returning saved $alias_available_saved" 2 -} else { +return [check_cached_effective_target alias_available { set src alias[pid].c set obj alias[pid].o verbose "check_alias_available compiling testfile $src" 2 @@ -402,7 +402,7 @@ proc check_alias_available { } { if [string match "" $lines] then { # No error messages, everything is OK. - set alias_available_saved 2 + return 2 } else { if [regexp "alias definitions not supported" $lines] { verbose "check_alias_available target does not support aliases" 2 @@ -411,24 +411,20 @@ proc check_alias_available { } { if { $objformat == "elf" } { verbose "check_alias_available but target uses ELF format, so it ought to" 2 - set alias_available_saved -1 + return -1 } else { - set alias_available_saved 0 + return 0 }
[PATCH] Fix PR87451
differen comment chars. Tested on x86_64-unknown-linux-gnu, applied. Richard. 2018-09-27 Richard Biener PR testsuite/87451 * gcc.dg/debug/dwarf2/inline5.c: Deal with different comment characters. Index: gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c === --- gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c (revision 264666) +++ gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c (working copy) @@ -4,13 +4,13 @@ /* { dg-options "-O -gdwarf -dA" } */ /* { dg-do compile } */ /* { dg-final { scan-assembler-times "DW_TAG_inlined_subroutine" 2 } } */ -/* { dg-final { scan-assembler-times "DW_TAG_lexical_block\\)\[^#\]*# DW_AT_abstract_origin" 2 } } */ -/* { dg-final { scan-assembler-times "DW_TAG_lexical_block\\)\[^#\]*# \\(DIE \\(0x\[0-9a-f\]*\\) DW_TAG_variable" 1 } } */ +/* { dg-final { scan-assembler-times "DW_TAG_lexical_block\\)\[^#/!\]*\[#/!\] DW_AT_abstract_origin" 2 } } */ +/* { dg-final { scan-assembler-times "DW_TAG_lexical_block\\)\[^#/!\]*\[#/!\] \\(DIE \\(0x\[0-9a-f\]*\\) DW_TAG_variable" 1 } } */ /* We do not know which is output first so look for both invalid abstract origins on the lexical blocks (knowing that the abstract instance has no attribute following the DW_TAG_lexical_block. */ -/* { dg-final { scan-assembler-not "\\(DIE \\(0x(\[0-9a-f\]*)\\) DW_TAG_lexical_block\\)\[^#\]*# \[^(\].*DW_TAG_lexical_block\\)\[^#x\]*x\\1\[^#\]*# DW_AT_abstract_origin" } } */ -/* { dg-final { scan-assembler-not "DW_TAG_lexical_block\\)\[^#x\]*x(\[0-9a-f\]*)\[^#\]*# DW_AT_abstract_origin.*\\(DIE \\(0x\\1\\) DW_TAG_lexical_block\\)\[^#\]*# DW_AT" } } */ +/* { dg-final { scan-assembler-not "\\(DIE \\(0x(\[0-9a-f\]*)\\) DW_TAG_lexical_block\\)\[^#/!\]*\[#/!\] \[^(\].*DW_TAG_lexical_block\\)\[^#/!x\]*x\\1\[^#/!\]*\[#/!\] DW_AT_abstract_origin" } } */ +/* { dg-final { scan-assembler-not "DW_TAG_lexical_block\\)\[^#/!x\]*x(\[0-9a-f\]*)\[^#/!\]*\[#/!\] DW_AT_abstract_origin.*\\(DIE \\(0x\\1\\) DW_TAG_lexical_block\\)\[^#/!\]*\[#/!\] DW_AT" } } */ int foo (int i) {
Re: [PATCH] PR86957
On Thu, Sep 27, 2018 at 3:10 PM Martin Liška wrote: > > On 9/27/18 12:14 PM, Martin Liška wrote: > > On 9/27/18 11:47 AM, Richard Biener wrote: > >> On Mon, Sep 24, 2018 at 9:14 PM Indu Bhagat wrote: > >>> > >>> Done. Attached is updated patch. > >>> > >>> Patch is tested on x86_64 > >> > >> You obviously did _not_ properly test the patch since it causes a > >> bunch of new testsuite > >> failures: > >> > >> FAIL: g++.dg/pr60518.C -std=gnu++11 (test for excess errors) > >> FAIL: g++.dg/pr60518.C -std=gnu++14 (test for excess errors) > >> FAIL: g++.dg/pr60518.C -std=gnu++98 (test for excess errors) > >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++11 (test for excess errors) > >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++14 (test for excess errors) > >> FAIL: g++.dg/tree-ssa/dom-invalid.C -std=gnu++98 (test for excess errors) > >> FAIL: g++.dg/guality/pr55665.C -O2 -flto -fno-use-linker-plugin > >> -flto-partition=none line 23 p == 40 > >> FAIL: g++.dg/torture/pr59265.C -O0 (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -O1 (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -O2 (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -O2 -flto -fno-use-linker-plugin > >> -flto-partition=none (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -O2 -flto -fuse-linker-plugin > >> -fno-fat-lto-objects (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -O3 -g (test for excess errors) > >> FAIL: g++.dg/torture/pr59265.C -Os (test for excess errors) > >> FAIL: g++.dg/tree-prof/morefunc.C compilation, -fprofile-use > >> -D_PROFILE_USE > >> UNRESOLVED: g++.dg/tree-prof/morefunc.C execution,-fprofile-use > >> -D_PROFILE_USE > >> > >> and more. Please get up to speed with GCC development and testing > >> requirements! > >> > >> Richard. > > > > I'll cook patch for it in order to remove the failures as soon as possible. > > > > Martin > > > >> > >>> Thanks > >>> > >>> > >>> On 09/24/2018 09:37 AM, Martin Sebor wrote: > I would suggest to use the term "remove" or "delete" instead of > the informal "wipe out" when referring to removing files or their > contents. > > Martin > >>> > > > > There's tested patch that I made. It's tested on x86_64-linux-gnu. Feel free > to install it after approval. I'll be back on Monday. Patch is OK (and I installed it). Richard. > Martin
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 3:16 PM H.J. Lu wrote: > > On Thu, Sep 27, 2018 at 6:08 AM, Szabolcs Nagy wrote: > > On 26/09/18 19:10, H.J. Lu wrote: > >> > >> Add -mzero-caller-saved-regs=[skip|used|all] command-line option and > >> zero_caller_saved_regs("skip|used|all") function attribue: > >> > >> 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") > >> > >> Don't zero caller-saved integer registers upon function return. > >> > >> 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") > >> > >> Zero used caller-saved integer registers upon function return. > >> > >> 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") > >> > >> Zero all caller-saved integer registers upon function return. > >> > >> Tested on i686 and x86-64 with bootstrapping GCC trunk and > >> -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all > >> enabled by default. > >> > > > > from this description and the documentation it's > > not clear to me what this tries to achieve. > > > > is it trying to prevent information leak? > > or some pcs hack the caller may rely on? > > > > if it's for information leak then i'd expect such > > attribute to be used on crypto code.. however i'd > > expect crypto code to use simd registers as well, > > so integer only cleaning needs explanation. > > The target usage is in Linux kernel. Maybe still somehow encode that in the option since it otherwise raises expectations that are not met? -mzero-call-clobbered-regs=used-int|all-int|skip|used-simd|used-fp,etc.? and sorry() on unimplemented ones? Or simply zero also non-integer regs the same way? I suppose there isn't sth like vzeroupper that zeros all SIMD regs and completely? Richard. > -- > H.J.
Re: [PATCH, AArch64 00/11] LSE atomics out-of-line
> Am 27.09.2018 um 15:07 schrieb Ramana Radhakrishnan > : > >> On 26/09/2018 06:03, rth7...@gmail.com wrote: >> From: Richard Henderson >> ARMv8.1 adds an (mandatory) Atomics extension, also known as the >> Large System Extension. Deploying this extension at the OS level >> has proved challenging. >> The following is the result of a conversation between myself, >> Alex Graf of SuSE, and Ramana Radhakrishnan of ARM, at last week's >> Linaro Connect in Vancouver. >> The current state of the world is that one could distribute two >> different copies of a given shared library and place the LSE-enabled >> version in /lib64/atomics/ and it will be selected over the /lib64/ >> version by ld.so when HWCAP_ATOMICS is present. >> Alex's main concern with this is that (1) he doesn't want to >> distribute two copies of every library, or determine what a >> resonable subset would be and (2) this solution does not work >> for executables, e.g. mysql. >> Ramana's main concern was to avoid the overhead of an indirect jump, >> especially in how that would affect the (non-)branch-prediction of >> the smallest implementations. >> Therefore, I've created small out-of-line helpers that are directly >> linked into every library or executable that requires them. There >> will be two direct branches, both of which will be well-predicted. >> In the process, I discovered a number of places within the code >> where the existing implementation could be improved. In particular: >> - the LSE patterns didn't use predicates or constraints that >>match the actual instructions, requiring unnecessary splitting. >> - the non-LSE compare-and-swap can use an extending compare to >>avoid requiring the input to have been previously extended. >> - TImode compare-and-swap was missing entirely. This brings >>aarch64 to parity with x86_64 wrt __sync_val_compare_and_swap. >> There is a final patch that enables the new option by default. >> I am not necessarily expecting this to be merged upstream, but >> for the operating system to decide what the default should be. >> It might be that this should be a configure option, so as to >> make that OS choice easier, but I've just now thought of that. ;-) >> I'm going to have to rely on Alex and/or Ramana to perform >> testing on a system that supports LSE. > > Thanks for this patchset - > > I'll give this a whirl in the next couple of days but don't expect results > until Monday or so. > > I do have an additional concern that I forgot to mention in Vancouver - > > Thanks Wilco for reminding me that this now replaces a bunch of inline > instructions with effectively a library call therefore clobbering a whole > bunch of caller saved registers. > > In which case I see 2 options. > > - maybe we should consider a private interface and restrict the registers > that these files are compiled with to minimise the number of caller saved > registers we trash. > > - Alternatively we should consider an option to inline these at O2 or O3 as > we may just be trading the performance improvements we get with using the lse > atomics I talked to Will Deacon about lse atomics today a bit. Apparently, a key benefit that you get from using them is guaranteed forward progress when compared to an exclusives loop. So IMHO even a tiny slowdown might be better than not progressing. Another concern he did bring up was that due to the additional conditional code a cmpxchg loop may become bigger, so converges slower/never than a native implementation. I assume we can identify those cases later and solve them with ifuncs in the target code though. Alex > for additional stacking and unstacking of caller saved registers in the main > functions... > > But anyway while we discuss that we'll have a look at testing and > benchmarking this. > > > regards > Ramana > >> r~ >> Richard Henderson (11): >> aarch64: Simplify LSE cas generation >> aarch64: Improve cas generation >> aarch64: Improve swp generation >> aarch64: Improve atomic-op lse generation >> aarch64: Emit LSE st instructions >> Add visibility to libfunc constructors >> Link static libgcc after shared libgcc for -shared-libgcc >> aarch64: Add out-of-line functions for LSE atomics >> aarch64: Implement -matomic-ool >> aarch64: Implement TImode compare-and-swap >> Enable -matomic-ool by default >> gcc/config/aarch64/aarch64-protos.h | 20 +- >> gcc/optabs-libfuncs.h | 2 + >> gcc/common/config/aarch64/aarch64-common.c| 6 +- >> gcc/config/aarch64/aarch64.c | 480 ++ >> gcc/gcc.c | 9 +- >> gcc/optabs-libfuncs.c | 26 +- >> .../atomic-comp-swap-release-acquire.c| 2 +- >> .../gcc.target/aarch64/atomic-inst-ldadd.c| 18 +- >> .../gcc.target/aarch64/atomic-inst-ldlogic.c | 54 +- >> .../gcc.target/aarch64/atomic-op-acq_rel.c| 2 +- >> .../gcc.target/aarc
Re: [PATCH][GCC][AARCH64] Add even-pair register classes
I had a superfluous #include in the testcase and some spelling mistakes in documentation -- corrected patch attached. On 27/09/18 15:02, Matthew Malcomson wrote: Hello, LSE instructions like casp require even-odd pairs of registers. Other instructions that take a pair of registers don't have this restriction. As mentioned in the internals documentation, a limitation of forcing a register pair to start with an even register can't be enforced by defining an EVEN_REGS register class, as a pseudo register that spans multiple hard-registers is only counted as being in a register class if all registers it spans are in that class is only counted as being in a register class if all registers it spans are in that class. The canonical way to require even-odd pairs of registers to implement a TImode pseudo register as mentioned in the documentation is to limit *all* TImode registers to being even-odd by using the TARGET_HARD_REGNO_MODE_OK hook. We don't want to use this method as it would apply to all register pairs. (It is noteworthy that the current cost model in ira prefers to put values that span two registers in an even-odd pair anyway, so the use of TARGET_HARD_REGNO_MODE_OK would likely not cause much change in low register pressure situations) As a workaround we define two register classes that each define a set of non-consecutive even-odd register pairs. i.e. their bitfields are 11001100110011001100110011001100 0011001100110011001100110011 c.f. the bitfield for GENERAL_REGS which is. 1110 (note the last two registers are always masked out as we can't use the stack pointer). Requiring a TImode register to be allocated from one of these classes ensures the register will be allocated an even-odd pair. Using constraint letters Uep and Uex (for U and U). Full bootstrap and regtest done on aarch64-none-linux-gnu. Ok for trunk? gcc/ChangeLog: 2018-09-27 Matthew Malcomson * config/aarch64/aarch64.c (aarch64_class_max_nregs,aarch64_register_move_cost): Account for new register classes. * config/aarch64/aarch64.h (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add new register classes EVEN_PAIRS and EVEN_PAIRS_ALT. * config/aarch64/constraints.md (Uep, Uex): New register constraints corresponding to new register classes. * doc/md.texi: Document new register constraints. gcc/testsuite/ChangeLog: 2018-09-27 Matthew Malcomson * gcc.target/aarch64/even-pair-constraint.c: New test. ### Attachment also inlined for ease of reply### diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 56e48b8a553f0b38a9c39b4defa094bccbebc2c4..4538023971142dcd6d76c05fc54d1ad4eddaca8b 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -513,6 +513,8 @@ extern unsigned aarch64_architecture_version; enum reg_class { NO_REGS, + EVEN_PAIRS, + EVEN_PAIRS_ALT, TAILCALL_ADDR_REGS, GENERAL_REGS, STACK_REG, @@ -532,6 +534,8 @@ enum reg_class #define REG_CLASS_NAMES \ { \ "NO_REGS",\ + "EVEN_PAIRS" \ + "EVEN_PAIRS_ALT" \ "TAILCALL_ADDR_REGS", \ "GENERAL_REGS", \ "STACK_REG", \ @@ -548,6 +552,8 @@ enum reg_class #define REG_CLASS_CONTENTS\ { \ { 0x, 0x, 0x }, /* NO_REGS */ \ + { 0x, 0x, 0x }, /* EVEN_PAIRS */\ + { 0x0ccc, 0x, 0x }, /* EVEN_PAIRS_ALT */\ { 0x0004, 0x, 0x }, /* TAILCALL_ADDR_REGS */\ { 0x7fff, 0x, 0x0003 }, /* GENERAL_REGS */ \ { 0x8000, 0x, 0x }, /* STACK_REG */ \ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1e23b2336f6c3b895a20c6129a13e5606cf86f9f..356ced8583a5f543f4870caadcb6314123b9a4fe 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7549,6 +7549,8 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) switch (regclass) { case TAILCALL_ADDR_REGS: +case EVEN_PAIRS_ALT: +case EVEN_PAIRS: case POINTER_REGS: case GENERAL_REGS: case ALL_REGS: @@ -9709,11 +9711,14 @@ aarch64_register_move_cost (machine_mode mode, const struct cpu_regmove_cost *regmove_cost = aarch64_tune_params.regmove_cost; - /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ - if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS) + /* Caller save, even pairs, and po
Re: [PATCH][GCC][testsuite] Fix caching of tests for multiple variant runs and update existing target-supports tests.
Tamar Christina writes: >> > >> > The error you would get if you do this is very confusing so I thought >> > since it didn't matter much for the regexp only target triple tests >> > that just accepting this would be fine. >> >> Seems a good thing that that's a noisy failure; the function should >> make up its mind whether it wants to cache (use curly braces) or not >> (just return the expr directly). >> >> > Should I drop it or keep it? >> >> Think we should either drop it or make it into a more user-friendly >> error, e.g.: >> >> if {[string is true -strict $args] || [string is false -strict $args]} { >> error {check_cached_effective_target condition already evaluated; >> did you pass [...] instead of the expected {...}?} >> } else { >> set et_cache($prop,$target) [uplevel eval $args] >> } >> > > Done, I have also ran a regression test on x86_64 with unix{,-m32} and > no fallouts, testsuite is clean. > > Attached updated patch with feedback processed. > > Ok for trunk? Looks good, some minor things below. > @@ -117,25 +118,36 @@ proc current_target_name { } { > > proc check_cached_effective_target { prop args } { > global et_cache > -global et_prop_list > > set target [current_target_name] > -if {![info exists et_cache($prop,target)] > - || $et_cache($prop,target) != $target} { > +if {![info exists et_cache($prop,$target)]} { > verbose "check_cached_effective_target $prop: checking $target" 2 > - set et_cache($prop,target) $target > - set et_cache($prop,value) [uplevel eval $args] > - if {![info exists et_prop_list] > - || [lsearch $et_prop_list $prop] < 0} { > - lappend et_prop_list $prop > + if {[string is true -strict $args] || [string is false -strict $args]} { > + error {check_cached_effective_target condition already evaluated;\ > +did you pass [...] instead of the expected {...}?} Should be no line break here, since the extra spaces will be part of the error message. > @@ -657,7 +646,8 @@ proc check_profiling_available { test_what } { > } > # Now examine the cache variable. > -if {![info exists profiling_available_saved]} { > +set profiling_working \ > + [expr {[check_cached_effective_target profiling_available { > # Some targets don't have any implementation of __bb_init_func or are > # missing other needed machinery. > if {[istarget aarch64*-*-elf] No need for this [expr {...}] wrapper, can just use [check_cached_effective_target profiling_available ...] directly. > @@ -3072,12 +3057,10 @@ proc check_effective_target_vect_cmdline_needed { } { > || [istarget spu-*-*] >|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) >|| [istarget aarch64*-*-*] } { > -set et_vect_cmdline_needed_saved 0 > - } > -} > - > -verbose "check_effective_target_vect_cmdline_needed: returning > $et_vect_cmdline_needed_saved" 2 > -return $et_vect_cmdline_needed_saved > + return 0 > + } else { > + return 1 > + }}] > } Returns indented too far. > @@ -5720,17 +5479,7 @@ proc check_effective_target_vect_no_int_add { } { > # This won't change for different subtargets so cache the result. > proc check_effective_target_vect_no_bitwise { } { > -global et_vect_no_bitwise_saved > -global et_index > - > -if [info exists et_vect_no_bitwise_saved($et_index)] { > - verbose "check_effective_target_vect_no_bitwise: using cached result" 2 > -} else { > - set et_vect_no_bitwise_saved($et_index) 0 > -} > -verbose "check_effective_target_vect_no_bitwise:\ > - returning $et_vect_no_bitwise_saved($et_index)" 2 > -return $et_vect_no_bitwise_saved($et_index) > +return [check_cached_effective_target_indexed vect_no_bitwise { expr 0 }] > } Might as well just return 0 here. But OK if you want to keep it. > @@ -6523,20 +5978,15 @@ proc check_effective_target_vect_aligned_arrays { } { > # This won't change for different subtargets so cache the result. > > proc check_effective_target_natural_alignment_32 { } { > -global et_natural_alignment_32 > - > -if [info exists et_natural_alignment_32_saved] { > -verbose "check_effective_target_natural_alignment_32: using cached > result" 2 > -} else { > -# FIXME: 32bit powerpc: guaranteed only if MASK_ALIGN_NATURAL/POWER. > -set et_natural_alignment_32_saved 1 > -if { ([istarget *-*-darwin*] && [is-effective-target lp64]) > - || [istarget avr-*-*] } { > -set et_natural_alignment_32_saved 0 > -} > -} > -verbose "check_effective_target_natural_alignment_32: returning > $et_natural_alignment_32_saved" 2 > -return $et_natural_alignment_32_saved > +# FIXME: 32bit powerpc: guaranteed only if MASK_ALIGN_NATURAL/POWER. > +return [check_cached_effective_target_indexed natural_alignment
Re: [PATCH 11/25] Simplify vec_merge according to the mask.
Andrew Stubbs writes: > On 27/09/18 08:16, Richard Sandiford wrote: >> On keeping the complexity down: >> >>if (side_effects_p (x)) >> return NULL_RTX; >> >> makes this quadratic for chains of unary operations. Is it really >> needed? The code after it simply recurses on operands and doesn't >> discard anything itself, so it looks like the VEC_MERGE call to >> side_effects_p would be enough. > > The two calls do not check the same thing. The other one checks the > other operand of a vec_merge, and this checks the current operand. > > I suppose it's safe to discard a VEC_MERGE when the chosen operand > contains side effects, but I'm not so sure when the VEC_MERGE itself is > an operand to an operator with side effects. I'm having a hard time > inventing a scenario in which a PRE_INC could contain a VEC_MERGE, but > maybe a volatile MEM or ASM_OPERANDS could do? But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they have the wrong rtx class. AFAICT no current unary, binary or ternary operator has that level of side-effect (and that's a good thing). We also don't guarantee to preserve FP exceptions as side-effects. > Conversely, I don't see that side-effects deep down in an expression > should stop us transforming it as a high level. > > Is there an equivalent to side_effects_p that doesn't recurse? Should > there be? Not aware of an existing function, and it might be useful to have one at some point. Just not sure we need it for this. Richard
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
> On Sep 27, 2018, at 2:45 AM, Richard Biener wrote: > > On Wed, 26 Sep 2018, Qing Zhao wrote: > >> >>> On Sep 26, 2018, at 6:07 PM, Alexander Monakov wrote: >>> >>> On Wed, 26 Sep 2018, Qing Zhao wrote: The request is for application developers who want to use gcc's online patching feature. Today, developers can turn off inlining and deliver just the patched routine. They can also allow all inlining and deliver the patched routine and all the routines that the patched routine was inlined into. completely turning off inlining will sacrifice too much run-time performance. completely enable inlining, on the other hand, will have the potential issues with code size, complexity and debuggability for the online patching. the proposed option provides a compromised solution for the above issues. and enable more developers to utilize gcc’s online patching feature. >>> >>> From this explanation it sounds to me that what you really need is -Os-like >>> behavior for IPA passes, without enabling -Os for gimple/rtl passes, as I >>> mentioned in my previous email. Honza, how does that sound? >> >> I don’t think a -Os-like option will do the job. >> >> As Jeff already mentioned in a very previous email: >> >> “Presumably one of the cases where this capability is really helpful is >> things like ksplice. If you have a function with global scope that has >> been potentially inlined, then it's a lot harder track down those >> inlining points at DTRT. >> >> We ran into this internally when looking at hot patching some of the >> spinlock code in the kernel. It would have been real helpful if the >> kernel had been compiled with this kind of option :-) >> >> So conceptually I can see value in this kind of option. >> “ >> >> so, specially control inlining on static/global will be helpful to online >> patch. > > But as Honza said this gets you only sofar. IIRC for our kernel > livepatching we turn off most IPA passes because while we can "easily" > figure what and where things were inlined spotting the effects of > IPA analysis and transform is almost impossible. > > So there's two parts of the knob - one is to make the live-patch size > not explode (do less inlining where it doesn't hurt performance - that > eventually also applies to static functions called once inlining!). Yes, limit additional unnecessary inlining might be better/ > The other is to make it possible to conservatively compute the set > of functions you have to replace (the set of functions that are > affected by a patch). right. > > Having an option to _that_ effect might indeed be useful (to avoid > people chasing all the flags they need to disable). So shouldn't this > be a -fease-live-patching option rather that -finline-only-static > which doesn't really capture the intention nor the effect? Yes, if we can have -fease-live-patching, that will be even better for the live patch purpose. > > That is, -fease-live-patching would guarantee that if you source-patch > function X then, if you replace all functions which debuginfo tells you > X was inlined to, the result will be semantically equivalent with > replacing the whole program? Okay. > We might even add sth like > -fpatch-symbol-list=FOO,BAR that outputs a list of symbols into BAR > that are affected this way when functions FOO are changed (you > run that on unpatched code of course). Or we add sth to the > cgraph dumpfile that for each function lists the set of symbols > it was affected by. Yes, this option might be also helpful for livepatching. we can do it at the next step. Qing > > Thanks, > Richard.
Re: [PATCH][Middle-end][Version 4]Add a new option to control inlining only on static functions
> On Sep 27, 2018, at 3:58 AM, Jan Hubicka wrote: > >> >> Okay, I see. >> >>> >>> If you make this to be INTERPOSABLE (which means it can be replaced by >>> different >>> implementation by linker and that is probably what we want for live >>> patching) >>> then also inliner, ipa-sra and other optimization will give up on these. >> >> do you suggest that to set the global function as AVAIL_INTERPOSABLE when >> -finline-only-static >> is present? then we should avoid all issues? > > It seems to be reasonable direction I think, because it is what really happens > (well AVAIL_INTERPOSABLE still does not assume that the interposition will > happen at runtime, but it is an approximation and we may introduce something > like > AVAIL_RUNTIME_INTERPOSABLE if there is need for better difference). > I wonder if -finline-only-static is good name for the flag though, because it > does a lot more than that. Maybe something like -flive-patching? > How much is this all tied to one particular implementation of the feature? Yes, I like this idea. I will study a little more on this direction and report back. Qing > > Honza >> >> Qing >>
Re: [PATCH, AArch64 10/11] aarch64: Implement TImode compare-and-swap
On 9/27/18 6:04 AM, Matthew Malcomson wrote: > Hi Richard, > > > On 26/09/18 06:03, rth7...@gmail.com wrote: >> From: Richard Henderson >> >> This pattern will only be used with the __sync functions, because >> we do not yet have a bare TImode atomic load. >> >> > I don't have any comment on the overall aim of the patch series, but in > this particular > patch it looks like you doesn't ensure the register pairs for casp are > even-odd. > > This is the restriction in the Arm Arm decode for casp variants as > if Rs<0> == '1' then UnallocatedEncoding(); > if Rt<0> == '1' then UnallocatedEncoding(); Oops. I missed this bit when reading the docs. Thanks. I'll incorporate your even register class patch into the next round. r~
Re: GCC options for kernel live-patching (Was: Add a new option to control inlining only on static functions)
Thanks, Martin, for all these interesting information. Looks like that a more general option to help live-patching is needed. I will do a little more study on this direction. Qing > On Sep 27, 2018, at 7:19 AM, Martin Jambor wrote: > > Hi, > > (this message is a part of the thread originating with > https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01018.html) > > On Thu, Sep 27 2018, Jan Hubicka wrote: If you make this to be INTERPOSABLE (which means it can be replaced by different implementation by linker and that is probably what we want for live patching) then also inliner, ipa-sra and other optimization will give up on these. >>> >>> do you suggest that to set the global function as AVAIL_INTERPOSABLE when >>> -finline-only-static >>> is present? then we should avoid all issues? >> >> It seems to be reasonable direction I think, because it is what really >> happens >> (well AVAIL_INTERPOSABLE still does not assume that the interposition will >> happen at runtime, but it is an approximation and we may introduce something >> like >> AVAIL_RUNTIME_INTERPOSABLE if there is need for better difference). >> I wonder if -finline-only-static is good name for the flag though, because it >> does a lot more than that. Maybe something like -flive-patching? >> How much is this all tied to one particular implementation of the feature? > > We have just had a quick discussion with two upstream maintainers of > Linux kernel live-patching about this and the key points were: > > 1. SUSE live-patch creators (and I assume all that use the upstream > live-patching method) use Martin Liska's (somewhat under-documented) > -fdump-ipa-clones option and a utility he wrote > (https://github.com/marxin/kgraft-analysis-tool) to deal with all > kinds of inlining, IPA-CP and generally all IPA optimizations that > internally create a clone. The tool tells them what happened and > also lists all callers that need to be live-patched. > > 2. However, there is growing concern about other IPA analyses that do > not create a clone but still affect code generation in other > functions. Kernel developers have identified and disabled IPA-RA but > there is more of them such as IPA-modref analysis, stack alignment > propagation and possibly quite a few others which extract information > from one function and use it a caller or perhaps even some > almost-unrelated functions (such as detection of read-only and > write-only static global variables). > > The kernel live-patching community would welcome if GCC had an option > that could disable all such optimizations/analyses for which it > cannot provide a list of all affected functions (i.e. which ones need > to be live-patched if a particular function is). > > I assume this is orthogonal to the proposed -finline-only-static > option, but the above approach seems superior in all respects. > > 3. The community would also like to be involved in these discussions, > and therefore I am adding live-patch...@vger.kernel.org to CC. On a > related note, they will also have a live-patching mini-summit at the > Linux Plumbers conference in Vancouver in November where they plan to > discuss what they would like GCC to provide. > > Thanks, > > Martin > > ... > >> For example comdat that was cloned by IPA-SRA. See can_be_local_p and >> comdat_can_be_unshared_p predicates. Similar problem happens to clones >> created >> by ipa-cp. >> >> I guess we want to disable localization and cloning in this case as well. >> I wonder what else. > > Yes, I think we should make -finline-only-static incompatible with > cloning and tree-sra too. > > Qing >> >> Honza >>>
Re: [PATCH, AArch64 00/11] LSE atomics out-of-line
On 9/27/18 6:07 AM, Ramana Radhakrishnan wrote: > I do have an additional concern that I forgot to mention in Vancouver - > > Thanks Wilco for reminding me that this now replaces a bunch of inline > instructions with effectively a library call therefore clobbering a whole > bunch > of caller saved registers. We did talk about this in Vancouver, including perhaps providing a private interface. At the time you brushed it off and asked why I couldn't just write the helpers in C. I guess we can talk about a private interface after we see what the total overhead is with it as it is. r~
Re: [PATCH, AArch64 10/11] aarch64: Implement TImode compare-and-swap
On 27/09/18 17:32, Richard Henderson wrote: On 9/27/18 6:04 AM, Matthew Malcomson wrote: Hi Richard, On 26/09/18 06:03, rth7...@gmail.com wrote: From: Richard Henderson This pattern will only be used with the __sync functions, because we do not yet have a bare TImode atomic load. I don't have any comment on the overall aim of the patch series, but in this particular patch it looks like you doesn't ensure the register pairs for casp are even-odd. This is the restriction in the Arm Arm decode for casp variants as if Rs<0> == '1' then UnallocatedEncoding(); if Rt<0> == '1' then UnallocatedEncoding(); Oops. I missed this bit when reading the docs. Thanks. I'll incorporate your even register class patch into the next round. r~ Just a heads up on that register class patch (because it's use is not very intuitive): To allow any valid register pair combination between operands you need to have all combinations of the two constraints in your patterns alternatives. So e.g. the "out" operand might have constraints like "Uep,Uex,Uep,Uex" while the "desired" operand would have "Uep,Uep,Uex,Uex". [It's ugly, but the best of the options I found ]. M
[PATCH][AArch64] Support zero-extended move to FP register
The popcount expansion uses SIMD instructions acting on 64-bit values. As a result a popcount of a 32-bit integer requires zero-extension before moving the zero-extended value into an FP register. This patch adds support for zero-extended int->FP moves to avoid the redundant uxtw. Similarly, add support for a 32-bit zero-extending load->FP register. Add a missing 'fp' arch attribute to the related 8/16-bit pattern. int f (int a) { return __builtin_popcount (a); } Before: uxtwx0, w0 fmovd0, x0 cnt v0.8b, v0.8b addvb0, v0.8b fmovw0, s0 ret After: fmovs0, w0 cnt v0.8b, v0.8b addvb0, v0.8b fmovw0, s0 ret Passes regress on AArch64, OK for commit? ChangeLog: 2018-09-27 Wilco Dijkstra gcc/ * config/aarch64/aarch64.md (zero_extendsidi2_aarch64): Add w=r and w=m zeroextend alternatives. (zero_extend2_aarch64): Add arch attribute. testsuite/ * gcc.target/aarch64/popcnt.c: Test zero-extended popcount. --- diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index ef2368706e88a551b9d0d2db2385860112bdbdde..c0b39ea876f76b54b31088d8d63d96a9cbcf8b88 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1588,13 +1588,16 @@ ) (define_insn "*zero_extendsidi2_aarch64" - [(set (match_operand:DI 0 "register_operand" "=r,r") -(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + [(set (match_operand:DI 0 "register_operand" "=r,r,w,w") +(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m")))] "" "@ uxtw\t%0, %w1 - ldr\t%w0, %1" - [(set_attr "type" "extend,load_4")] + ldr\t%w0, %1 + fmov\t%s0, %w1 + ldr\t%s0, %1" + [(set_attr "type" "extend,load_4,fmov,f_loads") + (set_attr "arch" "*,*,fp,fp")] ) (define_insn "*load_pair_zero_extendsidi2_aarch64" @@ -1634,7 +1637,8 @@ and\t%0, %1, ldr\t%w0, %1 ldr\t%0, %1" - [(set_attr "type" "logic_imm,load_4,load_4")] + [(set_attr "type" "logic_imm,load_4,f_loads") + (set_attr "arch" "*,*,fp")] ) (define_expand "qihi2" diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt.c b/gcc/testsuite/gcc.target/aarch64/popcnt.c index 7e957966d8e81b8633a444bb42944d0da82ae5db..8fb8db82e052bd898aa8513710fd94f892d80b61 100644 --- a/gcc/testsuite/gcc.target/aarch64/popcnt.c +++ b/gcc/testsuite/gcc.target/aarch64/popcnt.c @@ -19,5 +19,14 @@ foo2 (long long x) return __builtin_popcountll (x); } -/* { dg-final { scan-assembler-not "popcount" } } */ -/* { dg-final { scan-assembler-times "cnt\t" 3 } } */ +int +foo3 (int *p) +{ + return __builtin_popcount (*p); +} + +/* { dg-final { scan-assembler-not {popcount} } } */ +/* { dg-final { scan-assembler-times {cnt\t} 4 } } */ +/* { dg-final { scan-assembler-times {fmov\ts} 1 } } */ +/* { dg-final { scan-assembler-times {fmov\td} 2 } } */ +/* { dg-final { scan-assembler-times {ldr\ts} 1 } } */
[gomp5] Initial OpenMP 5 task reduction support
Hi! I've committed following patch to add WIP task reduction support to GCC. Right now it is only supported for task_reduction clauses on taskgroup directive and matching in_reduction clauses on task directives. When registering the reductions, the runtime allocates memory holding array of blocks of the private vars followed by bool flags (if it has been initialized in the particular thread), indexed by the thread. When starting a task, it will remap pointers to the original vars to pointers to the new private copies, and initialize the var if it hasn't been initialized yet (there is an optimization for non-UDR initialization to zero, because the library clears the whole block). After the GOMP_taskgroup_end is called, all the vars are reduced and then they are freed. task-reduction-5.C testcase still fails, need to fix up passing of the remapped original address properly even for C/C++ array reductions. I plan to eventually add an optimization, where if the compiler can figure out which offset and from which taskgroup in_reduction should map to, it could avoid the expensive runtime remapping and instead make sure the GOMP_taskgroup_reduction_register returned pointer is firstprivatized to the corresponding tasks and then the private copy is at that pointer + size * omp_get_thread_num () + offset, but right now all reductions are always remapped. Task reductions are broken ATM if not nested inside parallel, because we don't create taskgroup at all. My further plan after fixing the bugs is to add reduction support for taskloop and then also support task modifier on parallel and worksharing reductions. This patch seems big enough and something is already working that I've decided to commit it. 2018-09-27 Jakub Jelinek * builtin-types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. * omp-builtins.def (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER, BUILT_IN_GOMP_TASKGROUP_REDUCTION_UNREGISTER, BUILT_IN_GOMP_TASK_REDUCTION_REMAP): New builtins. * omp-low.c (use_pointer_for_field): Use is_global_var instead of TREE_STATIC || DECL_EXTERNAL, and apply only if not privatized in outer contexts. (scan_sharing_clauses): Handle OMP_CLAUSE_IN_REDUCTION in task contexts. Handle OMP_CLAUSE_TASK_REDUCTION. (scan_omp_1_stmt): Call scan_sharing_clauses for taskgroups. (lower_rec_input_clauses): Handle OMP_CLAUSE_IN_REDUCTION. Set TREE_THIS_NOTRAP instead of just noop testing it. (lower_send_clauses): Handle OMP_CLAUSE_IN_REDUCTION. (omp_task_reductions_find_first, omp_task_reduction_iterate, lower_omp_task_reductions): New functions. (lower_omp_taskgroup): Handle taskgroup reductions. (create_task_copyfn): Handle OMP_CLAUSE_IN_REDUCTION and OMP_CLAUSE_REDUCTION clauses. gcc/cp/ * semantics.c (finish_omp_reduction_clause): Don't mark OMP_CLAUSE_DECL addressable if it has reference type. Do mark decl_placeholder addressable if needed. gcc/fortran/ * types.def (BT_FN_VOID_SIZE_SIZE_PTR): New. libgomp/ * configure.ac: Check for aligned_alloc, posix_memalign, memalign and _aligned_malloc. * libgomp.h (gomp_aligned_alloc, gomp_aligned_free): New prototypes. (struct gomp_taskgroup): Add reductions field. * libgomp.map (GOMP_5.0): Export GOMP_taskgroup_reduction_register, GOMP_taskgroup_reduction_unregister and GOMP_task_reduction_remap. * task.c (GOMP_taskgroup_start): Initialize taskgroup->reductions. (GOMP_taskgroup_reduction_register, GOMP_taskgroup_reduction_unregister, GOMP_task_reduction_remap): New functions. * alloc.c (gomp_aligned_alloc, gomp_aligned_free): New functions. * configure: Regenerated. * config.h.in: Regenerated. * testsuite/libgomp.c-c++-common/task-reduction-1.c: New test. * testsuite/libgomp.c-c++-common/task-reduction-2.c: New test. * testsuite/libgomp.c-c++-common/task-reduction-3.c: New test. * testsuite/libgomp.c++/task-reduction-1.C: New test. * testsuite/libgomp.c++/task-reduction-2.C: New test. * testsuite/libgomp.c++/task-reduction-3.C: New test. * testsuite/libgomp.c++/task-reduction-4.C: New test. * testsuite/libgomp.c++/task-reduction-5.C: New test. --- gcc/builtin-types.def.jj2018-07-10 11:35:00.316319081 +0200 +++ gcc/builtin-types.def 2018-09-25 14:30:29.038766626 +0200 @@ -619,6 +619,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_U BT_VOID, BT_UINT32, BT_UINT64, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_VOID_UINT32_UINT32_PTR, BT_VOID, BT_UINT32, BT_UINT32, BT_PTR) +DEF_FUNCTION_TYPE_3 (BT_FN_VOID_SIZE_SIZE_PTR, BT_VOID, BT_SIZE, BT_SIZE, +BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) --- gcc
Re: [PATCH] declare tree_to_shwi et al. nonnull and pure
On 9/26/18 5:54 PM, Martin Sebor wrote: > The attached patch adds attributes nonnull and pure to > tree_to_shwi() and a small number of other heavily used functions > that will benefit from both. > > First, I noticed that there are a bunch of functions in tree.c that > deal gracefully with null pointers right alongside a bunch of other > related functions that don't. > > For example, tree_fits_shwi_p() is null-safe but integer_zerop() > is not. There a number of others. I never remember which ones > are in which group and so I either add unnecessary checks or > forget to add them, for which we then all pay when the missing > check triggers an ICE. In patch reviews I see I'm not the only > one who's not sure. The attribute should help avoid some of > these problems: both visually and via warnings. > > Second, while testing the nonnull patch, I noticed that GCC would > not optimize some null tests after calls to nonnull functions that > take tree as an argument and that I expected it to optimize, like > > n = tree_to_shwi (TYPE_SIZE (type)); > if (TYPE_SIZE (type)) > ... > > The reason is that tree_to_shwi() isn't declared pure, even though > tree_fits_shwi_p() is (though as I mentioned, the latter is null > safe and so not declarted nonnull, and so it doesn't make the same > optimization possible). > > Tested on x86_64-linux. The patch exposed a couple of issues > in Ada. At least the first one is a false positive caused by > GCC being unaware that tree_fits_uhwi_p() returns false for > a null argument (propagating this knowledge via an attribute > seems like an opportunity for a future enhancement). > I suppressed the warning by introducing a local temporary. > > I suspect the second warning is caused by the Ada TYPE_RM_SIZE() > macro expanding to a conditional with an explicit null operand: > > #define TYPE_RM_SIZE(NODE) TYPE_RM_VALUE ((NODE), 0) > > #define TYPE_RM_VALUE(NODE, N) \ > (TYPE_RM_VALUES (NODE)\ > ? TREE_VEC_ELT (TYPE_RM_VALUES (NODE), (N)) : NULL_TREE) > > but I wasn't able to reduce it to a small test case to confirm > that. I suppressed this warning by introducing a temporary as > well. > > Martin > > gcc-tree-nonnull.diff > > gcc/ChangeLog: > > * tree.h (tree_to_shwi): Add attribute nonnull. > (tree_to_poly_int64, tree_to_uhwi, tree_to_poly_uint64): Same. > (integer_zerop, integer_onep, int_fits_type_p): Same. > > gcc/ada/ChangeLog: > > * gcc-interface/utils.c (make_packable_type): Introduce a temporary > to avoid -Wnonnull. > (unchecked_convert): Same. No doubt we have not been diligent about marking non-null, const, pure, etc over time. I thought we had warnings for functions which are const/pure but not suitably marked. Can you peek a bit at why those didn't trigger. See ipa-pure-const.c. Ignore the initial comment -- it applies to both functions and data. ISTM we could probably build a missing non-null attribute warning. If a NULL pointer argument unconditionally leads to an assert, then the function should be marked. Similarly if a pointer argument is unconditionally dereferenced, then it should be marked. I strongly suspect this would be too noisy to enable by default. ANyway, the patch is OK for the trunk. jeff
Re: [PR 87339, testsuite] Fix failure of gcc.dg/warn-abs-1.c on some targets
On Wed, 26 Sep 2018 at 17:26, Martin Jambor wrote: > > Hi, > > On Wed, Sep 26 2018, Christophe Lyon wrote: > > On Tue, 25 Sep 2018 at 17:50, Martin Jambor wrote: > >> > >> Hi, > >> > >> On Mon, Sep 24 2018, Christophe Lyon wrote: > >> > On Mon, 24 Sep 2018 at 20:46, Martin Jambor wrote: > >> >> > >> >> Hi, > >> >> > >> >> the test added to check whether _Float128 types are handled correctly by > >> >> the new warning about suspicious calls to abs-like functions fails on > >> >> many platforms. The patch below circumvents the problem by running on > >> >> i686/x86_64 only. I understand that the proper solution would be to > >> >> come up with a deja-gnu predicate and skip-if it was not provided but I > >> >> think that for one simple test that is a bit of an overkill and testing > >> >> it on x86_64 will provide all the test coverage we need. > >> >> > >> >> Tested on x86_64-linux and aarch64-linux, testing on i686-linux > >> >> pending. OK for trunk? > >> >> > >> >> Thanks, > >> >> > >> >> Martin > >> >> > >> >> > >> >> > >> >> 2018-09-24 Martin Jambor > >> >> > >> >> PR testsuite/87339 > >> >> * gcc.dg/warn-abs-1.c: Do not test _Float128. Remove > >> >> dg-skip-if. > >> >> * gcc.target/i386/warn-abs-3.c: New test. > >> >> --- > >> >> gcc/testsuite/gcc.dg/warn-abs-1.c | 4 +--- > >> >> gcc/testsuite/gcc.target/i386/warn-abs-3.c | 12 > >> >> 2 files changed, 13 insertions(+), 3 deletions(-) > >> >> create mode 100644 gcc/testsuite/gcc.target/i386/warn-abs-3.c > >> >> > >> >> diff --git a/gcc/testsuite/gcc.dg/warn-abs-1.c > >> >> b/gcc/testsuite/gcc.dg/warn-abs-1.c > >> >> index 129a3af8ac6..b494b14f4a9 100644 > >> >> --- a/gcc/testsuite/gcc.dg/warn-abs-1.c > >> >> +++ b/gcc/testsuite/gcc.dg/warn-abs-1.c > >> >> @@ -1,5 +1,4 @@ > >> >> /* { dg-do compile { target float128 } } */ > >> > > >> > Don't you want to remove the {target float128} part? > >> > >> I did notice it but yes, I do want to remove it. I am therefore going > >> to commit the following after re-testing on x86_64-linux and > >> aarch64-linux. > >> > >> Thank you, > >> > > > > Hi, thanks for this fix. > > Now the test runs on ARM, but fails because of two missing warnings: > > gcc.dg/warn-abs-1.c (test for warnings, line 46) > > gcc.dg/warn-abs-1.c (test for warnings, line 60) > > which correspond to: > > *pld = fabs (*pld); /* { dg-warning "may cause truncation of value" } */ > > *pcld = cabs (*pcld); /* { dg-warning "may cause truncation of value" } */ > > > > I see, I guess the easiest is to skip the test on targets that do not > really have long double, although if someone thinks that is too > restrictive, I can also split the test again and move long double bits > to a separate test. > > Can you please test whether the following helps? > Sorry for the delay, yes it is better in the sense that the tests are now skipped. I haven't tried Joseph's suggestion yet. > Thanks and sorry for the disruptions, > > Martin > > > > diff --git a/gcc/testsuite/gcc.dg/warn-abs-1.c > b/gcc/testsuite/gcc.dg/warn-abs-1.c > index 1c487270042..88389ec6be8 100644 > --- a/gcc/testsuite/gcc.dg/warn-abs-1.c > +++ b/gcc/testsuite/gcc.dg/warn-abs-1.c > @@ -1,4 +1,5 @@ > /* { dg-do compile } */ > +/* { dg-require-effective-target large_long_double } */ > /* { dg-options "-Wabsolute-value" } */ > > #include > > >
Re: Support excess precision for integer / floating-point comparisons (PR c/87390)
Hi Joseph, On Wed, 26 Sep 2018 at 23:15, Joseph Myers wrote: > > In C11, implicit conversions from integer to floating-point types > produce results with the range and precision of the corresponding > evaluation format rather than only those of the type implicitly > converted to. This patch implements that case of C11 excess precision > semantics in the case of a comparison between integer and > floating-point types, previously missed when implementing other cases > of excess precision for such implicit conversions. As with other such > fixes, this patch conservatively follows the reading of C99 where > conversions from integer to floating-point do not produce results with > excess precision and so the change is made for C11 mode only. > > Bootstrapped with no regressions on x86_64-pc-linux-gnu. Applied to > mainline. > > gcc/c: > 2018-09-26 Joseph Myers > > PR c/87390 > * c-typeck.c (build_binary_op): Use excess precision for > comparisons of integers and floating-point for C11 and later. > > gcc/testsuite: > 2018-09-26 Joseph Myers > > PR c/87390 > * gcc.target/i386/excess-precision-9.c, > gcc.target/i386/excess-precision-10.c: New tests. > After this patch I've noticed regressions on fp-int-convert-float16-timode.c gcc.dg/torture/fp-int-convert-float16.c on aarch64 and arm (at execution time) Christophe > Index: gcc/c/c-typeck.c > === > --- gcc/c/c-typeck.c(revision 264650) > +++ gcc/c/c-typeck.c(working copy) > @@ -11249,6 +11249,20 @@ build_binary_op (location_t location, enum tree_co > case EXACT_DIV_EXPR: >may_need_excess_precision = true; >break; > + > +case EQ_EXPR: > +case NE_EXPR: > +case LE_EXPR: > +case GE_EXPR: > +case LT_EXPR: > +case GT_EXPR: > + /* Excess precision for implicit conversions of integers to > +floating point in C11 and later. */ > + may_need_excess_precision = (flag_isoc11 > + && (ANY_INTEGRAL_TYPE_P (type0) > + || ANY_INTEGRAL_TYPE_P (type1))); > + break; > + > default: >may_need_excess_precision = false; >break; > Index: gcc/testsuite/gcc.target/i386/excess-precision-10.c > === > --- gcc/testsuite/gcc.target/i386/excess-precision-10.c (nonexistent) > +++ gcc/testsuite/gcc.target/i386/excess-precision-10.c (working copy) > @@ -0,0 +1,52 @@ > +/* Excess precision tests. Test implicit conversions in comparisons: > + excess precision in C11 mode. */ > +/* { dg-do run } */ > +/* { dg-options "-std=c11 -mfpmath=387 -fexcess-precision=standard" } */ > + > +extern void abort (void); > +extern void exit (int); > + > +int > +main (void) > +{ > + float f = 0x1p63f; > + unsigned long long int u = (1ULL << 63) + 1; > + > + if ((f == u) != 0) > +abort (); > + > + if ((u == f) != 0) > +abort (); > + > + if ((f != u) != 1) > +abort (); > + > + if ((u != f) != 1) > +abort (); > + > + if ((f < u) != 1) > +abort (); > + > + if ((u < f) != 0) > +abort (); > + > + if ((f <= u) != 1) > +abort (); > + > + if ((u <= f) != 0) > +abort (); > + > + if ((f > u) != 0) > +abort (); > + > + if ((u > f) != 1) > +abort (); > + > + if ((f >= u) != 0) > +abort (); > + > + if ((u >= f) != 1) > +abort (); > + > + exit (0); > +} > Index: gcc/testsuite/gcc.target/i386/excess-precision-9.c > === > --- gcc/testsuite/gcc.target/i386/excess-precision-9.c (nonexistent) > +++ gcc/testsuite/gcc.target/i386/excess-precision-9.c (working copy) > @@ -0,0 +1,52 @@ > +/* Excess precision tests. Test implicit conversions in comparisons: > + no excess precision in C99 mode. */ > +/* { dg-do run } */ > +/* { dg-options "-std=c99 -mfpmath=387 -fexcess-precision=standard" } */ > + > +extern void abort (void); > +extern void exit (int); > + > +int > +main (void) > +{ > + float f = 0x1p63f; > + unsigned long long int u = (1ULL << 63) + 1; > + > + if ((f == u) != 1) > +abort (); > + > + if ((u == f) != 1) > +abort (); > + > + if ((f != u) != 0) > +abort (); > + > + if ((u != f) != 0) > +abort (); > + > + if ((f < u) != 0) > +abort (); > + > + if ((u < f) != 0) > +abort (); > + > + if ((f <= u) != 1) > +abort (); > + > + if ((u <= f) != 1) > +abort (); > + > + if ((f > u) != 0) > +abort (); > + > + if ((u > f) != 0) > +abort (); > + > + if ((f >= u) != 1) > +abort (); > + > + if ((u >= f) != 1) > +abort (); > + > + exit (0); > +} > > -- > Joseph S. Myers > jos...@codesourcery.com
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
* H. J. Lu: > +@item zero_caller_saved_regs("@var{choice}") > +@cindex @code{zero_caller_saved_regs} function attribute, x86 > +On x86 targets, the @code{zero_caller_saved_regs} attribute causes the > +compiler to zero caller-saved integer registers at function return with > +@var{choice}. @samp{skip} doesn't zero caller-saved integer registers. > +@samp{used} zeros caller-saved integer registers which are used in > +function. @samp{all} zeros all caller-saved integer registers. Perhaps “according to @var{choice}:”. And say that the default for the attribute is controlled by @option{-mzero-caller-saved-regs}? (Maybe “skip” should be none?) I assume we can check for this use __has_attribute? We would use this in the implementation of explicit_bzero in glibc. > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 7ef4e7a449b..796477452d5 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -28459,6 +28459,16 @@ not be reachable in the large code model. > @opindex -mindirect-branch-register > Force indirect call and jump via register. > +@item -mzero-caller-saved-regs=@var{choice} > +@opindex -mzero-caller-saved-regs > +Zero caller-saved integer registers at function return with @var{choice}. > +The default is @samp{skip}, which doesn't zero caller-saved integer > +registers. @samp{used} zeros caller-saved integer registers which are > +used in function. @samp{all} zeros all caller-saved integer registers. > +You can control this behavior for a specific function by using the > +function attribute @code{zero_caller_saved_regs}. > +@xref{Function Attributes}. See above regarding “with @var{choice}”. Thanks, Florian
Re: Support excess precision for integer / floating-point comparisons (PR c/87390)
On Thu, 27 Sep 2018, Christophe Lyon wrote: > After this patch I've noticed regressions on > fp-int-convert-float16-timode.c > gcc.dg/torture/fp-int-convert-float16.c > on aarch64 and arm (at execution time) Does this patch help? Index: gcc/testsuite/gcc.dg/torture/fp-int-convert.h === --- gcc/testsuite/gcc.dg/torture/fp-int-convert.h (revision 264666) +++ gcc/testsuite/gcc.dg/torture/fp-int-convert.h (working copy) @@ -90,6 +90,7 @@ if (ivin != (VAL)\ || ((PREC_OK) && ivout != ivin) \ || ((PREC_OK) && ivout != (VAL)) \ - || fv1 != (VAL) || fv2 != (VAL) || fv1 != fv2) \ + || fv1 != (FT) (VAL) || fv2 != (FT) (VAL)\ + || fv1 != fv2) \ abort (); \ } while (0) -- Joseph S. Myers jos...@codesourcery.com
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 7:57 AM, Richard Biener wrote: > On Thu, Sep 27, 2018 at 3:16 PM H.J. Lu wrote: >> >> On Thu, Sep 27, 2018 at 6:08 AM, Szabolcs Nagy wrote: >> > On 26/09/18 19:10, H.J. Lu wrote: >> >> >> >> Add -mzero-caller-saved-regs=[skip|used|all] command-line option and >> >> zero_caller_saved_regs("skip|used|all") function attribue: >> >> >> >> 1. -mzero-caller-saved-regs=skip and zero_caller_saved_regs("skip") >> >> >> >> Don't zero caller-saved integer registers upon function return. >> >> >> >> 2. -mzero-caller-saved-regs=used and zero_caller_saved_regs("used") >> >> >> >> Zero used caller-saved integer registers upon function return. >> >> >> >> 3. -mzero-caller-saved-regs=all and zero_caller_saved_regs("all") >> >> >> >> Zero all caller-saved integer registers upon function return. >> >> >> >> Tested on i686 and x86-64 with bootstrapping GCC trunk and >> >> -mzero-caller-saved-regs=used as well as -mzero-caller-saved-regs=all >> >> enabled by default. >> >> >> > >> > from this description and the documentation it's >> > not clear to me what this tries to achieve. >> > >> > is it trying to prevent information leak? >> > or some pcs hack the caller may rely on? >> > >> > if it's for information leak then i'd expect such >> > attribute to be used on crypto code.. however i'd >> > expect crypto code to use simd registers as well, >> > so integer only cleaning needs explanation. >> >> The target usage is in Linux kernel. > > Maybe still somehow encode that in the option since it otherwise raises > expectations that are not met? > -mzero-call-clobbered-regs=used-int|all-int|skip|used-simd|used-fp,etc.? > and sorry() on unimplemented ones? Or simply zero also non-integer > regs the same way? I suppose > there isn't sth like vzeroupper that zeros all SIMD regs and completely? > The problem with SIMD regs is how to cover different ISAs, widths and number of SIMD regs. We need to generate at least 3 different code paths to clear SSE, AVX and AVX512 registers. -- H.J.
Re: [gomp5] Initial OpenMP 5 task reduction support
On Thu, Sep 27, 2018 at 07:38:20PM +0200, Jakub Jelinek wrote: > task-reduction-5.C testcase still fails, need to fix up passing of the > remapped original address properly even for C/C++ array reductions. Fixed thusly, committed to gomp-5_0-branch. 2018-09-27 Jakub Jelinek * omp-low.c (lower_rec_input_clauses): Fix handling of OMP_CLAUSE_REDUCTION_OMP_ORIG_REF for task array reductions. libgomp/ * testsuite/libgomp.c++/task-reduction-5.C (main): Add forgotten checks. * testsuite/libgomp.c++/task-reduction-6.C: New test. --- gcc/omp-low.c.jj2018-09-27 18:26:31.960486678 +0200 +++ gcc/omp-low.c 2018-09-27 21:13:24.946076732 +0200 @@ -4096,36 +4096,51 @@ lower_rec_input_clauses (tree clauses, g tree i2 = NULL_TREE, y2 = NULL_TREE; tree body2 = NULL_TREE, end2 = NULL_TREE; tree y3 = NULL_TREE, y4 = NULL_TREE; - if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd) + if (task_reduction_needs_orig_p) { - y2 = create_tmp_var (ptype, NULL); - gimplify_assign (y2, y, ilist); - tree ref = build_outer_var_ref (var, ctx); - /* For ref build_outer_var_ref already performs this. */ - if (TREE_CODE (d) == INDIRECT_REF) - gcc_assert (omp_is_reference (var)); - else if (TREE_CODE (d) == ADDR_EXPR) - ref = build_fold_addr_expr (ref); - else if (omp_is_reference (var)) - ref = build_fold_addr_expr (ref); - ref = fold_convert_loc (clause_loc, ptype, ref); - if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) - && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) + tree ref = build4 (ARRAY_REF, ptr_type_node, tskred_avar, +size_int (task_reduction_cnt_full + + task_reduction_cntorig - 1), +NULL_TREE, NULL_TREE); + y3 = create_tmp_var (ptype, NULL); + gimplify_assign (y3, ref, ilist); + } + else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd) + { + if (pass != 3) { - y3 = create_tmp_var (ptype, NULL); - gimplify_assign (y3, unshare_expr (ref), ilist); + y2 = create_tmp_var (ptype, NULL); + gimplify_assign (y2, y, ilist); } - if (is_simd) + if (is_simd || OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) { - y4 = create_tmp_var (ptype, NULL); - gimplify_assign (y4, ref, dlist); + tree ref = build_outer_var_ref (var, ctx); + /* For ref build_outer_var_ref already performs this. */ + if (TREE_CODE (d) == INDIRECT_REF) + gcc_assert (omp_is_reference (var)); + else if (TREE_CODE (d) == ADDR_EXPR) + ref = build_fold_addr_expr (ref); + else if (omp_is_reference (var)) + ref = build_fold_addr_expr (ref); + ref = fold_convert_loc (clause_loc, ptype, ref); + if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) + && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c)) + { + y3 = create_tmp_var (ptype, NULL); + gimplify_assign (y3, unshare_expr (ref), ilist); + } + if (is_simd) + { + y4 = create_tmp_var (ptype, NULL); + gimplify_assign (y4, ref, dlist); + } } } tree i = create_tmp_var (TREE_TYPE (v), NULL); gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), ilist); tree body = create_artificial_label (UNKNOWN_LOCATION); gimple_seq_add_stmt (ilist, gimple_build_label (body)); - if (y2 && pass != 3) + if (y2) { i2 = create_tmp_var (TREE_TYPE (v), NULL); gimplify_assign (i2, build_int_cst (TREE_TYPE (v), 0), dlist); @@ -4170,7 +4185,7 @@ lower_rec_input_clauses (tree clauses, g } DECL_HAS_VALUE_EXPR_P (placeholder) = 0; DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 0; - if (pass != 3) + if (y2) { x = lang_hooks.decls.omp_clause_dtor (c, build_simple_mem_ref (y2)); @@ -4218,7 +4233,7 @@ lower_rec_input_clauses (tree clauses, g g = gimple_bui
[PATCH] rs6000: Delete many HAVE_AS_* (PR87149)
This deletes most HAVE_AS_* that determine if the assembler supports some ISA level (and also HAVE_AS_MFPGPR and HAVE_AS_DFP). These are not useful: we will only generate an instruction that requires some newer ISA if the user specifically asked for it (with -mcpu=, say). If the assembler cannot handle that, it is fine if it gives an error. They also hurt: it increases the number of possible situations that all need handling and all need testing. We do not handle all cases, and obviously do not test all either. This patch removes: HAVE_AS_POPCNTB (power5, 2.02) HAVE_AS_FPRND (power5+, 2.04) HAVE_AS_CMPB(power6, 2.05) HAVE_AS_POPCNTD (power7, 2.06) HAVE_AS_POWER8 (power8, 2.07) HAVE_AS_POWER9 (power9, 3.0) HAVE_AS_DFP (power6, 2.05, server) HAVE_AS_MFPGPR (power6x but not later, not arch) Later patches will tie up some loose ends (LWSYNC, DCI, MFCRF, specs) that are easier handled separately for various reasons. This also adjusts the powerpcspe port because the configuration parts are still shared (none of it applies to any cpu supported by the powerpcspe port). Andrew, I hope you don't mind. Tested on powerpc-linux, powerpc64-linux, powerpc64le-linux, powerpc-aix. Committing. Segher 2018-09-27 Segher Boessenkool PR target/87149 * config.in (HAVE_AS_CMPB, HAVE_AS_DFP, HAVE_AS_FPRND, HAVE_AS_MFPGPR, HAVE_AS_POPCNTB, HAVE_AS_POPCNTD, HAVE_AS_POWER8, HAVE_AS_POWER9): Delete, always treat as true. * config/powerpcspe/powerpcspe.c (rs6000_option_override_internal): Ditto. Simplify remaining code. * config/powerpcspe/powerpcspe.h: Ditto. * config/rs6000/rs6000.c (rs6000_option_override_internal): Ditto. Simplify remaining code. (rs6000_expand_builtin): Ditto. * config/rs6000/rs6000.h: Ditto. * configure.ac: Ditto. * configure: Regenerate. --- gcc/config.in | 48 - gcc/config/powerpcspe/powerpcspe.c | 61 --- gcc/config/powerpcspe/powerpcspe.h | 108 --- gcc/config/rs6000/rs6000.c | 44 - gcc/config/rs6000/rs6000.h | 104 --- gcc/configure | 356 - gcc/configure.ac | 116 7 files changed, 837 deletions(-) diff --git a/gcc/config.in b/gcc/config.in index 775536e..48dfd23 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -302,12 +302,6 @@ #endif -/* Define if your assembler supports cmpb. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_CMPB -#endif - - /* Define to the level of your assembler's compressed debug section support. */ #ifndef USED_FOR_TARGET @@ -327,12 +321,6 @@ #endif -/* Define if your assembler supports DFP instructions. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_DFP -#endif - - /* Define if your assembler supports .module. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_DOT_MODULE @@ -382,12 +370,6 @@ #endif -/* Define if your assembler supports fprnd. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_FPRND -#endif - - /* Define if your assembler supports the --gdwarf2 option. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_GDWARF2_DEBUG_FLAG @@ -570,12 +552,6 @@ #endif -/* Define if your assembler supports mffgpr and mftgpr. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_MFPGPR -#endif - - /* Define if your Mac OS X assembler supports the -mmacos-version-min option. */ #ifndef USED_FOR_TARGET @@ -607,30 +583,6 @@ #endif -/* Define if your assembler supports popcntb field. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_POPCNTB -#endif - - -/* Define if your assembler supports POPCNTD instructions. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_POPCNTD -#endif - - -/* Define if your assembler supports POWER8 instructions. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_POWER8 -#endif - - -/* Define if your assembler supports POWER9 instructions. */ -#ifndef USED_FOR_TARGET -#undef HAVE_AS_POWER9 -#endif - - /* Define if your assembler supports .ref */ #ifndef USED_FOR_TARGET #undef HAVE_AS_REF diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c index 325b6ad..90170c3 100644 --- a/gcc/config/powerpcspe/powerpcspe.c +++ b/gcc/config/powerpcspe/powerpcspe.c @@ -4196,67 +4196,6 @@ rs6000_option_override_internal (bool global_init_p) gcc_assert (cpu_index >= 0); - if (have_cpu) -{ -#ifndef HAVE_AS_POWER9 - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER9) - { - have_cpu = false; - warning (0, "will not generate power9 instructions because " - "assembler lacks power9 support"); - } -#endif -#ifndef HAVE_AS_POWER8 - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER8) - { - have_cpu = false; - warning (0, "will not generate power8 instructions because " - "assembler lacks power8 support"); -
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 12:24 PM, Florian Weimer wrote: > * H. J. Lu: > >> +@item zero_caller_saved_regs("@var{choice}") >> +@cindex @code{zero_caller_saved_regs} function attribute, x86 >> +On x86 targets, the @code{zero_caller_saved_regs} attribute causes the >> +compiler to zero caller-saved integer registers at function return with >> +@var{choice}. @samp{skip} doesn't zero caller-saved integer registers. >> +@samp{used} zeros caller-saved integer registers which are used in >> +function. @samp{all} zeros all caller-saved integer registers. > > Perhaps “according to @var{choice}:”. And say that the default for the > attribute is controlled by @option{-mzero-caller-saved-regs}? Sure. > (Maybe “skip” should be none?) I have no strong opinion here. > I assume we can check for this use __has_attribute? We would use this Yes. > in the implementation of explicit_bzero in glibc. Good to know. >> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >> index 7ef4e7a449b..796477452d5 100644 >> --- a/gcc/doc/invoke.texi >> +++ b/gcc/doc/invoke.texi > >> @@ -28459,6 +28459,16 @@ not be reachable in the large code model. >> @opindex -mindirect-branch-register >> Force indirect call and jump via register. > >> +@item -mzero-caller-saved-regs=@var{choice} >> +@opindex -mzero-caller-saved-regs >> +Zero caller-saved integer registers at function return with @var{choice}. >> +The default is @samp{skip}, which doesn't zero caller-saved integer >> +registers. @samp{used} zeros caller-saved integer registers which are >> +used in function. @samp{all} zeros all caller-saved integer registers. >> +You can control this behavior for a specific function by using the >> +function attribute @code{zero_caller_saved_regs}. >> +@xref{Function Attributes}. > > See above regarding “with @var{choice}”. I will update it. Thanks. -- H.J.
Re: RFC: [PATCH] x86: Add -mzero-caller-saved-regs=[skip|used|all]
On Thu, Sep 27, 2018 at 7:01 AM, Rich Felker wrote: > On Wed, Sep 26, 2018 at 11:10:29AM -0700, H.J. Lu wrote: >> Add -mzero-caller-saved-regs=[skip|used|all] command-line option and >> zero_caller_saved_regs("skip|used|all") function attribue: > > Minor nit, but could this be named -mzero-call-clobbered-regs? > "Caller-saved" is a misnomer and inconsistent with other gcc usage. > For example -fcall-used-[reg] documents it as "an allocable register > that is clobbered by function calls". This is a piece of terminology > I've been trying to get fixed in educational materials so it would be > nice to avoid a new instance of it in gcc. I picked caller-saved since this terminology is used in x86 psABIs. I have no strong opinion here. We can decide on the name later after we decide that we want this feature in GCC. -- H.J.
[PATCH, i386]: Remove fpcr register definition
This is actually an unused register, there is no point to define it. 2018-09-27 Uros Bizjak * config/i386/i386.md (FPCR_REG): Remove. (x86_fnstcw_1): Use (const_int 0) instead of FPCR_REG. (x86_fldcw_1): Remove insn pattern. (fnstenv): Do not clobber FPCR_REG. (fldenv): Ditto. * config/i386/i386.h (FIXED_REGISTERS) Remove fpsr register. (CALL_USED_REGISTERS): Ditto. (REG_ALLOC_ORDER): Ditto. (REG_CLASS_CONTENTS): Ditto. (HI_REGISTER_NAMES): Ditto. (ADDITIONAL_REGISTER_NAMES): Use defines instead of numerical constants. * config/i386/i386.c (regclass_map): Remove fpsr register. (dbx_register_map): Ditto. (dbx64_register_map): Ditto. (svr4_dbx_register_map): Ditto. (print_reg): Do not handle FPCR_REG. testsuite/ChangeLog: 2018-09-27 Uros Bizjak * gcc.dg/rtl/x86_64/dfinit.c: Update scan-rtl-dump string. * gcc.dg/rtl/x86_64/times-two.c.before-df.c: Ditto. * gcc.target/i386/pr79804.c (foo): Use register "19", not "20". Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros. Index: config/i386/i386.c === --- config/i386/i386.c (revision 264672) +++ config/i386/i386.c (working copy) @@ -241,10 +241,8 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REG /* FP registers */ FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, - /* arg pointer */ - NON_Q_REGS, - /* flags, fpsr, fpcr, frame */ - NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, + /* arg pointer, flags, fpsr, frame */ + NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS, /* SSE registers */ SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, @@ -273,7 +271,7 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] { 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ - -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ + -1, -1, -1, -1, /* arg, flags, fpsr, frame */ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ @@ -289,7 +287,7 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER { 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ - -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ + -1, -1, -1, -1, /* arg, flags, fpsr, frame */ 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 8,9,10,11,12,13,14,15, /* extended integer registers */ @@ -357,7 +355,7 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGIS { 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ - -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ + -1, 9, -1, -1, /* arg, flags, fpsr, frame */ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ @@ -17372,8 +17370,7 @@ print_reg (rtx x, int code, FILE *file) if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM - || regno == FPSR_REG - || regno == FPCR_REG) + || regno == FPSR_REG) { output_operand_lossage ("invalid use of register '%s'", reg_names[regno]); Index: config/i386/i386.h === --- config/i386/i386.h (revision 264672) +++ config/i386/i386.h (working copy) @@ -1026,8 +1026,8 @@ extern const char *host_detect_local_cpu (int argc #define FIXED_REGISTERS\ /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ -/*arg,flags,fpsr,fpcr,frame*/ \ -1,1, 1, 1,1, \ +/*arg,flags,fpsr,frame*/ \ +1,1, 1,1,\ /*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/\ 0, 0, 0, 0, 0, 0, 0, 0, \ /* mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7*/\ @@ -1063,8 +1063,8 @@ extern const char *host_detect_local_cpu (int argc #define CALL_USED_REGISTERS\ /*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \ { 1, 1, 1, 0, 4, 4, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ -/*arg,flags,fpsr,fpcr,frame*/ \ -1, 1,1, 1,1, \ +/*a
Re: libgo patch committed: Update to 1.11 release
On Wed, Sep 26, 2018 at 5:18 PM, Ian Lance Taylor wrote: > On Wed, Sep 26, 2018 at 12:57 AM, Uros Bizjak wrote: >>> I've committed a patch to update libgo to the 1.11 release. As usual >>> for these updates, the patch is too large to attach to this e-mail >>> message. I've attached some of the more relevant directories. This >>> update required some minor patches to the gotools directory and the Go >>> testsuite, also included here. Bootstrapped and ran Go testsuite on >>> x86_64-pc-linux-gnu. Committed to mainline. >> >> Fails to build on alpha-linux-gnu: >> >> /space/homedirs/uros/gcc-svn/trunk/libgo/go/syscall/setuidgid_linux.go:11:16: >> error: reference to undefined name ‘SYS_GETEUID’ >> 11 | sys_GETEUID = SYS_GETEUID >>|^ >> >> This is because /usr/include/asm/unistd.h says: >> >> /* >> * Ignore legacy syscalls that we don't use. >> */ >> #define __IGNORE_alarm >> #define __IGNORE_creat >> #define __IGNORE_getegid >> #define __IGNORE_geteuid >> #define __IGNORE_getgid >> #define __IGNORE_getpid >> #define __IGNORE_getppid >> #define __IGNORE_getuid >> #define __IGNORE_pause >> #define __IGNORE_time >> #define __IGNORE_utime >> #define __IGNORE_umount2 >> >> These legacy syscalls are undefined for alpha-linux-gnu. > > Thanks for the report. This is only used for testing. I've committed > this patch, which I think should fix the problem. Thanks, with your patch, I was able to build the library. Uros.
Re: [Ada] Introduce -gnatd_A to set Opt.Disable_ALI_File
On Sep 26, 2018, Pierre-Marie de Rodat wrote: > This will allow us to remove the import of flag_compare_debug in > lib-writ.adb in a second stage. And here's the second stage. Tested on x86_64-linux-gnu. Ok to install? [Ada] use -gnatd_A to disable .ali on -fcompare-debug recompile From: Alexandre Oliva for gcc/ada/ChangeLog * gcc-interface/lang-specs.h (default_compilers): When given fcompare-debug-second, adjust auxbase like cc1, and pass gnatd_A. * gcc-interface/misc.c (flag_compare_debug): Remove variable. (gnat_post_options): Do not set it. * lib-writ.adb (flag_compare_debug): Remove import. (Write_ALI): Do not test it. --- gcc/ada/gcc-interface/lang-specs.h |3 ++- gcc/ada/gcc-interface/misc.c |4 gcc/ada/lib-writ.adb |8 +--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/gcc/ada/gcc-interface/lang-specs.h b/gcc/ada/gcc-interface/lang-specs.h index eece809343d13..c7bf32c03905d 100644 --- a/gcc/ada/gcc-interface/lang-specs.h +++ b/gcc/ada/gcc-interface/lang-specs.h @@ -35,7 +35,8 @@ gnat1 %{I*} %{k8:-gnatk8} %{Wall:-gnatwa} %{w:-gnatws} %{!Q:-quiet}\ %{nostdinc*} %{nostdlib*}\ -dumpbase %{.adb:%b.adb}%{.ads:%b.ads}%{!.adb:%{!.ads:%b.ada}}\ -%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}%{!c:%{!S:-auxbase %b}} \ +%{fcompare-debug-second:%:compare-debug-auxbase-opt(%b) -gnatd_A} \ +%{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}%{!c:%{!S:-auxbase %b}}} \ %{O*} %{W*} %{w} %{p} %{pg:-p} %{d*} \ %{coverage:-fprofile-arcs -ftest-coverage} " #if defined(TARGET_VXWORKS_RTP) diff --git a/gcc/ada/gcc-interface/misc.c b/gcc/ada/gcc-interface/misc.c index 16a9d2ccf8e0a..98ec2778d56d7 100644 --- a/gcc/ada/gcc-interface/misc.c +++ b/gcc/ada/gcc-interface/misc.c @@ -77,9 +77,6 @@ int optimize; #undef optimize_size int optimize_size; -#undef flag_compare_debug -int flag_compare_debug; - #undef flag_short_enums int flag_short_enums; @@ -284,7 +281,6 @@ gnat_post_options (const char **pfilename ATTRIBUTE_UNUSED) gnat_encodings = global_options.x_gnat_encodings; optimize = global_options.x_optimize; optimize_size = global_options.x_optimize_size; - flag_compare_debug = global_options.x_flag_compare_debug; flag_stack_check = global_options.x_flag_stack_check; flag_short_enums = global_options.x_flag_short_enums; diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb index f035b45e91331..ce35c9e8fa58c 100644 --- a/gcc/ada/lib-writ.adb +++ b/gcc/ada/lib-writ.adb @@ -226,10 +226,6 @@ package body Lib.Writ is Num_Sdep : Nat := 0; -- Number of active entries in Sdep_Table - flag_compare_debug : Int; - pragma Import (C, flag_compare_debug); - -- Import from toplev.c - --- -- Local Subprograms -- --- @@ -1079,9 +1075,7 @@ package body Lib.Writ is -- We never write an ALI file if the original operating mode was -- syntax-only (-gnats switch used in compiler invocation line) - if Original_Operating_Mode = Check_Syntax -or flag_compare_debug /= 0 - then + if Original_Operating_Mode = Check_Syntax then return; end if; -- Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo Be the change, be Free! FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist
Re: [PATCH 11/25] Simplify vec_merge according to the mask.
On 27/09/18 17:19, Richard Sandiford wrote: But we wouldn't recurse for PRE_INC, MEM or ASM_OPERANDS, since they have the wrong rtx class. AFAICT no current unary, binary or ternary operator has that level of side-effect (and that's a good thing). OK, in that case I'll remove it and we can cross that bridge if we come to it. This patch should also address your other concerns. OK? Andrew Simplify vec_merge according to the mask. This patch was part of the original patch we acquired from Honza and Martin. It simplifies nested vec_merge operations using the same mask. Self-tests are included. 2018-09-27 Andrew Stubbs Jan Hubicka Martin Jambor * simplify-rtx.c (simplify_merge_mask): New function. (simplify_ternary_operation): Use it, also see if VEC_MERGEs with the same masks are used in op1 or op2. (test_vec_merge): New function. (test_vector_ops): Call test_vec_merge. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index b4c6883..9bc5386 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -5578,6 +5578,68 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) return NULL_RTX; } +/* Try to simplify X given that it appears within operand OP of a + VEC_MERGE operation whose mask is MASK. X need not use the same + vector mode as the VEC_MERGE, but it must have the same number of + elements. + + Return the simplified X on success, otherwise return NULL_RTX. */ + +rtx +simplify_merge_mask (rtx x, rtx mask, int op) +{ + gcc_assert (VECTOR_MODE_P (GET_MODE (x))); + poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x)); + if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask)) +{ + if (side_effects_p (XEXP (x, 1 - op))) + return NULL_RTX; + + return XEXP (x, op); +} + if (UNARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)) +{ + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + if (top0) + return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0, + GET_MODE (XEXP (x, 0))); +} + if (BINARY_P (x) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)) +{ + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + if (top0 || top1) + return simplify_gen_binary (GET_CODE (x), GET_MODE (x), +top0 ? top0 : XEXP (x, 0), +top1 ? top1 : XEXP (x, 1)); +} + if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY + && VECTOR_MODE_P (GET_MODE (XEXP (x, 0))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 1))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits) + && VECTOR_MODE_P (GET_MODE (XEXP (x, 2))) + && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits)) +{ + rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op); + rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op); + rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op); + if (top0 || top1 || top2) + return simplify_gen_ternary (GET_CODE (x), GET_MODE (x), + GET_MODE (XEXP (x, 0)), + top0 ? top0 : XEXP (x, 0), + top1 ? top1 : XEXP (x, 1), + top2 ? top2 : XEXP (x, 2)); +} + return NULL_RTX; +} + /* Simplify CODE, an operation with result mode MODE and three operands, OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became @@ -5967,6 +6029,16 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, && !side_effects_p (op2) && !side_effects_p (op1)) return op0; + if (!side_effects_p (op2)) + { + rtx top0 = simplify_merge_mask (op0, op2, 0); + rtx top1 = simplify_merge_mask (op1, op2, 1); + if (top0 || top1) + return simplify_gen_ternary (code, mode, mode, + top0 ? top0 : op0, + top1 ? top1 : op1, op2); + } + break; default: @@ -6856,6 +6928,69 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) constm1_rtx)); } +/* Verify simplify_merge_mask works correctly. */ + +static void +test_vec_merge (machine_mode mode) +{ + rtx op0 = make_test_reg (mode); + rtx op1 = make_test_reg (mode); + rtx op2 = make_test_reg (mode); + rtx op3 = make_test_reg (mode); + rtx op4 = make_test_reg (mode); + rtx op5 = make_test_reg (mode); + rtx mask1 = make_test_reg (SImode); + rtx mask2 = make_test_reg (SImode); + rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1); + rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1); + rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1); + + /* Simple vec_merge. */ + ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0)); + ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1)); +
Re: [PATCH 15/25] Don't double-count early-clobber matches.
On 17/09/18 10:18, Richard Sandiford wrote: The idea looks good to me FWIW, but you can't use curr_static_id for the state, since that's a static description of the .md pattern rather than data about this particular instance. I clearly misunderstood what that was for. This patch does the same thing, but uses a local variable to store the state. That probably means it does it more correctly, too. OK? Andrew Don't double-count early-clobber matches. Given a pattern with a number of operands: (match_operand 0 "" "=&v") (match_operand 1 "" " v0") (match_operand 2 "" " v0") (match_operand 3 "" " v0") GCC will currently increment "reject" once, for operand 0, and then decrement it once for each of the other operands, ending with reject == -2 and an assertion failure. If there's a conflict then it might try to decrement reject yet again. Incidentally, what these patterns are trying to achieve is an allocation in which operand 0 may match one of the other operands, but may not partially overlap any of them. Ideally there'd be a better way to do this. In any case, it will affect any pattern in which multiple operands may (or must) match an early-clobber operand. The patch only allows a reject-- when one has not already occurred, for that operand. 2018-09-27 Andrew Stubbs gcc/ * lra-constraints.c (process_alt_operands): Check matching_early_clobber before decrementing reject, and set matching_early_clobber after. * lra-int.h (struct lra_operand_data): Add matching_early_clobber. * lra.c (setup_operand_alternative): Initialize matching_early_clobber. diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 774d1ff..e1d1688 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1969,6 +1969,7 @@ process_alt_operands (int only_alternative) if (!TEST_BIT (preferred, nalt)) continue; + bool matching_early_clobber[MAX_RECOG_OPERANDS] = {}; curr_small_class_check++; overall = losers = addr_losers = 0; static_reject = reject = reload_nregs = reload_sum = 0; @@ -2175,7 +2176,11 @@ process_alt_operands (int only_alternative) "%d Matching earlyclobber alt:" " reject--\n", nop); - reject--; + if (!matching_early_clobber[m]) + { +reject--; +matching_early_clobber[m] = 1; + } } /* Otherwise we prefer no matching alternatives because it gives more freedom @@ -2921,15 +2926,11 @@ process_alt_operands (int only_alternative) curr_alt_dont_inherit_ops[curr_alt_dont_inherit_ops_num++] = last_conflict_j; losers++; - /* Early clobber was already reflected in REJECT. */ - lra_assert (reject > 0); if (lra_dump_file != NULL) fprintf (lra_dump_file, "%d Conflict early clobber reload: reject--\n", i); - reject--; - overall += LRA_LOSER_COST_FACTOR - 1; } else { @@ -2953,17 +2954,21 @@ process_alt_operands (int only_alternative) } curr_alt_win[i] = curr_alt_match_win[i] = false; losers++; - /* Early clobber was already reflected in REJECT. */ - lra_assert (reject > 0); if (lra_dump_file != NULL) fprintf (lra_dump_file, "%d Matched conflict early clobber reloads: " "reject--\n", i); + } + /* Early clobber was already reflected in REJECT. */ + if (!matching_early_clobber[i]) + { + lra_assert (reject > 0); reject--; - overall += LRA_LOSER_COST_FACTOR - 1; + matching_early_clobber[i] = 1; } + overall += LRA_LOSER_COST_FACTOR - 1; } if (lra_dump_file != NULL) fprintf (lra_dump_file, " alt=%d,overall=%d,losers=%d,rld_nregs=%d\n",
[PATCH] rs6000: Fix one more vec_splat case
This fixed the one remaining case where the stricter vec_splat checking complains in the testsuite. Committing. 2018-09-27 Segher Boessenkool gcc/testsuite/ * g++.dg/ext/altivec-6.C: Change the vec_splat second argument to a valid value, in the "vector bool int" case. --- gcc/testsuite/g++.dg/ext/altivec-6.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/ext/altivec-6.C b/gcc/testsuite/g++.dg/ext/altivec-6.C index 4c863ef..d17fd90 100644 --- a/gcc/testsuite/g++.dg/ext/altivec-6.C +++ b/gcc/testsuite/g++.dg/ext/altivec-6.C @@ -26,5 +26,5 @@ void foo(void) { elements in the referenced vector. */ vbs = vec_splat(vbs, 4); vp = vec_splat(vp, 1); - vbi = vec_splat(vbi, 15); + vbi = vec_splat(vbi, 3); } -- 1.8.3.1
Re: [PATCH] declare tree_to_shwi et al. nonnull and pure
On 09/27/2018 12:25 PM, Jeff Law wrote: On 9/26/18 5:54 PM, Martin Sebor wrote: The attached patch adds attributes nonnull and pure to tree_to_shwi() and a small number of other heavily used functions that will benefit from both. First, I noticed that there are a bunch of functions in tree.c that deal gracefully with null pointers right alongside a bunch of other related functions that don't. For example, tree_fits_shwi_p() is null-safe but integer_zerop() is not. There a number of others. I never remember which ones are in which group and so I either add unnecessary checks or forget to add them, for which we then all pay when the missing check triggers an ICE. In patch reviews I see I'm not the only one who's not sure. The attribute should help avoid some of these problems: both visually and via warnings. Second, while testing the nonnull patch, I noticed that GCC would not optimize some null tests after calls to nonnull functions that take tree as an argument and that I expected it to optimize, like n = tree_to_shwi (TYPE_SIZE (type)); if (TYPE_SIZE (type)) ... The reason is that tree_to_shwi() isn't declared pure, even though tree_fits_shwi_p() is (though as I mentioned, the latter is null safe and so not declarted nonnull, and so it doesn't make the same optimization possible). Tested on x86_64-linux. The patch exposed a couple of issues in Ada. At least the first one is a false positive caused by GCC being unaware that tree_fits_uhwi_p() returns false for a null argument (propagating this knowledge via an attribute seems like an opportunity for a future enhancement). I suppressed the warning by introducing a local temporary. I suspect the second warning is caused by the Ada TYPE_RM_SIZE() macro expanding to a conditional with an explicit null operand: #define TYPE_RM_SIZE(NODE) TYPE_RM_VALUE ((NODE), 0) #define TYPE_RM_VALUE(NODE, N) \ (TYPE_RM_VALUES (NODE)\ ? TREE_VEC_ELT (TYPE_RM_VALUES (NODE), (N)) : NULL_TREE) but I wasn't able to reduce it to a small test case to confirm that. I suppressed this warning by introducing a temporary as well. Martin gcc-tree-nonnull.diff gcc/ChangeLog: * tree.h (tree_to_shwi): Add attribute nonnull. (tree_to_poly_int64, tree_to_uhwi, tree_to_poly_uint64): Same. (integer_zerop, integer_onep, int_fits_type_p): Same. gcc/ada/ChangeLog: * gcc-interface/utils.c (make_packable_type): Introduce a temporary to avoid -Wnonnull. (unchecked_convert): Same. No doubt we have not been diligent about marking non-null, const, pure, etc over time. I thought we had warnings for functions which are const/pure but not suitably marked. Can you peek a bit at why those didn't trigger. See ipa-pure-const.c. Ignore the initial comment -- it applies to both functions and data. The -Wsuggest-attribute=const|pure warnings (or any others) are not enabled by default and GCC doesn't explicitly enable them. Maybe it should? FWIW, I tried add the options as a test in a bootstrap but no matter what make variables I set I couldn't figure out how to add them to the GCC command line, or find where it's documented. Do you have any idea how to do that? ISTM we could probably build a missing non-null attribute warning. If a NULL pointer argument unconditionally leads to an assert, then the function should be marked. Similarly if a pointer argument is unconditionally dereferenced, then it should be marked. I strongly suspect this would be too noisy to enable by default. Yes, that would be useful but I'm sure you're right that it would also be noisy with most code. ANyway, the patch is OK for the trunk. Committed in r264680. Martin
Re: C++ PATCH to implement P1064R0, Virtual Function Calls in Constant Expressions (v4)
On Thu, Sep 27, 2018 at 01:15:46AM -0400, Jason Merrill wrote: > On Thu, Sep 20, 2018 at 5:20 AM, Jakub Jelinek wrote: > > On Thu, Sep 20, 2018 at 09:12:53AM +0200, Andreas Schwab wrote: > >> On Sep 19 2018, Jason Merrill wrote: > >> > >> > Andreas, do the new testcases pass? That would surprise me, but OK if > >> > so. > >> > >> No, they don't. > >> > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:29:26: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:29:23: > >> error: expression '((& X2::_ZTV2X2) + 16)' does not designate a > >> 'constexpr' function > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:33:26: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:33:23: > >> error: expression '((& X2::_ZTV2X2) + 16)' does not designate a > >> 'constexpr' function > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:37:27: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:37:24: > >> error: expression '((& X2::_ZTV2X2) + 16)' does not designate a > >> 'constexpr' function > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:41:26: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:41:23: > >> error: expression '((& X4::_ZTV2X4) + 16)' does not designate a > >> 'constexpr' function > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:45:26: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:45:23: > >> error: expression '((& X4::_ZTV2X4) + 16)' does not designate a > >> 'constexpr' function > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:49:27: > >> error: non-constant condition for static assertion > >> /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:49:24: > >> error: expression '((& X4::_ZTV2X4) + 16)' does not designate a > >> 'constexpr' function > >> compiler exited with status 1 > >> FAIL: g++.dg/cpp2a/constexpr-virtual2.C (test for excess errors) > > > > I think the primary problem here is: > > /* When using function descriptors, the address of the > > vtable entry is treated as a function pointer. */ > > if (TARGET_VTABLE_USES_DESCRIPTORS) > > e2 = build1 (NOP_EXPR, TREE_TYPE (e2), > > cp_build_addr_expr (e2, complain)); > > in typeck.c, on non-descriptor targets we have an INDIRECT_REF where we > > read the vtable function pointer. On ia64, the above optimizes the > > INDIRECT_REF away, so what the cxx_eval_call_expression actually gets > > after constexpr evaluating the CALL_FN is not ADDR_EXPR of a function, > > but the address of the function descriptor (e.g. &_ZTV2X2 + 16 ). > > > > So, perhaps in cxx_eval_call_expression we need: > >if (TREE_CODE (fun) == ADDR_EXPR) > > fun = TREE_OPERAND (fun, 0); > > + else if (TARGET_VTABLE_USES_DESCRIPTORS > > + && TREE_CODE (fun) == POINTER_PLUS_EXPR > > + && ...) > > where we verify that p+ first argument is ADDR_EXPR of a virtual table, > > second arg is INTEGER_CST and just walk the DECL_INITIAL of that, finding > > the FDESC_EXPR at the right offset (therefore, I believe you need following > > rather than the patch you've posted, so that you can actually find it) and > > finally pick the function from the FDESC_EXPR entry. > > Makes me wonder what happens with indirect calls in constexpr evaluation, > > e.g. if I do: > > constexpr int bar () { return 42; } > > constexpr int foo () { int (*fn) () = bar; return fn (); } > > static_assert (foo () == 42); > > but apparently this works. > > > > --- gcc/cp/class.c.jj 2018-09-20 09:56:59.229751895 +0200 > > +++ gcc/cp/class.c 2018-09-20 10:12:17.447370890 +0200 > > @@ -9266,7 +9266,6 @@ build_vtbl_initializer (tree binfo, > >tree vcall_index; > >tree fn, fn_original; > >tree init = NULL_TREE; > > - tree idx = size_int (jx++); > > > >fn = BV_FN (v); > >fn_original = fn; > > @@ -9370,7 +9369,7 @@ build_vtbl_initializer (tree binfo, > > int i; > > if (init == size_zero_node) > > for (i = 0; i < TARGET_VTABLE_USES_DESCRIPTORS; ++i) > > - CONSTRUCTOR_APPEND_ELT (*inits, idx, init); > > + CONSTRUCTOR_APPEND_ELT (*inits, size_int (jx++), init); > > else > > for (i = 0; i < TARGET_VTABLE_USES_DESCRIPTORS; ++i) > > { > > @@ -9378,11 +9377,11 @@ build_vtbl_initializer (tree binfo, > > fn, build_
Re: [PATCH,rs6000] Add builtins for accessing the FPSCR
Segher: I have addressed the various formatting and other minor issues. I checked to see if the builtins worked with -msoft-float, which they didn't. I added checks for the -msoft-float into the functions: rs6000_expand_set_fpscr_rn_builtin(), rs6000_expand_set_fpscr_drn_builtin(), rs6000_expand_mtfsb_builtin() in file cc/config/rs6000/rs6000.c to exit if the option was set on the command line. Per your suggestion, I added code to use the mtfsb[0|1] instructions in define_expand rs6000_set_fpscr_rn if the argument is a constant. I verified that mtfsb[0|1] instructions are generated for constant arguments vi objdump. I have rerun the regression tests on powerpc64le-unknown-linux-gnu (Power 8 LE 64-bit mode only) powerpc64-unknown-linux-gnu (Power 8 BE 32-bit and 64-bit modes) powerpc64le-unknown-linux-gnu (Power 9 LE 64-bit mode only) With no regressions. Please let me know if the patch looks OK for trunk. Thanks for your help on this patch. Carl Love --- gcc/ChangeLog: 2018-09-27 Carl Love * config/rs6000/rs6000-builtin.def (__builtin_mffsl): New. (__builtin_mtfsb0): New. (__builtin_mtfsb1): New. ( __builtin_set_fpscr_rn): New. (__builtin_set_fpscr_drn): New. * config/rs6000/rs6000.c (rs6000_expand_mtfsb_builtin): Add. (rs6000_expand_set_fpscr_rn_builtin): Add. (rs6000_expand_set_fpscr_drn_builtin): Add. (rs6000_expand_builtin): Add case statement entries for RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1, RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN, RS6000_BUILTIN_MFFSL. (rs6000_init_builtins): Add ftype initialization and def_builtin calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn. * config/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn, rs6000_mffscdrn): Add define_insn. (rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand. * doc/extend.texi: Add documentation for the builtins. gcc/testsuite/ChangeLog: 2018-09-27 Carl Love * gcc.target/powerpc/test_mffsl-p9.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file. --- gcc/config/rs6000/rs6000-builtin.def | 24 +++ gcc/config/rs6000/rs6000.c | 168 ++ gcc/config/rs6000/rs6000.md| 176 ++- gcc/doc/extend.texi| 38 - .../gcc.target/powerpc/test_fpscr_drn_builtin.c| 116 + .../powerpc/test_fpscr_drn_builtin_error.c | 17 ++ .../gcc.target/powerpc/test_fpscr_rn_builtin.c | 190 + .../powerpc/test_fpscr_rn_builtin_error.c | 22 +++ gcc/testsuite/gcc.target/powerpc/test_mffsl.c | 34 9 files changed, 783 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f799681..976c36b 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2486,11 +2486,35 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", RS6000_BTM_ALWAYS, RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, CODE_FOR_rs6000_mtfsf) +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, + CODE_FOR_rs6000_mtfsb0) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, + CODE_FOR_rs6000_mtfsb1) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY| RS6000_BTC_VOID, + CODE_FOR_rs6000_set_fpscr_rn
Re: [PATCH,rs6000] Add builtins for accessing the FPSCR
Hi! On Thu, Sep 27, 2018 at 04:17:57PM -0700, Carl Love wrote: > + if (icode == CODE_FOR_rs6000_mffsl > + && rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) > +fatal_error (input_location, > + "__builtin_mffsl() not supported with -msoft-float"); Please use plain "error ()" instead. To keep whatever else here from wreaking havoc, also immediately after the error() do "return const0_rtx"? (Same for all other fatal_error, of course. fatal_error is for when the compiler needs to go down ungracefully, _now_. It is nicer to still try to continue for a little while). > + /* If the argument is a constant, check the range. Argument can only be a > + 2-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. The least significant two > + bits of the argument, regardless of type, are used to set the rounding > + mode. All other bits are ignored. */ > + if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode)) > +{ > + error ("Argument must be a value between 0 and 3."); > + return const0_rtx; > +} These are indented a char too many. > + if (TARGET_P9_MISC) > +{ > + rtx src_df = gen_reg_rtx (DImode); > + > + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); > + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); > +} > + else This is easier if you write it like: if (...) { emit this; emit that; DONE; } if (...) { emit this; emit that; DONE; } etc. With that style, code that is semantically at the same level has the same indent, instead of wandering further and further to the right. > + { > + rtx tmp_rn = gen_reg_rtx (DImode); > + rtx tmp_di = gen_reg_rtx (DImode); > + > + /* Extract new RN mode from operand. */ > + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3))); > + > + /* Insert new RN mode into FSCPR. */ > + emit_insn (gen_rs6000_mffs (tmp_df)); > + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); > + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFC))); This loses bits 0..31 (the top half of the register). Maybe use GEN_INT (-4) ? > + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); > + > + /* Need to write to field k=15. The fields are [0:15]. Hence with > + L=0, W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an > + 8-bit field[0:7]. Need to set the bit that corresponds to the > + value of i that you want [0:7]. */ > + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); > + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); > +} :-) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c > @@ -0,0 +1,116 @@ > +/* { dg-do run { target { powerpc*-*-* && lp64 } } } */ > +/* { dg-options "-std=c99" } */ You need to require a system that implements the DRN bits... I think you'll need the "dfp_hw" selector. (That's power6 and later, may not be so easy to test this ;-) ) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c > @@ -0,0 +1,34 @@ > +/* { dg-do run { target { powerpc*-*-* } } } */ > +/* { dg-options "-std=c99" } */ Maybe you should do the run tests with -O2? Maybe compile tests, too, come to think of it. With those details fixed, okay for trunk. Thanks! Segher
Re: C++ PATCH to implement P1064R0, Virtual Function Calls in Constant Expressions (v4)
On Thu, Sep 27, 2018, 7:08 PM Marek Polacek wrote: > On Thu, Sep 27, 2018 at 01:15:46AM -0400, Jason Merrill wrote: > > On Thu, Sep 20, 2018 at 5:20 AM, Jakub Jelinek wrote: > > > On Thu, Sep 20, 2018 at 09:12:53AM +0200, Andreas Schwab wrote: > > >> On Sep 19 2018, Jason Merrill wrote: > > >> > > >> > Andreas, do the new testcases pass? That would surprise me, but OK > if so. > > >> > > >> No, they don't. > > >> > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:29:26: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:29:23: > error: expression '((& X2::_ZTV2X2) + 16)' does not designate a 'constexpr' > function > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:33:26: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:33:23: > error: expression '((& X2::_ZTV2X2) + 16)' does not designate a 'constexpr' > function > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:37:27: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:37:24: > error: expression '((& X2::_ZTV2X2) + 16)' does not designate a 'constexpr' > function > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:41:26: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:41:23: > error: expression '((& X4::_ZTV2X4) + 16)' does not designate a 'constexpr' > function > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:45:26: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:45:23: > error: expression '((& X4::_ZTV2X4) + 16)' does not designate a 'constexpr' > function > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:49:27: > error: non-constant condition for static assertion > > >> > /usr/local/gcc/gcc-20180920/gcc/testsuite/g++.dg/cpp2a/constexpr-virtual2.C:49:24: > error: expression '((& X4::_ZTV2X4) + 16)' does not designate a 'constexpr' > function > > >> compiler exited with status 1 > > >> FAIL: g++.dg/cpp2a/constexpr-virtual2.C (test for excess errors) > > > > > > I think the primary problem here is: > > > /* When using function descriptors, the address of the > > > vtable entry is treated as a function pointer. */ > > > if (TARGET_VTABLE_USES_DESCRIPTORS) > > > e2 = build1 (NOP_EXPR, TREE_TYPE (e2), > > > cp_build_addr_expr (e2, complain)); > > > in typeck.c, on non-descriptor targets we have an INDIRECT_REF where we > > > read the vtable function pointer. On ia64, the above optimizes the > > > INDIRECT_REF away, so what the cxx_eval_call_expression actually gets > > > after constexpr evaluating the CALL_FN is not ADDR_EXPR of a function, > > > but the address of the function descriptor (e.g. &_ZTV2X2 + 16 ). > > > > > > So, perhaps in cxx_eval_call_expression we need: > > >if (TREE_CODE (fun) == ADDR_EXPR) > > > fun = TREE_OPERAND (fun, 0); > > > + else if (TARGET_VTABLE_USES_DESCRIPTORS > > > + && TREE_CODE (fun) == POINTER_PLUS_EXPR > > > + && ...) > > > where we verify that p+ first argument is ADDR_EXPR of a virtual table, > > > second arg is INTEGER_CST and just walk the DECL_INITIAL of that, > finding > > > the FDESC_EXPR at the right offset (therefore, I believe you need > following > > > rather than the patch you've posted, so that you can actually find it) > and > > > finally pick the function from the FDESC_EXPR entry. > > > Makes me wonder what happens with indirect calls in constexpr > evaluation, > > > e.g. if I do: > > > constexpr int bar () { return 42; } > > > constexpr int foo () { int (*fn) () = bar; return fn (); } > > > static_assert (foo () == 42); > > > but apparently this works. > > > > > > --- gcc/cp/class.c.jj 2018-09-20 09:56:59.229751895 +0200 > > > +++ gcc/cp/class.c 2018-09-20 10:12:17.447370890 +0200 > > > @@ -9266,7 +9266,6 @@ build_vtbl_initializer (tree binfo, > > >tree vcall_index; > > >tree fn, fn_original; > > >tree init = NULL_TREE; > > > - tree idx = size_int (jx++); > > > > > >fn = BV_FN (v); > > >fn_original = fn; > > > @@ -9370,7 +9369,7 @@ build_vtbl_initializer (tree binfo, > > > int i; > > > if (init == size_zero_node) > > > for (i = 0; i < TARGET_VTABLE_USES_DESCRIPTORS; ++i) > > > - CONSTRUCTOR_APPEND_ELT (*inits, idx, init); > > > + CONSTRUCTOR_APPEND_ELT (*inits, size_int (jx++), init); > > > else > > > for (i = 0; i < TARGET_VTABLE_USES_DE
Re: C++ PATCH to implement P1064R0, Virtual Function Calls in Constant Expressions (v4)
On Thu, Sep 27, 2018 at 07:08:41PM -0400, Marek Polacek wrote: > > This patch is OK. And your suggestion for cxx_eval_call_expression > > sounds right, too. Marek, will you follow up on that? > > Ok, I will (provided I can get a box that has TARGET_VTABLE_USES_DESCRIPTORS, > I > think ppc64 BE should be enough). AFAIK only ia64 defines this macro to non-zero, so you need to cross-compile. Given that this is constexpr stuff, all you are looking for is compile time anyway ;) Jakub
Re: [Ada] Introduce -gnatd_A to set Opt.Disable_ALI_File
>> This will allow us to remove the import of flag_compare_debug in >> lib-writ.adb in a second stage. > > And here's the second stage. Tested on x86_64-linux-gnu. Ok to install? This is OK, thanks. > [Ada] use -gnatd_A to disable .ali on -fcompare-debug recompile > > From: Alexandre Oliva > > for gcc/ada/ChangeLog > >* gcc-interface/lang-specs.h (default_compilers): When given >fcompare-debug-second, adjust auxbase like cc1, and pass >gnatd_A. >* gcc-interface/misc.c (flag_compare_debug): Remove variable. >(gnat_post_options): Do not set it. >* lib-writ.adb (flag_compare_debug): Remove import. >(Write_ALI): Do not test it. > --- > gcc/ada/gcc-interface/lang-specs.h |3 ++- > gcc/ada/gcc-interface/misc.c |4 > gcc/ada/lib-writ.adb |8 +--- > 3 files changed, 3 insertions(+), 12 deletions(-) > > diff --git a/gcc/ada/gcc-interface/lang-specs.h > b/gcc/ada/gcc-interface/lang-specs.h > index eece809343d13..c7bf32c03905d 100644 > --- a/gcc/ada/gcc-interface/lang-specs.h > +++ b/gcc/ada/gcc-interface/lang-specs.h > @@ -35,7 +35,8 @@ > gnat1 %{I*} %{k8:-gnatk8} %{Wall:-gnatwa} %{w:-gnatws} %{!Q:-quiet}\ > %{nostdinc*} %{nostdlib*}\ > -dumpbase %{.adb:%b.adb}%{.ads:%b.ads}%{!.adb:%{!.ads:%b.ada}}\ > -%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}%{!c:%{!S:-auxbase %b}} \ > +%{fcompare-debug-second:%:compare-debug-auxbase-opt(%b) -gnatd_A} \ > +%{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase > %b}}%{!c:%{!S:-auxbase %b}}} \ > %{O*} %{W*} %{w} %{p} %{pg:-p} %{d*} \ > %{coverage:-fprofile-arcs -ftest-coverage} " > #if defined(TARGET_VXWORKS_RTP) > diff --git a/gcc/ada/gcc-interface/misc.c b/gcc/ada/gcc-interface/misc.c > index 16a9d2ccf8e0a..98ec2778d56d7 100644 > --- a/gcc/ada/gcc-interface/misc.c > +++ b/gcc/ada/gcc-interface/misc.c > @@ -77,9 +77,6 @@ int optimize; > #undef optimize_size > int optimize_size; > > -#undef flag_compare_debug > -int flag_compare_debug; > - > #undef flag_short_enums > int flag_short_enums; > > @@ -284,7 +281,6 @@ gnat_post_options (const char **pfilename > ATTRIBUTE_UNUSED) > gnat_encodings = global_options.x_gnat_encodings; > optimize = global_options.x_optimize; > optimize_size = global_options.x_optimize_size; > - flag_compare_debug = global_options.x_flag_compare_debug; > flag_stack_check = global_options.x_flag_stack_check; > flag_short_enums = global_options.x_flag_short_enums; > > diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb > index f035b45e91331..ce35c9e8fa58c 100644 > --- a/gcc/ada/lib-writ.adb > +++ b/gcc/ada/lib-writ.adb > @@ -226,10 +226,6 @@ package body Lib.Writ is > Num_Sdep : Nat := 0; > -- Number of active entries in Sdep_Table > > - flag_compare_debug : Int; > - pragma Import (C, flag_compare_debug); > - -- Import from toplev.c > - > --- > -- Local Subprograms -- > --- > @@ -1079,9 +1075,7 @@ package body Lib.Writ is > -- We never write an ALI file if the original operating mode was > -- syntax-only (-gnats switch used in compiler invocation line) > > - if Original_Operating_Mode = Check_Syntax > -or flag_compare_debug /= 0 > - then > + if Original_Operating_Mode = Check_Syntax then > return; > end if; > > > > -- > Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo > Be the change, be Free! FSF Latin America board member > GNU Toolchain EngineerFree Software Evangelist