[PATCH] Make -gcolumn-info the default
Hi! When -gcolumn-info was added back in February, it was too late in the release cycle to make it the default, but I think now is the good time to do it for GCC8. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-10-23 Jakub Jelinek * common.opt (gcolumn-info): Enable by default. * doc/invoke.texi (gcolumn-info): Document new default. * lib/scanasm.exp (dg-function-on-line): Accept optional column info. * gcc.dg/debug/dwarf2/pr53948.c: Likewise. * g++.dg/debug/dwarf2/pr77363.C: Likewise. * gcc.dg/debug/dwarf2/asm-line1.c: Add -gno-column-info to dg-options. * gcc.dg/debug/dwarf2/discriminator.c: Likewise. * g++.dg/debug/dwarf2/typedef6.C: Likewise. --- gcc/common.opt.jj 2017-10-20 16:02:58.0 +0200 +++ gcc/common.opt 2017-10-20 16:54:04.522033739 +0200 @@ -2873,7 +2873,7 @@ Common Driver JoinedOrMissing Negative(g Generate debug information in COFF format. gcolumn-info -Common Driver Var(debug_column_info,1) Init(0) +Common Driver Var(debug_column_info,1) Init(1) Record DW_AT_decl_column and DW_AT_call_column in DWARF. gdwarf --- gcc/doc/invoke.texi.jj 2017-10-20 16:22:07.0 +0200 +++ gcc/doc/invoke.texi 2017-10-20 16:54:32.940684888 +0200 @@ -7064,7 +7064,7 @@ Allow using extensions of later DWARF st @opindex gno-column-info Emit location column information into DWARF debugging information, rather than just file and line. -This option is disabled by default. +This option is enabled by default. 
@item -gz@r{[}=@var{type}@r{]} @opindex gz --- gcc/testsuite/lib/scanasm.exp.jj2017-10-17 17:58:16.0 +0200 +++ gcc/testsuite/lib/scanasm.exp 2017-10-21 11:52:49.955774302 +0200 @@ -484,16 +484,16 @@ proc dg-function-on-line { args } { } if { [istarget hppa*-*-*] } { - set pattern [format {\t;[^:]+:%d\n(\t[^\t]+\n)+%s:\n\t.PROC} \ + set pattern [format {\t;[^:]+:%d(:[0-9]+)?\n(\t[^\t]+\n)+%s:\n\t.PROC} \ $line $symbol] } elseif { [istarget mips*-*-*] } { - set pattern [format {\t\.loc [0-9]+ %d 0( [^\n]*)?\n(\t.cfi_startproc[^\t]*\n)*\t\.set\t(no)?mips16\n\t(\.set\t(no)?micromips\n\t)?\.ent\t%s\n\t\.type\t%s, @function\n%s:\n} \ + set pattern [format {\t\.loc [0-9]+ %d [0-9]+( [^\n]*)?\n(\t.cfi_startproc[^\t]*\n)*\t\.set\t(no)?mips16\n\t(\.set\t(no)?micromips\n\t)?\.ent\t%s\n\t\.type\t%s, @function\n%s:\n} \ $line $symbol $symbol $symbol] } elseif { [istarget microblaze*-*-*] } { -set pattern [format {:%d\n\$.*:\n\t\.ent\t%s\n\t\.type\t%s, @function\n%s:\n} \ +set pattern [format {:%d(:[0-9]+)?\n\$.*:\n\t\.ent\t%s\n\t\.type\t%s, @function\n%s:\n} \ $line $symbol $symbol $symbol] } else { - set pattern [format {%s:[^\t]*(\t.(fnstart|frame|mask|file)[^\t]*)*\t[^:]+:%d\n} \ + set pattern [format {%s:[^\t]*(\t.(fnstart|frame|mask|file)[^\t]*)*\t[^:]+:%d(:[0-9]+)?\n} \ $symbol $line] } --- gcc/testsuite/gcc.dg/debug/dwarf2/asm-line1.c.jj2014-09-25 15:02:24.0 +0200 +++ gcc/testsuite/gcc.dg/debug/dwarf2/asm-line1.c 2017-10-21 11:46:24.655510428 +0200 @@ -1,6 +1,6 @@ /* PR debug/50983 */ /* { dg-do compile { target *-*-gnu* } } */ -/* { dg-options "-O0 -gdwarf" } */ +/* { dg-options "-O0 -gdwarf -gno-column-info" } */ /* { dg-final { scan-assembler "is_stmt 1" } } */ int i; --- gcc/testsuite/gcc.dg/debug/dwarf2/discriminator.c.jj2014-09-25 15:02:24.0 +0200 +++ gcc/testsuite/gcc.dg/debug/dwarf2/discriminator.c 2017-10-21 11:47:22.029804496 +0200 @@ -1,7 +1,7 @@ /* HAVE_AS_DWARF2_DEBUG_LINE macro needs to be defined to pass the unittest. 
However, dg cannot access it, so we restrict to GNU targets. */ /* { dg-do compile { target *-*-gnu* } } */ -/* { dg-options "-O0 -gdwarf" } */ +/* { dg-options "-O0 -gdwarf -gno-column-info" } */ /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])?\n" } } */ /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])? discriminator 2\n" } } */ /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])? discriminator 1\n" } } */ --- gcc/testsuite/gcc.dg/debug/dwarf2/pr53948.c.jj 2014-09-25 15:02:24.0 +0200 +++ gcc/testsuite/gcc.dg/debug/dwarf2/pr53948.c 2017-10-21 11:48:14.555158962 +0200 @@ -1,7 +1,7 @@ /* Test that we have line information for the line with local variable initializations. */ /* { dg-options "-O0 -gdwarf -dA" } */ -/* { dg-final { scan-assembler ".loc 1 8 0|\[#/!\]\[ \t\]+line 8" } } */ +/* { dg-final { scan-assembler ".loc 1 8 \[0-9\]|\[#/!\]\[ \t\]+line 8" } } */ int f (register int a, register int b) { --- gcc/testsuite/g++.dg/debug/dwarf2/typedef6.C.jj 2015-07-09 19:47:10.0 +0200 +++ gcc/testsuite/g++.dg/debug/dwarf2/typedef6.C2017-10-21 11:56:44.744888754 +0200 @@ -1,5 +1,5 @@ // Origin PR debu
[PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)
Hi! If all fails, when we can't prove that the PIC register is in some hard register, we delegitimize something + foo@GOTOFF as (something - _GLOBAL_OFFSET_TABLE_) + foo. That is reasonable for the middle-end to understand what is going on (it will never match in actual instructions though), unfortunately when trying to emit that into .debug_info section we run into the problem that .long _GLOBAL_OFFSET_TABLE_ etc. is not actually assembled as address of _GLOBAL_OFFSET_TABLE_, but as _GLOBAL_OFFSET_TABLE_-. (any time the assembler sees _GLOBAL_OFFSET_TABLE_ symbol by name, it adds the special relocation) and thus we get a bogus expression. I couldn't come up with a way to express this that wouldn't be even larger than what we have, but if we actually not delegitimize it at all and let it be emitted as something .byte DW_OP_addr .long foo@GOTOFF .byte DW_OP_plus then it works fine and is even shorter than what we used to emit - something .byte DW_OP_addr .long _GLOBAL_OFFSET_TABLE_ .byte DW_OP_minus .byte DW_OP_addr .long foo .byte DW_OP_plus In order to achieve that, we need to allow selected UNSPECs through into debug info, current trunk just gives up on all UNSPECs. Fortunately, we already have a hook for rejecting some constants, so by adding the rejection of all UNSPECs into the hook and on i386 overriding that hook we achieve what we want. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-10-23 Jakub Jelinek PR debug/82630 * target.def (const_not_ok_for_debug_p): Default to default_const_not_ok_for_debug_p instead of hook_bool_rtx_false. * targhooks.h (default_const_not_ok_for_debug_p): New declaration. * targhooks.c (default_const_not_ok_for_debug_p): New function. * dwarf2out.c (const_ok_for_output_1): Only reject UNSPECs for which targetm.const_not_ok_for_debug_p returned true. * config/arm/arm.c (arm_const_not_ok_for_debug_p): Return true for UNSPECs. * config/powerpcspe/powerpcspe.c (rs6000_const_not_ok_for_debug_p): Likewise. 
* config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p): Likewise. * config/i386/i386.c (ix86_delegitimize_address_1): Don't delegitimize UNSPEC_GOTOFF with addend into addend - _GLOBAL_OFFSET_TABLE_ + symbol if !base_term_p. (ix86_const_not_ok_for_debug_p): New function. (i386_asm_output_addr_const_extra): Handle UNSPEC_GOTOFF. (TARGET_CONST_NOT_OK_FOR_DEBUG_P): Redefine. * g++.dg/guality/pr82630.C: New test. --- gcc/target.def.jj 2017-10-10 11:54:13.0 +0200 +++ gcc/target.def 2017-10-20 14:07:06.463135128 +0200 @@ -2822,7 +2822,7 @@ DEFHOOK "This hook should return true if @var{x} should not be emitted into\n\ debug sections.", bool, (rtx x), - hook_bool_rtx_false) + default_const_not_ok_for_debug_p) /* Given an address RTX, say whether it is valid. */ DEFHOOK --- gcc/targhooks.c.jj 2017-10-13 19:02:08.0 +0200 +++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200 @@ -177,6 +177,14 @@ default_legitimize_address_displacement return false; } +bool +default_const_not_ok_for_debug_p (rtx x) +{ + if (GET_CODE (x) == UNSPEC) +return true; + return false; +} + rtx default_expand_builtin_saveregs (void) { --- gcc/targhooks.h.jj 2017-10-13 19:02:08.0 +0200 +++ gcc/targhooks.h 2017-10-20 14:26:07.945464025 +0200 @@ -26,6 +26,7 @@ extern void default_external_libcall (rt extern rtx default_legitimize_address (rtx, rtx, machine_mode); extern bool default_legitimize_address_displacement (rtx *, rtx *, machine_mode); +extern bool default_const_not_ok_for_debug_p (rtx); extern int default_unspec_may_trap_p (const_rtx, unsigned); extern machine_mode default_promote_function_mode (const_tree, machine_mode, --- gcc/dwarf2out.c.jj 2017-10-19 16:18:44.0 +0200 +++ gcc/dwarf2out.c 2017-10-20 14:39:49.432647598 +0200 @@ -13740,9 +13740,17 @@ expansion_failed (tree expr, rtx rtl, ch static bool const_ok_for_output_1 (rtx rtl) { - if (GET_CODE (rtl) == UNSPEC) + if (targetm.const_not_ok_for_debug_p (rtl)) { - /* If delegitimize_address couldn't do anything with the UNSPEC, assume + 
if (GET_CODE (rtl) != UNSPEC) + { + expansion_failed (NULL_TREE, rtl, + "Expression rejected for debug by the backend.\n"); + return false; + } + + /* If delegitimize_address couldn't do anything with the UNSPEC, and +the target hook doesn't explicitly allow it in debug info, assume we can't express it in the debug info. */ /* Don't complain about TLS UNSPECs, those are just too hard to delegitimize. Note this could be a non-decl SYMBOL_REF such as @@ -13769,13 +13777,6 @@ const_ok_for_output_1 (rtx rtl) return false; } - if (targetm.const_not_ok_
Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)
On Mon, 23 Oct 2017, Jakub Jelinek wrote: > Hi! > > If all fails, when we can't prove that the PIC register is in some hard > register, we delegitimize something + foo@GOTOFF as (something - > _GLOBAL_OFFSET_TABLE_) + foo. That is reasonable for the middle-end to > understand what is going on (it will never match in actual instructions > though), unfortunately when trying to emit that into .debug_info section > we run into the problem that .long _GLOBAL_OFFSET_TABLE_ etc. is not > actually assembled as address of _GLOBAL_OFFSET_TABLE_, but as > _GLOBAL_OFFSET_TABLE_-. (any time the assembler sees _GLOBAL_OFFSET_TABLE_ > symbol by name, it adds the special relocation) and thus we get a bogus > expression. > > I couldn't come up with a way to express this that wouldn't be even larger > than what we have, but if we actually not delegitimize it at all and let > it be emitted as > something > .byte DW_OP_addr > .long foo@GOTOFF > .byte DW_OP_plus > then it works fine and is even shorter than what we used to emit - > something > .byte DW_OP_addr > .long _GLOBAL_OFFSET_TABLE_ > .byte DW_OP_minus > .byte DW_OP_addr > .long foo > .byte DW_OP_plus > In order to achieve that, we need to allow selected UNSPECs through > into debug info, current trunk just gives up on all UNSPECs. > > Fortunately, we already have a hook for rejecting some constants, so > by adding the rejection of all UNSPECs into the hook and on i386 overriding > that hook we achieve what we want. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-10-23 Jakub Jelinek > > PR debug/82630 > * target.def (const_not_ok_for_debug_p): Default to > default_const_not_ok_for_debug_p instead of hook_bool_rtx_false. > * targhooks.h (default_const_not_ok_for_debug_p): New declaration. > * targhooks.c (default_const_not_ok_for_debug_p): New function. > * dwarf2out.c (const_ok_for_output_1): Only reject UNSPECs for > which targetm.const_not_ok_for_debug_p returned true. 
> * config/arm/arm.c (arm_const_not_ok_for_debug_p): Return true > for UNSPECs. > * config/powerpcspe/powerpcspe.c (rs6000_const_not_ok_for_debug_p): > Likewise. > * config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p): Likewise. > * config/i386/i386.c (ix86_delegitimize_address_1): Don't delegitimize > UNSPEC_GOTOFF with addend into addend - _GLOBAL_OFFSET_TABLE_ + symbol > if !base_term_p. > (ix86_const_not_ok_for_debug_p): New function. > (i386_asm_output_addr_const_extra): Handle UNSPEC_GOTOFF. > (TARGET_CONST_NOT_OK_FOR_DEBUG_P): Redefine. > > * g++.dg/guality/pr82630.C: New test. > > --- gcc/target.def.jj 2017-10-10 11:54:13.0 +0200 > +++ gcc/target.def2017-10-20 14:07:06.463135128 +0200 > @@ -2822,7 +2822,7 @@ DEFHOOK > "This hook should return true if @var{x} should not be emitted into\n\ > debug sections.", > bool, (rtx x), > - hook_bool_rtx_false) > + default_const_not_ok_for_debug_p) > > /* Given an address RTX, say whether it is valid. */ > DEFHOOK > --- gcc/targhooks.c.jj2017-10-13 19:02:08.0 +0200 > +++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200 > @@ -177,6 +177,14 @@ default_legitimize_address_displacement >return false; > } > > +bool > +default_const_not_ok_for_debug_p (rtx x) > +{ > + if (GET_CODE (x) == UNSPEC) What about UNSPEC_VOLATILE? 
> +return true; > + return false; > +} > + > rtx > default_expand_builtin_saveregs (void) > { > --- gcc/targhooks.h.jj2017-10-13 19:02:08.0 +0200 > +++ gcc/targhooks.h 2017-10-20 14:26:07.945464025 +0200 > @@ -26,6 +26,7 @@ extern void default_external_libcall (rt > extern rtx default_legitimize_address (rtx, rtx, machine_mode); > extern bool default_legitimize_address_displacement (rtx *, rtx *, >machine_mode); > +extern bool default_const_not_ok_for_debug_p (rtx); > > extern int default_unspec_may_trap_p (const_rtx, unsigned); > extern machine_mode default_promote_function_mode (const_tree, machine_mode, > --- gcc/dwarf2out.c.jj2017-10-19 16:18:44.0 +0200 > +++ gcc/dwarf2out.c 2017-10-20 14:39:49.432647598 +0200 > @@ -13740,9 +13740,17 @@ expansion_failed (tree expr, rtx rtl, ch > static bool > const_ok_for_output_1 (rtx rtl) > { > - if (GET_CODE (rtl) == UNSPEC) > + if (targetm.const_not_ok_for_debug_p (rtl)) > { > - /* If delegitimize_address couldn't do anything with the UNSPEC, assume > + if (GET_CODE (rtl) != UNSPEC) > + { > + expansion_failed (NULL_TREE, rtl, > + "Expression rejected for debug by the backend.\n"); > + return false; > + } > + > + /* If delegitimize_address couldn't do anything with the UNSPEC, and > + the target hook doesn't explicitly allow it in debug info, assume >we can't express it in the debug info.
Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)
On Mon, Oct 23, 2017 at 09:48:50AM +0200, Richard Biener wrote: > > --- gcc/targhooks.c.jj 2017-10-13 19:02:08.0 +0200 > > +++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200 > > @@ -177,6 +177,14 @@ default_legitimize_address_displacement > >return false; > > } > > > > +bool > > +default_const_not_ok_for_debug_p (rtx x) > > +{ > > + if (GET_CODE (x) == UNSPEC) > > What about UNSPEC_VOLATILE? This hook is called on the argument of CONST or SYMBOL_REF. UNSPEC_VOLATILE can't appear inside of CONST, it wouldn't be CONST then. UNSPEC appearing outside of CONST is rejected unconditionally in mem_loc_descriptor: ... case UNSPEC: ... /* If delegitimize_address couldn't do anything with the UNSPEC, we can't express it in the debug info. This can happen e.g. with some TLS UNSPECs. */ break; and for UNSPEC_VOLATILE we just ICE, because var-tracking shouldn't let those through: default: if (flag_checking) { print_rtl (stderr, rtl); gcc_unreachable (); } break; Jakub
Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)
On Mon, 23 Oct 2017, Jakub Jelinek wrote: > On Mon, Oct 23, 2017 at 09:48:50AM +0200, Richard Biener wrote: > > > --- gcc/targhooks.c.jj2017-10-13 19:02:08.0 +0200 > > > +++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200 > > > @@ -177,6 +177,14 @@ default_legitimize_address_displacement > > >return false; > > > } > > > > > > +bool > > > +default_const_not_ok_for_debug_p (rtx x) > > > +{ > > > + if (GET_CODE (x) == UNSPEC) > > > > What about UNSPEC_VOLATILE? > > This hook is called on the argument of CONST or SYMBOL_REF. > UNSPEC_VOLATILE can't appear inside of CONST, it wouldn't be CONST then. > > UNSPEC appearing outside of CONST is rejected unconditionally in > mem_loc_descriptor: > ... > case UNSPEC: > ... > /* If delegitimize_address couldn't do anything with the UNSPEC, we > can't express it in the debug info. This can happen e.g. with some > TLS UNSPECs. */ > break; > and for UNSPEC_VOLATILE we just ICE, because var-tracking shouldn't let > those through: > default: > if (flag_checking) > { > print_rtl (stderr, rtl); > gcc_unreachable (); > } > break; Ok. The patch looks fine from a middle-end point of view. Thanks, Richard.
Zen tuning part 11: Fix cost model of AVX moves, unaligned moves and sse<->int moves
Hi, this patch extends processor_costs tables by unaligned moves (which is needed for the vectorizer cost model), by AVX move costs and splits sse<->integer moves into two sections because AMD chips are very asymmetric here (because of different length of pipelines I assume). Register move cost used to return 100 for all AVX moves, now it will behave more rationally and I also disabled code that increases costs of sse<->int moves through memory by 20 because of memory mismatch stall, at least when the quantity moved fits in an integer register. I think newer CPUs handle well cases where a value is stored by parts but read as a whole, but I need to double check it. We may disable some of the mismatch logic for those as it was made for early designs where stores and loads were required to match in size and be aligned. I kept the hack that increases sse<->int costs to be at least 8. I will look into it incrementally - it is true that SSE regs do not play well with the MODES_TIEABLE macro, but I do not think an artificial cost of 8 is a good way around. I also had to go through the exercise of updating all the CPU tables. For RA the relative costs sort of matter only within registers of a given mode (i.e. it is cheaper to spill an SImode register than a DImode one), but for vectorization we are replacing integer load/stores by vector load stores and thus costs need to be realistic across different units. I noticed that many of the other tables do not make much sense - some of this seems to be obvious bugs forgetting that move costs are relative to the register move cost which is 2, so it needs to be latency*2 (if we ignore throughput as we do for now). I have added latencies according to Agner Fog's manual and chip optimization guides. Geode costs are complete guesswork. There are some inconsistencies in Agner's tables so I tried to avoid them to not bias the cost model. 
For unaligned moves I kept scheme of using twice of aligned move for CPUs where alignments matter and having same cost for modern CPUs where it doesn't seem to matter. I suppose we can fine-tune incrementally. For CPUs that do not support SSE/AVX I have added corresponding multiplies which at least will make GCC to behave sort-of reasonably with contradicting -march and -mtune flags. I have benchmarked the patch on CPU2000 on Zen and Core. It is spec neutral but it makes improvements on polyhedron (and followup patch to model scatter gather improves tonto of CPU2k6) Bootstrapped/regtested x86_64-linux. Honza * i386.c (dimode_scalar_chain::compute_convert_gain): Use xmm_move instead of sse_move. (sse_store_index): New function. (ix86_register_move_cost): Be more sensible about mismatch stall; model AVX moves correctly; make difference between sse->integer and integer->sse. (ix86_builtin_vectorization_cost): Model correctly aligned and unaligned moves; make difference between SSE and AVX. * i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move and zmm_move. Increase size of sse load and store tables; add unaligned load and store tables; add ssemmx_to_integer. * x86-tune-costs.h: Update all entries according to real move latencies from Agner Fog's manual and chip documentation. Index: config/i386/i386.c === --- config/i386/i386.c (revision 253982) +++ config/i386/i386.c (working copy) @@ -1601,7 +1601,7 @@ dimode_scalar_chain::compute_convert_gai rtx dst = SET_DEST (def_set); if (REG_P (src) && REG_P (dst)) - gain += COSTS_N_INSNS (2) - ix86_cost->sse_move; + gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move; else if (REG_P (src) && MEM_P (dst)) gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; else if (MEM_P (src) && REG_P (dst)) @@ -38603,6 +38603,28 @@ ix86_can_change_mode_class (machine_mode return true; } +/* Return index of MODE in the sse load/store tables. 
*/ + +static inline int +sse_store_index (machine_mode mode) +{ + switch (GET_MODE_SIZE (mode)) + { + case 4: + return 0; + case 8: + return 1; + case 16: + return 2; + case 32: + return 3; + case 64: + return 4; + default: + return -1; + } +} + /* Return the cost of moving data of mode M between a register and memory. A value of 2 is the default; this cost is relative to those in `REGISTER_MOVE_COST'. @@ -38646,21 +38668,9 @@ inline_memory_move_cost (machine_mode mo } if (SSE_CLASS_P (regclass)) { - int index; - switch (GET_MODE_SIZE (mode)) - { - case 4: - index = 0; - break; - case 8: - index = 1; - break; - case 16: - index = 2; - break; - default: - return 100; - } + int index = sse_store_index (m
Zen tuning part 10: ix86_builtin_vectorization_cost fixes
Hi, this is a patch to ix86_builtin_vectorization_cost I have committed. Compared to the earlier version it only removes now unused fields in processor_costs. The patch improves facerec on all tested targets (amdfam10, Bulldozer, Zen and core), the largest improvement is on Zen by about 25% (for core the improvement is 5%). It also improves gromacs and lbm (on Zen and core) and makes a small regression in gamess (sub 1%) and tonto (5-7%). Tonto is fixed by the scatter,gather patch I plan as a followup. There are also improvements for polyhedron (fatigue, fatigue2 6%, test_fpu 3%). There is a hack of making unaligned store twice the cost of aligned to make it similar to the previous cost model. With this and the earlier fixes to move cost tables it no longer causes regressions in the testsuite with the exception of gcc.target/i386/pr79683.c where the cost model now claims that vectorization is not profitable with generic (it is profitable i.e. for core) which seems correct: struct s { __INT64_TYPE__ a; __INT64_TYPE__ b; }; void test(struct s __seg_gs *x) { x->a += 1; x->b -= 1; } We model vector integer ops as more expensive than integer operations. I disabled the cost model there. The unaligned and avx costs will be fixed as a followup. Bootstrapped/regtested x86_64-linux, committed. Honza * gcc.target/i386/pr79683.c: Disable costmodel. * i386.c (ix86_builtin_vectorization_cost): Use existing rtx_cost latencies instead of having separate table; make difference between integer and float costs. * i386.h (processor_costs): Remove scalar_stmt_cost, scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost, scalar_to_vec_cost, vec_align_load_cost, vec_unalign_load_cost, vec_store_cost. * x86-tune-costs.h: Remove entries which have been removed in processor_costs from all tables; make cond_taken_branch_cost and cond_not_taken_branch_cost COST_N_INSNS based. 
Index: testsuite/gcc.target/i386/pr79683.c === --- testsuite/gcc.target/i386/pr79683.c (revision 253957) +++ testsuite/gcc.target/i386/pr79683.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -msse2" } */ +/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */ struct s { __INT64_TYPE__ a; Index: config/i386/i386.c === --- config/i386/i386.c (revision 253957) +++ config/i386/i386.c (working copy) @@ -44051,37 +44051,61 @@ static int ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, tree vectype, int) { + bool fp = false; + machine_mode mode = TImode; + if (vectype != NULL) +{ + fp = FLOAT_TYPE_P (vectype); + mode = TYPE_MODE (vectype); +} + switch (type_of_cost) { case scalar_stmt: -return ix86_cost->scalar_stmt_cost; +return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: -return ix86_cost->scalar_load_cost; + /* load/store costs are relative to register move which is 2. Recompute + it to COSTS_N_INSNS so everything have same base. */ +return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] + : ix86_cost->int_load [2]) / 2; case scalar_store: -return ix86_cost->scalar_store_cost; +return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] + : ix86_cost->int_store [2]) / 2; case vector_stmt: -return ix86_cost->vec_stmt_cost; +return ix86_vec_cost (mode, + fp ? ix86_cost->addss : ix86_cost->sse_op, + true); case vector_load: -return ix86_cost->vec_align_load_cost; +return ix86_vec_cost (mode, + COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2, + true); case vector_store: -return ix86_cost->vec_store_cost; +return ix86_vec_cost (mode, + COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2, + true); case vec_to_scalar: -return ix86_cost->vec_to_scalar_cost; - case scalar_to_vec: -return ix86_cost->scalar_to_vec_cost; +return ix86_vec_cost (mode, ix86_cost->sse_op, true); + /* We should have separate costs for unaligned loads and gather/scatter. +Do that incrementally. 
*/ case unaligned_load: - case unaligned_store: case vector_gather_load: +return ix86_vec_cost (mode, + COSTS_N_INSNS (ix86_cost->sse_load[2]), + true); + + case unaligned_store: case vector_scatter_store: -return ix86_cost->vec_unalign_load_cost; +return ix86_vec_cost (mode, + COSTS_N_INSNS (ix86_cost->sse_sto
[committed] Use scalar_mode in expand_shift_1
Since this function handles scalar and vector shifts: machine_mode scalar_mode = mode; if (VECTOR_MODE_P (mode)) scalar_mode = GET_MODE_INNER (mode); is equivalent to: scalar_mode = GET_MODE_INNER (mode); Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. Applied as obvious. Richard 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * expmed.c (expand_shift_1): Use scalar_mode for scalar_mode. Index: gcc/expmed.c === --- gcc/expmed.c2017-09-23 10:27:39.925846365 +0100 +++ gcc/expmed.c2017-10-23 10:30:47.246081163 +0100 @@ -2337,12 +2337,10 @@ expand_shift_1 (enum tree_code code, mac optab lrotate_optab = rotl_optab; optab rrotate_optab = rotr_optab; machine_mode op1_mode; - machine_mode scalar_mode = mode; + scalar_mode scalar_mode = GET_MODE_INNER (mode); int attempt; bool speed = optimize_insn_for_speed_p (); - if (VECTOR_MODE_P (mode)) -scalar_mode = GET_MODE_INNER (mode); op1 = amount; op1_mode = GET_MODE (op1);
[committed] Use scalar_int/float_mode in brig_langhook_type_for_mode
This follows on from similar changes a couple of months ago and is needed when general modes have variable size. Tested on x86_64-linux-gnu and applied as obvious. Richard 2017-10-23 Richard Sandiford gcc/brig/ * brig-lang.c (brig_langhook_type_for_mode): Use scalar_int_mode and scalar_float_mode. Index: gcc/brig/brig-lang.c === --- gcc/brig/brig-lang.c2017-10-02 09:10:56.960755788 +0100 +++ gcc/brig/brig-lang.c2017-10-23 10:33:45.740101313 +0100 @@ -278,10 +278,11 @@ brig_langhook_type_for_mode (machine_mod return NULL_TREE; } - enum mode_class mc = GET_MODE_CLASS (mode); - if (mc == MODE_FLOAT) + scalar_int_mode imode; + scalar_float_mode fmode; + if (is_int_mode (mode, &imode)) { - switch (GET_MODE_BITSIZE (mode)) + switch (GET_MODE_BITSIZE (imode)) { case 32: return float_type_node; @@ -290,15 +291,15 @@ brig_langhook_type_for_mode (machine_mod default: /* We have to check for long double in order to support i386 excess precision. */ - if (mode == TYPE_MODE (long_double_type_node)) + if (imode == TYPE_MODE (long_double_type_node)) return long_double_type_node; gcc_unreachable (); return NULL_TREE; } } - else if (mc == MODE_INT) -return brig_langhook_type_for_size(GET_MODE_BITSIZE(mode), unsignedp); + else if (is_float_mode (mode, &fmode)) +return brig_langhook_type_for_size (GET_MODE_BITSIZE (fmode), unsignedp); else { /* E.g., build_common_builtin_nodes () asks for modes/builtins
Re: [RFC] propagate malloc attribute in ipa-pure-const pass
On 14 October 2017 at 03:20, Prathamesh Kulkarni wrote: > On 7 October 2017 at 12:35, Prathamesh Kulkarni > wrote: >> On 7 October 2017 at 11:23, Jan Hubicka wrote: On 6 October 2017 at 06:04, Jan Hubicka wrote: >> Hi Honza, >> Thanks for the detailed suggestions, I have updated the patch >> accordingly. >> I have following questions on call_summary: >> 1] I added field bool is_return_callee in ipa_call_summary to track >> whether the caller possibly returns value returned by callee, which >> gets rid of return_callees_map. I assume ipa_call_summary_t::remove() >> and ipa_call_summary_t::duplicate() will already take care of handling >> late insertion/removal of cgraph nodes ? I just initialized >> is_return_callee to false in ipa_call_summary::reset and that seems to >> work. I am not sure though if I have handled it correctly. Could you >> please check that ? > > I was actually thinking to introduce separate summary for ipa-pure-const > pass, > but this seems fine to me too (for one bit definitly more effecient) > ipa_call_summary_t::duplicate copies all the fields, so indeed you > should be > safe here. > > Also it is possible for functions to be inserted late. Updating of call > summaries > is currently handled by ipa_fn_summary_t::insert >> >> 2] ipa_inline() called ipa_free_fn_summary, which made >> ipa_call_summaries unavailable during ipa-pure-const pass. I removed >> call to ipa_free_fn_summary from ipa_inline, and moved it to >> ipa_pure_const::execute(). Is that OK ? > > Seems OK to me. >> >> Patch passes bootstrap+test and lto bootstrap+test on >> x86_64-unknown-linux-gnu. >> Verfiied SPEC2k6 compiles and runs without miscompares with LTO >> enabled on aarch64-linux-gnu. >> Cross-tested on arm*-*-* and aarch64*-*-*. I will additionally test >> the patch by building chromium or firefox. >> Would it be OK to commit if it passes above validations ? 
>> >> Thanks, >> Prathamesh >> > >> > Thanks, >> > Honza > >> 2017-10-05 Prathamesh Kulkarni >> >> * cgraph.h (set_malloc_flag): Declare. >> * cgraph.c (set_malloc_flag_1): New function. >> (set_malloc_flag): Likewise. >> * ipa-fnsummary.h (ipa_call_summary): Add new field >> is_return_callee. >> * ipa-fnsummary.c (ipa_call_summary::reset): Set is_return_callee >> to >> false. >> (read_ipa_call_summary): Add support for reading is_return_callee. >> (write_ipa_call_summary): Stream is_return_callee. >> * ipa-inline.c (ipa_inline): Remove call to ipa_free_fn_summary. >> * ipa-pure-const.c: Add headers ssa.h, alloc-pool.h, >> symbol-summary.h, >> ipa-prop.h, ipa-fnsummary.h. >> (malloc_state_e): Define. >> (malloc_state_names): Define. >> (funct_state_d): Add field malloc_state. >> (varying_state): Set malloc_state to STATE_MALLOC_BOTTOM. >> (check_retval_uses): New function. >> (malloc_candidate_p): Likewise. >> (analyze_function): Add support for malloc attribute. >> (pure_const_write_summary): Stream malloc_state. >> (pure_const_read_summary): Add support for reading malloc_state. >> (dump_malloc_lattice): New function. >> (propagate_malloc): New function. >> (ipa_pure_const::execute): Call propagate_malloc and >> ipa_free_fn_summary. >> (pass_local_pure_const::execute): Add support for malloc >> attribute. >> * ssa-iterators.h (RETURN_FROM_IMM_USE_STMT): New macro. >> >> testsuite/ >> * gcc.dg/ipa/propmalloc-1.c: New test-case. >> * gcc.dg/ipa/propmalloc-2.c: Likewise. >> * gcc.dg/ipa/propmalloc-3.c: Likewise. >> >> diff --git a/gcc/cgraph.c b/gcc/cgraph.c >> index 3d0cefbd46b..0aad90d59ea 100644 >> --- a/gcc/cgraph.c >> +++ b/gcc/cgraph.c >> @@ -2530,6 +2530,53 @@ cgraph_node::set_nothrow_flag (bool nothrow) >>return changed; >> } >> >> +/* Worker to set malloc flag. 
*/ > New line here I guess (it is below) >> +static void >> +set_malloc_flag_1 (cgraph_node *node, bool malloc_p, bool *changed) >> +{ >> + if (malloc_p && !DECL_IS_MALLOC (node->decl)) >> +{ >> + DECL_IS_MALLOC (node->decl) = true; >> + *changed = true; >> +} >> + >> + ipa_ref *ref; >> + FOR_EACH_ALIAS (node, ref) >> +{ >> + cgraph_node *alias = dyn_cast (ref->referring); >> + if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE) >> + set_malloc_flag_1 (alias, malloc_p, changed); >> +} >> +
Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec align tests.
Ping From: Tamar Christina Sent: Monday, October 16, 2017 11:17 AM To: Christophe Lyon Cc: Rainer Orth; gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec align tests. Hi All, I've submitted a patch to fix this https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00971.html Permission (just as the new patch) to backport these test changes to GCC 7 to fix the regressions there? Thanks, Tamar. From: Christophe Lyon Sent: Friday, October 6, 2017 5:07:44 PM To: Tamar Christina Cc: Rainer Orth; gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec align tests. On 6 October 2017 at 09:45, Tamar Christina wrote: > > >> -Original Message- >> From: Rainer Orth [mailto:r...@cebitec.uni-bielefeld.de] >> Sent: 05 October 2017 20:16 >> To: Tamar Christina >> Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; >> Marcus Shawcroft >> Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing >> vec align tests. >> >> Hi Tamar, >> >> > Previously I had corrected the vect_hw_misalign check which prompted >> > these three test to start failing because the condition needs to be >> > inverted in the testcases. >> > >> > Regtested on aarch64-none-elf, arm-none-linux-gnueabihf and x86_64-pc- >> linux-gnu. >> > >> > Ok for trunk? >> > >> > Thanks, >> > Tamar. >> > >> > gcc/testsuite/ >> > 2017-10-02 Tamar Christina >> > >> > * gcc.dg/vect/vect-align-1.c: Fix vect_hw_misalign condition. >> > * gcc.dg/vect/vect-align-2.c: Likewise. >> > * gcc.dg/vect/vect-multitypes-1.c: Likewise. 
>> >> unfortunately, your patch caused gcc.dg/vect/vect-multitypes-1.c to FAIL on >> sparc-sun-solaris2.11 (32 and 64-bit): >> >> FAIL: gcc.dg/vect/vect-multitypes-1.c -flto -ffat-lto-objects >> scan-tree-dump- >> times vect "Vectorizing an unaligned access" 4 >> FAIL: gcc.dg/vect/vect-multitypes-1.c scan-tree-dump-times vect >> "Vectorizing an unaligned access" 4 > > Thanks! I'll take a look. > If that's easier for you, I've noticed the same thing on armeb-none-linux-gnueabihf --with-mode arm --with-cpu cortex-a9 --with-fpu neon-fp16 Christophe > Tamar > >> >> It had XFAILed before. >> >> Rainer >> >> -- >> - >> Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product for generic tests for ARM and AArch64 [Patch (7/8)]
Ping From: Tamar Christina Sent: Thursday, October 12, 2017 3:00:36 PM To: Richard Earnshaw; James Greenhalgh Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft Subject: RE: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product for generic tests for ARM and AArch64 [Patch (7/8)] > -Original Message- > From: Richard Earnshaw (lists) [mailto:richard.earns...@arm.com] > Sent: 12 October 2017 14:21 > To: Tamar Christina; James Greenhalgh > Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft > Subject: Re: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product > for generic tests for ARM and AArch64 [Patch (7/8)] > > On 06/10/17 13:45, Tamar Christina wrote: > > Hi All, > > > > this is a respin with the changes suggested. Note that this patch is no 8/8 > > in > the series. > > > > Regtested on arm-none-eabi, armeb-none-eabi, aarch64-none-elf and > > aarch64_be-none-elf with no issues found. > > > > Ok for trunk? > > > > gcc/testsuite > > 2017-10-06 Tamar Christina > > > > * gcc.dg/vect/vect-reduc-dot-s8a.c > > (dg-additional-options, dg-require-effective-target): Add +dotprod. > > * gcc.dg/vect/vect-reduc-dot-u8a.c > > (dg-additional-options, dg-require-effective-target): Add +dotprod. > > > > From: Tamar Christina > > Sent: Monday, September 4, 2017 12:35:39 PM > > To: James Greenhalgh > > Cc: gcc-patches@gcc.gnu.org; nd; Richard Earnshaw; Marcus Shawcroft > > Subject: RE: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product > > for generic tests for ARM and AArch64 [Patch (7/8)] > > > >> I'm surprised that this worked! > >> > >> It looks like you unconditionally add the -march=armv8.2-a+dotprod > >> options, which should cause you to generate instructions which will > >> not execute on targets which don't support this instruction. As far > >> as I can see, this is an execute test, so that should cause undefined > >> instruction exceptions on an Armv8-A target at the very least. 
> > > > It's not, there is no dg-do specified, which means it defaults to "compile" > > This is a straight compilation tests that checks to see if the target > > can do the reduction. There may be a main, but it's never executed, > > which is why I don't have a hardware check against it. > > > > The unconditional armv8.2+dotprod is for this reason. It doesn't matter > what hardware. > > > >> > >> So, not OK in its current form. > >> > >> Thanks, > >> James > >> > >>> > >>> Ok for trunk? > >>> > >>> gcc/testsuite > >>> 2017-09-01 Tamar Christina > >>> > >>> * gcc.dg/vect/vect-reduc-dot-s8a.c > >>> (dg-additional-options, dg-require-effective-target): Add +dotprod. > >>> * gcc.dg/vect/vect-reduc-dot-u8a.c > >>> (dg-additional-options, dg-require-effective-target): Add +dotprod. > >>> > >>> -- > > > > iff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c > b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c > index > dc4f52019d5435edbbc811b73dee0f98ff44c1b1..acb6862f8274fb954f69bd45e8 > edeedcdca4cbf7 > 100644 > --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c > +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c > @@ -1,4 +1,7 @@ > /* { dg-require-effective-target vect_int } */ > +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { > aarch64*-*-* || arm*-*-* } } } */ > > Why do you need hardware with dot-product if these are compile-only > tests? (presumably that's what the _hw at the end of the require means). James was right in that vect.exp overrides the default from compile to run for these tests, So they are execution tests. > > R.
Re: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)]
Ping From: Tamar Christina Sent: Friday, October 6, 2017 1:45:18 PM To: Christophe Lyon Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft Subject: Re: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)] Hi All, this is a minor respin of the patch with the comments addressed. Note this patch is now 7/8 in the series. Regtested on arm-none-eabi, armeb-none-eabi, aarch64-none-elf and aarch64_be-none-elf with no issues found. Ok for trunk? gcc/testsuite 2017-10-06 Tamar Christina * lib/target-supports.exp (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New. (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. (add_options_for_arm_v8_2a_dotprod_neon): New. (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. (check_effective_target_vect_sdot_qi): New. (check_effective_target_vect_udot_qi): New. * gcc.target/arm/simd/vdot-exec.c: New. * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. From: Tamar Christina Sent: Monday, September 4, 2017 2:01:40 PM To: Christophe Lyon Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft Subject: RE: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)] Hi Christophe, > > > > gcc/testsuite > > 2017-09-01 Tamar Christina > > > > * lib/target-supports.exp > > (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): > New. > > (check_effective_target_arm_v8_2a_dotprod_neon_ok): New. > > (add_options_for_arm_v8_2a_dotprod_neon): New. > > (check_effective_target_arm_v8_2a_dotprod_neon_hw): New. > > (check_effective_target_vect_sdot_qi): New. > > (check_effective_target_vect_udot_qi): New. > > * gcc.target/arm/simd/vdot-exec.c: New. > > Aren't you defining twice P() and ARR() in vdot-exec.c ? > I'd expect a preprocessor error, did I read too quickly? 
> Yes they are defined twice but they're not redefined, all the definitions are exactly the same so the pre-processor doesn't care. I can leave only one if this is confusing. > > Thanks, > > Christophe > > > * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New. > > * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon. > > > > --
[committed] Use SCALAR_INT_TYPE_MODE in loc_list_from_tree_1
This follows on from similar changes a couple of months ago and is needed when general modes have variable size. Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. Applied as obvious. Richard 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * dwarf2out.c (loc_list_from_tree_1): Use SCALAR_INT_TYPE_MODE instead of TYPE_MODE. Index: gcc/dwarf2out.c === --- gcc/dwarf2out.c 2017-10-19 21:19:47.742454435 +0100 +++ gcc/dwarf2out.c 2017-10-23 10:36:59.967280171 +0100 @@ -17482,7 +17482,7 @@ loc_list_from_tree_1 (tree loc, int want && (INTEGRAL_TYPE_P (TREE_TYPE (loc)) || POINTER_TYPE_P (TREE_TYPE (loc))) && DECL_CONTEXT (loc) == current_function_decl - && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (loc))) + && (GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (TREE_TYPE (loc))) <= DWARF2_ADDR_SIZE)) { dw_die_ref ref = lookup_decl_die (loc);
Re: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32.
Ping. Any objections to the patch? From: gcc-patches-ow...@gcc.gnu.org on behalf of Tamar Christina Sent: Monday, October 16, 2017 9:54:23 AM To: gcc-patches@gcc.gnu.org Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft; pins...@gmail.com Subject: Re: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32. Ping? From: gcc-patches-ow...@gcc.gnu.org on behalf of Tamar Christina Sent: Wednesday, September 13, 2017 4:00:24 PM To: gcc-patches@gcc.gnu.org Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft; pins...@gmail.com Subject: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32. Hi All, The inlining of lrint isn't valid in all cases on ILP32 when -fno-math-errno is used because an inexact exception is raised in certain circumstances. Instead the restriction is placed such that the integer mode has to be larger or equal to the float mode in addition to either inexacts being allowed or not caring about trapping math. This prevents the overflow, and the inexact errors that may arise. Unfortunately I can't create a test for this as there is a bug where the pattern is always passed DI as the smallest mode, and later takes a sub-reg of it to SI. This would prevent an overflow where one was expected. This fixed PR/81800. Regtested on aarch64-none-linux-gnu and no regressions. Ok for trunk? Thanks, Tamar gcc/ 2017-09-13 Tamar Christina PR target/81800 * config/aarch64/aarch64.md (lrint2): Add flag_trapping_math and flag_fp_int_builtin_inexact. gcc/testsuite/ 2017-09-13 Tamar Christina * gcc.target/aarch64/inline-lrint_2.c (dg-options): Add -fno-trapping-math. --
Re: [PATCH][GCC][Testsuite][SPARC][ARM] Fix vect-multitypes-1.c test on SPARC64 and ARMEB.
Ping From: gcc-patches-ow...@gcc.gnu.org on behalf of Tamar Christina Sent: Monday, October 16, 2017 11:16:21 AM To: gcc-patches@gcc.gnu.org Cc: nd; Ramana Radhakrishnan; Richard Earnshaw; ni...@redhat.com; Kyrylo Tkachov; christophe.l...@linaro.org; r...@cebitec.uni-bielefeld.de Subject: [PATCH][GCC][Testsuite][SPARC][ARM] Fix vect-multitypes-1.c test on SPARC64 and ARMEB. Hi All, This patch fixes a regression introduced by r253451. The target needs all three conditions to be true before it can vectorize unaligned accesses. This patch turns the erroneous || into an &&. regtested on aarch64-none-elf, arm-none-linux-gnueabihf, x86_64-pc-linux-gnu, armeb-none-linux-gnueabihf and sparc64-unknown-linux-gnu. OK for trunk? And for the GCC-7 branch? Thanks, Tamar gcc/testsuite/ 2017-10-16 Tamar Christina * gcc.dg/vect/vect-multitypes-1.c: Correct target selector. --
Use SCALAR_TYPE_MODE in vect_create_epilog_for_reduction
This follows on from similar changes a couple of months ago and is needed when general modes have variable size. Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. Applied as obvious. Richard 2017-10-23 Richard Sandiford gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Use SCALAR_TYPE_MODE instead of TYPE_MODE. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c2017-10-22 21:04:50.136830154 +0100 +++ gcc/tree-vect-loop.c2017-10-23 10:39:37.711243373 +0100 @@ -4487,7 +4487,7 @@ vect_create_epilog_for_reduction (vec
PING Fwd: [patch] implement generic debug() for vectors and hash sets
Forwarded Message Subject: [patch] implement generic debug() for vectors and hash sets Date: Mon, 16 Oct 2017 09:52:51 -0400 From: Aldy Hernandez To: gcc-patches We have a generic mechanism for dumping types from the debugger with: (gdb) call debug(some_type) However, even though most types are implemented, we have no canonical way of dumping vectors or hash sets. The attached patch fixes this oversight. With it you can call debug(vec<>) and debug(hash_set<>) with the following types: rtx, tree, basic_block, edge, rtx_insn. More can be added simply by adding a debug_slim(your_type) overload and calling: DEFINE_DEBUG_VEC (your_type) DEFINE_DEBUG_HASH_SET (your_type) Here is an example of how things look with this patch: vec of edges: [0] = 10)> vec of bbs: [0] = [1] = vec of trees: [0] = [1] = [2] = vec of rtx: [0] = (reg:SI 87) [1] = (reg:SI 87) hash of bbs: OK for mainline? gcc/ * vec.h (debug_helper): New function. (DEFINE_DEBUG_VEC): New macro. * hash-set.h (debug_helper): New function. (DEFINE_DEBUG_HASH_SET): New macro. * cfg.c (debug_slim (edge)): New function. Call DEFINE_DEBUG_VEC for edges. Call DEFINE_DEBUG_HASH_SET for edges. * cfghooks.c (debug_slim (basic_block)): New function. Call DEFINE_DEBUG_VEC for basic blocks. Call DEFINE_DEBUG_HASH_SET for basic blocks. * print-tree.c (debug_slim): New function to handle trees. Call DEFINE_DEBUG_VEC for trees. Call DEFINE_DEBUG_HASH_SET for trees. (debug (vec) &): Remove. (debug () *): Remove. * print-rtl.c (debug_slim): New function to handle const_rtx. Call DEFINE_DEBUG_VEC for rtx_def. Call DEFINE_DEBUG_VEC for rtx_insn. Call DEFINE_DEBUG_HASH_SET for rtx_def. Call DEFINE_DEBUG_HASH_SET for rtx_insn. * sel-sched-dump.c (debug (vec &): Remove. (debug (vec *ptr): Remove. (debug_insn_vector): Remove. * stor-layout.c (debug_rli): Call debug() instead of debug_vec_tree. 
diff --git a/gcc/cfg.c b/gcc/cfg.c index 01e68aeda51..4d02fb56cbf 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -573,6 +573,16 @@ debug (edge_def *ptr) else fprintf (stderr, "\n"); } + +static void +debug_slim (edge e) +{ + fprintf (stderr, " %d)>", (void *) e, + e->src->index, e->dest->index); +} + +DEFINE_DEBUG_VEC (edge) +DEFINE_DEBUG_HASH_SET (edge) /* Simple routines to easily allocate AUX fields of basic blocks. */ diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c index 258a5eabf8d..73b196feec7 100644 --- a/gcc/cfghooks.c +++ b/gcc/cfghooks.c @@ -304,6 +304,14 @@ debug (basic_block_def *ptr) fprintf (stderr, "\n"); } +static void +debug_slim (basic_block ptr) +{ + fprintf (stderr, "", (void *) ptr, ptr->index); +} + +DEFINE_DEBUG_VEC (basic_block_def *) +DEFINE_DEBUG_HASH_SET (basic_block_def *) /* Dumps basic block BB to pretty-printer PP, for use as a label of a DOT graph record-node. The implementation of this hook is diff --git a/gcc/hash-set.h b/gcc/hash-set.h index d2247d39571..58f7750243a 100644 --- a/gcc/hash-set.h +++ b/gcc/hash-set.h @@ -123,6 +123,44 @@ private: hash_table m_table; }; +/* Generic hash_set debug helper. + + This needs to be instantiated for each hash_set used throughout + the compiler like this: + +DEFINE_DEBUG_HASH_SET (TYPE) + + The reason we have a debug_helper() is because GDB can't + disambiguate a plain call to debug(some_hash), and it must be called + like debug(some_hash). */ +template +void +debug_helper (hash_set &ref) +{ + for (typename hash_set::iterator it = ref.begin (); + it != ref.end (); ++it) +{ + debug_slim (*it); + fputc ('\n', stderr); +} +} + +#define DEFINE_DEBUG_HASH_SET(T) \ + template static void debug_helper (hash_set &); \ + DEBUG_FUNCTION void \ + debug (hash_set &ref)\ + { \ +debug_helper (ref);\ + } \ + DEBUG_FUNCTION void \ + debug (hash_set *ptr)\ + { \ +if (ptr) \ + debug (*ptr); \ +else \ + fprintf (stderr, "\n"); \ + } + /* ggc marking routines. 
*/ template diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c index 28d99862cad..5fe23801ab2 100644 --- a/gcc/print-rtl.c +++ b/gcc/print-rtl.c @@ -967,6 +967,23 @@ debug (const rtx_def *ptr) fprintf (stderr, "\n"); } +/* Like debug_rtx but with no newline, as debug_helper will add one. + + Note: No debug_slim(rtx_insn *) variant implemented, as this + function can serve for both rtx and rtx_insn. */ + +static void +debug_slim (const_rtx x) +{ + rtx_writer w (stderr, 0, false, false, NULL); + w.print_rtx (x); +} + +DEFINE_DEBUG_VEC (rtx_def *) +DEFINE_DEBUG_VEC (rtx_insn *) +DEFINE_DEBUG_HASH_SET (rtx_def *) +DEFINE_DEBUG_HASH_SET (rtx_insn *) + /* Count of rtx's to print with debug_rtx_list. This global exists because gdb user defined commands have no arguments. */ diff --git a/gcc/print-tree.c b/gcc/print-tree.c index d534c76ee49..3a0f85d4038 100644 --- a/gcc/print-tree.c +++ b/gcc/print-tree.c @@ -1095,32 +1095,6 @@
[committed] Convert STARTING_FRAME_OFFSET to a hook
I took the documentation of the FRAME_GROWS_DOWNWARD behaviour from the version that was in most header files, since the one in the manual seemed less clear. The patch deliberately keeps FIRST_PARM_OFFSET(FNDECL) in microblaze_starting_frame_offset; this seems to be a port-local convention and takes advantage of the fact that FIRST_PARM_OFFSET doesn't read FNDECL. Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. Also tested on at least one target per CPU directory. Commmitted as pre-approved by Jeff here: https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00923.html Richard 2017-10-23 Richard Sandiford gcc/ * target.def (starting_frame_offset): New hook. * doc/tm.texi (STARTING_FRAME_OFFSET): Remove in favor of... (TARGET_STARTING_FRAME_OFFSET): ...this new hook. * doc/tm.texi.in: Regenerate. * hooks.h (hook_hwi_void_0): Declare. * hooks.c (hook_hwi_void_0): New function. * doc/rtl.texi: Refer to TARGET_STARTING_FRAME_OFFSET instead of STARTING_FRAME_OFFSET. * builtins.c (expand_builtin_setjmp_receiver): Likewise. * reload1.c (reload): Likewise. * cfgexpand.c (expand_used_vars): Use targetm.starting_frame_offset instead of STARTING_FRAME_OFFSET. * function.c (try_fit_stack_local): Likewise. (assign_stack_local_1): Likewise (instantiate_virtual_regs): Likewise. * rtlanal.c (rtx_addr_can_trap_p_1): Likewise. * config/avr/avr.md (nonlocal_goto_receiver): Likewise. * config/aarch64/aarch64.h (STARTING_FRAME_OFFSET): Delete. * config/alpha/alpha.h (STARTING_FRAME_OFFSET): Likewise. * config/arc/arc.h (STARTING_FRAME_OFFSET): Likewise. * config/arm/arm.h (STARTING_FRAME_OFFSET): Likewise. * config/bfin/bfin.h (STARTING_FRAME_OFFSET): Likewise. * config/c6x/c6x.h (STARTING_FRAME_OFFSET): Likewise. * config/cr16/cr16.h (STARTING_FRAME_OFFSET): Likewise. * config/cris/cris.h (STARTING_FRAME_OFFSET): Likewise. * config/fr30/fr30.h (STARTING_FRAME_OFFSET): Likewise. * config/frv/frv.h (STARTING_FRAME_OFFSET): Likewise. 
* config/ft32/ft32.h (STARTING_FRAME_OFFSET): Likewise. * config/h8300/h8300.h (STARTING_FRAME_OFFSET): Likewise. * config/i386/i386.h (STARTING_FRAME_OFFSET): Likewise. * config/ia64/ia64.h (STARTING_FRAME_OFFSET): Likewise. * config/m32c/m32c.h (STARTING_FRAME_OFFSET): Likewise. * config/m68k/m68k.h (STARTING_FRAME_OFFSET): Likewise. * config/mcore/mcore.h (STARTING_FRAME_OFFSET): Likewise. * config/mn10300/mn10300.h (STARTING_FRAME_OFFSET): Likewise. * config/moxie/moxie.h (STARTING_FRAME_OFFSET): Likewise. * config/msp430/msp430.h (STARTING_FRAME_OFFSET): Likewise. * config/nds32/nds32.h (STARTING_FRAME_OFFSET): Likewise. * config/nios2/nios2.h (STARTING_FRAME_OFFSET): Likewise. * config/nvptx/nvptx.h (STARTING_FRAME_OFFSET): Likewise. * config/pdp11/pdp11.h (STARTING_FRAME_OFFSET): Likewise. * config/riscv/riscv.h (STARTING_FRAME_OFFSET): Likewise. * config/rl78/rl78.h (STARTING_FRAME_OFFSET): Likewise. * config/rx/rx.h (STARTING_FRAME_OFFSET): Likewise. * config/s390/s390.h (STARTING_FRAME_OFFSET): Likewise. * config/sh/sh.h (STARTING_FRAME_OFFSET): Likewise. * config/sparc/sparc.c (sparc_compute_frame_size): Likewise. * config/sparc/sparc.h (STARTING_FRAME_OFFSET): Likewise. * config/spu/spu.h (STARTING_FRAME_OFFSET): Likewise. * config/stormy16/stormy16.h (STARTING_FRAME_OFFSET): Likewise. * config/tilegx/tilegx.h (STARTING_FRAME_OFFSET): Likewise. * config/tilepro/tilepro.h (STARTING_FRAME_OFFSET): Likewise. * config/v850/v850.h (STARTING_FRAME_OFFSET): Likewise. * config/visium/visium.h (STARTING_FRAME_OFFSET): Likewise. * config/avr/avr.h (STARTING_FRAME_OFFSET): Likewise. * config/avr/avr-protos.h (avr_starting_frame_offset): Likewise. * config/avr/avr.c (avr_starting_frame_offset): Make static and return a HOST_WIDE_INT. (avr_builtin_setjmp_frame_value): Use it instead of STARTING_FRAME_OFFSET. (TARGET_STARTING_FRAME_OFFSET): Redefine. * config/epiphany/epiphany.h (STARTING_FRAME_OFFSET): Delete. 
* config/epiphany/epiphany.c (epiphany_starting_frame_offset): New function. (TARGET_STARTING_FRAME_OFFSET): Redefine. * config/iq2000/iq2000.h (STARTING_FRAME_OFFSET): Delete. * config/iq2000/iq2000.c (iq2000_starting_frame_offset): New function. (TARGET_STARTING_FRAME_OFFSET): Redefine. * config/lm32/lm32.h (STARTING_FRAME_OFFSET): Delete. * config/lm32/lm32.c (lm32_starting_frame_offset): New function. (TARGET_STARTING_FRAME_OFFSET): Redefine. * config/m32r/m32r.h (STARTING_FRAME_OFFSET): Delete.
Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode
On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote: > Hello! > > In PR 82628 Jakub figured out that insn patterns that consume carry > flag were not 100% correct. Due to this issue, combine is able to > simplify various CC_REG propagations that result in invalid code. > > Attached patch fixes (well, mitigates) the above problem by splitting > the double-mode compare after the reload, in the same way other > *_doubleword patterns are handled from "the beginning of the time". I'm afraid this is going to haunt us sooner or later, combine isn't the only pass that uses simplify-rtx.c infrastructure heavily and when we lie in the RTL pattern, eventually something will be simplified wrongly. So, at least we'd need to use UNSPEC for the pattern, like (only lightly tested so far) below. I'm not sure the double-word pattern is a win though, it causes PR82662 you've filed (the problem is that during ifcvt because of the double-word comparison the condition is canonicalized as (lt (reg:TI) (reg:TI)) and there is no instruction in the MD that would take such arguments, there are only instructions that compare flags registers. If you look at say normal DImode comparisons, it is the same thing, ifcvt also can't do anything with those, the reason they work is that we have a cstoredi4 optab (for 64-bit), but don't have a cstoreti4 optab. So, we'd need that (and only handle the GE/GEU/LT/LTU + the others that can be handled by swapping the operands). I think the double-word pattern has other issues, it will result in RA not knowing in detail what is going on and thus can at least reserve one extra register that otherwise would not be needed. The reason we have the doubleword patterns elsewhere is that splitting double-word early makes it harder/impossible for STV to use SSE registers; in this case we don't have something reasonable to expand to anyway, we always split. 
The alternative I have is the patch attached in the PR, if the unrelated addcarry/subborrow changes are removed, then it doesn't regress anything, the pr50038.c FAIL is from some other earlier change even on vanilla branch and pr67317-* FAILs were caused by the addcarry/subborrow changes, will look at those in detail. 2017-10-23 Jakub Jelinek PR target/82628 * config/i386/i386.md (UNSPEC_SBB): New unspec. (cmp_doubleword): Use unspec instead of compare. (sub3_carry_ccgz): Use unspec instead of compare. --- gcc/config/i386/i386.md.jj 2017-10-23 10:13:05.462218947 +0200 +++ gcc/config/i386/i386.md 2017-10-23 11:07:55.470376791 +0200 @@ -112,6 +112,7 @@ (define_c_enum "unspec" [ UNSPEC_STOS UNSPEC_PEEPSIB UNSPEC_INSN_FALSE_DEP + UNSPEC_SBB ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -1285,11 +1286,10 @@ (define_insn_and_split "cmp_doublew [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (parallel [(set (reg:CCGZ FLAGS_REG) - (compare: CCGZ -(match_dup 4) -(plus:DWIH - (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_dup 5 + (unspec:CCGZ [(match_dup 4) +(match_dup 5) +(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))] + UNSPEC_SBB)) (clobber (match_dup 3))])] "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);") @@ -6911,13 +6911,18 @@ (define_insn "*subsi3_carry_zext" (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) +;; The sign flag is set from the +;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2))) +;; result, the overflow flag likewise, but the overflow flag is also +;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows. +;; The borrow flag can be modelled, but differently from SF and OF +;; and is quite difficult to handle. 
(define_insn "*sub3_carry_ccgz" [(set (reg:CCGZ FLAGS_REG) - (compare:CCGZ - (match_operand:DWIH 1 "register_operand" "0") - (plus:DWIH - (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_operand:DWIH 2 "x86_64_general_operand" "rme" + (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0") + (match_operand:DWIH 2 "x86_64_general_operand" "rme") + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))] +UNSPEC_SBB)) (clobber (match_scratch:DWIH 0 "=r"))] "" "sbb{}\t{%2, %0|%0, %2}" Jakub
Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes
Hi, On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote: > I guess that might help. I have the feeling that querying for 'did > pass X run' is wrong conceptually. The reason why I liked the idea is that I could unify SRA and early-SRA passes and their behavior would only differ according to a "did pass_starg run yet" query. Admittedly, it is not a big deal, I just always dislike typing "-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-) Martin
Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes
On Mon, Oct 23, 2017 at 12:18:58PM +0200, Martin Jambor wrote: > Hi, > > On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote: > > I guess that might help. I have the feeling that querying for 'did > > pass X run' is wrong conceptually. > > The reason why I liked the idea is that I could unify SRA and > early-SRA passes and their behavior would only differ according to a > "did pass_starg run yet" query. > > Admittedly, it is not a big deal, I just always dislike typing > "-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-) -fdump-tree-{,e}sra-details when using sane shell ;) ? Jakub
Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode
On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek wrote: > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote: >> Hello! >> >> In PR 82628 Jakub figured out that insn patterns that consume carry >> flag were not 100% correct. Due to this issue, combine is able to >> simplify various CC_REG propagations that result in invalid code. >> >> Attached patch fixes (well, mitigates) the above problem by splitting >> the double-mode compare after the reload, in the same way other >> *_doubleword patterns are handled from "the beginning of the time". > > I'm afraid this is going to haunt us sooner or later, combine isn't the > only pass that uses simplify-rtx.c infrastructure heavily and when we lie > in the RTL pattern, eventually something will be simplified wrongly. > > So, at least we'd need to use UNSPEC for the pattern, like (only lightly > tested so far) below. I agree with the above. Patterns that consume Carry flag are now marked with (plus (ltu (...)), but effectively, they behave like unspecs. So, I see no problem to change all SBB and ADC to unspec at once, similar to the change you proposed in the patch. > I'm not sure the double-word pattern is a win though, it causes PR82662 > you've filed (the problem is that during ifcvt because of the double-word > comparison the condition is canonicalized as (lt (reg:TI) (reg:TI)) and > there is no instruction in the MD that would take such arguments, there > are only instructions that compare flags registers. It is not a win, my patch was more of a band-aid to mitigate the failure. It works, but it produces extra moves (as you mentioned below), due to RA not knowing that CMP doesn't clobber the register. But, let's change the pattern back to expand-time splitting after the above patch that changes SBB and ADC to unspecs is committed.
> If you look at say normal DImode comparisons, it is the same thing, > ifcvt also can't do anything with those, the reason they work is that we > have a cstoredi4 optab (for 64-bit), but don't have a cstoreti4 optab. > So, we'd need that (and only handle the GE/GEU/LT/LTU + the others that can > be handled by swapping the operands). > I think the double-word pattern has other issues, it will result in RA not > knowing in detail what is going on and thus can at least reserve one extra > register that otherwise would not be needed. The reason we have the > doubleword patterns elsewhere is that splitting double-word early makes it > harder/impossible for STV to use SSE registers; in this case we don't have > something reasonable to expand to anyway, we always split. > > The alternative I have is the patch attached in the PR, if the unrelated > addcarry/subborrow changes are removed, then it doesn't regress anything, > the pr50038.c FAIL is from some other earlier change even on vanilla > branch and pr67317-* FAILs were caused by the addcarry/subborrow changes, > will look at those in detail. I do have patch that allows double-mode for cstore, but it is not an elegant solution. Splitting to SBB at expand time would be considerably better. Thanks, Uros. > 2017-10-23 Jakub Jelinek > > PR target/82628 > * config/i386/i386.md (UNSPEC_SBB): New unspec. > (cmp_doubleword): Use unspec instead of compare. > (sub3_carry_ccgz): Use unspec instead of compare. 
> > --- gcc/config/i386/i386.md.jj 2017-10-23 10:13:05.462218947 +0200 > +++ gcc/config/i386/i386.md 2017-10-23 11:07:55.470376791 +0200 > @@ -112,6 +112,7 @@ (define_c_enum "unspec" [ >UNSPEC_STOS >UNSPEC_PEEPSIB >UNSPEC_INSN_FALSE_DEP > + UNSPEC_SBB > >;; For SSE/MMX support: >UNSPEC_FIX_NOTRUNC > @@ -1285,11 +1286,10 @@ (define_insn_and_split "cmp_doublew >[(set (reg:CC FLAGS_REG) > (compare:CC (match_dup 1) (match_dup 2))) > (parallel [(set (reg:CCGZ FLAGS_REG) > - (compare: CCGZ > -(match_dup 4) > -(plus:DWIH > - (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) > - (match_dup 5 > + (unspec:CCGZ [(match_dup 4) > +(match_dup 5) > +(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))] > + UNSPEC_SBB)) > (clobber (match_dup 3))])] >"split_double_mode (mode, &operands[0], 3, &operands[0], > &operands[3]);") > > @@ -6911,13 +6911,18 @@ (define_insn "*subsi3_carry_zext" > (set_attr "pent_pair" "pu") > (set_attr "mode" "SI")]) > > +;; The sign flag is set from the > +;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2))) > +;; result, the overflow flag likewise, but the overflow flag is also > +;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows. > +;; The borrow flag can be modelled, but differently from SF and OF > +;; and is quite difficult to handle. > (define_insn "*sub3_carry_ccgz" >[(set (reg:CCGZ FLAGS_REG) > - (compare:CCGZ > - (match_operan
Re: [Patch] Edit contrib/ files to download gfortran prerequisites
On Sat, Oct 21, 2017 at 2:26 AM, Damian Rouson wrote: > > Hi Richard, > > Attached is a revised patch that makes the downloading of Fortran > prerequisites optional via a new --no-fortran flag that can be passed to > contrib/download_prerequisites as requested in your reply below. > > As Jerry mentioned in his response, he has been working on edits to the > top-level build machinery, but we need additional guidance to complete his > work. Given that there were no responses to his request for guidance and > it’s not clear when that work will complete, I’m hoping this minor change can > be approved independently so that this patch doesn’t suffer bit rot in the > interim. But the change doesn't make sense without the build actually picking up things. > Ok for trunk? No. Thanks, Richard. > Damian > > > > > On September 21, 2017 at 12:40:49 AM, Richard Biener > (richard.guent...@gmail.com(mailto:richard.guent...@gmail.com)) wrote: > >> On Wed, Sep 20, 2017 at 10:35 PM, Damian Rouson >> wrote: >> > Attached is a patch that adds the downloading of gfortran prerequisites >> > OpenCoarrays and MPICH in the contrib/download_prerequisites script. The >> > patch also provides a useful error message when neither wget or curl are >> > available on the target platform. I tested this patch with several choices >> > for the command-line options on macOS (including --md5 and --sha512) and >> > Ubuntu Linux (including --sha512). A suggested ChangeLog entry is >> > >> > * contrib/download_prerequisites: Download OpenCoarrays and MPICH. >> > * contrib/prerequisites.sha5: Add sha512 message digests for OpenCoarrays >> > and MPICH. >> > * contrib/prerequisites.md5: Add md5 message digests for OpenCoarrays and >> > MPICH. >> > >> > >> > OK for trunk? If so, I’ll ask Jerry to commit this. I don’t have commit >> > rights. >> >> Can you make this optional similar to graphite/isl? 
Also I see no support in >> the toplevel build machinery to build/install the libs as part of GCC >> so how does >> that work in the end? >> >> Thanks, >> Richard. >> >> > Damian
Re: [RFC] New pragma exec_charset
On 10/19/2017 07:13 PM, Martin Sebor wrote: > On 10/19/2017 09:50 AM, Andreas Krebbel wrote: >> The TPF operating system uses the GCC S/390 backend. They set an >> EBCDIC exec charset for compilation using -fexec-charset. However, >> certain libraries require ASCII strings instead. In order to be able >> to put calls to that library into the normal code it is required to >> switch the exec charset within a compilation unit. >> >> This is an attempt to implement it by adding a new pragma which could >> be used like in the following example: >> >> int >> foo () >> { >> call_with_utf8("hello world"); >> >> #pragma GCC exec_charset("UTF16") >> call_with_utf16("hello world"); >> >> #pragma GCC exec_charset(pop) >> call_with_utf8("hello world"); >> } >> >> Does this look reasonable? > > I'm not an expert on this but at a high level it looks reasonable > to me. But based on some small amount of work I did in this area > I have a couple of questions. > > There are a few places in the compiler that already do or that > should but don't yet handle different execution character sets. > The former include built-ins like __bultin_isdigit() and > __builtin_sprintf (in both builtins.c and gimple-ssa-sprintf.c) > The latter is the -Wformat checking done by the C and C++ front > ends. The missing support for the latter is the subject of bug > 38308. According to bug 81686, LTO is apparently also missing > support for exec-charset. These probably are the areas Richard and Jakub were referring to as well?! These cases did not work properly with the -fexec-charset cmdline option and this does not change with the pragma. I'll try to look at what has been proposed in the discussion. Perhaps I can get it working somehow. > I'm curious how the pragma might interact with these two areas, > and whether the lack of support for it in the latter is a concern > (and if not, why not). 
For the former, I'm also wondering about > the interaction of inlining and other interprocedural optimizations > with the pragma. Does it propagate through inlined calls as one > would expect? The pragma does not apply to the callees of a function defined under the pragma regardless of whether it gets inlined or not. That matches the behavior of other pragmas. If it would apply to inlined callees the program semantics might change depending on optimization decisions i.e. whether a certain call got inlined or not. Callees marked as always_inline might be discussed separately. I remember this being a topic when looking at function attributes. Bye, -Andreas-
Re: [patch 2/5] add hook to track when splitting is complete
On Sat, Oct 21, 2017 at 11:17 PM, Sandra Loosemore wrote: > On 10/20/2017 02:24 AM, Richard Biener wrote: >> >> On Fri, Oct 20, 2017 at 4:09 AM, Sandra Loosemore >> wrote: >>> >>> This patch adds a function to indicate whether the split1 pass has run >>> yet. This is used in part 3 of the patch set to decide whether 32-bit >>> symbolic constant expressions are permitted, e.g. in >>> TARGET_LEGITIMATE_ADDRESS_P and the movsi expander. >>> >>> Since there's currently no usable hook for querying the pass manager >>> where it is relative to another pass, I implemented this using a >>> target-specific pass that runs directly after split1 and does nothing >>> but set a flag. >> >> >> "Nice" hack ;) The only currently existing way would be to add a property >> to the IL state like >> >> const pass_data pass_data_split_all_insns = >> { >>RTL_PASS, /* type */ >>"split1", /* name */ >>OPTGROUP_NONE, /* optinfo_flags */ >>TV_NONE, /* tv_id */ >>0, /* properties_required */ >>PROP_rtl_split_insns, /* properties_provided */ >>0, /* properties_destroyed */ >> >> and test that via cfun->curr_properties & PROP_rtl_split_insns >> >> Having run split might be a important enough change to warrant this. >> Likewise reload_completed and reload_in_progress could be transitioned >> to IL properties. >> >> Richard. > > > Well, here's a new version of this patch that implements what you suggested > above. It's certainly simpler than the original version, or the WIP patch I > posted before to add a general hook based on enumerating the passes. Is > this OK? +#define PROP_rtl_split_insns (1 << 17) /* split1 completed. */ /* RTL has insns split. */ that is, do not mention 'split1' specifically. Ok with that change. Thanks, Richard. > -Sandra >
[patch] Fix PR middle-end/82569
Hi, this is the regression present on the mainline for Power6 and introduced by my patch fiddling with SUBREG_PROMOTED_VAR_P in expand_expr_real_1. It turns out that the ouf-of-ssa pass implicitly assumes that promoted RTXes for partitions are fully initialized (because it can generate direct moves in promoted mode) and clearing SUBREG_PROMOTED_VAR_P for some of them goes against this. Therefore the attached patch goes in the opposite direction and initializes the RTXes for problematic partitions on function entry. Surprisingly enough, this generates smaller code on average at -O2 for gcc.c-torture/compile: 49 files changed, 1243 insertions(+), 1694 deletions(-) probably because the compiler can now infer values on paths from entry where variables are uninitialized. Tested on PowerPC64/Linux, OK for the mainline? 2017-10-23 Eric Botcazou PR middle-end/82569 * tree-outof-ssa.h (always_initialized_rtx_for_ssa_name_p): Delete. * expr.c (expand_expr_real_1) : Revert latest change. * loop-iv.c (iv_get_reaching_def): Likewise. * cfgexpand.c (expand_one_ssa_partition): Initialize the RTX if the variable is promoted and the partition contains undefined values. -- Eric BotcazouIndex: cfgexpand.c === --- cfgexpand.c (revision 253968) +++ cfgexpand.c (working copy) @@ -1391,10 +1391,18 @@ expand_one_ssa_partition (tree var) } machine_mode reg_mode = promote_ssa_mode (var, NULL); - rtx x = gen_reg_rtx (reg_mode); set_rtl (var, x); + + /* For a promoted variable, X will not be used directly but wrapped in a + SUBREG with SUBREG_PROMOTED_VAR_P set, which means that the RTL land + will assume that its upper bits can be inferred from its lower bits. + Therefore, if X isn't initialized on every path from the entry, then + we must do it manually in order to fulfill the above assumption. 
*/ + if (reg_mode != TYPE_MODE (TREE_TYPE (var)) + && bitmap_bit_p (SA.partitions_for_undefined_values, part)) +emit_move_insn (x, CONST0_RTX (reg_mode)); } /* Record the association between the RTL generated for partition PART Index: expr.c === --- expr.c (revision 253968) +++ expr.c (working copy) @@ -9912,43 +9912,24 @@ expand_expr_real_1 (tree exp, rtx target && GET_MODE (decl_rtl) != dmode) { machine_mode pmode; - bool always_initialized_rtx; /* Get the signedness to be used for this variable. Ensure we get the same mode we got when the variable was declared. */ if (code != SSA_NAME) - { - pmode = promote_decl_mode (exp, &unsignedp); - always_initialized_rtx = true; - } + pmode = promote_decl_mode (exp, &unsignedp); else if ((g = SSA_NAME_DEF_STMT (ssa_name)) && gimple_code (g) == GIMPLE_CALL && !gimple_call_internal_p (g)) - { - pmode = promote_function_mode (type, mode, &unsignedp, - gimple_call_fntype (g), 2); - always_initialized_rtx - = always_initialized_rtx_for_ssa_name_p (ssa_name); - } + pmode = promote_function_mode (type, mode, &unsignedp, + gimple_call_fntype (g), + 2); else - { - pmode = promote_ssa_mode (ssa_name, &unsignedp); - always_initialized_rtx - = always_initialized_rtx_for_ssa_name_p (ssa_name); - } - + pmode = promote_ssa_mode (ssa_name, &unsignedp); gcc_assert (GET_MODE (decl_rtl) == pmode); temp = gen_lowpart_SUBREG (mode, decl_rtl); - - /* We cannot assume anything about an existing extension if the - register may contain uninitialized bits. */ - if (always_initialized_rtx) - { - SUBREG_PROMOTED_VAR_P (temp) = 1; - SUBREG_PROMOTED_SET (temp, unsignedp); - } - + SUBREG_PROMOTED_VAR_P (temp) = 1; + SUBREG_PROMOTED_SET (temp, unsignedp); return temp; } Index: loop-iv.c === --- loop-iv.c (revision 253968) +++ loop-iv.c (working copy) @@ -353,7 +353,7 @@ iv_get_reaching_def (rtx_insn *insn, rtx adef = DF_REF_CHAIN (use)->ref; /* We do not handle setting only part of the register. 
*/ - if (DF_REF_FLAGS (adef) & (DF_REF_READ_WRITE | DF_REF_SUBREG)) + if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE) return GRD_INVALID; def_insn = DF_REF_INSN (adef); Index: tree-outof-ssa.h === --- tree-outof-ssa.h (revision 253968) +++ tree-outof-ssa.h (working copy) @@ -74,18 +74,6 @@ get_gimple_for_ssa_name (tree exp) return NULL; } -/* Return whether the RTX expression representing the storage of the outof-SSA - partition that the SSA name EXP is a member of is always initialized. */ -static inline bool -always_initialized_rtx_for_ssa_name_p (tree exp) -{ - int p = partition_find (SA.map->var_partition, SSA_NAME_VERSION (exp)); - if (SA.m
Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes
On Mon, Oct 23, 2017 at 12:18 PM, Martin Jambor wrote: > Hi, > > On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote: >> I guess that might help. I have the feeling that querying for 'did >> pass X run' is wrong conceptually. > > The reason why I liked the idea is that I could unify SRA and > early-SRA passes and their behavior would only differ according to a > "did pass_stdarg run yet" query. I think that "did pass_stdarg run yet" query isn't necessary anymore given we don't lower va-arg during gimplification. Richard. > Admittedly, it is not a big deal, I just always dislike typing > "-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-) > > Martin
Re: [patch] Fix PR middle-end/82569
On Mon, Oct 23, 2017 at 12:57 PM, Eric Botcazou wrote: > Hi, > > this is the regression present on the mainline for Power6 and introduced by my > patch fiddling with SUBREG_PROMOTED_VAR_P in expand_expr_real_1. It turns out > that the ouf-of-ssa pass implicitly assumes that promoted RTXes for partitions > are fully initialized (because it can generate direct moves in promoted mode) > and clearing SUBREG_PROMOTED_VAR_P for some of them goes against this. > > Therefore the attached patch goes in the opposite direction and initializes > the RTXes for problematic partitions on function entry. Surprisingly enough, > this generates smaller code on average at -O2 for gcc.c-torture/compile: > > 49 files changed, 1243 insertions(+), 1694 deletions(-) > > probably because the compiler can now infer values on paths from entry where > variables are uninitialized. Tested on PowerPC64/Linux, OK for the mainline? Ok. Thanks, Richard. > > 2017-10-23 Eric Botcazou > > PR middle-end/82569 > * tree-outof-ssa.h (always_initialized_rtx_for_ssa_name_p): Delete. > * expr.c (expand_expr_real_1) : Revert latest change. > * loop-iv.c (iv_get_reaching_def): Likewise. > * cfgexpand.c (expand_one_ssa_partition): Initialize the RTX if the > variable is promoted and the partition contains undefined values. > > -- > Eric Botcazou
Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode
On Mon, Oct 23, 2017 at 12:27:15PM +0200, Uros Bizjak wrote: > On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek wrote: > > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote: > >> Hello! > >> > >> In PR 82628 Jakub figured out that insn patterns that consume carry > >> flag were not 100% correct. Due to this issue, combine is able to > >> simplify various CC_REG propagations that result in invalid code. > >> > >> Attached patch fixes (well, mitigates) the above problem by splitting > >> the double-mode compare after the reload, in the same way other > >> *_doubleword patterns are handled from "the beginning of the time". > > > > I'm afraid this is going to haunt us sooner or later, combine isn't the > > only pass that uses simplify-rtx.c infrastructure heavily and when we lie > > in the RTL pattern, eventually something will be simplified wrongly. > > > > So, at least we'd need to use UNSPEC for the pattern, like (only lightly > > tested so far) below. > > I agree with the above. Patterns that consume Carry flag are now > marked with (plus (ltu (...)), but effectively, they behave like > unspecs. So, I see no problem to change all SBB and ADC to unspec at > once, similar to the change you proposed in the patch. So like this (addcarry/subborrow defered to a separate patch)? Or do you want to use UNSPEC even for the unsigned comparison case, i.e. from the patch remove the predicates.md/constraints.md part, sub3_carry_ccc{,_1} and anything related to that? As for addcarry/subborrow, the problem is that we expect in the pr67317* tests that combine is able to notice that the CF setter sets CF to unconditional 0 and matches the pattern. 
With the patch I wrote we end up with the combiner trying to match an insn where the CCC is set from a TImode comparison: (parallel [ (set (reg:CC 17 flags) (compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ]) (reg/v:DI 94 [ c ]))) (zero_extend:TI (reg/v:DI 94 [ c ] (set (reg:DI 98) (plus:DI (reg/v:DI 92 [ a ]) (reg/v:DI 94 [ c ]))) ]) So, either we need a define_insn_and_split pattern that would deal with that (for UNSPEC it would be the same thing, have a define_insn_and_split that would replace the (ltu...) with (const_int 0)), or perhaps be smarter during expansion, if we see the first argument is constant 0, expand it like a normal add instruction with CC setter. 2017-10-23 Jakub Jelinek PR target/82628 * config/i386/predicates.md (x86_64_dwzext_immediate_operand): New. * config/i386/constraints.md (Wf): New constraint. * config/i386/i386.md (UNSPEC_SBB): New unspec. (cmp_doubleword): Removed. (sub3_carry_ccc, *sub3_carry_ccc_1): New patterns. (sub3_carry_ccgz): Use unspec instead of compare. * config/i386/i386.c (ix86_expand_branch) : Don't expand with cmp_doubleword. For LTU and GEU use sub3_carry_ccc instead of sub3_carry_ccgz and use CCCmode. --- gcc/config/i386/predicates.md.jj2017-10-23 12:00:13.899355249 +0200 +++ gcc/config/i386/predicates.md 2017-10-23 12:52:20.696576114 +0200 @@ -366,6 +366,31 @@ (define_predicate "x86_64_hilo_int_opera } }) +;; Return true if VALUE is a constant integer whose value is +;; x86_64_immediate_operand value zero extended from word mode to mode. 
+(define_predicate "x86_64_dwzext_immediate_operand" + (match_code "const_int,const_wide_int") +{ + switch (GET_CODE (op)) +{ +case CONST_INT: + if (!TARGET_64BIT) + return UINTVAL (op) <= HOST_WIDE_INT_UC (0x); + return UINTVAL (op) <= HOST_WIDE_INT_UC (0x7fff); + +case CONST_WIDE_INT: + if (!TARGET_64BIT) + return false; + return (CONST_WIDE_INT_NUNITS (op) == 2 + && CONST_WIDE_INT_ELT (op, 1) == 0 + && (trunc_int_for_mode (CONST_WIDE_INT_ELT (op, 0), SImode) + == (HOST_WIDE_INT) CONST_WIDE_INT_ELT (op, 0))); + +default: + gcc_unreachable (); +} +}) + ;; Return true if size of VALUE can be stored in a sign ;; extended immediate field. (define_predicate "x86_64_immediate_size_operand" --- gcc/config/i386/constraints.md.jj 2017-10-23 12:00:13.850355874 +0200 +++ gcc/config/i386/constraints.md 2017-10-23 12:52:20.697576102 +0200 @@ -332,6 +332,11 @@ (define_constraint "Wd" of it satisfies the e constraint." (match_operand 0 "x86_64_hilo_int_operand")) +(define_constraint "Wf" + "32-bit signed integer constant zero extended from word size + to double word size." + (match_operand 0 "x86_64_dwzext_immediate_operand")) + (define_constraint "Z" "32-bit unsigned integer constant, or a symbolic reference known to fit that range (for immediate operands in zero-extending x86-64 --- gcc/config/i386/i386.md.jj 2017-10-23 12:51:19.350356044 +0200 +++ gcc/config/i386/i386.md 2017-10-23 12:52:20.701576051 +0200 @@ -112,6 +112,7 @@ (define_c_enu
[PATCH] PR libstdc++/82644 document IS 29124 support
Also fix declarations of special functions in C++17, to import them into the global namespace in , and to prevent defining the non-standard hypergeometric functions in strict mode. PR libstdc++/82644 * doc/xml/manual/intro.xml: Include new section. * doc/xml/manual/status_cxxis29124.xml: New section on IS 29124 status. * include/bits/specfun.h [__STRICT_ANSI__] (hyperg, hypergf, hypergl) (conf_hyperg, conf_hypergf, conf_hypergl): Don't declare. * include/c_compatibility/math.h: Import special functions into global namespace for C++17. * testsuite/26_numerics/headers/cmath/82644.cc: New test. * testsuite/26_numerics/headers/cmath/functions_global_c++17.cc: New test. Tested powerpc64le-linux, committed to trunk. Backports to follow. commit c755fb16187909923bcc6b7ecca318dfeecd2450 Author: Jonathan Wakely Date: Mon Oct 23 11:18:47 2017 +0100 PR libstdc++/82644 document IS 29124 support Also fix declarations of special functions in C++17, to import them into the global namespace in , and to prevent defining the non-standard hypergeometric functions in strict mode. PR libstdc++/82644 * doc/xml/manual/intro.xml: Include new section. * doc/xml/manual/status_cxxis29124.xml: New section on IS 29124 status. * include/bits/specfun.h [__STRICT_ANSI__] (hyperg, hypergf, hypergl) (conf_hyperg, conf_hypergf, conf_hypergl): Don't declare. * include/c_compatibility/math.h: Import special functions into global namespace for C++17. * testsuite/26_numerics/headers/cmath/82644.cc: New test. * testsuite/26_numerics/headers/cmath/functions_global_c++17.cc: New test. 
diff --git a/libstdc++-v3/doc/xml/manual/intro.xml b/libstdc++-v3/doc/xml/manual/intro.xml index 3b243e57c8b..2df9c5fa6a7 100644 --- a/libstdc++-v3/doc/xml/manual/intro.xml +++ b/libstdc++-v3/doc/xml/manual/intro.xml @@ -50,6 +50,10 @@ http://www.w3.org/2001/XInclude"; parse="xml" href="status_cxxtr24733.xml"> + + +http://www.w3.org/2001/XInclude"; parse="xml" href="status_cxxis29124.xml"> + diff --git a/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml new file mode 100644 index 000..40a90fc9944 --- /dev/null +++ b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml @@ -0,0 +1,315 @@ +http://docbook.org/ns/docbook"; version="5.0" +xml:id="status.iso.specfun" xreflabel="Status C++ 29124"> + + +C++ IS 29124 + +ISO C++ +Special Functions + + + + +This table is based on the table of contents of ISO/IEC FDIS 29124 +Doc No: N3060 Date: 2010-03-06 +Extensions to the C++ Library to support mathematical special functions + + + +Complete support for IS 29124 is in GCC 6.1 and later releases, when using +at least C++11 (for older releases or C++98/C++03 use TR1 instead). +For C++11 and C++14 the additions to the library are not declared by their +respective headers unless __STDCPP_WANT_MATH_SPEC_FUNCS__ +is defined as a macro that expands to a non-zero integer constant. +For C++17 the special functions are always declared (since GCC 7.1). + + + +When the special functions are declared the macro +__STDCPP_MATH_SPEC_FUNCS__ is defined to 201003L. + + + +In addition to the special functions defined in IS 29124, for +non-strict modes (i.e. -std=gnu++NN modes) the +hypergeometric functions and confluent hypergeometric functions +from TR1 are also provided, defined in namespace +__gnu_cxx. 
+ + + + + +C++ Special Functions Implementation Status + + + + + + + + + + Section + Description + Status + Comments + + + + + + 7 + Macro names + Partial + No diagnostic for inconsistent definitions of + __STDCPP_WANT_MATH_SPEC_FUNCS__ + + + 8 + Mathematical special functions + Y + + + + 8.1 + Additions to headersynopsis + Y + + + + 8.1.1 + associated Laguerre polynomials + Y + + + + 8.1.2 + associated Legendre functions + Y + + + + 8.1.3 + beta function + Y + + + + 8.1.4 + (complete) elliptic integral of the first kind + Y + + + + 8.1.5 + (complete) elliptic integral of the second kind + Y + + + + 8.1.6 + (complete) elliptic integral of the third kind + Y + + + + 8.1.7 + regular modified cylindrical Bessel functions + Y + + + + 8.1.8 + cylindrical Bessel functions (of the first kind) + Y + + + + 8.1.9 + irregular modified cylindrical Bessel functions + Y + + + + 8.1.10 + cylindrical Neumann functions + Y +
[00/nn] Patches preparing for runtime offsets and sizes
This series of patches adds or does things are needed for SVE runtime offsets and sizes, but aren't directly related to offsets and sizes themselves. It's a prerequisite to the main series that I'll post later today. Tested by compiling the testsuite before and after the series on: aarch64-linux-gnu aarch64_be-linux-gnu alpha-linux-gnu arc-elf arm-linux-gnueabi arm-linux-gnueabihf avr-elf bfin-elf c6x-elf cr16-elf cris-elf epiphany-elf fr30-elf frv-linux-gnu ft32-elf h8300-elf hppa64-hp-hpux11.23 ia64-linux-gnu i686-pc-linux-gnu i686-apple-darwin iq2000-elf lm32-elf m32c-elf m32r-elf m68k-linux-gnu mcore-elf microblaze-elf mipsel-linux-gnu mipsisa64-linux-gnu mmix mn10300-elf moxie-rtems msp430-elf nds32le-elf nios2-linux-gnu nvptx-none pdp11 powerpc-linux-gnuspe powerpc-eabispe powerpc64-linux-gnu powerpc64le-linux-gnu powerpc-ibm-aix7.0 riscv32-elf riscv64-elf rl78-elf rx-elf s390-linux-gnu s390x-linux-gnu sh-linux-gnu sparc-linux-gnu sparc64-linux-gnu sparc-wrs-vxworks spu-elf tilegx-elf tilepro-elf xstormy16-elf v850-elf vax-netbsdelf visium-elf x86_64-darwin x86_64-linux-gnu xtensa-elf There were no differences besides the ones described in the covering notes (except on powerpc-ibm-aix7.0, where symbol names aren't stable). Also tested normally on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. Thanks, Richard
[PATCH] Revert fix for PR81181
The fix was subsumed by that for PR82129. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2017-10-23 Richard Biener PR tree-optimization/82129 Revert 2017-08-01 Richard Biener PR tree-optimization/81181 * tree-ssa-pre.c (compute_antic_aux): Defer clean() to ... (compute_antic): ... end of iteration here. Index: gcc/tree-ssa-pre.c === --- gcc/tree-ssa-pre.c (revision 254004) +++ gcc/tree-ssa-pre.c (working copy) @@ -2082,8 +2082,7 @@ static sbitmap has_abnormal_preds; ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)]) ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK]) - - Note that clean() is deferred until after the iteration. */ +*/ static bool compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) @@ -2219,8 +2218,7 @@ compute_antic_aux (basic_block block, bo bitmap_value_insert_into_set (ANTIC_IN (block), expression_for_id (bii)); - /* clean (ANTIC_IN (block)) is defered to after the iteration converged - because it can cause non-convergence, see for example PR81181. */ + clean (ANTIC_IN (block)); if (!bitmap_set_equal (old, ANTIC_IN (block))) changed = true; @@ -2453,12 +2451,6 @@ compute_antic (void) gcc_checking_assert (num_iterations < 500); } - /* We have to clean after the dataflow problem converged as cleaning - can cause non-convergence because it is based on expressions - rather than values. */ - FOR_EACH_BB_FN (block, cfun) -clean (ANTIC_IN (block)); - statistics_histogram_event (cfun, "compute_antic iterations", num_iterations);
[01/nn] Add gen_(const_)vec_duplicate helpers
This patch adds helper functions for generating constant and non-constant vector duplicates. These routines help with SVE because it is then easier to use: (const:M (vec_duplicate:M X)) for a broadcast of X, even if the number of elements in M isn't known at compile time. It also makes it easier for general rtx code to treat constant and non-constant duplicates in the same way. In the target code, the patch uses gen_vec_duplicate instead of gen_rtx_VEC_DUPLICATE if handling constants correctly is potentially useful. It might be that some or all of the call sites only handle non-constants in practice, in which case the change is a harmless no-op (and a saving of a few characters). Otherwise, the target changes use gen_const_vec_duplicate instead of gen_rtx_CONST_VECTOR if the constant is obviously a duplicate. They also include some changes to use CONSTxx_RTX for easy global constants. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * emit-rtl.h (gen_const_vec_duplicate): Declare. (gen_vec_duplicate): Likewise. * emit-rtl.c (gen_const_vec_duplicate_1): New function, split out from... (gen_const_vector): ...here. (gen_const_vec_duplicate, gen_vec_duplicate): New functions. (gen_rtx_CONST_VECTOR): Use gen_const_vec_duplicate for constants whose elements are all equal. * optabs.c (expand_vector_broadcast): Use gen_const_vec_duplicate. * simplify-rtx.c (simplify_const_unary_operation): Likewise. (simplify_relational_operation): Likewise. * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup): Likewise. (aarch64_simd_dup_constant): Use gen_vec_duplicate. (aarch64_expand_vector_init): Likewise. * config/arm/arm.c (neon_vdup_constant): Likewise. (neon_expand_vector_init): Likewise. (arm_expand_vec_perm): Use gen_const_vec_duplicate. (arm_block_set_unaligned_vect): Likewise. (arm_block_set_aligned_vect): Likewise. * config/arm/neon.md (neon_copysignf): Likewise. * config/i386/i386.c (ix86_expand_vec_perm): Likewise. 
(expand_vec_perm_even_odd_pack): Likewise. (ix86_vector_duplicate_value): Use gen_vec_duplicate. * config/i386/sse.md (one_cmpl2): Use CONSTM1_RTX. * config/ia64/ia64.c (ia64_expand_vecint_compare): Use gen_const_vec_duplicate. * config/ia64/vect.md (addv2sf3, subv2sf3): Use CONST1_RTX. * config/mips/mips.c (mips_gen_const_int_vector): Use gen_const_vec_duplicate. (mips_expand_vector_init): Use CONST0_RTX. * config/powerpcspe/altivec.md (abs2, nabs2): Likewise. (define_split): Use gen_const_vec_duplicate. * config/rs6000/altivec.md (abs2, nabs2): Use CONST0_RTX. (define_split): Use gen_const_vec_duplicate. * config/s390/vx-builtins.md (vec_genmask): Likewise. (vec_ctd_s64, vec_ctd_u64, vec_ctsl, vec_ctul): Likewise. * config/spu/spu.c (spu_const): Likewise. Index: gcc/emit-rtl.h === --- gcc/emit-rtl.h 2017-10-23 11:40:11.561479591 +0100 +++ gcc/emit-rtl.h 2017-10-23 11:41:32.369050264 +0100 @@ -438,6 +438,9 @@ get_max_uid (void) return crtl->emit.x_cur_insn_uid; } +extern rtx gen_const_vec_duplicate (machine_mode, rtx); +extern rtx gen_vec_duplicate (machine_mode, rtx); + extern void set_decl_incoming_rtl (tree, rtx, bool); /* Return a memory reference like MEMREF, but with its mode changed Index: gcc/emit-rtl.c === --- gcc/emit-rtl.c 2017-10-23 11:41:25.541909864 +0100 +++ gcc/emit-rtl.c 2017-10-23 11:41:32.369050264 +0100 @@ -5756,32 +5756,60 @@ init_emit (void) #endif } -/* Generate a vector constant for mode MODE and constant value CONSTANT. */ +/* Like gen_const_vec_duplicate, but ignore const_tiny_rtx. 
*/ static rtx -gen_const_vector (machine_mode mode, int constant) +gen_const_vec_duplicate_1 (machine_mode mode, rtx el) { - rtx tem; - rtvec v; - int units, i; - machine_mode inner; + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + for (int i = 0; i < nunits; ++i) +RTVEC_ELT (v, i) = el; + return gen_rtx_raw_CONST_VECTOR (mode, v); +} - units = GET_MODE_NUNITS (mode); - inner = GET_MODE_INNER (mode); +/* Generate a vector constant of mode MODE in which every element has + value ELT. */ - gcc_assert (!DECIMAL_FLOAT_MODE_P (inner)); +rtx +gen_const_vec_duplicate (machine_mode mode, rtx elt) +{ + scalar_mode inner_mode = GET_MODE_INNER (mode); + if (elt == CONST0_RTX (inner_mode)) +return CONST0_RTX (mode); + else if (elt == CONST1_RTX (inner_mode)) +return CONST1_RTX (mode); + else if (elt == CONSTM1_RTX (inner_mode)) +return CONSTM1_RTX (mode); + + return gen_const_vec_duplicate_1 (mode, elt); +} - v = rt
[02/nn] Add more vec_duplicate simplifications
This patch adds a vec_duplicate_p helper that tests for constant or non-constant vector duplicates. Together with the existing const_vec_duplicate_p, this complements the gen_vec_duplicate and gen_const_vec_duplicate added by a previous patch. The patch uses the new routines to add more rtx simplifications involving vector duplicates. These mirror simplifications that we already do for CONST_VECTOR broadcasts and are needed for variable-length SVE, which uses: (const:M (vec_duplicate:M X)) to represent constant broadcasts instead. The simplifications do trigger on the testsuite for variable duplicates too, and in each case I saw the change was an improvement. E.g.: - Several targets had this simplification in gcc.dg/pr49948.c when compiled at -O3: -Failed to match this instruction: +Successfully matched this instruction: (set (reg:DI 88) -(subreg:DI (vec_duplicate:V2DI (reg/f:DI 75 [ _4 ])) 0)) +(reg/f:DI 75 [ _4 ])) On aarch64 this gives: ret .p2align 2 .L8: + adrpx1, b sub sp, sp, #80 - adrpx2, b - add x1, sp, 12 + add x2, sp, 12 str wzr, [x0, #:lo12:a] + str x2, [x1, #:lo12:b] mov w0, 0 - dup v0.2d, x1 - str d0, [x2, #:lo12:b] add sp, sp, 80 ret .size foo, .-foo On x86_64: jg .L2 leaq-76(%rsp), %rax movl$0, a(%rip) - movq%rax, -96(%rsp) - movq-96(%rsp), %xmm0 - punpcklqdq %xmm0, %xmm0 - movq%xmm0, b(%rip) + movq%rax, b(%rip) .L2: xorl%eax, %eax ret etc. 
- gcc.dg/torture/pr58018.c compiled at -O3 on aarch64 has an instance of: Trying 50, 52, 46 -> 53: Failed to match this instruction: (set (reg:V4SI 167) -(and:V4SI (and:V4SI (vec_duplicate:V4SI (reg:SI 132 [ _165 ])) -(reg:V4SI 209)) -(const_vector:V4SI [ -(const_int 1 [0x1]) -(const_int 1 [0x1]) -(const_int 1 [0x1]) -(const_int 1 [0x1]) -]))) +(and:V4SI (vec_duplicate:V4SI (reg:SI 132 [ _165 ])) +(reg:V4SI 209))) Successfully matched this instruction: (set (reg:V4SI 163 [ vect_patt_16.14 ]) (vec_duplicate:V4SI (reg:SI 132 [ _165 ]))) +Successfully matched this instruction: +(set (reg:V4SI 167) +(and:V4SI (reg:V4SI 163 [ vect_patt_16.14 ]) +(reg:V4SI 209))) where (reg:SI 132) is the result of a scalar comparison and so is known to be 0 or 1. This saves a MOVI and vector AND: cmp w7, 4 bls .L15 dup v1.4s, w2 - lsr w2, w1, 2 + dup v2.4s, w6 moviv3.4s, 0 - mov w0, 0 - moviv2.4s, 0x1 + lsr w2, w1, 2 mvniv0.4s, 0 + mov w0, 0 cmgev1.4s, v1.4s, v3.4s and v1.16b, v2.16b, v1.16b - dup v2.4s, w6 - and v1.16b, v1.16b, v2.16b .p2align 3 .L7: and v0.16b, v0.16b, v1.16b - powerpc64le has many instances of things like: -Failed to match this instruction: +Successfully matched this instruction: (set (reg:V4SI 161 [ vect_cst__24 ]) -(vec_select:V4SI (vec_duplicate:V4SI (vec_select:SI (reg:V4SI 143) -(parallel [ -(const_int 0 [0]) -]))) -(parallel [ -(const_int 2 [0x2]) -(const_int 3 [0x3]) -(const_int 0 [0]) -(const_int 1 [0x1]) -]))) +(vec_duplicate:V4SI (vec_select:SI (reg:V4SI 143) +(parallel [ +(const_int 0 [0]) +] This removes redundant XXPERMDIs from many tests. The best way of testing the new simplifications seemed to be via selftests. The patch cribs part of David's patch here: https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00270.html . 2017-10-23 Richard Sandiford David Malcolm Alan Hayward David Sherwood gcc/ * rtl.h (vec_duplicate_p): New function. * selftest-rtl.c (assert_rtx_eq_at): New function. * selftest-rtl.h (ASSERT_RTX_EQ): New macro. (assert_rtx_eq_at): Declare. 
* selftest.h (selftest::simplify_rtx_c_tests): Declare. * selftest-run-tests.c (selftest::run_tests): Call it. * simplify-rtx.c: Include selftest.h and selftest-rtl.h. (simplify_unary_operation_1): Recursively handle vector duplicates. (simplify_binary_operation_1): Likewise. Handle VEC_SELECTs of vector dupl
[03/nn] Allow vector CONSTs
This patch allows (const ...) wrappers to be used for rtx vector constants, as an alternative to const_vector. This is useful for SVE, where the number of elements isn't known until runtime. It could also be useful in future for fixed-length vectors, to reduce the amount of memory needed to represent simple constants with high element counts. However, one nice thing about keeping it restricted to variable-length vectors is that there is never any need to handle combinations of (const ...) and CONST_VECTOR. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/rtl.texi (const): Update description of address constants. Say that vector constants are allowed too. * common.md (E, F): Use CONSTANT_P instead of checking for CONST_VECTOR. * emit-rtl.c (gen_lowpart_common): Use const_vec_p instead of checking for CONST_VECTOR. * expmed.c (make_tree): Use build_vector_from_val for a CONST VEC_DUPLICATE. * expr.c (expand_expr_real_2): Check for vector modes instead of checking for CONST_VECTOR. * rtl.h (const_vec_p): New function. (const_vec_duplicate_p): Check for a CONST VEC_DUPLICATE. (unwrap_const_vec_duplicate): Handle them here too. Index: gcc/doc/rtl.texi === --- gcc/doc/rtl.texi2017-10-23 11:41:22.176892260 +0100 +++ gcc/doc/rtl.texi2017-10-23 11:41:39.185050437 +0100 @@ -1667,14 +1667,17 @@ Usually that is the only mode for which @findex const @item (const:@var{m} @var{exp}) -Represents a constant that is the result of an assembly-time -arithmetic computation. The operand, @var{exp}, is an expression that -contains only constants (@code{const_int}, @code{symbol_ref} and -@code{label_ref} expressions) combined with @code{plus} and -@code{minus}. However, not all combinations are valid, since the -assembler cannot do arbitrary arithmetic on relocatable symbols. +Wraps an rtx computation @var{exp} whose inputs and result do not +change during the execution of a thread. There are two valid uses. 
+The first is to represent a global or thread-local address calculation. +In this case @var{exp} should contain @code{const_int}, +@code{symbol_ref}, @code{label_ref} or @code{unspec} expressions, +combined with @code{plus} and @code{minus}. Any such @code{unspec}s +are target-specific and typically represent some form of relocation +operator. @var{m} should be a valid address mode. -@var{m} should be @code{Pmode}. +The second use of @code{const} is to wrap a vector operation. +In this case @var{exp} must be a @code{vec_duplicate} expression. @findex high @item (high:@var{m} @var{exp}) Index: gcc/common.md === --- gcc/common.md 2017-10-23 11:40:11.431285821 +0100 +++ gcc/common.md 2017-10-23 11:41:39.184050436 +0100 @@ -80,14 +80,14 @@ (define_constraint "n" (define_constraint "E" "Matches a floating-point constant." (ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)") - (match_test "GET_CODE (op) == CONST_VECTOR + (match_test "CONSTANT_P (op) && GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT"))) ;; There is no longer a distinction between "E" and "F". (define_constraint "F" "Matches a floating-point constant." 
(ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)") - (match_test "GET_CODE (op) == CONST_VECTOR + (match_test "CONSTANT_P (op) && GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT"))) (define_constraint "X" Index: gcc/emit-rtl.c === --- gcc/emit-rtl.c 2017-10-23 11:41:32.369050264 +0100 +++ gcc/emit-rtl.c 2017-10-23 11:41:39.186050437 +0100 @@ -1470,7 +1470,7 @@ gen_lowpart_common (machine_mode mode, r return gen_rtx_fmt_e (GET_CODE (x), int_mode, XEXP (x, 0)); } else if (GET_CODE (x) == SUBREG || REG_P (x) - || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR + || GET_CODE (x) == CONCAT || const_vec_p (x) || CONST_DOUBLE_AS_FLOAT_P (x) || CONST_SCALAR_INT_P (x)) return lowpart_subreg (mode, x, innermode); Index: gcc/expmed.c === --- gcc/expmed.c2017-10-23 11:41:25.541909864 +0100 +++ gcc/expmed.c2017-10-23 11:41:39.186050437 +0100 @@ -5246,7 +5246,15 @@ make_tree (tree type, rtx x) return fold_convert (type, make_tree (t, XEXP (x, 0))); case CONST: - return make_tree (type, XEXP (x, 0)); + { + rtx op = XEXP (x, 0); + if (GET_CODE (op) == VEC_DUPLICATE) + { + tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0)); + return build_vector_from_val (type, elt_tree); + } + return make_tree (type, op); + } case SYMBOL_REF:
[04/nn] Add a VEC_SERIES rtl code
This patch adds an rtl representation of a vector linear series of the form: a[I] = BASE + I * STEP Like vec_duplicate; - the new rtx can be used for both constant and non-constant vectors - when used for constant vectors it is wrapped in a (const ...) - the constant form is only used for variable-length vectors; fixed-length vectors still use CONST_VECTOR At the moment the code is restricted to integer elements, to avoid concerns over floating-point rounding. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/rtl.texi (vec_series): Document. (const): Say that the operand can be a vec_series. * rtl.def (VEC_SERIES): New rtx code. * rtl.h (const_vec_series_p_1): Declare. (const_vec_series_p): New function. * emit-rtl.h (gen_const_vec_series): Declare. (gen_vec_series): Likewise. * emit-rtl.c (const_vec_series_p_1, gen_const_vec_series) (gen_vec_series): Likewise. * optabs.c (expand_mult_highpart): Use gen_const_vec_series. * simplify-rtx.c (simplify_unary_operation): Handle negations of vector series. (simplify_binary_operation_series): New function. (simplify_binary_operation_1): Use it. Handle VEC_SERIES. (test_vector_ops_series): New function. (test_vector_ops): Call it. * config/powerpcspe/altivec.md (altivec_lvsl): Use gen_const_vec_series. (altivec_lvsr): Likewise. * config/rs6000/altivec.md (altivec_lvsl, altivec_lvsr): Likewise. Index: gcc/doc/rtl.texi === --- gcc/doc/rtl.texi2017-10-23 11:41:39.185050437 +0100 +++ gcc/doc/rtl.texi2017-10-23 11:41:41.547050496 +0100 @@ -1677,7 +1677,8 @@ are target-specific and typically repres operator. @var{m} should be a valid address mode. The second use of @code{const} is to wrap a vector operation. -In this case @var{exp} must be a @code{vec_duplicate} expression. +In this case @var{exp} must be a @code{vec_duplicate} or +@code{vec_series} expression. 
@findex high @item (high:@var{m} @var{exp}) @@ -2722,6 +2723,10 @@ the same submodes as the input vector mo number of output parts must be an integer multiple of the number of input parts. +@findex vec_series +@item (vec_series:@var{m} @var{base} @var{step}) +This operation creates a vector in which element @var{i} is equal to +@samp{@var{base} + @var{i}*@var{step}}. @var{m} must be a vector integer mode. @end table @node Conversions Index: gcc/rtl.def === --- gcc/rtl.def 2017-10-23 11:40:11.378243915 +0100 +++ gcc/rtl.def 2017-10-23 11:41:41.549050496 +0100 @@ -710,6 +710,11 @@ DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", " an integer multiple of the number of input parts. */ DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY) +/* Creation of a vector in which element I has the value BASE + I * STEP, + where BASE is the first operand and STEP is the second. The result + must have a vector integer mode. */ +DEF_RTL_EXPR(VEC_SERIES, "vec_series", "ee", RTX_BIN_ARITH) + /* Addition with signed saturation */ DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH) Index: gcc/rtl.h === --- gcc/rtl.h 2017-10-23 11:41:39.188050437 +0100 +++ gcc/rtl.h 2017-10-23 11:41:41.549050496 +0100 @@ -2816,6 +2816,51 @@ unwrap_const_vec_duplicate (T x) return x; } +/* In emit-rtl.c. */ +extern bool const_vec_series_p_1 (const_rtx, rtx *, rtx *); + +/* Return true if X is a constant vector that contains a linear series + of the form: + + { B, B + S, B + 2 * S, B + 3 * S, ... } + + for a nonzero S. Store B and S in *BASE_OUT and *STEP_OUT on sucess. 
*/ + +inline bool +const_vec_series_p (const_rtx x, rtx *base_out, rtx *step_out) +{ + if (GET_CODE (x) == CONST_VECTOR + && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) +return const_vec_series_p_1 (x, base_out, step_out); + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == VEC_SERIES) +{ + *base_out = XEXP (XEXP (x, 0), 0); + *step_out = XEXP (XEXP (x, 0), 1); + return true; +} + return false; +} + +/* Return true if X is a vector that contains a linear series of the + form: + + { B, B + S, B + 2 * S, B + 3 * S, ... } + + where B and S are constant or nonconstant. Store B and S in + *BASE_OUT and *STEP_OUT on sucess. */ + +inline bool +vec_series_p (const_rtx x, rtx *base_out, rtx *step_out) +{ + if (GET_CODE (x) == VEC_SERIES) +{ + *base_out = XEXP (x, 0); + *step_out = XEXP (x, 1); + return true; +} + return const_vec_series_p (x, base_out, step_out); +} + /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X. */ inline scalar_int_mode Index: gcc/emit-rtl.h ==
[05/nn] Add VEC_DUPLICATE_{CST,EXPR} and associated optab
SVE needs a way of broadcasting a scalar to a variable-length vector. This patch adds VEC_DUPLICATE_CST for when VECTOR_CST would be used for fixed-length vectors and VEC_DUPLICATE_EXPR for when CONSTRUCTOR would be used for fixed-length vectors. VEC_DUPLICATE_EXPR is the tree equivalent of the existing rtl code VEC_DUPLICATE. Originally we had a single VEC_DUPLICATE_EXPR and used TREE_CONSTANT to mark constant nodes, but in response to last year's RFC, Richard B. suggested it would be better to have separate codes for the constant and non-constant cases. This allows VEC_DUPLICATE_EXPR to be treated as a normal unary operation and avoids the previous need for treating it as a GIMPLE_SINGLE_RHS. It might make sense to use VEC_DUPLICATE_CST for all duplicated vector constants, since it's a bit more compact than VECTOR_CST in that case, and is potentially more efficient to process. However, the nice thing about keeping it restricted to variable-length vectors is that there is then no need to handle combinations of VECTOR_CST and VEC_DUPLICATE_CST; a vector type will always use VECTOR_CST or never use it. The patch also adds a vec_duplicate_optab to go with VEC_DUPLICATE_EXPR. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/generic.texi (VEC_DUPLICATE_CST, VEC_DUPLICATE_EXPR): Document. (VEC_COND_EXPR): Add missing @tindex. * doc/md.texi (vec_duplicate@var{m}): Document. * tree.def (VEC_DUPLICATE_CST, VEC_DUPLICATE_EXPR): New tree codes. * tree-core.h (tree_base): Document that u.nelts and TREE_OVERFLOW are used for VEC_DUPLICATE_CST as well. (tree_vector): Access base.n.nelts directly. * tree.h (TREE_OVERFLOW): Add VEC_DUPLICATE_CST to the list of valid codes. (VEC_DUPLICATE_CST_ELT): New macro. (build_vec_duplicate_cst): Declare. 
* tree.c (tree_node_structure_for_code, tree_code_size, tree_size) (integer_zerop, integer_onep, integer_all_onesp, integer_truep) (real_zerop, real_onep, real_minus_onep, add_expr, initializer_zerop) (walk_tree_1, drop_tree_overflow): Handle VEC_DUPLICATE_CST. (build_vec_duplicate_cst): New function. (uniform_vector_p): Handle the new codes. (test_vec_duplicate_predicates_int): New function. (test_vec_duplicate_predicates_float): Likewise. (test_vec_duplicate_predicates): Likewise. (tree_c_tests): Call test_vec_duplicate_predicates. * cfgexpand.c (expand_debug_expr): Handle the new codes. * tree-pretty-print.c (dump_generic_node): Likewise. * dwarf2out.c (rtl_for_decl_init): Handle VEC_DUPLICATE_CST. * gimple-expr.h (is_gimple_constant): Likewise. * gimplify.c (gimplify_expr): Likewise. * graphite-isl-ast-to-gimple.c (translate_isl_ast_to_gimple::is_constant): Likewise. * graphite-scop-detection.c (scan_tree_for_params): Likewise. * ipa-icf-gimple.c (func_checker::compare_cst_or_decl): Likewise. (func_checker::compare_operand): Likewise. * ipa-icf.c (sem_item::add_expr, sem_variable::equals): Likewise. * match.pd (negate_expr_p): Likewise. * print-tree.c (print_node): Likewise. * tree-chkp.c (chkp_find_bounds_1): Likewise. * tree-loop-distribution.c (const_with_all_bytes_same): Likewise. * tree-ssa-loop.c (for_each_index): Likewise. * tree-ssa-pre.c (create_component_ref_by_pieces_1): Likewise. * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Likewise. (ao_ref_init_from_vn_reference): Likewise. * tree-vect-generic.c (ssa_uniform_vector_p): Likewise. * varasm.c (const_hash_1, compare_constant): Likewise. * fold-const.c (negate_expr_p, fold_negate_expr_1, const_binop) (fold_convert_const, operand_equal_p, fold_view_convert_expr) (exact_inverse, fold_checksum_tree): Likewise. (const_unop): Likewise. Fold VEC_DUPLICATE_EXPRs of a constant. (test_vec_duplicate_folding): New function. (fold_const_c_tests): Call it. * optabs.def (vec_duplicate_optab): New optab. 
* optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR. * optabs.h (expand_vector_broadcast): Declare. * optabs.c (expand_vector_broadcast): Make non-static. Try using vec_duplicate_optab. * expr.c (store_constructor): Try using vec_duplicate_optab for uniform vectors. (const_vector_element): New function, split out from... (const_vector_from_tree): ...here. (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR. (expand_expr_real_1): Handle VEC_DUPLICATE_CST. * internal-fn.c (expand_vector_ubsan_overflow): Use CONSTANT_P instead of checking for VECTOR_CST. * tree-cfg.c (verify_gimple_assign_unary): Handle VEC_DUPLICATE_EXPR. (verify_gimple_assign_single): Handle VEC_DUPLI
[06/nn] Add VEC_SERIES_{CST,EXPR} and associated optab
Similarly to the VEC_DUPLICATE_{CST,EXPR}, this patch adds two tree code equivalents of the VEC_SERIES rtx code. VEC_SERIES_EXPR is for non-constant inputs and is a normal tcc_binary. VEC_SERIES_CST is a tcc_constant. Like VEC_DUPLICATE_CST, VEC_SERIES_CST is only used for variable-length vectors. This avoids the need to handle combinations of VECTOR_CST and VEC_SERIES_CST. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/generic.texi (VEC_SERIES_CST, VEC_SERIES_EXPR): Document. * doc/md.texi (vec_series@var{m}): Document. * tree.def (VEC_SERIES_CST, VEC_SERIES_EXPR): New tree codes. * tree.h (TREE_OVERFLOW): Add VEC_SERIES_CST to the list of valid codes. (VEC_SERIES_CST_BASE, VEC_SERIES_CST_STEP): New macros. (build_vec_series_cst, build_vec_series): Declare. * tree.c (tree_node_structure_for_code, tree_code_size, tree_size) (add_expr, walk_tree_1, drop_tree_overflow): Handle VEC_SERIES_CST. (build_vec_series_cst, build_vec_series): New functions. * cfgexpand.c (expand_debug_expr): Handle the new codes. * tree-pretty-print.c (dump_generic_node): Likewise. * dwarf2out.c (rtl_for_decl_init): Handle VEC_SERIES_CST. * gimple-expr.h (is_gimple_constant): Likewise. * gimplify.c (gimplify_expr): Likewise. * graphite-scop-detection.c (scan_tree_for_params): Likewise. * ipa-icf-gimple.c (func_checker::compare_cst_or_decl): Likewise. (func_checker::compare_operand): Likewise. * ipa-icf.c (sem_item::add_expr, sem_variable::equals): Likewise. * print-tree.c (print_node): Likewise. * tree-ssa-loop.c (for_each_index): Likewise. * tree-ssa-pre.c (create_component_ref_by_pieces_1): Likewise. * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Likewise. (ao_ref_init_from_vn_reference): Likewise. * varasm.c (const_hash_1, compare_constant): Likewise. * fold-const.c (negate_expr_p, fold_negate_expr_1, operand_equal_p) (fold_checksum_tree): Likewise. (vec_series_equivalent_p): New function. (const_binop): Use it. Fold VEC_SERIES_EXPRs of constants. 
* expmed.c (make_tree): Handle VEC_SERIES. * gimple-pretty-print.c (dump_binary_rhs): Likewise. * tree-inline.c (estimate_operator_cost): Likewise. * expr.c (const_vector_element): Include VEC_SERIES_CST in comment. (expand_expr_real_2): Handle VEC_SERIES_EXPR. (expand_expr_real_1): Handle VEC_SERIES_CST. * optabs.def (vec_series_optab): New optab. * optabs.h (expand_vec_series_expr): Declare. * optabs.c (expand_vec_series_expr): New function. * optabs-tree.c (optab_for_tree_code): Handle VEC_SERIES_EXPR. * tree-cfg.c (verify_gimple_assign_binary): Handle VEC_SERIES_EXPR. (verify_gimple_assign_single): Handle VEC_SERIES_CST. * tree-vect-generic.c (expand_vector_operations_1): Check that the operands also have vector type. Index: gcc/doc/generic.texi === --- gcc/doc/generic.texi2017-10-23 11:41:51.760448406 +0100 +++ gcc/doc/generic.texi2017-10-23 11:42:34.910720660 +0100 @@ -1037,6 +1037,7 @@ As this example indicates, the operands @tindex COMPLEX_CST @tindex VECTOR_CST @tindex VEC_DUPLICATE_CST +@tindex VEC_SERIES_CST @tindex STRING_CST @findex TREE_STRING_LENGTH @findex TREE_STRING_POINTER @@ -1098,6 +1099,16 @@ instead. The scalar element value is gi @code{VEC_DUPLICATE_CST_ELT} and has the same restrictions as the element of a @code{VECTOR_CST}. +@item VEC_SERIES_CST +These nodes represent a vector constant in which element @var{i} +has the value @samp{@var{base} + @var{i} * @var{step}}, for some +constant @var{base} and @var{step}. The value of @var{base} is +given by @code{VEC_SERIES_CST_BASE} and the value of @var{step} is +given by @code{VEC_SERIES_CST_STEP}. + +These nodes are restricted to integral types, in order to avoid +specifying the rounding behavior for floating-point types. + @item STRING_CST These nodes represent string-constants. The @code{TREE_STRING_LENGTH} returns the length of the string, as an @code{int}. 
The @@ -1702,6 +1713,7 @@ a value from @code{enum annot_expr_kind} @node Vectors @subsection Vectors @tindex VEC_DUPLICATE_EXPR +@tindex VEC_SERIES_EXPR @tindex VEC_LSHIFT_EXPR @tindex VEC_RSHIFT_EXPR @tindex VEC_WIDEN_MULT_HI_EXPR @@ -1721,6 +1733,14 @@ a value from @code{enum annot_expr_kind} This node has a single operand and represents a vector in which every element is equal to that operand. +@item VEC_SERIES_EXPR +This node represents a vector formed from a scalar base and step, +given as the first and second operands respectively. Element @var{i} +of the result is equal to @samp{@var{base} + @var{i}*@var{step}}. + +This node is restricted to integra
[08/nn] Add a fixed_size_mode class
This patch adds a fixed_size_mode machine_mode wrapper for modes that are known to have a fixed size. That applies to all current modes, but future patches will add support for variable-sized modes. The use of this class should be pretty restricted. One important use case is to hold the mode of static data, which can never be variable-sized with current file formats. Another is to hold the modes of registers involved in __builtin_apply and __builtin_result, since those interfaces don't cope well with variable-sized data. The class can also be useful when reinterpreting the contents of a fixed-length bit string as a different kind of value. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * machmode.h (fixed_size_mode): New class. * rtl.h (get_pool_mode): Return fixed_size_mode. * gengtype.c (main): Add fixed_size_mode. * target.def (get_raw_result_mode): Return a fixed_size_mode. (get_raw_arg_mode): Likewise. * doc/tm.texi: Regenerate. * targhooks.h (default_get_reg_raw_mode): Return a fixed_size_mode. * targhooks.c (default_get_reg_raw_mode): Likewise. * config/ia64/ia64.c (ia64_get_reg_raw_mode): Likewise. * config/mips/mips.c (mips_get_reg_raw_mode): Likewise. * config/msp430/msp430.c (msp430_get_raw_arg_mode): Likewise. (msp430_get_raw_result_mode): Likewise. * config/avr/avr-protos.h (regmask): Use as_a * dbxout.c (dbxout_parms): Require fixed-size modes. * expr.c (copy_blkmode_from_reg, copy_blkmode_to_reg): Likewise. * gimple-ssa-store-merging.c (encode_tree_to_bitpos): Likewise. * omp-low.c (lower_oacc_reductions): Likewise. * simplify-rtx.c (simplify_immed_subreg): Take fixed_size_modes. (simplify_subreg): Update accordingly. * varasm.c (constant_descriptor_rtx::mode): Change to fixed_size_mode. (force_const_mem): Update accordingly. Return NULL_RTX for modes that aren't fixed-size. (get_pool_mode): Return a fixed_size_mode. (output_constant_pool_2): Take a fixed_size_mode. 
Index: gcc/machmode.h === --- gcc/machmode.h 2017-09-15 14:47:33.184331588 +0100 +++ gcc/machmode.h 2017-10-23 11:42:52.014721093 +0100 @@ -652,6 +652,39 @@ GET_MODE_2XWIDER_MODE (const T &m) extern const unsigned char mode_complex[NUM_MACHINE_MODES]; #define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE]) +/* Represents a machine mode that must have a fixed size. The main + use of this class is to represent the modes of objects that always + have static storage duration, such as constant pool entries. + (No current target supports the concept of variable-size static data.) */ +class fixed_size_mode +{ +public: + typedef mode_traits::from_int from_int; + + ALWAYS_INLINE fixed_size_mode () {} + ALWAYS_INLINE fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {} + ALWAYS_INLINE fixed_size_mode (const scalar_mode &m) : m_mode (m) {} + ALWAYS_INLINE fixed_size_mode (const scalar_int_mode &m) : m_mode (m) {} + ALWAYS_INLINE fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {} + ALWAYS_INLINE fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {} + ALWAYS_INLINE fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {} + ALWAYS_INLINE fixed_size_mode (const complex_mode &m) : m_mode (m) {} + ALWAYS_INLINE operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +protected: + machine_mode m_mode; +}; + +/* Return true if MODE has a fixed size. 
*/ + +inline bool +fixed_size_mode::includes_p (machine_mode) +{ + return true; +} + extern opt_machine_mode mode_for_size (unsigned int, enum mode_class, int); /* Return the machine mode to use for a MODE_INT of SIZE bits, if one Index: gcc/rtl.h === --- gcc/rtl.h 2017-10-23 11:42:47.297720974 +0100 +++ gcc/rtl.h 2017-10-23 11:42:52.015721094 +0100 @@ -3020,7 +3020,7 @@ extern rtx force_const_mem (machine_mode struct function; extern rtx get_pool_constant (const_rtx); extern rtx get_pool_constant_mark (rtx, bool *); -extern machine_mode get_pool_mode (const_rtx); +extern fixed_size_mode get_pool_mode (const_rtx); extern rtx simplify_subtraction (rtx); extern void decide_function_section (tree); Index: gcc/gengtype.c === --- gcc/gengtype.c 2017-05-23 19:29:56.919436344 +0100 +++ gcc/gengtype.c 2017-10-23 11:42:52.014721093 +0100 @@ -5197,6 +5197,7 @@ #define POS_HERE(Call) do { pos.file = t POS_HERE (do_scalar_typedef ("JCF_u2", &pos)); POS_HERE (do_scalar_typedef ("void", &pos)); POS_HERE (do_scalar_typedef ("machine_mode", &pos)); + POS_HERE (do_scalar_typedef ("fixed_size_mode", &pos)); POS_HER
[07/nn] Add unique CONSTs
This patch adds a way of treating certain kinds of CONST as unique, so that pointer equality is equivalent to value equality. For now it is restricted to VEC_DUPLICATE and VEC_SERIES, although the code to generate them remains in the else arm of an "if (1)" until a later patch. This is needed so that (const (vec_duplicate xx)) can used as the CONSTxx_RTX of a variable-length vector. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtl.h (unique_const_p): New function. (gen_rtx_CONST): Declare. * emit-rtl.c (const_hasher): New struct. (const_htab): New variable. (init_emit_once): Initialize it. (const_hasher::hash, const_hasher::equal): New functions. (gen_rtx_CONST): New function. (spare_vec_duplicate, spare_vec_series): New variables. (gen_const_vec_duplicate_1): Add code for use (const (vec_duplicate)), but disable it for now. (gen_const_vec_series): Likewise (const (vec_series)). * gengenrtl.c (special_rtx): Return true for CONST. * rtl.c (shared_const_p): Return true if unique_const_p. Index: gcc/rtl.h === --- gcc/rtl.h 2017-10-23 11:41:41.549050496 +0100 +++ gcc/rtl.h 2017-10-23 11:42:47.297720974 +0100 @@ -2861,6 +2861,23 @@ vec_series_p (const_rtx x, rtx *base_out return const_vec_series_p (x, base_out, step_out); } +/* Return true if there should only ever be one instance of (const X), + so that constants of this type can be compared using pointer equality. */ + +inline bool +unique_const_p (const_rtx x) +{ + switch (GET_CODE (x)) +{ +case VEC_DUPLICATE: +case VEC_SERIES: + return true; + +default: + return false; +} +} + /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X. 
*/ inline scalar_int_mode @@ -3542,6 +3559,7 @@ extern rtx_insn_list *gen_rtx_INSN_LIST gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, basic_block bb, rtx pattern, int location, int code, rtx reg_notes); +extern rtx gen_rtx_CONST (machine_mode, rtx); extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); extern void set_mode_and_regno (rtx, machine_mode, unsigned int); Index: gcc/emit-rtl.c === --- gcc/emit-rtl.c 2017-10-23 11:41:41.548050496 +0100 +++ gcc/emit-rtl.c 2017-10-23 11:42:47.296720974 +0100 @@ -175,6 +175,15 @@ struct const_fixed_hasher : ggc_cache_pt static GTY ((cache)) hash_table *const_fixed_htab; +/* A hash table storing unique CONSTs. */ +struct const_hasher : ggc_cache_ptr_hash +{ + static hashval_t hash (rtx x); + static bool equal (rtx x, rtx y); +}; + +static GTY ((cache)) hash_table *const_htab; + #define cur_insn_uid (crtl->emit.x_cur_insn_uid) #define cur_debug_insn_uid (crtl->emit.x_cur_debug_insn_uid) #define first_label_num (crtl->emit.x_first_label_num) @@ -310,6 +319,28 @@ const_fixed_hasher::equal (rtx x, rtx y) return fixed_identical (CONST_FIXED_VALUE (a), CONST_FIXED_VALUE (b)); } +/* Returns a hash code for X (which is either an existing unique CONST + or an operand to gen_rtx_CONST). */ + +hashval_t +const_hasher::hash (rtx x) +{ + if (GET_CODE (x) == CONST) +x = XEXP (x, 0); + + int do_not_record_p = 0; + return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false); +} + +/* Returns true if the operand of unique CONST X is equal to Y. */ + +bool +const_hasher::equal (rtx x, rtx y) +{ + gcc_checking_assert (GET_CODE (x) == CONST); + return rtx_equal_p (XEXP (x, 0), y); +} + /* Return true if the given memory attributes are equal. */ bool @@ -5756,16 +5787,55 @@ init_emit (void) #endif } +rtx +gen_rtx_CONST (machine_mode mode, rtx val) +{ + if (unique_const_p (val)) +{ + /* Look up the CONST in the hash table. 
*/ + rtx *slot = const_htab->find_slot (val, INSERT); + if (*slot == 0) + *slot = gen_rtx_raw_CONST (mode, val); + return *slot; +} + + return gen_rtx_raw_CONST (mode, val); +} + +/* Temporary rtx used by gen_const_vec_duplicate_1. */ +static GTY((deletable)) rtx spare_vec_duplicate; + /* Like gen_const_vec_duplicate, but ignore const_tiny_rtx. */ static rtx gen_const_vec_duplicate_1 (machine_mode mode, rtx el) { int nunits = GET_MODE_NUNITS (mode); - rtvec v = rtvec_alloc (nunits); - for (int i = 0; i < nunits; ++i) -RTVEC_ELT (v, i) = el; - return gen_rtx_raw_CONST_VECTOR (mode, v); + if (1) +{ + rtvec v = rtvec_alloc (nunits); + + for (int i = 0; i < nunits; ++i) + RTVEC_ELT (v, i) = el; + + return gen_rtx_raw_CONST_VECTOR (mode, v); +} + else +{ + if (spare_vec_duplicate) + { + PUT_MODE (spare_vec_duplicate, mode); + XEXP (spare_vec_duplicate, 0) = el; +
Some PRE TLC
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2017-10-23 Richard Biener * tree-ssa-pre.c (bitmap_remove_from_set): Rename to... (bitmap_remove_expr_from_set): ... this. All callers call this for non-constant values. (bitmap_set_subtract): Rename to... (bitmap_set_subtract_expressions): ... this. Adjust and optimize. (bitmap_set_contains_value): Remove superfluous check. (bitmap_set_replace_value): Inline into single caller ... (bitmap_value_replace_in_set): ... here and simplify. (dependent_clean): Merge into ... (clean): ... this using an overload. Adjust. (prune_clobbered_mems): Adjust. (compute_antic_aux): Likewise. (compute_partial_antic_aux): Likewise. Index: gcc/tree-ssa-pre.c === --- gcc/tree-ssa-pre.c (revision 253998) +++ gcc/tree-ssa-pre.c (working copy) @@ -719,14 +719,11 @@ sccvn_valnum_from_value_id (unsigned int /* Remove an expression EXPR from a bitmapped set. */ static void -bitmap_remove_from_set (bitmap_set_t set, pre_expr expr) +bitmap_remove_expr_from_set (bitmap_set_t set, pre_expr expr) { unsigned int val = get_expr_value_id (expr); - if (!value_id_constant_p (val)) -{ - bitmap_clear_bit (&set->values, val); - bitmap_clear_bit (&set->expressions, get_expression_id (expr)); -} + bitmap_clear_bit (&set->values, val); + bitmap_clear_bit (&set->expressions, get_expression_id (expr)); } /* Insert an expression EXPR into a bitmapped set. */ @@ -802,7 +799,7 @@ sorted_array_from_bitmap_set (bitmap_set /* Subtract all expressions contained in ORIG from DEST. 
*/ static bitmap_set_t -bitmap_set_subtract (bitmap_set_t dest, bitmap_set_t orig) +bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig) { bitmap_set_t result = bitmap_set_new (); bitmap_iterator bi; @@ -833,15 +830,15 @@ bitmap_set_subtract_values (bitmap_set_t { if (to_remove) { - bitmap_remove_from_set (a, to_remove); + bitmap_remove_expr_from_set (a, to_remove); to_remove = NULL; } pre_expr expr = expression_for_id (i); - if (bitmap_set_contains_value (b, get_expr_value_id (expr))) + if (bitmap_bit_p (&b->values, get_expr_value_id (expr))) to_remove = expr; } if (to_remove) -bitmap_remove_from_set (a, to_remove); +bitmap_remove_expr_from_set (a, to_remove); } @@ -853,9 +850,6 @@ bitmap_set_contains_value (bitmap_set_t if (value_id_constant_p (value_id)) return true; - if (!set || bitmap_empty_p (&set->expressions)) -return false; - return bitmap_bit_p (&set->values, value_id); } @@ -865,44 +859,6 @@ bitmap_set_contains_expr (bitmap_set_t s return bitmap_bit_p (&set->expressions, get_expression_id (expr)); } -/* Replace an instance of value LOOKFOR with expression EXPR in SET. */ - -static void -bitmap_set_replace_value (bitmap_set_t set, unsigned int lookfor, - const pre_expr expr) -{ - bitmap exprset; - unsigned int i; - bitmap_iterator bi; - - if (value_id_constant_p (lookfor)) -return; - - if (!bitmap_set_contains_value (set, lookfor)) -return; - - /* The number of expressions having a given value is usually - significantly less than the total number of expressions in SET. - Thus, rather than check, for each expression in SET, whether it - has the value LOOKFOR, we walk the reverse mapping that tells us - what expressions have a given value, and see if any of those - expressions are in our set. For large testcases, this is about - 5-10x faster than walking the bitmap. If this is somehow a - significant lose for some cases, we can choose which set to walk - based on the set size. 
*/ - exprset = value_expressions[lookfor]; - EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi) -{ - if (bitmap_clear_bit (&set->expressions, i)) - { - bitmap_set_bit (&set->expressions, get_expression_id (expr)); - return; - } -} - - gcc_unreachable (); -} - /* Return true if two bitmap sets are equal. */ static bool @@ -918,9 +874,33 @@ static void bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr) { unsigned int val = get_expr_value_id (expr); + if (value_id_constant_p (val)) +return; if (bitmap_set_contains_value (set, val)) -bitmap_set_replace_value (set, val, expr); +{ + /* The number of expressions having a given value is usually +significantly less than the total number of expressions in SET. +Thus, rather than check, for each expression in SET, whether it +has the value LOOKFOR, we walk the reverse mapping that tells us +what expressions have a given value, and see if any of those +expressions are in our set. For large testcases, this is about +5
[09/nn] Add a fixed_size_mode_pod class
This patch adds a POD version of fixed_size_mode. The only current use is for storing the __builtin_apply and __builtin_result register modes, which were made fixed_size_modes by the previous patch. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * coretypes.h (fixed_size_mode): Declare. (fixed_size_mode_pod): New typedef. * builtins.h (target_builtins::x_apply_args_mode) (target_builtins::x_apply_result_mode): Change type to fixed_size_mode_pod. * builtins.c (apply_args_size, apply_result_size, result_vector) (expand_builtin_apply_args_1, expand_builtin_apply) (expand_builtin_return): Update accordingly. Index: gcc/coretypes.h === --- gcc/coretypes.h 2017-09-11 17:10:58.656085547 +0100 +++ gcc/coretypes.h 2017-10-23 11:42:57.592545063 +0100 @@ -59,6 +59,7 @@ typedef const struct rtx_def *const_rtx; class scalar_int_mode; class scalar_float_mode; class complex_mode; +class fixed_size_mode; template class opt_mode; typedef opt_mode opt_scalar_mode; typedef opt_mode opt_scalar_int_mode; @@ -66,6 +67,7 @@ typedef opt_mode opt_ template class pod_mode; typedef pod_mode scalar_mode_pod; typedef pod_mode scalar_int_mode_pod; +typedef pod_mode fixed_size_mode_pod; /* Subclasses of rtx_def, using indentation to show the class hierarchy, along with the relevant invariant. Index: gcc/builtins.h === --- gcc/builtins.h 2017-08-30 12:18:46.602740973 +0100 +++ gcc/builtins.h 2017-10-23 11:42:57.592545063 +0100 @@ -29,14 +29,14 @@ struct target_builtins { the register is not used for calling a function. If the machine has register windows, this gives only the outbound registers. INCOMING_REGNO gives the corresponding inbound register. */ - machine_mode x_apply_args_mode[FIRST_PSEUDO_REGISTER]; + fixed_size_mode_pod x_apply_args_mode[FIRST_PSEUDO_REGISTER]; /* For each register that may be used for returning values, this gives a mode used to copy the register's value. VOIDmode indicates the register is not used for returning values. 
If the machine has register windows, this gives only the outbound registers. INCOMING_REGNO gives the corresponding inbound register. */ - machine_mode x_apply_result_mode[FIRST_PSEUDO_REGISTER]; + fixed_size_mode_pod x_apply_result_mode[FIRST_PSEUDO_REGISTER]; }; extern struct target_builtins default_target_builtins; Index: gcc/builtins.c === --- gcc/builtins.c 2017-10-23 11:41:23.140260335 +0100 +++ gcc/builtins.c 2017-10-23 11:42:57.592545063 +0100 @@ -1358,7 +1358,6 @@ apply_args_size (void) static int size = -1; int align; unsigned int regno; - machine_mode mode; /* The values computed by this function never change. */ if (size < 0) @@ -1374,7 +1373,7 @@ apply_args_size (void) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (FUNCTION_ARG_REGNO_P (regno)) { - mode = targetm.calls.get_raw_arg_mode (regno); + fixed_size_mode mode = targetm.calls.get_raw_arg_mode (regno); gcc_assert (mode != VOIDmode); @@ -1386,7 +1385,7 @@ apply_args_size (void) } else { - apply_args_mode[regno] = VOIDmode; + apply_args_mode[regno] = as_a (VOIDmode); } } return size; @@ -1400,7 +1399,6 @@ apply_result_size (void) { static int size = -1; int align, regno; - machine_mode mode; /* The values computed by this function never change. */ if (size < 0) @@ -1410,7 +1408,7 @@ apply_result_size (void) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (targetm.calls.function_value_regno_p (regno)) { - mode = targetm.calls.get_raw_result_mode (regno); + fixed_size_mode mode = targetm.calls.get_raw_result_mode (regno); gcc_assert (mode != VOIDmode); @@ -1421,7 +1419,7 @@ apply_result_size (void) apply_result_mode[regno] = mode; } else - apply_result_mode[regno] = VOIDmode; + apply_result_mode[regno] = as_a (VOIDmode); /* Allow targets that use untyped_call and untyped_return to override the size so that machine-specific information can be stored here. 
*/ @@ -1440,7 +1438,7 @@ apply_result_size (void) result_vector (int savep, rtx result) { int regno, size, align, nelts; - machine_mode mode; + fixed_size_mode mode; rtx reg, mem; rtx *savevec = XALLOCAVEC (rtx, FIRST_PSEUDO_REGISTER); @@ -1469,7 +1467,7 @@ expand_builtin_apply_args_1 (void) { rtx registers, tem; int size, align, regno; - machine_mode mode; + fixed_size_mode mode; rtx struct_incoming_value = targetm.calls.struct_value_rtx (c
[10/nn] Widening optab cleanup
widening_optab_handler had the comment: /* ??? Why does find_widening_optab_handler_and_mode attempt to widen things that can't be widened? E.g. add_optab... */ if (op > LAST_CONV_OPTAB) return CODE_FOR_nothing; I think it comes from expand_binop using find_widening_optab_handler_and_mode for two things: to test whether a "normal" optab like add_optab is supported for a standard binary operation and to test whether a "convert" optab is supported for a widening operation like umul_widen_optab. In the former case from_mode and to_mode must be the same, in the latter from_mode must be narrower than to_mode. For the former case, find_widening_optab_handler_and_mode is only really testing the modes that are passed in. permit_non_widening must be true here. For the latter case, find_widening_optab_handler_and_mode should only really consider new from_modes that are wider than the original from_mode and narrower than the original to_mode. Logically permit_non_widening should be false, since widening optabs aren't supposed to take operands that are the same width as the destination. We get away with permit_non_widening being true because no target would/should define a widening .md pattern with matching modes. But really, it seems better for expand_binop to handle these two cases itself rather than pushing them down. With that change, find_widening_optab_handler_and_mode is only ever called with permit_non_widening set to false and is only ever called with a "proper" convert optab. We then no longer need widening_optab_handler, we can just use convert_optab_handler directly. The patch also passes the instruction code down to expand_binop_directly. This should be more efficient and removes an extra call to find_widening_optab_handler_and_mode. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * optabs-query.h (convert_optab_p): New function, split out from... (convert_optab_handler): ...here. (widening_optab_handler): Delete. 
(find_widening_optab_handler): Remove permit_non_widening parameter. (find_widening_optab_handler_and_mode): Likewise. Provide an override that operates on mode class wrappers. * optabs-query.c (widening_optab_handler): Delete. (find_widening_optab_handler_and_mode): Remove permit_non_widening parameter. Assert that the two modes are the same class and that the "from" mode is narrower than the "to" mode. Use convert_optab_handler instead of widening_optab_handler. * expmed.c (expmed_mult_highpart_optab): Use convert_optab_handler instead of widening_optab_handler. * expr.c (expand_expr_real_2): Update calls to find_widening_optab_handler. * optabs.c (expand_widen_pattern_expr): Likewise. (expand_binop_directly): Take the insn_code as a parameter. (expand_binop): Only call find_widening_optab_handler for conversion optabs; use optab_handler otherwise. Update calls to find_widening_optab_handler and expand_binop_directly. Use convert_optab_handler instead of widening_optab_handler. * tree-ssa-math-opts.c (convert_mult_to_widen): Update calls to find_widening_optab_handler and use scalar_mode rather than machine_mode. (convert_plusminus_to_widen): Likewise. Index: gcc/optabs-query.h === --- gcc/optabs-query.h 2017-09-14 17:04:19.080694343 +0100 +++ gcc/optabs-query.h 2017-10-23 11:43:01.517673716 +0100 @@ -23,6 +23,14 @@ #define GCC_OPTABS_QUERY_H #include "insn-opinit.h" #include "target.h" +/* Return true if OP is a conversion optab. */ + +inline bool +convert_optab_p (optab op) +{ + return op > unknown_optab && op <= LAST_CONV_OPTAB; +} + /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. 
*/ @@ -43,7 +51,7 @@ convert_optab_handler (convert_optab op, machine_mode from_mode) { unsigned scode = (op << 16) | (from_mode << 8) | to_mode; - gcc_assert (op > unknown_optab && op <= LAST_CONV_OPTAB); + gcc_assert (convert_optab_p (op)); return raw_optab_handler (scode); } @@ -167,12 +175,11 @@ enum insn_code can_float_p (machine_mode enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *); -enum insn_code widening_optab_handler (optab, machine_mode, machine_mode); /* Find a widening optab even if it doesn't widen as much as we want. */ -#define find_widening_optab_handler(A,B,C,D) \ - find_widening_optab_handler_and_mode (A, B, C, D, NULL) +#define find_widening_optab_handler(A, B, C) \ + find_widening_optab_handler_and_mode (A, B, C, NULL) enum insn_code find_widening_optab_handler_and_mode (optab, machine_mod
[11/nn] Add narrower_subreg_mode helper function
This patch adds a narrowing equivalent of wider_subreg_mode. At present there is only one user. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtl.h (narrower_subreg_mode): New function. * ira-color.c (update_costs_from_allocno): Use it. Index: gcc/rtl.h === --- gcc/rtl.h 2017-10-23 11:44:06.562686090 +0100 +++ gcc/rtl.h 2017-10-23 11:44:15.916785881 +0100 @@ -2972,6 +2972,16 @@ subreg_lowpart_offset (machine_mode oute } /* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE, + return the smaller of the two modes if they are different sizes, + otherwise return the outer mode. */ + +inline machine_mode +narrower_subreg_mode (machine_mode outermode, machine_mode innermode) +{ + return paradoxical_subreg_p (outermode, innermode) ? innermode : outermode; +} + +/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE, return the mode that is big enough to hold both the outer and inner values. Prefer the outer mode in the event of a tie. */ Index: gcc/ira-color.c === --- gcc/ira-color.c 2017-10-23 11:44:11.500538024 +0100 +++ gcc/ira-color.c 2017-10-23 11:44:15.915819948 +0100 @@ -1367,15 +1367,14 @@ update_costs_from_allocno (ira_allocno_t || ALLOCNO_ASSIGNED_P (another_allocno)) continue; - if (GET_MODE_SIZE (ALLOCNO_MODE (cp->second)) < GET_MODE_SIZE (mode)) - /* If we have different modes use the smallest one. It is - a sub-register move. It is hard to predict what LRA - will reload (the pseudo or its sub-register) but LRA - will try to minimize the data movement. Also for some - register classes bigger modes might be invalid, - e.g. DImode for AREG on x86. For such cases the - register move cost will be maximal. */ - mode = ALLOCNO_MODE (cp->second); + /* If we have different modes use the smallest one. It is +a sub-register move. It is hard to predict what LRA +will reload (the pseudo or its sub-register) but LRA +will try to minimize the data movement. 
Also for some +register classes bigger modes might be invalid, +e.g. DImode for AREG on x86. For such cases the +register move cost will be maximal. */ + mode = narrower_subreg_mode (mode, ALLOCNO_MODE (cp->second)); cost = (cp->second == allocno ? ira_register_move_cost[mode][rclass][aclass]
[12/nn] Add an is_narrower_int_mode helper function
This patch adds a function for testing whether an arbitrary mode X is an integer mode that is narrower than integer mode Y. This is useful for code like expand_float and expand_fix that could in principle handle vectors as well as scalars. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * machmode.h (is_narrower_int_mode): New function * optabs.c (expand_float, expand_fix): Use it. * dwarf2out.c (rotate_loc_descriptor): Likewise. Index: gcc/machmode.h === --- gcc/machmode.h 2017-10-23 11:44:06.561720156 +0100 +++ gcc/machmode.h 2017-10-23 11:44:23.979432614 +0100 @@ -893,6 +893,17 @@ is_complex_float_mode (machine_mode mode return false; } +/* Return true if MODE is a scalar integer mode with a precision + smaller than LIMIT's precision. */ + +inline bool +is_narrower_int_mode (machine_mode mode, scalar_int_mode limit) +{ + scalar_int_mode int_mode; + return (is_a (mode, &int_mode) + && GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (limit)); +} + namespace mode_iterator { /* Start mode iterator *ITER at the first mode in class MCLASS, if any. */ Index: gcc/optabs.c === --- gcc/optabs.c2017-10-23 11:44:07.732431531 +0100 +++ gcc/optabs.c2017-10-23 11:44:23.980398548 +0100 @@ -4820,7 +4820,7 @@ expand_float (rtx to, rtx from, int unsi rtx value; convert_optab tab = unsignedp ? ufloat_optab : sfloat_optab; - if (GET_MODE_PRECISION (GET_MODE (from)) < GET_MODE_PRECISION (SImode)) + if (is_narrower_int_mode (GET_MODE (from), SImode)) from = convert_to_mode (SImode, from, unsignedp); libfunc = convert_optab_libfunc (tab, GET_MODE (to), GET_MODE (from)); @@ -5002,7 +5002,7 @@ expand_fix (rtx to, rtx from, int unsign that the mode of TO is at least as wide as SImode, since those are the only library calls we know about. 
*/ - if (GET_MODE_PRECISION (GET_MODE (to)) < GET_MODE_PRECISION (SImode)) + if (is_narrower_int_mode (GET_MODE (to), SImode)) { target = gen_reg_rtx (SImode); Index: gcc/dwarf2out.c === --- gcc/dwarf2out.c 2017-10-23 11:44:05.684652559 +0100 +++ gcc/dwarf2out.c 2017-10-23 11:44:23.979432614 +0100 @@ -14530,8 +14530,7 @@ rotate_loc_descriptor (rtx rtl, scalar_i dw_loc_descr_ref op0, op1, ret, mask[2] = { NULL, NULL }; int i; - if (GET_MODE (rtlop1) != VOIDmode - && GET_MODE_BITSIZE (GET_MODE (rtlop1)) < GET_MODE_BITSIZE (mode)) + if (is_narrower_int_mode (GET_MODE (rtlop1), mode)) rtlop1 = gen_rtx_ZERO_EXTEND (mode, rtlop1); op0 = mem_loc_descriptor (XEXP (rtl, 0), mode, mem_mode, VAR_INIT_STATUS_INITIALIZED);
[13/nn] More is_a &lt;scalar_int_mode&gt; coercions
alias.c:find_base_term and find_base_value checked: if (GET_MODE_SIZE (GET_MODE (src)) < GET_MODE_SIZE (Pmode)) but (a) comparing the precision seems more correct, since it's possible for modes to have the same memory size as Pmode but fewer bits and (b) the functions are called on arbitrary rtl, so there's no guarantee that we're handling an integer truncation. Since there's no point processing truncations of anything other than an integer, this patch checks that first. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * alias.c (find_base_value, find_base_term): Only process integer truncations. Check the precision rather than the size. Index: gcc/alias.c === --- gcc/alias.c 2017-10-23 11:41:25.511925516 +0100 +++ gcc/alias.c 2017-10-23 11:44:27.544693078 +0100 @@ -1349,6 +1349,7 @@ known_base_value_p (rtx x) find_base_value (rtx src) { unsigned int regno; + scalar_int_mode int_mode; #if defined (FIND_BASE_TERM) /* Try machine-dependent ways to find the base term. */ @@ -1475,7 +1476,8 @@ find_base_value (rtx src) address modes depending on the address space. */ if (!target_default_pointer_address_modes_p ()) break; - if (GET_MODE_SIZE (GET_MODE (src)) < GET_MODE_SIZE (Pmode)) + if (!is_a (GET_MODE (src), &int_mode) + || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode)) break; /* Fall through. */ case HIGH: @@ -1876,6 +1878,7 @@ find_base_term (rtx x) cselib_val *val; struct elt_loc_list *l, *f; rtx ret; + scalar_int_mode int_mode; #if defined (FIND_BASE_TERM) /* Try machine-dependent ways to find the base term. */ @@ -1893,7 +1896,8 @@ find_base_term (rtx x) address modes depending on the address space. */ if (!target_default_pointer_address_modes_p ()) return 0; - if (GET_MODE_SIZE (GET_MODE (x)) < GET_MODE_SIZE (Pmode)) + if (!is_a (GET_MODE (x), &int_mode) + || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode)) return 0; /* Fall through. */ case HIGH:
[14/nn] Add helpers for shift count modes
This patch adds a stub helper routine to provide the mode of a scalar shift amount, given the mode of the values being shifted. One long-standing problem has been to decide what this mode should be for arbitrary rtxes (as opposed to those directly tied to a target pattern). Is it the mode of the shifted elements? Is it word_mode? Or maybe QImode? Is it whatever the corresponding target pattern says? (In which case what should the mode be when the target doesn't have a pattern?) For now the patch picks word_mode, which should be safe on all targets but could perhaps become suboptimal if the helper routine is used more often than it is in this patch. As it stands the patch does not change the generated code. The patch also adds a helper function that constructs rtxes for constant shift amounts, again given the mode of the value being shifted. As well as helping with the SVE patches, this is one step towards allowing CONST_INTs to have a real mode. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * target.h (get_shift_amount_mode): New function. * emit-rtl.h (gen_int_shift_amount): Declare. * emit-rtl.c (gen_int_shift_amount): New function. * asan.c (asan_emit_stack_protection): Use gen_int_shift_amount instead of GEN_INT. * calls.c (shift_return_value): Likewise. * cse.c (fold_rtx): Likewise. * dse.c (find_shift_sequence): Likewise. * expmed.c (init_expmed_one_mode, store_bit_field_1, expand_shift_1) (expand_shift, expand_smod_pow2): Likewise. * lower-subreg.c (shift_cost): Likewise. * simplify-rtx.c (simplify_unary_operation_1): Likewise. (simplify_binary_operation_1): Likewise. * combine.c (try_combine, find_split_point, force_int_to_mode) (simplify_shift_const_1, simplify_shift_const): Likewise. (change_zero_ext): Likewise. Use simplify_gen_binary. * optabs.c (expand_superword_shift, expand_doubleword_mult) (expand_unop): Use gen_int_shift_amount instead of GEN_INT. (expand_binop): Likewise. 
Use get_shift_amount_mode instead of word_mode as the mode of a CONST_INT shift amount. (shift_amt_for_vec_perm_mask): Add a machine_mode argument. Use gen_int_shift_amount instead of GEN_INT. (expand_vec_perm): Update caller accordingly. Use gen_int_shift_amount instead of GEN_INT. Index: gcc/target.h === --- gcc/target.h2017-10-23 11:47:06.643477568 +0100 +++ gcc/target.h2017-10-23 11:47:11.277288162 +0100 @@ -209,6 +209,17 @@ #define HOOKSTRUCT(FRAGMENT) FRAGMENT extern struct gcc_target targetm; +/* Return the mode that should be used to hold a scalar shift amount + when shifting values of the given mode. */ +/* ??? This could in principle be generated automatically from the .md + shift patterns, but for now word_mode should be universally OK. */ + +inline scalar_int_mode +get_shift_amount_mode (machine_mode) +{ + return word_mode; +} + #ifdef GCC_TM_H #ifndef CUMULATIVE_ARGS_MAGIC Index: gcc/emit-rtl.h === --- gcc/emit-rtl.h 2017-10-23 11:47:06.643477568 +0100 +++ gcc/emit-rtl.h 2017-10-23 11:47:11.274393237 +0100 @@ -369,6 +369,7 @@ extern void set_reg_attrs_for_parm (rtx, extern void set_reg_attrs_for_decl_rtl (tree t, rtx x); extern void adjust_reg_mode (rtx, machine_mode); extern int mem_expr_equal_p (const_tree, const_tree); +extern rtx gen_int_shift_amount (machine_mode, HOST_WIDE_INT); extern bool need_atomic_barrier_p (enum memmodel, bool); Index: gcc/emit-rtl.c === --- gcc/emit-rtl.c 2017-10-23 11:47:06.643477568 +0100 +++ gcc/emit-rtl.c 2017-10-23 11:47:11.273428262 +0100 @@ -6478,6 +6478,15 @@ need_atomic_barrier_p (enum memmodel mod } } +/* Return a constant shift amount for shifting a value of mode MODE + by VALUE bits. */ + +rtx +gen_int_shift_amount (machine_mode mode, HOST_WIDE_INT value) +{ + return gen_int_mode (value, get_shift_amount_mode (mode)); +} + /* Initialize fields of rtl_data related to stack alignment. 
*/ void Index: gcc/asan.c === --- gcc/asan.c 2017-10-23 11:47:06.643477568 +0100 +++ gcc/asan.c 2017-10-23 11:47:11.27056 +0100 @@ -1388,7 +1388,7 @@ asan_emit_stack_protection (rtx base, rt TREE_ASM_WRITTEN (id) = 1; emit_move_insn (mem, expand_normal (build_fold_addr_expr (decl))); shadow_base = expand_binop (Pmode, lshr_optab, base, - GEN_INT (ASAN_SHADOW_SHIFT), + gen_int_shift_amount (Pmode, ASAN_SHADOW_SHIFT), NULL_RTX, 1, OPTAB_DIRECT); shadow_base = plus_constant (Pmode, shadow_base, Index: gcc/c
[15/nn] Use more specific hash functions in rtlhash.c
Avoid using add_object when we have more specific routines available. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtlhash.c (add_rtx): Use add_hwi for 'w' and add_int for 'i'. Index: gcc/rtlhash.c === --- gcc/rtlhash.c 2017-02-23 19:54:03.0 + +++ gcc/rtlhash.c 2017-10-23 11:47:20.120201389 +0100 @@ -77,11 +77,11 @@ add_rtx (const_rtx x, hash &hstate) switch (fmt[i]) { case 'w': - hstate.add_object (XWINT (x, i)); + hstate.add_hwi (XWINT (x, i)); break; case 'n': case 'i': - hstate.add_object (XINT (x, i)); + hstate.add_int (XINT (x, i)); break; case 'V': case 'E':
[16/nn] Factor out the mode handling in lower-subreg.c
This patch adds a helper routine (interesting_mode_p) to lower-subreg.c, to make the decision about whether a mode can be split and, if so, calculate the number of bytes and words in the mode. At present this function always returns true; a later patch will add cases in which it can return false. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * lower-subreg.c (interesting_mode_p): New function. (compute_costs, find_decomposable_subregs, decompose_register) (simplify_subreg_concatn, can_decompose_p, resolve_simple_move) (resolve_clobber, dump_choices): Use it. Index: gcc/lower-subreg.c === --- gcc/lower-subreg.c 2017-10-23 11:47:11.274393237 +0100 +++ gcc/lower-subreg.c 2017-10-23 11:47:23.555013148 +0100 @@ -103,6 +103,18 @@ #define twice_word_mode \ #define choices \ this_target_lower_subreg->x_choices +/* Return true if MODE is a mode we know how to lower. When returning true, + store its byte size in *BYTES and its word size in *WORDS. */ + +static inline bool +interesting_mode_p (machine_mode mode, unsigned int *bytes, + unsigned int *words) +{ + *bytes = GET_MODE_SIZE (mode); + *words = CEIL (*bytes, UNITS_PER_WORD); + return true; +} + /* RTXes used while computing costs. */ struct cost_rtxes { /* Source and target registers. 
*/ @@ -199,10 +211,10 @@ compute_costs (bool speed_p, struct cost for (i = 0; i < MAX_MACHINE_MODE; i++) { machine_mode mode = (machine_mode) i; - int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; - if (factor > 1) + unsigned int size, factor; + if (interesting_mode_p (mode, &size, &factor) && factor > 1) { - int mode_move_cost; + unsigned int mode_move_cost; PUT_MODE (rtxes->target, mode); PUT_MODE (rtxes->source, mode); @@ -469,10 +481,10 @@ find_decomposable_subregs (rtx *loc, enu continue; } - outer_size = GET_MODE_SIZE (GET_MODE (x)); - inner_size = GET_MODE_SIZE (GET_MODE (inner)); - outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words) + || !interesting_mode_p (GET_MODE (inner), &inner_size, + &inner_words)) + continue; /* We only try to decompose single word subregs of multi-word registers. When we find one, we return -1 to avoid iterating @@ -507,7 +519,7 @@ find_decomposable_subregs (rtx *loc, enu } else if (REG_P (x)) { - unsigned int regno; + unsigned int regno, size, words; /* We will see an outer SUBREG before we see the inner REG, so when we see a plain REG here it means a direct reference to @@ -527,7 +539,8 @@ find_decomposable_subregs (rtx *loc, enu regno = REGNO (x); if (!HARD_REGISTER_NUM_P (regno) - && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + && interesting_mode_p (GET_MODE (x), &size, &words) + && words > 1) { switch (*pcmi) { @@ -567,15 +580,15 @@ find_decomposable_subregs (rtx *loc, enu decompose_register (unsigned int regno) { rtx reg; - unsigned int words, i; + unsigned int size, words, i; rtvec v; reg = regno_reg_rtx[regno]; regno_reg_rtx[regno] = NULL_RTX; - words = GET_MODE_SIZE (GET_MODE (reg)); - words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (GET_MODE (reg), &size, &words)) +gcc_unreachable (); v = rtvec_alloc (words); for (i = 0; i < words; 
++i) @@ -599,25 +612,29 @@ decompose_register (unsigned int regno) simplify_subreg_concatn (machine_mode outermode, rtx op, unsigned int byte) { - unsigned int inner_size; + unsigned int outer_size, outer_words, inner_size, inner_words; machine_mode innermode, partmode; rtx part; unsigned int final_offset; + innermode = GET_MODE (op); + if (!interesting_mode_p (outermode, &outer_size, &outer_words) + || !interesting_mode_p (innermode, &inner_size, &inner_words)) +gcc_unreachable (); + gcc_assert (GET_CODE (op) == CONCATN); - gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); + gcc_assert (byte % outer_size == 0); - innermode = GET_MODE (op); - gcc_assert (byte < GET_MODE_SIZE (innermode)); - if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode)) + gcc_assert (byte < inner_size); + if (outer_size > inner_size) return NULL_RTX; - inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); + inner_size /= XVECLEN (op, 0); part = XVECEXP (op, 0, byte / inner_size); partmode = GET_MODE (part); final_offset = byte % inner_size; - if
Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode
On Mon, Oct 23, 2017 at 1:07 PM, Jakub Jelinek wrote: > On Mon, Oct 23, 2017 at 12:27:15PM +0200, Uros Bizjak wrote: >> On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek wrote: >> > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote: >> >> Hello! >> >> >> >> In PR 82628 Jakub figured out that insn patterns that consume carry >> >> flag were not 100% correct. Due to this issue, combine is able to >> >> simplify various CC_REG propagations that result in invalid code. >> >> >> >> Attached patch fixes (well, mitigates) the above problem by splitting >> >> the double-mode compare after the reload, in the same way other >> >> *_doubleword patterns are handled from "the beginning of the time". >> > >> > I'm afraid this is going to haunt us sooner or later, combine isn't the >> > only pass that uses simplify-rtx.c infrastructure heavily and when we lie >> > in the RTL pattern, eventually something will be simplified wrongly. >> > >> > So, at least we'd need to use UNSPEC for the pattern, like (only lightly >> > tested so far) below. >> >> I agree with the above. Patterns that consume Carry flag are now >> marked with (plus (ltu (...)), but effectively, they behave like >> unspecs. So, I see no problem to change all SBB and ADC to unspec at >> once, similar to the change you proposed in the patch. > > So like this (addcarry/subborrow defered to a separate patch)? > Or do you want to use UNSPEC even for the unsigned comparison case, > i.e. from the patch remove the predicates.md/constraints.md part, > sub3_carry_ccc{,_1} and anything related to that? Looking at the attached patch, I think, this won't be necessary anymore. The pattern is quite important for 32bit targets, so this fact warrants a couple of complicated patterns. > As for addcarry/subborrow, the problem is that we expect in the pr67317* > tests that combine is able to notice that the CF setter sets CF to > unconditional 0 and matches the pattern. 
With the patch I wrote > we end up with the combiner trying to match an insn where the CCC > is set from a TImode comparison: > (parallel [ > (set (reg:CC 17 flags) > (compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ]) > (reg/v:DI 94 [ c ]))) > (zero_extend:TI (reg/v:DI 94 [ c ] > (set (reg:DI 98) > (plus:DI (reg/v:DI 92 [ a ]) > (reg/v:DI 94 [ c ]))) > ]) > So, either we need a define_insn_and_split pattern that would deal with > that (for UNSPEC it would be the same thing, have a define_insn_and_split > that would replace the (ltu...) with (const_int 0)), or perhaps be smarter > during expansion, if we see the first argument is constant 0, expand it > like a normal add instruction with CC setter. > > 2017-10-23 Jakub Jelinek > > PR target/82628 > * config/i386/predicates.md (x86_64_dwzext_immediate_operand): New. > * config/i386/constraints.md (Wf): New constraint. > * config/i386/i386.md (UNSPEC_SBB): New unspec. > (cmp_doubleword): Removed. > (sub3_carry_ccc, *sub3_carry_ccc_1): New patterns. > (sub3_carry_ccgz): Use unspec instead of compare. > * config/i386/i386.c (ix86_expand_branch) : Don't > expand with cmp_doubleword. For LTU and GEU use > sub3_carry_ccc instead of sub3_carry_ccgz and use CCCmode. OK. Thanks, Uros. > --- gcc/config/i386/predicates.md.jj2017-10-23 12:00:13.899355249 +0200 > +++ gcc/config/i386/predicates.md 2017-10-23 12:52:20.696576114 +0200 > @@ -366,6 +366,31 @@ (define_predicate "x86_64_hilo_int_opera > } > }) > > +;; Return true if VALUE is a constant integer whose value is > +;; x86_64_immediate_operand value zero extended from word mode to mode. 
> +(define_predicate "x86_64_dwzext_immediate_operand" > + (match_code "const_int,const_wide_int") > +{ > + switch (GET_CODE (op)) > +{ > +case CONST_INT: > + if (!TARGET_64BIT) > + return UINTVAL (op) <= HOST_WIDE_INT_UC (0x); > + return UINTVAL (op) <= HOST_WIDE_INT_UC (0x7fff); > + > +case CONST_WIDE_INT: > + if (!TARGET_64BIT) > + return false; > + return (CONST_WIDE_INT_NUNITS (op) == 2 > + && CONST_WIDE_INT_ELT (op, 1) == 0 > + && (trunc_int_for_mode (CONST_WIDE_INT_ELT (op, 0), SImode) > + == (HOST_WIDE_INT) CONST_WIDE_INT_ELT (op, 0))); > + > +default: > + gcc_unreachable (); > +} > +}) > + > ;; Return true if size of VALUE can be stored in a sign > ;; extended immediate field. > (define_predicate "x86_64_immediate_size_operand" > --- gcc/config/i386/constraints.md.jj 2017-10-23 12:00:13.850355874 +0200 > +++ gcc/config/i386/constraints.md 2017-10-23 12:52:20.697576102 +0200 > @@ -332,6 +332,11 @@ (define_constraint "Wd" > of it satisfies the e constraint." >(match_operand 0 "x86_64_hilo_int_operand")) > > +(define_constraint "Wf" > + "32-bit signed integer constant zero extended f
[17/nn] Turn var-tracking.c:INT_MEM_OFFSET into a function
This avoids the double evaluation mentioned in the comments and simplifies the change to make MEM_OFFSET variable. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * var-tracking.c (INT_MEM_OFFSET): Replace with... (int_mem_offset): ...this new function. (var_mem_set, var_mem_delete_and_set, var_mem_delete) (find_mem_expr_in_1pdv, dataflow_set_preserve_mem_locs) (same_variable_part_p, use_type, add_stores, vt_get_decl_and_offset): Update accordingly. Index: gcc/var-tracking.c === --- gcc/var-tracking.c 2017-09-12 14:28:56.401824826 +0100 +++ gcc/var-tracking.c 2017-10-23 11:47:27.197231712 +0100 @@ -390,8 +390,15 @@ struct variable /* Pointer to the BB's information specific to variable tracking pass. */ #define VTI(BB) ((variable_tracking_info *) (BB)->aux) -/* Macro to access MEM_OFFSET as an HOST_WIDE_INT. Evaluates MEM twice. */ -#define INT_MEM_OFFSET(mem) (MEM_OFFSET_KNOWN_P (mem) ? MEM_OFFSET (mem) : 0) +/* Return MEM_OFFSET (MEM) as a HOST_WIDE_INT, or 0 if we can't. 
*/ + +static inline HOST_WIDE_INT +int_mem_offset (const_rtx mem) +{ + if (MEM_OFFSET_KNOWN_P (mem)) +return MEM_OFFSET (mem); + return 0; +} #if CHECKING_P && (GCC_VERSION >= 2007) @@ -2336,7 +2343,7 @@ var_mem_set (dataflow_set *set, rtx loc, rtx set_src) { tree decl = MEM_EXPR (loc); - HOST_WIDE_INT offset = INT_MEM_OFFSET (loc); + HOST_WIDE_INT offset = int_mem_offset (loc); var_mem_decl_set (set, loc, initialized, dv_from_decl (decl), offset, set_src, INSERT); @@ -2354,7 +2361,7 @@ var_mem_delete_and_set (dataflow_set *se enum var_init_status initialized, rtx set_src) { tree decl = MEM_EXPR (loc); - HOST_WIDE_INT offset = INT_MEM_OFFSET (loc); + HOST_WIDE_INT offset = int_mem_offset (loc); clobber_overlapping_mems (set, loc); decl = var_debug_decl (decl); @@ -2375,7 +2382,7 @@ var_mem_delete_and_set (dataflow_set *se var_mem_delete (dataflow_set *set, rtx loc, bool clobber) { tree decl = MEM_EXPR (loc); - HOST_WIDE_INT offset = INT_MEM_OFFSET (loc); + HOST_WIDE_INT offset = int_mem_offset (loc); clobber_overlapping_mems (set, loc); decl = var_debug_decl (decl); @@ -4618,7 +4625,7 @@ find_mem_expr_in_1pdv (tree expr, rtx va for (node = var->var_part[0].loc_chain; node; node = node->next) if (MEM_P (node->loc) && MEM_EXPR (node->loc) == expr - && INT_MEM_OFFSET (node->loc) == 0) + && int_mem_offset (node->loc) == 0) { where = node; break; @@ -4683,7 +4690,7 @@ dataflow_set_preserve_mem_locs (variable /* We want to remove dying MEMs that don't refer to DECL. */ if (GET_CODE (loc->loc) == MEM && (MEM_EXPR (loc->loc) != decl - || INT_MEM_OFFSET (loc->loc) != 0) + || int_mem_offset (loc->loc) != 0) && mem_dies_at_call (loc->loc)) break; /* We want to move here MEMs that do refer to DECL. 
*/ @@ -4727,7 +4734,7 @@ dataflow_set_preserve_mem_locs (variable if (GET_CODE (loc->loc) != MEM || (MEM_EXPR (loc->loc) == decl - && INT_MEM_OFFSET (loc->loc) == 0) + && int_mem_offset (loc->loc) == 0) || !mem_dies_at_call (loc->loc)) { if (old_loc != loc->loc && emit_notes) @@ -5254,7 +5261,7 @@ same_variable_part_p (rtx loc, tree expr else if (MEM_P (loc)) { expr2 = MEM_EXPR (loc); - offset2 = INT_MEM_OFFSET (loc); + offset2 = int_mem_offset (loc); } else return false; @@ -5522,7 +5529,7 @@ use_type (rtx loc, struct count_use_info return MO_CLOBBER; else if (target_for_debug_bind (var_debug_decl (expr))) return MO_CLOBBER; - else if (track_loc_p (loc, expr, INT_MEM_OFFSET (loc), + else if (track_loc_p (loc, expr, int_mem_offset (loc), false, modep, NULL) /* Multi-part variables shouldn't refer to one-part variable names such as VALUEs (never happens) or @@ -6017,7 +6024,7 @@ add_stores (rtx loc, const_rtx expr, voi rtx xexpr = gen_rtx_SET (loc, src); if (same_variable_part_p (SET_SRC (xexpr), MEM_EXPR (loc), - INT_MEM_OFFSET (loc))) + int_mem_offset (loc))) mo.type = MO_COPY; else mo.type = MO_SET; @@ -9579,7 +9586,7 @@ vt_get_decl_and_offset (rtx rtl, tree *d if (MEM_ATTRS (rtl)) { *declp = MEM_EXPR (rtl); - *offsetp = INT_MEM_OFFSET (rtl); + *offsetp = int_mem_offset (rtl); return true; } }
[18/nn] Use (CONST_VECTOR|GET_MODE)_NUNITS in simplify-rtx.c
This patch avoids some calculations of the form: GET_MODE_SIZE (vector_mode) / GET_MODE_SIZE (element_mode) in simplify-rtx.c. If we're dealing with CONST_VECTORs, it's better to use CONST_VECTOR_NUNITS, since that remains constant even after the SVE patches. In other cases we can get the number from GET_MODE_NUNITS. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * simplify-rtx.c (simplify_const_unary_operation): Use GET_MODE_NUNITS and CONST_VECTOR_NUNITS instead of computing the number of units from the byte sizes of the vector and element. (simplify_binary_operation_1): Likewise. (simplify_const_binary_operation): Likewise. (simplify_ternary_operation): Likewise. Index: gcc/simplify-rtx.c === --- gcc/simplify-rtx.c 2017-10-23 11:47:11.277288162 +0100 +++ gcc/simplify-rtx.c 2017-10-23 11:47:32.868935554 +0100 @@ -1752,18 +1752,12 @@ simplify_const_unary_operation (enum rtx return gen_const_vec_duplicate (mode, op); if (GET_CODE (op) == CONST_VECTOR) { - int elt_size = GET_MODE_UNIT_SIZE (mode); - unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); - rtvec v = rtvec_alloc (n_elts); - unsigned int i; - - machine_mode inmode = GET_MODE (op); - int in_elt_size = GET_MODE_UNIT_SIZE (inmode); - unsigned in_n_elts = (GET_MODE_SIZE (inmode) / in_elt_size); - + unsigned int n_elts = GET_MODE_NUNITS (mode); + unsigned int in_n_elts = CONST_VECTOR_NUNITS (op); gcc_assert (in_n_elts < n_elts); gcc_assert ((n_elts % in_n_elts) == 0); - for (i = 0; i < n_elts; i++) + rtvec v = rtvec_alloc (n_elts); + for (unsigned i = 0; i < n_elts; i++) RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts); return gen_rtx_CONST_VECTOR (mode, v); } @@ -3608,9 +3602,7 @@ simplify_binary_operation_1 (enum rtx_co rtx op0 = XEXP (trueop0, 0); rtx op1 = XEXP (trueop0, 1); - machine_mode opmode = GET_MODE (op0); - int elt_size = GET_MODE_UNIT_SIZE (opmode); - int n_elts = GET_MODE_SIZE (opmode) / elt_size; + int n_elts = GET_MODE_NUNITS (GET_MODE (op0)); int i = INTVAL (XVECEXP 
(trueop1, 0, 0)); int elem; @@ -3637,21 +3629,8 @@ simplify_binary_operation_1 (enum rtx_co mode01 = GET_MODE (op01); /* Find out number of elements of each operand. */ - if (VECTOR_MODE_P (mode00)) - { - elt_size = GET_MODE_UNIT_SIZE (mode00); - n_elts00 = GET_MODE_SIZE (mode00) / elt_size; - } - else - n_elts00 = 1; - - if (VECTOR_MODE_P (mode01)) - { - elt_size = GET_MODE_UNIT_SIZE (mode01); - n_elts01 = GET_MODE_SIZE (mode01) / elt_size; - } - else - n_elts01 = 1; + n_elts00 = GET_MODE_NUNITS (mode00); + n_elts01 = GET_MODE_NUNITS (mode01); gcc_assert (n_elts == n_elts00 + n_elts01); @@ -3771,9 +3750,8 @@ simplify_binary_operation_1 (enum rtx_co rtx subop1 = XEXP (trueop0, 1); machine_mode mode0 = GET_MODE (subop0); machine_mode mode1 = GET_MODE (subop1); - int li = GET_MODE_UNIT_SIZE (mode0); - int l0 = GET_MODE_SIZE (mode0) / li; - int l1 = GET_MODE_SIZE (mode1) / li; + int l0 = GET_MODE_NUNITS (mode0); + int l1 = GET_MODE_NUNITS (mode1); int i0 = INTVAL (XVECEXP (trueop1, 0, 0)); if (i0 == 0 && !side_effects_p (op1) && mode == mode0) { @@ -3931,14 +3909,10 @@ simplify_binary_operation_1 (enum rtx_co || CONST_SCALAR_INT_P (trueop1) || CONST_DOUBLE_AS_FLOAT_P (trueop1))) { - int elt_size = GET_MODE_UNIT_SIZE (mode); - unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size); + unsigned n_elts = GET_MODE_NUNITS (mode); + unsigned in_n_elts = GET_MODE_NUNITS (op0_mode); rtvec v = rtvec_alloc (n_elts); unsigned int i; - unsigned in_n_elts = 1; - - if (VECTOR_MODE_P (op0_mode)) - in_n_elts = (GET_MODE_SIZE (op0_mode) / elt_size); for (i = 0; i < n_elts; i++) { if (i < in_n_elts) @@ -4026,16 +4000,12 @@ simplify_const_binary_operation (enum rt && GET_CODE (op0) == CONST_VECTOR && GET_CODE (op1) == CONST_VECTOR) { - unsigned n_elts = GET_MODE_NUNITS (mode); - machine_mode op0mode = GET_MODE (op0); - unsigned op0_n_elts = GET_MODE_NUNITS (
[19/nn] Don't treat zero-sized ranges as overlapping
Most GCC ranges seem to be represented as an offset and a size (rather than a start and inclusive end or start and exclusive end). The usual test for whether X is in a range is of course: x >= start && x < start + size or: x >= start && x - start < size which means that an empty range of size 0 contains nothing. But other range tests aren't as obvious. The usual test for whether one range is contained within another range is: start1 >= start2 && start1 + size1 <= start2 + size2 while the test for whether two ranges overlap (from ranges_overlap_p) is: (start1 >= start2 && start1 < start2 + size2) || (start2 >= start1 && start2 < start1 + size1) i.e. the ranges overlap if one range contains the start of the other range. This leads to strange results like: (start X, size 0) is a subrange of (start X, size 0) but (start X, size 0) does not overlap (start X, size 0) Similarly: (start 4, size 0) is a subrange of (start 2, size 2) but (start 4, size 0) does not overlap (start 2, size 2) It seems like "X is a subrange of Y" should imply "X overlaps Y". This becomes harder to ignore with the runtime sizes and offsets added for SVE. The most obvious fix seemed to be to say that an empty range does not overlap anything, and is therefore not a subrange of anything. Using the new definition of subranges didn't seem to cause any codegen differences in the testsuite. But there was one change with the new definition of overlapping ranges. strncpy-chk.c has: memset (dst, 0, sizeof (dst)); if (strncpy (dst, src, 0) != dst || strcmp (dst, "")) abort(); The strncpy is detected as a zero-size write, and so with the new definition of overlapping ranges, we treat the strncpy as having no effect on the strcmp (which is true). The reaching definition is the memset instead. This patch makes ranges_overlap_p return false for zero-sized ranges, even if the other range has an unknown size. 
2017-10-23 Richard Sandiford gcc/ * tree-ssa-alias.h (ranges_overlap_p): Return false if either range is known to be empty. Index: gcc/tree-ssa-alias.h === --- gcc/tree-ssa-alias.h2017-03-28 16:19:22.0 +0100 +++ gcc/tree-ssa-alias.h2017-10-23 11:47:38.181155696 +0100 @@ -171,6 +171,8 @@ ranges_overlap_p (HOST_WIDE_INT pos1, HOST_WIDE_INT pos2, unsigned HOST_WIDE_INT size2) { + if (size1 == 0 || size2 == 0) +return false; if (pos1 >= pos2 && (size2 == (unsigned HOST_WIDE_INT)-1 || pos1 < (pos2 + (HOST_WIDE_INT) size2)))
[20/nn] Make tree-ssa-dse.c:normalize_ref return a bool
This patch moves the check for an overlapping byte to normalize_ref from its callers, so that it's easier to convert to poly_ints later. It's not really worth it on its own. 2017-10-23 Richard Sandiford gcc/ * tree-ssa-dse.c (normalize_ref): Check whether the ranges overlap and return false if not. (clear_bytes_written_by, live_bytes_read): Update accordingly. Index: gcc/tree-ssa-dse.c === --- gcc/tree-ssa-dse.c 2017-10-23 11:41:23.587123840 +0100 +++ gcc/tree-ssa-dse.c 2017-10-23 11:47:41.546155781 +0100 @@ -137,13 +137,11 @@ valid_ao_ref_for_dse (ao_ref *ref) && (ref->size != -1)); } -/* Normalize COPY (an ao_ref) relative to REF. Essentially when we are - done COPY will only refer bytes found within REF. +/* Try to normalize COPY (an ao_ref) relative to REF. Essentially when we are + done COPY will only refer bytes found within REF. Return true if COPY + is known to intersect at least one byte of REF. */ - We have already verified that COPY intersects at least one - byte with REF. */ - -static void +static bool normalize_ref (ao_ref *copy, ao_ref *ref) { /* If COPY starts before REF, then reset the beginning of @@ -151,13 +149,22 @@ normalize_ref (ao_ref *copy, ao_ref *ref number of bytes removed from COPY. */ if (copy->offset < ref->offset) { - copy->size -= (ref->offset - copy->offset); + HOST_WIDE_INT diff = ref->offset - copy->offset; + if (copy->size <= diff) + return false; + copy->size -= diff; copy->offset = ref->offset; } + HOST_WIDE_INT diff = copy->offset - ref->offset; + if (ref->size <= diff) +return false; + /* If COPY extends beyond REF, chop off its size appropriately. */ - if (copy->offset + copy->size > ref->offset + ref->size) -copy->size -= (copy->offset + copy->size - (ref->offset + ref->size)); + HOST_WIDE_INT limit = ref->size - diff; + if (copy->size > limit) +copy->size = limit; + return true; } /* Clear any bytes written by STMT from the bitmap LIVE_BYTES. 
The base @@ -179,14 +186,10 @@ clear_bytes_written_by (sbitmap live_byt if (valid_ao_ref_for_dse (&write) && operand_equal_p (write.base, ref->base, OEP_ADDRESS_OF) && write.size == write.max_size - && ((write.offset < ref->offset - && write.offset + write.size > ref->offset) - || (write.offset >= ref->offset - && write.offset < ref->offset + ref->size))) -{ - normalize_ref (&write, ref); - bitmap_clear_range (live_bytes, - (write.offset - ref->offset) / BITS_PER_UNIT, + && normalize_ref (&write, ref)) +{ + HOST_WIDE_INT start = write.offset - ref->offset; + bitmap_clear_range (live_bytes, start / BITS_PER_UNIT, write.size / BITS_PER_UNIT); } } @@ -480,21 +483,20 @@ live_bytes_read (ao_ref use_ref, ao_ref { /* We have already verified that USE_REF and REF hit the same object. Now verify that there's actually an overlap between USE_REF and REF. */ - if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size)) + if (normalize_ref (&use_ref, ref)) { - normalize_ref (&use_ref, ref); + HOST_WIDE_INT start = use_ref.offset - ref->offset; + HOST_WIDE_INT size = use_ref.size; /* If USE_REF covers all of REF, then it will hit one or more live bytes. This avoids useless iteration over the bitmap below. */ - if (use_ref.offset <= ref->offset - && use_ref.offset + use_ref.size >= ref->offset + ref->size) + if (start == 0 && size == ref->size) return true; /* Now check if any of the remaining bits in use_ref are set in LIVE. */ - unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT; - unsigned int end = ((use_ref.offset + use_ref.size) / BITS_PER_UNIT) - 1; - return bitmap_bit_in_range_p (live, start, end); + return bitmap_bit_in_range_p (live, start / BITS_PER_UNIT, + (start + size - 1) / BITS_PER_UNIT); } return true; }
[21/nn] Minor vn_reference_lookup_3 tweak
The repeated checks for MEM_REF made this code hard to convert to poly_ints as-is. Hopefully the new structure also makes it clearer at a glance what the two cases are. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid repeated checks for MEM_REF. Index: gcc/tree-ssa-sccvn.c === --- gcc/tree-ssa-sccvn.c2017-10-23 11:47:03.852769480 +0100 +++ gcc/tree-ssa-sccvn.c2017-10-23 11:47:44.596155858 +0100 @@ -2234,6 +2234,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree || offset % BITS_PER_UNIT != 0 || ref->size % BITS_PER_UNIT != 0) return (void *)-1; + at = offset / BITS_PER_UNIT; /* Extract a pointer base and an offset for the destination. */ lhs = gimple_call_arg (def_stmt, 0); @@ -2301,19 +2302,18 @@ vn_reference_lookup_3 (ao_ref *ref, tree copy_size = tree_to_uhwi (gimple_call_arg (def_stmt, 2)); /* The bases of the destination and the references have to agree. */ - if ((TREE_CODE (base) != MEM_REF - && !DECL_P (base)) - || (TREE_CODE (base) == MEM_REF - && (TREE_OPERAND (base, 0) != lhs - || !tree_fits_uhwi_p (TREE_OPERAND (base, 1 - || (DECL_P (base) - && (TREE_CODE (lhs) != ADDR_EXPR - || TREE_OPERAND (lhs, 0) != base))) + if (TREE_CODE (base) == MEM_REF) + { + if (TREE_OPERAND (base, 0) != lhs + || !tree_fits_uhwi_p (TREE_OPERAND (base, 1))) + return (void *) -1; + at += tree_to_uhwi (TREE_OPERAND (base, 1)); + } + else if (!DECL_P (base) + || TREE_CODE (lhs) != ADDR_EXPR + || TREE_OPERAND (lhs, 0) != base) return (void *)-1; - at = offset / BITS_PER_UNIT; - if (TREE_CODE (base) == MEM_REF) - at += tree_to_uhwi (TREE_OPERAND (base, 1)); /* If the access is completely outside of the memcpy destination area there is no aliasing. */ if (lhs_offset >= at + maxsize / BITS_PER_UNIT
[22/nn] Make dse.c use offset/width instead of start/end
store_info and read_info_type in dse.c represented the ranges as start/end, but a lot of the internal code used offset/width instead. Using offset/width throughout fits better with the poly_int.h range-checking functions. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * dse.c (store_info, read_info_type): Replace begin and end with offset and width. (print_range): New function. (set_all_positions_unneeded, any_positions_needed_p) (check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Update accordingly. (record_store): Likewise. Optimize the case in which all positions are unneeded. (get_stored_val): Replace read_begin and read_end with read_offset and read_width. (replace_read): Update call accordingly. Index: gcc/dse.c === --- gcc/dse.c 2017-10-23 11:47:11.273428262 +0100 +++ gcc/dse.c 2017-10-23 11:47:48.294155952 +0100 @@ -243,9 +243,12 @@ struct store_info /* Canonized MEM address for use by canon_true_dependence. */ rtx mem_addr; - /* The offset of the first and byte before the last byte associated - with the operation. */ - HOST_WIDE_INT begin, end; + /* The offset of the first byte associated with the operation. */ + HOST_WIDE_INT offset; + + /* The number of bytes covered by the operation. This is always exact + and known (rather than -1). */ + HOST_WIDE_INT width; union { @@ -261,7 +264,7 @@ struct store_info bitmap bmap; /* Number of set bits (i.e. unneeded bytes) in BITMAP. If it is -equal to END - BEGIN, the whole store is unused. */ +equal to WIDTH, the whole store is unused. */ int count; } large; } positions_needed; @@ -304,10 +307,11 @@ struct read_info_type /* The id of the mem group of the base address. */ int group_id; - /* The offset of the first and byte after the last byte associated - with the operation. If begin == end == 0, the read did not have - a constant offset. */ - int begin, end; + /* The offset of the first byte associated with the operation. 
*/ + HOST_WIDE_INT offset; + + /* The number of bytes covered by the operation, or -1 if not known. */ + HOST_WIDE_INT width; /* The mem being read. */ rtx mem; @@ -586,6 +590,18 @@ static deferred_change *deferred_change_ /* The number of bits used in the global bitmaps. */ static unsigned int current_position; + +/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE. */ + +static void +print_range (FILE *file, poly_int64 offset, poly_int64 width) +{ + fprintf (file, "["); + print_dec (offset, file, SIGNED); + fprintf (file, ".."); + print_dec (offset + width, file, SIGNED); + fprintf (file, ")"); +} /* Zeroth step. @@ -1212,10 +1228,9 @@ set_all_positions_unneeded (store_info * { if (__builtin_expect (s_info->is_large, false)) { - int pos, end = s_info->end - s_info->begin; - for (pos = 0; pos < end; pos++) - bitmap_set_bit (s_info->positions_needed.large.bmap, pos); - s_info->positions_needed.large.count = end; + bitmap_set_range (s_info->positions_needed.large.bmap, + 0, s_info->width); + s_info->positions_needed.large.count = s_info->width; } else s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U; @@ -1227,8 +1242,7 @@ set_all_positions_unneeded (store_info * any_positions_needed_p (store_info *s_info) { if (__builtin_expect (s_info->is_large, false)) -return (s_info->positions_needed.large.count - < s_info->end - s_info->begin); +return s_info->positions_needed.large.count < s_info->width; else return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U); } @@ -1355,8 +1369,12 @@ record_store (rtx body, bb_info_t bb_inf set_usage_bits (group, offset, width, expr); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " processing const base store gid=%d[%d..%d)\n", -group_id, (int)offset, (int)(offset+width)); + { + fprintf (dump_file, " processing const base store gid=%d", + group_id); + print_range (dump_file, offset, width); + fprintf (dump_file, "\n"); + } } else { @@ -1368,8 +1386,11 @@ record_store (rtx body, bb_info_t 
bb_inf group_id = -1; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " processing cselib store [%d..%d)\n", -(int)offset, (int)(offset+width)); + { + fprintf (dump_file, " processing cselib store "); + print_range (dump_file, offset, width); + fprintf (dump_file, "\n"); + } } const_rhs = rhs = NULL_RTX; @@ -1435,18 +1456,21 @@ re
[PATCH] i386: Skip DF_REF_INSN if DF_REF_INSN_INFO is false
We should check DF_REF_INSN_INFO before accessing DF_REF_INSN. OK for trunk? H.J. --- gcc/ PR target/82673 * config/i386/i386.c (ix86_finalize_stack_frame_flags): Skip DF_REF_INSN if DF_REF_INSN_INFO is false. gcc/testsuite/ PR target/82673 * gcc.target/i386/pr82673.c: New test. --- gcc/config/i386/i386.c | 7 +-- gcc/testsuite/gcc.target/i386/pr82673.c | 12 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr82673.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b86504378ae..25c898866e2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12694,10 +12694,13 @@ ix86_finalize_stack_frame_flags (void) for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); ref; ref = next) { - rtx_insn *insn = DF_REF_INSN (ref); + next = DF_REF_NEXT_REG (ref); + if (!DF_REF_INSN_INFO (ref)) + continue; + /* Make sure the next ref is for a different instruction, so that we're not affected by the rescan. */ - next = DF_REF_NEXT_REG (ref); + rtx_insn *insn = DF_REF_INSN (ref); while (next && DF_REF_INSN (next) == insn) next = DF_REF_NEXT_REG (next); diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c b/gcc/testsuite/gcc.target/i386/pr82673.c new file mode 100644 index 000..cff4b34535b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82673.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fno-omit-frame-pointer -fvar-tracking-assignments-toggle" } */ + +register long *B asm ("ebp"); + +long y = 20; + +void +bar (void) /* { dg-error "frame pointer required, but reserved" } */ +{ + B = &y; +} /* { dg-error "bp cannot be used in asm here" } */ -- 2.13.6
[PATCH] Implement C++17 Filesystem library
Based on Filesystem TS implementation, with the changes applied by: - P0219R1 Relative Paths for Filesystem - P0317R1 Directory Entry Caching for Filesystem - P0492R2 Resolution of C++17 National Body Comments Where appropriate code is shared between the TS and C++17 implementations. * include/Makefile.am: Add new headers for C++17 filesystem library. * include/Makefile.in: Regenerate. * include/bits/fs_dir.h: New header, based on Filesystem TS code in include/experimental/bits directory. * include/bits/fs_fwd.h: Likewise. * include/bits/fs_ops.h: Likewise. * include/bits/fs_path.h: Likewise. * include/experimental/bits/fs_dir.h: Rename Doxygen group. * include/experimental/bits/fs_fwd.h: Likewise. * include/experimental/bits/fs_ops.h: Likewise. * include/experimental/bits/fs_path.h: Likewise. * include/experimental/filesystem (filesystem_error::_M_gen_what): Remove inline definition. * include/precompiled/stdc++.h: Add <filesystem> to precompiled header. * include/std/filesystem: New header. * python/libstdcxx/v6/printers.py: Enable printer for std::filesystem paths. * src/filesystem/Makefile.am: Add new files. Compile as C++17. * src/filesystem/Makefile.in: Regenerate. * src/filesystem/cow-dir.cc: Update comment. * src/filesystem/cow-ops.cc: Likewise. * src/filesystem/cow-path.cc: Likewise. * src/filesystem/cow-std-dir.cc: New file. * src/filesystem/cow-std-ops.cc: New file. * src/filesystem/cow-std-path.cc: New file. * src/filesystem/dir-common.h (_Dir_base, get_file_type): New header for common code. * src/filesystem/dir.cc (_Dir): Derive from _Dir_base. (open_dir): Move to _Dir_base constructor. (get_file_type): Move to dir-common.h. (recurse): Move to _Dir_base::should_recurse. * src/filesystem/ops-common.h: New header for common code. * src/filesystem/ops.cc (is_set, make_file_type, make_file_status) (is_not_found_errno, file_time, do_copy_file): Move to ops-common.h. * src/filesystem/path.cc (filesystem_error::_M_gen_what): Define. 
* src/filesystem/std-dir.cc: New file, based on Filesystem TS code. * src/filesystem/std-ops.cc: Likewise. * src/filesystem/std-path.cc: Likewise. * testsuite/27_io/filesystem/iterators/directory_iterator.cc: New test. * testsuite/27_io/filesystem/iterators/pop.cc: New test. * testsuite/27_io/filesystem/iterators/recursive_directory_iterator.cc: New test. * testsuite/27_io/filesystem/operations/absolute.cc: New test. * testsuite/27_io/filesystem/operations/canonical.cc: New test. * testsuite/27_io/filesystem/operations/copy.cc: New test. * testsuite/27_io/filesystem/operations/copy_file.cc: New test. * testsuite/27_io/filesystem/operations/create_directories.cc: New test. * testsuite/27_io/filesystem/operations/create_directory.cc: New test. * testsuite/27_io/filesystem/operations/create_symlink.cc: New test. * testsuite/27_io/filesystem/operations/current_path.cc: New test. * testsuite/27_io/filesystem/operations/equivalent.cc: New test. * testsuite/27_io/filesystem/operations/exists.cc: New test. * testsuite/27_io/filesystem/operations/file_size.cc: New test. * testsuite/27_io/filesystem/operations/is_empty.cc: New test. * testsuite/27_io/filesystem/operations/last_write_time.cc: New test. * testsuite/27_io/filesystem/operations/permissions.cc: New test. * testsuite/27_io/filesystem/operations/proximate.cc: New test. * testsuite/27_io/filesystem/operations/read_symlink.cc: New test. * testsuite/27_io/filesystem/operations/relative.cc: New test. * testsuite/27_io/filesystem/operations/remove_all.cc: New test. * testsuite/27_io/filesystem/operations/space.cc: New test. * testsuite/27_io/filesystem/operations/status.cc: New test. * testsuite/27_io/filesystem/operations/symlink_status.cc: New test. * testsuite/27_io/filesystem/operations/temp_directory_path.cc: New test. * testsuite/27_io/filesystem/operations/weakly_canonical.cc: New test. * testsuite/27_io/filesystem/path/append/path.cc: New test. * testsuite/27_io/filesystem/path/assign/assign.cc: New test. 
* testsuite/27_io/filesystem/path/assign/copy.cc: New test. * testsuite/27_io/filesystem/path/compare/compare.cc: New test. * testsuite/27_io/filesystem/path/compare/path.cc: New test. * testsuite/27_io/filesystem/path/compare/strings.cc: New test. * testsuite/27_io/filesystem/path/concat/path.cc: New test. * testsuite/27_io/filesystem/path/concat/strings.cc: New test. * testsuite/27_io/filesystem/path/construct/c
[Patch][ARM] Add -mbranch-cost option, and update a few tests
Hi, After Jakub's suggestion in PR82120 and PR81184, the attached patch adds the -mbranch-cost option to the ARM target. My understanding is that it's intended to be used internally for testing and does not require user-facing documentation. I have updated a few tests, validation on aarch64 & arm targets shows no regression, and a few improvements when targeting cortex-a5 or cortex-m3: gcc.dg/tree-ssa/reassoc-3[3456].c now pass. That being said, I'm not sure about the other targets for which I changed the condition, and I am also concerned by the fact that it has no impact on gcc.dg/pr21643.c and gcc.dg/tree-ssa/phi-opt-11.c (PR81184). Should I restrict my patch to the only tests where it has an impact (gcc.dg/tree-ssa/reassoc-3[3456].c) ? Thanks, Christophe gcc/ChangeLog: 2017-10-23 Christophe Lyon * config/arm/arm.opt (-mbranch-cost): New option. * config/arm/arm.h (BRANCH_COST): Take arm_branch_cost into account. gcc/testsuite/ChangeLog: 2017-10-23 Christophe Lyon * lib/target-supports.exp (check_effective_target_branch_cost): New function. * gcc.dg/builtin-bswap-7.c: Use branch_cost effective target. * gcc.dg/pr21643.c: Likewise. * gcc.dg/pr46309.c: Likewise. * gcc.dg/tree-ssa/phi-opt-11.c: Likewise. * gcc.dg/tree-ssa/phi-opt-2.c: Likewise. * gcc.dg/tree-ssa/reassoc-32.c: Likewise. * gcc.dg/tree-ssa/reassoc-33.c: Likewise. * gcc.dg/tree-ssa/reassoc-34.c: Likewise. * gcc.dg/tree-ssa/reassoc-35.c: Likewise. * gcc.dg/tree-ssa/reassoc-36.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-13.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-1.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-2.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-3.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-4.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-5.c: Likewise. * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-6.c: Likewise. commit 0b21f80bf10d0273e6db4655654df9e125c0dae6 Author: Christophe Lyon Date: Fri Sep 8 12:27:42 2017 + Add -mbranch-cost=N option for ARM. 
Change-Id: I4d570646c405f7b186d0d1be80ce1661ef022aea diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 2d71e8f..854c753 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1946,8 +1946,9 @@ enum arm_auto_incmodes /* Try to generate sequences that don't involve branches, we can then use conditional instructions. */ -#define BRANCH_COST(speed_p, predictable_p) \ - (current_tune->branch_cost (speed_p, predictable_p)) +#define BRANCH_COST(speed_p, predictable_p)\ + ((arm_branch_cost != -1) ? arm_branch_cost : \ + (current_tune->branch_cost (speed_p, predictable_p))) /* False if short circuit operation is preferred. */ #define LOGICAL_OP_NON_SHORT_CIRCUIT \ diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 6060516..a3719cb 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -294,3 +294,7 @@ When linking for big-endian targets, generate a BE8 format image. mbe32 Target Report RejectNegative Negative(mbe8) InverseMask(BE8) When linking for big-endian targets, generate a legacy BE32 format image. + +mbranch-cost= +Target RejectNegative Joined UInteger Var(arm_branch_cost) Init(-1) +Cost to assume for a branch insn. diff --git a/gcc/testsuite/gcc.dg/builtin-bswap-7.c b/gcc/testsuite/gcc.dg/builtin-bswap-7.c index 3e1718d..fe85441 100644 --- a/gcc/testsuite/gcc.dg/builtin-bswap-7.c +++ b/gcc/testsuite/gcc.dg/builtin-bswap-7.c @@ -5,7 +5,7 @@ /* The branch cost setting prevents the return value from being calculated with arithmetic instead of doing a compare. 
*/ -/* { dg-additional-options "-mbranch-cost=0" { target s390x-*-* } } */ +/* { dg-additional-options "-mbranch-cost=0" { target branch_cost } } */ #include diff --git a/gcc/testsuite/gcc.dg/pr21643.c b/gcc/testsuite/gcc.dg/pr21643.c index bd76aa8..d981fbc 100644 --- a/gcc/testsuite/gcc.dg/pr21643.c +++ b/gcc/testsuite/gcc.dg/pr21643.c @@ -1,6 +1,7 @@ /* PR tree-optimization/21643 */ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-reassoc1-details" } */ +/* { dg-additional-options "-mbranch-cost=2" { target branch_cost } } */ int f1 (unsigned char c) diff --git a/gcc/testsuite/gcc.dg/pr46309.c b/gcc/testsuite/gcc.dg/pr46309.c index 68229cf..c964529 100644 --- a/gcc/testsuite/gcc.dg/pr46309.c +++ b/gcc/testsuite/gcc.dg/pr46309.c @@ -4,7 +4,7 @@ /* The transformation depends on BRANCH_COST being greater than 1 (see the notes in the PR), so try to force that. */ /* { dg-additional-options "-mtune=octeon2" { target mips*-*-* } } */ -/* { dg-additional-options "-mbranch-cost=2" { target avr*-*-* s390*-*-* i?86-*-* x86_64-*-* } } */ +/* { dg-additional-options "-mbranch-cost=2" { target branch_cost } } */ int f1 (int a) diff --gi
[PATCH] Include &lt;new&gt; from system.h (PR bootstrap/82610)
On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote: > On Thu, 19 Oct 2017, David Malcolm wrote: > > > In file included from /scratch/tmp/gerald/gcc-HEAD/gcc/unique- > > > ptr-tests.cc:23: > > > In file included from /scratch/tmp/gerald/gcc- > > > HEAD/gcc/../include/unique-ptr.h:77: > > > In file included from /usr/include/c++/v1/memory:629: > > > /usr/include/c++/v1/typeinfo:199:2: error: no member named > > > 'fancy_abort' in namespace 'std::__1'; did you mean simply > > > 'fancy_abort'? > > > _VSTD::abort(); > > > ^~~ > > > /usr/include/c++/v1/__config:390:15: note: expanded from macro > > > '_VSTD' > > > #define _VSTD std::_LIBCPP_NAMESPACE > > > > There seem to have been similar problems on OS X: > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610 > > Yes, I believe it's the same actually (unearthed by clang as system > compiler). > > > The proposed fix there is to include &lt;new&gt; in system.h, which > > presumably would fix this also. > > That appears to work around the bootstrap failure on my tester as well. > > How can we go about fixing this in the tree? > > Gerald Here's the patch by fxcoudert from the PR (plus a ChangeLog entry) Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; reported by fxcoudert as fixing the issue on darwin and by Gerald as fixing the issue on "newer versions of FreeBSD that use clang 4.0 as system compiler". OK for trunk? Sorry again about the breakage. gcc/ChangeLog: PR bootstrap/82610 * system.h [__cplusplus]: Include &lt;new&gt;. --- gcc/system.h | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/system.h b/gcc/system.h index f0664e9..d6e1637 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -233,6 +233,7 @@ extern int errno; # include #endif # include +# include &lt;new&gt; # include # include #endif -- 1.8.5.3
Re: [PING] [C++ Patch] PR 82307
Hi, following up to a short off-line exchange with Nathan, I'm sending a reworked patch which - among other things - avoids regressing on the second testcase (cpp0x/enum36.C). Tested x86_64-linux. Thanks, Paolo. /cp 2017-10-23 Mukesh Kapoor Paolo Carlini PR c++/82307 * cvt.c (type_promotes_to): Implement C++17, 7.6/4, about unscoped enumeration type whose underlying type is fixed. /testsuite 2017-10-23 Mukesh Kapoor Paolo Carlini PR c++/82307 * g++.dg/cpp0x/enum35.C: New. * g++.dg/cpp0x/enum36.C: Likewise. Index: cp/cvt.c === --- cp/cvt.c(revision 254005) +++ cp/cvt.c(working copy) @@ -1834,12 +1834,27 @@ type_promotes_to (tree type) || type == char32_type_node || type == wchar_type_node) { + tree prom = type; + + if (TREE_CODE (type) == ENUMERAL_TYPE) + { + prom = ENUM_UNDERLYING_TYPE (prom); + if (!ENUM_IS_SCOPED (type) + && ENUM_FIXED_UNDERLYING_TYPE_P (type)) + { + /* ISO C++17, 7.6/4. A prvalue of an unscoped enumeration type +whose underlying type is fixed (10.2) can be converted to a +prvalue of its underlying type. Moreover, if integral promotion +can be applied to its underlying type, a prvalue of an unscoped +enumeration type whose underlying type is fixed can also be +converted to a prvalue of the promoted underlying type. */ + return type_promotes_to (prom); + } + } + int precision = MAX (TYPE_PRECISION (type), TYPE_PRECISION (integer_type_node)); tree totype = c_common_type_for_size (precision, 0); - tree prom = type; - if (TREE_CODE (prom) == ENUMERAL_TYPE) - prom = ENUM_UNDERLYING_TYPE (prom); if (TYPE_UNSIGNED (prom) && ! 
int_fits_type_p (TYPE_MAX_VALUE (prom), totype)) prom = c_common_type_for_size (precision, 1); Index: testsuite/g++.dg/cpp0x/enum35.C === --- testsuite/g++.dg/cpp0x/enum35.C (nonexistent) +++ testsuite/g++.dg/cpp0x/enum35.C (working copy) @@ -0,0 +1,14 @@ +// PR c++/82307 +// { dg-do run { target c++11 } } + +#include + +enum : unsigned long long { VAL }; + +bool foo (unsigned long long) { return true; } +bool foo (int) { return false; } + +int main() +{ + assert (foo(VAL)); +} Index: testsuite/g++.dg/cpp0x/enum36.C === --- testsuite/g++.dg/cpp0x/enum36.C (nonexistent) +++ testsuite/g++.dg/cpp0x/enum36.C (working copy) @@ -0,0 +1,14 @@ +// PR c++/82307 +// { dg-do run { target c++11 } } + +#include + +enum : short { VAL }; + +bool foo (int) { return true; } +bool foo (unsigned long long) { return false; } + +int main() +{ + assert (foo (VAL)); +}
Re: [PATCH] Make -gcolumn-info the default
On Mon, Oct 23, 2017 at 3:33 AM, Jakub Jelinek wrote: > Hi! > > When -gcolumn-info was added back in February, it was too late in the > release cycle to make it the default, but I think now is the good time > to do it for GCC8. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Makes sense to me.
[PATCH] Fix PR82672
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2017-10-23 Richard Biener PR tree-optimization/82672 * graphite-isl-ast-to-gimple.c (graphite_copy_stmts_from_block): Fold the stmt if we propagated into it. * gfortran.dg/graphite/pr82672.f90: New testcase. Index: gcc/graphite-isl-ast-to-gimple.c === --- gcc/graphite-isl-ast-to-gimple.c(revision 253998) +++ gcc/graphite-isl-ast-to-gimple.c(working copy) @@ -1175,22 +1194,28 @@ graphite_copy_stmts_from_block (basic_bl ssa_op_iter iter; use_operand_p use_p; if (!is_gimple_debug (copy)) - FOR_EACH_SSA_USE_OPERAND (use_p, copy, iter, SSA_OP_USE) - { - tree old_name = USE_FROM_PTR (use_p); - - if (TREE_CODE (old_name) != SSA_NAME - || SSA_NAME_IS_DEFAULT_DEF (old_name) - || ! scev_analyzable_p (old_name, region->region)) - continue; - - gimple_seq stmts = NULL; - tree new_name = get_rename_from_scev (old_name, &stmts, - bb->loop_father, iv_map); - if (! codegen_error_p ()) - gsi_insert_earliest (stmts); - replace_exp (use_p, new_name); - } + { + bool changed = false; + FOR_EACH_SSA_USE_OPERAND (use_p, copy, iter, SSA_OP_USE) + { + tree old_name = USE_FROM_PTR (use_p); + + if (TREE_CODE (old_name) != SSA_NAME + || SSA_NAME_IS_DEFAULT_DEF (old_name) + || ! scev_analyzable_p (old_name, region->region)) + continue; + + gimple_seq stmts = NULL; + tree new_name = get_rename_from_scev (old_name, &stmts, + bb->loop_father, iv_map); + if (! codegen_error_p ()) + gsi_insert_earliest (stmts); + replace_exp (use_p, new_name); + changed = true; + } + if (changed) + fold_stmt_inplace (&gsi_tgt); + } update_stmt (copy); } Index: gcc/testsuite/gfortran.dg/graphite/pr82672.f90 === --- gcc/testsuite/gfortran.dg/graphite/pr82672.f90 (nonexistent) +++ gcc/testsuite/gfortran.dg/graphite/pr82672.f90 (working copy) @@ -0,0 +1,33 @@ +! { dg-do compile } +! 
{ dg-options "-O2 -floop-nest-optimize" } + + character(len=20,kind=4) :: s4 + character(len=20,kind=1) :: s1 + + s1 = "foo\u" + s1 = "foo\u00ff" + s1 = "foo\u0100" + s1 = "foo\u0101" + s1 = "foo\U0101" + + s1 = 4_"foo bar" + s1 = 4_"foo\u00ff" + s1 = 4_"foo\u0101" + s1 = 4_"foo\u1101" + s1 = 4_"foo\U" + + s4 = "foo\u" + s4 = "foo\u00ff" + s4 = "foo\u0100" + s4 = "foo\U0100" + + s4 = 4_"foo bar" + s4 = 4_"\xFF\x96" + s4 = 4_"\x00\x96" + s4 = 4_"foo\u00ff" + s4 = 4_"foo\u0101" + s4 = 4_"foo\u1101" + s4 = 4_"foo\Uab98EF56" + s4 = 4_"foo\U" + +end
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm wrote: > On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote: >> On Thu, 19 Oct 2017, David Malcolm wrote: >> > > In file included from /scratch/tmp/gerald/gcc-HEAD/gcc/unique- >> > > ptr-tests.cc:23: >> > > In file included from /scratch/tmp/gerald/gcc- >> > > HEAD/gcc/../include/unique-ptr.h:77: >> > > In file included from /usr/include/c++/v1/memory:629: >> > > /usr/include/c++/v1/typeinfo:199:2: error: no member named >> > > 'fancy_abort' in namespace 'std::__1'; did you mean simply >> > > 'fancy_abort'? >> > > _VSTD::abort(); >> > > ^~~ >> > > /usr/include/c++/v1/__config:390:15: note: expanded from macro >> > > '_VSTD' >> > > #define _VSTD std::_LIBCPP_NAMESPACE >> > >> > There seem to have been similar problems on OS X: >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610 >> >> Yes, I believe it's the same actually (unearthed by clang as system >> compiler). >> >> > The proposed fix there is to include in system.h, which >> > presumably would fix this also. >> >> That appears to work around the bootstrap failure on my tester as >> well. >> >> How can we go about fixing this in the tree? >> >> Gerald > > Here's the patch by fxcoudert from the PR (plus a ChangeLog entry) > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; > reported by fxcoudert as fixing the issue on darwin and by > Gerald as fixing the issue on "newer versions of FreeBSD that use > clang 4.0 as system compiler". > > OK for trunk? Not entirely happy as unique-ptr.h doesn't use but well. Ok to unbreak bootstrap. Thanks, Richard. > Sorry again about the breakage. > > gcc/ChangeLog: > PR bootstrap/82610 > * system.h [__cplusplus]: Include . 
> --- > gcc/system.h | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/gcc/system.h b/gcc/system.h > index f0664e9..d6e1637 100644 > --- a/gcc/system.h > +++ b/gcc/system.h > @@ -233,6 +233,7 @@ extern int errno; > # include > #endif > # include > +# include > # include > # include > #endif > -- > 1.8.5.3 >
Re: [PATCH] Make -gcolumn-info the default
On 10/23/2017 02:46 PM, Jason Merrill wrote: > On Mon, Oct 23, 2017 at 3:33 AM, Jakub Jelinek wrote: >> Hi! >> >> When -gcolumn-info was added back in February, it was too late in the >> release cycle to make it the default, but I think now is the good time >> to do it for GCC8. >> >> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Makes sense to me. +1 from me, FWIW. Thanks, Pedro Alves
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On 10/23/2017 02:51 PM, Richard Biener wrote: > On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm wrote: >> OK for trunk? > > Not entirely happy as unique-ptr.h doesn't use but well. > Actually it does. It's needed in C++11 mode, because that's where std::unique_ptr is defined: #if __cplusplus >= 201103 /* In C++11 mode, all we need is import the standard std::unique_ptr. */ template using unique_ptr = std::unique_ptr; > Ok to unbreak bootstrap. Thanks, Pedro Alves
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On Mon, 2017-10-23 at 15:51 +0200, Richard Biener wrote: > On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm > wrote: > > On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote: > > > On Thu, 19 Oct 2017, David Malcolm wrote: > > > > > In file included from /scratch/tmp/gerald/gcc- > > > > > HEAD/gcc/unique- > > > > > ptr-tests.cc:23: > > > > > In file included from /scratch/tmp/gerald/gcc- > > > > > HEAD/gcc/../include/unique-ptr.h:77: > > > > > In file included from /usr/include/c++/v1/memory:629: > > > > > /usr/include/c++/v1/typeinfo:199:2: error: no member named > > > > > 'fancy_abort' in namespace 'std::__1'; did you mean simply > > > > > 'fancy_abort'? > > > > > _VSTD::abort(); > > > > > ^~~ > > > > > /usr/include/c++/v1/__config:390:15: note: expanded from > > > > > macro > > > > > '_VSTD' > > > > > #define _VSTD std::_LIBCPP_NAMESPACE > > > > > > > > There seem to have been similar problems on OS X: > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610 > > > > > > Yes, I believe it's the same actually (unearthed by clang as > > > system > > > compiler). > > > > > > > The proposed fix there is to include in system.h, > > > > which > > > > presumably would fix this also. > > > > > > That appears to work around the bootstrap failure on my tester as > > > well. > > > > > > How can we go about fixing this in the tree? > > > > > > Gerald > > > > Here's the patch by fxcoudert from the PR (plus a ChangeLog entry) > > > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; > > reported by fxcoudert as fixing the issue on darwin and by > > Gerald as fixing the issue on "newer versions of FreeBSD that use > > clang 4.0 as system compiler". > > > > OK for trunk? > > Not entirely happy as unique-ptr.h doesn't use but well. I'm not sure I understand you here. include/unique-ptr.h has: #if __cplusplus >= 201103 /* In C++11 mode, all we need is import the standard std::unique_ptr. */ template using unique_ptr = std::unique_ptr; /* Pull in move as well. 
*/ using std::move; #else /* C++11 */ ...etc..., most of the file, the pre-C++11 implementation So in C++11 and later it's using std::unique_ptr, for which, as I understand it is the standard include, e.g.: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3337.pdf says in (20.6.2 Header synopsis [memory.syn]): "The header defines several types and function templates that describe properties of pointers and pointer-like types, manage memory for containers and other template types, [...] The header also defines the templates unique_ptr, shared_ptr, weak_ptr, and various template functions that operate on objects of these types (20.7)." Would you prefer the includes of in gcc/system.h and include/unique-ptr.h to be guarded by #if __cplusplus >= 201103 ? (not sure if it works yet, but I can try it) Thanks Dave > Ok to unbreak bootstrap. > > Thanks, > Richard. > > > Sorry again about the breakage. > > > > gcc/ChangeLog: > > PR bootstrap/82610 > > * system.h [__cplusplus]: Include . > > --- > > gcc/system.h | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/gcc/system.h b/gcc/system.h > > index f0664e9..d6e1637 100644 > > --- a/gcc/system.h > > +++ b/gcc/system.h > > @@ -233,6 +233,7 @@ extern int errno; > > # include > > #endif > > # include > > +# include > > # include > > # include > > #endif > > -- > > 1.8.5.3 > >
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On October 23, 2017 4:15:17 PM GMT+02:00, David Malcolm wrote: >On Mon, 2017-10-23 at 15:51 +0200, Richard Biener wrote: >> On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm >> wrote: >> > On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote: >> > > On Thu, 19 Oct 2017, David Malcolm wrote: >> > > > > In file included from /scratch/tmp/gerald/gcc- >> > > > > HEAD/gcc/unique- >> > > > > ptr-tests.cc:23: >> > > > > In file included from /scratch/tmp/gerald/gcc- >> > > > > HEAD/gcc/../include/unique-ptr.h:77: >> > > > > In file included from /usr/include/c++/v1/memory:629: >> > > > > /usr/include/c++/v1/typeinfo:199:2: error: no member named >> > > > > 'fancy_abort' in namespace 'std::__1'; did you mean simply >> > > > > 'fancy_abort'? >> > > > > _VSTD::abort(); >> > > > > ^~~ >> > > > > /usr/include/c++/v1/__config:390:15: note: expanded from >> > > > > macro >> > > > > '_VSTD' >> > > > > #define _VSTD std::_LIBCPP_NAMESPACE >> > > > >> > > > There seem to have been similar problems on OS X: >> > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610 >> > > >> > > Yes, I believe it's the same actually (unearthed by clang as >> > > system >> > > compiler). >> > > >> > > > The proposed fix there is to include in system.h, >> > > > which >> > > > presumably would fix this also. >> > > >> > > That appears to work around the bootstrap failure on my tester as >> > > well. >> > > >> > > How can we go about fixing this in the tree? >> > > >> > > Gerald >> > >> > Here's the patch by fxcoudert from the PR (plus a ChangeLog entry) >> > >> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; >> > reported by fxcoudert as fixing the issue on darwin and by >> > Gerald as fixing the issue on "newer versions of FreeBSD that use >> > clang 4.0 as system compiler". >> > >> > OK for trunk? >> >> Not entirely happy as unique-ptr.h doesn't use but well. > >I'm not sure I understand you here. 
> >include/unique-ptr.h has: > > #if __cplusplus >= 201103 > > /* In C++11 mode, all we need is import the standard > std::unique_ptr. */ > template using unique_ptr = std::unique_ptr; > > /* Pull in move as well. */ > using std::move; > > #else /* C++11 */ > > ...etc..., most of the file, the pre-C++11 implementation > >So in C++11 and later it's using std::unique_ptr, for which, as I >understand it is the standard include, e.g.: >http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3337.pdf >says in (20.6.2 Header synopsis [memory.syn]): > >"The header defines several types and function templates that >describe properties of pointers and pointer-like types, manage memory >for containers and other template types, [...] The header also defines >the templates unique_ptr, shared_ptr, weak_ptr, and various template >functions that operate on objects of these types (20.7)." > >Would you prefer the includes of in gcc/system.h and >include/unique-ptr.h to be guarded by #if __cplusplus >= 201103 ? (not >sure if it works yet, but I can try it) I guess so. But we have to make gdb happy as well. It really depends how much each TU grows with the extra (unneeded) include grows in C++11 and C++04 mode. Richard. >Thanks >Dave > > >> Ok to unbreak bootstrap. >> >> Thanks, >> Richard. >> >> > Sorry again about the breakage. >> > >> > gcc/ChangeLog: >> > PR bootstrap/82610 >> > * system.h [__cplusplus]: Include . >> > --- >> > gcc/system.h | 1 + >> > 1 file changed, 1 insertion(+) >> > >> > diff --git a/gcc/system.h b/gcc/system.h >> > index f0664e9..d6e1637 100644 >> > --- a/gcc/system.h >> > +++ b/gcc/system.h >> > @@ -233,6 +233,7 @@ extern int errno; >> > # include >> > #endif >> > # include >> > +# include >> > # include >> > # include >> > #endif >> > -- >> > 1.8.5.3 >> >
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
Hi, On Mon, 23 Oct 2017, Richard Biener wrote: > I guess so. But we have to make gdb happy as well. It really depends how > much each TU grows with the extra (unneeded) include grows in C++11 and > C++04 mode. The c++ headers unconditionally included from system.h, with: % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l new: 3564 cstring: 533 utility: 3623 memory: 28066 compile time: % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ - new: 0:00.06elapsed, 17060maxresident, 0major+3709minor cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor memory: 0:00.25elapsed, 40356maxresident, 0major+9764minor Hence, <memory> is not cheap at all, including it unconditionally from system.h when it isn't actually used by many things doesn't seem a good idea. Ciao, Michael.
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On 23/10/17 17:07 +0200, Michael Matz wrote: Hi, On Mon, 23 Oct 2017, Richard Biener wrote: I guess so. But we have to make gdb happy as well. It really depends how much each TU grows with the extra (unneeded) include grows in C++11 and C++04 mode. The c++ headers unconditionally included from system.h, with: % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l new: 3564 cstring: 533 utility: 3623 memory: 28066 That's using the -std=gnu++14 default for g++-7, and for that mode the header *is* needed, to get the definition of std::unique_ptr. For C++98 (when it isn't needed) that header is much smaller: tmp$ echo '#include <memory>' | g++ -E -x c++ - | wc -l 28101 tmp$ echo '#include <memory>' | g++ -E -x c++ - -std=gnu++98 | wc -l 4267 (Because it doesn't contain std::unique_ptr and std::shared_ptr before C++11). compile time: % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ - new: 0:00.06elapsed, 17060maxresident, 0major+3709minor cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor memory: 0:00.25elapsed, 40356maxresident, 0major+9764minor Hence, <memory> is not cheap at all, including it unconditionally from system.h when it isn't actually used by many things doesn't seem a good idea. Ciao, Michael.
Re: [PATCH] i386: Skip DF_REF_INSN if DF_REF_INSN_INFO is false
On Mon, Oct 23, 2017 at 1:45 PM, H.J. Lu wrote: > We should check DF_REF_INSN_INFO before accessing DF_REF_INSN. > > OK for trunk? > > H.J. > --- > gcc/ > > PR target/82673 > * config/i386/i386.c (ix86_finalize_stack_frame_flags): Skip > DF_REF_INSN if DF_REF_INSN_INFO is false. > > gcc/testsuite/ > > PR target/82673 > * gcc.target/i386/pr82673.c: New test. I'm not that versed in the dataflow details, the patch LGTM. One nit below. Thanks, Uros > --- > gcc/config/i386/i386.c | 7 +-- > gcc/testsuite/gcc.target/i386/pr82673.c | 12 > 2 files changed, 17 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr82673.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index b86504378ae..25c898866e2 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -12694,10 +12694,13 @@ ix86_finalize_stack_frame_flags (void) > for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); >ref; ref = next) > { > - rtx_insn *insn = DF_REF_INSN (ref); > + next = DF_REF_NEXT_REG (ref); > + if (!DF_REF_INSN_INFO (ref)) > + continue; > + > /* Make sure the next ref is for a different instruction, > so that we're not affected by the rescan. */ > - next = DF_REF_NEXT_REG (ref); > + rtx_insn *insn = DF_REF_INSN (ref); > while (next && DF_REF_INSN (next) == insn) > next = DF_REF_NEXT_REG (next); > > diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c > b/gcc/testsuite/gcc.target/i386/pr82673.c > new file mode 100644 > index 000..cff4b34535b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr82673.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -fno-omit-frame-pointer > -fvar-tracking-assignments-toggle" } */ Use -fvar-tracking-assignments here. > + > +register long *B asm ("ebp"); > + > +long y = 20; > + > +void > +bar (void) /* { dg-error "frame pointer required, but reserved" } */ > +{ > + B = &y; > +} /* { dg-error "bp cannot be used in asm here" } */ > -- > 2.13.6 >
Re: [PATCH, rs6000 V3] Add Power 8 support to vec_revb
GCC maintainers: I have fixed the change log lines as mentioned by Segher. I removed the changes to swap_selector_for_mode() and instead created swap_endianess_selector_for_mode(). The mode attribute wd will not work for the define expand as the V16QI maps to "b" not "q". So I do need to have VSX_XXBR. I did change it to _. The XXBR_L iterator was changed to VEC_A which has everything needed but V1TI. There are other iterators that include V1TI but have additional entries which would then need to be included in VXS_XXBR. These really don't make sense to me for this instruction so I preferred to use the new iterator/attribute in a define_expand and for the V1TI case have an additional explicit define_expand for that case. I have retested the updated patch on: powerpc64-unknown-linux-gnu (Power 8 BE), powerpc64le-unknown-linux-gnu (Power 8 LE), powerpc64le-unknown-linux-gnu (Power 9 LE) without regressions. Please let me know if the following patch is acceptable. Thanks. Carl Love - gcc/ChangeLog: 2017-10-17 Carl Love * config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8 definitions for the builtin instances. (P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions. * config/rs6000/altivec.h (vec_revb): Change the #define from power 9 to power 8. * config/rs6000/r6000-protos.h (swap_selector_for_mode): Add extern declaration. * config/rs6000/rs6000.c (swap_endianess_selector_for_mode): Add function. * config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1, BU_P8V_OVERLOAD_1): Add power 8 macro expansions. (BU_P9V_OVERLOAD_1): Remove power 9 overload expansion. * config/rs6000/vsx.md (revb_): Add define_expand to generate power 8 instructions for the vec_revb builtin. gcc/testsuite/ChangeLog: 2017-10-17 Carl Love * gcc.target/powerpc/builtins-revb-runnable.c: New runnable test file for the vec_revb builtin. 
--- gcc/config/rs6000/altivec.h| 3 +- gcc/config/rs6000/rs6000-builtin.def | 10 +- gcc/config/rs6000/rs6000-c.c | 44 +-- gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 76 + gcc/config/rs6000/vsx.md | 54 .../gcc.target/powerpc/builtins-revb-runnable.c| 350 + 7 files changed, 514 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index c8e508c..a05e23a 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -415,6 +415,7 @@ #define vec_vsubuqm __builtin_vec_vsubuqm #define vec_vupkhsw __builtin_vec_vupkhsw #define vec_vupklsw __builtin_vec_vupklsw +#define vec_revb __builtin_vec_revb #endif #ifdef __POWER9_VECTOR__ @@ -476,8 +477,6 @@ #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx - -#define vec_revb __builtin_vec_revb #endif /* Predicates. diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 850164a..dba5e70 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1853,6 +1853,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST, cmpbdi3) /* 1 argument VSX instructions added in ISA 2.07. */ BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) +BU_P8V_VSX_1 (REVB_V1TI, "revb_v1ti", CONST, revb_v1ti) +BU_P8V_VSX_1 (REVB_V2DI, "revb_v2di", CONST, revb_v2di) +BU_P8V_VSX_1 (REVB_V4SI, "revb_v4si", CONST, revb_v4si) +BU_P8V_VSX_1 (REVB_V8HI, "revb_v8hi", CONST, revb_v8hi) +BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi",CONST, revb_v16qi) +BU_P8V_VSX_1 (REVB_V2DF, "revb_v2df", CONST, revb_v2df) +BU_P8V_VSX_1 (REVB_V4SF, "revb_v4sf", CONST, revb_v4sf) /* 1 argument altivec instructions added in ISA 2.07. 
*/ BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) @@ -1962,6 +1969,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh") BU_P8V_OVERLOAD_1 (VPOPCNTUW, "vpopcntuw") BU_P8V_OVERLOAD_1 (VPOPCNTUD, "vpopcntud") BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") +BU_P8V_OVERLOAD_1 (REVB, "revb") /* ISA 2.07 vector overloaded 2 argument functions. */ BU_P8V_OVERLOAD_2 (EQV,"eqv") @@ -2073,8 +2081,6 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP, "scalar_test_neg_qp") BU_P9V_OVERLOAD_1 (VSTDCNDP, "scalar_test_neg_dp") BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") -BU_P9V_OVERLOAD_1 (REVB, "revb") - BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth") BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_S
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On 10/23/2017 04:17 PM, Jonathan Wakely wrote: > On 23/10/17 17:07 +0200, Michael Matz wrote: >> Hi, >> >> On Mon, 23 Oct 2017, Richard Biener wrote: >> >>> I guess so. But we have to make gdb happy as well. It really depends how >>> much each TU grows with the extra (unneeded) include grows in C++11 and >>> C++04 mode. >> >> The c++ headers unconditionally included from system.h, with: >> >> % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l >> new: 3564 >> cstring: 533 >> utility: 3623 >> memory: 28066 > > That's using the -std=gnu++4 default for g++-7, and for that mode > the header *is* needed, to get the definition of std::unique_ptr. > > For C++98 (when it isn't needed) that header is much smaller: > > tmp$ echo '#include ' | g++ -E -x c++ - | wc -l > 28101 > tmp$ echo '#include ' | g++ -E -x c++ - -std=gnu++98 | wc -l > 4267 > > (Because it doesn't contain std::unique_ptr and std::shared_ptr before > C++11). > >> compile time: >> % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ - >> new: 0:00.06elapsed, 17060maxresident, 0major+3709minor >> cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor >> utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor >> memory: 0:00.25elapsed, 40356maxresident, 0major+9764minor >> >> Hence, is not cheap at all, including it unconditionally from >> system.h when it isn't actually used by many things doesn't seem a good >> idea. >> I think the real question is whether it makes a difference in a full build. There won't be many translation units that don't include some other headers. (though of course I won't be surprised if it does make a difference.) 
If it's a real issue, you could fix this like how the other similar cases were handled by system.h, by adding this in system.h: #ifdef __cplusplus #ifdef INCLUDE_UNIQUE_PTR # include "unique-ptr.h" #endif #endif instead of unconditionally including there, and then translation units that want unique-ptr.h would do "#define INCLUDE_UNIQUE_PTR" instead of #include "unique-ptr.h", like done for a few other C++ headers. (I maintain that IMO this is kind of self-inflicted GCC pain due to the fact that "#pragma poison" poisons too much. If #pragma poison's behavior were adjusted (or a new variant/mode created) to ignore references to the poisoned symbol names in system headers (or something like that), then you wouldn't need this manual management of header dependencies in gcc/system.h and the corresponding '#define INCLUDE_FOO' contortions. There's nothing that you can reasonably do with a reference to a poisoned symbol in a system header, other than avoid having the system header have the '#pragma poison' in effect when its included, which leads to contortions like system.h's. Note that the poisoned names are _still used anyway_. So can we come up with a GCC change that would avoid having to worry about manually doing this? It'd likely help other projects too.) Thanks, Pedro Alves
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On Mon, 2017-10-23 at 16:40 +0100, Pedro Alves wrote: > On 10/23/2017 04:17 PM, Jonathan Wakely wrote: > > On 23/10/17 17:07 +0200, Michael Matz wrote: > > > Hi, > > > > > > On Mon, 23 Oct 2017, Richard Biener wrote: > > > > > > > I guess so. But we have to make gdb happy as well. It really > > > > depends how > > > > much each TU grows with the extra (unneeded) include grows in > > > > C++11 and > > > > C++04 mode. > > > > > > The c++ headers unconditionally included from system.h, with: > > > > > > % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l > > > new: 3564 > > > cstring: 533 > > > utility: 3623 > > > memory: 28066 > > > > That's using the -std=gnu++4 default for g++-7, and for that mode > > the header *is* needed, to get the definition of std::unique_ptr. > > > > For C++98 (when it isn't needed) that header is much smaller: > > > > tmp$ echo '#include ' | g++ -E -x c++ - | wc -l > > 28101 > > tmp$ echo '#include ' | g++ -E -x c++ - -std=gnu++98 | wc > > -l > > 4267 > > > > (Because it doesn't contain std::unique_ptr and std::shared_ptr > > before > > C++11). > > > > > compile time: > > > % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ - > > > new: 0:00.06elapsed, 17060maxresident, 0major+3709minor > > > cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor > > > utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor > > > memory: 0:00.25elapsed, 40356maxresident, 0major+9764minor > > > > > > Hence, is not cheap at all, including it unconditionally > > > from > > > system.h when it isn't actually used by many things doesn't seem > > > a good > > > idea. > > > > > I think the real question is whether it makes a difference in > a full build. There won't be many translation units that > don't include some other headers. (though of course I won't > be surprised if it does make a difference.) 
> > If it's a real issue, you could fix this like how the > other similar cases were handled by system.h, by adding this > in system.h: > > #ifdef __cplusplus > #ifdef INCLUDE_UNIQUE_PTR > # include "unique-ptr.h" > #endif > #endif > > instead of unconditionally including there, > and then translation units that want unique-ptr.h would > do "#define INCLUDE_UNIQUE_PTR" instead of #include "unique-ptr.h", > like done for a few other C++ headers. > > (I maintain that IMO this is kind of self-inflicted GCC pain due > to the fact that "#pragma poison" poisons too much. If #pragma > poison's behavior were adjusted (or a new variant/mode created) to > ignore references to the poisoned symbol names in system headers (or > something like that), then you wouldn't need this manual management > of header dependencies in gcc/system.h and the corresponding > '#define INCLUDE_FOO' contortions. There's nothing that you can > reasonably > do with a reference to a poisoned symbol in a system header, other > than > avoid having the system header have the '#pragma poison' in effect > when > its included, which leads to contortions like system.h's. Note that > the poisoned names are _still used anyway_. So can we come up with > a GCC change that would avoid having to worry about manually doing > this? It'd likely help other projects too.) > > Thanks, > Pedro Alves FWIW, this one isn't from #pragma poison, it's from: #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__) (I messed up the --in-reply-to when posting the patch, but Gerald noted the issue was due to: /usr/include/c++/v1/typeinfo:199:2: error: no member named 'fancy_abort' in namespace 'std::__1'; did you mean simply 'fancy_abort'? 
_VSTD::abort(); ^~~ /usr/include/c++/v1/__config:390:15: note: expanded from macro '_VSTD' #define _VSTD std::_LIBCPP_NAMESPACE ^ /scratch/tmp/gerald/gcc-HEAD/gcc/system.h:725:13: note: 'fancy_abort' declared here extern void fancy_abort (const char *, int, const char *) ^ https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01289.html )
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
Hi, On Mon, 23 Oct 2017, David Malcolm wrote: > FWIW, this one isn't from #pragma poison, it's from: > #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__) > > (I messed up the --in-reply-to when posting the patch, but Gerald noted > the issue was due to: > /usr/include/c++/v1/typeinfo:199:2: error: no member named > 'fancy_abort' in namespace 'std::__1'; did you mean simply > 'fancy_abort'? > _VSTD::abort(); > ^~~ So if we really really have to add an unconditional include in system.h it's probably enough to include <typeinfo>, not <memory>. Ciao, Michael.
Re: [RFC] New pragma exec_charset
On 10/23/2017 04:55 AM, Andreas Krebbel wrote: On 10/19/2017 07:13 PM, Martin Sebor wrote: On 10/19/2017 09:50 AM, Andreas Krebbel wrote: The TPF operating system uses the GCC S/390 backend. They set an EBCDIC exec charset for compilation using -fexec-charset. However, certain libraries require ASCII strings instead. In order to be able to put calls to that library into the normal code it is required to switch the exec charset within a compilation unit. This is an attempt to implement it by adding a new pragma which could be used like in the following example: int foo () { call_with_utf8("hello world"); #pragma GCC exec_charset("UTF16") call_with_utf16("hello world"); #pragma GCC exec_charset(pop) call_with_utf8("hello world"); } Does this look reasonable? I'm not an expert on this but at a high level it looks reasonable to me. But based on some small amount of work I did in this area I have a couple of questions. There are a few places in the compiler that already do or that should but don't yet handle different execution character sets. The former include built-ins like __bultin_isdigit() and __builtin_sprintf (in both builtins.c and gimple-ssa-sprintf.c) The latter is the -Wformat checking done by the C and C++ front ends. The missing support for the latter is the subject of bug 38308. According to bug 81686, LTO is apparently also missing support for exec-charset. These probably are the areas Richard and Jakub were referring to as well?! These cases did not work properly with the -fexec-charset cmdline option and this does not change with the pragma. I'll try to look at what has been proposed in the discussion. Perhaps I can get it working somehow. Right, the patch doesn't remove the known deficiencies. But by providing another knob to control the execution charset, at a fine grain level, it encourages users to make greater use of the (incomplete) exec-charset support and increases the odds that they will run afoul of them. 
It seems to me that before exposing a new mechanism to control the exec charset it would be prudent to a) plug at least the biggest holes to make the feature more reliable (in my mind, that's at least -Wformat), and b) make sure the pragma interacts correctly with existing features that work correctly with the -fexec-charset option. Where it doesn't and where it cannot be made to work correctly (i.e., is undefined), I would expect an effort to be made to detect and diagnose those undefined interactions if possible, or if that's too difficult, at a minimum document them. I'm curious how the pragma might interact with these two areas, and whether the lack of support for it in the latter is a concern (and if not, why not). For the former, I'm also wondering about the interaction of inlining and other interprocedural optimizations with the pragma. Does it propagate through inlined calls as one would expect? The pragma does not apply to the callees of a function defined under the pragma regardless of whether it gets inlined or not. That matches the behavior of other pragmas. If it would apply to inlined callees the program semantics might change depending on optimization decisions i.e. whether a certain call got inlined or not. Callees marked as always_inline might be discussed separately. I remember this being a topic when looking at function attributes. My concern with this pragma/attribute and inlining has to do with strings in one exec charset being propagated into functions that operate on strings in another charset. E.g., like in the test case below that's "miscompiled" with your patch -- the first test for n == 7 is eliminated and the buffer overflow is not detected. If this cannot be made to work then I think some effort should be made to detect this mixing and matching and existing optimizations that assume the same charset (like the sprintf one does) disabled. 
static inline int f (char *d, const char *fmt) { #pragma GCC exec_charset ("utf8") int n = __builtin_sprintf (d, fmt, 12345); #pragma GCC exec_charset (pop) if (n == 7) // incorrectly optimized away __builtin_abort (); return n; } int main (void) { char d[5]; #pragma GCC exec_charset ("EBCDIC-US") int n = f (d, "i=%i"); // buffer overflow not detected #pragma GCC exec_charset (pop) __builtin_printf ("%i (%lu): %s\n", n, __builtin_strlen (d), d); if (n != 7) // aborts at runtime __builtin_abort (); } Martin
Re: [PATCH] Include <memory> from system.h (PR bootstrap/82610)
On 10/23/2017 04:50 PM, David Malcolm wrote: > FWIW, this one isn't from #pragma poison, it's from: > #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__) > > (I messed up the --in-reply-to when posting the patch, but Gerald noted > the issue was due to: > /usr/include/c++/v1/typeinfo:199:2: error: no member named > 'fancy_abort' in namespace 'std::__1'; did you mean simply > 'fancy_abort'? > _VSTD::abort(); > ^~~ > /usr/include/c++/v1/__config:390:15: note: expanded from macro '_VSTD' > #define _VSTD std::_LIBCPP_NAMESPACE > ^ > /scratch/tmp/gerald/gcc-HEAD/gcc/system.h:725:13: note: 'fancy_abort' > declared here > extern void fancy_abort (const char *, int, const char *) > ^ > IMO the best fix would be to rename that "#define abort" to "#define gcc_abort" and then call gcc_abort instead in the few places that currently call abort. IME, the introduction of a new naked call to abort() isn't something that easily passes review. abort calls always stand out and give reviewers pause (or they should!). FWIW, GDB also doesn't want such naked abort() calls, I don't recall people-sneaking-in-abort-()-calls ever being a problem over there. Thanks, Pedro Alves
Re: [PATCH][AArch64] Wrong type-attribute for stp and str
I’ve added your suggestions. I would also like to propose to change the type attribute from neon_stp to store_8 and store_16, this seems to be more in line with respect to other patterns. Thanks, Dominik ChangeLog: 2017-10-23 Dominik Infuehr * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): Fix type-attribute. (*aarch64_simd_mov): Likewise. — diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 49f615cfdbf..447ee3afd17 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -102,7 +102,7 @@ [(set_attr "type" "neon_dup")] ) -(define_insn "*aarch64_simd_mov" +(define_insn "*aarch64_simd_mov" [(set (match_operand:VD 0 "nonimmediate_operand" "=w, m, m, w, ?r, ?w, ?r, w") (match_operand:VD 1 "general_operand" @@ -126,12 +126,12 @@ default: gcc_unreachable (); } } - [(set_attr "type" "neon_load1_1reg, neon_stp, neon_store1_1reg,\ + [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\ neon_logic, neon_to_gp, f_mcr,\ mov_reg, neon_move")] ) -(define_insn "*aarch64_simd_mov" +(define_insn "*aarch64_simd_mov" [(set (match_operand:VQ 0 "nonimmediate_operand" "=w, Umq, m, w, ?r, ?w, ?r, w") (match_operand:VQ 1 "general_operand" @@ -160,8 +160,8 @@ gcc_unreachable (); } } - [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\ -neon_stp, neon_logic, multiple, multiple,\ + [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\ +neon_logic, multiple, multiple,\ multiple, neon_move") (set_attr "length" "4,4,4,4,8,8,8,4")] ) > On 20 Oct 2017, at 16:07, Richard Earnshaw (lists) > wrote: > > On 16/10/17 14:26, Dominik Inführ wrote: >> Hi, >> >> it seems the type attributes for neon_stp and neon_store1_1reg should be >> the other way around. >> > > Yes, I agree, but there's more > > Firstly, we have two patterns that are named *aarch64_simd_mov, > with different iterators. That's slightly confusing. 
I think they need > to be renamed as: > > *aarch64_simd_mov > > and > > *aarch64_simd_mov > > to break the ambiguity. > > Secondly it looks to me as though the attributes on the other one are > also incorrect. Could you check that one out as well, please. > > Thanks, > > R. > >> Thanks >> Dominik >> >> ChangeLog: >> 2017-10-16 Dominik Infuehr >> >> * config/aarch64/aarch64-simd.md >> (*aarch64_simd_mov): Fix type-attribute. >> -- >> diff --git a/gcc/config/aarch64/aarch64-simd.md >> b/gcc/config/aarch64/aarch64-simd.md >> index 49f615cfdbf..409ad3502ff 100644 >> --- a/gcc/config/aarch64/aarch64-simd.md >> +++ b/gcc/config/aarch64/aarch64-simd.md >> @@ -160,8 +160,8 @@ >>gcc_unreachable (); >> } >> } >> - [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\ >> -neon_stp, neon_logic, multiple, multiple,\ >> + [(set_attr "type" "neon_load1_1reg, neon_stp, neon_store1_1reg,\ >> +neon_logic, multiple, multiple,\ >> multiple, neon_move") >>(set_attr "length" "4,4,4,4,8,8,8,4")] >> ) >> > signature.asc Description: Message signed with OpenPGP using GPGMail
Re: [PING][PATCH][Aarch64] Improve int<->FP conversions
On Tue, Oct 17, 2017 at 01:17:04AM +0100, Michael Collison wrote: > Patch updated with all comments from James. OK with an appropriate ChangeLog and assuming it has been tested as required. Thanks, James Reviewed-by: James Greenhalgh <james.greenhalgh@arm.com>
[000/nnn] poly_int: representation of runtime offsets and sizes
This series adds support for offsets and sizes that are a runtime invariant rather than a compile time constant. It's based on the patch posted here: https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00406.html The rest of the covering note is split into: - Summary (from the message linked above) - Tree representation - RTL representation - Compile-time impact - Typical changes - Testing Summary === The size of an SVE register in bits can be any multiple of 128 between 128 and 2048 inclusive. The way we chose to represent this was to have a runtime indeterminate that counts the number of 128 bit blocks above the minimum of 128. If we call the indeterminate X then: * an SVE register has 128 + 128 * X bits (16 + 16 * X bytes) * the last int in an SVE vector is at byte offset 12 + 16 * X * etc. Although the maximum value of X is 15, we don't want to take advantage of that, since there's nothing particularly magical about the value. So we have two types of target: those for which there are no runtime indeterminates, and those for which there is one runtime indeterminate. We decided to generalise the interface slightly by allowing any number of indeterminates, although some parts of the underlying implementation are still limited to 0 and 1 for now. The main class for working with these runtime offsets and sizes is "poly_int". It represents a value of the form: C0 + C1 * X1 + ... + Cn * Xn where each coefficient Ci is a compile-time constant and where each indeterminate Xi is a nonnegative runtime value. The class takes two template parameters, one giving the number of coefficients and one giving the type of the coefficients. There are then typedefs for the common cases, with the number of coefficients being controlled by the target. 
poly_int is used for things like: - the number of elements in a VECTOR_TYPE - the size and number of units in a general machine_mode - the offset of something in the stack frame - SUBREG_BYTE - MEM_SIZE and MEM_OFFSET - mem_ref_offset (only a selective list). The patch that adds poly_int has detailed documentation, but the main points are: * there's no total ordering between poly_ints, so the best we can do when comparing them is to ask whether two values *might* or *must* be related in a particular way. E.g. if mode A has size 2 + 2X and mode B has size 4, the condition: GET_MODE_SIZE (A) <= GET_MODE_SIZE (B) is true for X<=1 and false for X>=2. This translates to: may_le (GET_MODE_SIZE (A), GET_MODE_SIZE (B)) == true must_le (GET_MODE_SIZE (A), GET_MODE_SIZE (B)) == false Of course, the may/must distinction already exists in things like alias analysis. * some poly_int arithmetic operations (notably division) are only possible for certain values. These operations therefore become conditional. * target-independent code is exposed to these restrictions even if the current target has no indeterminates. But: * we've tried to provide enough operations that poly_ints are easy to work with. * it means that developers working with non-SVE targets don't need to test SVE. If the code compiles on a non-SVE target, and if it doesn't use any asserting operations, it's reasonable to assume that it will work on SVE too. * for target-specific code, poly_int degenerates to a constant if there are no runtime invariants for that target. Only very minor changes are needed to non-AArch64 targets. * poly_int operations should be (and in practice seem to be) as efficient as single-coefficient operations on non-AArch64 targets. Tree representation === The series uses a new POLY_INT_CST node to represent a poly_int value at the tree level. It is only used on targets with runtime sizes and offsets; the associated test macro POLY_INT_CST_P is always false for other targets. 
The node has one INTEGER_CST per coefficient, which makes it easier to refer to the same tree as a poly_wide_int, a poly_offset_int and a poly_widest_int without copying the representation. Only low-level routines use the tree node directly. Most code uses: - poly_int_tree_p (x) Return true if X is an INTEGER_CST or a POLY_INT_CST. - wi::to_poly_wide (x) - wi::to_poly_offset (x) - wi::to_poly_widest (x) poly_int versions of the normal wi::to_wide etc. routines. These work on both INTEGER_CSTs and POLY_INT_CSTs. - poly_int_tree_p (x, &y) Test whether X is an INTEGER_CST or POLY_INT_CST and store its value in Y if so. This is defined for Y of type poly_int64 and poly_uint64; the wi::to_* routines are more efficient than return-by-pointer for wide_int-based types. - tree_to_poly_int64 (x) - tree_to_poly_uint64 (x) poly_int versions of tree_to_shwi and tree_to_uhwi. Again they work on both INTEGER_CSTs and POLY_INT_CSTs. Many tree routines now accept poly_int operands, such as: - build_int_cst - build_int_cstu - wide_int_to_tree - force_fit_type RTL representation ===
[001/nnn] poly_int: add poly-int.h
This patch adds a new "poly_int" class to represent polynomial integers of the form: C0 + C1*X1 + C2*X2 ... + Cn*Xn It also adds poly_int-based typedefs for offsets and sizes of various precisions. In these typedefs, the Ci coefficients are compile-time constants and the Xi indeterminates are run-time invariants. The number of coefficients is controlled by the target and is initially 1 for all ports. Most routines can handle general coefficient counts, but for now a few are specific to one or two coefficients. Support for other coefficient counts can be added when needed. The patch also adds a new macro, IN_TARGET_CODE, that can be set to indicate that a TU contains target-specific rather than target-independent code. When this macro is set and the number of coefficients is 1, the poly-int.h classes define a conversion operator to a constant. This allows most existing target code to work without modification. The main exceptions are: - values passed through ..., which need an explicit conversion to a constant - ?: expression in which one arm ends up being a polynomial and the other remains a constant. In these cases it would be valid to convert the constant to a polynomial and the polynomial to a constant, so a cast is needed to break the ambiguity. The patch also adds a new target hook to return the estimated value of a polynomial for costing purposes. The patch also adds operator<< on wide_ints (it was already defined for offset_int and widest_int). I think this was originally excluded because >> is ambiguous for wide_int, but << is useful for converting bytes to bits, etc., so is worth defining on its own. The patch also adds operator% and operator/ for offset_int and widest_int, since those types are always signed. These changes allow the poly_int interface to be more predictable. I'd originally tried adding the tests as selftests, but that ended up bloating cc1 by at least a third. It also took a while to build them at -O2. 
The patch therefore uses plugin tests instead, where we can force the tests to be built at -O0. They still run in negligible time when built that way. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * poly-int.h: New file. * poly-int-types.h: Likewise. * coretypes.h: Include them. (POLY_INT_CONVERSION): Define. * target.def (estimated_poly_value): New hook. * doc/tm.texi.in (TARGET_ESTIMATED_POLY_VALUE): New hook. * doc/tm.texi: Regenerate. * doc/poly-int.texi: New file. * doc/gccint.texi: Include it. * doc/rtl.texi: Describe restrictions on subreg modes. * Makefile.in (TEXI_GCCINT_FILES): Add poly-int.texi. * genmodes.c (NUM_POLY_INT_COEFFS): Provide a default definition. (emit_insn_modes_h): Emit a definition of NUM_POLY_INT_COEFFS. * targhooks.h (default_estimated_poly_value): Declare. * targhooks.c (default_estimated_poly_value): New function. * target.h (estimated_poly_value): Likewise. * wide-int.h (WI_UNARY_RESULT): Use wi::binary_traits. (wi::unary_traits): Delete. (wi::binary_traits::signed_shift_result_type): Define for offset_int << HOST_WIDE_INT, etc. (generic_wide_int::operator <<=): Define for all types and use wi::lshift instead of <<. (wi::hwi_with_prec): Add a default constructor. (wi::ints_for): New class. (operator <<): Define for all wide-int types. (operator /): New function. (operator %): Likewise. * selftest.h (ASSERT_MUST_EQ, ASSERT_MUST_EQ_AT, ASSERT_MAY_NE) (ASSERT_MAY_NE_AT): New macros. gcc/testsuite/ * gcc.dg/plugin/poly-int-tests.h, gcc.dg/plugin/poly-int-test-1.c, gcc.dg/plugin/poly-int-01_plugin.c, gcc.dg/plugin/poly-int-02_plugin.c, gcc.dg/plugin/poly-int-03_plugin.c, gcc.dg/plugin/poly-int-04_plugin.c, gcc.dg/plugin/poly-int-05_plugin.c, gcc.dg/plugin/poly-int-06_plugin.c, gcc.dg/plugin/poly-int-07_plugin.c: New tests. * gcc.dg/plugin/plugin.exp: Run them. poly-int.diff.bz2 Description: BZip2 compressed data
Re: [PING] [C++ Patch] PR 82307
On 10/23/2017 09:15 AM, Paolo Carlini wrote: Hi, following up to a short off-line exchange with Nathan, I'm sending a reworked patch which - among other things - avoids regressing on the second testcase (cpp0x/enum36.C). Tested x86_64-linux. ok, thanks! nathan -- Nathan Sidwell
[002/nnn] poly_int: IN_TARGET_CODE
This patch makes each target-specific TU define an IN_TARGET_CODE macro, which is used to decide whether poly_int<1, C> should convert to C. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * genattrtab.c (write_header): Define IN_TARGET_CODE to 1 in the target C file. * genautomata.c (main): Likewise. * genconditions.c (write_header): Likewise. * genemit.c (main): Likewise. * genextract.c (print_header): Likewise. * genopinit.c (main): Likewise. * genoutput.c (output_prologue): Likewise. * genpeep.c (main): Likewise. * genpreds.c (write_insn_preds_c): Likewise. * genrecog.c (writer_header): Likewise. * config/aarch64/aarch64-builtins.c (IN_TARGET_CODE): Define. * config/aarch64/aarch64-c.c (IN_TARGET_CODE): Likewise. * config/aarch64/aarch64.c (IN_TARGET_CODE): Likewise. * config/aarch64/cortex-a57-fma-steering.c (IN_TARGET_CODE): Likewise. * config/aarch64/driver-aarch64.c (IN_TARGET_CODE): Likewise. * config/alpha/alpha.c (IN_TARGET_CODE): Likewise. * config/alpha/driver-alpha.c (IN_TARGET_CODE): Likewise. * config/arc/arc-c.c (IN_TARGET_CODE): Likewise. * config/arc/arc.c (IN_TARGET_CODE): Likewise. * config/arc/driver-arc.c (IN_TARGET_CODE): Likewise. * config/arm/aarch-common.c (IN_TARGET_CODE): Likewise. * config/arm/arm-builtins.c (IN_TARGET_CODE): Likewise. * config/arm/arm-c.c (IN_TARGET_CODE): Likewise. * config/arm/arm.c (IN_TARGET_CODE): Likewise. * config/arm/driver-arm.c (IN_TARGET_CODE): Likewise. * config/avr/avr-c.c (IN_TARGET_CODE): Likewise. * config/avr/avr-devices.c (IN_TARGET_CODE): Likewise. * config/avr/avr-log.c (IN_TARGET_CODE): Likewise. * config/avr/avr.c (IN_TARGET_CODE): Likewise. * config/avr/driver-avr.c (IN_TARGET_CODE): Likewise. * config/avr/gen-avr-mmcu-specs.c (IN_TARGET_CODE): Likewise. * config/bfin/bfin.c (IN_TARGET_CODE): Likewise. * config/c6x/c6x.c (IN_TARGET_CODE): Likewise. * config/cr16/cr16.c (IN_TARGET_CODE): Likewise. * config/cris/cris.c (IN_TARGET_CODE): Likewise. 
* config/darwin.c (IN_TARGET_CODE): Likewise. * config/epiphany/epiphany.c (IN_TARGET_CODE): Likewise. * config/epiphany/mode-switch-use.c (IN_TARGET_CODE): Likewise. * config/epiphany/resolve-sw-modes.c (IN_TARGET_CODE): Likewise. * config/fr30/fr30.c (IN_TARGET_CODE): Likewise. * config/frv/frv.c (IN_TARGET_CODE): Likewise. * config/ft32/ft32.c (IN_TARGET_CODE): Likewise. * config/h8300/h8300.c (IN_TARGET_CODE): Likewise. * config/i386/djgpp.c (IN_TARGET_CODE): Likewise. * config/i386/driver-i386.c (IN_TARGET_CODE): Likewise. * config/i386/driver-mingw32.c (IN_TARGET_CODE): Likewise. * config/i386/host-cygwin.c (IN_TARGET_CODE): Likewise. * config/i386/host-i386-darwin.c (IN_TARGET_CODE): Likewise. * config/i386/host-mingw32.c (IN_TARGET_CODE): Likewise. * config/i386/i386-c.c (IN_TARGET_CODE): Likewise. * config/i386/i386.c (IN_TARGET_CODE): Likewise. * config/i386/intelmic-mkoffload.c (IN_TARGET_CODE): Likewise. * config/i386/msformat-c.c (IN_TARGET_CODE): Likewise. * config/i386/winnt-cxx.c (IN_TARGET_CODE): Likewise. * config/i386/winnt-stubs.c (IN_TARGET_CODE): Likewise. * config/i386/winnt.c (IN_TARGET_CODE): Likewise. * config/i386/x86-tune-sched-atom.c (IN_TARGET_CODE): Likewise. * config/i386/x86-tune-sched-bd.c (IN_TARGET_CODE): Likewise. * config/i386/x86-tune-sched-core.c (IN_TARGET_CODE): Likewise. * config/i386/x86-tune-sched.c (IN_TARGET_CODE): Likewise. * config/ia64/ia64-c.c (IN_TARGET_CODE): Likewise. * config/ia64/ia64.c (IN_TARGET_CODE): Likewise. * config/iq2000/iq2000.c (IN_TARGET_CODE): Likewise. * config/lm32/lm32.c (IN_TARGET_CODE): Likewise. * config/m32c/m32c-pragma.c (IN_TARGET_CODE): Likewise. * config/m32c/m32c.c (IN_TARGET_CODE): Likewise. * config/m32r/m32r.c (IN_TARGET_CODE): Likewise. * config/m68k/m68k.c (IN_TARGET_CODE): Likewise. * config/mcore/mcore.c (IN_TARGET_CODE): Likewise. * config/microblaze/microblaze-c.c (IN_TARGET_CODE): Likewise. * config/microblaze/microblaze.c (IN_TARGET_CODE): Likewise. 
* config/mips/driver-native.c (IN_TARGET_CODE): Likewise. * config/mips/frame-header-opt.c (IN_TARGET_CODE): Likewise. * config/mips/mips.c (IN_TARGET_CODE): Likewise. * config/mmix/mmix.c (IN_TARGET_CODE): Likewise. * config/mn10300/mn10300.c (IN_TARGET_CODE): Likewise. * config/moxie/moxie.c (IN_TARGET_CODE): Likewise. * config/msp430/driver-msp430.c (IN_TARGET_CODE): Likewise. * conf
[003/nnn] poly_int: MACRO_MODE
This patch uses a MACRO_MODE wrapper for the target macro invocations in targhooks.c and address.h, so that macros for non-AArch64 targets can continue to treat modes as fixed-size. It didn't seem worth converting the address macros to hooks since (a) they're heavily used, (b) they should be probably be replaced with a different interface rather than converted to hooks as-is, and most importantly (c) addresses.h already localises the problem. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * machmode.h (MACRO_MODE): New macro. * addresses.h (base_reg_class, ok_for_base_p_1): Use it. * targhooks.c (default_libcall_value, default_secondary_reload) (default_memory_move_cost, default_register_move_cost) (default_class_max_nregs): Likewise. Index: gcc/machmode.h === --- gcc/machmode.h 2017-10-23 16:52:20.675923636 +0100 +++ gcc/machmode.h 2017-10-23 17:00:49.664349224 +0100 @@ -685,6 +685,17 @@ fixed_size_mode::includes_p (machine_mod return true; } +/* Wrapper for mode arguments to target macros, so that if a target + doesn't need polynomial-sized modes, its header file can continue + to treat everything as fixed_size_mode. This should go away once + macros are moved to target hooks. It shouldn't be used in other + contexts. 
*/ +#if NUM_POLY_INT_COEFFS == 1 +#define MACRO_MODE(MODE) (as_a (MODE)) +#else +#define MACRO_MODE(MODE) (MODE) +#endif + extern opt_machine_mode mode_for_size (unsigned int, enum mode_class, int); /* Return the machine mode to use for a MODE_INT of SIZE bits, if one Index: gcc/addresses.h === --- gcc/addresses.h 2017-10-23 16:52:20.675923636 +0100 +++ gcc/addresses.h 2017-10-23 17:00:49.663350133 +0100 @@ -31,14 +31,15 @@ base_reg_class (machine_mode mode ATTRIB enum rtx_code index_code ATTRIBUTE_UNUSED) { #ifdef MODE_CODE_BASE_REG_CLASS - return MODE_CODE_BASE_REG_CLASS (mode, as, outer_code, index_code); + return MODE_CODE_BASE_REG_CLASS (MACRO_MODE (mode), as, outer_code, + index_code); #else #ifdef MODE_BASE_REG_REG_CLASS if (index_code == REG) -return MODE_BASE_REG_REG_CLASS (mode); +return MODE_BASE_REG_REG_CLASS (MACRO_MODE (mode)); #endif #ifdef MODE_BASE_REG_CLASS - return MODE_BASE_REG_CLASS (mode); + return MODE_BASE_REG_CLASS (MACRO_MODE (mode)); #else return BASE_REG_CLASS; #endif @@ -58,15 +59,15 @@ ok_for_base_p_1 (unsigned regno ATTRIBUT enum rtx_code index_code ATTRIBUTE_UNUSED) { #ifdef REGNO_MODE_CODE_OK_FOR_BASE_P - return REGNO_MODE_CODE_OK_FOR_BASE_P (regno, mode, as, + return REGNO_MODE_CODE_OK_FOR_BASE_P (regno, MACRO_MODE (mode), as, outer_code, index_code); #else #ifdef REGNO_MODE_OK_FOR_REG_BASE_P if (index_code == REG) -return REGNO_MODE_OK_FOR_REG_BASE_P (regno, mode); +return REGNO_MODE_OK_FOR_REG_BASE_P (regno, MACRO_MODE (mode)); #endif #ifdef REGNO_MODE_OK_FOR_BASE_P - return REGNO_MODE_OK_FOR_BASE_P (regno, mode); + return REGNO_MODE_OK_FOR_BASE_P (regno, MACRO_MODE (mode)); #else return REGNO_OK_FOR_BASE_P (regno); #endif Index: gcc/targhooks.c === --- gcc/targhooks.c 2017-10-23 17:00:20.920834919 +0100 +++ gcc/targhooks.c 2017-10-23 17:00:49.664349224 +0100 @@ -941,7 +941,7 @@ default_libcall_value (machine_mode mode const_rtx fun ATTRIBUTE_UNUSED) { #ifdef LIBCALL_VALUE - return LIBCALL_VALUE (mode); + return LIBCALL_VALUE 
(MACRO_MODE (mode)); #else gcc_unreachable (); #endif @@ -1071,11 +1071,13 @@ default_secondary_reload (bool in_p ATTR } #ifdef SECONDARY_INPUT_RELOAD_CLASS if (in_p) -rclass = SECONDARY_INPUT_RELOAD_CLASS (reload_class, reload_mode, x); +rclass = SECONDARY_INPUT_RELOAD_CLASS (reload_class, + MACRO_MODE (reload_mode), x); #endif #ifdef SECONDARY_OUTPUT_RELOAD_CLASS if (! in_p) -rclass = SECONDARY_OUTPUT_RELOAD_CLASS (reload_class, reload_mode, x); +rclass = SECONDARY_OUTPUT_RELOAD_CLASS (reload_class, + MACRO_MODE (reload_mode), x); #endif if (rclass != NO_REGS) { @@ -1603,7 +1605,7 @@ default_memory_move_cost (machine_mode m #ifndef MEMORY_MOVE_COST return (4 + memory_move_secondary_cost (mode, (enum reg_class) rclass, in)); #else -return MEMORY_MOVE_COST (mode, (enum reg_class) rclass, in); +return MEMORY_MOVE_COST (MACRO_MODE (mode), (enum reg_class) rclass, in); #endif } @@ -1618,7 +1620,8 @@ default_register_move_cost (machine_mode #ifndef REGISTER_MOVE_COST return 2; #else - return REGISTER_MOVE_COST (mode, (enum reg_class) from, (enum reg_class) to); + return REGISTER_M
[004/nnn] poly_int: mode query functions
This patch changes the bit size and vector count arguments to the machmode.h functions from unsigned int to poly_uint64. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * machmode.h (mode_for_size, int_mode_for_size, float_mode_for_size) (smallest_mode_for_size, smallest_int_mode_for_size): Take the mode size as a poly_uint64. (mode_for_vector, mode_for_int_vector): Take the number of vector elements as a poly_uint64. * stor-layout.c (mode_for_size, smallest_mode_for_size): Take the mode size as a poly_uint64. (mode_for_vector, mode_for_int_vector): Take the number of vector elements as a poly_uint64. Index: gcc/machmode.h === --- gcc/machmode.h 2017-10-23 17:00:49.664349224 +0100 +++ gcc/machmode.h 2017-10-23 17:00:52.669615373 +0100 @@ -696,14 +696,14 @@ #define MACRO_MODE(MODE) (as_a (mode_for_size (size, MODE_INT, limit)); } @@ -712,7 +712,7 @@ int_mode_for_size (unsigned int size, in exists. */ inline opt_scalar_float_mode -float_mode_for_size (unsigned int size) +float_mode_for_size (poly_uint64 size) { return dyn_cast (mode_for_size (size, MODE_FLOAT, 0)); } @@ -726,21 +726,21 @@ decimal_float_mode_for_size (unsigned in (mode_for_size (size, MODE_DECIMAL_FLOAT, 0)); } -extern machine_mode smallest_mode_for_size (unsigned int, enum mode_class); +extern machine_mode smallest_mode_for_size (poly_uint64, enum mode_class); /* Find the narrowest integer mode that contains at least SIZE bits. Such a mode must exist. 
*/ inline scalar_int_mode -smallest_int_mode_for_size (unsigned int size) +smallest_int_mode_for_size (poly_uint64 size) { return as_a (smallest_mode_for_size (size, MODE_INT)); } extern opt_scalar_int_mode int_mode_for_mode (machine_mode); extern opt_machine_mode bitwise_mode_for_mode (machine_mode); -extern opt_machine_mode mode_for_vector (scalar_mode, unsigned); -extern opt_machine_mode mode_for_int_vector (unsigned int, unsigned int); +extern opt_machine_mode mode_for_vector (scalar_mode, poly_uint64); +extern opt_machine_mode mode_for_int_vector (unsigned int, poly_uint64); /* Return the integer vector equivalent of MODE, if one exists. In other words, return the mode for an integer vector that has the same number Index: gcc/stor-layout.c === --- gcc/stor-layout.c 2017-10-23 16:52:20.627879504 +0100 +++ gcc/stor-layout.c 2017-10-23 17:00:52.669615373 +0100 @@ -297,22 +297,22 @@ finalize_size_functions (void) MAX_FIXED_MODE_SIZE. */ opt_machine_mode -mode_for_size (unsigned int size, enum mode_class mclass, int limit) +mode_for_size (poly_uint64 size, enum mode_class mclass, int limit) { machine_mode mode; int i; - if (limit && size > MAX_FIXED_MODE_SIZE) + if (limit && may_gt (size, (unsigned int) MAX_FIXED_MODE_SIZE)) return opt_machine_mode (); /* Get the first mode which has this size, in the specified class. */ FOR_EACH_MODE_IN_CLASS (mode, mclass) -if (GET_MODE_PRECISION (mode) == size) +if (must_eq (GET_MODE_PRECISION (mode), size)) return mode; if (mclass == MODE_INT || mclass == MODE_PARTIAL_INT) for (i = 0; i < NUM_INT_N_ENTS; i ++) - if (int_n_data[i].bitsize == size + if (must_eq (int_n_data[i].bitsize, size) && int_n_enabled_p[i]) return int_n_data[i].m; @@ -340,7 +340,7 @@ mode_for_size_tree (const_tree size, enu SIZE bits. Abort if no such mode exists. 
*/ machine_mode -smallest_mode_for_size (unsigned int size, enum mode_class mclass) +smallest_mode_for_size (poly_uint64 size, enum mode_class mclass) { machine_mode mode = VOIDmode; int i; @@ -348,19 +348,18 @@ smallest_mode_for_size (unsigned int siz /* Get the first mode which has at least this size, in the specified class. */ FOR_EACH_MODE_IN_CLASS (mode, mclass) -if (GET_MODE_PRECISION (mode) >= size) +if (must_ge (GET_MODE_PRECISION (mode), size)) break; + gcc_assert (mode != VOIDmode); + if (mclass == MODE_INT || mclass == MODE_PARTIAL_INT) for (i = 0; i < NUM_INT_N_ENTS; i ++) - if (int_n_data[i].bitsize >= size - && int_n_data[i].bitsize < GET_MODE_PRECISION (mode) + if (must_ge (int_n_data[i].bitsize, size) + && must_lt (int_n_data[i].bitsize, GET_MODE_PRECISION (mode)) && int_n_enabled_p[i]) mode = int_n_data[i].m; - if (mode == VOIDmode) -gcc_unreachable (); - return mode; } @@ -475,7 +474,7 @@ bitwise_type_for_mode (machine_mode mode either an integer mode or a vector mode. */ opt_machine_mode -mode_for_vector (scalar_mode innermode, unsigned nunits) +mode_for_vector (scalar_mode innermode, poly_uint64 nunits) { machine_mode mode; @@ -496,14 +495,14 @@ mode_for_vector (scalar_mode innermode,
[005/nnn] poly_int: rtx constants
This patch adds an rtl representation of poly_int values. There were three possible ways of doing this: (1) Add a new rtl code for the poly_ints themselves and store the coefficients as trailing wide_ints. This would give constants like: (const_poly_int [c0 c1 ... cn]) The runtime value would be: c0 + c1 * x1 + ... + cn * xn (2) Like (1), but use rtxes for the coefficients. This would give constants like: (const_poly_int [(const_int c0) (const_int c1) ... (const_int cn)]) although the coefficients could be const_wide_ints instead of const_ints where appropriate. (3) Add a new rtl code for the polynomial indeterminates, then use them in const wrappers. A constant like c0 + c1 * x1 would then look like: (const:M (plus:M (mult:M (const_param:M x1) (const_int c1)) (const_int c0))) There didn't seem to be that much to choose between them. The main advantage of (1) is that it's a more efficient representation and that we can refer to the cofficients directly as wide_int_storage. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/rtl.texi (const_poly_int): Document. * gengenrtl.c (excluded_rtx): Return true for CONST_POLY_INT. * rtl.h (const_poly_int_def): New struct. (rtx_def::u): Add a cpi field. (CASE_CONST_UNIQUE, CASE_CONST_ANY): Add CONST_POLY_INT. (CONST_POLY_INT_P, CONST_POLY_INT_COEFFS): New macros. (wi::rtx_to_poly_wide_ref): New typedef (const_poly_int_value, wi::to_poly_wide, rtx_to_poly_int64) (poly_int_rtx_p): New functions. (trunc_int_for_mode): Declare a poly_int64 version. (plus_constant): Take a poly_int64 instead of a HOST_WIDE_INT. (immed_wide_int_const): Take a poly_wide_int_ref rather than a wide_int_ref. (strip_offset): Declare. (strip_offset_and_add): New function. * rtl.def (CONST_POLY_INT): New rtx code. * rtl.c (rtx_size): Handle CONST_POLY_INT. (shared_const_p): Use poly_int_rtx_p. * emit-rtl.h (gen_int_mode): Take a poly_int64 instead of a HOST_WIDE_INT. (gen_int_shift_amount): Likewise. 
* emit-rtl.c (const_poly_int_hasher): New class. (const_poly_int_htab): New variable. (init_emit_once): Initialize it when NUM_POLY_INT_COEFFS > 1. (const_poly_int_hasher::hash): New function. (const_poly_int_hasher::equal): Likewise. (gen_int_mode): Take a poly_int64 instead of a HOST_WIDE_INT. (immed_wide_int_const): Rename to... (immed_wide_int_const_1): ...this and make static. (immed_wide_int_const): New function, taking a poly_wide_int_ref instead of a wide_int_ref. (gen_int_shift_amount): Take a poly_int64 instead of a HOST_WIDE_INT. (gen_lowpart_common): Handle CONST_POLY_INT. * cse.c (hash_rtx_cb, equiv_constant): Likewise. * cselib.c (cselib_hash_rtx): Likewise. * dwarf2out.c (const_ok_for_output_1): Likewise. * expr.c (convert_modes): Likewise. * print-rtl.c (rtx_writer::print_rtx, print_value): Likewise. * rtlhash.c (add_rtx): Likewise. * explow.c (trunc_int_for_mode): Add a poly_int64 version. (plus_constant): Take a poly_int64 instead of a HOST_WIDE_INT. Handle existing CONST_POLY_INT rtxes. * expmed.h (expand_shift): Take a poly_int64 instead of a HOST_WIDE_INT. * expmed.c (expand_shift): Likewise. * rtlanal.c (strip_offset): New function. (commutative_operand_precedence): Give CONST_POLY_INT the same precedence as CONST_DOUBLE and put CONST_WIDE_INT between that and CONST_INT. * rtl-tests.c (const_poly_int_tests): New struct. (rtl_tests_c_tests): Use it. * simplify-rtx.c (simplify_const_unary_operation): Handle CONST_POLY_INT. (simplify_const_binary_operation): Likewise. (simplify_binary_operation_1): Fold additions of symbolic constants and CONST_POLY_INTs. (simplify_subreg): Handle extensions and truncations of CONST_POLY_INTs. (simplify_const_poly_int_tests): New struct. (simplify_rtx_c_tests): Use it. * wide-int.h (storage_ref): Add default constructor. (wide_int_ref_storage): Likewise. (trailing_wide_ints): Use GTY((user)). (trailing_wide_ints::operator[]): Add a const version. (trailing_wide_ints::get_precision): New function. 
(trailing_wide_ints::extra_size): Likewise. Index: gcc/doc/rtl.texi === --- gcc/doc/rtl.texi2017-10-23 17:00:20.916834036 +0100 +++ gcc/doc/rtl.texi2017-10-23 17:00:54.437007600 +0100 @@ -1621,6 +1621,15 @@ is accessed with the macr
[006/nnn] poly_int: tree constants
This patch adds a tree representation for poly_ints. Unlike the rtx version, the coefficients are INTEGER_CSTs rather than plain integers, so that we can easily access them as poly_widest_ints and poly_offset_ints. The patch also adjusts some places that previously relied on "constant" meaning "INTEGER_CST". It also makes sure that the TYPE_SIZE agrees with the TYPE_SIZE_UNIT for vector booleans, given the existing: /* Several boolean vector elements may fit in a single unit. */ if (VECTOR_BOOLEAN_TYPE_P (type) && type->type_common.mode != BLKmode) TYPE_SIZE_UNIT (type) = size_int (GET_MODE_SIZE (type->type_common.mode)); else TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR, TYPE_SIZE_UNIT (innertype), size_int (nunits)); 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * doc/generic.texi (POLY_INT_CST): Document. * tree.def (POLY_INT_CST): New tree code. * treestruct.def (TS_POLY_INT_CST): New tree layout. * tree-core.h (tree_poly_int_cst): New struct. (tree_node): Add a poly_int_cst field. * tree.h (POLY_INT_CST_P, POLY_INT_CST_COEFF): New macros. (wide_int_to_tree, force_fit_type): Take a poly_wide_int_ref instead of a wide_int_ref. (build_int_cst, build_int_cst_type): Take a poly_int64 instead of a HOST_WIDE_INT. (build_int_cstu, build_array_type_nelts): Take a poly_uint64 instead of an unsigned HOST_WIDE_INT. (build_poly_int_cst, tree_fits_poly_int64_p, tree_fits_poly_uint64_p) (ptrdiff_tree_p): Declare. (tree_to_poly_int64, tree_to_poly_uint64): Likewise. Provide extern inline implementations if the target doesn't use POLY_INT_CST. (poly_int_tree_p): New function. (wi::unextended_tree): New class. (wi::int_traits ): New override. (wi::extended_tree): Add a default constructor. (wi::extended_tree::get_tree): New function. (wi::widest_extended_tree, wi::offset_extended_tree): New typedefs. (wi::tree_to_widest_ref, wi::tree_to_offset_ref): Use them. 
(wi::tree_to_poly_widest_ref, wi::tree_to_poly_offset_ref) (wi::tree_to_poly_wide_ref): New typedefs. (wi::ints_for): Provide overloads for extended_tree and unextended_tree. (poly_int_cst_value, wi::to_poly_widest, wi::to_poly_offset) (wi::to_wide): New functions. (wi::fits_to_boolean_p, wi::fits_to_tree_p): Handle poly_ints. * tree.c (poly_int_cst_hasher): New struct. (poly_int_cst_hash_table): New variable. (tree_node_structure_for_code, tree_code_size, simple_cst_equal) (valid_constant_size_p, add_expr, drop_tree_overflow): Handle POLY_INT_CST. (initialize_tree_contains_struct): Handle TS_POLY_INT_CST. (init_ttree): Initialize poly_int_cst_hash_table. (build_int_cst, build_int_cst_type, build_invariant_address): Take a poly_int64 instead of a HOST_WIDE_INT. (build_int_cstu, build_array_type_nelts): Take a poly_uint64 instead of an unsigned HOST_WIDE_INT. (wide_int_to_tree): Rename to... (wide_int_to_tree_1): ...this. (build_new_poly_int_cst, build_poly_int_cst): New functions. (force_fit_type): Take a poly_wide_int_ref instead of a wide_int_ref. (wide_int_to_tree): New function that takes a poly_wide_int_ref. (ptrdiff_tree_p, tree_to_poly_int64, tree_to_poly_uint64) (tree_fits_poly_int64_p, tree_fits_poly_uint64_p): New functions. * lto-streamer-out.c (DFS::DFS_write_tree_body, hash_tree): Handle TS_POLY_INT_CST. * tree-streamer-in.c (lto_input_ts_poly_tree_pointers): Likewise. (streamer_read_tree_body): Likewise. * tree-streamer-out.c (write_ts_poly_tree_pointers): Likewise. (streamer_write_tree_body): Likewise. * tree-streamer.c (streamer_check_handled_ts_structures): Likewise. * asan.c (asan_protect_global): Require the size to be an INTEGER_CST. * cfgexpand.c (expand_debug_expr): Handle POLY_INT_CST. * expr.c (const_vector_element, expand_expr_real_1): Likewise. * gimple-expr.h (is_gimple_constant): Likewise. * gimplify.c (maybe_with_size_expr): Likewise. * print-tree.c (print_node): Likewise. * tree-data-ref.c (data_ref_compare_tree): Likewise. 
* tree-pretty-print.c (dump_generic_node): Likewise. * tree-ssa-address.c (addr_for_mem_ref): Likewise. * tree-vect-data-refs.c (dr_group_sort_cmp): Likewise. * tree-vrp.c (compare_values_warnv): Likewise. * tree-ssa-loop-ivopts.c (determine_base_object, constant_multiple_of) (get_loop_invariant_expr, add_candidate_1, get_computation_aff_1) (forc
[007/nnn] poly_int: dump routines
Add poly_int routines for the dumpfile.h and pretty-print.h frameworks. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * dumpfile.h (dump_dec): Declare. * dumpfile.c (dump_dec): New function. * pretty-print.h (pp_wide_integer): Turn into a function and declare a poly_int version. * pretty-print.c (pp_wide_integer): New function for poly_ints. Index: gcc/dumpfile.h === --- gcc/dumpfile.h 2017-10-23 16:52:20.417686430 +0100 +++ gcc/dumpfile.h 2017-10-23 17:01:00.431554440 +0100 @@ -174,6 +174,9 @@ extern void dump_gimple_stmt (dump_flags extern void print_combine_total_stats (void); extern bool enable_rtl_dump_file (void); +template +void dump_dec (int, const poly_int &); + /* In tree-dump.c */ extern void dump_node (const_tree, dump_flags_t, FILE *); Index: gcc/dumpfile.c === --- gcc/dumpfile.c 2017-10-23 16:52:20.417686430 +0100 +++ gcc/dumpfile.c 2017-10-23 17:01:00.431554440 +0100 @@ -473,6 +473,27 @@ dump_printf_loc (dump_flags_t dump_kind, } } +/* Output VALUE in decimal to appropriate dump streams. */ + +template +void +dump_dec (int dump_kind, const poly_int &value) +{ + STATIC_ASSERT (poly_coeff_traits::signedness >= 0); + signop sgn = poly_coeff_traits::signedness ? SIGNED : UNSIGNED; + if (dump_file && (dump_kind & pflags)) +print_dec (value, dump_file, sgn); + + if (alt_dump_file && (dump_kind & alt_flags)) +print_dec (value, alt_dump_file, sgn); +} + +template void dump_dec (int, const poly_uint16 &); +template void dump_dec (int, const poly_int64 &); +template void dump_dec (int, const poly_uint64 &); +template void dump_dec (int, const poly_offset_int &); +template void dump_dec (int, const poly_widest_int &); + /* Start a dump for PHASE. Store user-supplied dump flags in *FLAG_PTR. Return the number of streams opened. 
Set globals DUMP_FILE, and ALT_DUMP_FILE to point to the opened streams, and Index: gcc/pretty-print.h === --- gcc/pretty-print.h 2017-10-23 16:52:20.417686430 +0100 +++ gcc/pretty-print.h 2017-10-23 17:01:00.431554440 +0100 @@ -328,8 +328,6 @@ #define pp_wide_int(PP, W, SGN) \ pp_string (PP, pp_buffer (PP)->digit_buffer);\ } \ while (0) -#define pp_wide_integer(PP, I) \ - pp_scalar (PP, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) I) #define pp_pointer(PP, P) pp_scalar (PP, "%p", P) #define pp_identifier(PP, ID) pp_string (PP, (pp_translate_identifiers (PP) \ @@ -401,4 +399,15 @@ extern const char *identifier_to_locale extern void *(*identifier_to_locale_alloc) (size_t); extern void (*identifier_to_locale_free) (void *); +/* Print I to PP in decimal. */ + +inline void +pp_wide_integer (pretty_printer *pp, HOST_WIDE_INT i) +{ + pp_scalar (pp, HOST_WIDE_INT_PRINT_DEC, i); +} + +template +void pp_wide_integer (pretty_printer *pp, const poly_int_pod &); + #endif /* GCC_PRETTY_PRINT_H */ Index: gcc/pretty-print.c === --- gcc/pretty-print.c 2017-10-23 16:52:20.417686430 +0100 +++ gcc/pretty-print.c 2017-10-23 17:01:00.431554440 +0100 @@ -795,6 +795,30 @@ pp_clear_state (pretty_printer *pp) pp_indentation (pp) = 0; } +/* Print X to PP in decimal. */ +template +void +pp_wide_integer (pretty_printer *pp, const poly_int_pod &x) +{ + if (x.is_constant ()) +pp_wide_integer (pp, x.coeffs[0]); + else +{ + pp_left_bracket (pp); + for (unsigned int i = 0; i < N; ++i) + { + if (i != 0) + pp_comma (pp); + pp_wide_integer (pp, x.coeffs[i]); + } + pp_right_bracket (pp); +} +} + +template void pp_wide_integer (pretty_printer *, const poly_uint16_pod &); +template void pp_wide_integer (pretty_printer *, const poly_int64_pod &); +template void pp_wide_integer (pretty_printer *, const poly_uint64_pod &); + /* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */ void pp_write_text_to_stream (pretty_printer *pp)
[008/nnn] poly_int: create_integer_operand
This patch generalises create_integer_operand so that it accepts poly_int64s rather than HOST_WIDE_INTs. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * optabs.h (expand_operand): Add an int_value field. (create_expand_operand): Add an int_value parameter and use it to initialize the new expand_operand field. (create_integer_operand): Replace with a declaration of a function that accepts poly_int64s. Move the implementation to... * optabs.c (create_integer_operand): ...here. (maybe_legitimize_operand): For EXPAND_INTEGER, check whether the mode preserves the value of int_value, instead of calling const_int_operand on the rtx. Index: gcc/optabs.h === --- gcc/optabs.h2017-10-23 16:52:20.393664364 +0100 +++ gcc/optabs.h2017-10-23 17:01:02.532643107 +0100 @@ -60,6 +60,9 @@ struct expand_operand { /* The value of the operand. */ rtx value; + + /* The value of an EXPAND_INTEGER operand. */ + poly_int64 int_value; }; /* Initialize OP with the given fields. Initialise the other fields @@ -69,13 +72,14 @@ struct expand_operand { create_expand_operand (struct expand_operand *op, enum expand_operand_type type, rtx value, machine_mode mode, - bool unsigned_p) + bool unsigned_p, poly_int64 int_value = 0) { op->type = type; op->unsigned_p = unsigned_p; op->unused = 0; op->mode = mode; op->value = value; + op->int_value = int_value; } /* Make OP describe an operand that must use rtx X, even if X is volatile. */ @@ -142,18 +146,7 @@ create_address_operand (struct expand_op create_expand_operand (op, EXPAND_ADDRESS, value, Pmode, false); } -/* Make OP describe an input operand that has value INTVAL and that has - no inherent mode. This function should only be used for operands that - are always expand-time constants. The backend may request that INTVAL - be copied into a different kind of rtx, but it must specify the mode - of that rtx if so. 
*/ - -static inline void -create_integer_operand (struct expand_operand *op, HOST_WIDE_INT intval) -{ - create_expand_operand (op, EXPAND_INTEGER, GEN_INT (intval), VOIDmode, false); -} - +extern void create_integer_operand (struct expand_operand *, poly_int64); /* Passed to expand_simple_binop and expand_binop to say which options to try to use if the requested operation can't be open-coded on the Index: gcc/optabs.c === --- gcc/optabs.c2017-10-23 16:52:20.393664364 +0100 +++ gcc/optabs.c2017-10-23 17:01:02.531644016 +0100 @@ -6959,6 +6959,20 @@ valid_multiword_target_p (rtx target) return true; } +/* Make OP describe an input operand that has value INTVAL and that has + no inherent mode. This function should only be used for operands that + are always expand-time constants. The backend may request that INTVAL + be copied into a different kind of rtx, but it must specify the mode + of that rtx if so. */ + +void +create_integer_operand (struct expand_operand *op, poly_int64 intval) +{ + create_expand_operand (op, EXPAND_INTEGER, +gen_int_mode (intval, MAX_MODE_INT), +VOIDmode, false, intval); +} + /* Like maybe_legitimize_operand, but do not change the code of the current rtx value. */ @@ -7071,7 +7085,9 @@ maybe_legitimize_operand (enum insn_code case EXPAND_INTEGER: mode = insn_data[(int) icode].operand[opno].mode; - if (mode != VOIDmode && const_int_operand (op->value, mode)) + if (mode != VOIDmode + && must_eq (trunc_int_for_mode (op->int_value, mode), + op->int_value)) goto input; break; }
[009/nnn] poly_int: TRULY_NOOP_TRUNCATION
This patch makes TRULY_NOOP_TRUNCATION take the mode sizes as poly_uint64s instead of unsigned ints. The function bodies don't need to change. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * target.def (truly_noop_truncation): Take poly_uint64s instead of unsigned ints. Change default to hook_bool_puint64_puint64_true. * doc/tm.texi: Regenerate. * hooks.h (hook_bool_uint_uint_true): Delete. (hook_bool_puint64_puint64_true): Declare. * hooks.c (hook_bool_uint_uint_true): Delete. (hook_bool_puint64_puint64_true): New function. * config/mips/mips.c (mips_truly_noop_truncation): Take poly_uint64s instead of unsigned ints. * config/spu/spu.c (spu_truly_noop_truncation): Likewise. * config/tilegx/tilegx.c (tilegx_truly_noop_truncation): Likewise. Index: gcc/target.def === --- gcc/target.def 2017-10-23 17:00:20.920834919 +0100 +++ gcc/target.def 2017-10-23 17:01:04.215112587 +0100 @@ -3155,8 +3155,8 @@ is correct for most machines.\n\ If @code{TARGET_MODES_TIEABLE_P} returns false for a pair of modes,\n\ suboptimal code can result if this hook returns true for the corresponding\n\ mode sizes. Making this hook return false in such cases may improve things.", - bool, (unsigned int outprec, unsigned int inprec), - hook_bool_uint_uint_true) + bool, (poly_uint64 outprec, poly_uint64 inprec), + hook_bool_puint64_puint64_true) /* If the representation of integral MODE is such that values are always sign-extended to a wider mode MODE_REP then return Index: gcc/doc/tm.texi === --- gcc/doc/tm.texi 2017-10-23 17:00:20.917834257 +0100 +++ gcc/doc/tm.texi 2017-10-23 17:01:04.214113496 +0100 @@ -10823,7 +10823,7 @@ nevertheless truncate the shift count, y by overriding it. 
@end deftypefn -@deftypefn {Target Hook} bool TARGET_TRULY_NOOP_TRUNCATION (unsigned int @var{outprec}, unsigned int @var{inprec}) +@deftypefn {Target Hook} bool TARGET_TRULY_NOOP_TRUNCATION (poly_uint64 @var{outprec}, poly_uint64 @var{inprec}) This hook returns true if it is safe to ``convert'' a value of @var{inprec} bits to one of @var{outprec} bits (where @var{outprec} is smaller than @var{inprec}) by merely operating on it as if it had only Index: gcc/hooks.h === --- gcc/hooks.h 2017-10-23 16:52:20.369642299 +0100 +++ gcc/hooks.h 2017-10-23 17:01:04.214113496 +0100 @@ -39,7 +39,7 @@ extern bool hook_bool_const_rtx_insn_con const rtx_insn *); extern bool hook_bool_mode_uhwi_false (machine_mode, unsigned HOST_WIDE_INT); -extern bool hook_bool_uint_uint_true (unsigned int, unsigned int); +extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64); extern bool hook_bool_uint_mode_false (unsigned int, machine_mode); extern bool hook_bool_uint_mode_true (unsigned int, machine_mode); extern bool hook_bool_tree_false (tree); Index: gcc/hooks.c === --- gcc/hooks.c 2017-10-23 16:52:20.369642299 +0100 +++ gcc/hooks.c 2017-10-23 17:01:04.214113496 +0100 @@ -133,9 +133,9 @@ hook_bool_mode_uhwi_false (machine_mode, return false; } -/* Generic hook that takes (unsigned int, unsigned int) and returns true. */ +/* Generic hook that takes (poly_uint64, poly_uint64) and returns true. */ bool -hook_bool_uint_uint_true (unsigned int, unsigned int) +hook_bool_puint64_puint64_true (poly_uint64, poly_uint64) { return true; } Index: gcc/config/mips/mips.c === --- gcc/config/mips/mips.c 2017-10-23 17:00:43.528930533 +0100 +++ gcc/config/mips/mips.c 2017-10-23 17:01:04.26223 +0100 @@ -22322,7 +22322,7 @@ mips_promote_function_mode (const_tree t /* Implement TARGET_TRULY_NOOP_TRUNCATION. 
*/ static bool -mips_truly_noop_truncation (unsigned int outprec, unsigned int inprec) +mips_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) { return !TARGET_64BIT || inprec <= 32 || outprec > 32; } Index: gcc/config/spu/spu.c === --- gcc/config/spu/spu.c2017-10-23 17:00:43.548912356 +0100 +++ gcc/config/spu/spu.c2017-10-23 17:01:04.212115314 +0100 @@ -7182,7 +7182,7 @@ spu_can_change_mode_class (machine_mode /* Implement TARGET_TRULY_NOOP_TRUNCATION. */ static bool -spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec) +spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) { return inprec <= 32 && outprec <= inprec; } Index: gcc/config/tilegx/tilegx.c === --- gcc/
[010/nnn] poly_int: REG_OFFSET
This patch changes the type of the reg_attrs offset field from HOST_WIDE_INT to poly_int64 and updates uses accordingly. This includes changing reg_attr_hasher::hash to use inchash. (Doing this has no effect on code generation since the only use of the hasher is to avoid creating duplicate objects.) 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtl.h (reg_attrs::offset): Change from HOST_WIDE_INT to poly_int64. (gen_rtx_REG_offset): Take the offset as a poly_int64. * inchash.h (inchash::hash::add_poly_hwi): New function. * gengtype.c (main): Register poly_int64. * emit-rtl.c (reg_attr_hasher::hash): Use inchash. Treat the offset as a poly_int. (reg_attr_hasher::equal): Use must_eq to compare offsets. (get_reg_attrs, update_reg_offset, gen_rtx_REG_offset): Take the offset as a poly_int64. (set_reg_attrs_from_value): Treat the offset as a poly_int64. * print-rtl.c (print_poly_int): New function. (rtx_writer::print_rtx_operand_code_r): Treat REG_OFFSET as a poly_int. * var-tracking.c (track_offset_p, get_tracked_reg_offset): New functions. (var_reg_set, var_reg_delete_and_set, var_reg_delete): Use them. (same_variable_part_p, track_loc_p): Take the offset as a poly_int64. (vt_get_decl_and_offset): Return the offset as a poly_int64. Enforce track_offset_p for parts of a PARALLEL. (vt_add_function_parameter): Use const_offset for the final offset to track. Use get_tracked_reg_offset for the parts of a PARALLEL. Index: gcc/rtl.h === --- gcc/rtl.h 2017-10-23 17:01:15.119130016 +0100 +++ gcc/rtl.h 2017-10-23 17:01:43.314993320 +0100 @@ -187,7 +187,7 @@ struct GTY(()) mem_attrs struct GTY((for_user)) reg_attrs { tree decl; /* decl corresponding to REG. */ - HOST_WIDE_INT offset;/* Offset from start of DECL. */ + poly_int64 offset; /* Offset from start of DECL. */ }; /* Common union for an element of an rtx. 
*/ @@ -2997,7 +2997,7 @@ subreg_promoted_mode (rtx x) extern rtvec gen_rtvec_v (int, rtx *); extern rtvec gen_rtvec_v (int, rtx_insn **); extern rtx gen_reg_rtx (machine_mode); -extern rtx gen_rtx_REG_offset (rtx, machine_mode, unsigned int, int); +extern rtx gen_rtx_REG_offset (rtx, machine_mode, unsigned int, poly_int64); extern rtx gen_reg_rtx_offset (rtx, machine_mode, int); extern rtx gen_reg_rtx_and_attrs (rtx); extern rtx_code_label *gen_label_rtx (void); Index: gcc/inchash.h === --- gcc/inchash.h 2017-10-23 17:01:29.530765486 +0100 +++ gcc/inchash.h 2017-10-23 17:01:43.314993320 +0100 @@ -63,6 +63,14 @@ hashval_t iterative_hash_hashval_t (hash val = iterative_hash_host_wide_int (v, val); } + /* Add polynomial value V, treating each element as a HOST_WIDE_INT. */ + template + void add_poly_hwi (const poly_int_pod &v) + { +for (unsigned int i = 0; i < N; ++i) + add_hwi (v.coeffs[i]); + } + /* Add wide_int-based value V. */ template void add_wide_int (const generic_wide_int &x) Index: gcc/gengtype.c === --- gcc/gengtype.c 2017-10-23 17:01:15.119130016 +0100 +++ gcc/gengtype.c 2017-10-23 17:01:43.313994743 +0100 @@ -5190,6 +5190,7 @@ #define POS_HERE(Call) do { pos.file = t POS_HERE (do_scalar_typedef ("offset_int", &pos)); POS_HERE (do_scalar_typedef ("widest_int", &pos)); POS_HERE (do_scalar_typedef ("int64_t", &pos)); + POS_HERE (do_scalar_typedef ("poly_int64", &pos)); POS_HERE (do_scalar_typedef ("uint64_t", &pos)); POS_HERE (do_scalar_typedef ("uint8", &pos)); POS_HERE (do_scalar_typedef ("uintptr_t", &pos)); Index: gcc/emit-rtl.c === --- gcc/emit-rtl.c 2017-10-23 17:01:15.119130016 +0100 +++ gcc/emit-rtl.c 2017-10-23 17:01:43.313994743 +0100 @@ -205,7 +205,6 @@ static rtx lookup_const_wide_int (rtx); #endif static rtx lookup_const_double (rtx); static rtx lookup_const_fixed (rtx); -static reg_attrs *get_reg_attrs (tree, int); static rtx gen_const_vector (machine_mode, int); static void copy_rtx_if_shared_1 (rtx *orig); @@ -424,7 +423,10 @@ 
reg_attr_hasher::hash (reg_attrs *x) { const reg_attrs *const p = x; - return ((p->offset * 1000) ^ (intptr_t) p->decl); + inchash::hash h; + h.add_ptr (p->decl); + h.add_poly_hwi (p->offset); + return h.end (); } /* Returns nonzero if the value represented by X is the same as that given by @@ -436,19 +438,19 @@ reg_attr_hasher::equal (reg_attrs *x, re const reg_attrs *const p = x; const reg_attrs *const q = y; - return (p->decl == q->decl && p->offset == q->offset); + return
C PATCH to fix a couple of typos (PR c/82681)
As noted in this PR, chages -> changes. Bootstrapped/regtested on x86_64-linux, applying to trunk. 2017-10-23 Marek Polacek PR c/82681 * c-warn.c (warnings_for_convert_and_check): Fix typos. * gcc.dg/c90-const-expr-11.c: Fix typos in dg-warning. * gcc.dg/overflow-warn-5.c: Likewise. * gcc.dg/overflow-warn-8.c: Likewise. diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c index cb1db0327c3..78f6ba83d98 100644 --- gcc/c-family/c-warn.c +++ gcc/c-family/c-warn.c @@ -1215,12 +1215,12 @@ warnings_for_convert_and_check (location_t loc, tree type, tree expr, if (cst) warning_at (loc, OPT_Woverflow, "overflow in conversion from %qT to %qT " - "chages value from %qE to %qE", + "changes value from %qE to %qE", exprtype, type, expr, result); else warning_at (loc, OPT_Woverflow, "overflow in conversion from %qT to %qT " - "chages the value of %qE", + "changes the value of %qE", exprtype, type, expr); } else diff --git gcc/testsuite/gcc.dg/c90-const-expr-11.c gcc/testsuite/gcc.dg/c90-const-expr-11.c index e4f2aff7874..a2720c47bf4 100644 --- gcc/testsuite/gcc.dg/c90-const-expr-11.c +++ gcc/testsuite/gcc.dg/c90-const-expr-11.c @@ -20,7 +20,7 @@ f (void) /* Overflow. */ struct t b = { INT_MAX + 1 }; /* { dg-warning "integer overflow in expression" } */ /* { dg-error "overflow in constant expression" "constant" { target *-*-* } .-1 } */ - struct t c = { DBL_MAX }; /* { dg-warning "overflow in conversion from .double. to .int. chages value " } */ + struct t c = { DBL_MAX }; /* { dg-warning "overflow in conversion from .double. to .int. changes value " } */ /* { dg-error "overflow in constant expression" "constant" { target *-*-* } .-1 } */ /* Bad operator outside sizeof. */ struct s d = { 1 ? 
1.0 : atan (a.d) }; /* { dg-error "is not a constant expression|near initialization" } */ diff --git gcc/testsuite/gcc.dg/overflow-warn-5.c gcc/testsuite/gcc.dg/overflow-warn-5.c index b2c8dc31d95..1a5aa0c6059 100644 --- gcc/testsuite/gcc.dg/overflow-warn-5.c +++ gcc/testsuite/gcc.dg/overflow-warn-5.c @@ -3,5 +3,5 @@ /* { dg-options "-Woverflow" } */ unsigned char rx_async(unsigned char p) { -return p & 512; /* { dg-warning "overflow in conversion from .int. to .unsigned char. chages value" } */ +return p & 512; /* { dg-warning "overflow in conversion from .int. to .unsigned char. changes value" } */ } diff --git gcc/testsuite/gcc.dg/overflow-warn-8.c gcc/testsuite/gcc.dg/overflow-warn-8.c index ace605517dc..e76bcac5e07 100644 --- gcc/testsuite/gcc.dg/overflow-warn-8.c +++ gcc/testsuite/gcc.dg/overflow-warn-8.c @@ -7,7 +7,7 @@ void foo (int j) int i3 = 1 + INT_MAX; /* { dg-warning "integer overflow" } */ int i4 = +1 + INT_MAX; /* { dg-warning "integer overflow" } */ int i5 = (int)((double)1.0 + INT_MAX); - int i6 = (double)1.0 + INT_MAX; /* { dg-warning "overflow in conversion from .double. to .int. chages value" } */ + int i6 = (double)1.0 + INT_MAX; /* { dg-warning "overflow in conversion from .double. to .int. changes value" } */ int i7 = 0 ? (int)(double)1.0 + INT_MAX : 1; int i8 = 1 ? 1 : (int)(double)1.0 + INT_MAX; int i9 = j ? (int)(double)1.0 + INT_MAX : 1; /* { dg-warning "integer overflow" } */ Marek
[011/nnn] poly_int: DWARF locations
This patch adds support for DWARF location expressions that involve polynomial offsets. It adds a target hook that says how the runtime invariants used in the offsets should be represented in DWARF. SVE vectors have to be a multiple of 128 bits in size, so the GCC port uses the number of 128-bit blocks minus one as the runtime invariant. However, in DWARF, the vector length is exposed via a pseudo "VG" register that holds the number of 64-bit elements in a vector. Thus: indeterminate 1 == (VG / 2) - 1 The hook needs to be general enough to express this. Note that in most cases the division and subtraction fold away into surrounding expressions. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * target.def (dwarf_poly_indeterminate_value): New hook. * targhooks.h (default_dwarf_poly_indeterminate_value): Declare. * targhooks.c (default_dwarf_poly_indeterminate_value): New function. * doc/tm.texi.in (TARGET_DWARF_POLY_INDETERMINATE_VALUE): Document. * doc/tm.texi: Regenerate. * dwarf2out.h (build_cfa_loc, build_cfa_aligned_loc): Take the offset as a poly_int64. * dwarf2out.c (new_reg_loc_descr): Move later in file. Take the offset as a poly_int64. (loc_descr_plus_const, loc_list_plus_const, build_cfa_aligned_loc): Take the offset as a poly_int64. (build_cfa_loc): Likewise. Use loc_descr_plus_const. (frame_pointer_fb_offset): Change to a poly_int64. (int_loc_descriptor): Take the offset as a poly_int64. Use targetm.dwarf_poly_indeterminate_value for polynomial offsets. (based_loc_descr): Take the offset as a poly_int64. Use strip_offset_and_add to handle (plus X (const)). Use new_reg_loc_descr instead of an open-coded version of the previous implementation. (mem_loc_descriptor): Handle CONST_POLY_INT. (compute_frame_pointer_to_fb_displacement): Take the offset as a poly_int64. Use strip_offset_and_add to handle (plus X (const)). 
Index: gcc/target.def === --- gcc/target.def 2017-10-23 17:01:04.215112587 +0100 +++ gcc/target.def 2017-10-23 17:01:45.057509456 +0100 @@ -4124,6 +4124,21 @@ the CFI label attached to the insn, @var the insn and @var{index} is @code{UNSPEC_INDEX} or @code{UNSPECV_INDEX}.", void, (const char *label, rtx pattern, int index), NULL) +DEFHOOK +(dwarf_poly_indeterminate_value, + "Express the value of @code{poly_int} indeterminate @var{i} as a DWARF\n\ +expression, with @var{i} counting from 1. Return the number of a DWARF\n\ +register @var{R} and set @samp{*@var{factor}} and @samp{*@var{offset}} such\n\ +that the value of the indeterminate is:\n\ +@smallexample\n\ +value_of(@var{R}) / @var{factor} - @var{offset}\n\ +@end smallexample\n\ +\n\ +A target only needs to define this hook if it sets\n\ +@samp{NUM_POLY_INT_COEFFS} to a value greater than 1.", + unsigned int, (unsigned int i, unsigned int *factor, int *offset), + default_dwarf_poly_indeterminate_value) + /* ??? Documenting this hook requires a GFDL license grant. */ DEFHOOK_UNDOC (stdarg_optimize_hook, Index: gcc/targhooks.h === --- gcc/targhooks.h 2017-10-23 17:00:20.920834919 +0100 +++ gcc/targhooks.h 2017-10-23 17:01:45.057509456 +0100 @@ -234,6 +234,9 @@ extern int default_label_align_max_skip extern int default_jump_align_max_skip (rtx_insn *); extern section * default_function_section(tree decl, enum node_frequency freq, bool startup, bool exit); +extern unsigned int default_dwarf_poly_indeterminate_value (unsigned int, + unsigned int *, + int *); extern machine_mode default_dwarf_frame_reg_mode (int); extern fixed_size_mode default_get_reg_raw_mode (int); extern bool default_keep_leaf_when_profiled (); Index: gcc/targhooks.c === --- gcc/targhooks.c 2017-10-23 17:00:49.664349224 +0100 +++ gcc/targhooks.c 2017-10-23 17:01:45.057509456 +0100 @@ -1838,6 +1838,15 @@ default_debug_unwind_info (void) return UI_NONE; } +/* Targets that set NUM_POLY_INT_COEFFS to something greater than 1 + must define this hook. 
*/ + +unsigned int +default_dwarf_poly_indeterminate_value (unsigned int, unsigned int *, int *) +{ + gcc_unreachable (); +} + /* Determine the correct mode for a Dwarf frame register that represents register REGNO. */ Index: gcc/doc/tm.texi.in === --- gcc/doc/tm.texi.in 2017-10-23 17:00:20.918834478 +0100 +++ gcc/doc/tm.texi.in 2017-10-23 17:01:45.053515150 +0100 @@ -2553,6 +2553,8 @@ terminate the stack backtrace. New port
[012/nnn] poly_int: fold_ctor_reference
This patch changes the offset and size arguments to fold_ctor_reference from unsigned HOST_WIDE_INT to poly_uint64. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * gimple-fold.h (fold_ctor_reference): Take the offset and size as poly_uint64 rather than unsigned HOST_WIDE_INT. * gimple-fold.c (fold_ctor_reference): Likewise. Index: gcc/gimple-fold.h === --- gcc/gimple-fold.h 2017-10-23 16:52:20.201487839 +0100 +++ gcc/gimple-fold.h 2017-10-23 17:01:48.165079780 +0100 @@ -44,8 +44,7 @@ extern tree follow_single_use_edges (tre extern tree gimple_fold_stmt_to_constant_1 (gimple *, tree (*) (tree), tree (*) (tree) = no_follow_ssa_edges); extern tree gimple_fold_stmt_to_constant (gimple *, tree (*) (tree)); -extern tree fold_ctor_reference (tree, tree, unsigned HOST_WIDE_INT, -unsigned HOST_WIDE_INT, tree); +extern tree fold_ctor_reference (tree, tree, poly_uint64, poly_uint64, tree); extern tree fold_const_aggregate_ref_1 (tree, tree (*) (tree)); extern tree fold_const_aggregate_ref (tree); extern tree gimple_get_virt_method_for_binfo (HOST_WIDE_INT, tree, Index: gcc/gimple-fold.c === --- gcc/gimple-fold.c 2017-10-23 16:52:20.201487839 +0100 +++ gcc/gimple-fold.c 2017-10-23 17:01:48.164081204 +0100 @@ -6365,20 +6365,25 @@ fold_nonarray_ctor_reference (tree type, return build_zero_cst (type); } -/* CTOR is value initializing memory, fold reference of type TYPE and size SIZE - to the memory at bit OFFSET. */ +/* CTOR is value initializing memory, fold reference of type TYPE and + size POLY_SIZE to the memory at bit POLY_OFFSET. */ tree -fold_ctor_reference (tree type, tree ctor, unsigned HOST_WIDE_INT offset, -unsigned HOST_WIDE_INT size, tree from_decl) +fold_ctor_reference (tree type, tree ctor, poly_uint64 poly_offset, +poly_uint64 poly_size, tree from_decl) { tree ret; /* We found the field with exact match. 
*/ if (useless_type_conversion_p (type, TREE_TYPE (ctor)) - && !offset) + && known_zero (poly_offset)) return canonicalize_constructor_val (unshare_expr (ctor), from_decl); + /* The remaining optimizations need a constant size and offset. */ + unsigned HOST_WIDE_INT size, offset; + if (!poly_size.is_constant (&size) || !poly_offset.is_constant (&offset)) +return NULL_TREE; + /* We are at the end of walk, see if we can view convert the result. */ if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor)) && !offset
[013/nnn] poly_int: same_addr_size_stores_p
This patch makes tree-ssa-alias.c:same_addr_size_stores_p handle poly_int sizes and offsets. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * tree-ssa-alias.c (same_addr_size_stores_p): Take the offsets and sizes as poly_int64s rather than HOST_WIDE_INTs. Index: gcc/tree-ssa-alias.c === --- gcc/tree-ssa-alias.c2017-10-23 16:52:20.150440950 +0100 +++ gcc/tree-ssa-alias.c2017-10-23 17:01:49.579064221 +0100 @@ -2322,14 +2322,14 @@ stmt_may_clobber_ref_p (gimple *stmt, tr address. */ static bool -same_addr_size_stores_p (tree base1, HOST_WIDE_INT offset1, HOST_WIDE_INT size1, -HOST_WIDE_INT max_size1, -tree base2, HOST_WIDE_INT offset2, HOST_WIDE_INT size2, -HOST_WIDE_INT max_size2) +same_addr_size_stores_p (tree base1, poly_int64 offset1, poly_int64 size1, +poly_int64 max_size1, +tree base2, poly_int64 offset2, poly_int64 size2, +poly_int64 max_size2) { /* Offsets need to be 0. */ - if (offset1 != 0 - || offset2 != 0) + if (maybe_nonzero (offset1) + || maybe_nonzero (offset2)) return false; bool base1_obj_p = SSA_VAR_P (base1); @@ -2348,17 +2348,19 @@ same_addr_size_stores_p (tree base1, HOS tree memref = base1_memref_p ? base1 : base2; /* Sizes need to be valid. */ - if (max_size1 == -1 || max_size2 == -1 - || size1 == -1 || size2 == -1) + if (!known_size_p (max_size1) + || !known_size_p (max_size2) + || !known_size_p (size1) + || !known_size_p (size2)) return false; /* Max_size needs to match size. */ - if (max_size1 != size1 - || max_size2 != size2) + if (may_ne (max_size1, size1) + || may_ne (max_size2, size2)) return false; /* Sizes need to match. */ - if (size1 != size2) + if (may_ne (size1, size2)) return false; @@ -2386,10 +2388,9 @@ same_addr_size_stores_p (tree base1, HOS /* Check that the object size is the same as the store size. That ensures us that ptr points to the start of obj. 
*/ - if (!tree_fits_shwi_p (DECL_SIZE (obj))) -return false; - HOST_WIDE_INT obj_size = tree_to_shwi (DECL_SIZE (obj)); - return obj_size == size1; + return (DECL_SIZE (obj) + && poly_int_tree_p (DECL_SIZE (obj)) + && must_eq (wi::to_poly_offset (DECL_SIZE (obj)), size1)); } /* If STMT kills the memory reference REF return true, otherwise
[014/nnn] poly_int: indirect_refs_may_alias_p
This patch makes indirect_refs_may_alias_p use ranges_may_overlap_p rather than ranges_overlap_p. Unlike the latter, the former can handle negative offsets, so the fix for PR44852 should no longer be necessary. It can also handle offset_int, so avoids unchecked truncations to HOST_WIDE_INT. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * tree-ssa-alias.c (indirect_ref_may_alias_decl_p) (indirect_refs_may_alias_p): Use ranges_may_overlap_p instead of ranges_overlap_p. Index: gcc/tree-ssa-alias.c === --- gcc/tree-ssa-alias.c2017-10-23 17:01:49.579064221 +0100 +++ gcc/tree-ssa-alias.c2017-10-23 17:01:51.044974644 +0100 @@ -1135,23 +1135,13 @@ indirect_ref_may_alias_decl_p (tree ref1 { tree ptr1; tree ptrtype1, dbase2; - HOST_WIDE_INT offset1p = offset1, offset2p = offset2; - HOST_WIDE_INT doffset1, doffset2; gcc_checking_assert ((TREE_CODE (base1) == MEM_REF || TREE_CODE (base1) == TARGET_MEM_REF) && DECL_P (base2)); ptr1 = TREE_OPERAND (base1, 0); - - /* The offset embedded in MEM_REFs can be negative. Bias them - so that the resulting offset adjustment is positive. */ - offset_int moff = mem_ref_offset (base1); - moff <<= LOG2_BITS_PER_UNIT; - if (wi::neg_p (moff)) -offset2p += (-moff).to_short_addr (); - else -offset1p += moff.to_short_addr (); + offset_int moff = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT; /* If only one reference is based on a variable, they cannot alias if the pointer access is beyond the extent of the variable access. @@ -1160,7 +1150,7 @@ indirect_ref_may_alias_decl_p (tree ref1 ??? IVOPTs creates bases that do not honor this restriction, so do not apply this optimization for TARGET_MEM_REFs. */ if (TREE_CODE (base1) != TARGET_MEM_REF - && !ranges_overlap_p (MAX (0, offset1p), -1, offset2p, max_size2)) + && !ranges_may_overlap_p (offset1 + moff, -1, offset2, max_size2)) return false; /* They also cannot alias if the pointer may not point to the decl. 
*/ if (!ptr_deref_may_alias_decl_p (ptr1, base2)) @@ -1213,18 +1203,11 @@ indirect_ref_may_alias_decl_p (tree ref1 dbase2 = ref2; while (handled_component_p (dbase2)) dbase2 = TREE_OPERAND (dbase2, 0); - doffset1 = offset1; - doffset2 = offset2; + HOST_WIDE_INT doffset1 = offset1; + offset_int doffset2 = offset2; if (TREE_CODE (dbase2) == MEM_REF || TREE_CODE (dbase2) == TARGET_MEM_REF) -{ - offset_int moff = mem_ref_offset (dbase2); - moff <<= LOG2_BITS_PER_UNIT; - if (wi::neg_p (moff)) - doffset1 -= (-moff).to_short_addr (); - else - doffset2 -= moff.to_short_addr (); -} +doffset2 -= mem_ref_offset (dbase2) << LOG2_BITS_PER_UNIT; /* If either reference is view-converted, give up now. */ if (same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (ptrtype1)) != 1 @@ -1241,7 +1224,7 @@ indirect_ref_may_alias_decl_p (tree ref1 if ((TREE_CODE (base1) != TARGET_MEM_REF || (!TMR_INDEX (base1) && !TMR_INDEX2 (base1))) && same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (dbase2)) == 1) -return ranges_overlap_p (doffset1, max_size1, doffset2, max_size2); +return ranges_may_overlap_p (doffset1, max_size1, doffset2, max_size2); if (ref1 && ref2 && nonoverlapping_component_refs_p (ref1, ref2)) @@ -1313,22 +1296,10 @@ indirect_refs_may_alias_p (tree ref1 ATT && operand_equal_p (TMR_INDEX2 (base1), TMR_INDEX2 (base2), 0)) { - offset_int moff; - /* The offset embedded in MEM_REFs can be negative. Bias them -so that the resulting offset adjustment is positive. 
*/ - moff = mem_ref_offset (base1); - moff <<= LOG2_BITS_PER_UNIT; - if (wi::neg_p (moff)) - offset2 += (-moff).to_short_addr (); - else - offset1 += moff.to_shwi (); - moff = mem_ref_offset (base2); - moff <<= LOG2_BITS_PER_UNIT; - if (wi::neg_p (moff)) - offset1 += (-moff).to_short_addr (); - else - offset2 += moff.to_short_addr (); - return ranges_overlap_p (offset1, max_size1, offset2, max_size2); + offset_int moff1 = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT; + offset_int moff2 = mem_ref_offset (base2) << LOG2_BITS_PER_UNIT; + return ranges_may_overlap_p (offset1 + moff1, max_size1, + offset2 + moff2, max_size2); } if (!ptr_derefs_may_alias_p (ptr1, ptr2)) return false;
[015/nnn] poly_int: ao_ref and vn_reference_op_t
This patch changes the offset, size and max_size fields of ao_ref from HOST_WIDE_INT to poly_int64 and propagates the change through the code that references it. This includes changing the off field of vn_reference_op_struct in the same way. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * inchash.h (inchash::hash::add_poly_int): New function. * tree-ssa-alias.h (ao_ref::offset, ao_ref::size, ao_ref::max_size): Use poly_int64 rather than HOST_WIDE_INT. (ao_ref::max_size_known_p): New function. * tree-ssa-sccvn.h (vn_reference_op_struct::off): Use poly_int64_pod rather than HOST_WIDE_INT. * tree-ssa-alias.c (ao_ref_base): Apply get_ref_base_and_extent to temporaries until its interface is adjusted to match. (ao_ref_init_from_ptr_and_size): Handle polynomial offsets and sizes. (aliasing_component_refs_p, decl_refs_may_alias_p) (indirect_ref_may_alias_decl_p, indirect_refs_may_alias_p): Take the offsets and max_sizes as poly_int64s instead of HOST_WIDE_INTs. (refs_may_alias_p_1, stmt_kills_ref_p): Adjust for changes to ao_ref fields. * alias.c (ao_ref_from_mem): Likewise. * tree-ssa-dce.c (mark_aliased_reaching_defs_necessary_1): Likewise. * tree-ssa-dse.c (valid_ao_ref_for_dse, normalize_ref) (clear_bytes_written_by, setup_live_bytes_from_ref, compute_trims) (maybe_trim_complex_store, maybe_trim_constructor_store) (live_bytes_read, dse_classify_store): Likewise. * tree-ssa-sccvn.c (vn_reference_compute_hash, vn_reference_eq): (copy_reference_ops_from_ref, ao_ref_init_from_vn_reference) (fully_constant_vn_reference_p, valueize_refs_1): Likewise. (vn_reference_lookup_3): Likewise. * tree-ssa-uninit.c (warn_uninitialized_vars): Likewise. Index: gcc/inchash.h === --- gcc/inchash.h 2017-10-23 17:01:43.314993320 +0100 +++ gcc/inchash.h 2017-10-23 17:01:52.303181137 +0100 @@ -57,6 +57,14 @@ hashval_t iterative_hash_hashval_t (hash val = iterative_hash_hashval_t (v, val); } + /* Add polynomial value V, treating each element as an unsigned int. 
*/ + template + void add_poly_int (const poly_int_pod &v) + { +for (unsigned int i = 0; i < N; ++i) + add_int (v.coeffs[i]); + } + /* Add HOST_WIDE_INT value V. */ void add_hwi (HOST_WIDE_INT v) { Index: gcc/tree-ssa-alias.h === --- gcc/tree-ssa-alias.h2017-10-23 16:52:20.058356365 +0100 +++ gcc/tree-ssa-alias.h2017-10-23 17:01:52.304179714 +0100 @@ -80,11 +80,11 @@ struct ao_ref the following fields are not yet computed. */ tree base; /* The offset relative to the base. */ - HOST_WIDE_INT offset; + poly_int64 offset; /* The size of the access. */ - HOST_WIDE_INT size; + poly_int64 size; /* The maximum possible extent of the access or -1 if unconstrained. */ - HOST_WIDE_INT max_size; + poly_int64 max_size; /* The alias set of the access or -1 if not yet computed. */ alias_set_type ref_alias_set; @@ -94,8 +94,18 @@ struct ao_ref /* Whether the memory is considered a volatile access. */ bool volatile_p; + + bool max_size_known_p () const; }; +/* Return true if the maximum size is known, rather than the special -1 + marker. */ + +inline bool +ao_ref::max_size_known_p () const +{ + return known_size_p (max_size); +} /* In tree-ssa-alias.c */ extern void ao_ref_init (ao_ref *, tree); Index: gcc/tree-ssa-sccvn.h === --- gcc/tree-ssa-sccvn.h2017-10-23 16:52:20.058356365 +0100 +++ gcc/tree-ssa-sccvn.h2017-10-23 17:01:52.305178291 +0100 @@ -93,7 +93,7 @@ typedef struct vn_reference_op_struct /* For storing TYPE_ALIGN for array ref element size computation. */ unsigned align : 6; /* Constant offset this op adds or -1 if it is variable. 
*/ - HOST_WIDE_INT off; + poly_int64_pod off; tree type; tree op0; tree op1; Index: gcc/tree-ssa-alias.c === --- gcc/tree-ssa-alias.c2017-10-23 17:01:51.044974644 +0100 +++ gcc/tree-ssa-alias.c2017-10-23 17:01:52.304179714 +0100 @@ -635,11 +635,15 @@ ao_ref_init (ao_ref *r, tree ref) ao_ref_base (ao_ref *ref) { bool reverse; + HOST_WIDE_INT offset, size, max_size; if (ref->base) return ref->base; - ref->base = get_ref_base_and_extent (ref->ref, &ref->offset, &ref->size, - &ref->max_size, &reverse); + ref->base = get_ref_base_and_extent (ref->ref, &offset, &size, + &max_size, &reverse); + ref->offset = offset; + ref->size = size; + ref->max_size = max_size; return ref->base; } @@ -679,7
[016/nnn] poly_int: dse.c
This patch makes RTL DSE use poly_int for offsets and sizes. The local phase can optimise them normally but the global phase treats them as wild accesses. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * dse.c (store_info): Change offset and width from HOST_WIDE_INT to poly_int64. Update commentary for positions_needed.large. (read_info_type): Change offset and width from HOST_WIDE_INT to poly_int64. (set_usage_bits): Likewise. (canon_address): Return the offset as a poly_int64 rather than a HOST_WIDE_INT. Use strip_offset_and_add. (set_all_positions_unneeded, any_positions_needed_p): Use positions_needed.large to track stores with non-constant widths. (all_positions_needed_p): Likewise. Take the offset and width as poly_int64s rather than ints. Assert that rhs is nonnull. (record_store): Cope with non-constant offsets and widths. Nullify the rhs of an earlier store if we can't tell which bytes of it are needed. (find_shift_sequence): Take the access_size and shift as poly_int64s rather than ints. (get_stored_val): Take the read_offset and read_width as poly_int64s rather than HOST_WIDE_INTs. (check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Handle non-constant offsets and widths. Index: gcc/dse.c === --- gcc/dse.c 2017-10-23 16:52:20.003305798 +0100 +++ gcc/dse.c 2017-10-23 17:01:54.249406896 +0100 @@ -244,11 +244,11 @@ struct store_info rtx mem_addr; /* The offset of the first byte associated with the operation. */ - HOST_WIDE_INT offset; + poly_int64 offset; /* The number of bytes covered by the operation. This is always exact and known (rather than -1). */ - HOST_WIDE_INT width; + poly_int64 width; union { @@ -259,12 +259,19 @@ struct store_info struct { - /* A bitmap with one bit per byte. Cleared bit means the position -is needed. Used if IS_LARGE is false. */ + /* A bitmap with one bit per byte, or null if the number of +bytes isn't known at compile time. A cleared bit means +the position is needed. Used if IS_LARGE is true. 
*/ bitmap bmap; - /* Number of set bits (i.e. unneeded bytes) in BITMAP. If it is -equal to WIDTH, the whole store is unused. */ + /* When BITMAP is nonnull, this counts the number of set bits +(i.e. unneeded bytes) in the bitmap. If it is equal to +WIDTH, the whole store is unused. + +When BITMAP is null: +- the store is definitely not needed when COUNT == 1 +- all the store is needed when COUNT == 0 and RHS is nonnull +- otherwise we don't know which parts of the store are needed. */ int count; } large; } positions_needed; @@ -308,10 +315,10 @@ struct read_info_type int group_id; /* The offset of the first byte associated with the operation. */ - HOST_WIDE_INT offset; + poly_int64 offset; /* The number of bytes covered by the operation, or -1 if not known. */ - HOST_WIDE_INT width; + poly_int64 width; /* The mem being read. */ rtx mem; @@ -940,13 +947,18 @@ can_escape (tree expr) OFFSET and WIDTH. */ static void -set_usage_bits (group_info *group, HOST_WIDE_INT offset, HOST_WIDE_INT width, +set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width, tree expr) { - HOST_WIDE_INT i; + /* Non-constant offsets and widths act as global kills, so there's no point + trying to use them to derive global DSE candidates. 
*/ + HOST_WIDE_INT i, const_offset, const_width; bool expr_escapes = can_escape (expr); - if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET) -for (i=offset; i -MAX_OFFSET + && const_offset + const_width < MAX_OFFSET) +for (i = const_offset; i < const_offset + const_width; ++i) { bitmap store1; bitmap store2; @@ -1080,7 +1092,7 @@ const_or_frame_p (rtx x) static bool canon_address (rtx mem, int *group_id, - HOST_WIDE_INT *offset, + poly_int64 *offset, cselib_val **base) { machine_mode address_mode = get_address_mode (mem); @@ -1147,12 +1159,7 @@ canon_address (rtx mem, if (GET_CODE (address) == CONST) address = XEXP (address, 0); - if (GET_CODE (address) == PLUS - && CONST_INT_P (XEXP (address, 1))) - { - *offset = INTVAL (XEXP (address, 1)); - address = XEXP (address, 0); - } + address = strip_offset_and_add (address, offset); if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)) && const_or_frame_p (address)) @@ -1160,8 +1167,11 @@ canon_address (rtx mem, group_info *group = ge
[017/nnn] poly_int: rtx_addr_can_trap_p_1
This patch changes the offset and size arguments of rtx_addr_can_trap_p_1 from HOST_WIDE_INT to poly_int64. It also uses a size of -1 rather than 0 to represent an unknown size and BLKmode rather than VOIDmode to represent an unknown mode. 2017-10-23 Richard Sandiford Alan Hayward David Sherwood gcc/ * rtlanal.c (rtx_addr_can_trap_p_1): Take the offset and size as poly_int64s rather than HOST_WIDE_INTs. Use a size of -1 rather than 0 to represent an unknown size. Assert that the size is known when the mode isn't BLKmode. (may_trap_p_1): Use -1 for unknown sizes. (rtx_addr_can_trap_p): Likewise. Pass BLKmode rather than VOIDmode. Index: gcc/rtlanal.c === --- gcc/rtlanal.c 2017-10-23 17:00:54.444001238 +0100 +++ gcc/rtlanal.c 2017-10-23 17:01:55.453690255 +0100 @@ -457,16 +457,17 @@ get_initial_register_offset (int from, i references on strict alignment machines. */ static int -rtx_addr_can_trap_p_1 (const_rtx x, HOST_WIDE_INT offset, HOST_WIDE_INT size, +rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size, machine_mode mode, bool unaligned_mems) { enum rtx_code code = GET_CODE (x); + gcc_checking_assert (mode == BLKmode || known_size_p (size)); /* The offset must be a multiple of the mode size if we are considering unaligned memory references on strict alignment machines. */ - if (STRICT_ALIGNMENT && unaligned_mems && GET_MODE_SIZE (mode) != 0) + if (STRICT_ALIGNMENT && unaligned_mems && mode != BLKmode) { - HOST_WIDE_INT actual_offset = offset; + poly_int64 actual_offset = offset; #ifdef SPARC_STACK_BOUNDARY_HACK /* ??? 
The SPARC port may claim a STACK_BOUNDARY higher than @@ -477,7 +478,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST actual_offset -= STACK_POINTER_OFFSET; #endif - if (actual_offset % GET_MODE_SIZE (mode) != 0) + if (!multiple_p (actual_offset, GET_MODE_SIZE (mode))) return 1; } @@ -489,14 +490,14 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST if (!CONSTANT_POOL_ADDRESS_P (x) && !SYMBOL_REF_FUNCTION_P (x)) { tree decl; - HOST_WIDE_INT decl_size; + poly_int64 decl_size; - if (offset < 0) + if (may_lt (offset, 0)) + return 1; + if (known_zero (offset)) + return 0; + if (!known_size_p (size)) return 1; - if (size == 0) - size = GET_MODE_SIZE (mode); - if (size == 0) - return offset != 0; /* If the size of the access or of the symbol is unknown, assume the worst. */ @@ -507,9 +508,10 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST if (!decl) decl_size = -1; else if (DECL_P (decl) && DECL_SIZE_UNIT (decl)) - decl_size = (tree_fits_shwi_p (DECL_SIZE_UNIT (decl)) -? tree_to_shwi (DECL_SIZE_UNIT (decl)) -: -1); + { + if (!poly_int_tree_p (DECL_SIZE_UNIT (decl), &decl_size)) + decl_size = -1; + } else if (TREE_CODE (decl) == STRING_CST) decl_size = TREE_STRING_LENGTH (decl); else if (TYPE_SIZE_UNIT (TREE_TYPE (decl))) @@ -517,7 +519,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST else decl_size = -1; - return (decl_size <= 0 ? 
offset != 0 : offset + size > decl_size); + return !known_subrange_p (offset, size, 0, decl_size); } return 0; @@ -534,17 +536,14 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])) { #ifdef RED_ZONE_SIZE - HOST_WIDE_INT red_zone_size = RED_ZONE_SIZE; + poly_int64 red_zone_size = RED_ZONE_SIZE; #else - HOST_WIDE_INT red_zone_size = 0; + poly_int64 red_zone_size = 0; #endif - HOST_WIDE_INT stack_boundary = PREFERRED_STACK_BOUNDARY -/ BITS_PER_UNIT; - HOST_WIDE_INT low_bound, high_bound; - - if (size == 0) - size = GET_MODE_SIZE (mode); - if (size == 0) + poly_int64 stack_boundary = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT; + poly_int64 low_bound, high_bound; + + if (!known_size_p (size)) return 1; if (x == frame_pointer_rtx) @@ -562,10 +561,10 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST } else if (x == hard_frame_pointer_rtx) { - HOST_WIDE_INT sp_offset + poly_int64 sp_offset = get_initial_register_offset (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM); - HOST_WIDE_INT ap_offset + poly_int64 ap_offset = get_initial_registe