date:20171023

[PATCH] Make -gcolumn-info the default

2017-10-23 Thread Jakub Jelinek

Hi!

When -gcolumn-info was added back in February, it was too late in the
release cycle to make it the default, but I think now is the good time
to do it for GCC8.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-23  Jakub Jelinek  

* common.opt (gcolumn-info): Enable by default.
* doc/invoke.texi (gcolumn-info): Document new default.

* lib/scanasm.exp (dg-function-on-line): Accept optional column info.
* gcc.dg/debug/dwarf2/pr53948.c: Likewise.
* g++.dg/debug/dwarf2/pr77363.C: Likewise.
* gcc.dg/debug/dwarf2/asm-line1.c: Add -gno-column-info to dg-options.
* gcc.dg/debug/dwarf2/discriminator.c: Likewise.
* g++.dg/debug/dwarf2/typedef6.C: Likewise.

--- gcc/common.opt.jj   2017-10-20 16:02:58.0 +0200
+++ gcc/common.opt  2017-10-20 16:54:04.522033739 +0200
@@ -2873,7 +2873,7 @@ Common Driver JoinedOrMissing Negative(g
 Generate debug information in COFF format.
 
 gcolumn-info
-Common Driver Var(debug_column_info,1) Init(0)
+Common Driver Var(debug_column_info,1) Init(1)
 Record DW_AT_decl_column and DW_AT_call_column in DWARF.
 
 gdwarf
--- gcc/doc/invoke.texi.jj  2017-10-20 16:22:07.0 +0200
+++ gcc/doc/invoke.texi 2017-10-20 16:54:32.940684888 +0200
@@ -7064,7 +7064,7 @@ Allow using extensions of later DWARF st
 @opindex gno-column-info
 Emit location column information into DWARF debugging information, rather
 than just file and line.
-This option is disabled by default.
+This option is enabled by default.
 
 @item -gz@r{[}=@var{type}@r{]}
 @opindex gz
--- gcc/testsuite/lib/scanasm.exp.jj2017-10-17 17:58:16.0 +0200
+++ gcc/testsuite/lib/scanasm.exp   2017-10-21 11:52:49.955774302 +0200
@@ -484,16 +484,16 @@ proc dg-function-on-line { args } {
 }
 
 if { [istarget hppa*-*-*] } {
-   set pattern [format {\t;[^:]+:%d\n(\t[^\t]+\n)+%s:\n\t.PROC} \
+   set pattern [format {\t;[^:]+:%d(:[0-9]+)?\n(\t[^\t]+\n)+%s:\n\t.PROC} \
  $line $symbol]
 } elseif { [istarget mips*-*-*] } {
-   set pattern [format {\t\.loc [0-9]+ %d 0( 
[^\n]*)?\n(\t.cfi_startproc[^\t]*\n)*\t\.set\t(no)?mips16\n\t(\.set\t(no)?micromips\n\t)?\.ent\t%s\n\t\.type\t%s,
 @function\n%s:\n} \
+   set pattern [format {\t\.loc [0-9]+ %d [0-9]+( 
[^\n]*)?\n(\t.cfi_startproc[^\t]*\n)*\t\.set\t(no)?mips16\n\t(\.set\t(no)?micromips\n\t)?\.ent\t%s\n\t\.type\t%s,
 @function\n%s:\n} \
 $line $symbol $symbol $symbol]
 } elseif { [istarget microblaze*-*-*] } {
-set pattern [format {:%d\n\$.*:\n\t\.ent\t%s\n\t\.type\t%s, 
@function\n%s:\n} \
+set pattern [format {:%d(:[0-9]+)?\n\$.*:\n\t\.ent\t%s\n\t\.type\t%s, 
@function\n%s:\n} \
  $line $symbol $symbol $symbol]
 } else {
-   set pattern [format 
{%s:[^\t]*(\t.(fnstart|frame|mask|file)[^\t]*)*\t[^:]+:%d\n} \
+   set pattern [format 
{%s:[^\t]*(\t.(fnstart|frame|mask|file)[^\t]*)*\t[^:]+:%d(:[0-9]+)?\n} \
  $symbol $line]
 }
 
--- gcc/testsuite/gcc.dg/debug/dwarf2/asm-line1.c.jj2014-09-25 
15:02:24.0 +0200
+++ gcc/testsuite/gcc.dg/debug/dwarf2/asm-line1.c   2017-10-21 
11:46:24.655510428 +0200
@@ -1,6 +1,6 @@
 /* PR debug/50983 */
 /* { dg-do compile { target *-*-gnu* } } */
-/* { dg-options "-O0 -gdwarf" } */
+/* { dg-options "-O0 -gdwarf -gno-column-info" } */
 /* { dg-final { scan-assembler "is_stmt 1" } } */
 
 int i;
--- gcc/testsuite/gcc.dg/debug/dwarf2/discriminator.c.jj2014-09-25 
15:02:24.0 +0200
+++ gcc/testsuite/gcc.dg/debug/dwarf2/discriminator.c   2017-10-21 
11:47:22.029804496 +0200
@@ -1,7 +1,7 @@
 /* HAVE_AS_DWARF2_DEBUG_LINE macro needs to be defined to pass the unittest.
However, dg cannot access it, so we restrict to GNU targets.  */
 /* { dg-do compile { target *-*-gnu* } } */
-/* { dg-options "-O0 -gdwarf" } */
+/* { dg-options "-O0 -gdwarf -gno-column-info" } */
 /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])?\n" } } 
*/
 /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])? 
discriminator 2\n" } } */
 /* { dg-final { scan-assembler "loc \[0-9] 11 \[0-9]( is_stmt \[0-9])? 
discriminator 1\n" } } */
--- gcc/testsuite/gcc.dg/debug/dwarf2/pr53948.c.jj  2014-09-25 
15:02:24.0 +0200
+++ gcc/testsuite/gcc.dg/debug/dwarf2/pr53948.c 2017-10-21 11:48:14.555158962 
+0200
@@ -1,7 +1,7 @@
 /* Test that we have line information for the line
with local variable initializations.  */
 /* { dg-options "-O0 -gdwarf -dA" } */
-/* { dg-final { scan-assembler ".loc 1 8 0|\[#/!\]\[ \t\]+line 8" } } */
+/* { dg-final { scan-assembler ".loc 1 8 \[0-9\]|\[#/!\]\[ \t\]+line 8" } } */
 
 
 int f (register int a, register int b) {
--- gcc/testsuite/g++.dg/debug/dwarf2/typedef6.C.jj 2015-07-09 
19:47:10.0 +0200
+++ gcc/testsuite/g++.dg/debug/dwarf2/typedef6.C2017-10-21 
11:56:44.744888754 +0200
@@ -1,5 +1,5 @@
 // Origin PR debu

[PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)

2017-10-23 Thread Jakub Jelinek

Hi!

If all fails, when we can't prove that the PIC register is in some hard
register, we delegitimize something + foo@GOTOFF as (something -
_GLOBAL_OFFSET_TABLE_) + foo.  That is reasonable for the middle-end to
understand what is going on (it will never match in actual instructions
though), unfortunately when trying to emit that into .debug_info section
we run into the problem that .long _GLOBAL_OFFSET_TABLE_ etc. is not
actually assembled as address of _GLOBAL_OFFSET_TABLE_, but as
_GLOBAL_OFFSET_TABLE_-. (any time the assembler sees _GLOBAL_OFFSET_TABLE_
symbol by name, it adds the special relocation) and thus we get a bogus
expression.

I couldn't come up with a way to express this that wouldn't be even larger
than what we have, but if we actually not delegitimize it at all and let
it be emitted as
  something
  .byte DW_OP_addr
  .long foo@GOTOFF
  .byte DW_OP_plus
then it works fine and is even shorter than what we used to emit -
  something
  .byte DW_OP_addr
  .long _GLOBAL_OFFSET_TABLE_
  .byte DW_OP_minus
  .byte DW_OP_addr
  .long foo
  .byte DW_OP_plus
In order to achieve that, we need to allow selected UNSPECs through
into debug info, current trunk just gives up on all UNSPECs.

Fortunately, we already have a hook for rejecting some constants, so
by adding the rejection of all UNSPECs into the hook and on i386 overriding
that hook we achieve what we want.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-10-23  Jakub Jelinek  

PR debug/82630
* target.def (const_not_ok_for_debug_p): Default to
default_const_not_ok_for_debug_p instead of hook_bool_rtx_false.
* targhooks.h (default_const_not_ok_for_debug_p): New declaration.
* targhooks.c (default_const_not_ok_for_debug_p): New function.
* dwarf2out.c (const_ok_for_output_1): Only reject UNSPECs for
which targetm.const_not_ok_for_debug_p returned true.
* config/arm/arm.c (arm_const_not_ok_for_debug_p): Return true
for UNSPECs.
* config/powerpcspe/powerpcspe.c (rs6000_const_not_ok_for_debug_p):
Likewise.
* config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p): Likewise.
* config/i386/i386.c (ix86_delegitimize_address_1): Don't delegitimize
UNSPEC_GOTOFF with addend into addend - _GLOBAL_OFFSET_TABLE_ + symbol
if !base_term_p.
(ix86_const_not_ok_for_debug_p): New function.
(i386_asm_output_addr_const_extra): Handle UNSPEC_GOTOFF.
(TARGET_CONST_NOT_OK_FOR_DEBUG_P): Redefine.

* g++.dg/guality/pr82630.C: New test.

--- gcc/target.def.jj   2017-10-10 11:54:13.0 +0200
+++ gcc/target.def  2017-10-20 14:07:06.463135128 +0200
@@ -2822,7 +2822,7 @@ DEFHOOK
  "This hook should return true if @var{x} should not be emitted into\n\
 debug sections.",
  bool, (rtx x),
- hook_bool_rtx_false)
+ default_const_not_ok_for_debug_p)
 
 /* Given an address RTX, say whether it is valid.  */
 DEFHOOK
--- gcc/targhooks.c.jj  2017-10-13 19:02:08.0 +0200
+++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200
@@ -177,6 +177,14 @@ default_legitimize_address_displacement
   return false;
 }
 
+bool
+default_const_not_ok_for_debug_p (rtx x)
+{
+  if (GET_CODE (x) == UNSPEC)
+return true;
+  return false;
+}
+
 rtx
 default_expand_builtin_saveregs (void)
 {
--- gcc/targhooks.h.jj  2017-10-13 19:02:08.0 +0200
+++ gcc/targhooks.h 2017-10-20 14:26:07.945464025 +0200
@@ -26,6 +26,7 @@ extern void default_external_libcall (rt
 extern rtx default_legitimize_address (rtx, rtx, machine_mode);
 extern bool default_legitimize_address_displacement (rtx *, rtx *,
 machine_mode);
+extern bool default_const_not_ok_for_debug_p (rtx);
 
 extern int default_unspec_may_trap_p (const_rtx, unsigned);
 extern machine_mode default_promote_function_mode (const_tree, machine_mode,
--- gcc/dwarf2out.c.jj  2017-10-19 16:18:44.0 +0200
+++ gcc/dwarf2out.c 2017-10-20 14:39:49.432647598 +0200
@@ -13740,9 +13740,17 @@ expansion_failed (tree expr, rtx rtl, ch
 static bool
 const_ok_for_output_1 (rtx rtl)
 {
-  if (GET_CODE (rtl) == UNSPEC)
+  if (targetm.const_not_ok_for_debug_p (rtl))
 {
-  /* If delegitimize_address couldn't do anything with the UNSPEC, assume
+  if (GET_CODE (rtl) != UNSPEC)
+   {
+ expansion_failed (NULL_TREE, rtl,
+   "Expression rejected for debug by the backend.\n");
+ return false;
+   }
+
+  /* If delegitimize_address couldn't do anything with the UNSPEC, and
+the target hook doesn't explicitly allow it in debug info, assume
 we can't express it in the debug info.  */
   /* Don't complain about TLS UNSPECs, those are just too hard to
 delegitimize.  Note this could be a non-decl SYMBOL_REF such as
@@ -13769,13 +13777,6 @@ const_ok_for_output_1 (rtx rtl)
   return false;
 }
 
-  if (targetm.const_not_ok_

Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)

2017-10-23 Thread Richard Biener

On Mon, 23 Oct 2017, Jakub Jelinek wrote:

> Hi!
> 
> If all fails, when we can't prove that the PIC register is in some hard
> register, we delegitimize something + foo@GOTOFF as (something -
> _GLOBAL_OFFSET_TABLE_) + foo.  That is reasonable for the middle-end to
> understand what is going on (it will never match in actual instructions
> though), unfortunately when trying to emit that into .debug_info section
> we run into the problem that .long _GLOBAL_OFFSET_TABLE_ etc. is not
> actually assembled as address of _GLOBAL_OFFSET_TABLE_, but as
> _GLOBAL_OFFSET_TABLE_-. (any time the assembler sees _GLOBAL_OFFSET_TABLE_
> symbol by name, it adds the special relocation) and thus we get a bogus
> expression.
> 
> I couldn't come up with a way to express this that wouldn't be even larger
> than what we have, but if we actually not delegitimize it at all and let
> it be emitted as
>   something
>   .byte DW_OP_addr
>   .long foo@GOTOFF
>   .byte DW_OP_plus
> then it works fine and is even shorter than what we used to emit -
>   something
>   .byte DW_OP_addr
>   .long _GLOBAL_OFFSET_TABLE_
>   .byte DW_OP_minus
>   .byte DW_OP_addr
>   .long foo
>   .byte DW_OP_plus
> In order to achieve that, we need to allow selected UNSPECs through
> into debug info, current trunk just gives up on all UNSPECs.
> 
> Fortunately, we already have a hook for rejecting some constants, so
> by adding the rejection of all UNSPECs into the hook and on i386 overriding
> that hook we achieve what we want.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2017-10-23  Jakub Jelinek  
> 
>   PR debug/82630
>   * target.def (const_not_ok_for_debug_p): Default to
>   default_const_not_ok_for_debug_p instead of hook_bool_rtx_false.
>   * targhooks.h (default_const_not_ok_for_debug_p): New declaration.
>   * targhooks.c (default_const_not_ok_for_debug_p): New function.
>   * dwarf2out.c (const_ok_for_output_1): Only reject UNSPECs for
>   which targetm.const_not_ok_for_debug_p returned true.
>   * config/arm/arm.c (arm_const_not_ok_for_debug_p): Return true
>   for UNSPECs.
>   * config/powerpcspe/powerpcspe.c (rs6000_const_not_ok_for_debug_p):
>   Likewise.
>   * config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p): Likewise.
>   * config/i386/i386.c (ix86_delegitimize_address_1): Don't delegitimize
>   UNSPEC_GOTOFF with addend into addend - _GLOBAL_OFFSET_TABLE_ + symbol
>   if !base_term_p.
>   (ix86_const_not_ok_for_debug_p): New function.
>   (i386_asm_output_addr_const_extra): Handle UNSPEC_GOTOFF.
>   (TARGET_CONST_NOT_OK_FOR_DEBUG_P): Redefine.
> 
>   * g++.dg/guality/pr82630.C: New test.
> 
> --- gcc/target.def.jj 2017-10-10 11:54:13.0 +0200
> +++ gcc/target.def2017-10-20 14:07:06.463135128 +0200
> @@ -2822,7 +2822,7 @@ DEFHOOK
>   "This hook should return true if @var{x} should not be emitted into\n\
>  debug sections.",
>   bool, (rtx x),
> - hook_bool_rtx_false)
> + default_const_not_ok_for_debug_p)
>  
>  /* Given an address RTX, say whether it is valid.  */
>  DEFHOOK
> --- gcc/targhooks.c.jj2017-10-13 19:02:08.0 +0200
> +++ gcc/targhooks.c   2017-10-20 14:26:07.945464025 +0200
> @@ -177,6 +177,14 @@ default_legitimize_address_displacement
>return false;
>  }
>  
> +bool
> +default_const_not_ok_for_debug_p (rtx x)
> +{
> +  if (GET_CODE (x) == UNSPEC)

What about UNSPEC_VOLATILE?

> +return true;
> +  return false;
> +}
> +
>  rtx
>  default_expand_builtin_saveregs (void)
>  {
> --- gcc/targhooks.h.jj2017-10-13 19:02:08.0 +0200
> +++ gcc/targhooks.h   2017-10-20 14:26:07.945464025 +0200
> @@ -26,6 +26,7 @@ extern void default_external_libcall (rt
>  extern rtx default_legitimize_address (rtx, rtx, machine_mode);
>  extern bool default_legitimize_address_displacement (rtx *, rtx *,
>machine_mode);
> +extern bool default_const_not_ok_for_debug_p (rtx);
>  
>  extern int default_unspec_may_trap_p (const_rtx, unsigned);
>  extern machine_mode default_promote_function_mode (const_tree, machine_mode,
> --- gcc/dwarf2out.c.jj2017-10-19 16:18:44.0 +0200
> +++ gcc/dwarf2out.c   2017-10-20 14:39:49.432647598 +0200
> @@ -13740,9 +13740,17 @@ expansion_failed (tree expr, rtx rtl, ch
>  static bool
>  const_ok_for_output_1 (rtx rtl)
>  {
> -  if (GET_CODE (rtl) == UNSPEC)
> +  if (targetm.const_not_ok_for_debug_p (rtl))
>  {
> -  /* If delegitimize_address couldn't do anything with the UNSPEC, assume
> +  if (GET_CODE (rtl) != UNSPEC)
> + {
> +   expansion_failed (NULL_TREE, rtl,
> + "Expression rejected for debug by the backend.\n");
> +   return false;
> + }
> +
> +  /* If delegitimize_address couldn't do anything with the UNSPEC, and
> +  the target hook doesn't explicitly allow it in debug info, assume
>we can't express it in the debug info.

Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)

2017-10-23 Thread Jakub Jelinek

On Mon, Oct 23, 2017 at 09:48:50AM +0200, Richard Biener wrote:
> > --- gcc/targhooks.c.jj  2017-10-13 19:02:08.0 +0200
> > +++ gcc/targhooks.c 2017-10-20 14:26:07.945464025 +0200
> > @@ -177,6 +177,14 @@ default_legitimize_address_displacement
> >return false;
> >  }
> >  
> > +bool
> > +default_const_not_ok_for_debug_p (rtx x)
> > +{
> > +  if (GET_CODE (x) == UNSPEC)
> 
> What about UNSPEC_VOLATILE?

This hook is called on the argument of CONST or SYMBOL_REF.
UNSPEC_VOLATILE can't appear inside of CONST, it wouldn't be CONST then.

UNSPEC appearing outside of CONST is rejected unconditionally in
mem_loc_descriptor:
...
case UNSPEC:
...
  /* If delegitimize_address couldn't do anything with the UNSPEC, we
 can't express it in the debug info.  This can happen e.g. with some
 TLS UNSPECs.  */
  break;
and for UNSPEC_VOLATILE we just ICE, because var-tracking shouldn't let
those through:
default:
  if (flag_checking)
{
  print_rtl (stderr, rtl);
  gcc_unreachable ();
}
  break;

Jakub

Re: [PATCH] Fix wrong-debug with i?86/x86_64 _GLOBAL_OFFSET_TABLE_ (PR debug/82630)

2017-10-23 Thread Richard Biener

On Mon, 23 Oct 2017, Jakub Jelinek wrote:

> On Mon, Oct 23, 2017 at 09:48:50AM +0200, Richard Biener wrote:
> > > --- gcc/targhooks.c.jj2017-10-13 19:02:08.0 +0200
> > > +++ gcc/targhooks.c   2017-10-20 14:26:07.945464025 +0200
> > > @@ -177,6 +177,14 @@ default_legitimize_address_displacement
> > >return false;
> > >  }
> > >  
> > > +bool
> > > +default_const_not_ok_for_debug_p (rtx x)
> > > +{
> > > +  if (GET_CODE (x) == UNSPEC)
> > 
> > What about UNSPEC_VOLATILE?
> 
> This hook is called on the argument of CONST or SYMBOL_REF.
> UNSPEC_VOLATILE can't appear inside of CONST, it wouldn't be CONST then.
> 
> UNSPEC appearing outside of CONST is rejected unconditionally in
> mem_loc_descriptor:
> ...
> case UNSPEC:
> ...
>   /* If delegitimize_address couldn't do anything with the UNSPEC, we
>  can't express it in the debug info.  This can happen e.g. with some
>  TLS UNSPECs.  */
>   break;
> and for UNSPEC_VOLATILE we just ICE, because var-tracking shouldn't let
> those through:
> default:
>   if (flag_checking)
> {
>   print_rtl (stderr, rtl);
>   gcc_unreachable ();
> }
>   break;

Ok.  The patch looks fine from a middle-end point of view.

Thanks,
Richard.

Zen tuning part 11: Fix cost model of AVX moves, unaligned moves and sse<->int moves

2017-10-23 Thread Jan Hubicka

Hi,
this patch extends processor_costs tables by unaligned moves (which is
needed for vectorizer costmodel), by AVX move costs and split
sse<->integer moves to two sections because AMD chips are very assymetric here
(because of different length of pipelines I assume).

Register move cost used to return 100 for all AVX moves, now it will behave
more rationally and I also disabled code that increases costs of sse<->int
moves through memory by 20 because of memory mismatch stall, at least when
quantity moved fits in integer register.

I think newer CPUs handle well cases where value is stored by parts but
read as a whole, but I need to double check it.  We may disable some of
mismatch logic for those as it was made for early designs where sotre and
loads was required to match in size and be aligned.

I kept hack that increase sse<->int costs to be at least 8.  I will look into
it incrementally - it is true that SSE regs do not play well with MODES_TIEABLE
macro, but I do not think artificial cost of 8 is a good way around.

I also had go through the excercise of updating all the CPU tables.
For RA the relative costs sort of matters only within register of given mode
(i.e. it is cheaper to spill SImode register than DImode), but for vectorization
we are replacing integer load/stores by vector load stores and thus costs needs
to be realistics across different units.

I noticed that many of other tables does not make much sense - some of this
seems to be obivous bugs forgetting that move costs are relative to register
move cost which is 2, so it needs to be latency*2 (if we ignore throughput as
we do for now).

I have added latencies according to Agner Fog's manual and chip optmization
guides.  Geode costs are complete guesswork.  There are some inconsistencies in
Agner's tables so I tried to avoid them to not biass the cost model.  For
unaligned moves I kept scheme of using twice of aligned move for CPUs where
alignments matter and having same cost for modern CPUs where it doesn't seem to
matter.  I suppose we can fine-tune incrementally.

For CPUs that do not support SSE/AVX I have added corresponding multiplies
which at least will make GCC to behave sort-of reasonably with contradicting
-march and -mtune flags.

I have benchmarked the patch on CPU2000 on Zen and Core.  It is spec neutral
but it makes improvements on polyhedron (and followup patch to model scatter
gather improves tonto of CPU2k6)

Bootstrapped/regtested x86_64-linux.

Honza

* i386.c (dimode_scalar_chain::compute_convert_gain): Use
xmm_move instead of sse_move.
(sse_store_index): New function.
(ix86_register_move_cost): Be more sensible about mismatch stall;
model AVX moves correctly; make difference between sse->integer and
integer->sse.
(ix86_builtin_vectorization_cost): Model correctly aligned and unaligned
moves; make difference between SSE and AVX.
* i386.h (processor_costs): Remove sse_move; add xmm_move, ymm_move
and zmm_move. Increase size of sse load and store tables;
add unaligned load and store tables; add ssemmx_to_integer.
* x86-tune-costs.h: Update all entries according to real 
move latencies from Agner Fog's manual and chip documentation.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 253982)
+++ config/i386/i386.c  (working copy)
@@ -1601,7 +1601,7 @@ dimode_scalar_chain::compute_convert_gai
   rtx dst = SET_DEST (def_set);
 
   if (REG_P (src) && REG_P (dst))
-   gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
+   gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
   else if (REG_P (src) && MEM_P (dst))
gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
   else if (MEM_P (src) && REG_P (dst))
@@ -38603,6 +38603,28 @@ ix86_can_change_mode_class (machine_mode
   return true;
 }
 
+/* Return index of MODE in the sse load/store tables.  */
+
+static inline int
+sse_store_index (machine_mode mode)
+{
+  switch (GET_MODE_SIZE (mode))
+   {
+ case 4:
+   return 0;
+ case 8:
+   return 1;
+ case 16:
+   return 2;
+ case 32:
+   return 3;
+ case 64:
+   return 4;
+ default:
+   return -1;
+   }
+}
+
 /* Return the cost of moving data of mode M between a
register and memory.  A value of 2 is the default; this cost is
relative to those in `REGISTER_MOVE_COST'.
@@ -38646,21 +38668,9 @@ inline_memory_move_cost (machine_mode mo
 }
   if (SSE_CLASS_P (regclass))
 {
-  int index;
-  switch (GET_MODE_SIZE (mode))
-   {
- case 4:
-   index = 0;
-   break;
- case 8:
-   index = 1;
-   break;
- case 16:
-   index = 2;
-   break;
- default:
-   return 100;
-   }
+  int index = sse_store_index (m

Zen tuning part 10: ix86_builtin_vectorization_cost fixes

2017-10-23 Thread Jan Hubicka

Hi,
this is patch to ix86_builtin_vectorization_cost I have comitted. Compared to
earlier version it only removes now unused fields in processor_costs.  The
patch improves facerec on all tested targets (amdfam10, Buldozer, Zen and
core), largest improvement is on Zen by about 25% (for core improvement is 5%).
It also improves gromacs and lbm (on Zen and core) and makes small regression
in gamess (sub 1%) and tonto (5-7%). Tonto is fixed by the scatter,gather patch
I plan as a followup.

There are also improvements for poyhedron (fatigue, fatigue2 6%, test_fpu 3%)

There is a hack of making unaligned store twice the cost of aligned to make it
similar to previous costmodel.  With this and the earlier fixes to move cost
tables it no longer causes regressions in testsuite with exception of
gcc.target/i386/pr79683.c where costmodel now claims that vectorization is not
profitable with generic (it is profitable i.e. for core) which seems corect:
struct s {
__INT64_TYPE__ a;
__INT64_TYPE__ b;
};
void test(struct s __seg_gs *x) {
x->a += 1;
x->b -= 1;
}

We model vector integer ops as more expensive then integer operations.
I disabled costmodel there.

The unaligned and avx costs will be fixed as followup.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* gcc.target/i386/pr79683.c: Disable costmodel.
* i386.c (ix86_builtin_vectorization_cost): Use existing rtx_cost
latencies instead of having separate table; make difference between
integer and float costs.
* i386.h (processor_costs): Remove scalar_stmt_cost,
scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost,
scalar_to_vec_cost, vec_align_load_cost, vec_unalign_load_cost,
vec_store_cost.
* x86-tune-costs.h: Remove entries which has been removed in
procesor_costs from all tables; make cond_taken_branch_cost
and cond_not_taken_branch_cost COST_N_INSNS based.
Index: testsuite/gcc.target/i386/pr79683.c
===
--- testsuite/gcc.target/i386/pr79683.c (revision 253957)
+++ testsuite/gcc.target/i386/pr79683.c (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -msse2" } */
+/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */
 
 struct s {
 __INT64_TYPE__ a;
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 253957)
+++ config/i386/i386.c  (working copy)
@@ -44051,37 +44051,61 @@ static int
 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
  tree vectype, int)
 {
+  bool fp = false;
+  machine_mode mode = TImode;
+  if (vectype != NULL)
+{
+  fp = FLOAT_TYPE_P (vectype);
+  mode = TYPE_MODE (vectype);
+}
+
   switch (type_of_cost)
 {
   case scalar_stmt:
-return ix86_cost->scalar_stmt_cost;
+return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
 
   case scalar_load:
-return ix86_cost->scalar_load_cost;
+   /* load/store costs are relative to register move which is 2. Recompute
+  it to COSTS_N_INSNS so everything have same base.  */
+return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
+ : ix86_cost->int_load [2]) / 2;
 
   case scalar_store:
-return ix86_cost->scalar_store_cost;
+return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
+ : ix86_cost->int_store [2]) / 2;
 
   case vector_stmt:
-return ix86_cost->vec_stmt_cost;
+return ix86_vec_cost (mode,
+ fp ? ix86_cost->addss : ix86_cost->sse_op,
+ true);
 
   case vector_load:
-return ix86_cost->vec_align_load_cost;
+return ix86_vec_cost (mode,
+ COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
+ true);
 
   case vector_store:
-return ix86_cost->vec_store_cost;
+return ix86_vec_cost (mode,
+ COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
+ true);
 
   case vec_to_scalar:
-return ix86_cost->vec_to_scalar_cost;
-
   case scalar_to_vec:
-return ix86_cost->scalar_to_vec_cost;
+return ix86_vec_cost (mode, ix86_cost->sse_op, true);
 
+  /* We should have separate costs for unaligned loads and gather/scatter.
+Do that incrementally.  */
   case unaligned_load:
-  case unaligned_store:
   case vector_gather_load:
+return ix86_vec_cost (mode,
+ COSTS_N_INSNS (ix86_cost->sse_load[2]),
+ true);
+
+  case unaligned_store:
   case vector_scatter_store:
-return ix86_cost->vec_unalign_load_cost;
+return ix86_vec_cost (mode,
+ COSTS_N_INSNS (ix86_cost->sse_sto

[committed] Use scalar_mode in expand_shift_1

2017-10-23 Thread Richard Sandiford

Since this function handles scalar and vector shifts:

  machine_mode scalar_mode = mode;
  if (VECTOR_MODE_P (mode))
scalar_mode = GET_MODE_INNER (mode);

is equivalent to:

  scalar_mode = GET_MODE_INNER (mode);

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Applied as obvious.

Richard


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* expmed.c (expand_shift_1): Use scalar_mode for scalar_mode.

Index: gcc/expmed.c
===
--- gcc/expmed.c2017-09-23 10:27:39.925846365 +0100
+++ gcc/expmed.c2017-10-23 10:30:47.246081163 +0100
@@ -2337,12 +2337,10 @@ expand_shift_1 (enum tree_code code, mac
   optab lrotate_optab = rotl_optab;
   optab rrotate_optab = rotr_optab;
   machine_mode op1_mode;
-  machine_mode scalar_mode = mode;
+  scalar_mode scalar_mode = GET_MODE_INNER (mode);
   int attempt;
   bool speed = optimize_insn_for_speed_p ();
 
-  if (VECTOR_MODE_P (mode))
-scalar_mode = GET_MODE_INNER (mode);
   op1 = amount;
   op1_mode = GET_MODE (op1);

[committed] Use scalar_int/float_mode in brig_langhook_type_for_mode

2017-10-23 Thread Richard Sandiford

This follows on from similar changes a couple of months ago and
is needed when general modes have variable size.

Tested on x86_64-linux-gnu and applied as obvious.

Richard


2017-10-23  Richard Sandiford  

gcc/brig/
* brig-lang.c (brig_langhook_type_for_mode): Use scalar_int_mode
and scalar_float_mode.

Index: gcc/brig/brig-lang.c
===
--- gcc/brig/brig-lang.c2017-10-02 09:10:56.960755788 +0100
+++ gcc/brig/brig-lang.c2017-10-23 10:33:45.740101313 +0100
@@ -278,10 +278,11 @@ brig_langhook_type_for_mode (machine_mod
   return NULL_TREE;
 }
 
-  enum mode_class mc = GET_MODE_CLASS (mode);
-  if (mc == MODE_FLOAT)
+  scalar_int_mode imode;
+  scalar_float_mode fmode;
+  if (is_int_mode (mode, &imode))
 {
-  switch (GET_MODE_BITSIZE (mode))
+  switch (GET_MODE_BITSIZE (imode))
{
case 32:
  return float_type_node;
@@ -290,15 +291,15 @@ brig_langhook_type_for_mode (machine_mod
default:
  /* We have to check for long double in order to support
 i386 excess precision.  */
- if (mode == TYPE_MODE (long_double_type_node))
+ if (imode == TYPE_MODE (long_double_type_node))
return long_double_type_node;
 
  gcc_unreachable ();
  return NULL_TREE;
}
 }
-  else if (mc == MODE_INT)
-return brig_langhook_type_for_size(GET_MODE_BITSIZE(mode), unsignedp);
+  else if (is_float_mode (mode, &fmode))
+return brig_langhook_type_for_size (GET_MODE_BITSIZE (fmode), unsignedp);
   else
 {
   /* E.g., build_common_builtin_nodes () asks for modes/builtins

Re: [RFC] propagate malloc attribute in ipa-pure-const pass

2017-10-23 Thread Prathamesh Kulkarni

On 14 October 2017 at 03:20, Prathamesh Kulkarni
 wrote:
> On 7 October 2017 at 12:35, Prathamesh Kulkarni
>  wrote:
>> On 7 October 2017 at 11:23, Jan Hubicka  wrote:
 On 6 October 2017 at 06:04, Jan Hubicka  wrote:
 >> Hi Honza,
 >> Thanks for the detailed suggestions, I have updated the patch 
 >> accordingly.
 >> I have following questions on call_summary:
 >> 1] I added field bool is_return_callee in ipa_call_summary to track
 >> whether the caller possibly returns value returned by callee, which
 >> gets rid of return_callees_map. I assume ipa_call_summary_t::remove()
 >> and ipa_call_summary_t::duplicate() will already take care of handling
 >> late insertion/removal of cgraph nodes ? I just initialized
 >> is_return_callee to false in ipa_call_summary::reset and that seems to
 >> work. I am not sure though if I have handled it correctly. Could you
 >> please check that ?
 >
 > I was actually thinking to introduce separate summary for ipa-pure-const 
 > pass,
 > but this seems fine to me too (for one bit definitly more effecient)
 > ipa_call_summary_t::duplicate copies all the fields, so indeed you 
 > should be
 > safe here.
 >
 > Also it is possible for functions to be inserted late.  Updating of call 
 > summaries
 > is currently handled by ipa_fn_summary_t::insert
 >>
 >> 2] ipa_inline() called ipa_free_fn_summary, which made
 >> ipa_call_summaries unavailable during ipa-pure-const pass. I removed
 >> call to ipa_free_fn_summary from ipa_inline, and moved it to
 >> ipa_pure_const::execute(). Is that OK ?
 >
 > Seems OK to me.
 >>
 >> Patch passes bootstrap+test and lto bootstrap+test on 
 >> x86_64-unknown-linux-gnu.
 >> Verfiied SPEC2k6 compiles and runs without miscompares with LTO
 >> enabled on aarch64-linux-gnu.
 >> Cross-tested on arm*-*-* and aarch64*-*-*. I will additionally test
 >> the patch by building chromium or firefox.
 >> Would it be OK to commit if it passes above validations ?
 >>
 >> Thanks,
 >> Prathamesh
 >> >
 >> > Thanks,
 >> > Honza
 >
 >> 2017-10-05  Prathamesh Kulkarni  
 >>
 >>   * cgraph.h (set_malloc_flag): Declare.
 >>   * cgraph.c (set_malloc_flag_1): New function.
 >>   (set_malloc_flag): Likewise.
 >>   * ipa-fnsummary.h (ipa_call_summary): Add new field 
 >> is_return_callee.
 >>   * ipa-fnsummary.c (ipa_call_summary::reset): Set is_return_callee 
 >> to
 >>   false.
 >>   (read_ipa_call_summary): Add support for reading is_return_callee.
 >>   (write_ipa_call_summary): Stream is_return_callee.
 >>   * ipa-inline.c (ipa_inline): Remove call to ipa_free_fn_summary.
 >>   * ipa-pure-const.c: Add headers ssa.h, alloc-pool.h, 
 >> symbol-summary.h,
 >>   ipa-prop.h, ipa-fnsummary.h.
 >>   (malloc_state_e): Define.
 >>   (malloc_state_names): Define.
 >>   (funct_state_d): Add field malloc_state.
 >>   (varying_state): Set malloc_state to STATE_MALLOC_BOTTOM.
 >>   (check_retval_uses): New function.
 >>   (malloc_candidate_p): Likewise.
 >>   (analyze_function): Add support for malloc attribute.
 >>   (pure_const_write_summary): Stream malloc_state.
 >>   (pure_const_read_summary): Add support for reading malloc_state.
 >>   (dump_malloc_lattice): New function.
 >>   (propagate_malloc): New function.
 >>   (ipa_pure_const::execute): Call propagate_malloc and
 >>   ipa_free_fn_summary.
 >>   (pass_local_pure_const::execute): Add support for malloc 
 >> attribute.
 >>   * ssa-iterators.h (RETURN_FROM_IMM_USE_STMT): New macro.
 >>
 >> testsuite/
 >>   * gcc.dg/ipa/propmalloc-1.c: New test-case.
 >>   * gcc.dg/ipa/propmalloc-2.c: Likewise.
 >>   * gcc.dg/ipa/propmalloc-3.c: Likewise.
 >>
 >> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
 >> index 3d0cefbd46b..0aad90d59ea 100644
 >> --- a/gcc/cgraph.c
 >> +++ b/gcc/cgraph.c
 >> @@ -2530,6 +2530,53 @@ cgraph_node::set_nothrow_flag (bool nothrow)
 >>return changed;
 >>  }
 >>
 >> +/* Worker to set malloc flag.  */
 > New line here I guess (it is below)
 >> +static void
 >> +set_malloc_flag_1 (cgraph_node *node, bool malloc_p, bool *changed)
 >> +{
 >> +  if (malloc_p && !DECL_IS_MALLOC (node->decl))
 >> +{
 >> +  DECL_IS_MALLOC (node->decl) = true;
 >> +  *changed = true;
 >> +}
 >> +
 >> +  ipa_ref *ref;
 >> +  FOR_EACH_ALIAS (node, ref)
 >> +{
 >> +  cgraph_node *alias = dyn_cast (ref->referring);
 >> +  if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE)
 >> + set_malloc_flag_1 (alias, malloc_p, changed);
 >> +}
 >> +

Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec align tests.

2017-10-23 Thread Tamar Christina

Ping

From: Tamar Christina
Sent: Monday, October 16, 2017 11:17 AM
To: Christophe Lyon
Cc: Rainer Orth; gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard 
Earnshaw; Marcus Shawcroft
Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec 
align tests.

Hi All,

I've submitted a patch to fix this 
https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00971.html

Permission (just as the new patch) to backport these test changes to
GCC 7 to fix the regressions there?

Thanks,
Tamar.

From: Christophe Lyon 
Sent: Friday, October 6, 2017 5:07:44 PM
To: Tamar Christina
Cc: Rainer Orth; gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard 
Earnshaw; Marcus Shawcroft
Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing vec 
align tests.

On 6 October 2017 at 09:45, Tamar Christina  wrote:
>
>
>> -Original Message-
>> From: Rainer Orth [mailto:r...@cebitec.uni-bielefeld.de]
>> Sent: 05 October 2017 20:16
>> To: Tamar Christina
>> Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw;
>> Marcus Shawcroft
>> Subject: Re: [PATCH][GCC][testsuite][mid-end][ARM][AARCH64] Fix failing
>> vec align tests.
>>
>> Hi Tamar,
>>
>> > Previously I had corrected the vect_hw_misalign check which prompted
>> > these three test to start failing because the condition needs to be
>> > inverted in the testcases.
>> >
>> > Regtested on aarch64-none-elf, arm-none-linux-gnueabihf and x86_64-pc-
>> linux-gnu.
>> >
>> > Ok for trunk?
>> >
>> > Thanks,
>> > Tamar.
>> >
>> > gcc/testsuite/
>> > 2017-10-02  Tamar Christina  
>> >
>> > * gcc.dg/vect/vect-align-1.c: Fix vect_hw_misalign condition.
>> > * gcc.dg/vect/vect-align-2.c: Likewise.
>> > * gcc.dg/vect/vect-multitypes-1.c: Likewise.
>>
>> unfortunately, your patch caused gcc.dg/vect/vect-multitypes-1.c to FAIL on
>> sparc-sun-solaris2.11 (32 and 64-bit):
>>
>> FAIL: gcc.dg/vect/vect-multitypes-1.c -flto -ffat-lto-objects  
>> scan-tree-dump-
>> times vect "Vectorizing an unaligned access" 4
>> FAIL: gcc.dg/vect/vect-multitypes-1.c scan-tree-dump-times vect
>> "Vectorizing an unaligned access" 4
>
> Thanks! I'll take a look.
>

If that's easier for you, I've noticed the same thing on
armeb-none-linux-gnueabihf
--with-mode arm
--with-cpu cortex-a9
--with-fpu neon-fp16

Christophe


> Tamar
>
>>
>> It had XFAILed before.
>>
>>   Rainer
>>
>> --
>> -
>> Rainer Orth, Center for Biotechnology, Bielefeld University

Re: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product for generic tests for ARM and AArch64 [Patch (7/8)]

2017-10-23 Thread Tamar Christina

Ping

From: Tamar Christina
Sent: Thursday, October 12, 2017 3:00:36 PM
To: Richard Earnshaw; James Greenhalgh
Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
Subject: RE: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product for 
generic tests for ARM and AArch64 [Patch (7/8)]

> -Original Message-
> From: Richard Earnshaw (lists) [mailto:richard.earns...@arm.com]
> Sent: 12 October 2017 14:21
> To: Tamar Christina; James Greenhalgh
> Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
> Subject: Re: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product
> for generic tests for ARM and AArch64 [Patch (7/8)]
>
> On 06/10/17 13:45, Tamar Christina wrote:
> > Hi All,
> >
> > this is a respin with the changes suggested. Note that this patch is no 8/8 
> > in
> the series.
> >
> > Regtested on arm-none-eabi, armeb-none-eabi, aarch64-none-elf and
> > aarch64_be-none-elf with no issues found.
> >
> > Ok for trunk?
> >
> > gcc/testsuite
> > 2017-10-06  Tamar Christina  
> >
> > * gcc.dg/vect/vect-reduc-dot-s8a.c
> > (dg-additional-options, dg-require-effective-target): Add +dotprod.
> > * gcc.dg/vect/vect-reduc-dot-u8a.c
> > (dg-additional-options, dg-require-effective-target): Add +dotprod.
> > 
> > From: Tamar Christina
> > Sent: Monday, September 4, 2017 12:35:39 PM
> > To: James Greenhalgh
> > Cc: gcc-patches@gcc.gnu.org; nd; Richard Earnshaw; Marcus Shawcroft
> > Subject: RE: [PATCH][GCC][Testsuite][ARM][AArch64] Enable Dot Product
> > for generic tests for ARM and AArch64 [Patch (7/8)]
> >
> >> I'm surprised that this worked!
> >>
> >> It looks like you unconditionally add the -march=armv8.2-a+dotprod
> >> options, which should cause you to generate instructions which will
> >> not execute on targets which don't support this instruction. As far
> >> as I can see, this is an execute test, so that should cause undefined
> >> instruction exceptions on an Armv8-A target at the very least.
> >
> > It's not, there is no dg-do specified, which means it defaults to "compile"
> > This is a straight compilation tests that checks to see if the target
> > can do the reduction. There may be a main, but it's never executed,
> > which is why I don't have a hardware check against it.
> >
> > The unconditional armv8.2+dotprod is for this reason. It doesn't matter
> what hardware.
> >
> >>
> >> So, not OK in its current form.
> >>
> >> Thanks,
> >> James
> >>
> >>>
> >>> Ok for trunk?
> >>>
> >>> gcc/testsuite
> >>> 2017-09-01  Tamar Christina  
> >>>
> >>> * gcc.dg/vect/vect-reduc-dot-s8a.c
> >>> (dg-additional-options, dg-require-effective-target): Add +dotprod.
> >>> * gcc.dg/vect/vect-reduc-dot-u8a.c
> >>> (dg-additional-options, dg-require-effective-target): Add +dotprod.
> >>>
> >>> --
> >
>
> iff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
> b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
> index
> dc4f52019d5435edbbc811b73dee0f98ff44c1b1..acb6862f8274fb954f69bd45e8
> edeedcdca4cbf7
> 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
> @@ -1,4 +1,7 @@
>  /* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target {
> aarch64*-*-* || arm*-*-* } } } */
>
> Why do you need hardware with dot-product if these are compile-only
> tests?  (presumably that's what the _hw at the end of the require means).

James was right in that vect.exp overrides the default from compile to run for 
these tests,
So they are execution tests.

>
> R.

Re: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and execution tests [Patch (8/8)]

2017-10-23 Thread Tamar Christina

Ping

From: Tamar Christina
Sent: Friday, October 6, 2017 1:45:18 PM
To: Christophe Lyon
Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus 
Shawcroft
Subject: Re: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and 
execution tests [Patch (8/8)]

Hi All,

this is a minor respin of the patch with the comments addressed. Note this 
patch is now 7/8 in the series.

Regtested on arm-none-eabi, armeb-none-eabi,
aarch64-none-elf and aarch64_be-none-elf with no issues found.

Ok for trunk?

gcc/testsuite
2017-10-06  Tamar Christina  

* lib/target-supports.exp
(check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): New.
(check_effective_target_arm_v8_2a_dotprod_neon_ok): New.
(add_options_for_arm_v8_2a_dotprod_neon): New.
(check_effective_target_arm_v8_2a_dotprod_neon_hw): New.
(check_effective_target_vect_sdot_qi): New.
(check_effective_target_vect_udot_qi): New.
* gcc.target/arm/simd/vdot-exec.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New.
* gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon.

From: Tamar Christina
Sent: Monday, September 4, 2017 2:01:40 PM
To: Christophe Lyon
Cc: gcc-patches@gcc.gnu.org; nd; James Greenhalgh; Richard Earnshaw; Marcus 
Shawcroft
Subject: RE: [PATCH][GCC][ARM][AArch64] Testsuite framework changes and 
execution tests [Patch (8/8)]

Hi Christophe,

> >
> > gcc/testsuite
> > 2017-09-01  Tamar Christina  
> >
> > * lib/target-supports.exp
> > (check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache):
> New.
> > (check_effective_target_arm_v8_2a_dotprod_neon_ok): New.
> > (add_options_for_arm_v8_2a_dotprod_neon): New.
> > (check_effective_target_arm_v8_2a_dotprod_neon_hw): New.
> > (check_effective_target_vect_sdot_qi): New.
> > (check_effective_target_vect_udot_qi): New.
> > * gcc.target/arm/simd/vdot-exec.c: New.
>
> Aren't you defining twice P() and ARR() in vdot-exec.c ?
> I'd expect a preprocessor error, did I read too quickly?
>

Yes they are defined twice but they're not redefined, all the definitions
are exactly the same so the pre-processor doesn't care. I can leave only
one if this is confusing.

>
> Thanks,
>
> Christophe
>
> > * gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c: New.
> > * gcc/doc/sourcebuild.texi: Document arm_v8_2a_dotprod_neon.
> >
> > --

[committed] Use SCALAR_INT_TYPE_MODE in loc_list_from_tree_1

2017-10-23 Thread Richard Sandiford

This follows on from similar changes a couple of months ago and
is needed when general modes have variable size.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Applied as obvious.

Richard


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* dwarf2out.c (loc_list_from_tree_1): Use SCALAR_INT_TYPE_MODE

Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c 2017-10-19 21:19:47.742454435 +0100
+++ gcc/dwarf2out.c 2017-10-23 10:36:59.967280171 +0100
@@ -17482,7 +17482,7 @@ loc_list_from_tree_1 (tree loc, int want
&& (INTEGRAL_TYPE_P (TREE_TYPE (loc))
|| POINTER_TYPE_P (TREE_TYPE (loc)))
&& DECL_CONTEXT (loc) == current_function_decl
-   && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (loc)))
+   && (GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (TREE_TYPE (loc)))
<= DWARF2_ADDR_SIZE))
  {
dw_die_ref ref = lookup_decl_die (loc);

Re: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32.

2017-10-23 Thread Tamar Christina

Ping.

Any objections to the patch?

From: gcc-patches-ow...@gcc.gnu.org  on behalf 
of Tamar Christina 
Sent: Monday, October 16, 2017 9:54:23 AM
To: gcc-patches@gcc.gnu.org
Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft; pins...@gmail.com
Subject: Re: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32.

Ping?

From: gcc-patches-ow...@gcc.gnu.org  on behalf 
of Tamar Christina 
Sent: Wednesday, September 13, 2017 4:00:24 PM
To: gcc-patches@gcc.gnu.org
Cc: nd; James Greenhalgh; Richard Earnshaw; Marcus Shawcroft; pins...@gmail.com
Subject: [PATCH][GCC][AArch64] Restrict lrint inlining on ILP32.

Hi All,

The inlining of lrint isn't valid in all cases on ILP32 when
-fno-math-errno is used because an inexact exception is raised in
certain circumstances.

Instead the restriction is placed such that the integer mode has to
be larger or equal to the float mode in addition to either inexacts being
allowed or not caring about trapping math.

This prevents the overflow, and the inexact errors that may arise.

Unfortunately I can't create a test for this as there is a bug where
the pattern is always passed DI as the smallest mode,
and later takes a sub-reg of it to SI. This would prevent an overflow
where one was expected.

This fixed PR/81800.

Regtested on aarch64-none-linux-gnu and no regressions.

Ok for trunk?

Thanks,
Tamar

gcc/
2017-09-13  Tamar Christina  

PR target/81800
* config/aarch64/aarch64.md (lrint2): Add 
flag_trapping_math
and flag_fp_int_builtin_inexact.

gcc/testsuite/
2017-09-13  Tamar Christina  

* gcc.target/aarch64/inline-lrint_2.c (dg-options): Add 
-fno-trapping-math.

--

Re: [PATCH][GCC][Testsuite][SPARC][ARM] Fix vect-multitypes-1.c test on SPARC64 and ARMEB.

2017-10-23 Thread Tamar Christina

Ping

From: gcc-patches-ow...@gcc.gnu.org  on behalf 
of Tamar Christina 
Sent: Monday, October 16, 2017 11:16:21 AM
To: gcc-patches@gcc.gnu.org
Cc: nd; Ramana Radhakrishnan; Richard Earnshaw; ni...@redhat.com; Kyrylo 
Tkachov; christophe.l...@linaro.org; r...@cebitec.uni-bielefeld.de
Subject: [PATCH][GCC][Testsuite][SPARC][ARM] Fix vect-multitypes-1.c test on 
SPARC64 and ARMEB.

Hi All,

This patch fixes a regression introduced by r253451.
The target needs all three conditions to be true before it can
vectorize unaligned accesses. This patch turns the erroneous ||
into an &&.

regtested on aarch64-none-elf, arm-none-linux-gnueabihf,
x86_64-pc-linux-gnu, armeb-none-linux-gnueabihf and
sparc64-unknown-linux-gnu.

OK for trunk?

And for the GCC-7 branch?

Thanks,
Tamar

gcc/testsuite/
2017-10-16  Tamar Christina  

* gcc.dg/vect/vect-multitypes-1.c: Correct target selector.

--

Use SCALAR_TYPE_MODE in vect_create_epilog_for_reduction

2017-10-23 Thread Richard Sandiford

This follows on from similar changes a couple of months ago and
is needed when general modes have variable size.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Applied as obvious.

Richard


2017-10-23  Richard Sandiford  

gcc/
* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
SCALAR_TYPE_MODE instead of TYPE_MODE.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2017-10-22 21:04:50.136830154 +0100
+++ gcc/tree-vect-loop.c2017-10-23 10:39:37.711243373 +0100
@@ -4487,7 +4487,7 @@ vect_create_epilog_for_reduction (vec

PING Fwd: [patch] implement generic debug() for vectors and hash sets

2017-10-23 Thread Aldy Hernandez





 Forwarded Message 
Subject: [patch] implement generic debug() for vectors and hash sets
Date: Mon, 16 Oct 2017 09:52:51 -0400
From: Aldy Hernandez 
To: gcc-patches 

We have a generic mechanism for dumping types from the debugger with:

(gdb) call debug(some_type)

However, even though most types are implemented, we have no canonical
way of dumping vectors or hash sets.

The attached patch fixes this oversight.  With it you can call
debug(vec<>) and debug(hash_set<>) with the following types: rtx,
tree, basic_block, edge, rtx_insn.  More can be added simply by adding
a debug_slim(your_type) overload and calling:

  DEFINE_DEBUG_VEC (your_type)
  DEFINE_DEBUG_HASH_SET (your_type)

Here is an example of how things look with this patch:

vec of edges:
[0] =  10)>

vec of bbs:
[0] = 
[1] = 

vec of trees:
[0] =  
[1] =  
[2] =  

vec of rtx:
[0] = (reg:SI 87)
[1] = (reg:SI 87)

hash of bbs:



OK for mainline?

gcc/

	* vec.h (debug_helper): New function.
	(DEFINE_DEBUG_VEC): New macro.
	* hash-set.h (debug_helper): New function.
	(DEFINE_DEBUG_HASH_SET): New macro.
	* cfg.c (debug_slim (edge)): New function.
	Call DEFINE_DEBUG_VEC for edges.
	Call DEFINE_DEBUG_HASH_SET for edges.
	* cfghooks.c (debug_slim (basic_block)): New function.
	Call DEFINE_DEBUG_VEC for basic blocks.
	Call DEFINE_DEBUG_HASH_SET for basic blocks.
	* print-tree.c (debug_slim): New function to handle trees.
	Call DEFINE_DEBUG_VEC for trees.
	Call DEFINE_DEBUG_HASH_SET for trees.
	(debug (vec) &): Remove.
	(debug () *): Remove.
	* print-rtl.c (debug_slim): New function to handle const_rtx.
	Call DEFINE_DEBUG_VEC for rtx_def.
	Call DEFINE_DEBUG_VEC for rtx_insn.
	Call DEFINE_DEBUG_HASH_SET for rtx_def.
	Call DEFINE_DEBUG_HASH_SET for rtx_insn.
	* sel-sched-dump.c (debug (vec &): Remove.
	(debug (vec *ptr): Remove.
	(debug_insn_vector): Remove.
	* stor-layout.c (debug_rli): Call debug() instead of debug_vec_tree.

diff --git a/gcc/cfg.c b/gcc/cfg.c
index 01e68aeda51..4d02fb56cbf 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -573,6 +573,16 @@ debug (edge_def *ptr)
   else
 fprintf (stderr, "\n");
 }
+
+static void
+debug_slim (edge e)
+{
+  fprintf (stderr, " %d)>", (void *) e,
+	   e->src->index, e->dest->index);
+}
+
+DEFINE_DEBUG_VEC (edge)
+DEFINE_DEBUG_HASH_SET (edge)
 
 /* Simple routines to easily allocate AUX fields of basic blocks.  */
 
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index 258a5eabf8d..73b196feec7 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -304,6 +304,14 @@ debug (basic_block_def *ptr)
 fprintf (stderr, "\n");
 }
 
+static void
+debug_slim (basic_block ptr)
+{
+  fprintf (stderr, "", (void *) ptr, ptr->index);
+}
+
+DEFINE_DEBUG_VEC (basic_block_def *)
+DEFINE_DEBUG_HASH_SET (basic_block_def *)
 
 /* Dumps basic block BB to pretty-printer PP, for use as a label of
a DOT graph record-node.  The implementation of this hook is
diff --git a/gcc/hash-set.h b/gcc/hash-set.h
index d2247d39571..58f7750243a 100644
--- a/gcc/hash-set.h
+++ b/gcc/hash-set.h
@@ -123,6 +123,44 @@ private:
   hash_table m_table;
 };
 
+/* Generic hash_set debug helper.
+
+   This needs to be instantiated for each hash_set used throughout
+   the compiler like this:
+
+DEFINE_DEBUG_HASH_SET (TYPE)
+
+   The reason we have a debug_helper() is because GDB can't
+   disambiguate a plain call to debug(some_hash), and it must be called
+   like debug(some_hash).  */
+template
+void
+debug_helper (hash_set &ref)
+{
+  for (typename hash_set::iterator it = ref.begin ();
+   it != ref.end (); ++it)
+{
+  debug_slim (*it);
+  fputc ('\n', stderr);
+}
+}
+
+#define DEFINE_DEBUG_HASH_SET(T) \
+  template static void debug_helper (hash_set &);	\
+  DEBUG_FUNCTION void	\
+  debug (hash_set &ref)\
+  {			\
+debug_helper  (ref);\
+  }			\
+  DEBUG_FUNCTION void	\
+  debug (hash_set *ptr)\
+  {			\
+if (ptr)		\
+  debug (*ptr);	\
+else		\
+  fprintf (stderr, "\n");			\
+  }
+
 /* ggc marking routines.  */
 
 template
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 28d99862cad..5fe23801ab2 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -967,6 +967,23 @@ debug (const rtx_def *ptr)
 fprintf (stderr, "\n");
 }
 
+/* Like debug_rtx but with no newline, as debug_helper will add one.
+
+   Note: No debug_slim(rtx_insn *) variant implemented, as this
+   function can serve for both rtx and rtx_insn.  */
+
+static void
+debug_slim (const_rtx x)
+{
+  rtx_writer w (stderr, 0, false, false, NULL);
+  w.print_rtx (x);
+}
+
+DEFINE_DEBUG_VEC (rtx_def *)
+DEFINE_DEBUG_VEC (rtx_insn *)
+DEFINE_DEBUG_HASH_SET (rtx_def *)
+DEFINE_DEBUG_HASH_SET (rtx_insn *)
+
 /* Count of rtx's to print with debug_rtx_list.
This global exists because gdb user defined commands have no arguments.  */
 
diff --git a/gcc/print-tree.c b/gcc/print-tree.c
index d534c76ee49..3a0f85d4038 100644
--- a/gcc/print-tree.c
+++ b/gcc/print-tree.c
@@ -1095,32 +1095,6 @@

[committed] Convert STARTING_FRAME_OFFSET to a hook

2017-10-23 Thread Richard Sandiford

I took the documentation of the FRAME_GROWS_DOWNWARD behaviour from the
version that was in most header files, since the one in the manual seemed
less clear.

The patch deliberately keeps FIRST_PARM_OFFSET(FNDECL) in
microblaze_starting_frame_offset; this seems to be a port-local
convention and takes advantage of the fact that FIRST_PARM_OFFSET
doesn't read FNDECL.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also tested on at least one target per CPU directory.  Commmitted as
pre-approved by Jeff here:
  https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00923.html

Richard


2017-10-23  Richard Sandiford  

gcc/
* target.def (starting_frame_offset): New hook.
* doc/tm.texi (STARTING_FRAME_OFFSET): Remove in favor of...
(TARGET_STARTING_FRAME_OFFSET): ...this new hook.
* doc/tm.texi.in: Regenerate.
* hooks.h (hook_hwi_void_0): Declare.
* hooks.c (hook_hwi_void_0): New function.
* doc/rtl.texi: Refer to TARGET_STARTING_FRAME_OFFSET instead of
STARTING_FRAME_OFFSET.
* builtins.c (expand_builtin_setjmp_receiver): Likewise.
* reload1.c (reload): Likewise.
* cfgexpand.c (expand_used_vars): Use targetm.starting_frame_offset
instead of STARTING_FRAME_OFFSET.
* function.c (try_fit_stack_local): Likewise.
(assign_stack_local_1): Likewise
(instantiate_virtual_regs): Likewise.
* rtlanal.c (rtx_addr_can_trap_p_1): Likewise.
* config/avr/avr.md (nonlocal_goto_receiver): Likewise.
* config/aarch64/aarch64.h (STARTING_FRAME_OFFSET): Delete.
* config/alpha/alpha.h (STARTING_FRAME_OFFSET): Likewise.
* config/arc/arc.h (STARTING_FRAME_OFFSET): Likewise.
* config/arm/arm.h (STARTING_FRAME_OFFSET): Likewise.
* config/bfin/bfin.h (STARTING_FRAME_OFFSET): Likewise.
* config/c6x/c6x.h (STARTING_FRAME_OFFSET): Likewise.
* config/cr16/cr16.h (STARTING_FRAME_OFFSET): Likewise.
* config/cris/cris.h (STARTING_FRAME_OFFSET): Likewise.
* config/fr30/fr30.h (STARTING_FRAME_OFFSET): Likewise.
* config/frv/frv.h (STARTING_FRAME_OFFSET): Likewise.
* config/ft32/ft32.h (STARTING_FRAME_OFFSET): Likewise.
* config/h8300/h8300.h (STARTING_FRAME_OFFSET): Likewise.
* config/i386/i386.h (STARTING_FRAME_OFFSET): Likewise.
* config/ia64/ia64.h (STARTING_FRAME_OFFSET): Likewise.
* config/m32c/m32c.h (STARTING_FRAME_OFFSET): Likewise.
* config/m68k/m68k.h (STARTING_FRAME_OFFSET): Likewise.
* config/mcore/mcore.h (STARTING_FRAME_OFFSET): Likewise.
* config/mn10300/mn10300.h (STARTING_FRAME_OFFSET): Likewise.
* config/moxie/moxie.h (STARTING_FRAME_OFFSET): Likewise.
* config/msp430/msp430.h (STARTING_FRAME_OFFSET): Likewise.
* config/nds32/nds32.h (STARTING_FRAME_OFFSET): Likewise.
* config/nios2/nios2.h (STARTING_FRAME_OFFSET): Likewise.
* config/nvptx/nvptx.h (STARTING_FRAME_OFFSET): Likewise.
* config/pdp11/pdp11.h (STARTING_FRAME_OFFSET): Likewise.
* config/riscv/riscv.h (STARTING_FRAME_OFFSET): Likewise.
* config/rl78/rl78.h (STARTING_FRAME_OFFSET): Likewise.
* config/rx/rx.h (STARTING_FRAME_OFFSET): Likewise.
* config/s390/s390.h (STARTING_FRAME_OFFSET): Likewise.
* config/sh/sh.h (STARTING_FRAME_OFFSET): Likewise.
* config/sparc/sparc.c (sparc_compute_frame_size): Likewise.
* config/sparc/sparc.h (STARTING_FRAME_OFFSET): Likewise.
* config/spu/spu.h (STARTING_FRAME_OFFSET): Likewise.
* config/stormy16/stormy16.h (STARTING_FRAME_OFFSET): Likewise.
* config/tilegx/tilegx.h (STARTING_FRAME_OFFSET): Likewise.
* config/tilepro/tilepro.h (STARTING_FRAME_OFFSET): Likewise.
* config/v850/v850.h (STARTING_FRAME_OFFSET): Likewise.
* config/visium/visium.h (STARTING_FRAME_OFFSET): Likewise.
* config/avr/avr.h (STARTING_FRAME_OFFSET): Likewise.
* config/avr/avr-protos.h (avr_starting_frame_offset): Likewise.
* config/avr/avr.c (avr_starting_frame_offset): Make static and
return a HOST_WIDE_INT.
(avr_builtin_setjmp_frame_value): Use it instead of
STARTING_FRAME_OFFSET.
(TARGET_STARTING_FRAME_OFFSET): Redefine.
* config/epiphany/epiphany.h (STARTING_FRAME_OFFSET): Delete.
* config/epiphany/epiphany.c (epiphany_starting_frame_offset):
New function.
(TARGET_STARTING_FRAME_OFFSET): Redefine.
* config/iq2000/iq2000.h (STARTING_FRAME_OFFSET): Delete.
* config/iq2000/iq2000.c (iq2000_starting_frame_offset): New function.
(TARGET_CONSTANT_ALIGNMENT): Redefine.
* config/lm32/lm32.h (STARTING_FRAME_OFFSET): Delete.
* config/lm32/lm32.c (lm32_starting_frame_offset): New function.
(TARGET_STARTING_FRAME_OFFSET): Redefine.
* config/m32r/m32r.h (STARTING_FRAME_OFFSET): Delete.

Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode

2017-10-23 Thread Jakub Jelinek

On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote:
> Hello!
> 
> In PR 82628 Jakub figured out that insn patterns that consume carry
> flag were not 100% correct. Due to this issue, combine is able to
> simplify various CC_REG propagations that result in invalid code.
> 
> Attached patch fixes (well, mitigates) the above problem by splitting
> the double-mode compare after the reload, in the same way other
> *_doubleword patterns are handled from "the beginning of the time".

I'm afraid this is going to haunt us sooner or later, combine isn't the
only pass that uses simplify-rtx.c infrastructure heavily and when we lie
in the RTL pattern, eventually something will be simplified wrongly.

So, at least we'd need to use UNSPEC for the pattern, like (only lightly
tested so far) below.

I'm not sure the double-word pattern is a win though, it causes PR82662
you've filed (the problem is that during ifcvt because of the double-word
comparison the condition is canonicalized as (lt (reg:TI) (reg:TI)) and
there is no instruction in the MD that would take such arguments, there
are only instructions that compare flags registers.
If you look at say normal DImode comparisons, it is the same thing,
ifcvt also can't do anything with those, the reason they work is that we
have a cstoredi4 optab (for 64-bit), but don't have a cstoreti4 optab.
So, we'd need that (and only handle the GE/GEU/LT/LTU + the others that can
be handled by swapping the operands).
I think the double-word pattern has other issues, it will result in RA not
knowing in detail what is going on and thus can at least reserve one extra
register that otherwise would not be needed.  The reason we have the
doubleword patterns elsewhere is that splitting double-word early makes it
harder/impossible for STV to use SSE registers; in this case we don't have
something reasonable to expand to anyway, we always split.

The alternative I have is the patch attached in the PR, if the unrelated
addcarry/subborrow changes are removed, then it doesn't regress anything,
the pr50038.c FAIL is from some other earlier change even on vanilla
branch and pr67317-* FAILs were caused by the addcarry/subborrow changes,
will look at those in detail.

2017-10-23  Jakub Jelinek  

PR target/82628
* config/i386/i386.md (UNSPEC_SBB): New unspec.
(cmp_doubleword): Use unspec instead of compare.
(sub3_carry_ccgz): Use unspec instead of compare.

--- gcc/config/i386/i386.md.jj  2017-10-23 10:13:05.462218947 +0200
+++ gcc/config/i386/i386.md 2017-10-23 11:07:55.470376791 +0200
@@ -112,6 +112,7 @@ (define_c_enum "unspec" [
   UNSPEC_STOS
   UNSPEC_PEEPSIB
   UNSPEC_INSN_FALSE_DEP
+  UNSPEC_SBB

   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -1285,11 +1286,10 @@ (define_insn_and_split "cmp_doublew
   [(set (reg:CC FLAGS_REG)
(compare:CC (match_dup 1) (match_dup 2)))
(parallel [(set (reg:CCGZ FLAGS_REG)
-  (compare: CCGZ
-(match_dup 4)
-(plus:DWIH
-  (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
-  (match_dup 5
+  (unspec:CCGZ [(match_dup 4)
+(match_dup 5)
+(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
+   UNSPEC_SBB))
  (clobber (match_dup 3))])]
   "split_double_mode (mode, &operands[0], 3, &operands[0], 
&operands[3]);")

@@ -6911,13 +6911,18 @@ (define_insn "*subsi3_carry_zext"
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])

+;; The sign flag is set from the
+;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
+;; result, the overflow flag likewise, but the overflow flag is also
+;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
+;; The borrow flag can be modelled, but differently from SF and OF
+;; and is quite difficult to handle.
 (define_insn "*sub3_carry_ccgz"
   [(set (reg:CCGZ FLAGS_REG)
-   (compare:CCGZ
- (match_operand:DWIH 1 "register_operand" "0")
- (plus:DWIH
-   (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
-   (match_operand:DWIH 2 "x86_64_general_operand" "rme"
+   (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
+ (match_operand:DWIH 2 "x86_64_general_operand" "rme")
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
+UNSPEC_SBB))
(clobber (match_scratch:DWIH 0 "=r"))]
   ""
   "sbb{}\t{%2, %0|%0, %2}"

Jakub

Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes

2017-10-23 Thread Martin Jambor

Hi,

On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote:
> I guess that might help. I have the feeling that querying for 'did
> pass X run' is wrong conceptually.

The reason why I liked the idea is that I could unify SRA and
early-SRA passes and their behavior would only differ according to a
"did pass_starg run yet" query.

Admittedly, it is not a big deal, I just always dislike typing
"-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-)

Martin

Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes

2017-10-23 Thread Jakub Jelinek

On Mon, Oct 23, 2017 at 12:18:58PM +0200, Martin Jambor wrote:
> Hi,
> 
> On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote:
> > I guess that might help. I have the feeling that querying for 'did
> > pass X run' is wrong conceptually.
> 
> The reason why I liked the idea is that I could unify SRA and
> early-SRA passes and their behavior would only differ according to a
> "did pass_starg run yet" query.
> 
> Admittedly, it is not a big deal, I just always dislike typing
> "-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-)

-fdump-tree-{,e}sra-details when using sane shell ;) ?

Jakub

Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode

2017-10-23 Thread Uros Bizjak

On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek  wrote:
> On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote:
>> Hello!
>>
>> In PR 82628 Jakub figured out that insn patterns that consume carry
>> flag were not 100% correct. Due to this issue, combine is able to
>> simplify various CC_REG propagations that result in invalid code.
>>
>> Attached patch fixes (well, mitigates) the above problem by splitting
>> the double-mode compare after the reload, in the same way other
>> *_doubleword patterns are handled from "the beginning of the time".
>
> I'm afraid this is going to haunt us sooner or later, combine isn't the
> only pass that uses simplify-rtx.c infrastructure heavily and when we lie
> in the RTL pattern, eventually something will be simplified wrongly.
>
> So, at least we'd need to use UNSPEC for the pattern, like (only lightly
> tested so far) below.

I agree with the above. Patterns that consume Carry flag are now
marked with (plus (ltu (...)), but effectively, they behave like
unspecs. So, I see no problem to change all SBB and ADC to unspec at
once, similar to the change you proposed in the patch.

> I'm not sure the double-word pattern is a win though, it causes PR82662
> you've filed (the problem is that during ifcvt because of the double-word
> comparison the condition is canonicalized as (lt (reg:TI) (reg:TI)) and
> there is no instruction in the MD that would take such arguments, there
> are only instructions that compare flags registers.

It is not a win, my patch was more of a band-aid to mitigate the
failure. It works, but it produces extra moves (as you mentione
below), due to RA not knowing that CMP doesn't clobber the register.
But, let's change the pattern back to expand-time splitting after the
above patch that changes SBB and ADC to unspecs is committed.

> If you look at say normal DImode comparisons, it is the same thing,
> ifcvt also can't do anything with those, the reason they work is that we
> have a cstoredi4 optab (for 64-bit), but don't have a cstoreti4 optab.
> So, we'd need that (and only handle the GE/GEU/LT/LTU + the others that can
> be handled by swapping the operands).
> I think the double-word pattern has other issues, it will result in RA not
> knowing in detail what is going on and thus can at least reserve one extra
> register that otherwise would not be needed.  The reason we have the
> doubleword patterns elsewhere is that splitting double-word early makes it
> harder/impossible for STV to use SSE registers; in this case we don't have
> something reasonable to expand to anyway, we always split.
>
> The alternative I have is the patch attached in the PR, if the unrelated
> addcarry/subborrow changes are removed, then it doesn't regress anything,
> the pr50038.c FAIL is from some other earlier change even on vanilla
> branch and pr67317-* FAILs were caused by the addcarry/subborrow changes,
> will look at those in detail.

I do have patch that allows double-mode for cstore, but it is not an
elegant solution. Splitting to SBB at expand time would be
considerably better.

Thanks,
Uros.

> 2017-10-23  Jakub Jelinek  
>
> PR target/82628
> * config/i386/i386.md (UNSPEC_SBB): New unspec.
> (cmp_doubleword): Use unspec instead of compare.
> (sub3_carry_ccgz): Use unspec instead of compare.
>
> --- gcc/config/i386/i386.md.jj  2017-10-23 10:13:05.462218947 +0200
> +++ gcc/config/i386/i386.md 2017-10-23 11:07:55.470376791 +0200
> @@ -112,6 +112,7 @@ (define_c_enum "unspec" [
>UNSPEC_STOS
>UNSPEC_PEEPSIB
>UNSPEC_INSN_FALSE_DEP
> +  UNSPEC_SBB
>
>;; For SSE/MMX support:
>UNSPEC_FIX_NOTRUNC
> @@ -1285,11 +1286,10 @@ (define_insn_and_split "cmp_doublew
>[(set (reg:CC FLAGS_REG)
> (compare:CC (match_dup 1) (match_dup 2)))
> (parallel [(set (reg:CCGZ FLAGS_REG)
> -  (compare: CCGZ
> -(match_dup 4)
> -(plus:DWIH
> -  (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
> -  (match_dup 5
> +  (unspec:CCGZ [(match_dup 4)
> +(match_dup 5)
> +(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
> +   UNSPEC_SBB))
>   (clobber (match_dup 3))])]
>"split_double_mode (mode, &operands[0], 3, &operands[0], 
> &operands[3]);")
>
> @@ -6911,13 +6911,18 @@ (define_insn "*subsi3_carry_zext"
> (set_attr "pent_pair" "pu")
> (set_attr "mode" "SI")])
>
> +;; The sign flag is set from the
> +;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
> +;; result, the overflow flag likewise, but the overflow flag is also
> +;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
> +;; The borrow flag can be modelled, but differently from SF and OF
> +;; and is quite difficult to handle.
>  (define_insn "*sub3_carry_ccgz"
>[(set (reg:CCGZ FLAGS_REG)
> -   (compare:CCGZ
> - (match_operan

Re: [Patch] Edit contrib/ files to download gfortran prerequisites

2017-10-23 Thread Richard Biener

On Sat, Oct 21, 2017 at 2:26 AM, Damian Rouson
 wrote:
>
> Hi Richard,
>
> Attached is a revised patch that makes the downloading of Fortran 
> prerequisites optional via a new --no-fortran flag that can be passed to 
> contrib/download_prerequisites as requested in your reply below.
>
> As Jerry mentioned in his response, he has been working on edits to the 
> top-level build machinery, but we need additional guidance to complete his 
> work.  Given that there were no responses to his request for guidance and 
> it’s not clear when that work will complete, I’m hoping this minor change can 
> be approved independently so that this patch doesn’t suffer bit rot in the 
> interim.

But the change doesn't make sense without the build actually picking up things.

> Ok for trunk?

No.

Thanks,
Richard.

> Damian
>
>
>
>
> On September 21, 2017 at 12:40:49 AM, Richard Biener 
> (richard.guent...@gmail.com(mailto:richard.guent...@gmail.com)) wrote:
>
>> On Wed, Sep 20, 2017 at 10:35 PM, Damian Rouson
>> wrote:
>> > Attached is a patch that adds the downloading of gfortran prerequisites 
>> > OpenCoarrays and MPICH in the contrib/download_prerequisites script. The 
>> > patch also provides a useful error message when neither wget or curl are 
>> > available on the target platform. I tested this patch with several choices 
>> > for the command-line options on macOS (including --md5 and --sha512) and 
>> > Ubuntu Linux (including --sha512). A suggested ChangeLog entry is
>> >
>> > * contrib/download_prerequisites: Download OpenCoarrays and MPICH.
>> > * contrib/prerequisites.sha5: Add sha512 message digests for OpenCoarrays 
>> > and MPICH.
>> > * contrib/prerequisites.md5: Add md5 message digests for OpenCoarrays and 
>> > MPICH.
>> >
>> >
>> > OK for trunk? If so, I’ll ask Jerry to commit this. I don’t have commit 
>> > rights.
>>
>> Can you make this optional similar to graphite/isl? Also I see no support in
>> the toplevel build machinery to build/install the libs as part of GCC
>> so how does
>> that work in the end?
>>
>> Thanks,
>> Richard.
>>
>> > Damian

Re: [RFC] New pragma exec_charset

2017-10-23 Thread Andreas Krebbel

On 10/19/2017 07:13 PM, Martin Sebor wrote:
> On 10/19/2017 09:50 AM, Andreas Krebbel wrote:
>> The TPF operating system uses the GCC S/390 backend.  They set an
>> EBCDIC exec charset for compilation using -fexec-charset.  However,
>> certain libraries require ASCII strings instead.  In order to be able
>> to put calls to that library into the normal code it is required to
>> switch the exec charset within a compilation unit.
>>
>> This is an attempt to implement it by adding a new pragma which could
>> be used like in the following example:
>>
>> int
>> foo ()
>> {
>>   call_with_utf8("hello world");
>>
>> #pragma GCC exec_charset("UTF16")
>>   call_with_utf16("hello world");
>>
>> #pragma GCC exec_charset(pop)
>>   call_with_utf8("hello world");
>> }
>>
>> Does this look reasonable?
> 
> I'm not an expert on this but at a high level it looks reasonable
> to me.  But based on some small amount of work I did in this area
> I have a couple of questions.
> 
> There are a few places in the compiler that already do or that
> should but don't yet handle different execution character sets.
> The former include built-ins like __bultin_isdigit() and
> __builtin_sprintf (in both builtins.c and gimple-ssa-sprintf.c)
> The latter is the -Wformat checking done by the C and C++ front
> ends.  The missing support for the latter is the subject of bug
> 38308.  According to bug 81686, LTO is apparently also missing
> support for exec-charset.

These probably are the areas Richard and Jakub were referring to as well?!  
These cases did not work
properly with the -fexec-charset cmdline option and this does not change with 
the pragma. I'll try
to look at what has been proposed in the discussion. Perhaps I can get it 
working somehow.

> I'm curious how the pragma might interact with these two areas,
> and whether the lack of support for it in the latter is a concern
> (and if not, why not).  For the former, I'm also wondering about
> the interaction of inlining and other interprocedural optimizations
> with the pragma.  Does it propagate through inlined calls as one
> would expect?

The pragma does not apply to the callees of a function defined under the pragma 
regardless of
whether it gets inlined or not.  That matches the behavior of other pragmas.  
If it would apply to
inlined callees the program semantics might change depending on optimization 
decisions i.e. whether
a certain call got inlined or not.

Callees marked as always_inline might be discussed separately. I remember this 
being a topic when
looking at function attributes.

Bye,

-Andreas-

Re: [patch 2/5] add hook to track when splitting is complete

2017-10-23 Thread Richard Biener

On Sat, Oct 21, 2017 at 11:17 PM, Sandra Loosemore
 wrote:
> On 10/20/2017 02:24 AM, Richard Biener wrote:
>>
>> On Fri, Oct 20, 2017 at 4:09 AM, Sandra Loosemore
>>  wrote:
>>>
>>> This patch adds a function to indicate whether the split1 pass has run
>>> yet.  This is used in part 3 of the patch set to decide whether 32-bit
>>> symbolic constant expressions are permitted, e.g. in
>>> TARGET_LEGITIMATE_ADDRESS_P and the movsi expander.
>>>
>>> Since there's currently no usable hook for querying the pass manager
>>> where it is relative to another pass, I implemented this using a
>>> target-specific pass that runs directly after split1 and does nothing
>>> but set a flag.
>>
>>
>> "Nice" hack ;)  The only currently existing way would be to add a property
>> to the IL state like
>>
>> const pass_data pass_data_split_all_insns =
>> {
>>RTL_PASS, /* type */
>>"split1", /* name */
>>OPTGROUP_NONE, /* optinfo_flags */
>>TV_NONE, /* tv_id */
>>0, /* properties_required */
>>PROP_rtl_split_insns, /* properties_provided */
>>0, /* properties_destroyed */
>>
>> and test that via cfun->curr_properties & PROP_rtl_split_insns
>>
>> Having run split might be a important enough change to warrant this.
>> Likewise reload_completed and reload_in_progress could be transitioned
>> to IL properties.
>>
>> Richard.
>
>
> Well, here's a new version of this patch that implements what you suggested
> above.  It's certainly simpler than the original version, or the WIP patch I
> posted before to add a general hook based on enumerating the passes.  Is
> this OK?

+#define PROP_rtl_split_insns   (1 << 17)   /* split1 completed.  */

/* RTL has insns split.  */

that is, do not mention 'split1' specifically.

Ok with that change.

Thanks,
Richard.

> -Sandra
>

[patch] Fix PR middle-end/82569

2017-10-23 Thread Eric Botcazou

Hi,

this is the regression present on the mainline for Power6 and introduced by my 
patch fiddling with SUBREG_PROMOTED_VAR_P in expand_expr_real_1.  It turns out 
that the ouf-of-ssa pass implicitly assumes that promoted RTXes for partitions 
are fully initialized (because it can generate direct moves in promoted mode) 
and clearing SUBREG_PROMOTED_VAR_P for some of them goes against this.

Therefore the attached patch goes in the opposite direction and initializes 
the RTXes for problematic partitions on function entry.  Surprisingly enough, 
this generates smaller code on average at -O2 for gcc.c-torture/compile:

 49 files changed, 1243 insertions(+), 1694 deletions(-)

probably because the compiler can now infer values on paths from entry where 
variables are uninitialized.  Tested on PowerPC64/Linux, OK for the mainline?


2017-10-23  Eric Botcazou  

PR middle-end/82569
* tree-outof-ssa.h (always_initialized_rtx_for_ssa_name_p): Delete.
* expr.c (expand_expr_real_1) : Revert latest change.
* loop-iv.c (iv_get_reaching_def): Likewise.
* cfgexpand.c (expand_one_ssa_partition): Initialize the RTX if the 
variable is promoted and the partition contains undefined values.

-- 
Eric BotcazouIndex: cfgexpand.c
===
--- cfgexpand.c	(revision 253968)
+++ cfgexpand.c	(working copy)
@@ -1391,10 +1391,18 @@ expand_one_ssa_partition (tree var)
 }
 
   machine_mode reg_mode = promote_ssa_mode (var, NULL);
-
   rtx x = gen_reg_rtx (reg_mode);
 
   set_rtl (var, x);
+
+  /* For a promoted variable, X will not be used directly but wrapped in a
+ SUBREG with SUBREG_PROMOTED_VAR_P set, which means that the RTL land
+ will assume that its upper bits can be inferred from its lower bits.
+ Therefore, if X isn't initialized on every path from the entry, then
+ we must do it manually in order to fulfill the above assumption.  */
+  if (reg_mode != TYPE_MODE (TREE_TYPE (var))
+  && bitmap_bit_p (SA.partitions_for_undefined_values, part))
+emit_move_insn (x, CONST0_RTX (reg_mode));
 }
 
 /* Record the association between the RTL generated for partition PART
Index: expr.c
===
--- expr.c	(revision 253968)
+++ expr.c	(working copy)
@@ -9912,43 +9912,24 @@ expand_expr_real_1 (tree exp, rtx target
 	  && GET_MODE (decl_rtl) != dmode)
 	{
 	  machine_mode pmode;
-	  bool always_initialized_rtx;
 
 	  /* Get the signedness to be used for this variable.  Ensure we get
 	 the same mode we got when the variable was declared.  */
 	  if (code != SSA_NAME)
-	{
-	  pmode = promote_decl_mode (exp, &unsignedp);
-	  always_initialized_rtx = true;
-	}
+	pmode = promote_decl_mode (exp, &unsignedp);
 	  else if ((g = SSA_NAME_DEF_STMT (ssa_name))
 		   && gimple_code (g) == GIMPLE_CALL
 		   && !gimple_call_internal_p (g))
-	{
-	  pmode = promote_function_mode (type, mode, &unsignedp,
-	gimple_call_fntype (g), 2);
-	  always_initialized_rtx
-		= always_initialized_rtx_for_ssa_name_p (ssa_name);
-	}
+	pmode = promote_function_mode (type, mode, &unsignedp,
+	   gimple_call_fntype (g),
+	   2);
 	  else
-	{
-	  pmode = promote_ssa_mode (ssa_name, &unsignedp);
-	  always_initialized_rtx
-		= always_initialized_rtx_for_ssa_name_p (ssa_name);
-	}
-
+	pmode = promote_ssa_mode (ssa_name, &unsignedp);
 	  gcc_assert (GET_MODE (decl_rtl) == pmode);
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
-
-	  /* We cannot assume anything about an existing extension if the
-	 register may contain uninitialized bits.  */
-	  if (always_initialized_rtx)
-	{
-	  SUBREG_PROMOTED_VAR_P (temp) = 1;
-	  SUBREG_PROMOTED_SET (temp, unsignedp);
-	}
-
+	  SUBREG_PROMOTED_VAR_P (temp) = 1;
+	  SUBREG_PROMOTED_SET (temp, unsignedp);
 	  return temp;
 	}
 
Index: loop-iv.c
===
--- loop-iv.c	(revision 253968)
+++ loop-iv.c	(working copy)
@@ -353,7 +353,7 @@ iv_get_reaching_def (rtx_insn *insn, rtx
   adef = DF_REF_CHAIN (use)->ref;
 
   /* We do not handle setting only part of the register.  */
-  if (DF_REF_FLAGS (adef) & (DF_REF_READ_WRITE | DF_REF_SUBREG))
+  if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE)
 return GRD_INVALID;
 
   def_insn = DF_REF_INSN (adef);
Index: tree-outof-ssa.h
===
--- tree-outof-ssa.h	(revision 253968)
+++ tree-outof-ssa.h	(working copy)
@@ -74,18 +74,6 @@ get_gimple_for_ssa_name (tree exp)
   return NULL;
 }
 
-/* Return whether the RTX expression representing the storage of the outof-SSA
-   partition that the SSA name EXP is a member of is always initialized.  */
-static inline bool
-always_initialized_rtx_for_ssa_name_p (tree exp)
-{
-  int p = partition_find (SA.map->var_partition, SSA_NAME_VERSION (exp));
-  if (SA.m

Re: [PATCH, RFC] Add a pass counter for "are we there yet" purposes

2017-10-23 Thread Richard Biener

On Mon, Oct 23, 2017 at 12:18 PM, Martin Jambor  wrote:
> Hi,
>
> On Mon, Oct 16, 2017 at 06:15:06PM +0200, Richard Biener wrote:
>> I guess that might help. I have the feeling that querying for 'did
>> pass X run' is wrong conceptually.
>
> The reason why I liked the idea is that I could unify SRA and
> early-SRA passes and their behavior would only differ according to a
> "did pass_starg run yet" query.

I think that "did pass_stdarg run yet" query isn't necessary anymore
given we don't lower va-arg during gimplification.

Richard.

> Admittedly, it is not a big deal, I just always dislike typing
> "-fdump-tree-esra-details -fdump-tree-sra-details" when debugging :-)
>
> Martin

Re: [patch] Fix PR middle-end/82569

2017-10-23 Thread Richard Biener

On Mon, Oct 23, 2017 at 12:57 PM, Eric Botcazou  wrote:
> Hi,
>
> this is the regression present on the mainline for Power6 and introduced by my
> patch fiddling with SUBREG_PROMOTED_VAR_P in expand_expr_real_1.  It turns out
> that the ouf-of-ssa pass implicitly assumes that promoted RTXes for partitions
> are fully initialized (because it can generate direct moves in promoted mode)
> and clearing SUBREG_PROMOTED_VAR_P for some of them goes against this.
>
> Therefore the attached patch goes in the opposite direction and initializes
> the RTXes for problematic partitions on function entry.  Surprisingly enough,
> this generates smaller code on average at -O2 for gcc.c-torture/compile:
>
>  49 files changed, 1243 insertions(+), 1694 deletions(-)
>
> probably because the compiler can now infer values on paths from entry where
> variables are uninitialized.  Tested on PowerPC64/Linux, OK for the mainline?

Ok.

Thanks,
Richard.


>
> 2017-10-23  Eric Botcazou  
>
> PR middle-end/82569
> * tree-outof-ssa.h (always_initialized_rtx_for_ssa_name_p): Delete.
> * expr.c (expand_expr_real_1) : Revert latest change.
> * loop-iv.c (iv_get_reaching_def): Likewise.
> * cfgexpand.c (expand_one_ssa_partition): Initialize the RTX if the
> variable is promoted and the partition contains undefined values.
>
> --
> Eric Botcazou

Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode

2017-10-23 Thread Jakub Jelinek

On Mon, Oct 23, 2017 at 12:27:15PM +0200, Uros Bizjak wrote:
> On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek  wrote:
> > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote:
> >> Hello!
> >>
> >> In PR 82628 Jakub figured out that insn patterns that consume carry
> >> flag were not 100% correct. Due to this issue, combine is able to
> >> simplify various CC_REG propagations that result in invalid code.
> >>
> >> Attached patch fixes (well, mitigates) the above problem by splitting
> >> the double-mode compare after the reload, in the same way other
> >> *_doubleword patterns are handled from "the beginning of the time".
> >
> > I'm afraid this is going to haunt us sooner or later, combine isn't the
> > only pass that uses simplify-rtx.c infrastructure heavily and when we lie
> > in the RTL pattern, eventually something will be simplified wrongly.
> >
> > So, at least we'd need to use UNSPEC for the pattern, like (only lightly
> > tested so far) below.
> 
> I agree with the above. Patterns that consume Carry flag are now
> marked with (plus (ltu (...)), but effectively, they behave like
> unspecs. So, I see no problem to change all SBB and ADC to unspec at
> once, similar to the change you proposed in the patch.

So like this (addcarry/subborrow defered to a separate patch)?
Or do you want to use UNSPEC even for the unsigned comparison case,
i.e. from the patch remove the predicates.md/constraints.md part,
sub3_carry_ccc{,_1} and anything related to that?

As for addcarry/subborrow, the problem is that we expect in the pr67317*
tests that combine is able to notice that the CF setter sets CF to
unconditional 0 and matches the pattern.  With the patch I wrote
we end up with the combiner trying to match an insn where the CCC
is set from a TImode comparison:
(parallel [
(set (reg:CC 17 flags)
(compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ])
(reg/v:DI 94 [ c ])))
(zero_extend:TI (reg/v:DI 94 [ c ]
(set (reg:DI 98)
(plus:DI (reg/v:DI 92 [ a ])
(reg/v:DI 94 [ c ])))
])
So, either we need a define_insn_and_split pattern that would deal with
that (for UNSPEC it would be the same thing, have a define_insn_and_split
that would replace the (ltu...) with (const_int 0)), or perhaps be smarter
during expansion, if we see the first argument is constant 0, expand it
like a normal add instruction with CC setter.

2017-10-23  Jakub Jelinek  

PR target/82628
* config/i386/predicates.md (x86_64_dwzext_immediate_operand): New.
* config/i386/constraints.md (Wf): New constraint.
* config/i386/i386.md (UNSPEC_SBB): New unspec.
(cmp_doubleword): Removed.
(sub3_carry_ccc, *sub3_carry_ccc_1): New patterns.
(sub3_carry_ccgz): Use unspec instead of compare.
* config/i386/i386.c (ix86_expand_branch) : Don't
expand with cmp_doubleword.  For LTU and GEU use
sub3_carry_ccc instead of sub3_carry_ccgz and use CCCmode.

--- gcc/config/i386/predicates.md.jj2017-10-23 12:00:13.899355249 +0200
+++ gcc/config/i386/predicates.md   2017-10-23 12:52:20.696576114 +0200
@@ -366,6 +366,31 @@ (define_predicate "x86_64_hilo_int_opera
 }
 })

+;; Return true if VALUE is a constant integer whose value is
+;; x86_64_immediate_operand value zero extended from word mode to mode.
+(define_predicate "x86_64_dwzext_immediate_operand"
+  (match_code "const_int,const_wide_int")
+{
+  switch (GET_CODE (op))
+{
+case CONST_INT:
+  if (!TARGET_64BIT)
+   return UINTVAL (op) <= HOST_WIDE_INT_UC (0x);
+  return UINTVAL (op) <= HOST_WIDE_INT_UC (0x7fff);
+
+case CONST_WIDE_INT:
+  if (!TARGET_64BIT)
+   return false;
+  return (CONST_WIDE_INT_NUNITS (op) == 2
+ && CONST_WIDE_INT_ELT (op, 1) == 0
+ && (trunc_int_for_mode (CONST_WIDE_INT_ELT (op, 0), SImode)
+ == (HOST_WIDE_INT) CONST_WIDE_INT_ELT (op, 0)));
+
+default:
+  gcc_unreachable ();
+}
+})
+
 ;; Return true if size of VALUE can be stored in a sign
 ;; extended immediate field.
 (define_predicate "x86_64_immediate_size_operand"
--- gcc/config/i386/constraints.md.jj   2017-10-23 12:00:13.850355874 +0200
+++ gcc/config/i386/constraints.md  2017-10-23 12:52:20.697576102 +0200
@@ -332,6 +332,11 @@ (define_constraint "Wd"
of it satisfies the e constraint."
   (match_operand 0 "x86_64_hilo_int_operand"))

+(define_constraint "Wf"
+  "32-bit signed integer constant zero extended from word size
+   to double word size."
+  (match_operand 0 "x86_64_dwzext_immediate_operand"))
+
 (define_constraint "Z"
   "32-bit unsigned integer constant, or a symbolic reference known
to fit that range (for immediate operands in zero-extending x86-64
--- gcc/config/i386/i386.md.jj  2017-10-23 12:51:19.350356044 +0200
+++ gcc/config/i386/i386.md 2017-10-23 12:52:20.701576051 +0200
@@ -112,6 +112,7 @@ (define_c_enu

[PATCH] PR libstdc++/82644 document IS 29124 support

2017-10-23 Thread Jonathan Wakely


Also fix declarations of special functions in C++17, to import them into
the global namespace in , and to prevent defining the
non-standard hypergeometric functions in strict mode.

PR libstdc++/82644
* doc/xml/manual/intro.xml: Include new section.
* doc/xml/manual/status_cxxis29124.xml: New section on IS 29124
status.
* include/bits/specfun.h [__STRICT_ANSI__] (hyperg, hypergf, hypergl)
(conf_hyperg, conf_hypergf, conf_hypergl): Don't declare.
* include/c_compatibility/math.h: Import special functions into
global namespace for C++17.
* testsuite/26_numerics/headers/cmath/82644.cc: New test.
* testsuite/26_numerics/headers/cmath/functions_global_c++17.cc: New
test.

Tested powerpc64le-linux, committed to trunk. Backports to follow.

commit c755fb16187909923bcc6b7ecca318dfeecd2450
Author: Jonathan Wakely 
Date:   Mon Oct 23 11:18:47 2017 +0100

PR libstdc++/82644 document IS 29124 support

Also fix declarations of special functions in C++17, to import them into
the global namespace in , and to prevent defining the
non-standard hypergeometric functions in strict mode.

PR libstdc++/82644
* doc/xml/manual/intro.xml: Include new section.
* doc/xml/manual/status_cxxis29124.xml: New section on IS 29124
status.
* include/bits/specfun.h [__STRICT_ANSI__] (hyperg, hypergf, 
hypergl)
(conf_hyperg, conf_hypergf, conf_hypergl): Don't declare.
* include/c_compatibility/math.h: Import special functions into
global namespace for C++17.
* testsuite/26_numerics/headers/cmath/82644.cc: New test.
* testsuite/26_numerics/headers/cmath/functions_global_c++17.cc: New
test.

diff --git a/libstdc++-v3/doc/xml/manual/intro.xml 
b/libstdc++-v3/doc/xml/manual/intro.xml
index 3b243e57c8b..2df9c5fa6a7 100644
--- a/libstdc++-v3/doc/xml/manual/intro.xml
+++ b/libstdc++-v3/doc/xml/manual/intro.xml
@@ -50,6 +50,10 @@
 
 http://www.w3.org/2001/XInclude"; parse="xml" 
href="status_cxxtr24733.xml">
 
+
+
+http://www.w3.org/2001/XInclude"; parse="xml" 
href="status_cxxis29124.xml">
+
   
 
   
diff --git a/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml 
b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml
new file mode 100644
index 000..40a90fc9944
--- /dev/null
+++ b/libstdc++-v3/doc/xml/manual/status_cxxis29124.xml
@@ -0,0 +1,315 @@
+http://docbook.org/ns/docbook"; version="5.0"
+xml:id="status.iso.specfun" xreflabel="Status C++ 29124">
+
+
+C++ IS 29124
+  
+ISO C++
+Special Functions
+  
+
+
+
+This table is based on the table of contents of ISO/IEC FDIS 29124
+Doc No: N3060 Date: 2010-03-06
+Extensions to the C++ Library to support mathematical special functions
+
+
+
+Complete support for IS 29124 is in GCC 6.1 and later releases, when using
+at least C++11 (for older releases or C++98/C++03 use TR1 instead).
+For C++11 and C++14 the additions to the library are not declared by their
+respective headers unless __STDCPP_WANT_MATH_SPEC_FUNCS__
+is defined as a macro that expands to a non-zero integer constant.
+For C++17 the special functions are always declared (since GCC 7.1).
+
+
+
+When the special functions are declared the macro
+__STDCPP_MATH_SPEC_FUNCS__ is defined to 201003L.
+
+
+
+In addition to the special functions defined in IS 29124, for
+non-strict modes (i.e. -std=gnu++NN modes) the
+hypergeometric functions and confluent hypergeometric functions
+from TR1 are also provided, defined in namespace
+__gnu_cxx.
+
+
+
+
+
+C++ Special Functions Implementation Status
+
+
+
+
+
+
+
+  
+
+  Section
+  Description
+  Status
+  Comments
+
+  
+  
+
+  
+  7
+  Macro names
+  Partial
+  No diagnostic for inconsistent definitions of
+ __STDCPP_WANT_MATH_SPEC_FUNCS__
+
+
+  8
+  Mathematical special functions
+  Y
+  
+
+
+  8.1
+  Additions to header  synopsis
+  Y
+  
+
+
+  8.1.1
+  associated Laguerre polynomials
+  Y
+  
+
+
+  8.1.2
+  associated Legendre functions
+  Y
+  
+
+
+  8.1.3
+  beta function
+  Y
+  
+
+
+  8.1.4
+  (complete) elliptic integral of the first kind
+  Y
+  
+
+
+  8.1.5
+  (complete) elliptic integral of the second kind
+  Y
+  
+
+
+  8.1.6
+  (complete) elliptic integral of the third kind
+  Y
+  
+
+
+  8.1.7
+  regular modified cylindrical Bessel functions
+  Y
+  
+
+
+  8.1.8
+  cylindrical Bessel functions (of the first kind)
+  Y
+  
+
+
+  8.1.9
+  irregular modified cylindrical Bessel functions
+  Y
+  
+
+
+  8.1.10
+  cylindrical Neumann functions
+  Y
+

[00/nn] Patches preparing for runtime offsets and sizes

2017-10-23 Thread Richard Sandiford

This series of patches adds or does things are needed for SVE
runtime offsets and sizes, but aren't directly related to offsets
and sizes themselves.  It's a prerequisite to the main series that
I'll post later today.

Tested by compiling the testsuite before and after the series on:

aarch64-linux-gnu aarch64_be-linux-gnu alpha-linux-gnu arc-elf
arm-linux-gnueabi arm-linux-gnueabihf avr-elf bfin-elf c6x-elf
cr16-elf cris-elf epiphany-elf fr30-elf frv-linux-gnu ft32-elf
h8300-elf hppa64-hp-hpux11.23 ia64-linux-gnu i686-pc-linux-gnu
i686-apple-darwin iq2000-elf lm32-elf m32c-elf m32r-elf
m68k-linux-gnu mcore-elf microblaze-elf mipsel-linux-gnu
mipsisa64-linux-gnu mmix mn10300-elf moxie-rtems msp430-elf
nds32le-elf nios2-linux-gnu nvptx-none pdp11 powerpc-linux-gnuspe
powerpc-eabispe powerpc64-linux-gnu powerpc64le-linux-gnu
powerpc-ibm-aix7.0 riscv32-elf riscv64-elf rl78-elf rx-elf
s390-linux-gnu s390x-linux-gnu sh-linux-gnu sparc-linux-gnu
sparc64-linux-gnu sparc-wrs-vxworks spu-elf tilegx-elf tilepro-elf
xstormy16-elf v850-elf vax-netbsdelf visium-elf x86_64-darwin
x86_64-linux-gnu xtensa-elf

There were no differences besides the ones described in the
covering notes (except on powerpc-ibm-aix7.0, where symbol names
aren't stable).

Also tested normally on aarch64-linux-gnu, x86_64-linux-gnu and
powerpc64le-linux-gnu.

Thanks,
Richard

[PATCH] Revert fix for PR81181

2017-10-23 Thread Richard Biener


The fix was subsumed by that for PR82129.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-10-23  Richard Biener  

PR tree-optimization/82129
Revert
2017-08-01  Richard Biener  

PR tree-optimization/81181
* tree-ssa-pre.c (compute_antic_aux): Defer clean() to ...
(compute_antic): ... end of iteration here.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 254004)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -2082,8 +2082,7 @@ static sbitmap has_abnormal_preds;
  ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])
 
ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])
-
-   Note that clean() is deferred until after the iteration.  */
+*/
 
 static bool
 compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
@@ -2219,8 +2218,7 @@ compute_antic_aux (basic_block block, bo
 bitmap_value_insert_into_set (ANTIC_IN (block),
  expression_for_id (bii));
 
-  /* clean (ANTIC_IN (block)) is defered to after the iteration converged
- because it can cause non-convergence, see for example PR81181.  */
+  clean (ANTIC_IN (block));
 
   if (!bitmap_set_equal (old, ANTIC_IN (block)))
 changed = true;
@@ -2453,12 +2451,6 @@ compute_antic (void)
   gcc_checking_assert (num_iterations < 500);
 }
 
-  /* We have to clean after the dataflow problem converged as cleaning
- can cause non-convergence because it is based on expressions
- rather than values.  */
-  FOR_EACH_BB_FN (block, cfun)
-clean (ANTIC_IN (block));
-
   statistics_histogram_event (cfun, "compute_antic iterations",
  num_iterations);

[01/nn] Add gen_(const_)vec_duplicate helpers

2017-10-23 Thread Richard Sandiford

This patch adds helper functions for generating constant and
non-constant vector duplicates.  These routines help with SVE because
it is then easier to use:

   (const:M (vec_duplicate:M X))

for a broadcast of X, even if the number of elements in M isn't known
at compile time.  It also makes it easier for general rtx code to treat
constant and non-constant duplicates in the same way.

In the target code, the patch uses gen_vec_duplicate instead of
gen_rtx_VEC_DUPLICATE if handling constants correctly is potentially
useful.  It might be that some or all of the call sites only handle
non-constants in practice, in which case the change is a harmless
no-op (and a saving of a few characters).

Otherwise, the target changes use gen_const_vec_duplicate instead
of gen_rtx_CONST_VECTOR if the constant is obviously a duplicate.
They also include some changes to use CONSTxx_RTX for easy global
constants.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* emit-rtl.h (gen_const_vec_duplicate): Declare.
(gen_vec_duplicate): Likewise.
* emit-rtl.c (gen_const_vec_duplicate_1): New function, split
out from...
(gen_const_vector): ...here.
(gen_const_vec_duplicate, gen_vec_duplicate): New functions.
(gen_rtx_CONST_VECTOR): Use gen_const_vec_duplicate for constants
whose elements are all equal.
* optabs.c (expand_vector_broadcast): Use gen_const_vec_duplicate.
* simplify-rtx.c (simplify_const_unary_operation): Likewise.
(simplify_relational_operation): Likewise.
* config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
Likewise.
(aarch64_simd_dup_constant): Use gen_vec_duplicate.
(aarch64_expand_vector_init): Likewise.
* config/arm/arm.c (neon_vdup_constant): Likewise.
(neon_expand_vector_init): Likewise.
(arm_expand_vec_perm): Use gen_const_vec_duplicate.
(arm_block_set_unaligned_vect): Likewise.
(arm_block_set_aligned_vect): Likewise.
* config/arm/neon.md (neon_copysignf): Likewise.
* config/i386/i386.c (ix86_expand_vec_perm): Likewise.
(expand_vec_perm_even_odd_pack): Likewise.
(ix86_vector_duplicate_value): Use gen_vec_duplicate.
* config/i386/sse.md (one_cmpl2): Use CONSTM1_RTX.
* config/ia64/ia64.c (ia64_expand_vecint_compare): Use
gen_const_vec_duplicate.
* config/ia64/vect.md (addv2sf3, subv2sf3): Use CONST1_RTX.
* config/mips/mips.c (mips_gen_const_int_vector): Use
gen_const_vec_duplicate.
(mips_expand_vector_init): Use CONST0_RTX.
* config/powerpcspe/altivec.md (abs2, nabs2): Likewise.
(define_split): Use gen_const_vec_duplicate.
* config/rs6000/altivec.md (abs2, nabs2): Use CONST0_RTX.
(define_split): Use gen_const_vec_duplicate.
* config/s390/vx-builtins.md (vec_genmask): Likewise.
(vec_ctd_s64, vec_ctd_u64, vec_ctsl, vec_ctul): Likewise.
* config/spu/spu.c (spu_const): Likewise.

Index: gcc/emit-rtl.h
===
--- gcc/emit-rtl.h  2017-10-23 11:40:11.561479591 +0100
+++ gcc/emit-rtl.h  2017-10-23 11:41:32.369050264 +0100
@@ -438,6 +438,9 @@ get_max_uid (void)
   return crtl->emit.x_cur_insn_uid;
 }
 
+extern rtx gen_const_vec_duplicate (machine_mode, rtx);
+extern rtx gen_vec_duplicate (machine_mode, rtx);
+
 extern void set_decl_incoming_rtl (tree, rtx, bool);
 
 /* Return a memory reference like MEMREF, but with its mode changed
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-10-23 11:41:25.541909864 +0100
+++ gcc/emit-rtl.c  2017-10-23 11:41:32.369050264 +0100
@@ -5756,32 +5756,60 @@ init_emit (void)
 #endif
 }
 
-/* Generate a vector constant for mode MODE and constant value CONSTANT.  */
+/* Like gen_const_vec_duplicate, but ignore const_tiny_rtx.  */
 
 static rtx
-gen_const_vector (machine_mode mode, int constant)
+gen_const_vec_duplicate_1 (machine_mode mode, rtx el)
 {
-  rtx tem;
-  rtvec v;
-  int units, i;
-  machine_mode inner;
+  int nunits = GET_MODE_NUNITS (mode);
+  rtvec v = rtvec_alloc (nunits);
+  for (int i = 0; i < nunits; ++i)
+RTVEC_ELT (v, i) = el;
+  return gen_rtx_raw_CONST_VECTOR (mode, v);
+}
 
-  units = GET_MODE_NUNITS (mode);
-  inner = GET_MODE_INNER (mode);
+/* Generate a vector constant of mode MODE in which every element has
+   value ELT.  */
 
-  gcc_assert (!DECIMAL_FLOAT_MODE_P (inner));
+rtx
+gen_const_vec_duplicate (machine_mode mode, rtx elt)
+{
+  scalar_mode inner_mode = GET_MODE_INNER (mode);
+  if (elt == CONST0_RTX (inner_mode))
+return CONST0_RTX (mode);
+  else if (elt == CONST1_RTX (inner_mode))
+return CONST1_RTX (mode);
+  else if (elt == CONSTM1_RTX (inner_mode))
+return CONSTM1_RTX (mode);
+
+  return gen_const_vec_duplicate_1 (mode, elt);
+}
 
-  v = rt

[02/nn] Add more vec_duplicate simplifications

2017-10-23 Thread Richard Sandiford

This patch adds a vec_duplicate_p helper that tests for constant
or non-constant vector duplicates.  Together with the existing
const_vec_duplicate_p, this complements the gen_vec_duplicate
and gen_const_vec_duplicate added by a previous patch.

The patch uses the new routines to add more rtx simplifications
involving vector duplicates.  These mirror simplifications that
we already do for CONST_VECTOR broadcasts and are needed for
variable-length SVE, which uses:

  (const:M (vec_duplicate:M X))

to represent constant broadcasts instead.  The simplifications do
trigger on the testsuite for variable duplicates too, and in each
case I saw the change was an improvement.  E.g.:

- Several targets had this simplification in gcc.dg/pr49948.c
  when compiled at -O3:

-Failed to match this instruction:
+Successfully matched this instruction:
 (set (reg:DI 88)
-(subreg:DI (vec_duplicate:V2DI (reg/f:DI 75 [ _4 ])) 0))
+(reg/f:DI 75 [ _4 ]))

  On aarch64 this gives:

ret
.p2align 2
 .L8:
+   adrpx1, b
sub sp, sp, #80
-   adrpx2, b
-   add x1, sp, 12
+   add x2, sp, 12
str wzr, [x0, #:lo12:a]
+   str x2, [x1, #:lo12:b]
mov w0, 0
-   dup v0.2d, x1
-   str d0, [x2, #:lo12:b]
add sp, sp, 80
ret
.size   foo, .-foo

  On x86_64:

jg  .L2
leaq-76(%rsp), %rax
movl$0, a(%rip)
-   movq%rax, -96(%rsp)
-   movq-96(%rsp), %xmm0
-   punpcklqdq  %xmm0, %xmm0
-   movq%xmm0, b(%rip)
+   movq%rax, b(%rip)
 .L2:
xorl%eax, %eax
ret

  etc.

- gcc.dg/torture/pr58018.c compiled at -O3 on aarch64 has an instance of:

 Trying 50, 52, 46 -> 53:
 Failed to match this instruction:
 (set (reg:V4SI 167)
-(and:V4SI (and:V4SI (vec_duplicate:V4SI (reg:SI 132 [ _165 ]))
-(reg:V4SI 209))
-(const_vector:V4SI [
-(const_int 1 [0x1])
-(const_int 1 [0x1])
-(const_int 1 [0x1])
-(const_int 1 [0x1])
-])))
+(and:V4SI (vec_duplicate:V4SI (reg:SI 132 [ _165 ]))
+(reg:V4SI 209)))
 Successfully matched this instruction:
 (set (reg:V4SI 163 [ vect_patt_16.14 ])
 (vec_duplicate:V4SI (reg:SI 132 [ _165 ])))
+Successfully matched this instruction:
+(set (reg:V4SI 167)
+(and:V4SI (reg:V4SI 163 [ vect_patt_16.14 ])
+(reg:V4SI 209)))

  where (reg:SI 132) is the result of a scalar comparison and so
  is known to be 0 or 1.  This saves a MOVI and vector AND:

cmp w7, 4
bls .L15
dup v1.4s, w2
-   lsr w2, w1, 2
+   dup v2.4s, w6
moviv3.4s, 0
-   mov w0, 0
-   moviv2.4s, 0x1
+   lsr w2, w1, 2
mvniv0.4s, 0
+   mov w0, 0
cmgev1.4s, v1.4s, v3.4s
and v1.16b, v2.16b, v1.16b
-   dup v2.4s, w6
-   and v1.16b, v1.16b, v2.16b
.p2align 3
 .L7:
and v0.16b, v0.16b, v1.16b

- powerpc64le has many instances of things like:

-Failed to match this instruction:
+Successfully matched this instruction:
 (set (reg:V4SI 161 [ vect_cst__24 ])
-(vec_select:V4SI (vec_duplicate:V4SI (vec_select:SI (reg:V4SI 143)
-(parallel [
-(const_int 0 [0])
-])))
-(parallel [
-(const_int 2 [0x2])
-(const_int 3 [0x3])
-(const_int 0 [0])
-(const_int 1 [0x1])
-])))
+(vec_duplicate:V4SI (vec_select:SI (reg:V4SI 143)
+(parallel [
+(const_int 0 [0])
+]

  This removes redundant XXPERMDIs from many tests.

The best way of testing the new simplifications seemed to be
via selftests.  The patch cribs part of David's patch here:
https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00270.html .


2017-10-23  Richard Sandiford  
David Malcolm  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (vec_duplicate_p): New function.
* selftest-rtl.c (assert_rtx_eq_at): New function.
* selftest-rtl.h (ASSERT_RTX_EQ): New macro.
(assert_rtx_eq_at): Declare.
* selftest.h (selftest::simplify_rtx_c_tests): Declare.
* selftest-run-tests.c (selftest::run_tests): Call it.
* simplify-rtx.c: Include selftest.h and selftest-rtl.h.
(simplify_unary_operation_1): Recursively handle vector duplicates.
(simplify_binary_operation_1): Likewise.  Handle VEC_SELECTs of
vector dupl

[03/nn] Allow vector CONSTs

2017-10-23 Thread Richard Sandiford

This patch allows (const ...) wrappers to be used for rtx vector
constants, as an alternative to const_vector.  This is useful
for SVE, where the number of elements isn't known until runtime.

It could also be useful in future for fixed-length vectors, to
reduce the amount of memory needed to represent simple constants
with high element counts.  However, one nice thing about keeping
it restricted to variable-length vectors is that there is never
any need to handle combinations of (const ...) and CONST_VECTOR.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/rtl.texi (const): Update description of address constants.
Say that vector constants are allowed too.
* common.md (E, F): Use CONSTANT_P instead of checking for
CONST_VECTOR.
* emit-rtl.c (gen_lowpart_common): Use const_vec_p instead of
checking for CONST_VECTOR.
* expmed.c (make_tree): Use build_vector_from_val for a CONST
VEC_DUPLICATE.
* expr.c (expand_expr_real_2): Check for vector modes instead
of checking for CONST_VECTOR.
* rtl.h (const_vec_p): New function.
(const_vec_duplicate_p): Check for a CONST VEC_DUPLICATE.
(unwrap_const_vec_duplicate): Handle them here too.


Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2017-10-23 11:41:22.176892260 +0100
+++ gcc/doc/rtl.texi2017-10-23 11:41:39.185050437 +0100
@@ -1667,14 +1667,17 @@ Usually that is the only mode for which
 
 @findex const
 @item (const:@var{m} @var{exp})
-Represents a constant that is the result of an assembly-time
-arithmetic computation.  The operand, @var{exp}, is an expression that
-contains only constants (@code{const_int}, @code{symbol_ref} and
-@code{label_ref} expressions) combined with @code{plus} and
-@code{minus}.  However, not all combinations are valid, since the
-assembler cannot do arbitrary arithmetic on relocatable symbols.
+Wraps an rtx computation @var{exp} whose inputs and result do not
+change during the execution of a thread.  There are two valid uses.
+The first is to represent a global or thread-local address calculation.
+In this case @var{exp} should contain @code{const_int},
+@code{symbol_ref}, @code{label_ref} or @code{unspec} expressions,
+combined with @code{plus} and @code{minus}.  Any such @code{unspec}s
+are target-specific and typically represent some form of relocation
+operator.  @var{m} should be a valid address mode.
 
-@var{m} should be @code{Pmode}.
+The second use of @code{const} is to wrap a vector operation.
+In this case @var{exp} must be a @code{vec_duplicate} expression.
 
 @findex high
 @item (high:@var{m} @var{exp})
Index: gcc/common.md
===
--- gcc/common.md   2017-10-23 11:40:11.431285821 +0100
+++ gcc/common.md   2017-10-23 11:41:39.184050436 +0100
@@ -80,14 +80,14 @@ (define_constraint "n"
 (define_constraint "E"
   "Matches a floating-point constant."
   (ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)")
-   (match_test "GET_CODE (op) == CONST_VECTOR
+   (match_test "CONSTANT_P (op)
&& GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT")))
 
 ;; There is no longer a distinction between "E" and "F".
 (define_constraint "F"
   "Matches a floating-point constant."
   (ior (match_test "CONST_DOUBLE_AS_FLOAT_P (op)")
-   (match_test "GET_CODE (op) == CONST_VECTOR
+   (match_test "CONSTANT_P (op)
&& GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_FLOAT")))
 
 (define_constraint "X"
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-10-23 11:41:32.369050264 +0100
+++ gcc/emit-rtl.c  2017-10-23 11:41:39.186050437 +0100
@@ -1470,7 +1470,7 @@ gen_lowpart_common (machine_mode mode, r
return gen_rtx_fmt_e (GET_CODE (x), int_mode, XEXP (x, 0));
 }
   else if (GET_CODE (x) == SUBREG || REG_P (x)
-  || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
+  || GET_CODE (x) == CONCAT || const_vec_p (x)
   || CONST_DOUBLE_AS_FLOAT_P (x) || CONST_SCALAR_INT_P (x))
 return lowpart_subreg (mode, x, innermode);
 
Index: gcc/expmed.c
===
--- gcc/expmed.c2017-10-23 11:41:25.541909864 +0100
+++ gcc/expmed.c2017-10-23 11:41:39.186050437 +0100
@@ -5246,7 +5246,15 @@ make_tree (tree type, rtx x)
   return fold_convert (type, make_tree (t, XEXP (x, 0)));
 
 case CONST:
-  return make_tree (type, XEXP (x, 0));
+  {
+   rtx op = XEXP (x, 0);
+   if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+   tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
+   return build_vector_from_val (type, elt_tree);
+ }
+   return make_tree (type, op);
+  }
 
 case SYMBOL_REF:

[04/nn] Add a VEC_SERIES rtl code

2017-10-23 Thread Richard Sandiford

This patch adds an rtl representation of a vector linear series
of the form:

  a[I] = BASE + I * STEP

Like vec_duplicate;

- the new rtx can be used for both constant and non-constant vectors
- when used for constant vectors it is wrapped in a (const ...)
- the constant form is only used for variable-length vectors;
  fixed-length vectors still use CONST_VECTOR

At the moment the code is restricted to integer elements, to avoid
concerns over floating-point rounding.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/rtl.texi (vec_series): Document.
(const): Say that the operand can be a vec_series.
* rtl.def (VEC_SERIES): New rtx code.
* rtl.h (const_vec_series_p_1): Declare.
(const_vec_series_p): New function.
* emit-rtl.h (gen_const_vec_series): Declare.
(gen_vec_series): Likewise.
* emit-rtl.c (const_vec_series_p_1, gen_const_vec_series)
(gen_vec_series): Likewise.
* optabs.c (expand_mult_highpart): Use gen_const_vec_series.
* simplify-rtx.c (simplify_unary_operation): Handle negations
of vector series.
(simplify_binary_operation_series): New function.
(simplify_binary_operation_1): Use it.  Handle VEC_SERIES.
(test_vector_ops_series): New function.
(test_vector_ops): Call it.
* config/powerpcspe/altivec.md (altivec_lvsl): Use
gen_const_vec_series.
(altivec_lvsr): Likewise.
* config/rs6000/altivec.md (altivec_lvsl, altivec_lvsr): Likewise.

Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2017-10-23 11:41:39.185050437 +0100
+++ gcc/doc/rtl.texi2017-10-23 11:41:41.547050496 +0100
@@ -1677,7 +1677,8 @@ are target-specific and typically repres
 operator.  @var{m} should be a valid address mode.
 
 The second use of @code{const} is to wrap a vector operation.
-In this case @var{exp} must be a @code{vec_duplicate} expression.
+In this case @var{exp} must be a @code{vec_duplicate} or
+@code{vec_series} expression.
 
 @findex high
 @item (high:@var{m} @var{exp})
@@ -2722,6 +2723,10 @@ the same submodes as the input vector mo
 number of output parts must be an integer multiple of the number of input
 parts.
 
+@findex vec_series
+@item (vec_series:@var{m} @var{base} @var{step})
+This operation creates a vector in which element @var{i} is equal to
+@samp{@var{base} + @var{i}*@var{step}}.  @var{m} must be a vector integer mode.
 @end table
 
 @node Conversions
Index: gcc/rtl.def
===
--- gcc/rtl.def 2017-10-23 11:40:11.378243915 +0100
+++ gcc/rtl.def 2017-10-23 11:41:41.549050496 +0100
@@ -710,6 +710,11 @@ DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", "
an integer multiple of the number of input parts.  */
 DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY)
 
+/* Creation of a vector in which element I has the value BASE + I * STEP,
+   where BASE is the first operand and STEP is the second.  The result
+   must have a vector integer mode.  */
+DEF_RTL_EXPR(VEC_SERIES, "vec_series", "ee", RTX_BIN_ARITH)
+
 /* Addition with signed saturation */
 DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH)
 
Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-10-23 11:41:39.188050437 +0100
+++ gcc/rtl.h   2017-10-23 11:41:41.549050496 +0100
@@ -2816,6 +2816,51 @@ unwrap_const_vec_duplicate (T x)
   return x;
 }
 
+/* In emit-rtl.c.  */
+extern bool const_vec_series_p_1 (const_rtx, rtx *, rtx *);
+
+/* Return true if X is a constant vector that contains a linear series
+   of the form:
+
+   { B, B + S, B + 2 * S, B + 3 * S, ... }
+
+   for a nonzero S.  Store B and S in *BASE_OUT and *STEP_OUT on sucess.  */
+
+inline bool
+const_vec_series_p (const_rtx x, rtx *base_out, rtx *step_out)
+{
+  if (GET_CODE (x) == CONST_VECTOR
+  && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
+return const_vec_series_p_1 (x, base_out, step_out);
+  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == VEC_SERIES)
+{
+  *base_out = XEXP (XEXP (x, 0), 0);
+  *step_out = XEXP (XEXP (x, 0), 1);
+  return true;
+}
+  return false;
+}
+
+/* Return true if X is a vector that contains a linear series of the
+   form:
+
+   { B, B + S, B + 2 * S, B + 3 * S, ... }
+
+   where B and S are constant or nonconstant.  Store B and S in
+   *BASE_OUT and *STEP_OUT on sucess.  */
+
+inline bool
+vec_series_p (const_rtx x, rtx *base_out, rtx *step_out)
+{
+  if (GET_CODE (x) == VEC_SERIES)
+{
+  *base_out = XEXP (x, 0);
+  *step_out = XEXP (x, 1);
+  return true;
+}
+  return const_vec_series_p (x, base_out, step_out);
+}
+
 /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X.  */
 
 inline scalar_int_mode
Index: gcc/emit-rtl.h
==

[05/nn] Add VEC_DUPLICATE_{CST,EXPR} and associated optab

2017-10-23 Thread Richard Sandiford

SVE needs a way of broadcasting a scalar to a variable-length vector.
This patch adds VEC_DUPLICATE_CST for when VECTOR_CST would be used for
fixed-length vectors and VEC_DUPLICATE_EXPR for when CONSTRUCTOR would
be used for fixed-length vectors.  VEC_DUPLICATE_EXPR is the tree
equivalent of the existing rtl code VEC_DUPLICATE.

Originally we had a single VEC_DUPLICATE_EXPR and used TREE_CONSTANT
to mark constant nodes, but in response to last year's RFC, Richard B.
suggested it would be better to have separate codes for the constant
and non-constant cases.  This allows VEC_DUPLICATE_EXPR to be treated
as a normal unary operation and avoids the previous need for treating
it as a GIMPLE_SINGLE_RHS.

It might make sense to use VEC_DUPLICATE_CST for all duplicated
vector constants, since it's a bit more compact than VECTOR_CST
in that case, and is potentially more efficient to process.
However, the nice thing about keeping it restricted to variable-length
vectors is that there is then no need to handle combinations of
VECTOR_CST and VEC_DUPLICATE_CST; a vector type will always use
VECTOR_CST or never use it.

The patch also adds a vec_duplicate_optab to go with VEC_DUPLICATE_EXPR.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/generic.texi (VEC_DUPLICATE_CST, VEC_DUPLICATE_EXPR): Document.
(VEC_COND_EXPR): Add missing @tindex.
* doc/md.texi (vec_duplicate@var{m}): Document.
* tree.def (VEC_DUPLICATE_CST, VEC_DUPLICATE_EXPR): New tree codes.
* tree-core.h (tree_base): Document that u.nelts and TREE_OVERFLOW
are used for VEC_DUPLICATE_CST as well.
(tree_vector): Access base.n.nelts directly.
* tree.h (TREE_OVERFLOW): Add VEC_DUPLICATE_CST to the list of
valid codes.
(VEC_DUPLICATE_CST_ELT): New macro.
(build_vec_duplicate_cst): Declare.
* tree.c (tree_node_structure_for_code, tree_code_size, tree_size)
(integer_zerop, integer_onep, integer_all_onesp, integer_truep)
(real_zerop, real_onep, real_minus_onep, add_expr, initializer_zerop)
(walk_tree_1, drop_tree_overflow): Handle VEC_DUPLICATE_CST.
(build_vec_duplicate_cst): New function.
(uniform_vector_p): Handle the new codes.
(test_vec_duplicate_predicates_int): New function.
(test_vec_duplicate_predicates_float): Likewise.
(test_vec_duplicate_predicates): Likewise.
(tree_c_tests): Call test_vec_duplicate_predicates.
* cfgexpand.c (expand_debug_expr): Handle the new codes.
* tree-pretty-print.c (dump_generic_node): Likewise.
* dwarf2out.c (rtl_for_decl_init): Handle VEC_DUPLICATE_CST.
* gimple-expr.h (is_gimple_constant): Likewise.
* gimplify.c (gimplify_expr): Likewise.
* graphite-isl-ast-to-gimple.c
(translate_isl_ast_to_gimple::is_constant): Likewise.
* graphite-scop-detection.c (scan_tree_for_params): Likewise.
* ipa-icf-gimple.c (func_checker::compare_cst_or_decl): Likewise.
(func_checker::compare_operand): Likewise.
* ipa-icf.c (sem_item::add_expr, sem_variable::equals): Likewise.
* match.pd (negate_expr_p): Likewise.
* print-tree.c (print_node): Likewise.
* tree-chkp.c (chkp_find_bounds_1): Likewise.
* tree-loop-distribution.c (const_with_all_bytes_same): Likewise.
* tree-ssa-loop.c (for_each_index): Likewise.
* tree-ssa-pre.c (create_component_ref_by_pieces_1): Likewise.
* tree-ssa-sccvn.c (copy_reference_ops_from_ref): Likewise.
(ao_ref_init_from_vn_reference): Likewise.
* tree-vect-generic.c (ssa_uniform_vector_p): Likewise.
* varasm.c (const_hash_1, compare_constant): Likewise.
* fold-const.c (negate_expr_p, fold_negate_expr_1, const_binop)
(fold_convert_const, operand_equal_p, fold_view_convert_expr)
(exact_inverse, fold_checksum_tree): Likewise.
(const_unop): Likewise.  Fold VEC_DUPLICATE_EXPRs of a constant.
(test_vec_duplicate_folding): New function.
(fold_const_c_tests): Call it.
* optabs.def (vec_duplicate_optab): New optab.
* optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR.
* optabs.h (expand_vector_broadcast): Declare.
* optabs.c (expand_vector_broadcast): Make non-static.  Try using
vec_duplicate_optab.
* expr.c (store_constructor): Try using vec_duplicate_optab for
uniform vectors.
(const_vector_element): New function, split out from...
(const_vector_from_tree): ...here.
(expand_expr_real_2): Handle VEC_DUPLICATE_EXPR.
(expand_expr_real_1): Handle VEC_DUPLICATE_CST.
* internal-fn.c (expand_vector_ubsan_overflow): Use CONSTANT_P
instead of checking for VECTOR_CST.
* tree-cfg.c (verify_gimple_assign_unary): Handle VEC_DUPLICATE_EXPR.
(verify_gimple_assign_single): Handle VEC_DUPLI

[06/nn] Add VEC_SERIES_{CST,EXPR} and associated optab

2017-10-23 Thread Richard Sandiford

Similarly to the VEC_DUPLICATE_{CST,EXPR}, this patch adds two
tree code equivalents of the VEC_SERIES rtx code.  VEC_SERIES_EXPR
is for non-constant inputs and is a normal tcc_binary.  VEC_SERIES_CST
is a tcc_constant.

Like VEC_DUPLICATE_CST, VEC_SERIES_CST is only used for variable-length
vectors.  This avoids the need to handle combinations of VECTOR_CST
and VEC_SERIES_CST.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/generic.texi (VEC_SERIES_CST, VEC_SERIES_EXPR): Document.
* doc/md.texi (vec_series@var{m}): Document.
* tree.def (VEC_SERIES_CST, VEC_SERIES_EXPR): New tree codes.
* tree.h (TREE_OVERFLOW): Add VEC_SERIES_CST to the list of valid
codes.
(VEC_SERIES_CST_BASE, VEC_SERIES_CST_STEP): New macros.
(build_vec_series_cst, build_vec_series): Declare.
* tree.c (tree_node_structure_for_code, tree_code_size, tree_size)
(add_expr, walk_tree_1, drop_tree_overflow): Handle VEC_SERIES_CST.
(build_vec_series_cst, build_vec_series): New functions.
* cfgexpand.c (expand_debug_expr): Handle the new codes.
* tree-pretty-print.c (dump_generic_node): Likewise.
* dwarf2out.c (rtl_for_decl_init): Handle VEC_SERIES_CST.
* gimple-expr.h (is_gimple_constant): Likewise.
* gimplify.c (gimplify_expr): Likewise.
* graphite-scop-detection.c (scan_tree_for_params): Likewise.
* ipa-icf-gimple.c (func_checker::compare_cst_or_decl): Likewise.
(func_checker::compare_operand): Likewise.
* ipa-icf.c (sem_item::add_expr, sem_variable::equals): Likewise.
* print-tree.c (print_node): Likewise.
* tree-ssa-loop.c (for_each_index): Likewise.
* tree-ssa-pre.c (create_component_ref_by_pieces_1): Likewise.
* tree-ssa-sccvn.c (copy_reference_ops_from_ref): Likewise.
(ao_ref_init_from_vn_reference): Likewise.
* varasm.c (const_hash_1, compare_constant): Likewise.
* fold-const.c (negate_expr_p, fold_negate_expr_1, operand_equal_p)
(fold_checksum_tree): Likewise.
(vec_series_equivalent_p): New function.
(const_binop): Use it.  Fold VEC_SERIES_EXPRs of constants.
* expmed.c (make_tree): Handle VEC_SERIES.
* gimple-pretty-print.c (dump_binary_rhs): Likewise.
* tree-inline.c (estimate_operator_cost): Likewise.
* expr.c (const_vector_element): Include VEC_SERIES_CST in comment.
(expand_expr_real_2): Handle VEC_SERIES_EXPR.
(expand_expr_real_1): Handle VEC_SERIES_CST.
* optabs.def (vec_series_optab): New optab.
* optabs.h (expand_vec_series_expr): Declare.
* optabs.c (expand_vec_series_expr): New function.
* optabs-tree.c (optab_for_tree_code): Handle VEC_SERIES_EXPR.
* tree-cfg.c (verify_gimple_assign_binary): Handle VEC_SERIES_EXPR.
(verify_gimple_assign_single): Handle VEC_SERIES_CST.
* tree-vect-generic.c (expand_vector_operations_1): Check that
the operands also have vector type.

Index: gcc/doc/generic.texi
===
--- gcc/doc/generic.texi2017-10-23 11:41:51.760448406 +0100
+++ gcc/doc/generic.texi2017-10-23 11:42:34.910720660 +0100
@@ -1037,6 +1037,7 @@ As this example indicates, the operands
 @tindex COMPLEX_CST
 @tindex VECTOR_CST
 @tindex VEC_DUPLICATE_CST
+@tindex VEC_SERIES_CST
 @tindex STRING_CST
 @findex TREE_STRING_LENGTH
 @findex TREE_STRING_POINTER
@@ -1098,6 +1099,16 @@ instead.  The scalar element value is gi
 @code{VEC_DUPLICATE_CST_ELT} and has the same restrictions as the
 element of a @code{VECTOR_CST}.
 
+@item VEC_SERIES_CST
+These nodes represent a vector constant in which element @var{i}
+has the value @samp{@var{base} + @var{i} * @var{step}}, for some
+constant @var{base} and @var{step}.  The value of @var{base} is
+given by @code{VEC_SERIES_CST_BASE} and the value of @var{step} is
+given by @code{VEC_SERIES_CST_STEP}.
+
+These nodes are restricted to integral types, in order to avoid
+specifying the rounding behavior for floating-point types.
+
 @item STRING_CST
 These nodes represent string-constants.  The @code{TREE_STRING_LENGTH}
 returns the length of the string, as an @code{int}.  The
@@ -1702,6 +1713,7 @@ a value from @code{enum annot_expr_kind}
 @node Vectors
 @subsection Vectors
 @tindex VEC_DUPLICATE_EXPR
+@tindex VEC_SERIES_EXPR
 @tindex VEC_LSHIFT_EXPR
 @tindex VEC_RSHIFT_EXPR
 @tindex VEC_WIDEN_MULT_HI_EXPR
@@ -1721,6 +1733,14 @@ a value from @code{enum annot_expr_kind}
 This node has a single operand and represents a vector in which every
 element is equal to that operand.
 
+@item VEC_SERIES_EXPR
+This node represents a vector formed from a scalar base and step,
+given as the first and second operands respectively.  Element @var{i}
+of the result is equal to @samp{@var{base} + @var{i}*@var{step}}.
+
+This node is restricted to integra

[08/nn] Add a fixed_size_mode class

2017-10-23 Thread Richard Sandiford

This patch adds a fixed_size_mode machine_mode wrapper
for modes that are known to have a fixed size.  That applies
to all current modes, but future patches will add support for
variable-sized modes.

The use of this class should be pretty restricted.  One important
use case is to hold the mode of static data, which can never be
variable-sized with current file formats.  Another is to hold
the modes of registers involved in __builtin_apply and
__builtin_result, since those interfaces don't cope well with
variable-sized data.

The class can also be useful when reinterpreting the contents of
a fixed-length bit string as a different kind of value.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* machmode.h (fixed_size_mode): New class.
* rtl.h (get_pool_mode): Return fixed_size_mode.
* gengtype.c (main): Add fixed_size_mode.
* target.def (get_raw_result_mode): Return a fixed_size_mode.
(get_raw_arg_mode): Likewise.
* doc/tm.texi: Regenerate.
* targhooks.h (default_get_reg_raw_mode): Return a fixed_size_mode.
* targhooks.c (default_get_reg_raw_mode): Likewise.
* config/ia64/ia64.c (ia64_get_reg_raw_mode): Likewise.
* config/mips/mips.c (mips_get_reg_raw_mode): Likewise.
* config/msp430/msp430.c (msp430_get_raw_arg_mode): Likewise.
(msp430_get_raw_result_mode): Likewise.
* config/avr/avr-protos.h (regmask): Use as_a 
* dbxout.c (dbxout_parms): Require fixed-size modes.
* expr.c (copy_blkmode_from_reg, copy_blkmode_to_reg): Likewise.
* gimple-ssa-store-merging.c (encode_tree_to_bitpos): Likewise.
* omp-low.c (lower_oacc_reductions): Likewise.
* simplify-rtx.c (simplify_immed_subreg): Take fixed_size_modes.
(simplify_subreg): Update accordingly.
* varasm.c (constant_descriptor_rtx::mode): Change to fixed_size_mode.
(force_const_mem): Update accordingly.  Return NULL_RTX for modes
that aren't fixed-size.
(get_pool_mode): Return a fixed_size_mode.
(output_constant_pool_2): Take a fixed_size_mode.

Index: gcc/machmode.h
===
--- gcc/machmode.h  2017-09-15 14:47:33.184331588 +0100
+++ gcc/machmode.h  2017-10-23 11:42:52.014721093 +0100
@@ -652,6 +652,39 @@ GET_MODE_2XWIDER_MODE (const T &m)
 extern const unsigned char mode_complex[NUM_MACHINE_MODES];
 #define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])
 
+/* Represents a machine mode that must have a fixed size.  The main
+   use of this class is to represent the modes of objects that always
+   have static storage duration, such as constant pool entries.
+   (No current target supports the concept of variable-size static data.)  */
+class fixed_size_mode
+{
+public:
+  typedef mode_traits::from_int from_int;
+
+  ALWAYS_INLINE fixed_size_mode () {}
+  ALWAYS_INLINE fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {}
+  ALWAYS_INLINE fixed_size_mode (const scalar_mode &m) : m_mode (m) {}
+  ALWAYS_INLINE fixed_size_mode (const scalar_int_mode &m) : m_mode (m) {}
+  ALWAYS_INLINE fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {}
+  ALWAYS_INLINE fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {}
+  ALWAYS_INLINE fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {}
+  ALWAYS_INLINE fixed_size_mode (const complex_mode &m) : m_mode (m) {}
+  ALWAYS_INLINE operator machine_mode () const { return m_mode; }
+
+  static bool includes_p (machine_mode);
+
+protected:
+  machine_mode m_mode;
+};
+
+/* Return true if MODE has a fixed size.  */
+
+inline bool
+fixed_size_mode::includes_p (machine_mode)
+{
+  return true;
+}
+
 extern opt_machine_mode mode_for_size (unsigned int, enum mode_class, int);
 
 /* Return the machine mode to use for a MODE_INT of SIZE bits, if one
Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-10-23 11:42:47.297720974 +0100
+++ gcc/rtl.h   2017-10-23 11:42:52.015721094 +0100
@@ -3020,7 +3020,7 @@ extern rtx force_const_mem (machine_mode
 struct function;
 extern rtx get_pool_constant (const_rtx);
 extern rtx get_pool_constant_mark (rtx, bool *);
-extern machine_mode get_pool_mode (const_rtx);
+extern fixed_size_mode get_pool_mode (const_rtx);
 extern rtx simplify_subtraction (rtx);
 extern void decide_function_section (tree);
 
Index: gcc/gengtype.c
===
--- gcc/gengtype.c  2017-05-23 19:29:56.919436344 +0100
+++ gcc/gengtype.c  2017-10-23 11:42:52.014721093 +0100
@@ -5197,6 +5197,7 @@ #define POS_HERE(Call) do { pos.file = t
   POS_HERE (do_scalar_typedef ("JCF_u2", &pos));
   POS_HERE (do_scalar_typedef ("void", &pos));
   POS_HERE (do_scalar_typedef ("machine_mode", &pos));
+  POS_HERE (do_scalar_typedef ("fixed_size_mode", &pos));
   POS_HER

[07/nn] Add unique CONSTs

2017-10-23 Thread Richard Sandiford

This patch adds a way of treating certain kinds of CONST as unique,
so that pointer equality is equivalent to value equality.  For now it
is restricted to VEC_DUPLICATE and VEC_SERIES, although the code to
generate them remains in the else arm of an "if (1)" until a later
patch.

This is needed so that (const (vec_duplicate xx)) can used as the
CONSTxx_RTX of a variable-length vector.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (unique_const_p): New function.
(gen_rtx_CONST): Declare.
* emit-rtl.c (const_hasher): New struct.
(const_htab): New variable.
(init_emit_once): Initialize it.
(const_hasher::hash, const_hasher::equal): New functions.
(gen_rtx_CONST): New function.
(spare_vec_duplicate, spare_vec_series): New variables.
(gen_const_vec_duplicate_1): Add code for use (const (vec_duplicate)),
but disable it for now.
(gen_const_vec_series): Likewise (const (vec_series)).
* gengenrtl.c (special_rtx): Return true for CONST.
* rtl.c (shared_const_p): Return true if unique_const_p.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-10-23 11:41:41.549050496 +0100
+++ gcc/rtl.h   2017-10-23 11:42:47.297720974 +0100
@@ -2861,6 +2861,23 @@ vec_series_p (const_rtx x, rtx *base_out
   return const_vec_series_p (x, base_out, step_out);
 }
 
+/* Return true if there should only ever be one instance of (const X),
+   so that constants of this type can be compared using pointer equality.  */
+
+inline bool
+unique_const_p (const_rtx x)
+{
+  switch (GET_CODE (x))
+{
+case VEC_DUPLICATE:
+case VEC_SERIES:
+  return true;
+
+default:
+  return false;
+}
+}
+
 /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X.  */
 
 inline scalar_int_mode
@@ -3542,6 +3559,7 @@ extern rtx_insn_list *gen_rtx_INSN_LIST
 gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn,
  basic_block bb, rtx pattern, int location, int code,
  rtx reg_notes);
+extern rtx gen_rtx_CONST (machine_mode, rtx);
 extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT);
 extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec);
 extern void set_mode_and_regno (rtx, machine_mode, unsigned int);
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-10-23 11:41:41.548050496 +0100
+++ gcc/emit-rtl.c  2017-10-23 11:42:47.296720974 +0100
@@ -175,6 +175,15 @@ struct const_fixed_hasher : ggc_cache_pt
 
 static GTY ((cache)) hash_table *const_fixed_htab;
 
+/* A hash table storing unique CONSTs.  */
+struct const_hasher : ggc_cache_ptr_hash
+{
+  static hashval_t hash (rtx x);
+  static bool equal (rtx x, rtx y);
+};
+
+static GTY ((cache)) hash_table *const_htab;
+
 #define cur_insn_uid (crtl->emit.x_cur_insn_uid)
 #define cur_debug_insn_uid (crtl->emit.x_cur_debug_insn_uid)
 #define first_label_num (crtl->emit.x_first_label_num)
@@ -310,6 +319,28 @@ const_fixed_hasher::equal (rtx x, rtx y)
   return fixed_identical (CONST_FIXED_VALUE (a), CONST_FIXED_VALUE (b));
 }
 
+/* Returns a hash code for X (which is either an existing unique CONST
+   or an operand to gen_rtx_CONST).  */
+
+hashval_t
+const_hasher::hash (rtx x)
+{
+  if (GET_CODE (x) == CONST)
+x = XEXP (x, 0);
+
+  int do_not_record_p = 0;
+  return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false);
+}
+
+/* Returns true if the operand of unique CONST X is equal to Y.  */
+
+bool
+const_hasher::equal (rtx x, rtx y)
+{
+  gcc_checking_assert (GET_CODE (x) == CONST);
+  return rtx_equal_p (XEXP (x, 0), y);
+}
+
 /* Return true if the given memory attributes are equal.  */
 
 bool
@@ -5756,16 +5787,55 @@ init_emit (void)
 #endif
 }
 
+rtx
+gen_rtx_CONST (machine_mode mode, rtx val)
+{
+  if (unique_const_p (val))
+{
+  /* Look up the CONST in the hash table.  */
+  rtx *slot = const_htab->find_slot (val, INSERT);
+  if (*slot == 0)
+   *slot = gen_rtx_raw_CONST (mode, val);
+  return *slot;
+}
+
+  return gen_rtx_raw_CONST (mode, val);
+}
+
+/* Temporary rtx used by gen_const_vec_duplicate_1.  */
+static GTY((deletable)) rtx spare_vec_duplicate;
+
 /* Like gen_const_vec_duplicate, but ignore const_tiny_rtx.  */
 
 static rtx
 gen_const_vec_duplicate_1 (machine_mode mode, rtx el)
 {
   int nunits = GET_MODE_NUNITS (mode);
-  rtvec v = rtvec_alloc (nunits);
-  for (int i = 0; i < nunits; ++i)
-RTVEC_ELT (v, i) = el;
-  return gen_rtx_raw_CONST_VECTOR (mode, v);
+  if (1)
+{
+  rtvec v = rtvec_alloc (nunits);
+
+  for (int i = 0; i < nunits; ++i)
+   RTVEC_ELT (v, i) = el;
+
+  return gen_rtx_raw_CONST_VECTOR (mode, v);
+}
+  else
+{
+  if (spare_vec_duplicate)
+   {
+ PUT_MODE (spare_vec_duplicate, mode);
+ XEXP (spare_vec_duplicate, 0) = el;
+

Some PRE TLC

2017-10-23 Thread Richard Biener


Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-10-23  Richard Biener  

* tree-ssa-pre.c (bitmap_remove_from_set): Rename to...
(bitmap_remove_expr_from_set): ... this.  All callers call this
for non-constant values.
(bitmap_set_subtract): Rename to...
(bitmap_set_subtract_expressions): ... this.  Adjust and
optimize.
(bitmap_set_contains_value): Remove superfluous check.
(bitmap_set_replace_value): Inline into single caller ...
(bitmap_value_replace_in_set): ... here and simplify.
(dependent_clean): Merge into ...
(clean): ... this using an overload.  Adjust.
(prune_clobbered_mems): Adjust.
(compute_antic_aux): Likewise.
(compute_partial_antic_aux): Likewise.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 253998)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -719,14 +719,11 @@ sccvn_valnum_from_value_id (unsigned int
 /* Remove an expression EXPR from a bitmapped set.  */
 
 static void
-bitmap_remove_from_set (bitmap_set_t set, pre_expr expr)
+bitmap_remove_expr_from_set (bitmap_set_t set, pre_expr expr)
 {
   unsigned int val  = get_expr_value_id (expr);
-  if (!value_id_constant_p (val))
-{
-  bitmap_clear_bit (&set->values, val);
-  bitmap_clear_bit (&set->expressions, get_expression_id (expr));
-}
+  bitmap_clear_bit (&set->values, val);
+  bitmap_clear_bit (&set->expressions, get_expression_id (expr));
 }
 
 /* Insert an expression EXPR into a bitmapped set.  */
@@ -802,7 +799,7 @@ sorted_array_from_bitmap_set (bitmap_set
 /* Subtract all expressions contained in ORIG from DEST.  */
 
 static bitmap_set_t
-bitmap_set_subtract (bitmap_set_t dest, bitmap_set_t orig)
+bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig)
 {
   bitmap_set_t result = bitmap_set_new ();
   bitmap_iterator bi;
@@ -833,15 +830,15 @@ bitmap_set_subtract_values (bitmap_set_t
 {
   if (to_remove)
{
- bitmap_remove_from_set (a, to_remove);
+ bitmap_remove_expr_from_set (a, to_remove);
  to_remove = NULL;
}
   pre_expr expr = expression_for_id (i);
-  if (bitmap_set_contains_value (b, get_expr_value_id (expr)))
+  if (bitmap_bit_p (&b->values, get_expr_value_id (expr)))
to_remove = expr;
 }
   if (to_remove)
-bitmap_remove_from_set (a, to_remove);
+bitmap_remove_expr_from_set (a, to_remove);
 }
 
 
@@ -853,9 +850,6 @@ bitmap_set_contains_value (bitmap_set_t
   if (value_id_constant_p (value_id))
 return true;
 
-  if (!set || bitmap_empty_p (&set->expressions))
-return false;
-
   return bitmap_bit_p (&set->values, value_id);
 }
 
@@ -865,44 +859,6 @@ bitmap_set_contains_expr (bitmap_set_t s
   return bitmap_bit_p (&set->expressions, get_expression_id (expr));
 }
 
-/* Replace an instance of value LOOKFOR with expression EXPR in SET.  */
-
-static void
-bitmap_set_replace_value (bitmap_set_t set, unsigned int lookfor,
- const pre_expr expr)
-{
-  bitmap exprset;
-  unsigned int i;
-  bitmap_iterator bi;
-
-  if (value_id_constant_p (lookfor))
-return;
-
-  if (!bitmap_set_contains_value (set, lookfor))
-return;
-
-  /* The number of expressions having a given value is usually
- significantly less than the total number of expressions in SET.
- Thus, rather than check, for each expression in SET, whether it
- has the value LOOKFOR, we walk the reverse mapping that tells us
- what expressions have a given value, and see if any of those
- expressions are in our set.  For large testcases, this is about
- 5-10x faster than walking the bitmap.  If this is somehow a
- significant lose for some cases, we can choose which set to walk
- based on the set size.  */
-  exprset = value_expressions[lookfor];
-  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
-{
-  if (bitmap_clear_bit (&set->expressions, i))
-   {
- bitmap_set_bit (&set->expressions, get_expression_id (expr));
- return;
-   }
-}
-
-  gcc_unreachable ();
-}
-
 /* Return true if two bitmap sets are equal.  */
 
 static bool
@@ -918,9 +874,33 @@ static void
 bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr)
 {
   unsigned int val = get_expr_value_id (expr);
+  if (value_id_constant_p (val))
+return;
 
   if (bitmap_set_contains_value (set, val))
-bitmap_set_replace_value (set, val, expr);
+{
+  /* The number of expressions having a given value is usually
+significantly less than the total number of expressions in SET.
+Thus, rather than check, for each expression in SET, whether it
+has the value LOOKFOR, we walk the reverse mapping that tells us
+what expressions have a given value, and see if any of those
+expressions are in our set.  For large testcases, this is about
+5

[09/nn] Add a fixed_size_mode_pod class

2017-10-23 Thread Richard Sandiford

This patch adds a POD version of fixed_size_mode.  The only current use
is for storing the __builtin_apply and __builtin_result register modes,
which were made fixed_size_modes by the previous patch.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* coretypes.h (fixed_size_mode): Declare.
(fixed_size_mode_pod): New typedef.
* builtins.h (target_builtins::x_apply_args_mode)
(target_builtins::x_apply_result_mode): Change type to
fixed_size_mode_pod.
* builtins.c (apply_args_size, apply_result_size, result_vector)
(expand_builtin_apply_args_1, expand_builtin_apply)
(expand_builtin_return): Update accordingly.

Index: gcc/coretypes.h
===
--- gcc/coretypes.h 2017-09-11 17:10:58.656085547 +0100
+++ gcc/coretypes.h 2017-10-23 11:42:57.592545063 +0100
@@ -59,6 +59,7 @@ typedef const struct rtx_def *const_rtx;
 class scalar_int_mode;
 class scalar_float_mode;
 class complex_mode;
+class fixed_size_mode;
 template class opt_mode;
 typedef opt_mode opt_scalar_mode;
 typedef opt_mode opt_scalar_int_mode;
@@ -66,6 +67,7 @@ typedef opt_mode opt_
 template class pod_mode;
 typedef pod_mode scalar_mode_pod;
 typedef pod_mode scalar_int_mode_pod;
+typedef pod_mode fixed_size_mode_pod;
 
 /* Subclasses of rtx_def, using indentation to show the class
hierarchy, along with the relevant invariant.
Index: gcc/builtins.h
===
--- gcc/builtins.h  2017-08-30 12:18:46.602740973 +0100
+++ gcc/builtins.h  2017-10-23 11:42:57.592545063 +0100
@@ -29,14 +29,14 @@ struct target_builtins {
  the register is not used for calling a function.  If the machine
  has register windows, this gives only the outbound registers.
  INCOMING_REGNO gives the corresponding inbound register.  */
-  machine_mode x_apply_args_mode[FIRST_PSEUDO_REGISTER];
+  fixed_size_mode_pod x_apply_args_mode[FIRST_PSEUDO_REGISTER];
 
   /* For each register that may be used for returning values, this gives
  a mode used to copy the register's value.  VOIDmode indicates the
  register is not used for returning values.  If the machine has
  register windows, this gives only the outbound registers.
  INCOMING_REGNO gives the corresponding inbound register.  */
-  machine_mode x_apply_result_mode[FIRST_PSEUDO_REGISTER];
+  fixed_size_mode_pod x_apply_result_mode[FIRST_PSEUDO_REGISTER];
 };
 
 extern struct target_builtins default_target_builtins;
Index: gcc/builtins.c
===
--- gcc/builtins.c  2017-10-23 11:41:23.140260335 +0100
+++ gcc/builtins.c  2017-10-23 11:42:57.592545063 +0100
@@ -1358,7 +1358,6 @@ apply_args_size (void)
   static int size = -1;
   int align;
   unsigned int regno;
-  machine_mode mode;
 
   /* The values computed by this function never change.  */
   if (size < 0)
@@ -1374,7 +1373,7 @@ apply_args_size (void)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (FUNCTION_ARG_REGNO_P (regno))
  {
-   mode = targetm.calls.get_raw_arg_mode (regno);
+   fixed_size_mode mode = targetm.calls.get_raw_arg_mode (regno);
 
gcc_assert (mode != VOIDmode);
 
@@ -1386,7 +1385,7 @@ apply_args_size (void)
  }
else
  {
-   apply_args_mode[regno] = VOIDmode;
+   apply_args_mode[regno] = as_a  (VOIDmode);
  }
 }
   return size;
@@ -1400,7 +1399,6 @@ apply_result_size (void)
 {
   static int size = -1;
   int align, regno;
-  machine_mode mode;
 
   /* The values computed by this function never change.  */
   if (size < 0)
@@ -1410,7 +1408,7 @@ apply_result_size (void)
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (targetm.calls.function_value_regno_p (regno))
  {
-   mode = targetm.calls.get_raw_result_mode (regno);
+   fixed_size_mode mode = targetm.calls.get_raw_result_mode (regno);
 
gcc_assert (mode != VOIDmode);
 
@@ -1421,7 +1419,7 @@ apply_result_size (void)
apply_result_mode[regno] = mode;
  }
else
- apply_result_mode[regno] = VOIDmode;
+ apply_result_mode[regno] = as_a  (VOIDmode);
 
   /* Allow targets that use untyped_call and untyped_return to override
 the size so that machine-specific information can be stored here.  */
@@ -1440,7 +1438,7 @@ apply_result_size (void)
 result_vector (int savep, rtx result)
 {
   int regno, size, align, nelts;
-  machine_mode mode;
+  fixed_size_mode mode;
   rtx reg, mem;
   rtx *savevec = XALLOCAVEC (rtx, FIRST_PSEUDO_REGISTER);
 
@@ -1469,7 +1467,7 @@ expand_builtin_apply_args_1 (void)
 {
   rtx registers, tem;
   int size, align, regno;
-  machine_mode mode;
+  fixed_size_mode mode;
   rtx struct_incoming_value = targetm.calls.struct_value_rtx (c

[10/nn] Widening optab cleanup

2017-10-23 Thread Richard Sandiford

widening_optab_handler had the comment:

  /* ??? Why does find_widening_optab_handler_and_mode attempt to
 widen things that can't be widened?  E.g. add_optab... */
  if (op > LAST_CONV_OPTAB)
return CODE_FOR_nothing;

I think it comes from expand_binop using
find_widening_optab_handler_and_mode for two things: to test whether
a "normal" optab like add_optab is supported for a standard binary
operation and to test whether a "convert" optab is supported for a
widening operation like umul_widen_optab.  In the former case from_mode
and to_mode must be the same, in the latter from_mode must be narrower
than to_mode.

For the former case, find_widening_optab_handler_and_mode is only really
testing the modes that are passed in.  permit_non_widening must be true
here.

For the latter case, find_widening_optab_handler_and_mode should only
really consider new from_modes that are wider than the original
from_mode and narrower than the original to_mode.  Logically
permit_non_widening should be false, since widening optabs aren't
supposed to take operands that are the same width as the destination.
We get away with permit_non_widening being true because no target
would/should define a widening .md pattern with matching modes.

But really, it seems better for expand_binop to handle these two
cases itself rather than pushing them down.  With that change,
find_widening_optab_handler_and_mode is only ever called with
permit_non_widening set to false and is only ever called with
a "proper" convert optab.  We then no longer need widening_optab_handler,
we can just use convert_optab_handler directly.

The patch also passes the instruction code down to expand_binop_directly.
This should be more efficient and removes an extra call to
find_widening_optab_handler_and_mode.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* optabs-query.h (convert_optab_p): New function, split out from...
(convert_optab_handler): ...here.
(widening_optab_handler): Delete.
(find_widening_optab_handler): Remove permit_non_widening parameter.
(find_widening_optab_handler_and_mode): Likewise.  Provide an
override that operates on mode class wrappers.
* optabs-query.c (widening_optab_handler): Delete.
(find_widening_optab_handler_and_mode): Remove permit_non_widening
parameter.  Assert that the two modes are the same class and that
the "from" mode is narrower than the "to" mode.  Use
convert_optab_handler instead of widening_optab_handler.
* expmed.c (expmed_mult_highpart_optab): Use convert_optab_handler
instead of widening_optab_handler.
* expr.c (expand_expr_real_2): Update calls to
find_widening_optab_handler.
* optabs.c (expand_widen_pattern_expr): Likewise.
(expand_binop_directly): Take the insn_code as a parameter.
(expand_binop): Only call find_widening_optab_handler for
conversion optabs; use optab_handler otherwise.  Update calls
to find_widening_optab_handler and expand_binop_directly.
Use convert_optab_handler instead of widening_optab_handler.
* tree-ssa-math-opts.c (convert_mult_to_widen): Update calls to
find_widening_optab_handler and use scalar_mode rather than
machine_mode.
(convert_plusminus_to_widen): Likewise.

Index: gcc/optabs-query.h
===
--- gcc/optabs-query.h  2017-09-14 17:04:19.080694343 +0100
+++ gcc/optabs-query.h  2017-10-23 11:43:01.517673716 +0100
@@ -23,6 +23,14 @@ #define GCC_OPTABS_QUERY_H
 #include "insn-opinit.h"
 #include "target.h"
 
+/* Return true if OP is a conversion optab.  */
+
+inline bool
+convert_optab_p (optab op)
+{
+  return op > unknown_optab && op <= LAST_CONV_OPTAB;
+}
+
 /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn.  */
 
@@ -43,7 +51,7 @@ convert_optab_handler (convert_optab op,
   machine_mode from_mode)
 {
   unsigned scode = (op << 16) | (from_mode << 8) | to_mode;
-  gcc_assert (op > unknown_optab && op <= LAST_CONV_OPTAB);
+  gcc_assert (convert_optab_p (op));
   return raw_optab_handler (scode);
 }
 
@@ -167,12 +175,11 @@ enum insn_code can_float_p (machine_mode
 enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *);
 bool can_conditionally_move_p (machine_mode mode);
 bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *);
-enum insn_code widening_optab_handler (optab, machine_mode, machine_mode);
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-#define find_widening_optab_handler(A,B,C,D) \
-  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+#define find_widening_optab_handler(A, B, C) \
+  find_widening_optab_handler_and_mode (A, B, C, NULL)
 enum insn_code find_widening_optab_handler_and_mode (optab, machine_mod

[11/nn] Add narrower_subreg_mode helper function

2017-10-23 Thread Richard Sandiford

This patch adds a narrowing equivalent of wider_subreg_mode.  At present
there is only one user.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (narrower_subreg_mode): New function.
* ira-color.c (update_costs_from_allocno): Use it.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-10-23 11:44:06.562686090 +0100
+++ gcc/rtl.h   2017-10-23 11:44:15.916785881 +0100
@@ -2972,6 +2972,16 @@ subreg_lowpart_offset (machine_mode oute
 }
 
 /* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE,
+   return the smaller of the two modes if they are different sizes,
+   otherwise return the outer mode.  */
+
+inline machine_mode
+narrower_subreg_mode (machine_mode outermode, machine_mode innermode)
+{
+  return paradoxical_subreg_p (outermode, innermode) ? innermode : outermode;
+}
+
+/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE,
return the mode that is big enough to hold both the outer and inner
values.  Prefer the outer mode in the event of a tie.  */
 
Index: gcc/ira-color.c
===
--- gcc/ira-color.c 2017-10-23 11:44:11.500538024 +0100
+++ gcc/ira-color.c 2017-10-23 11:44:15.915819948 +0100
@@ -1367,15 +1367,14 @@ update_costs_from_allocno (ira_allocno_t
  || ALLOCNO_ASSIGNED_P (another_allocno))
continue;
 
- if (GET_MODE_SIZE (ALLOCNO_MODE (cp->second)) < GET_MODE_SIZE (mode))
-   /* If we have different modes use the smallest one.  It is
-  a sub-register move.  It is hard to predict what LRA
-  will reload (the pseudo or its sub-register) but LRA
-  will try to minimize the data movement.  Also for some
-  register classes bigger modes might be invalid,
-  e.g. DImode for AREG on x86.  For such cases the
-  register move cost will be maximal. */
-   mode = ALLOCNO_MODE (cp->second);
+ /* If we have different modes use the smallest one.  It is
+a sub-register move.  It is hard to predict what LRA
+will reload (the pseudo or its sub-register) but LRA
+will try to minimize the data movement.  Also for some
+register classes bigger modes might be invalid,
+e.g. DImode for AREG on x86.  For such cases the
+register move cost will be maximal.  */
+ mode = narrower_subreg_mode (mode, ALLOCNO_MODE (cp->second));
  
  cost = (cp->second == allocno
  ? ira_register_move_cost[mode][rclass][aclass]

[12/nn] Add an is_narrower_int_mode helper function

2017-10-23 Thread Richard Sandiford

This patch adds a function for testing whether an arbitrary mode X
is an integer mode that is narrower than integer mode Y.  This is
useful for code like expand_float and expand_fix that could in
principle handle vectors as well as scalars.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* machmode.h (is_narrower_int_mode): New function
* optabs.c (expand_float, expand_fix): Use it.
* dwarf2out.c (rotate_loc_descriptor): Likewise.

Index: gcc/machmode.h
===
--- gcc/machmode.h  2017-10-23 11:44:06.561720156 +0100
+++ gcc/machmode.h  2017-10-23 11:44:23.979432614 +0100
@@ -893,6 +893,17 @@ is_complex_float_mode (machine_mode mode
   return false;
 }
 
+/* Return true if MODE is a scalar integer mode with a precision
+   smaller than LIMIT's precision.  */
+
+inline bool
+is_narrower_int_mode (machine_mode mode, scalar_int_mode limit)
+{
+  scalar_int_mode int_mode;
+  return (is_a  (mode, &int_mode)
+ && GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (limit));
+}
+
 namespace mode_iterator
 {
   /* Start mode iterator *ITER at the first mode in class MCLASS, if any.  */
Index: gcc/optabs.c
===
--- gcc/optabs.c2017-10-23 11:44:07.732431531 +0100
+++ gcc/optabs.c2017-10-23 11:44:23.980398548 +0100
@@ -4820,7 +4820,7 @@ expand_float (rtx to, rtx from, int unsi
   rtx value;
   convert_optab tab = unsignedp ? ufloat_optab : sfloat_optab;
 
-  if (GET_MODE_PRECISION (GET_MODE (from)) < GET_MODE_PRECISION (SImode))
+  if (is_narrower_int_mode (GET_MODE (from), SImode))
from = convert_to_mode (SImode, from, unsignedp);
 
   libfunc = convert_optab_libfunc (tab, GET_MODE (to), GET_MODE (from));
@@ -5002,7 +5002,7 @@ expand_fix (rtx to, rtx from, int unsign
  that the mode of TO is at least as wide as SImode, since those are the
  only library calls we know about.  */
 
-  if (GET_MODE_PRECISION (GET_MODE (to)) < GET_MODE_PRECISION (SImode))
+  if (is_narrower_int_mode (GET_MODE (to), SImode))
 {
   target = gen_reg_rtx (SImode);
 
Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c 2017-10-23 11:44:05.684652559 +0100
+++ gcc/dwarf2out.c 2017-10-23 11:44:23.979432614 +0100
@@ -14530,8 +14530,7 @@ rotate_loc_descriptor (rtx rtl, scalar_i
   dw_loc_descr_ref op0, op1, ret, mask[2] = { NULL, NULL };
   int i;
 
-  if (GET_MODE (rtlop1) != VOIDmode
-  && GET_MODE_BITSIZE (GET_MODE (rtlop1)) < GET_MODE_BITSIZE (mode))
+  if (is_narrower_int_mode (GET_MODE (rtlop1), mode))
 rtlop1 = gen_rtx_ZERO_EXTEND (mode, rtlop1);
   op0 = mem_loc_descriptor (XEXP (rtl, 0), mode, mem_mode,
VAR_INIT_STATUS_INITIALIZED);

[13/nn] More is_a

2017-10-23 Thread Richard Sandiford

alias.c:find_base_term and find_base_value checked:

  if (GET_MODE_SIZE (GET_MODE (src)) < GET_MODE_SIZE (Pmode))

but (a) comparing the precision seems more correct, since it's possible
for modes to have the same memory size as Pmode but fewer bits and
(b) the functions are called on arbitrary rtl, so there's no guarantee
that we're handling an integer truncation.

Since there's no point processing truncations of anything other than an
integer, this patch checks that first.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* alias.c (find_base_value, find_base_term): Only process integer
truncations.  Check the precision rather than the size.

Index: gcc/alias.c
===
--- gcc/alias.c 2017-10-23 11:41:25.511925516 +0100
+++ gcc/alias.c 2017-10-23 11:44:27.544693078 +0100
@@ -1349,6 +1349,7 @@ known_base_value_p (rtx x)
 find_base_value (rtx src)
 {
   unsigned int regno;
+  scalar_int_mode int_mode;
 
 #if defined (FIND_BASE_TERM)
   /* Try machine-dependent ways to find the base term.  */
@@ -1475,7 +1476,8 @@ find_base_value (rtx src)
 address modes depending on the address space.  */
   if (!target_default_pointer_address_modes_p ())
break;
-  if (GET_MODE_SIZE (GET_MODE (src)) < GET_MODE_SIZE (Pmode))
+  if (!is_a  (GET_MODE (src), &int_mode)
+ || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode))
break;
   /* Fall through.  */
 case HIGH:
@@ -1876,6 +1878,7 @@ find_base_term (rtx x)
   cselib_val *val;
   struct elt_loc_list *l, *f;
   rtx ret;
+  scalar_int_mode int_mode;
 
 #if defined (FIND_BASE_TERM)
   /* Try machine-dependent ways to find the base term.  */
@@ -1893,7 +1896,8 @@ find_base_term (rtx x)
 address modes depending on the address space.  */
   if (!target_default_pointer_address_modes_p ())
return 0;
-  if (GET_MODE_SIZE (GET_MODE (x)) < GET_MODE_SIZE (Pmode))
+  if (!is_a  (GET_MODE (x), &int_mode)
+ || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode))
return 0;
   /* Fall through.  */
 case HIGH:

[14/nn] Add helpers for shift count modes

2017-10-23 Thread Richard Sandiford

This patch adds a stub helper routine to provide the mode
of a scalar shift amount, given the mode of the values
being shifted.

One long-standing problem has been to decide what this mode
should be for arbitrary rtxes (as opposed to those directly
tied to a target pattern).  Is it the mode of the shifted
elements?  Is it word_mode?  Or maybe QImode?  Is it whatever
the corresponding target pattern says?  (In which case what
should the mode be when the target doesn't have a pattern?)

For now the patch picks word_mode, which should be safe on
all targets but could perhaps become suboptimal if the helper
routine is used more often than it is in this patch.  As it
stands the patch does not change the generated code.

The patch also adds a helper function that constructs rtxes
for constant shift amounts, again given the mode of the value
being shifted.  As well as helping with the SVE patches, this
is one step towards allowing CONST_INTs to have a real mode.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* target.h (get_shift_amount_mode): New function.
* emit-rtl.h (gen_int_shift_amount): Declare.
* emit-rtl.c (gen_int_shift_amount): New function.
* asan.c (asan_emit_stack_protection): Use gen_int_shift_amount
instead of GEN_INT.
* calls.c (shift_return_value): Likewise.
* cse.c (fold_rtx): Likewise.
* dse.c (find_shift_sequence): Likewise.
* expmed.c (init_expmed_one_mode, store_bit_field_1, expand_shift_1)
(expand_shift, expand_smod_pow2): Likewise.
* lower-subreg.c (shift_cost): Likewise.
* simplify-rtx.c (simplify_unary_operation_1): Likewise.
(simplify_binary_operation_1): Likewise.
* combine.c (try_combine, find_split_point, force_int_to_mode)
(simplify_shift_const_1, simplify_shift_const): Likewise.
(change_zero_ext): Likewise.  Use simplify_gen_binary.
* optabs.c (expand_superword_shift, expand_doubleword_mult)
(expand_unop): Use gen_int_shift_amount instead of GEN_INT.
(expand_binop): Likewise.  Use get_shift_amount_mode instead
of word_mode as the mode of a CONST_INT shift amount.
(shift_amt_for_vec_perm_mask): Add a machine_mode argument.
Use gen_int_shift_amount instead of GEN_INT.
(expand_vec_perm): Update caller accordingly.  Use
gen_int_shift_amount instead of GEN_INT.

Index: gcc/target.h
===
--- gcc/target.h2017-10-23 11:47:06.643477568 +0100
+++ gcc/target.h2017-10-23 11:47:11.277288162 +0100
@@ -209,6 +209,17 @@ #define HOOKSTRUCT(FRAGMENT) FRAGMENT
 
 extern struct gcc_target targetm;
 
+/* Return the mode that should be used to hold a scalar shift amount
+   when shifting values of the given mode.  */
+/* ??? This could in principle be generated automatically from the .md
+   shift patterns, but for now word_mode should be universally OK.  */
+
+inline scalar_int_mode
+get_shift_amount_mode (machine_mode)
+{
+  return word_mode;
+}
+
 #ifdef GCC_TM_H
 
 #ifndef CUMULATIVE_ARGS_MAGIC
Index: gcc/emit-rtl.h
===
--- gcc/emit-rtl.h  2017-10-23 11:47:06.643477568 +0100
+++ gcc/emit-rtl.h  2017-10-23 11:47:11.274393237 +0100
@@ -369,6 +369,7 @@ extern void set_reg_attrs_for_parm (rtx,
 extern void set_reg_attrs_for_decl_rtl (tree t, rtx x);
 extern void adjust_reg_mode (rtx, machine_mode);
 extern int mem_expr_equal_p (const_tree, const_tree);
+extern rtx gen_int_shift_amount (machine_mode, HOST_WIDE_INT);
 
 extern bool need_atomic_barrier_p (enum memmodel, bool);
 
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-10-23 11:47:06.643477568 +0100
+++ gcc/emit-rtl.c  2017-10-23 11:47:11.273428262 +0100
@@ -6478,6 +6478,15 @@ need_atomic_barrier_p (enum memmodel mod
 }
 }
 
+/* Return a constant shift amount for shifting a value of mode MODE
+   by VALUE bits.  */
+
+rtx
+gen_int_shift_amount (machine_mode mode, HOST_WIDE_INT value)
+{
+  return gen_int_mode (value, get_shift_amount_mode (mode));
+}
+
 /* Initialize fields of rtl_data related to stack alignment.  */
 
 void
Index: gcc/asan.c
===
--- gcc/asan.c  2017-10-23 11:47:06.643477568 +0100
+++ gcc/asan.c  2017-10-23 11:47:11.27056 +0100
@@ -1388,7 +1388,7 @@ asan_emit_stack_protection (rtx base, rt
   TREE_ASM_WRITTEN (id) = 1;
   emit_move_insn (mem, expand_normal (build_fold_addr_expr (decl)));
   shadow_base = expand_binop (Pmode, lshr_optab, base,
- GEN_INT (ASAN_SHADOW_SHIFT),
+ gen_int_shift_amount (Pmode, ASAN_SHADOW_SHIFT),
  NULL_RTX, 1, OPTAB_DIRECT);
   shadow_base
 = plus_constant (Pmode, shadow_base,
Index: gcc/c

[15/nn] Use more specific hash functions in rtlhash.c

2017-10-23 Thread Richard Sandiford

Avoid using add_object when we have more specific routines available.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtlhash.c (add_rtx): Use add_hwi for 'w' and add_int for 'i'.

Index: gcc/rtlhash.c
===
--- gcc/rtlhash.c   2017-02-23 19:54:03.0 +
+++ gcc/rtlhash.c   2017-10-23 11:47:20.120201389 +0100
@@ -77,11 +77,11 @@ add_rtx (const_rtx x, hash &hstate)
 switch (fmt[i])
   {
   case 'w':
-   hstate.add_object (XWINT (x, i));
+   hstate.add_hwi (XWINT (x, i));
break;
   case 'n':
   case 'i':
-   hstate.add_object (XINT (x, i));
+   hstate.add_int (XINT (x, i));
break;
   case 'V':
   case 'E':

[16/nn] Factor out the mode handling in lower-subreg.c

2017-10-23 Thread Richard Sandiford

This patch adds a helper routine (interesting_mode_p) to lower-subreg.c,
to make the decision about whether a mode can be split and, if so,
calculate the number of bytes and words in the mode.  At present this
function always returns true; a later patch will add cases in which it
can return false.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* lower-subreg.c (interesting_mode_p): New function.
(compute_costs, find_decomposable_subregs, decompose_register)
(simplify_subreg_concatn, can_decompose_p, resolve_simple_move)
(resolve_clobber, dump_choices): Use it.

Index: gcc/lower-subreg.c
===
--- gcc/lower-subreg.c  2017-10-23 11:47:11.274393237 +0100
+++ gcc/lower-subreg.c  2017-10-23 11:47:23.555013148 +0100
@@ -103,6 +103,18 @@ #define twice_word_mode \
 #define choices \
   this_target_lower_subreg->x_choices
 
+/* Return true if MODE is a mode we know how to lower.  When returning true,
+   store its byte size in *BYTES and its word size in *WORDS.  */
+
+static inline bool
+interesting_mode_p (machine_mode mode, unsigned int *bytes,
+   unsigned int *words)
+{
+  *bytes = GET_MODE_SIZE (mode);
+  *words = CEIL (*bytes, UNITS_PER_WORD);
+  return true;
+}
+
 /* RTXes used while computing costs.  */
 struct cost_rtxes {
   /* Source and target registers.  */
@@ -199,10 +211,10 @@ compute_costs (bool speed_p, struct cost
   for (i = 0; i < MAX_MACHINE_MODE; i++)
 {
   machine_mode mode = (machine_mode) i;
-  int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
-  if (factor > 1)
+  unsigned int size, factor;
+  if (interesting_mode_p (mode, &size, &factor) && factor > 1)
{
- int mode_move_cost;
+ unsigned int mode_move_cost;
 
  PUT_MODE (rtxes->target, mode);
  PUT_MODE (rtxes->source, mode);
@@ -469,10 +481,10 @@ find_decomposable_subregs (rtx *loc, enu
  continue;
}
 
- outer_size = GET_MODE_SIZE (GET_MODE (x));
- inner_size = GET_MODE_SIZE (GET_MODE (inner));
- outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
+ || !interesting_mode_p (GET_MODE (inner), &inner_size,
+ &inner_words))
+   continue;
 
  /* We only try to decompose single word subregs of multi-word
 registers.  When we find one, we return -1 to avoid iterating
@@ -507,7 +519,7 @@ find_decomposable_subregs (rtx *loc, enu
}
   else if (REG_P (x))
{
- unsigned int regno;
+ unsigned int regno, size, words;
 
  /* We will see an outer SUBREG before we see the inner REG, so
 when we see a plain REG here it means a direct reference to
@@ -527,7 +539,8 @@ find_decomposable_subregs (rtx *loc, enu
 
  regno = REGNO (x);
  if (!HARD_REGISTER_NUM_P (regno)
- && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+ && interesting_mode_p (GET_MODE (x), &size, &words)
+ && words > 1)
{
  switch (*pcmi)
{
@@ -567,15 +580,15 @@ find_decomposable_subregs (rtx *loc, enu
 decompose_register (unsigned int regno)
 {
   rtx reg;
-  unsigned int words, i;
+  unsigned int size, words, i;
   rtvec v;
 
   reg = regno_reg_rtx[regno];
 
   regno_reg_rtx[regno] = NULL_RTX;
 
-  words = GET_MODE_SIZE (GET_MODE (reg));
-  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  if (!interesting_mode_p (GET_MODE (reg), &size, &words))
+gcc_unreachable ();
 
   v = rtvec_alloc (words);
   for (i = 0; i < words; ++i)
@@ -599,25 +612,29 @@ decompose_register (unsigned int regno)
 simplify_subreg_concatn (machine_mode outermode, rtx op,
 unsigned int byte)
 {
-  unsigned int inner_size;
+  unsigned int outer_size, outer_words, inner_size, inner_words;
   machine_mode innermode, partmode;
   rtx part;
   unsigned int final_offset;
 
+  innermode = GET_MODE (op);
+  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
+  || !interesting_mode_p (innermode, &inner_size, &inner_words))
+gcc_unreachable ();
+
   gcc_assert (GET_CODE (op) == CONCATN);
-  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
+  gcc_assert (byte % outer_size == 0);
 
-  innermode = GET_MODE (op);
-  gcc_assert (byte < GET_MODE_SIZE (innermode));
-  if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
+  gcc_assert (byte < inner_size);
+  if (outer_size > inner_size)
 return NULL_RTX;
 
-  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
+  inner_size /= XVECLEN (op, 0);
   part = XVECEXP (op, 0, byte / inner_size);
   partmode = GET_MODE (part);
 
   final_offset = byte % inner_size;
-  if

Re: [PATCH, i386]: Fix PR 82628, wrong code at -Os on x86_64-linux-gnu in the 32-bit mode

2017-10-23 Thread Uros Bizjak

On Mon, Oct 23, 2017 at 1:07 PM, Jakub Jelinek  wrote:
> On Mon, Oct 23, 2017 at 12:27:15PM +0200, Uros Bizjak wrote:
>> On Mon, Oct 23, 2017 at 12:09 PM, Jakub Jelinek  wrote:
>> > On Sun, Oct 22, 2017 at 08:04:28PM +0200, Uros Bizjak wrote:
>> >> Hello!
>> >>
>> >> In PR 82628 Jakub figured out that insn patterns that consume carry
>> >> flag were not 100% correct. Due to this issue, combine is able to
>> >> simplify various CC_REG propagations that result in invalid code.
>> >>
>> >> Attached patch fixes (well, mitigates) the above problem by splitting
>> >> the double-mode compare after the reload, in the same way other
>> >> *_doubleword patterns are handled from "the beginning of the time".
>> >
>> > I'm afraid this is going to haunt us sooner or later, combine isn't the
>> > only pass that uses simplify-rtx.c infrastructure heavily and when we lie
>> > in the RTL pattern, eventually something will be simplified wrongly.
>> >
>> > So, at least we'd need to use UNSPEC for the pattern, like (only lightly
>> > tested so far) below.
>>
>> I agree with the above. Patterns that consume Carry flag are now
>> marked with (plus (ltu (...)), but effectively, they behave like
>> unspecs. So, I see no problem to change all SBB and ADC to unspec at
>> once, similar to the change you proposed in the patch.
>
> So like this (addcarry/subborrow defered to a separate patch)?
> Or do you want to use UNSPEC even for the unsigned comparison case,
> i.e. from the patch remove the predicates.md/constraints.md part,
> sub3_carry_ccc{,_1} and anything related to that?

Looking at the attached patch, I think, this won't be necessary
anymore. The pattern is quite important for 32bit targets, so this
fact warrants a couple of complicated patterns.

> As for addcarry/subborrow, the problem is that we expect in the pr67317*
> tests that combine is able to notice that the CF setter sets CF to
> unconditional 0 and matches the pattern.  With the patch I wrote
> we end up with the combiner trying to match an insn where the CCC
> is set from a TImode comparison:
> (parallel [
> (set (reg:CC 17 flags)
> (compare:CC (zero_extend:TI (plus:DI (reg/v:DI 92 [ a ])
> (reg/v:DI 94 [ c ])))
> (zero_extend:TI (reg/v:DI 94 [ c ]
> (set (reg:DI 98)
> (plus:DI (reg/v:DI 92 [ a ])
> (reg/v:DI 94 [ c ])))
> ])
> So, either we need a define_insn_and_split pattern that would deal with
> that (for UNSPEC it would be the same thing, have a define_insn_and_split
> that would replace the (ltu...) with (const_int 0)), or perhaps be smarter
> during expansion, if we see the first argument is constant 0, expand it
> like a normal add instruction with CC setter.
>
> 2017-10-23  Jakub Jelinek  
>
> PR target/82628
> * config/i386/predicates.md (x86_64_dwzext_immediate_operand): New.
> * config/i386/constraints.md (Wf): New constraint.
> * config/i386/i386.md (UNSPEC_SBB): New unspec.
> (cmp_doubleword): Removed.
> (sub3_carry_ccc, *sub3_carry_ccc_1): New patterns.
> (sub3_carry_ccgz): Use unspec instead of compare.
> * config/i386/i386.c (ix86_expand_branch) : Don't
> expand with cmp_doubleword.  For LTU and GEU use
> sub3_carry_ccc instead of sub3_carry_ccgz and use CCCmode.

OK.

Thanks,
Uros.

> --- gcc/config/i386/predicates.md.jj2017-10-23 12:00:13.899355249 +0200
> +++ gcc/config/i386/predicates.md   2017-10-23 12:52:20.696576114 +0200
> @@ -366,6 +366,31 @@ (define_predicate "x86_64_hilo_int_opera
>  }
>  })
>
> +;; Return true if VALUE is a constant integer whose value is
> +;; x86_64_immediate_operand value zero extended from word mode to mode.
> +(define_predicate "x86_64_dwzext_immediate_operand"
> +  (match_code "const_int,const_wide_int")
> +{
> +  switch (GET_CODE (op))
> +{
> +case CONST_INT:
> +  if (!TARGET_64BIT)
> +   return UINTVAL (op) <= HOST_WIDE_INT_UC (0x);
> +  return UINTVAL (op) <= HOST_WIDE_INT_UC (0x7fff);
> +
> +case CONST_WIDE_INT:
> +  if (!TARGET_64BIT)
> +   return false;
> +  return (CONST_WIDE_INT_NUNITS (op) == 2
> + && CONST_WIDE_INT_ELT (op, 1) == 0
> + && (trunc_int_for_mode (CONST_WIDE_INT_ELT (op, 0), SImode)
> + == (HOST_WIDE_INT) CONST_WIDE_INT_ELT (op, 0)));
> +
> +default:
> +  gcc_unreachable ();
> +}
> +})
> +
>  ;; Return true if size of VALUE can be stored in a sign
>  ;; extended immediate field.
>  (define_predicate "x86_64_immediate_size_operand"
> --- gcc/config/i386/constraints.md.jj   2017-10-23 12:00:13.850355874 +0200
> +++ gcc/config/i386/constraints.md  2017-10-23 12:52:20.697576102 +0200
> @@ -332,6 +332,11 @@ (define_constraint "Wd"
> of it satisfies the e constraint."
>(match_operand 0 "x86_64_hilo_int_operand"))
>
> +(define_constraint "Wf"
> +  "32-bit signed integer constant zero extended f

[17/nn] Turn var-tracking.c:INT_MEM_OFFSET into a function

2017-10-23 Thread Richard Sandiford

This avoids the double evaluation mentioned in the comments and
simplifies the change to make MEM_OFFSET variable.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* var-tracking.c (INT_MEM_OFFSET): Replace with...
(int_mem_offset): ...this new function.
(var_mem_set, var_mem_delete_and_set, var_mem_delete)
(find_mem_expr_in_1pdv, dataflow_set_preserve_mem_locs)
(same_variable_part_p, use_type, add_stores, vt_get_decl_and_offset):
Update accordingly.

Index: gcc/var-tracking.c
===
--- gcc/var-tracking.c  2017-09-12 14:28:56.401824826 +0100
+++ gcc/var-tracking.c  2017-10-23 11:47:27.197231712 +0100
@@ -390,8 +390,15 @@ struct variable
 /* Pointer to the BB's information specific to variable tracking pass.  */
 #define VTI(BB) ((variable_tracking_info *) (BB)->aux)
 
-/* Macro to access MEM_OFFSET as an HOST_WIDE_INT.  Evaluates MEM twice.  */
-#define INT_MEM_OFFSET(mem) (MEM_OFFSET_KNOWN_P (mem) ? MEM_OFFSET (mem) : 0)
+/* Return MEM_OFFSET (MEM) as a HOST_WIDE_INT, or 0 if we can't.  */
+
+static inline HOST_WIDE_INT
+int_mem_offset (const_rtx mem)
+{
+  if (MEM_OFFSET_KNOWN_P (mem))
+return MEM_OFFSET (mem);
+  return 0;
+}
 
 #if CHECKING_P && (GCC_VERSION >= 2007)
 
@@ -2336,7 +2343,7 @@ var_mem_set (dataflow_set *set, rtx loc,
 rtx set_src)
 {
   tree decl = MEM_EXPR (loc);
-  HOST_WIDE_INT offset = INT_MEM_OFFSET (loc);
+  HOST_WIDE_INT offset = int_mem_offset (loc);
 
   var_mem_decl_set (set, loc, initialized,
dv_from_decl (decl), offset, set_src, INSERT);
@@ -2354,7 +2361,7 @@ var_mem_delete_and_set (dataflow_set *se
enum var_init_status initialized, rtx set_src)
 {
   tree decl = MEM_EXPR (loc);
-  HOST_WIDE_INT offset = INT_MEM_OFFSET (loc);
+  HOST_WIDE_INT offset = int_mem_offset (loc);
 
   clobber_overlapping_mems (set, loc);
   decl = var_debug_decl (decl);
@@ -2375,7 +2382,7 @@ var_mem_delete_and_set (dataflow_set *se
 var_mem_delete (dataflow_set *set, rtx loc, bool clobber)
 {
   tree decl = MEM_EXPR (loc);
-  HOST_WIDE_INT offset = INT_MEM_OFFSET (loc);
+  HOST_WIDE_INT offset = int_mem_offset (loc);
 
   clobber_overlapping_mems (set, loc);
   decl = var_debug_decl (decl);
@@ -4618,7 +4625,7 @@ find_mem_expr_in_1pdv (tree expr, rtx va
   for (node = var->var_part[0].loc_chain; node; node = node->next)
 if (MEM_P (node->loc)
&& MEM_EXPR (node->loc) == expr
-   && INT_MEM_OFFSET (node->loc) == 0)
+   && int_mem_offset (node->loc) == 0)
   {
where = node;
break;
@@ -4683,7 +4690,7 @@ dataflow_set_preserve_mem_locs (variable
  /* We want to remove dying MEMs that don't refer to DECL.  */
  if (GET_CODE (loc->loc) == MEM
  && (MEM_EXPR (loc->loc) != decl
- || INT_MEM_OFFSET (loc->loc) != 0)
+ || int_mem_offset (loc->loc) != 0)
  && mem_dies_at_call (loc->loc))
break;
  /* We want to move here MEMs that do refer to DECL.  */
@@ -4727,7 +4734,7 @@ dataflow_set_preserve_mem_locs (variable
 
  if (GET_CODE (loc->loc) != MEM
  || (MEM_EXPR (loc->loc) == decl
- && INT_MEM_OFFSET (loc->loc) == 0)
+ && int_mem_offset (loc->loc) == 0)
  || !mem_dies_at_call (loc->loc))
{
  if (old_loc != loc->loc && emit_notes)
@@ -5254,7 +5261,7 @@ same_variable_part_p (rtx loc, tree expr
   else if (MEM_P (loc))
 {
   expr2 = MEM_EXPR (loc);
-  offset2 = INT_MEM_OFFSET (loc);
+  offset2 = int_mem_offset (loc);
 }
   else
 return false;
@@ -5522,7 +5529,7 @@ use_type (rtx loc, struct count_use_info
return MO_CLOBBER;
   else if (target_for_debug_bind (var_debug_decl (expr)))
return MO_CLOBBER;
-  else if (track_loc_p (loc, expr, INT_MEM_OFFSET (loc),
+  else if (track_loc_p (loc, expr, int_mem_offset (loc),
false, modep, NULL)
   /* Multi-part variables shouldn't refer to one-part
  variable names such as VALUEs (never happens) or
@@ -6017,7 +6024,7 @@ add_stores (rtx loc, const_rtx expr, voi
  rtx xexpr = gen_rtx_SET (loc, src);
  if (same_variable_part_p (SET_SRC (xexpr),
MEM_EXPR (loc),
-   INT_MEM_OFFSET (loc)))
+   int_mem_offset (loc)))
mo.type = MO_COPY;
  else
mo.type = MO_SET;
@@ -9579,7 +9586,7 @@ vt_get_decl_and_offset (rtx rtl, tree *d
   if (MEM_ATTRS (rtl))
{
  *declp = MEM_EXPR (rtl);
- *offsetp = INT_MEM_OFFSET (rtl);
+ *offsetp = int_mem_offset (rtl);
  return true;
}
 }

[18/nn] Use (CONST_VECTOR|GET_MODE)_NUNITS in simplify-rtx.c

2017-10-23 Thread Richard Sandiford

This patch avoids some calculations of the form:

  GET_MODE_SIZE (vector_mode) / GET_MODE_SIZE (element_mode)

in simplify-rtx.c.  If we're dealing with CONST_VECTORs, it's better
to use CONST_VECTOR_NUNITS, since that remains constant even after the
SVE patches.  In other cases we can get the number from GET_MODE_NUNITS.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* simplify-rtx.c (simplify_const_unary_operation): Use GET_MODE_NUNITS
and CONST_VECTOR_NUNITS instead of computing the number of units from
the byte sizes of the vector and element.
(simplify_binary_operation_1): Likewise.
(simplify_const_binary_operation): Likewise.
(simplify_ternary_operation): Likewise.

Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c  2017-10-23 11:47:11.277288162 +0100
+++ gcc/simplify-rtx.c  2017-10-23 11:47:32.868935554 +0100
@@ -1752,18 +1752,12 @@ simplify_const_unary_operation (enum rtx
return gen_const_vec_duplicate (mode, op);
   if (GET_CODE (op) == CONST_VECTOR)
{
- int elt_size = GET_MODE_UNIT_SIZE (mode);
-  unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
- rtvec v = rtvec_alloc (n_elts);
- unsigned int i;
-
- machine_mode inmode = GET_MODE (op);
- int in_elt_size = GET_MODE_UNIT_SIZE (inmode);
- unsigned in_n_elts = (GET_MODE_SIZE (inmode) / in_elt_size);
-
+ unsigned int n_elts = GET_MODE_NUNITS (mode);
+ unsigned int in_n_elts = CONST_VECTOR_NUNITS (op);
  gcc_assert (in_n_elts < n_elts);
  gcc_assert ((n_elts % in_n_elts) == 0);
- for (i = 0; i < n_elts; i++)
+ rtvec v = rtvec_alloc (n_elts);
+ for (unsigned i = 0; i < n_elts; i++)
RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts);
  return gen_rtx_CONST_VECTOR (mode, v);
}
@@ -3608,9 +3602,7 @@ simplify_binary_operation_1 (enum rtx_co
  rtx op0 = XEXP (trueop0, 0);
  rtx op1 = XEXP (trueop0, 1);
 
- machine_mode opmode = GET_MODE (op0);
- int elt_size = GET_MODE_UNIT_SIZE (opmode);
- int n_elts = GET_MODE_SIZE (opmode) / elt_size;
+ int n_elts = GET_MODE_NUNITS (GET_MODE (op0));
 
  int i = INTVAL (XVECEXP (trueop1, 0, 0));
  int elem;
@@ -3637,21 +3629,8 @@ simplify_binary_operation_1 (enum rtx_co
  mode01 = GET_MODE (op01);
 
  /* Find out number of elements of each operand.  */
- if (VECTOR_MODE_P (mode00))
-   {
- elt_size = GET_MODE_UNIT_SIZE (mode00);
- n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
-   }
- else
-   n_elts00 = 1;
-
- if (VECTOR_MODE_P (mode01))
-   {
- elt_size = GET_MODE_UNIT_SIZE (mode01);
- n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
-   }
- else
-   n_elts01 = 1;
+ n_elts00 = GET_MODE_NUNITS (mode00);
+ n_elts01 = GET_MODE_NUNITS (mode01);
 
  gcc_assert (n_elts == n_elts00 + n_elts01);
 
@@ -3771,9 +3750,8 @@ simplify_binary_operation_1 (enum rtx_co
  rtx subop1 = XEXP (trueop0, 1);
  machine_mode mode0 = GET_MODE (subop0);
  machine_mode mode1 = GET_MODE (subop1);
- int li = GET_MODE_UNIT_SIZE (mode0);
- int l0 = GET_MODE_SIZE (mode0) / li;
- int l1 = GET_MODE_SIZE (mode1) / li;
+ int l0 = GET_MODE_NUNITS (mode0);
+ int l1 = GET_MODE_NUNITS (mode1);
  int i0 = INTVAL (XVECEXP (trueop1, 0, 0));
  if (i0 == 0 && !side_effects_p (op1) && mode == mode0)
{
@@ -3931,14 +3909,10 @@ simplify_binary_operation_1 (enum rtx_co
|| CONST_SCALAR_INT_P (trueop1) 
|| CONST_DOUBLE_AS_FLOAT_P (trueop1)))
  {
-   int elt_size = GET_MODE_UNIT_SIZE (mode);
-   unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
+   unsigned n_elts = GET_MODE_NUNITS (mode);
+   unsigned in_n_elts = GET_MODE_NUNITS (op0_mode);
rtvec v = rtvec_alloc (n_elts);
unsigned int i;
-   unsigned in_n_elts = 1;
-
-   if (VECTOR_MODE_P (op0_mode))
- in_n_elts = (GET_MODE_SIZE (op0_mode) / elt_size);
for (i = 0; i < n_elts; i++)
  {
if (i < in_n_elts)
@@ -4026,16 +4000,12 @@ simplify_const_binary_operation (enum rt
   && GET_CODE (op0) == CONST_VECTOR
   && GET_CODE (op1) == CONST_VECTOR)
 {
-  unsigned n_elts = GET_MODE_NUNITS (mode);
-  machine_mode op0mode = GET_MODE (op0);
-  unsigned op0_n_elts = GET_MODE_NUNITS (

[19/nn] Don't treat zero-sized ranges as overlapping

2017-10-23 Thread Richard Sandiford

Most GCC ranges seem to be represented as an offset and a size (rather
than a start and inclusive end or start and exclusive end).  The usual
test for whether X is in a range is of course:

  x >= start && x < start + size
or:
  x >= start && x - start < size

which means that an empty range of size 0 contains nothing.  But other
range tests aren't as obvious.

The usual test for whether one range is contained within another
range is:

  start1 >= start2 && start1 + size1 <= start2 + size2

while the test for whether two ranges overlap (from ranges_overlap_p) is:

 (start1 >= start2 && start1 < start2 + size2)
  || (start2 >= start1 && start2 < start1 + size1)

i.e. the ranges overlap if one range contains the start of the other
range.  This leads to strange results like:

  (start X, size 0) is a subrange of (start X, size 0) but
  (start X, size 0) does not overlap (start X, size 0)

Similarly:

  (start 4, size 0) is a subrange of (start 2, size 2) but
  (start 4, size 0) does not overlap (start 2, size 2)

It seems like "X is a subrange of Y" should imply "X overlaps Y".

This becomes harder to ignore with the runtime sizes and offsets
added for SVE.  The most obvious fix seemed to be to say that
an empty range does not overlap anything, and is therefore not
a subrange of anything.

Using the new definition of subranges didn't seem to cause any
codegen differences in the testsuite.  But there was one change
with the new definition of overlapping ranges.  strncpy-chk.c has:

  memset (dst, 0, sizeof (dst));
  if (strncpy (dst, src, 0) != dst || strcmp (dst, ""))
abort();

The strncpy is detected as a zero-size write, and so with the new
definition of overlapping ranges, we treat the strncpy as having
no effect on the strcmp (which is true).  The reaching definition
is the memset instead.

This patch makes ranges_overlap_p return false for zero-sized
ranges, even if the other range has an unknown size.


2017-10-23  Richard Sandiford  

gcc/
* tree-ssa-alias.h (ranges_overlap_p): Return false if either
range is known to be empty.

Index: gcc/tree-ssa-alias.h
===
--- gcc/tree-ssa-alias.h2017-03-28 16:19:22.0 +0100
+++ gcc/tree-ssa-alias.h2017-10-23 11:47:38.181155696 +0100
@@ -171,6 +171,8 @@ ranges_overlap_p (HOST_WIDE_INT pos1,
  HOST_WIDE_INT pos2,
  unsigned HOST_WIDE_INT size2)
 {
+  if (size1 == 0 || size2 == 0)
+return false;
   if (pos1 >= pos2
   && (size2 == (unsigned HOST_WIDE_INT)-1
  || pos1 < (pos2 + (HOST_WIDE_INT) size2)))

[20/nn] Make tree-ssa-dse.c:normalize_ref return a bool

2017-10-23 Thread Richard Sandiford

This patch moves the check for an overlapping byte to normalize_ref
from its callers, so that it's easier to convert to poly_ints later.
It's not really worth it on its own.


2017-10-23  Richard Sandiford  

gcc/
* tree-ssa-dse.c (normalize_ref): Check whether the ranges overlap
and return false if not.
(clear_bytes_written_by, live_bytes_read): Update accordingly.

Index: gcc/tree-ssa-dse.c
===
--- gcc/tree-ssa-dse.c  2017-10-23 11:41:23.587123840 +0100
+++ gcc/tree-ssa-dse.c  2017-10-23 11:47:41.546155781 +0100
@@ -137,13 +137,11 @@ valid_ao_ref_for_dse (ao_ref *ref)
  && (ref->size != -1));
 }
 
-/* Normalize COPY (an ao_ref) relative to REF.  Essentially when we are
-   done COPY will only refer bytes found within REF.
+/* Try to normalize COPY (an ao_ref) relative to REF.  Essentially when we are
+   done COPY will only refer bytes found within REF.  Return true if COPY
+   is known to intersect at least one byte of REF.  */
 
-   We have already verified that COPY intersects at least one
-   byte with REF.  */
-
-static void
+static bool
 normalize_ref (ao_ref *copy, ao_ref *ref)
 {
   /* If COPY starts before REF, then reset the beginning of
@@ -151,13 +149,22 @@ normalize_ref (ao_ref *copy, ao_ref *ref
  number of bytes removed from COPY.  */
   if (copy->offset < ref->offset)
 {
-  copy->size -= (ref->offset - copy->offset);
+  HOST_WIDE_INT diff = ref->offset - copy->offset;
+  if (copy->size <= diff)
+   return false;
+  copy->size -= diff;
   copy->offset = ref->offset;
 }
 
+  HOST_WIDE_INT diff = copy->offset - ref->offset;
+  if (ref->size <= diff)
+return false;
+
   /* If COPY extends beyond REF, chop off its size appropriately.  */
-  if (copy->offset + copy->size > ref->offset + ref->size)
-copy->size -= (copy->offset + copy->size - (ref->offset + ref->size));
+  HOST_WIDE_INT limit = ref->size - diff;
+  if (copy->size > limit)
+copy->size = limit;
+  return true;
 }
 
 /* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
@@ -179,14 +186,10 @@ clear_bytes_written_by (sbitmap live_byt
   if (valid_ao_ref_for_dse (&write)
   && operand_equal_p (write.base, ref->base, OEP_ADDRESS_OF)
   && write.size == write.max_size
-  && ((write.offset < ref->offset
-  && write.offset + write.size > ref->offset)
- || (write.offset >= ref->offset
- && write.offset < ref->offset + ref->size)))
-{
-  normalize_ref (&write, ref);
-  bitmap_clear_range (live_bytes,
- (write.offset - ref->offset) / BITS_PER_UNIT,
+  && normalize_ref (&write, ref))
+{
+  HOST_WIDE_INT start = write.offset - ref->offset;
+  bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
  write.size / BITS_PER_UNIT);
 }
 }
@@ -480,21 +483,20 @@ live_bytes_read (ao_ref use_ref, ao_ref
 {
   /* We have already verified that USE_REF and REF hit the same object.
  Now verify that there's actually an overlap between USE_REF and REF.  */
-  if (ranges_overlap_p (use_ref.offset, use_ref.size, ref->offset, ref->size))
+  if (normalize_ref (&use_ref, ref))
 {
-  normalize_ref (&use_ref, ref);
+  HOST_WIDE_INT start = use_ref.offset - ref->offset;
+  HOST_WIDE_INT size = use_ref.size;
 
   /* If USE_REF covers all of REF, then it will hit one or more
 live bytes.   This avoids useless iteration over the bitmap
 below.  */
-  if (use_ref.offset <= ref->offset
- && use_ref.offset + use_ref.size >= ref->offset + ref->size)
+  if (start == 0 && size == ref->size)
return true;
 
   /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
-  unsigned int start = (use_ref.offset - ref->offset) / BITS_PER_UNIT;
-  unsigned int end  = ((use_ref.offset + use_ref.size) / BITS_PER_UNIT) - 
1;
-  return bitmap_bit_in_range_p (live, start, end);
+  return bitmap_bit_in_range_p (live, start / BITS_PER_UNIT,
+   (start + size - 1) / BITS_PER_UNIT);
 }
   return true;
 }

[21/nn] Minor vn_reference_lookup_3 tweak

2017-10-23 Thread Richard Sandiford

The repeated checks for MEM_REF made this code hard to convert to
poly_ints as-is.  Hopefully the new structure also makes it clearer
at a glance what the two cases are.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid repeated
checks for MEM_REF.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c2017-10-23 11:47:03.852769480 +0100
+++ gcc/tree-ssa-sccvn.c2017-10-23 11:47:44.596155858 +0100
@@ -2234,6 +2234,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
  || offset % BITS_PER_UNIT != 0
  || ref->size % BITS_PER_UNIT != 0)
return (void *)-1;
+  at = offset / BITS_PER_UNIT;
 
   /* Extract a pointer base and an offset for the destination.  */
   lhs = gimple_call_arg (def_stmt, 0);
@@ -2301,19 +2302,18 @@ vn_reference_lookup_3 (ao_ref *ref, tree
   copy_size = tree_to_uhwi (gimple_call_arg (def_stmt, 2));
 
   /* The bases of the destination and the references have to agree.  */
-  if ((TREE_CODE (base) != MEM_REF
-  && !DECL_P (base))
- || (TREE_CODE (base) == MEM_REF
- && (TREE_OPERAND (base, 0) != lhs
- || !tree_fits_uhwi_p (TREE_OPERAND (base, 1
- || (DECL_P (base)
- && (TREE_CODE (lhs) != ADDR_EXPR
- || TREE_OPERAND (lhs, 0) != base)))
+  if (TREE_CODE (base) == MEM_REF)
+   {
+ if (TREE_OPERAND (base, 0) != lhs
+ || !tree_fits_uhwi_p (TREE_OPERAND (base, 1)))
+   return (void *) -1;
+ at += tree_to_uhwi (TREE_OPERAND (base, 1));
+   }
+  else if (!DECL_P (base)
+  || TREE_CODE (lhs) != ADDR_EXPR
+  || TREE_OPERAND (lhs, 0) != base)
return (void *)-1;
 
-  at = offset / BITS_PER_UNIT;
-  if (TREE_CODE (base) == MEM_REF)
-   at += tree_to_uhwi (TREE_OPERAND (base, 1));
   /* If the access is completely outside of the memcpy destination
 area there is no aliasing.  */
   if (lhs_offset >= at + maxsize / BITS_PER_UNIT

[22/nn] Make dse.c use offset/width instead of start/end

2017-10-23 Thread Richard Sandiford

store_info and read_info_type in dse.c represented the ranges as
start/end, but a lot of the internal code used offset/width instead.
Using offset/width throughout fits better with the poly_int.h
range-checking functions.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* dse.c (store_info, read_info_type): Replace begin and end with
offset and width.
(print_range): New function.
(set_all_positions_unneeded, any_positions_needed_p)
(check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Update
accordingly.
(record_store): Likewise.  Optimize the case in which all positions
are unneeded.
(get_stored_val): Replace read_begin and read_end with read_offset
and read_width.
(replace_read): Update call accordingly.

Index: gcc/dse.c
===
--- gcc/dse.c   2017-10-23 11:47:11.273428262 +0100
+++ gcc/dse.c   2017-10-23 11:47:48.294155952 +0100
@@ -243,9 +243,12 @@ struct store_info
   /* Canonized MEM address for use by canon_true_dependence.  */
   rtx mem_addr;
 
-  /* The offset of the first and byte before the last byte associated
- with the operation.  */
-  HOST_WIDE_INT begin, end;
+  /* The offset of the first byte associated with the operation.  */
+  HOST_WIDE_INT offset;
+
+  /* The number of bytes covered by the operation.  This is always exact
+ and known (rather than -1).  */
+  HOST_WIDE_INT width;
 
   union
 {
@@ -261,7 +264,7 @@ struct store_info
  bitmap bmap;
 
  /* Number of set bits (i.e. unneeded bytes) in BITMAP.  If it is
-equal to END - BEGIN, the whole store is unused.  */
+equal to WIDTH, the whole store is unused.  */
  int count;
} large;
 } positions_needed;
@@ -304,10 +307,11 @@ struct read_info_type
   /* The id of the mem group of the base address.  */
   int group_id;
 
-  /* The offset of the first and byte after the last byte associated
- with the operation.  If begin == end == 0, the read did not have
- a constant offset.  */
-  int begin, end;
+  /* The offset of the first byte associated with the operation.  */
+  HOST_WIDE_INT offset;
+
+  /* The number of bytes covered by the operation, or -1 if not known.  */
+  HOST_WIDE_INT width;
 
   /* The mem being read.  */
   rtx mem;
@@ -586,6 +590,18 @@ static deferred_change *deferred_change_
 
 /* The number of bits used in the global bitmaps.  */
 static unsigned int current_position;
+
+/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE.  */
+
+static void
+print_range (FILE *file, poly_int64 offset, poly_int64 width)
+{
+  fprintf (file, "[");
+  print_dec (offset, file, SIGNED);
+  fprintf (file, "..");
+  print_dec (offset + width, file, SIGNED);
+  fprintf (file, ")");
+}
 
 /*
Zeroth step.
@@ -1212,10 +1228,9 @@ set_all_positions_unneeded (store_info *
 {
   if (__builtin_expect (s_info->is_large, false))
 {
-  int pos, end = s_info->end - s_info->begin;
-  for (pos = 0; pos < end; pos++)
-   bitmap_set_bit (s_info->positions_needed.large.bmap, pos);
-  s_info->positions_needed.large.count = end;
+  bitmap_set_range (s_info->positions_needed.large.bmap,
+   0, s_info->width);
+  s_info->positions_needed.large.count = s_info->width;
 }
   else
 s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
@@ -1227,8 +1242,7 @@ set_all_positions_unneeded (store_info *
 any_positions_needed_p (store_info *s_info)
 {
   if (__builtin_expect (s_info->is_large, false))
-return (s_info->positions_needed.large.count
-   < s_info->end - s_info->begin);
+return s_info->positions_needed.large.count < s_info->width;
   else
 return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
 }
@@ -1355,8 +1369,12 @@ record_store (rtx body, bb_info_t bb_inf
   set_usage_bits (group, offset, width, expr);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, " processing const base store gid=%d[%d..%d)\n",
-group_id, (int)offset, (int)(offset+width));
+   {
+ fprintf (dump_file, " processing const base store gid=%d",
+  group_id);
+ print_range (dump_file, offset, width);
+ fprintf (dump_file, "\n");
+   }
 }
   else
 {
@@ -1368,8 +1386,11 @@ record_store (rtx body, bb_info_t bb_inf
   group_id = -1;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, " processing cselib store [%d..%d)\n",
-(int)offset, (int)(offset+width));
+   {
+ fprintf (dump_file, " processing cselib store ");
+ print_range (dump_file, offset, width);
+ fprintf (dump_file, "\n");
+   }
 }
 
   const_rhs = rhs = NULL_RTX;
@@ -1435,18 +1456,21 @@ re

[PATCH] i386: Skip DF_REF_INSN if DF_REF_INSN_INFO is false

2017-10-23 Thread H.J. Lu

We should check DF_REF_INSN_INFO before accessing DF_REF_INSN.

OK for trunk?

H.J.
---
gcc/

PR target/82673
* config/i386/i386.c (ix86_finalize_stack_frame_flags): Skip
DF_REF_INSN if DF_REF_INSN_INFO is false.

gcc/testsuite/

PR target/82673
* gcc.target/i386/pr82673.c: New test.
---
 gcc/config/i386/i386.c  |  7 +--
 gcc/testsuite/gcc.target/i386/pr82673.c | 12 
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr82673.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b86504378ae..25c898866e2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12694,10 +12694,13 @@ ix86_finalize_stack_frame_flags (void)
  for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
   ref; ref = next)
{
- rtx_insn *insn = DF_REF_INSN (ref);
+ next = DF_REF_NEXT_REG (ref);
+ if (!DF_REF_INSN_INFO (ref))
+   continue;
+
  /* Make sure the next ref is for a different instruction,
 so that we're not affected by the rescan.  */
- next = DF_REF_NEXT_REG (ref);
+ rtx_insn *insn = DF_REF_INSN (ref);
  while (next && DF_REF_INSN (next) == insn)
next = DF_REF_NEXT_REG (next);
 
diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c 
b/gcc/testsuite/gcc.target/i386/pr82673.c
new file mode 100644
index 000..cff4b34535b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82673.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -fno-omit-frame-pointer 
-fvar-tracking-assignments-toggle" } */
+
+register long *B asm ("ebp");
+
+long y = 20;
+
+void
+bar (void) /* { dg-error "frame pointer required, but reserved" } */
+{
+  B = &y;
+} /* { dg-error "bp cannot be used in asm here" } */
-- 
2.13.6

[PATCH] Implement C++17 Filesystem library

2017-10-23 Thread Jonathan Wakely


Based on Filesystem TS implementation, with the changes applied by:

- P0219R1 Relative Paths for Filesystem
- P0317R1 Directory Entry Caching for Filesystem
- P0492R2 Resolution of C++17 National Body Comments

Where appropriate code is shared between the TS and C++17
implementations.

* include/Makefile.am: Add new headers for C++17 filesystem library.
* include/Makefile.in: Regenerate.
* include/bits/fs_dir.h: New header, based on Filesystem TS code in
include/experimental/bits directory.
* include/bits/fs_fwd.h: Likewise.
* include/bits/fs_ops.h: Likewise.
* include/bits/fs_path.h: Likewise.
* include/experimental/bits/fs_dir.h: Rename Doxygen group.
* include/experimental/bits/fs_fwd.h: Likewise.
* include/experimental/bits/fs_ops.h: Likewise.
* include/experimental/bits/fs_path.h: Likewise.
* include/experimental/filesystem (filesystem_error::_M_gen_what):
Remove inline definition.
* include/precompiled/stdc++.h: Add  to precompiled
header.
* include/std/filesystem: New header.
* python/libstdcxx/v6/printers.py: Enable printer for std::filesystem
paths.
* src/filesystem/Makefile.am: Add new files. Compile as C++17.
* src/filesystem/Makefile.in: Regenerate.
* src/filesystem/cow-dir.cc: Update comment.
* src/filesystem/cow-ops.cc: Likewise.
* src/filesystem/cow-path.cc: Likewise.
* src/filesystem/cow-std-dir.cc: New file.
* src/filesystem/cow-std-ops.cc: New file.
* src/filesystem/cow-std-path.cc: New file.
* src/filesystem/dir-common.h (_Dir_base, get_file_type): New header
for common code.
* src/filesystem/dir.cc (_Dir): Derive from _Dir_base.
(open_dir): Move to _Dir_base constructor.
(get_file_type): Move to dir-common.h.
(recurse): Move to _Dir_base::should_recurse.
* src/filesystem/ops-common.h: New header for common code.
* src/filesystem/ops.cc (is_set, make_file_type, make_file_status)
(is_not_found_errno, file_time, do_copy_file): Move to ops-common.h.
* src/filesystem/path.cc (filesystem_error::_M_gen_what): Define.
* src/filesystem/std-dir.cc: New file, based on Filesystem TS code.
* src/filesystem/std-ops.cc: Likewise.
* src/filesystem/std-dir.cc: Likewise.
* testsuite/27_io/filesystem/iterators/directory_iterator.cc: New
test.
* testsuite/27_io/filesystem/iterators/pop.cc: New test.
* testsuite/27_io/filesystem/iterators/recursive_directory_iterator.cc:
New test.
* testsuite/27_io/filesystem/operations/absolute.cc: New test.
* testsuite/27_io/filesystem/operations/canonical.cc: New test.
* testsuite/27_io/filesystem/operations/copy.cc: New test.
* testsuite/27_io/filesystem/operations/copy_file.cc: New test.
* testsuite/27_io/filesystem/operations/create_directories.cc: New
test.
* testsuite/27_io/filesystem/operations/create_directory.cc: New test.
* testsuite/27_io/filesystem/operations/create_symlink.cc: New test.
* testsuite/27_io/filesystem/operations/current_path.cc: New test.
* testsuite/27_io/filesystem/operations/equivalent.cc: New test.
* testsuite/27_io/filesystem/operations/exists.cc: New test.
* testsuite/27_io/filesystem/operations/file_size.cc: New test.
* testsuite/27_io/filesystem/operations/is_empty.cc: New test.
* testsuite/27_io/filesystem/operations/last_write_time.cc: New test.
* testsuite/27_io/filesystem/operations/permissions.cc: New test.
* testsuite/27_io/filesystem/operations/proximate.cc: New test.
* testsuite/27_io/filesystem/operations/read_symlink.cc: New test.
* testsuite/27_io/filesystem/operations/relative.cc: New test.
* testsuite/27_io/filesystem/operations/remove_all.cc: New test.
* testsuite/27_io/filesystem/operations/space.cc: New test.
* testsuite/27_io/filesystem/operations/status.cc: New test.
* testsuite/27_io/filesystem/operations/symlink_status.cc: New test.
* testsuite/27_io/filesystem/operations/temp_directory_path.cc: New
test.
* testsuite/27_io/filesystem/operations/weakly_canonical.cc: New test.
* testsuite/27_io/filesystem/path/append/path.cc: New test.
* testsuite/27_io/filesystem/path/assign/assign.cc: New test.
* testsuite/27_io/filesystem/path/assign/copy.cc: New test.
* testsuite/27_io/filesystem/path/compare/compare.cc: New test.
* testsuite/27_io/filesystem/path/compare/path.cc: New test.
* testsuite/27_io/filesystem/path/compare/strings.cc: New test.
* testsuite/27_io/filesystem/path/concat/path.cc: New test.
* testsuite/27_io/filesystem/path/concat/strings.cc: New test.
* testsuite/27_io/filesystem/path/construct/c

[Patch][ARM] Add -mbranch-cost option, and update a few tests

2017-10-23 Thread Christophe Lyon

Hi,

After Jakub's suggestion in PR82120 and PR81184, the attached patch
adds the -mbranch-cost option to the ARM target. My understanding
is that it's intended to be used internally for testing and does not
require user-facing documentation.

I have updated a few tests, validation on aarch64 & arm targets shows
no regression,
and a few improvements when targeting cortex-a5 or cortex-m3:
gcc.dg/tree-ssa/reassoc-3[3456].c now pass.

That being said, I'm not sure about the other targets for which I
changed the condition,
and I am also concerned by the fact that it has no impact on
gcc.dg/pr21643.c and gcc.dg/tree-ssa/phi-opt-11.c (PR81184).

Should I restrict my patch to the only tests where it has an impact
(gcc.dg/tree-ssa/reassoc-3[3456].c) ?

Thanks,

Christophe
gcc/ChangeLog:

2017-10-23  Christophe Lyon  

* config/arm/arm.opt (-mbranch-cost): New option.
* config/arm/arm.h (BRANCH_COST): Take arm_branch_cost into
account.

gcc/testsuite/ChangeLog:

2017-10-23  Christophe Lyon  

* lib/target-supports.exp (check_effective_target_branch_cost):
New function.
* gcc.dg/builtin-bswap-7.c: Use branch_cost effective target.
* gcc.dg/pr21643.c: Likewise.
* gcc.dg/pr46309.c: Likewise.
* gcc.dg/tree-ssa/phi-opt-11.c: Likewise.
* gcc.dg/tree-ssa/phi-opt-2.c: Likewise.
* gcc.dg/tree-ssa/reassoc-32.c: Likewise.
* gcc.dg/tree-ssa/reassoc-33.c: Likewise.
* gcc.dg/tree-ssa/reassoc-34.c: Likewise.
* gcc.dg/tree-ssa/reassoc-35.c: Likewise.
* gcc.dg/tree-ssa/reassoc-36.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-13.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-1.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-2.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-3.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-4.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-5.c: Likewise.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-6.c: Likewise.

commit 0b21f80bf10d0273e6db4655654df9e125c0dae6
Author: Christophe Lyon 
Date:   Fri Sep 8 12:27:42 2017 +

Add -mbranch-cost=N option for ARM.

Change-Id: I4d570646c405f7b186d0d1be80ce1661ef022aea

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 2d71e8f..854c753 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1946,8 +1946,9 @@ enum arm_auto_incmodes
 
 /* Try to generate sequences that don't involve branches, we can then use
conditional instructions.  */
-#define BRANCH_COST(speed_p, predictable_p) \
-  (current_tune->branch_cost (speed_p, predictable_p))
+#define BRANCH_COST(speed_p, predictable_p)\
+  ((arm_branch_cost != -1) ? arm_branch_cost : \
+   (current_tune->branch_cost (speed_p, predictable_p)))
 
 /* False if short circuit operation is preferred.  */
 #define LOGICAL_OP_NON_SHORT_CIRCUIT   \
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 6060516..a3719cb 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -294,3 +294,7 @@ When linking for big-endian targets, generate a BE8 format 
image.
 mbe32
 Target Report RejectNegative Negative(mbe8) InverseMask(BE8)
 When linking for big-endian targets, generate a legacy BE32 format image.
+
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(arm_branch_cost) Init(-1)
+Cost to assume for a branch insn.
diff --git a/gcc/testsuite/gcc.dg/builtin-bswap-7.c 
b/gcc/testsuite/gcc.dg/builtin-bswap-7.c
index 3e1718d..fe85441 100644
--- a/gcc/testsuite/gcc.dg/builtin-bswap-7.c
+++ b/gcc/testsuite/gcc.dg/builtin-bswap-7.c
@@ -5,7 +5,7 @@
 
 /* The branch cost setting prevents the return value from being
calculated with arithmetic instead of doing a compare.  */
-/* { dg-additional-options "-mbranch-cost=0" { target s390x-*-* } } */
+/* { dg-additional-options "-mbranch-cost=0" { target branch_cost } } */
 
 #include 
 
diff --git a/gcc/testsuite/gcc.dg/pr21643.c b/gcc/testsuite/gcc.dg/pr21643.c
index bd76aa8..d981fbc 100644
--- a/gcc/testsuite/gcc.dg/pr21643.c
+++ b/gcc/testsuite/gcc.dg/pr21643.c
@@ -1,6 +1,7 @@
 /* PR tree-optimization/21643 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-reassoc1-details" } */
+/* { dg-additional-options "-mbranch-cost=2" { target branch_cost } } */
 
 int
 f1 (unsigned char c)
diff --git a/gcc/testsuite/gcc.dg/pr46309.c b/gcc/testsuite/gcc.dg/pr46309.c
index 68229cf..c964529 100644
--- a/gcc/testsuite/gcc.dg/pr46309.c
+++ b/gcc/testsuite/gcc.dg/pr46309.c
@@ -4,7 +4,7 @@
 /* The transformation depends on BRANCH_COST being greater than 1
(see the notes in the PR), so try to force that.  */
 /* { dg-additional-options "-mtune=octeon2" { target mips*-*-* } } */
-/* { dg-additional-options "-mbranch-cost=2" { target avr*-*-* s390*-*-* 
i?86-*-* x86_64-*-* } } */
+/* { dg-additional-options "-mbranch-cost=2" { target branch_cost } } */
 
 int
 f1 (int a)
diff --gi

[PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread David Malcolm

On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote:
> On Thu, 19 Oct 2017, David Malcolm wrote:
> > > In file included from /scratch/tmp/gerald/gcc-HEAD/gcc/unique-
> > > ptr-tests.cc:23:
> > > In file included from /scratch/tmp/gerald/gcc-
> > > HEAD/gcc/../include/unique-ptr.h:77:
> > > In file included from /usr/include/c++/v1/memory:629:
> > > /usr/include/c++/v1/typeinfo:199:2: error: no member named
> > > 'fancy_abort' in namespace 'std::__1'; did you mean simply
> > > 'fancy_abort'?
> > > _VSTD::abort();
> > > ^~~
> > > /usr/include/c++/v1/__config:390:15: note: expanded from macro
> > > '_VSTD'
> > > #define _VSTD std::_LIBCPP_NAMESPACE
> >
> > There seem to have been similar problems on OS X:
> >   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610
>
> Yes, I believe it's the same actually (unearthed by clang as system
> compiler).
>
> > The proposed fix there is to include  in system.h, which
> > presumably would fix this also.
>
> That appears to work around the bootstrap failure on my tester as
> well.
>
> How can we go about fixing this in the tree?
>
> Gerald

Here's the patch by fxcoudert from the PR (plus a ChangeLog entry)

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu;
reported by fxcoudert as fixing the issue on darwin and by
Gerald as fixing the issue on "newer versions of FreeBSD that use
clang 4.0 as system compiler".

OK for trunk?

Sorry again about the breakage.

gcc/ChangeLog:
PR bootstrap/82610
* system.h [__cplusplus]: Include .
---
 gcc/system.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/system.h b/gcc/system.h
index f0664e9..d6e1637 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -233,6 +233,7 @@ extern int errno;
 # include 
 #endif
 # include 
+# include 
 # include 
 # include 
 #endif
-- 
1.8.5.3

Re: [PING] [C++ Patch] PR 82307

2017-10-23 Thread Paolo Carlini


Hi,

following up to a short off-line exchange with Nathan, I'm sending a 
reworked patch which - among other things - avoids regressing on the 
second testcase (cpp0x/enum36.C). Tested x86_64-linux.


Thanks,
Paolo.


/cp
2017-10-23  Mukesh Kapoor  
Paolo Carlini  

PR c++/82307
* cvt.c (type_promotes_to): Implement C++17, 7.6/4, about unscoped
enumeration type whose underlying type is fixed.

/testsuite
2017-10-23  Mukesh Kapoor  
Paolo Carlini  

PR c++/82307
* g++.dg/cpp0x/enum35.C: New.
* g++.dg/cpp0x/enum36.C: Likewise.
Index: cp/cvt.c
===
--- cp/cvt.c(revision 254005)
+++ cp/cvt.c(working copy)
@@ -1834,12 +1834,27 @@ type_promotes_to (tree type)
   || type == char32_type_node
   || type == wchar_type_node)
 {
+  tree prom = type;
+
+  if (TREE_CODE (type) == ENUMERAL_TYPE)
+   {
+ prom = ENUM_UNDERLYING_TYPE (prom);
+ if (!ENUM_IS_SCOPED (type)
+ && ENUM_FIXED_UNDERLYING_TYPE_P (type))
+   {
+ /* ISO C++17, 7.6/4.  A prvalue of an unscoped enumeration type
+whose underlying type is fixed (10.2) can be converted to a
+prvalue of its underlying type. Moreover, if integral promotion
+can be applied to its underlying type, a prvalue of an unscoped
+enumeration type whose underlying type is fixed can also be 
+converted to a prvalue of the promoted underlying type.  */
+ return type_promotes_to (prom);
+   }
+   }
+
   int precision = MAX (TYPE_PRECISION (type),
   TYPE_PRECISION (integer_type_node));
   tree totype = c_common_type_for_size (precision, 0);
-  tree prom = type;
-  if (TREE_CODE (prom) == ENUMERAL_TYPE)
-   prom = ENUM_UNDERLYING_TYPE (prom);
   if (TYPE_UNSIGNED (prom)
  && ! int_fits_type_p (TYPE_MAX_VALUE (prom), totype))
prom = c_common_type_for_size (precision, 1);
Index: testsuite/g++.dg/cpp0x/enum35.C
===
--- testsuite/g++.dg/cpp0x/enum35.C (nonexistent)
+++ testsuite/g++.dg/cpp0x/enum35.C (working copy)
@@ -0,0 +1,14 @@
+// PR c++/82307
+// { dg-do run { target c++11 } }
+
+#include 
+
+enum : unsigned long long { VAL };
+
+bool foo (unsigned long long) { return true; }
+bool foo (int) { return false; }
+
+int main()
+{
+  assert (foo(VAL));
+}
Index: testsuite/g++.dg/cpp0x/enum36.C
===
--- testsuite/g++.dg/cpp0x/enum36.C (nonexistent)
+++ testsuite/g++.dg/cpp0x/enum36.C (working copy)
@@ -0,0 +1,14 @@
+// PR c++/82307
+// { dg-do run { target c++11 } }
+
+#include 
+
+enum : short { VAL };
+
+bool foo (int) { return true; }
+bool foo (unsigned long long) { return false; }
+
+int main()
+{
+  assert (foo (VAL));
+}

Re: [PATCH] Make -gcolumn-info the default

2017-10-23 Thread Jason Merrill

On Mon, Oct 23, 2017 at 3:33 AM, Jakub Jelinek  wrote:
> Hi!
>
> When -gcolumn-info was added back in February, it was too late in the
> release cycle to make it the default, but I think now is the good time
> to do it for GCC8.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Makes sense to me.

[PATCH] FIx PR82672

2017-10-23 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-10-23  Richard Biener  

PR tree-optimization/82672
* graphite-isl-ast-to-gimple.c (graphite_copy_stmts_from_block):
Fold the stmt if we propagated into it.

* gfortran.dg/graphite/pr82672.f90: New testcase.

Index: gcc/graphite-isl-ast-to-gimple.c
===
--- gcc/graphite-isl-ast-to-gimple.c(revision 253998)
+++ gcc/graphite-isl-ast-to-gimple.c(working copy)
@@ -1175,22 +1194,28 @@ graphite_copy_stmts_from_block (basic_bl
   ssa_op_iter iter;
   use_operand_p use_p;
   if (!is_gimple_debug (copy))
-   FOR_EACH_SSA_USE_OPERAND (use_p, copy, iter, SSA_OP_USE)
- {
-   tree old_name = USE_FROM_PTR (use_p);
-
-   if (TREE_CODE (old_name) != SSA_NAME
-   || SSA_NAME_IS_DEFAULT_DEF (old_name)
-   || ! scev_analyzable_p (old_name, region->region))
- continue;
-
-   gimple_seq stmts = NULL;
-   tree new_name = get_rename_from_scev (old_name, &stmts,
- bb->loop_father, iv_map);
-   if (! codegen_error_p ())
- gsi_insert_earliest (stmts);
-   replace_exp (use_p, new_name);
- }
+   {
+ bool changed = false;
+ FOR_EACH_SSA_USE_OPERAND (use_p, copy, iter, SSA_OP_USE)
+   {
+ tree old_name = USE_FROM_PTR (use_p);
+
+ if (TREE_CODE (old_name) != SSA_NAME
+ || SSA_NAME_IS_DEFAULT_DEF (old_name)
+ || ! scev_analyzable_p (old_name, region->region))
+   continue;
+
+ gimple_seq stmts = NULL;
+ tree new_name = get_rename_from_scev (old_name, &stmts,
+   bb->loop_father, iv_map);
+ if (! codegen_error_p ())
+   gsi_insert_earliest (stmts);
+ replace_exp (use_p, new_name);
+ changed = true;
+   }
+ if (changed)
+   fold_stmt_inplace (&gsi_tgt);
+   }
 
   update_stmt (copy);
 }
Index: gcc/testsuite/gfortran.dg/graphite/pr82672.f90
===
--- gcc/testsuite/gfortran.dg/graphite/pr82672.f90  (nonexistent)
+++ gcc/testsuite/gfortran.dg/graphite/pr82672.f90  (working copy)
@@ -0,0 +1,33 @@
+! { dg-do compile }
+! { dg-options "-O2 -floop-nest-optimize" }
+
+  character(len=20,kind=4) :: s4
+  character(len=20,kind=1) :: s1
+
+  s1 = "foo\u"
+  s1 = "foo\u00ff"
+  s1 = "foo\u0100"
+  s1 = "foo\u0101"
+  s1 = "foo\U0101"
+
+  s1 = 4_"foo bar"
+  s1 = 4_"foo\u00ff"
+  s1 = 4_"foo\u0101"
+  s1 = 4_"foo\u1101"
+  s1 = 4_"foo\U"
+
+  s4 = "foo\u"
+  s4 = "foo\u00ff"
+  s4 = "foo\u0100"
+  s4 = "foo\U0100"
+
+  s4 = 4_"foo bar"
+  s4 = 4_"\xFF\x96"
+  s4 = 4_"\x00\x96"
+  s4 = 4_"foo\u00ff"
+  s4 = 4_"foo\u0101"
+  s4 = 4_"foo\u1101"
+  s4 = 4_"foo\Uab98EF56"
+  s4 = 4_"foo\U"
+
+end

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Richard Biener

On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm  wrote:
> On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote:
>> On Thu, 19 Oct 2017, David Malcolm wrote:
>> > > In file included from /scratch/tmp/gerald/gcc-HEAD/gcc/unique-
>> > > ptr-tests.cc:23:
>> > > In file included from /scratch/tmp/gerald/gcc-
>> > > HEAD/gcc/../include/unique-ptr.h:77:
>> > > In file included from /usr/include/c++/v1/memory:629:
>> > > /usr/include/c++/v1/typeinfo:199:2: error: no member named
>> > > 'fancy_abort' in namespace 'std::__1'; did you mean simply
>> > > 'fancy_abort'?
>> > > _VSTD::abort();
>> > > ^~~
>> > > /usr/include/c++/v1/__config:390:15: note: expanded from macro
>> > > '_VSTD'
>> > > #define _VSTD std::_LIBCPP_NAMESPACE
>> >
>> > There seem to have been similar problems on OS X:
>> >   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610
>>
>> Yes, I believe it's the same actually (unearthed by clang as system
>> compiler).
>>
>> > The proposed fix there is to include  in system.h, which
>> > presumably would fix this also.
>>
>> That appears to work around the bootstrap failure on my tester as
>> well.
>>
>> How can we go about fixing this in the tree?
>>
>> Gerald
>
> Here's the patch by fxcoudert from the PR (plus a ChangeLog entry)
>
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu;
> reported by fxcoudert as fixing the issue on darwin and by
> Gerald as fixing the issue on "newer versions of FreeBSD that use
> clang 4.0 as system compiler".
>
> OK for trunk?

Not entirely happy as unique-ptr.h doesn't use  but well.

Ok to unbreak bootstrap.

Thanks,
Richard.

> Sorry again about the breakage.
>
> gcc/ChangeLog:
> PR bootstrap/82610
> * system.h [__cplusplus]: Include .
> ---
>  gcc/system.h | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/gcc/system.h b/gcc/system.h
> index f0664e9..d6e1637 100644
> --- a/gcc/system.h
> +++ b/gcc/system.h
> @@ -233,6 +233,7 @@ extern int errno;
>  # include 
>  #endif
>  # include 
> +# include 
>  # include 
>  # include 
>  #endif
> --
> 1.8.5.3
>

Re: [PATCH] Make -gcolumn-info the default

2017-10-23 Thread Pedro Alves

On 10/23/2017 02:46 PM, Jason Merrill wrote:
> On Mon, Oct 23, 2017 at 3:33 AM, Jakub Jelinek  wrote:
>> Hi!
>>
>> When -gcolumn-info was added back in February, it was too late in the
>> release cycle to make it the default, but I think now is the good time
>> to do it for GCC8.
>>
>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Makes sense to me.

+1 from me, FWIW.

Thanks,
Pedro Alves

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Pedro Alves

On 10/23/2017 02:51 PM, Richard Biener wrote:
> On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm  wrote:

>> OK for trunk?
> 
> Not entirely happy as unique-ptr.h doesn't use  but well.
> 

Actually it does.  It's needed in C++11 mode, because that's
where std::unique_ptr is defined:

#if __cplusplus >= 201103

/* In C++11 mode, all we need is import the standard
   std::unique_ptr.  */
template using unique_ptr = std::unique_ptr;

> Ok to unbreak bootstrap.

Thanks,
Pedro Alves

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread David Malcolm

On Mon, 2017-10-23 at 15:51 +0200, Richard Biener wrote:
> On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm 
> wrote:
> > On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote:
> > > On Thu, 19 Oct 2017, David Malcolm wrote:
> > > > > In file included from /scratch/tmp/gerald/gcc-
> > > > > HEAD/gcc/unique-
> > > > > ptr-tests.cc:23:
> > > > > In file included from /scratch/tmp/gerald/gcc-
> > > > > HEAD/gcc/../include/unique-ptr.h:77:
> > > > > In file included from /usr/include/c++/v1/memory:629:
> > > > > /usr/include/c++/v1/typeinfo:199:2: error: no member named
> > > > > 'fancy_abort' in namespace 'std::__1'; did you mean simply
> > > > > 'fancy_abort'?
> > > > > _VSTD::abort();
> > > > > ^~~
> > > > > /usr/include/c++/v1/__config:390:15: note: expanded from
> > > > > macro
> > > > > '_VSTD'
> > > > > #define _VSTD std::_LIBCPP_NAMESPACE
> > > > 
> > > > There seem to have been similar problems on OS X:
> > > >   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610
> > > 
> > > Yes, I believe it's the same actually (unearthed by clang as
> > > system
> > > compiler).
> > > 
> > > > The proposed fix there is to include  in system.h,
> > > > which
> > > > presumably would fix this also.
> > > 
> > > That appears to work around the bootstrap failure on my tester as
> > > well.
> > > 
> > > How can we go about fixing this in the tree?
> > > 
> > > Gerald
> > 
> > Here's the patch by fxcoudert from the PR (plus a ChangeLog entry)
> > 
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu;
> > reported by fxcoudert as fixing the issue on darwin and by
> > Gerald as fixing the issue on "newer versions of FreeBSD that use
> > clang 4.0 as system compiler".
> > 
> > OK for trunk?
> 
> Not entirely happy as unique-ptr.h doesn't use  but well.

I'm not sure I understand you here.

include/unique-ptr.h has:

  #if __cplusplus >= 201103

  /* In C++11 mode, all we need is import the standard
 std::unique_ptr.  */
  template using unique_ptr = std::unique_ptr;

  /* Pull in move as well.  */
  using std::move;

  #else /* C++11 */

  ...etc..., most of the file, the pre-C++11 implementation

So in C++11 and later it's using std::unique_ptr, for which, as I
understand it  is the standard include, e.g.: 
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3337.pdf
says in (20.6.2 Header  synopsis [memory.syn]):

"The header  defines several types and function templates that
describe properties of pointers and pointer-like types, manage memory
for containers and other template types, [...] The header also defines
the templates unique_ptr, shared_ptr, weak_ptr, and various template
functions that operate on objects of these types (20.7)."

Would you prefer the includes of  in gcc/system.h and
include/unique-ptr.h to be guarded by #if __cplusplus >= 201103 ? (not
sure if it works yet, but I can try it)

Thanks
Dave


> Ok to unbreak bootstrap.
> 
> Thanks,
> Richard.
> 
> > Sorry again about the breakage.
> > 
> > gcc/ChangeLog:
> > PR bootstrap/82610
> > * system.h [__cplusplus]: Include .
> > ---
> >  gcc/system.h | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/gcc/system.h b/gcc/system.h
> > index f0664e9..d6e1637 100644
> > --- a/gcc/system.h
> > +++ b/gcc/system.h
> > @@ -233,6 +233,7 @@ extern int errno;
> >  # include 
> >  #endif
> >  # include 
> > +# include 
> >  # include 
> >  # include 
> >  #endif
> > --
> > 1.8.5.3
> >

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Richard Biener

On October 23, 2017 4:15:17 PM GMT+02:00, David Malcolm  
wrote:
>On Mon, 2017-10-23 at 15:51 +0200, Richard Biener wrote:
>> On Mon, Oct 23, 2017 at 2:58 PM, David Malcolm 
>> wrote:
>> > On Sun, 2017-10-22 at 09:28 +0200, Gerald Pfeifer wrote:
>> > > On Thu, 19 Oct 2017, David Malcolm wrote:
>> > > > > In file included from /scratch/tmp/gerald/gcc-
>> > > > > HEAD/gcc/unique-
>> > > > > ptr-tests.cc:23:
>> > > > > In file included from /scratch/tmp/gerald/gcc-
>> > > > > HEAD/gcc/../include/unique-ptr.h:77:
>> > > > > In file included from /usr/include/c++/v1/memory:629:
>> > > > > /usr/include/c++/v1/typeinfo:199:2: error: no member named
>> > > > > 'fancy_abort' in namespace 'std::__1'; did you mean simply
>> > > > > 'fancy_abort'?
>> > > > > _VSTD::abort();
>> > > > > ^~~
>> > > > > /usr/include/c++/v1/__config:390:15: note: expanded from
>> > > > > macro
>> > > > > '_VSTD'
>> > > > > #define _VSTD std::_LIBCPP_NAMESPACE
>> > > > 
>> > > > There seem to have been similar problems on OS X:
>> > > >   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82610
>> > > 
>> > > Yes, I believe it's the same actually (unearthed by clang as
>> > > system
>> > > compiler).
>> > > 
>> > > > The proposed fix there is to include  in system.h,
>> > > > which
>> > > > presumably would fix this also.
>> > > 
>> > > That appears to work around the bootstrap failure on my tester as
>> > > well.
>> > > 
>> > > How can we go about fixing this in the tree?
>> > > 
>> > > Gerald
>> > 
>> > Here's the patch by fxcoudert from the PR (plus a ChangeLog entry)
>> > 
>> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu;
>> > reported by fxcoudert as fixing the issue on darwin and by
>> > Gerald as fixing the issue on "newer versions of FreeBSD that use
>> > clang 4.0 as system compiler".
>> > 
>> > OK for trunk?
>> 
>> Not entirely happy as unique-ptr.h doesn't use  but well.
>
>I'm not sure I understand you here.
>
>include/unique-ptr.h has:
>
>  #if __cplusplus >= 201103
>
>  /* In C++11 mode, all we need is import the standard
> std::unique_ptr.  */
>  template using unique_ptr = std::unique_ptr;
>
>  /* Pull in move as well.  */
>  using std::move;
>
>  #else /* C++11 */
>
>  ...etc..., most of the file, the pre-C++11 implementation
>
>So in C++11 and later it's using std::unique_ptr, for which, as I
>understand it  is the standard include, e.g.: 
>http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3337.pdf
>says in (20.6.2 Header  synopsis [memory.syn]):
>
>"The header  defines several types and function templates that
>describe properties of pointers and pointer-like types, manage memory
>for containers and other template types, [...] The header also defines
>the templates unique_ptr, shared_ptr, weak_ptr, and various template
>functions that operate on objects of these types (20.7)."
>
>Would you prefer the includes of  in gcc/system.h and
>include/unique-ptr.h to be guarded by #if __cplusplus >= 201103 ? (not
>sure if it works yet, but I can try it)

I guess so. But we have to make gdb happy as well. It really depends how much 
each TU grows with the extra (unneeded) include grows in C++11 and C++04 mode. 

Richard. 

>Thanks
>Dave
>
>
>> Ok to unbreak bootstrap.
>> 
>> Thanks,
>> Richard.
>> 
>> > Sorry again about the breakage.
>> > 
>> > gcc/ChangeLog:
>> > PR bootstrap/82610
>> > * system.h [__cplusplus]: Include .
>> > ---
>> >  gcc/system.h | 1 +
>> >  1 file changed, 1 insertion(+)
>> > 
>> > diff --git a/gcc/system.h b/gcc/system.h
>> > index f0664e9..d6e1637 100644
>> > --- a/gcc/system.h
>> > +++ b/gcc/system.h
>> > @@ -233,6 +233,7 @@ extern int errno;
>> >  # include 
>> >  #endif
>> >  # include 
>> > +# include 
>> >  # include 
>> >  # include 
>> >  #endif
>> > --
>> > 1.8.5.3
>> >

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Michael Matz

Hi,

On Mon, 23 Oct 2017, Richard Biener wrote:

> I guess so. But we have to make gdb happy as well. It really depends how 
> much each TU grows with the extra (unneeded) include grows in C++11 and 
> C++04 mode.

The c++ headers unconditionally included from system.h, with:

% echo '#include <$name>' | g++-7 -E -x c++ - | wc -l
new:  3564
cstring:   533
utility:  3623
memory:  28066

compile time:
% echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ -
new: 0:00.06elapsed, 17060maxresident, 0major+3709minor
cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor
utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor
memory:  0:00.25elapsed, 40356maxresident, 0major+9764minor

Hence,  is not cheap at all, including it unconditionally from 
system.h when it isn't actually used by many things doesn't seem a good 
idea.

Ciao,
Michael.

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Jonathan Wakely


On 23/10/17 17:07 +0200, Michael Matz wrote:

Hi,

On Mon, 23 Oct 2017, Richard Biener wrote:


I guess so. But we have to make gdb happy as well. It really depends how
much each TU grows with the extra (unneeded) include grows in C++11 and
C++04 mode.


The c++ headers unconditionally included from system.h, with:

% echo '#include <$name>' | g++-7 -E -x c++ - | wc -l
new:  3564
cstring:   533
utility:  3623
memory:  28066


That's using the -std=gnu++4 default for g++-7, and for that mode
the header *is* needed, to get the definition of std::unique_ptr.

For C++98 (when it isn't needed) that header is much smaller:

tmp$ echo '#include ' | g++ -E -x c++ - | wc -l
28101
tmp$ echo '#include ' | g++ -E -x c++ - -std=gnu++98  | wc -l
4267

(Because it doesn't contain std::unique_ptr and std::shared_ptr before
C++11).


compile time:
% echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ -
new: 0:00.06elapsed, 17060maxresident, 0major+3709minor
cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor
utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor
memory:  0:00.25elapsed, 40356maxresident, 0major+9764minor

Hence,  is not cheap at all, including it unconditionally from
system.h when it isn't actually used by many things doesn't seem a good
idea.


Ciao,
Michael.

Re: [PATCH] i386: Skip DF_REF_INSN if DF_REF_INSN_INFO is false

2017-10-23 Thread Uros Bizjak

On Mon, Oct 23, 2017 at 1:45 PM, H.J. Lu  wrote:
> We should check DF_REF_INSN_INFO before accessing DF_REF_INSN.
>
> OK for trunk?
>
> H.J.
> ---
> gcc/
>
> PR target/82673
> * config/i386/i386.c (ix86_finalize_stack_frame_flags): Skip
> DF_REF_INSN if DF_REF_INSN_INFO is false.
>
> gcc/testsuite/
>
> PR target/82673
> * gcc.target/i386/pr82673.c: New test.

I'm not that versed in the dataflow details, the patch LGTM.

One nit below.

Thanks,
Uros

> ---
>  gcc/config/i386/i386.c  |  7 +--
>  gcc/testsuite/gcc.target/i386/pr82673.c | 12 
>  2 files changed, 17 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr82673.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b86504378ae..25c898866e2 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -12694,10 +12694,13 @@ ix86_finalize_stack_frame_flags (void)
>   for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
>ref; ref = next)
> {
> - rtx_insn *insn = DF_REF_INSN (ref);
> + next = DF_REF_NEXT_REG (ref);
> + if (!DF_REF_INSN_INFO (ref))
> +   continue;
> +
>   /* Make sure the next ref is for a different instruction,
>  so that we're not affected by the rescan.  */
> - next = DF_REF_NEXT_REG (ref);
> + rtx_insn *insn = DF_REF_INSN (ref);
>   while (next && DF_REF_INSN (next) == insn)
> next = DF_REF_NEXT_REG (next);
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c 
> b/gcc/testsuite/gcc.target/i386/pr82673.c
> new file mode 100644
> index 000..cff4b34535b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr82673.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -fno-omit-frame-pointer 
> -fvar-tracking-assignments-toggle" } */

Use -fvar-tracking-assignments here.

> +
> +register long *B asm ("ebp");
> +
> +long y = 20;
> +
> +void
> +bar (void) /* { dg-error "frame pointer required, but reserved" } */
> +{
> +  B = &y;
> +} /* { dg-error "bp cannot be used in asm here" } */
> --
> 2.13.6
>

Re: [PATCH, rs6000 V3] Add Power 8 support to vec_revb

2017-10-23 Thread Carl Love

GCC maintainers:

I have fixed the change log lines as mentioned by Segher.  I removed the
changes to swap_selector_for_mode() and instead created
swap_endianess_selector_for_mode().  The mode attribute wd will not work
for the define expand as the V16QI maps to "b" not "q".  So I do need to
have VSX_XXBR.  I did change it to _.  The XXBR_L
iterator was changed to VEC_A which has everything needed but V1TI.
There are other iterators that include V1TI but have additional entries
which would then need to be included in VXS_XXBR.  These really don't
make sense to me for this instruction so I preferred to use the new
iterator/attribute in a define_expand and for the V1TI case have an
additional explicit define_expand for that case.

I have retested the updated patch on:

 powerpc64-unknown-linux-gnu (Power 8 BE),
 powerpc64le-unknown-linux-gnu (Power 8 LE),   
 powerpc64le-unknown-linux-gnu (Power 9 LE)

without regressions.  

Please let me know if the following patch is acceptable.  Thanks.

   Carl Love

-

gcc/ChangeLog:

2017-10-17  Carl Love  

* config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8
definitions for the builtin instances.
(P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions.
* config/rs6000/altivec.h (vec_revb): Change the #define from power 9
to power 8.
* config/rs6000/r6000-protos.h (swap_selector_for_mode): Add extern
declaration.
* config/rs6000/rs6000.c (swap_endianess_selector_for_mode): Add
function.
* config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1, BU_P8V_OVERLOAD_1):
Add power 8 macro expansions.
(BU_P9V_OVERLOAD_1): Remove power 9 overload expansion.
* config/rs6000/vsx.md (revb_): Add define_expand to generate
power 8 instructions for the vec_revb builtin.

gcc/testsuite/ChangeLog:

2017-10-17  Carl Love  

* gcc.target/powerpc/builtins-revb-runnable.c: New runnable test file
for the vec_revb builtin.
---
 gcc/config/rs6000/altivec.h|   3 +-
 gcc/config/rs6000/rs6000-builtin.def   |  10 +-
 gcc/config/rs6000/rs6000-c.c   |  44 +--
 gcc/config/rs6000/rs6000-protos.h  |   2 +
 gcc/config/rs6000/rs6000.c |  76 +
 gcc/config/rs6000/vsx.md   |  54 
 .../gcc.target/powerpc/builtins-revb-runnable.c| 350 +
 7 files changed, 514 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index c8e508c..a05e23a 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -415,6 +415,7 @@
 #define vec_vsubuqm __builtin_vec_vsubuqm
 #define vec_vupkhsw __builtin_vec_vupkhsw
 #define vec_vupklsw __builtin_vec_vupklsw
+#define vec_revb __builtin_vec_revb
 #endif
 
 #ifdef __POWER9_VECTOR__
@@ -476,8 +477,6 @@
 
 #define vec_xlx __builtin_vec_vextulx
 #define vec_xrx __builtin_vec_vexturx
-
-#define vec_revb __builtin_vec_revb
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 850164a..dba5e70 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1853,6 +1853,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST,  cmpbdi3)
 /* 1 argument VSX instructions added in ISA 2.07.  */
 BU_P8V_VSX_1 (XSCVSPDPN,  "xscvspdpn", CONST,  vsx_xscvspdpn)
 BU_P8V_VSX_1 (XSCVDPSPN,  "xscvdpspn", CONST,  vsx_xscvdpspn)
+BU_P8V_VSX_1 (REVB_V1TI,  "revb_v1ti", CONST,  revb_v1ti)
+BU_P8V_VSX_1 (REVB_V2DI,  "revb_v2di", CONST,  revb_v2di)
+BU_P8V_VSX_1 (REVB_V4SI,  "revb_v4si", CONST,  revb_v4si)
+BU_P8V_VSX_1 (REVB_V8HI,  "revb_v8hi", CONST,  revb_v8hi)
+BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi",CONST,  revb_v16qi)
+BU_P8V_VSX_1 (REVB_V2DF,  "revb_v2df", CONST,  revb_v2df)
+BU_P8V_VSX_1 (REVB_V4SF,  "revb_v4sf", CONST,  revb_v4sf)
 
 /* 1 argument altivec instructions added in ISA 2.07.  */
 BU_P8V_AV_1 (ABS_V2DI,   "abs_v2di",   CONST,  absv2di2)
@@ -1962,6 +1969,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh")
 BU_P8V_OVERLOAD_1 (VPOPCNTUW,  "vpopcntuw")
 BU_P8V_OVERLOAD_1 (VPOPCNTUD,  "vpopcntud")
 BU_P8V_OVERLOAD_1 (VGBBD,  "vgbbd")
+BU_P8V_OVERLOAD_1 (REVB,   "revb")
 
 /* ISA 2.07 vector overloaded 2 argument functions.  */
 BU_P8V_OVERLOAD_2 (EQV,"eqv")
@@ -2073,8 +2081,6 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP,  "scalar_test_neg_qp")
 BU_P9V_OVERLOAD_1 (VSTDCNDP,   "scalar_test_neg_dp")
 BU_P9V_OVERLOAD_1 (VSTDCNSP,   "scalar_test_neg_sp")
 
-BU_P9V_OVERLOAD_1 (REVB,   "revb")
-
 BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth")
 BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_S

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Pedro Alves

On 10/23/2017 04:17 PM, Jonathan Wakely wrote:
> On 23/10/17 17:07 +0200, Michael Matz wrote:
>> Hi,
>>
>> On Mon, 23 Oct 2017, Richard Biener wrote:
>>
>>> I guess so. But we have to make gdb happy as well. It really depends how
>>> much each TU grows with the extra (unneeded) include grows in C++11 and
>>> C++04 mode.
>>
>> The c++ headers unconditionally included from system.h, with:
>>
>> % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l
>> new:  3564
>> cstring:   533
>> utility:  3623
>> memory:  28066
> 
> That's using the -std=gnu++4 default for g++-7, and for that mode
> the header *is* needed, to get the definition of std::unique_ptr.
> 
> For C++98 (when it isn't needed) that header is much smaller:
> 
> tmp$ echo '#include ' | g++ -E -x c++ - | wc -l
> 28101
> tmp$ echo '#include ' | g++ -E -x c++ - -std=gnu++98  | wc -l
> 4267
> 
> (Because it doesn't contain std::unique_ptr and std::shared_ptr before
> C++11).
> 
>> compile time:
>> % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ -
>> new: 0:00.06elapsed, 17060maxresident, 0major+3709minor
>> cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor
>> utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor
>> memory:  0:00.25elapsed, 40356maxresident, 0major+9764minor
>>
>> Hence,  is not cheap at all, including it unconditionally from
>> system.h when it isn't actually used by many things doesn't seem a good
>> idea.
>>

I think the real question is whether it makes a difference in
a full build.  There won't be many translation units that
don't include some other headers.  (though of course I won't
be surprised if it does make a difference.)

If it's a real issue, you could fix this like how the
other similar cases were handled by system.h, by adding this
in system.h:

 #ifdef __cplusplus
 #ifdef INCLUDE_UNIQUE_PTR
 # include "unique-ptr.h"
 #endif
 #endif

instead of unconditionally including  there,
and then translation units that want unique-ptr.h would
do "#define INCLUDE_UNIQUE_PTR" instead of #include "unique-ptr.h",
like done for a few other C++ headers.

(I maintain that IMO this is kind of self-inflicted GCC pain due
to the fact that "#pragma poison" poisons too much.  If #pragma
poison's behavior were adjusted (or a new variant/mode created) to
ignore references to the poisoned symbol names in system headers (or
something like that), then you wouldn't need this manual management
of header dependencies in gcc/system.h and the corresponding 
'#define INCLUDE_FOO' contortions.  There's nothing that you can reasonably
do with a reference to a poisoned symbol in a system header, other than
avoid having the system header have the '#pragma poison' in effect when
its included, which leads to contortions like system.h's.  Note that
the poisoned names are _still used anyway_.  So can we come up with
a GCC change that would avoid having to worry about manually doing
this?  It'd likely help other projects too.)

Thanks,
Pedro Alves

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread David Malcolm

On Mon, 2017-10-23 at 16:40 +0100, Pedro Alves wrote:
> On 10/23/2017 04:17 PM, Jonathan Wakely wrote:
> > On 23/10/17 17:07 +0200, Michael Matz wrote:
> > > Hi,
> > > 
> > > On Mon, 23 Oct 2017, Richard Biener wrote:
> > > 
> > > > I guess so. But we have to make gdb happy as well. It really
> > > > depends how
> > > > much each TU grows with the extra (unneeded) include grows in
> > > > C++11 and
> > > > C++04 mode.
> > > 
> > > The c++ headers unconditionally included from system.h, with:
> > > 
> > > % echo '#include <$name>' | g++-7 -E -x c++ - | wc -l
> > > new:  3564
> > > cstring:   533
> > > utility:  3623
> > > memory:  28066
> > 
> > That's using the -std=gnu++4 default for g++-7, and for that mode
> > the header *is* needed, to get the definition of std::unique_ptr.
> > 
> > For C++98 (when it isn't needed) that header is much smaller:
> > 
> > tmp$ echo '#include ' | g++ -E -x c++ - | wc -l
> > 28101
> > tmp$ echo '#include ' | g++ -E -x c++ - -std=gnu++98  | wc
> > -l
> > 4267
> > 
> > (Because it doesn't contain std::unique_ptr and std::shared_ptr
> > before
> > C++11).
> > 
> > > compile time:
> > > % echo -e '#include <$name>\nint i;' | time g++-7 -c -x c++ -
> > > new: 0:00.06elapsed, 17060maxresident, 0major+3709minor
> > > cstring: 0:00.03elapsed, 13524maxresident, 0major+3075minor
> > > utility: 0:00.05elapsed, 16952maxresident, 0major+3776minor
> > > memory:  0:00.25elapsed, 40356maxresident, 0major+9764minor
> > > 
> > > Hence,  is not cheap at all, including it unconditionally
> > > from
> > > system.h when it isn't actually used by many things doesn't seem
> > > a good
> > > idea.
> > > 
> 
> I think the real question is whether it makes a difference in
> a full build.  There won't be many translation units that
> don't include some other headers.  (though of course I won't
> be surprised if it does make a difference.)
> 
> If it's a real issue, you could fix this like how the
> other similar cases were handled by system.h, by adding this
> in system.h:
> 
>  #ifdef __cplusplus
>  #ifdef INCLUDE_UNIQUE_PTR
>  # include "unique-ptr.h"
>  #endif
>  #endif
> 
> instead of unconditionally including  there,
> and then translation units that want unique-ptr.h would
> do "#define INCLUDE_UNIQUE_PTR" instead of #include "unique-ptr.h",
> like done for a few other C++ headers.
> 
> (I maintain that IMO this is kind of self-inflicted GCC pain due
> to the fact that "#pragma poison" poisons too much.  If #pragma
> poison's behavior were adjusted (or a new variant/mode created) to
> ignore references to the poisoned symbol names in system headers (or
> something like that), then you wouldn't need this manual management
> of header dependencies in gcc/system.h and the corresponding 
> '#define INCLUDE_FOO' contortions.  There's nothing that you can
> reasonably
> do with a reference to a poisoned symbol in a system header, other
> than
> avoid having the system header have the '#pragma poison' in effect
> when
> its included, which leads to contortions like system.h's.  Note that
> the poisoned names are _still used anyway_.  So can we come up with
> a GCC change that would avoid having to worry about manually doing
> this?  It'd likely help other projects too.)
> 
> Thanks,
> Pedro Alves

FWIW, this one isn't from #pragma poison, it's from:
  #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__)

(I messed up the --in-reply-to when posting the patch, but Gerald noted
the issue was due to:
/usr/include/c++/v1/typeinfo:199:2: error: no member named
'fancy_abort' in namespace 'std::__1'; did you mean simply
'fancy_abort'?
_VSTD::abort();
^~~
/usr/include/c++/v1/__config:390:15: note: expanded from macro '_VSTD'
#define _VSTD std::_LIBCPP_NAMESPACE
  ^
/scratch/tmp/gerald/gcc-HEAD/gcc/system.h:725:13: note: 'fancy_abort'
declared here
extern void fancy_abort (const char *, int, const char *)
^

https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01289.html

)

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Michael Matz

Hi,

On Mon, 23 Oct 2017, David Malcolm wrote:

> FWIW, this one isn't from #pragma poison, it's from:
>   #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__)
> 
> (I messed up the --in-reply-to when posting the patch, but Gerald noted
> the issue was due to:
> /usr/include/c++/v1/typeinfo:199:2: error: no member named
> 'fancy_abort' in namespace 'std::__1'; did you mean simply
> 'fancy_abort'?
> _VSTD::abort();
> ^~~

So if we really really have to add an unconditional include in 
system.h it's probably enough to include , not .  


Ciao,
Michael.

Re: [RFC] New pragma exec_charset

2017-10-23 Thread Martin Sebor


On 10/23/2017 04:55 AM, Andreas Krebbel wrote:

On 10/19/2017 07:13 PM, Martin Sebor wrote:

On 10/19/2017 09:50 AM, Andreas Krebbel wrote:

The TPF operating system uses the GCC S/390 backend.  They set an
EBCDIC exec charset for compilation using -fexec-charset.  However,
certain libraries require ASCII strings instead.  In order to be able
to put calls to that library into the normal code it is required to
switch the exec charset within a compilation unit.

This is an attempt to implement it by adding a new pragma which could
be used like in the following example:

int
foo ()
{
  call_with_utf8("hello world");

#pragma GCC exec_charset("UTF16")
  call_with_utf16("hello world");

#pragma GCC exec_charset(pop)
  call_with_utf8("hello world");
}

Does this look reasonable?


I'm not an expert on this but at a high level it looks reasonable
to me.  But based on some small amount of work I did in this area
I have a couple of questions.

There are a few places in the compiler that already do or that
should but don't yet handle different execution character sets.
The former include built-ins like __bultin_isdigit() and
__builtin_sprintf (in both builtins.c and gimple-ssa-sprintf.c)
The latter is the -Wformat checking done by the C and C++ front
ends.  The missing support for the latter is the subject of bug
38308.  According to bug 81686, LTO is apparently also missing
support for exec-charset.


These probably are the areas Richard and Jakub were referring to as well?!  
These cases did not work
properly with the -fexec-charset cmdline option and this does not change with 
the pragma. I'll try
to look at what has been proposed in the discussion. Perhaps I can get it 
working somehow.


Right, the patch doesn't remove the known deficiencies.  But
by providing another knob to control the execution charset, at
a fine grain level, it encourages users to make greater use of
the (incomplete) exec-charset support and increases the odds
that they will run afoul of them.

It seems to me that before exposing a new mechanism to control
the exec charset it would be prudent to a) plug at least the
biggest holes to make the feature more reliable (in my mind,
that's at least -Wformat), and b) make sure the pragma interacts
correctly with existing features that work correctly with the
-fexec-charset option.  Where it doesn't and where it cannot
be made to work correctly (i.e., is undefined),  I would expect
an effort to be made to detect and diagnose those undefined
interactions if possible, or if that's too difficult, at
a minimum document them.


I'm curious how the pragma might interact with these two areas,
and whether the lack of support for it in the latter is a concern
(and if not, why not).  For the former, I'm also wondering about
the interaction of inlining and other interprocedural optimizations
with the pragma.  Does it propagate through inlined calls as one
would expect?


The pragma does not apply to the callees of a function defined under the pragma 
regardless of
whether it gets inlined or not.  That matches the behavior of other pragmas.  
If it would apply to
inlined callees the program semantics might change depending on optimization 
decisions i.e. whether
a certain call got inlined or not.

Callees marked as always_inline might be discussed separately. I remember this 
being a topic when
looking at function attributes.


My concern with this pragma/attribute and inlining has to do with
strings in one exec charset being propagated into functions that
operate on strings in another charset.  E.g., like in the test
case below that's "miscompiled" with your patch -- the first test
for n == 7 is eliminated and the buffer overflow is not detected.
If this cannot be made to work then I think some effort should be
made to detect this mixing and matching and existing optimizations
that assume the same charset (like the sprintf one does) disabled.

static inline int
f (char *d, const char *fmt)
{
#pragma GCC exec_charset ("utf8")
  int n = __builtin_sprintf (d, fmt, 12345);
#pragma GCC exec_charset (pop)

  if (n == 7)   // incorrectly optimized away
__builtin_abort ();

  return n;
}

int main (void)
{
  char d[5];

#pragma GCC exec_charset ("EBCDIC-US")
  int n = f (d, "i=%i");   // buffer overflow not detected
#pragma GCC exec_charset (pop)

  __builtin_printf ("%i (%lu): %s\n", n, __builtin_strlen (d), d);

  if (n != 7)   // aborts at runtime
__builtin_abort ();
}

Martin

Re: [PATCH] Include from system.h (PR bootstrap/82610)

2017-10-23 Thread Pedro Alves

On 10/23/2017 04:50 PM, David Malcolm wrote:

> FWIW, this one isn't from #pragma poison, it's from:
>   #define abort() fancy_abort (__FILE__, __LINE__, __FUNCTION__)
> 
> (I messed up the --in-reply-to when posting the patch, but Gerald noted
> the issue was due to:
> /usr/include/c++/v1/typeinfo:199:2: error: no member named
> 'fancy_abort' in namespace 'std::__1'; did you mean simply
> 'fancy_abort'?
> _VSTD::abort();
> ^~~
> /usr/include/c++/v1/__config:390:15: note: expanded from macro '_VSTD'
> #define _VSTD std::_LIBCPP_NAMESPACE
>   ^
> /scratch/tmp/gerald/gcc-HEAD/gcc/system.h:725:13: note: 'fancy_abort'
> declared here
> extern void fancy_abort (const char *, int, const char *)
> ^
> 

IMO the best fix would be to rename that "#define abort" to
"#define gcc_abort" and then call gcc_abort instead in the few
places that currently call abort.

IME, the introduction of a new naked call to abort() isn't something
that easily passes review.  abort calls always stand out and give
reviewers pause (or they should!).  FWIW, GDB also doesn't want
such naked abort() calls, I don't recall people-sneaking-in-abort-()-calls
ever being a problem over there.

Thanks,
Pedro Alves

Re: [PATCH][AArch64] Wrong type-attribute for stp and str

2017-10-23 Thread Dominik Inführ

I’ve added your suggestions. I would also like to propose to change the type 
attribute from neon_stp to store_8 and store_16, this seems to be more in line 
with respect to other patterns.

Thanks,
Dominik

ChangeLog:
2017-10-23  Dominik Infuehr  

* config/aarch64/aarch64-simd.md
(*aarch64_simd_mov): Fix type-attribute.
(*aarch64_simd_mov): Likewise.
—
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 49f615cfdbf..447ee3afd17 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -102,7 +102,7 @@
   [(set_attr "type" "neon_dup")]
 )

-(define_insn "*aarch64_simd_mov"
+(define_insn "*aarch64_simd_mov"
   [(set (match_operand:VD 0 "nonimmediate_operand"
"=w, m,  m,  w, ?r, ?w, ?r, w")
(match_operand:VD 1 "general_operand"
@@ -126,12 +126,12 @@
  default: gcc_unreachable ();
  }
 }
-  [(set_attr "type" "neon_load1_1reg, neon_stp, neon_store1_1reg,\
+  [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
 neon_logic, neon_to_gp, f_mcr,\
 mov_reg, neon_move")]
 )

-(define_insn "*aarch64_simd_mov"
+(define_insn "*aarch64_simd_mov"
   [(set (match_operand:VQ 0 "nonimmediate_operand"
"=w, Umq,  m,  w, ?r, ?w, ?r, w")
(match_operand:VQ 1 "general_operand"
@@ -160,8 +160,8 @@
gcc_unreachable ();
 }
 }
-  [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\
-neon_stp, neon_logic, multiple, multiple,\
+  [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\
+neon_logic, multiple, multiple,\
 multiple, neon_move")
(set_attr "length" "4,4,4,4,8,8,8,4")]
 )


> On 20 Oct 2017, at 16:07, Richard Earnshaw (lists)  
> wrote:
> 
> On 16/10/17 14:26, Dominik Inführ wrote:
>> Hi,
>> 
>> it seems the type attributes for neon_stp and neon_store1_1reg should be 
>> the other way around.
>> 
> 
> Yes, I agree, but there's more
> 
> Firstly, we have two patterns that are named *aarch64_simd_mov,
> with different iterators.  That's slightly confusing.  I think they need
> to be renamed as:
> 
>   *aarch64_simd_mov
> 
> and
> 
>   *aarch64_simd_mov
> 
> to break the ambiguity.
> 
> Secondly it looks to me as though the attributes on the other one are
> also incorrect.  Could you check that one out as well, please.
> 
> Thanks,
> 
> R.
> 
>> Thanks
>> Dominik
>> 
>> ChangeLog:
>> 2017-10-16  Dominik Infuehr  
>> 
>>  * config/aarch64/aarch64-simd.md
>>  (*aarch64_simd_mov): Fix type-attribute.
>> --
>> diff --git a/gcc/config/aarch64/aarch64-simd.md 
>> b/gcc/config/aarch64/aarch64-simd.md
>> index 49f615cfdbf..409ad3502ff 100644
>> --- a/gcc/config/aarch64/aarch64-simd.md
>> +++ b/gcc/config/aarch64/aarch64-simd.md
>> @@ -160,8 +160,8 @@
>>gcc_unreachable ();
>> }
>> }
>> -  [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\
>> -neon_stp, neon_logic, multiple, multiple,\
>> +  [(set_attr "type" "neon_load1_1reg, neon_stp, neon_store1_1reg,\
>> +neon_logic, multiple, multiple,\
>> multiple, neon_move")
>>(set_attr "length" "4,4,4,4,8,8,8,4")]
>> )
>> 
> 



signature.asc
Description: Message signed with OpenPGP using GPGMail

Re: [PING][PATCH][Aarch64] Improve int<->FP conversions

2017-10-23 Thread James Greenhalgh

On Tue, Oct 17, 2017 at 01:17:04AM +0100, Michael Collison wrote:
> Patch updated with all comments from James.

OK with an appropriate ChangeLog and assuming it has been tested as
required.

Thanks,
James

Reviewed-by:

[000/nnn] poly_int: representation of runtime offsets and sizes

2017-10-23 Thread Richard Sandiford

This series adds support for offsets and sizes that are a runtime
invariant rather than a compile time constant.  It's based on the
patch posted here:

  https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00406.html

The rest of the covering note is split into:

- Summary   (from the message linked above)
- Tree representation
- RTL representation
- Compile-time impact
- Typical changes
- Testing


Summary
===

The size of an SVE register in bits can be any multiple of 128 between
128 and 2048 inclusive.  The way we chose to represent this was to have
a runtime indeterminate that counts the number of 128 bit blocks above
the minimum of 128.  If we call the indeterminate X then:

* an SVE register has 128 + 128 * X bits (16 + 16 * X bytes)
* the last int in an SVE vector is at byte offset 12 + 16 * X
* etc.

Although the maximum value of X is 15, we don't want to take advantage
of that, since there's nothing particularly magical about the value.

So we have two types of target: those for which there are no runtime
indeterminates, and those for which there is one runtime indeterminate.
We decided to generalise the interface slightly by allowing any number
of indeterminates, although some parts of the underlying implementation
are still limited to 0 and 1 for now.

The main class for working with these runtime offsets and sizes is
"poly_int".  It represents a value of the form:

  C0 + C1 * X1 + ... + Cn * Xn

where each coefficient Ci is a compile-time constant and where each
indeterminate Xi is a nonnegative runtime value.  The class takes two
template parameters, one giving the number of coefficients and one
giving the type of the coefficients.  There are then typedefs for the
common cases, with the number of coefficients being controlled by
the target.

poly_int is used for things like:

- the number of elements in a VECTOR_TYPE
- the size and number of units in a general machine_mode
- the offset of something in the stack frame
- SUBREG_BYTE
- MEM_SIZE and MEM_OFFSET
- mem_ref_offset

(only a selective list).

The patch that adds poly_int has detailed documentation, but the main
points are:

* there's no total ordering between poly_ints, so the best we can do
  when comparing them is to ask whether two values *might* or *must*
  be related in a particular way.  E.g. if mode A has size 2 + 2X
  and mode B has size 4, the condition:

GET_MODE_SIZE (A) <= GET_MODE_SIZE (B)

  is true for X<=1 and false for X>=2.  This translates to:

may_le (GET_MODE_SIZE (A), GET_MODE_SIZE (B)) == true
must_le (GET_MODE_SIZE (A), GET_MODE_SIZE (B)) == false

  Of course, the may/must distinction already exists in things like
  alias analysis.

* some poly_int arithmetic operations (notably division) are only possible
  for certain values.  These operations therefore become conditional.

* target-independent code is exposed to these restrictions even if the
  current target has no indeterminates.  But:

  * we've tried to provide enough operations that poly_ints are easy
to work with.

  * it means that developers working with non-SVE targets don't need
to test SVE.  If the code compiles on a non-SVE target, and if it
doesn't use any asserting operations, it's reasonable to assume
that it will work on SVE too.

* for target-specific code, poly_int degenerates to a constant if there
  are no runtime invariants for that target.  Only very minor changes
  are needed to non-AArch64 targets.

* poly_int operations should be (and in practice seem to be) as
  efficient as single-coefficient operations on non-AArch64 targets.


Tree representation
===

The series uses a new POLY_INT_CST node to represent a poly_int value
at the tree level.  It is only used on targets with runtime sizes and
offsets; the associated test macro POLY_INT_CST_P is always false for
other targets.

The node has one INTEGER_CST per coefficient, which makes it easier
to refer to the same tree as a poly_wide_int, a poly_offset_int and
a poly_widest_int without copying the representation.

Only low-level routines use the tree node directly.  Most code uses:

- poly_int_tree_p (x)
Return true if X is an INTEGER_CST or a POLY_INT_CST.

- wi::to_poly_wide (x)
- wi::to_poly_offset (x)
- wi::to_poly_widest (x)
poly_int versions of the normal wi::to_wide etc. routines.  These
work on both INTEGER_CSTs and POLY_INT_CSTs.

- poly_int_tree_p (x, &y)
Test whether X is an INTEGER_CST or POLY_INT_CST and store its value
in Y if so.  This is defined for Y of type poly_int64 and poly_uint64;
the wi::to_* routines are more efficient than return-by-pointer for
wide_int-based types.

- tree_to_poly_int64 (x)
- tree_to_poly_uint64 (x)
poly_int versions of tree_to_shwi and tree_to_uhwi.  Again they work
on both INTEGER_CSTs and POLY_INT_CSTs.

Many tree routines now accept poly_int operands, such as:

- build_int_cst
- build_int_cstu
- wide_int_to_tree
- force_fit_type


RTL representation
===

[001/nnn] poly_int: add poly-int.h

2017-10-23 Thread Richard Sandiford

This patch adds a new "poly_int" class to represent polynomial integers
of the form:

  C0 + C1*X1 + C2*X2 ... + Cn*Xn

It also adds poly_int-based typedefs for offsets and sizes of various
precisions.  In these typedefs, the Ci coefficients are compile-time
constants and the Xi indeterminates are run-time invariants.  The number
of coefficients is controlled by the target and is initially 1 for all
ports.

Most routines can handle general coefficient counts, but for now a few
are specific to one or two coefficients.  Support for other coefficient
counts can be added when needed.

The patch also adds a new macro, IN_TARGET_CODE, that can be
set to indicate that a TU contains target-specific rather than
target-independent code.  When this macro is set and the number of
coefficients is 1, the poly-int.h classes define a conversion operator
to a constant.  This allows most existing target code to work without
modification.  The main exceptions are:

- values passed through ..., which need an explicit conversion to a
  constant

- ?: expression in which one arm ends up being a polynomial and the
  other remains a constant.  In these cases it would be valid to convert
  the constant to a polynomial and the polynomial to a constant, so a
  cast is needed to break the ambiguity.

The patch also adds a new target hook to return the estimated
value of a polynomial for costing purposes.

The patch also adds operator<< on wide_ints (it was already defined
for offset_int and widest_int).  I think this was originally excluded
because >> is ambiguous for wide_int, but << is useful for converting
bytes to bits, etc., so is worth defining on its own.  The patch also
adds operator% and operator/ for offset_int and widest_int, since those
types are always signed.  These changes allow the poly_int interface to
be more predictable.

I'd originally tried adding the tests as selftests, but that ended up
bloating cc1 by at least a third.  It also took a while to build them
at -O2.  The patch therefore uses plugin tests instead, where we can
force the tests to be built at -O0.  They still run in negligible time
when built that way.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* poly-int.h: New file.
* poly-int-types.h: Likewise.
* coretypes.h: Include them.
(POLY_INT_CONVERSION): Define.
* target.def (estimated_poly_value): New hook.
* doc/tm.texi.in (TARGET_ESTIMATED_POLY_VALUE): New hook.
* doc/tm.texi: Regenerate.
* doc/poly-int.texi: New file.
* doc/gccint.texi: Include it.
* doc/rtl.texi: Describe restrictions on subreg modes.
* Makefile.in (TEXI_GCCINT_FILES): Add poly-int.texi.
* genmodes.c (NUM_POLY_INT_COEFFS): Provide a default definition.
(emit_insn_modes_h): Emit a definition of NUM_POLY_INT_COEFFS.
* targhooks.h (default_estimated_poly_value): Declare.
* targhooks.c (default_estimated_poly_value): New function.
* target.h (estimated_poly_value): Likewise.
* wide-int.h (WI_UNARY_RESULT): Use wi::binary_traits.
(wi::unary_traits): Delete.
(wi::binary_traits::signed_shift_result_type): Define for
offset_int << HOST_WIDE_INT, etc.
(generic_wide_int::operator <<=): Define for all types and use
wi::lshift instead of <<.
(wi::hwi_with_prec): Add a default constructor.
(wi::ints_for): New class.
(operator <<): Define for all wide-int types.
(operator /): New function.
(operator %): Likewise.
* selftest.h (ASSERT_MUST_EQ, ASSERT_MUST_EQ_AT, ASSERT_MAY_NE)
(ASSERT_MAY_NE_AT): New macros.

gcc/testsuite/
* gcc.dg/plugin/poly-int-tests.h,
gcc.dg/plugin/poly-int-test-1.c,
gcc.dg/plugin/poly-int-01_plugin.c,
gcc.dg/plugin/poly-int-02_plugin.c,
gcc.dg/plugin/poly-int-03_plugin.c,
gcc.dg/plugin/poly-int-04_plugin.c,
gcc.dg/plugin/poly-int-05_plugin.c,
gcc.dg/plugin/poly-int-06_plugin.c,
gcc.dg/plugin/poly-int-07_plugin.c: New tests.
* gcc.dg/plugin/plugin.exp: Run them.



poly-int.diff.bz2
Description: BZip2 compressed data

Re: [PING] [C++ Patch] PR 82307

2017-10-23 Thread Nathan Sidwell


On 10/23/2017 09:15 AM, Paolo Carlini wrote:

Hi,

following up to a short off-line exchange with Nathan, I'm sending a 
reworked patch which - among other things - avoids regressing on the 
second testcase (cpp0x/enum36.C). Tested x86_64-linux.


ok, thanks!

nathan


--
Nathan Sidwell

[002/nnn] poly_int: IN_TARGET_CODE

2017-10-23 Thread Richard Sandiford

This patch makes each target-specific TU define an IN_TARGET_CODE macro,
which is used to decide whether poly_int<1, C> should convert to C.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* genattrtab.c (write_header): Define IN_TARGET_CODE to 1 in the
target C file.
* genautomata.c (main): Likewise.
* genconditions.c (write_header): Likewise.
* genemit.c (main): Likewise.
* genextract.c (print_header): Likewise.
* genopinit.c (main): Likewise.
* genoutput.c (output_prologue): Likewise.
* genpeep.c (main): Likewise.
* genpreds.c (write_insn_preds_c): Likewise.
* genrecog.c (writer_header): Likewise.
* config/aarch64/aarch64-builtins.c (IN_TARGET_CODE): Define.
* config/aarch64/aarch64-c.c (IN_TARGET_CODE): Likewise.
* config/aarch64/aarch64.c (IN_TARGET_CODE): Likewise.
* config/aarch64/cortex-a57-fma-steering.c (IN_TARGET_CODE): Likewise.
* config/aarch64/driver-aarch64.c (IN_TARGET_CODE): Likewise.
* config/alpha/alpha.c (IN_TARGET_CODE): Likewise.
* config/alpha/driver-alpha.c (IN_TARGET_CODE): Likewise.
* config/arc/arc-c.c (IN_TARGET_CODE): Likewise.
* config/arc/arc.c (IN_TARGET_CODE): Likewise.
* config/arc/driver-arc.c (IN_TARGET_CODE): Likewise.
* config/arm/aarch-common.c (IN_TARGET_CODE): Likewise.
* config/arm/arm-builtins.c (IN_TARGET_CODE): Likewise.
* config/arm/arm-c.c (IN_TARGET_CODE): Likewise.
* config/arm/arm.c (IN_TARGET_CODE): Likewise.
* config/arm/driver-arm.c (IN_TARGET_CODE): Likewise.
* config/avr/avr-c.c (IN_TARGET_CODE): Likewise.
* config/avr/avr-devices.c (IN_TARGET_CODE): Likewise.
* config/avr/avr-log.c (IN_TARGET_CODE): Likewise.
* config/avr/avr.c (IN_TARGET_CODE): Likewise.
* config/avr/driver-avr.c (IN_TARGET_CODE): Likewise.
* config/avr/gen-avr-mmcu-specs.c (IN_TARGET_CODE): Likewise.
* config/bfin/bfin.c (IN_TARGET_CODE): Likewise.
* config/c6x/c6x.c (IN_TARGET_CODE): Likewise.
* config/cr16/cr16.c (IN_TARGET_CODE): Likewise.
* config/cris/cris.c (IN_TARGET_CODE): Likewise.
* config/darwin.c (IN_TARGET_CODE): Likewise.
* config/epiphany/epiphany.c (IN_TARGET_CODE): Likewise.
* config/epiphany/mode-switch-use.c (IN_TARGET_CODE): Likewise.
* config/epiphany/resolve-sw-modes.c (IN_TARGET_CODE): Likewise.
* config/fr30/fr30.c (IN_TARGET_CODE): Likewise.
* config/frv/frv.c (IN_TARGET_CODE): Likewise.
* config/ft32/ft32.c (IN_TARGET_CODE): Likewise.
* config/h8300/h8300.c (IN_TARGET_CODE): Likewise.
* config/i386/djgpp.c (IN_TARGET_CODE): Likewise.
* config/i386/driver-i386.c (IN_TARGET_CODE): Likewise.
* config/i386/driver-mingw32.c (IN_TARGET_CODE): Likewise.
* config/i386/host-cygwin.c (IN_TARGET_CODE): Likewise.
* config/i386/host-i386-darwin.c (IN_TARGET_CODE): Likewise.
* config/i386/host-mingw32.c (IN_TARGET_CODE): Likewise.
* config/i386/i386-c.c (IN_TARGET_CODE): Likewise.
* config/i386/i386.c (IN_TARGET_CODE): Likewise.
* config/i386/intelmic-mkoffload.c (IN_TARGET_CODE): Likewise.
* config/i386/msformat-c.c (IN_TARGET_CODE): Likewise.
* config/i386/winnt-cxx.c (IN_TARGET_CODE): Likewise.
* config/i386/winnt-stubs.c (IN_TARGET_CODE): Likewise.
* config/i386/winnt.c (IN_TARGET_CODE): Likewise.
* config/i386/x86-tune-sched-atom.c (IN_TARGET_CODE): Likewise.
* config/i386/x86-tune-sched-bd.c (IN_TARGET_CODE): Likewise.
* config/i386/x86-tune-sched-core.c (IN_TARGET_CODE): Likewise.
* config/i386/x86-tune-sched.c (IN_TARGET_CODE): Likewise.
* config/ia64/ia64-c.c (IN_TARGET_CODE): Likewise.
* config/ia64/ia64.c (IN_TARGET_CODE): Likewise.
* config/iq2000/iq2000.c (IN_TARGET_CODE): Likewise.
* config/lm32/lm32.c (IN_TARGET_CODE): Likewise.
* config/m32c/m32c-pragma.c (IN_TARGET_CODE): Likewise.
* config/m32c/m32c.c (IN_TARGET_CODE): Likewise.
* config/m32r/m32r.c (IN_TARGET_CODE): Likewise.
* config/m68k/m68k.c (IN_TARGET_CODE): Likewise.
* config/mcore/mcore.c (IN_TARGET_CODE): Likewise.
* config/microblaze/microblaze-c.c (IN_TARGET_CODE): Likewise.
* config/microblaze/microblaze.c (IN_TARGET_CODE): Likewise.
* config/mips/driver-native.c (IN_TARGET_CODE): Likewise.
* config/mips/frame-header-opt.c (IN_TARGET_CODE): Likewise.
* config/mips/mips.c (IN_TARGET_CODE): Likewise.
* config/mmix/mmix.c (IN_TARGET_CODE): Likewise.
* config/mn10300/mn10300.c (IN_TARGET_CODE): Likewise.
* config/moxie/moxie.c (IN_TARGET_CODE): Likewise.
* config/msp430/driver-msp430.c (IN_TARGET_CODE): Likewise.
* conf

[003/nnn] poly_int: MACRO_MODE

2017-10-23 Thread Richard Sandiford

This patch uses a MACRO_MODE wrapper for the target macro invocations
in targhooks.c and address.h, so that macros for non-AArch64 targets
can continue to treat modes as fixed-size.

It didn't seem worth converting the address macros to hooks since
(a) they're heavily used, (b) they should be probably be replaced
with a different interface rather than converted to hooks as-is,
and most importantly (c) addresses.h already localises the problem.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* machmode.h (MACRO_MODE): New macro.
* addresses.h (base_reg_class, ok_for_base_p_1): Use it.
* targhooks.c (default_libcall_value, default_secondary_reload)
(default_memory_move_cost, default_register_move_cost)
(default_class_max_nregs): Likewise.

Index: gcc/machmode.h
===
--- gcc/machmode.h  2017-10-23 16:52:20.675923636 +0100
+++ gcc/machmode.h  2017-10-23 17:00:49.664349224 +0100
@@ -685,6 +685,17 @@ fixed_size_mode::includes_p (machine_mod
   return true;
 }
 
+/* Wrapper for mode arguments to target macros, so that if a target
+   doesn't need polynomial-sized modes, its header file can continue
+   to treat everything as fixed_size_mode.  This should go away once
+   macros are moved to target hooks.  It shouldn't be used in other
+   contexts.  */
+#if NUM_POLY_INT_COEFFS == 1
+#define MACRO_MODE(MODE) (as_a  (MODE))
+#else
+#define MACRO_MODE(MODE) (MODE)
+#endif
+
 extern opt_machine_mode mode_for_size (unsigned int, enum mode_class, int);
 
 /* Return the machine mode to use for a MODE_INT of SIZE bits, if one
Index: gcc/addresses.h
===
--- gcc/addresses.h 2017-10-23 16:52:20.675923636 +0100
+++ gcc/addresses.h 2017-10-23 17:00:49.663350133 +0100
@@ -31,14 +31,15 @@ base_reg_class (machine_mode mode ATTRIB
enum rtx_code index_code ATTRIBUTE_UNUSED)
 {
 #ifdef MODE_CODE_BASE_REG_CLASS
-  return MODE_CODE_BASE_REG_CLASS (mode, as, outer_code, index_code);
+  return MODE_CODE_BASE_REG_CLASS (MACRO_MODE (mode), as, outer_code,
+  index_code);
 #else
 #ifdef MODE_BASE_REG_REG_CLASS
   if (index_code == REG)
-return MODE_BASE_REG_REG_CLASS (mode);
+return MODE_BASE_REG_REG_CLASS (MACRO_MODE (mode));
 #endif
 #ifdef MODE_BASE_REG_CLASS
-  return MODE_BASE_REG_CLASS (mode);
+  return MODE_BASE_REG_CLASS (MACRO_MODE (mode));
 #else
   return BASE_REG_CLASS;
 #endif
@@ -58,15 +59,15 @@ ok_for_base_p_1 (unsigned regno ATTRIBUT
 enum rtx_code index_code ATTRIBUTE_UNUSED)
 {
 #ifdef REGNO_MODE_CODE_OK_FOR_BASE_P
-  return REGNO_MODE_CODE_OK_FOR_BASE_P (regno, mode, as,
+  return REGNO_MODE_CODE_OK_FOR_BASE_P (regno, MACRO_MODE (mode), as,
outer_code, index_code);
 #else
 #ifdef REGNO_MODE_OK_FOR_REG_BASE_P
   if (index_code == REG)
-return REGNO_MODE_OK_FOR_REG_BASE_P (regno, mode);
+return REGNO_MODE_OK_FOR_REG_BASE_P (regno, MACRO_MODE (mode));
 #endif
 #ifdef REGNO_MODE_OK_FOR_BASE_P
-  return REGNO_MODE_OK_FOR_BASE_P (regno, mode);
+  return REGNO_MODE_OK_FOR_BASE_P (regno, MACRO_MODE (mode));
 #else
   return REGNO_OK_FOR_BASE_P (regno);
 #endif
Index: gcc/targhooks.c
===
--- gcc/targhooks.c 2017-10-23 17:00:20.920834919 +0100
+++ gcc/targhooks.c 2017-10-23 17:00:49.664349224 +0100
@@ -941,7 +941,7 @@ default_libcall_value (machine_mode mode
   const_rtx fun ATTRIBUTE_UNUSED)
 {
 #ifdef LIBCALL_VALUE
-  return LIBCALL_VALUE (mode);
+  return LIBCALL_VALUE (MACRO_MODE (mode));
 #else
   gcc_unreachable ();
 #endif
@@ -1071,11 +1071,13 @@ default_secondary_reload (bool in_p ATTR
 }
 #ifdef SECONDARY_INPUT_RELOAD_CLASS
   if (in_p)
-rclass = SECONDARY_INPUT_RELOAD_CLASS (reload_class, reload_mode, x);
+rclass = SECONDARY_INPUT_RELOAD_CLASS (reload_class,
+  MACRO_MODE (reload_mode), x);
 #endif
 #ifdef SECONDARY_OUTPUT_RELOAD_CLASS
   if (! in_p)
-rclass = SECONDARY_OUTPUT_RELOAD_CLASS (reload_class, reload_mode, x);
+rclass = SECONDARY_OUTPUT_RELOAD_CLASS (reload_class,
+   MACRO_MODE (reload_mode), x);
 #endif
   if (rclass != NO_REGS)
 {
@@ -1603,7 +1605,7 @@ default_memory_move_cost (machine_mode m
 #ifndef MEMORY_MOVE_COST
 return (4 + memory_move_secondary_cost (mode, (enum reg_class) rclass, 
in));
 #else
-return MEMORY_MOVE_COST (mode, (enum reg_class) rclass, in);
+return MEMORY_MOVE_COST (MACRO_MODE (mode), (enum reg_class) rclass, in);
 #endif
 }
 
@@ -1618,7 +1620,8 @@ default_register_move_cost (machine_mode
 #ifndef REGISTER_MOVE_COST
   return 2;
 #else
-  return REGISTER_MOVE_COST (mode, (enum reg_class) from, (enum reg_class) to);
+  return REGISTER_M

[004/nnn] poly_int: mode query functions

2017-10-23 Thread Richard Sandiford

This patch changes the bit size and vector count arguments to the
machmode.h functions from unsigned int to poly_uint64.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* machmode.h (mode_for_size, int_mode_for_size, float_mode_for_size)
(smallest_mode_for_size, smallest_int_mode_for_size): Take the mode
size as a poly_uint64.
(mode_for_vector, mode_for_int_vector): Take the number of vector
elements as a poly_uint64.
* stor-layout.c (mode_for_size, smallest_mode_for_size): Take the mode
size as a poly_uint64.
(mode_for_vector, mode_for_int_vector): Take the number of vector
elements as a poly_uint64.

Index: gcc/machmode.h
===
--- gcc/machmode.h  2017-10-23 17:00:49.664349224 +0100
+++ gcc/machmode.h  2017-10-23 17:00:52.669615373 +0100
@@ -696,14 +696,14 @@ #define MACRO_MODE(MODE) (as_a  (mode_for_size (size, MODE_INT, limit));
 }
@@ -712,7 +712,7 @@ int_mode_for_size (unsigned int size, in
exists.  */
 
 inline opt_scalar_float_mode
-float_mode_for_size (unsigned int size)
+float_mode_for_size (poly_uint64 size)
 {
   return dyn_cast  (mode_for_size (size, MODE_FLOAT, 0));
 }
@@ -726,21 +726,21 @@ decimal_float_mode_for_size (unsigned in
 (mode_for_size (size, MODE_DECIMAL_FLOAT, 0));
 }
 
-extern machine_mode smallest_mode_for_size (unsigned int, enum mode_class);
+extern machine_mode smallest_mode_for_size (poly_uint64, enum mode_class);
 
 /* Find the narrowest integer mode that contains at least SIZE bits.
Such a mode must exist.  */
 
 inline scalar_int_mode
-smallest_int_mode_for_size (unsigned int size)
+smallest_int_mode_for_size (poly_uint64 size)
 {
   return as_a  (smallest_mode_for_size (size, MODE_INT));
 }
 
 extern opt_scalar_int_mode int_mode_for_mode (machine_mode);
 extern opt_machine_mode bitwise_mode_for_mode (machine_mode);
-extern opt_machine_mode mode_for_vector (scalar_mode, unsigned);
-extern opt_machine_mode mode_for_int_vector (unsigned int, unsigned int);
+extern opt_machine_mode mode_for_vector (scalar_mode, poly_uint64);
+extern opt_machine_mode mode_for_int_vector (unsigned int, poly_uint64);
 
 /* Return the integer vector equivalent of MODE, if one exists.  In other
words, return the mode for an integer vector that has the same number
Index: gcc/stor-layout.c
===
--- gcc/stor-layout.c   2017-10-23 16:52:20.627879504 +0100
+++ gcc/stor-layout.c   2017-10-23 17:00:52.669615373 +0100
@@ -297,22 +297,22 @@ finalize_size_functions (void)
MAX_FIXED_MODE_SIZE.  */
 
 opt_machine_mode
-mode_for_size (unsigned int size, enum mode_class mclass, int limit)
+mode_for_size (poly_uint64 size, enum mode_class mclass, int limit)
 {
   machine_mode mode;
   int i;
 
-  if (limit && size > MAX_FIXED_MODE_SIZE)
+  if (limit && may_gt (size, (unsigned int) MAX_FIXED_MODE_SIZE))
 return opt_machine_mode ();
 
   /* Get the first mode which has this size, in the specified class.  */
   FOR_EACH_MODE_IN_CLASS (mode, mclass)
-if (GET_MODE_PRECISION (mode) == size)
+if (must_eq (GET_MODE_PRECISION (mode), size))
   return mode;
 
   if (mclass == MODE_INT || mclass == MODE_PARTIAL_INT)
 for (i = 0; i < NUM_INT_N_ENTS; i ++)
-  if (int_n_data[i].bitsize == size
+  if (must_eq (int_n_data[i].bitsize, size)
  && int_n_enabled_p[i])
return int_n_data[i].m;
 
@@ -340,7 +340,7 @@ mode_for_size_tree (const_tree size, enu
SIZE bits.  Abort if no such mode exists.  */
 
 machine_mode
-smallest_mode_for_size (unsigned int size, enum mode_class mclass)
+smallest_mode_for_size (poly_uint64 size, enum mode_class mclass)
 {
   machine_mode mode = VOIDmode;
   int i;
@@ -348,19 +348,18 @@ smallest_mode_for_size (unsigned int siz
   /* Get the first mode which has at least this size, in the
  specified class.  */
   FOR_EACH_MODE_IN_CLASS (mode, mclass)
-if (GET_MODE_PRECISION (mode) >= size)
+if (must_ge (GET_MODE_PRECISION (mode), size))
   break;
 
+  gcc_assert (mode != VOIDmode);
+
   if (mclass == MODE_INT || mclass == MODE_PARTIAL_INT)
 for (i = 0; i < NUM_INT_N_ENTS; i ++)
-  if (int_n_data[i].bitsize >= size
- && int_n_data[i].bitsize < GET_MODE_PRECISION (mode)
+  if (must_ge (int_n_data[i].bitsize, size)
+ && must_lt (int_n_data[i].bitsize, GET_MODE_PRECISION (mode))
  && int_n_enabled_p[i])
mode = int_n_data[i].m;
 
-  if (mode == VOIDmode)
-gcc_unreachable ();
-
   return mode;
 }
 
@@ -475,7 +474,7 @@ bitwise_type_for_mode (machine_mode mode
either an integer mode or a vector mode.  */
 
 opt_machine_mode
-mode_for_vector (scalar_mode innermode, unsigned nunits)
+mode_for_vector (scalar_mode innermode, poly_uint64 nunits)
 {
   machine_mode mode;
 
@@ -496,14 +495,14 @@ mode_for_vector (scalar_mode innermode,

[005/nnn] poly_int: rtx constants

2017-10-23 Thread Richard Sandiford

This patch adds an rtl representation of poly_int values.
There were three possible ways of doing this:

(1) Add a new rtl code for the poly_ints themselves and store the
coefficients as trailing wide_ints.  This would give constants like:

  (const_poly_int [c0 c1 ... cn])

The runtime value would be:

  c0 + c1 * x1 + ... + cn * xn

(2) Like (1), but use rtxes for the coefficients.  This would give
constants like:

  (const_poly_int [(const_int c0)
   (const_int c1)
   ...
   (const_int cn)])

although the coefficients could be const_wide_ints instead
of const_ints where appropriate.

(3) Add a new rtl code for the polynomial indeterminates,
then use them in const wrappers.  A constant like c0 + c1 * x1
would then look like:

  (const:M (plus:M (mult:M (const_param:M x1)
   (const_int c1))
   (const_int c0)))

There didn't seem to be that much to choose between them.  The main
advantage of (1) is that it's a more efficient representation and
that we can refer to the cofficients directly as wide_int_storage.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/rtl.texi (const_poly_int): Document.
* gengenrtl.c (excluded_rtx): Return true for CONST_POLY_INT.
* rtl.h (const_poly_int_def): New struct.
(rtx_def::u): Add a cpi field.
(CASE_CONST_UNIQUE, CASE_CONST_ANY): Add CONST_POLY_INT.
(CONST_POLY_INT_P, CONST_POLY_INT_COEFFS): New macros.
(wi::rtx_to_poly_wide_ref): New typedef
(const_poly_int_value, wi::to_poly_wide, rtx_to_poly_int64)
(poly_int_rtx_p): New functions.
(trunc_int_for_mode): Declare a poly_int64 version.
(plus_constant): Take a poly_int64 instead of a HOST_WIDE_INT.
(immed_wide_int_const): Take a poly_wide_int_ref rather than
a wide_int_ref.
(strip_offset): Declare.
(strip_offset_and_add): New function.
* rtl.def (CONST_POLY_INT): New rtx code.
* rtl.c (rtx_size): Handle CONST_POLY_INT.
(shared_const_p): Use poly_int_rtx_p.
* emit-rtl.h (gen_int_mode): Take a poly_int64 instead of a
HOST_WIDE_INT.
(gen_int_shift_amount): Likewise.
* emit-rtl.c (const_poly_int_hasher): New class.
(const_poly_int_htab): New variable.
(init_emit_once): Initialize it when NUM_POLY_INT_COEFFS > 1.
(const_poly_int_hasher::hash): New function.
(const_poly_int_hasher::equal): Likewise.
(gen_int_mode): Take a poly_int64 instead of a HOST_WIDE_INT.
(immed_wide_int_const): Rename to...
(immed_wide_int_const_1): ...this and make static.
(immed_wide_int_const): New function, taking a poly_wide_int_ref
instead of a wide_int_ref.
(gen_int_shift_amount): Take a poly_int64 instead of a HOST_WIDE_INT.
(gen_lowpart_common): Handle CONST_POLY_INT.
* cse.c (hash_rtx_cb, equiv_constant): Likewise.
* cselib.c (cselib_hash_rtx): Likewise.
* dwarf2out.c (const_ok_for_output_1): Likewise.
* expr.c (convert_modes): Likewise.
* print-rtl.c (rtx_writer::print_rtx, print_value): Likewise.
* rtlhash.c (add_rtx): Likewise.
* explow.c (trunc_int_for_mode): Add a poly_int64 version.
(plus_constant): Take a poly_int64 instead of a HOST_WIDE_INT.
Handle existing CONST_POLY_INT rtxes.
* expmed.h (expand_shift): Take a poly_int64 instead of a
HOST_WIDE_INT.
* expmed.c (expand_shift): Likewise.
* rtlanal.c (strip_offset): New function.
(commutative_operand_precedence): Give CONST_POLY_INT the same
precedence as CONST_DOUBLE and put CONST_WIDE_INT between that
and CONST_INT.
* rtl-tests.c (const_poly_int_tests): New struct.
(rtl_tests_c_tests): Use it.
* simplify-rtx.c (simplify_const_unary_operation): Handle
CONST_POLY_INT.
(simplify_const_binary_operation): Likewise.
(simplify_binary_operation_1): Fold additions of symbolic constants
and CONST_POLY_INTs.
(simplify_subreg): Handle extensions and truncations of
CONST_POLY_INTs.
(simplify_const_poly_int_tests): New struct.
(simplify_rtx_c_tests): Use it.
* wide-int.h (storage_ref): Add default constructor.
(wide_int_ref_storage): Likewise.
(trailing_wide_ints): Use GTY((user)).
(trailing_wide_ints::operator[]): Add a const version.
(trailing_wide_ints::get_precision): New function.
(trailing_wide_ints::extra_size): Likewise.

Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2017-10-23 17:00:20.916834036 +0100
+++ gcc/doc/rtl.texi2017-10-23 17:00:54.437007600 +0100
@@ -1621,6 +1621,15 @@ is accessed with the macr

[006/nnn] poly_int: tree constants

2017-10-23 Thread Richard Sandiford

This patch adds a tree representation for poly_ints.  Unlike the
rtx version, the coefficients are INTEGER_CSTs rather than plain
integers, so that we can easily access them as poly_widest_ints
and poly_offset_ints.

The patch also adjusts some places that previously
relied on "constant" meaning "INTEGER_CST".  It also makes
sure that the TYPE_SIZE agrees with the TYPE_SIZE_UNIT for
vector booleans, given the existing:

/* Several boolean vector elements may fit in a single unit.  */
if (VECTOR_BOOLEAN_TYPE_P (type)
&& type->type_common.mode != BLKmode)
  TYPE_SIZE_UNIT (type)
= size_int (GET_MODE_SIZE (type->type_common.mode));
else
  TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
   TYPE_SIZE_UNIT (innertype),
   size_int (nunits));


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* doc/generic.texi (POLY_INT_CST): Document.
* tree.def (POLY_INT_CST): New tree code.
* treestruct.def (TS_POLY_INT_CST): New tree layout.
* tree-core.h (tree_poly_int_cst): New struct.
(tree_node): Add a poly_int_cst field.
* tree.h (POLY_INT_CST_P, POLY_INT_CST_COEFF): New macros.
(wide_int_to_tree, force_fit_type): Take a poly_wide_int_ref
instead of a wide_int_ref.
(build_int_cst, build_int_cst_type): Take a poly_int64 instead
of a HOST_WIDE_INT.
(build_int_cstu, build_array_type_nelts): Take a poly_uint64
instead of an unsigned HOST_WIDE_INT.
(build_poly_int_cst, tree_fits_poly_int64_p, tree_fits_poly_uint64_p)
(ptrdiff_tree_p): Declare.
(tree_to_poly_int64, tree_to_poly_uint64): Likewise.  Provide
extern inline implementations if the target doesn't use POLY_INT_CST.
(poly_int_tree_p): New function.
(wi::unextended_tree): New class.
(wi::int_traits ): New override.
(wi::extended_tree): Add a default constructor.
(wi::extended_tree::get_tree): New function.
(wi::widest_extended_tree, wi::offset_extended_tree): New typedefs.
(wi::tree_to_widest_ref, wi::tree_to_offset_ref): Use them.
(wi::tree_to_poly_widest_ref, wi::tree_to_poly_offset_ref)
(wi::tree_to_poly_wide_ref): New typedefs.
(wi::ints_for): Provide overloads for extended_tree and
unextended_tree.
(poly_int_cst_value, wi::to_poly_widest, wi::to_poly_offset)
(wi::to_wide): New functions.
(wi::fits_to_boolean_p, wi::fits_to_tree_p): Handle poly_ints.
* tree.c (poly_int_cst_hasher): New struct.
(poly_int_cst_hash_table): New variable.
(tree_node_structure_for_code, tree_code_size, simple_cst_equal)
(valid_constant_size_p, add_expr, drop_tree_overflow): Handle
POLY_INT_CST.
(initialize_tree_contains_struct): Handle TS_POLY_INT_CST.
(init_ttree): Initialize poly_int_cst_hash_table.
(build_int_cst, build_int_cst_type, build_invariant_address): Take
a poly_int64 instead of a HOST_WIDE_INT.
(build_int_cstu, build_array_type_nelts): Take a poly_uint64
instead of an unsigned HOST_WIDE_INT.
(wide_int_to_tree): Rename to...
(wide_int_to_tree_1): ...this.
(build_new_poly_int_cst, build_poly_int_cst): New functions.
(force_fit_type): Take a poly_wide_int_ref instead of a wide_int_ref.
(wide_int_to_tree): New function that takes a poly_wide_int_ref.
(ptrdiff_tree_p, tree_to_poly_int64, tree_to_poly_uint64)
(tree_fits_poly_int64_p, tree_fits_poly_uint64_p): New functions.
* lto-streamer-out.c (DFS::DFS_write_tree_body, hash_tree): Handle
TS_POLY_INT_CST.
* tree-streamer-in.c (lto_input_ts_poly_tree_pointers): Likewise.
(streamer_read_tree_body): Likewise.
* tree-streamer-out.c (write_ts_poly_tree_pointers): Likewise.
(streamer_write_tree_body): Likewise.
* tree-streamer.c (streamer_check_handled_ts_structures): Likewise.
* asan.c (asan_protect_global): Require the size to be an INTEGER_CST.
* cfgexpand.c (expand_debug_expr): Handle POLY_INT_CST.
* expr.c (const_vector_element, expand_expr_real_1): Likewise.
* gimple-expr.h (is_gimple_constant): Likewise.
* gimplify.c (maybe_with_size_expr): Likewise.
* print-tree.c (print_node): Likewise.
* tree-data-ref.c (data_ref_compare_tree): Likewise.
* tree-pretty-print.c (dump_generic_node): Likewise.
* tree-ssa-address.c (addr_for_mem_ref): Likewise.
* tree-vect-data-refs.c (dr_group_sort_cmp): Likewise.
* tree-vrp.c (compare_values_warnv): Likewise.
* tree-ssa-loop-ivopts.c (determine_base_object, constant_multiple_of)
(get_loop_invariant_expr, add_candidate_1, get_computation_aff_1)
(forc

[007/nnn] poly_int: dump routines

2017-10-23 Thread Richard Sandiford

Add poly_int routines for the dumpfile.h and pretty-print.h frameworks.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* dumpfile.h (dump_dec): Declare.
* dumpfile.c (dump_dec): New function.
* pretty-print.h (pp_wide_integer): Turn into a function and
declare a poly_int version.
* pretty-print.c (pp_wide_integer): New function for poly_ints.

Index: gcc/dumpfile.h
===
--- gcc/dumpfile.h  2017-10-23 16:52:20.417686430 +0100
+++ gcc/dumpfile.h  2017-10-23 17:01:00.431554440 +0100
@@ -174,6 +174,9 @@ extern void dump_gimple_stmt (dump_flags
 extern void print_combine_total_stats (void);
 extern bool enable_rtl_dump_file (void);
 
+template
+void dump_dec (int, const poly_int &);
+
 /* In tree-dump.c  */
 extern void dump_node (const_tree, dump_flags_t, FILE *);
 
Index: gcc/dumpfile.c
===
--- gcc/dumpfile.c  2017-10-23 16:52:20.417686430 +0100
+++ gcc/dumpfile.c  2017-10-23 17:01:00.431554440 +0100
@@ -473,6 +473,27 @@ dump_printf_loc (dump_flags_t dump_kind,
 }
 }
 
+/* Output VALUE in decimal to appropriate dump streams.  */
+
+template
+void
+dump_dec (int dump_kind, const poly_int &value)
+{
+  STATIC_ASSERT (poly_coeff_traits::signedness >= 0);
+  signop sgn = poly_coeff_traits::signedness ? SIGNED : UNSIGNED;
+  if (dump_file && (dump_kind & pflags))
+print_dec (value, dump_file, sgn);
+
+  if (alt_dump_file && (dump_kind & alt_flags))
+print_dec (value, alt_dump_file, sgn);
+}
+
+template void dump_dec (int, const poly_uint16 &);
+template void dump_dec (int, const poly_int64 &);
+template void dump_dec (int, const poly_uint64 &);
+template void dump_dec (int, const poly_offset_int &);
+template void dump_dec (int, const poly_widest_int &);
+
 /* Start a dump for PHASE. Store user-supplied dump flags in
*FLAG_PTR.  Return the number of streams opened.  Set globals
DUMP_FILE, and ALT_DUMP_FILE to point to the opened streams, and
Index: gcc/pretty-print.h
===
--- gcc/pretty-print.h  2017-10-23 16:52:20.417686430 +0100
+++ gcc/pretty-print.h  2017-10-23 17:01:00.431554440 +0100
@@ -328,8 +328,6 @@ #define pp_wide_int(PP, W, SGN) 
\
   pp_string (PP, pp_buffer (PP)->digit_buffer);\
 }  \
   while (0)
-#define pp_wide_integer(PP, I) \
-   pp_scalar (PP, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) I)
 #define pp_pointer(PP, P)  pp_scalar (PP, "%p", P)
 
 #define pp_identifier(PP, ID)  pp_string (PP, (pp_translate_identifiers (PP) \
@@ -401,4 +399,15 @@ extern const char *identifier_to_locale
 extern void *(*identifier_to_locale_alloc) (size_t);
 extern void (*identifier_to_locale_free) (void *);
 
+/* Print I to PP in decimal.  */
+
+inline void
+pp_wide_integer (pretty_printer *pp, HOST_WIDE_INT i)
+{
+  pp_scalar (pp, HOST_WIDE_INT_PRINT_DEC, i);
+}
+
+template
+void pp_wide_integer (pretty_printer *pp, const poly_int_pod &);
+
 #endif /* GCC_PRETTY_PRINT_H */
Index: gcc/pretty-print.c
===
--- gcc/pretty-print.c  2017-10-23 16:52:20.417686430 +0100
+++ gcc/pretty-print.c  2017-10-23 17:01:00.431554440 +0100
@@ -795,6 +795,30 @@ pp_clear_state (pretty_printer *pp)
   pp_indentation (pp) = 0;
 }
 
+/* Print X to PP in decimal.  */
+template
+void
+pp_wide_integer (pretty_printer *pp, const poly_int_pod &x)
+{
+  if (x.is_constant ())
+pp_wide_integer (pp, x.coeffs[0]);
+  else
+{
+  pp_left_bracket (pp);
+  for (unsigned int i = 0; i < N; ++i)
+   {
+ if (i != 0)
+   pp_comma (pp);
+ pp_wide_integer (pp, x.coeffs[i]);
+   }
+  pp_right_bracket (pp);
+}
+}
+
+template void pp_wide_integer (pretty_printer *, const poly_uint16_pod &);
+template void pp_wide_integer (pretty_printer *, const poly_int64_pod &);
+template void pp_wide_integer (pretty_printer *, const poly_uint64_pod &);
+
 /* Flush the formatted text of PRETTY-PRINTER onto the attached stream.  */
 void
 pp_write_text_to_stream (pretty_printer *pp)

[008/nnn] poly_int: create_integer_operand

2017-10-23 Thread Richard Sandiford

This patch generalises create_integer_operand so that it accepts
poly_int64s rather than HOST_WIDE_INTs.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* optabs.h (expand_operand): Add an int_value field.
(create_expand_operand): Add an int_value parameter and use it
to initialize the new expand_operand field.
(create_integer_operand): Replace with a declaration of a function
that accepts poly_int64s.  Move the implementation to...
* optabs.c (create_integer_operand): ...here.
(maybe_legitimize_operand): For EXPAND_INTEGER, check whether the
mode preserves the value of int_value, instead of calling
const_int_operand on the rtx.

Index: gcc/optabs.h
===
--- gcc/optabs.h2017-10-23 16:52:20.393664364 +0100
+++ gcc/optabs.h2017-10-23 17:01:02.532643107 +0100
@@ -60,6 +60,9 @@ struct expand_operand {
 
   /* The value of the operand.  */
   rtx value;
+
+  /* The value of an EXPAND_INTEGER operand.  */
+  poly_int64 int_value;
 };
 
 /* Initialize OP with the given fields.  Initialise the other fields
@@ -69,13 +72,14 @@ struct expand_operand {
 create_expand_operand (struct expand_operand *op,
   enum expand_operand_type type,
   rtx value, machine_mode mode,
-  bool unsigned_p)
+  bool unsigned_p, poly_int64 int_value = 0)
 {
   op->type = type;
   op->unsigned_p = unsigned_p;
   op->unused = 0;
   op->mode = mode;
   op->value = value;
+  op->int_value = int_value;
 }
 
 /* Make OP describe an operand that must use rtx X, even if X is volatile.  */
@@ -142,18 +146,7 @@ create_address_operand (struct expand_op
   create_expand_operand (op, EXPAND_ADDRESS, value, Pmode, false);
 }
 
-/* Make OP describe an input operand that has value INTVAL and that has
-   no inherent mode.  This function should only be used for operands that
-   are always expand-time constants.  The backend may request that INTVAL
-   be copied into a different kind of rtx, but it must specify the mode
-   of that rtx if so.  */
-
-static inline void
-create_integer_operand (struct expand_operand *op, HOST_WIDE_INT intval)
-{
-  create_expand_operand (op, EXPAND_INTEGER, GEN_INT (intval), VOIDmode, 
false);
-}
-
+extern void create_integer_operand (struct expand_operand *, poly_int64);
 
 /* Passed to expand_simple_binop and expand_binop to say which options
to try to use if the requested operation can't be open-coded on the
Index: gcc/optabs.c
===
--- gcc/optabs.c2017-10-23 16:52:20.393664364 +0100
+++ gcc/optabs.c2017-10-23 17:01:02.531644016 +0100
@@ -6959,6 +6959,20 @@ valid_multiword_target_p (rtx target)
   return true;
 }
 
+/* Make OP describe an input operand that has value INTVAL and that has
+   no inherent mode.  This function should only be used for operands that
+   are always expand-time constants.  The backend may request that INTVAL
+   be copied into a different kind of rtx, but it must specify the mode
+   of that rtx if so.  */
+
+void
+create_integer_operand (struct expand_operand *op, poly_int64 intval)
+{
+  create_expand_operand (op, EXPAND_INTEGER,
+gen_int_mode (intval, MAX_MODE_INT),
+VOIDmode, false, intval);
+}
+
 /* Like maybe_legitimize_operand, but do not change the code of the
current rtx value.  */
 
@@ -7071,7 +7085,9 @@ maybe_legitimize_operand (enum insn_code
 
 case EXPAND_INTEGER:
   mode = insn_data[(int) icode].operand[opno].mode;
-  if (mode != VOIDmode && const_int_operand (op->value, mode))
+  if (mode != VOIDmode
+ && must_eq (trunc_int_for_mode (op->int_value, mode),
+ op->int_value))
goto input;
   break;
 }

[009/nnn] poly_int: TRULY_NOOP_TRUNCATION

2017-10-23 Thread Richard Sandiford

This patch makes TRULY_NOOP_TRUNCATION take the mode sizes as
poly_uint64s instead of unsigned ints.  The function bodies
don't need to change.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* target.def (truly_noop_truncation): Take poly_uint64s instead of
unsigned ints.  Change default to hook_bool_puint64_puint64_true.
* doc/tm.texi: Regenerate.
* hooks.h (hook_bool_uint_uint_true): Delete.
(hook_bool_puint64_puint64_true): Declare.
* hooks.c (hook_bool_uint_uint_true): Delete.
(hook_bool_puint64_puint64_true): New function.
* config/mips/mips.c (mips_truly_noop_truncation): Take poly_uint64s
instead of unsigned ints.
* config/spu/spu.c (spu_truly_noop_truncation): Likewise.
* config/tilegx/tilegx.c (tilegx_truly_noop_truncation): Likewise.

Index: gcc/target.def
===
--- gcc/target.def  2017-10-23 17:00:20.920834919 +0100
+++ gcc/target.def  2017-10-23 17:01:04.215112587 +0100
@@ -3155,8 +3155,8 @@ is correct for most machines.\n\
 If @code{TARGET_MODES_TIEABLE_P} returns false for a pair of modes,\n\
 suboptimal code can result if this hook returns true for the corresponding\n\
 mode sizes.  Making this hook return false in such cases may improve things.",
- bool, (unsigned int outprec, unsigned int inprec),
- hook_bool_uint_uint_true)
+ bool, (poly_uint64 outprec, poly_uint64 inprec),
+ hook_bool_puint64_puint64_true)
 
 /* If the representation of integral MODE is such that values are
always sign-extended to a wider mode MODE_REP then return
Index: gcc/doc/tm.texi
===
--- gcc/doc/tm.texi 2017-10-23 17:00:20.917834257 +0100
+++ gcc/doc/tm.texi 2017-10-23 17:01:04.214113496 +0100
@@ -10823,7 +10823,7 @@ nevertheless truncate the shift count, y
 by overriding it.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_TRULY_NOOP_TRUNCATION (unsigned int 
@var{outprec}, unsigned int @var{inprec})
+@deftypefn {Target Hook} bool TARGET_TRULY_NOOP_TRUNCATION (poly_uint64 
@var{outprec}, poly_uint64 @var{inprec})
 This hook returns true if it is safe to ``convert'' a value of
 @var{inprec} bits to one of @var{outprec} bits (where @var{outprec} is
 smaller than @var{inprec}) by merely operating on it as if it had only
Index: gcc/hooks.h
===
--- gcc/hooks.h 2017-10-23 16:52:20.369642299 +0100
+++ gcc/hooks.h 2017-10-23 17:01:04.214113496 +0100
@@ -39,7 +39,7 @@ extern bool hook_bool_const_rtx_insn_con
  const rtx_insn *);
 extern bool hook_bool_mode_uhwi_false (machine_mode,
   unsigned HOST_WIDE_INT);
-extern bool hook_bool_uint_uint_true (unsigned int, unsigned int);
+extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64);
 extern bool hook_bool_uint_mode_false (unsigned int, machine_mode);
 extern bool hook_bool_uint_mode_true (unsigned int, machine_mode);
 extern bool hook_bool_tree_false (tree);
Index: gcc/hooks.c
===
--- gcc/hooks.c 2017-10-23 16:52:20.369642299 +0100
+++ gcc/hooks.c 2017-10-23 17:01:04.214113496 +0100
@@ -133,9 +133,9 @@ hook_bool_mode_uhwi_false (machine_mode,
   return false;
 }
 
-/* Generic hook that takes (unsigned int, unsigned int) and returns true.  */
+/* Generic hook that takes (poly_uint64, poly_uint64) and returns true.  */
 bool
-hook_bool_uint_uint_true (unsigned int, unsigned int)
+hook_bool_puint64_puint64_true (poly_uint64, poly_uint64)
 {
   return true;
 }
Index: gcc/config/mips/mips.c
===
--- gcc/config/mips/mips.c  2017-10-23 17:00:43.528930533 +0100
+++ gcc/config/mips/mips.c  2017-10-23 17:01:04.26223 +0100
@@ -22322,7 +22322,7 @@ mips_promote_function_mode (const_tree t
 /* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
 
 static bool
-mips_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
+mips_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
 {
   return !TARGET_64BIT || inprec <= 32 || outprec > 32;
 }
Index: gcc/config/spu/spu.c
===
--- gcc/config/spu/spu.c2017-10-23 17:00:43.548912356 +0100
+++ gcc/config/spu/spu.c2017-10-23 17:01:04.212115314 +0100
@@ -7182,7 +7182,7 @@ spu_can_change_mode_class (machine_mode
 /* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
 
 static bool
-spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
+spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
 {
   return inprec <= 32 && outprec <= inprec;
 }
Index: gcc/config/tilegx/tilegx.c
===
--- gcc/

[010/nnn] poly_int: REG_OFFSET

2017-10-23 Thread Richard Sandiford

This patch changes the type of the reg_attrs offset field
from HOST_WIDE_INT to poly_int64 and updates uses accordingly.
This includes changing reg_attr_hasher::hash to use inchash.
(Doing this has no effect on code generation since the only
use of the hasher is to avoid creating duplicate objects.)


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (reg_attrs::offset): Change from HOST_WIDE_INT to poly_int64.
(gen_rtx_REG_offset): Take the offset as a poly_int64.
* inchash.h (inchash::hash::add_poly_hwi): New function.
* gengtype.c (main): Register poly_int64.
* emit-rtl.c (reg_attr_hasher::hash): Use inchash.  Treat the
offset as a poly_int.
(reg_attr_hasher::equal): Use must_eq to compare offsets.
(get_reg_attrs, update_reg_offset, gen_rtx_REG_offset): Take the
offset as a poly_int64.
(set_reg_attrs_from_value): Treat the offset as a poly_int64.
* print-rtl.c (print_poly_int): New function.
(rtx_writer::print_rtx_operand_code_r): Treat REG_OFFSET as
a poly_int.
* var-tracking.c (track_offset_p, get_tracked_reg_offset): New
functions.
(var_reg_set, var_reg_delete_and_set, var_reg_delete): Use them.
(same_variable_part_p, track_loc_p): Take the offset as a poly_int64.
(vt_get_decl_and_offset): Return the offset as a poly_int64.
Enforce track_offset_p for parts of a PARALLEL.
(vt_add_function_parameter): Use const_offset for the final
offset to track.  Use get_tracked_reg_offset for the parts
of a PARALLEL.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-10-23 17:01:15.119130016 +0100
+++ gcc/rtl.h   2017-10-23 17:01:43.314993320 +0100
@@ -187,7 +187,7 @@ struct GTY(()) mem_attrs
 
 struct GTY((for_user)) reg_attrs {
   tree decl;   /* decl corresponding to REG.  */
-  HOST_WIDE_INT offset;/* Offset from start of DECL.  */
+  poly_int64 offset;   /* Offset from start of DECL.  */
 };
 
 /* Common union for an element of an rtx.  */
@@ -2997,7 +2997,7 @@ subreg_promoted_mode (rtx x)
 extern rtvec gen_rtvec_v (int, rtx *);
 extern rtvec gen_rtvec_v (int, rtx_insn **);
 extern rtx gen_reg_rtx (machine_mode);
-extern rtx gen_rtx_REG_offset (rtx, machine_mode, unsigned int, int);
+extern rtx gen_rtx_REG_offset (rtx, machine_mode, unsigned int, poly_int64);
 extern rtx gen_reg_rtx_offset (rtx, machine_mode, int);
 extern rtx gen_reg_rtx_and_attrs (rtx);
 extern rtx_code_label *gen_label_rtx (void);
Index: gcc/inchash.h
===
--- gcc/inchash.h   2017-10-23 17:01:29.530765486 +0100
+++ gcc/inchash.h   2017-10-23 17:01:43.314993320 +0100
@@ -63,6 +63,14 @@ hashval_t iterative_hash_hashval_t (hash
 val = iterative_hash_host_wide_int (v, val);
   }
 
+  /* Add polynomial value V, treating each element as a HOST_WIDE_INT.  */
+  template
+  void add_poly_hwi (const poly_int_pod &v)
+  {
+for (unsigned int i = 0; i < N; ++i)
+  add_hwi (v.coeffs[i]);
+  }
+
   /* Add wide_int-based value V.  */
   template
   void add_wide_int (const generic_wide_int &x)
Index: gcc/gengtype.c
===
--- gcc/gengtype.c  2017-10-23 17:01:15.119130016 +0100
+++ gcc/gengtype.c  2017-10-23 17:01:43.313994743 +0100
@@ -5190,6 +5190,7 @@ #define POS_HERE(Call) do { pos.file = t
   POS_HERE (do_scalar_typedef ("offset_int", &pos));
   POS_HERE (do_scalar_typedef ("widest_int", &pos));
   POS_HERE (do_scalar_typedef ("int64_t", &pos));
+  POS_HERE (do_scalar_typedef ("poly_int64", &pos));
   POS_HERE (do_scalar_typedef ("uint64_t", &pos));
   POS_HERE (do_scalar_typedef ("uint8", &pos));
   POS_HERE (do_scalar_typedef ("uintptr_t", &pos));
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  2017-10-23 17:01:15.119130016 +0100
+++ gcc/emit-rtl.c  2017-10-23 17:01:43.313994743 +0100
@@ -205,7 +205,6 @@ static rtx lookup_const_wide_int (rtx);
 #endif
 static rtx lookup_const_double (rtx);
 static rtx lookup_const_fixed (rtx);
-static reg_attrs *get_reg_attrs (tree, int);
 static rtx gen_const_vector (machine_mode, int);
 static void copy_rtx_if_shared_1 (rtx *orig);
 
@@ -424,7 +423,10 @@ reg_attr_hasher::hash (reg_attrs *x)
 {
   const reg_attrs *const p = x;
 
-  return ((p->offset * 1000) ^ (intptr_t) p->decl);
+  inchash::hash h;
+  h.add_ptr (p->decl);
+  h.add_poly_hwi (p->offset);
+  return h.end ();
 }
 
 /* Returns nonzero if the value represented by X  is the same as that given by
@@ -436,19 +438,19 @@ reg_attr_hasher::equal (reg_attrs *x, re
   const reg_attrs *const p = x;
   const reg_attrs *const q = y;
 
-  return (p->decl == q->decl && p->offset == q->offset);
+  return

C PATCH to fix a couple of typos (PR c/82681)

2017-10-23 Thread Marek Polacek

As noted in this PR, chages -> changes.

Bootstrapped/regtested on x86_64-linux, applying to trunk.

2017-10-23  Marek Polacek  

PR c/82681
* c-warn.c (warnings_for_convert_and_check): Fix typos.

* gcc.dg/c90-const-expr-11.c: Fix typos in dg-warning.
* gcc.dg/overflow-warn-5.c: Likewise.
* gcc.dg/overflow-warn-8.c: Likewise.

diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c
index cb1db0327c3..78f6ba83d98 100644
--- gcc/c-family/c-warn.c
+++ gcc/c-family/c-warn.c
@@ -1215,12 +1215,12 @@ warnings_for_convert_and_check (location_t loc, tree 
type, tree expr,
   if (cst)
warning_at (loc, OPT_Woverflow,
"overflow in conversion from %qT to %qT "
-   "chages value from %qE to %qE",
+   "changes value from %qE to %qE",
exprtype, type, expr, result);
   else
warning_at (loc, OPT_Woverflow,
"overflow in conversion from %qT to %qT "
-   "chages the value of %qE",
+   "changes the value of %qE",
exprtype, type, expr);
 }
   else
diff --git gcc/testsuite/gcc.dg/c90-const-expr-11.c 
gcc/testsuite/gcc.dg/c90-const-expr-11.c
index e4f2aff7874..a2720c47bf4 100644
--- gcc/testsuite/gcc.dg/c90-const-expr-11.c
+++ gcc/testsuite/gcc.dg/c90-const-expr-11.c
@@ -20,7 +20,7 @@ f (void)
   /* Overflow.  */
   struct t b = { INT_MAX + 1 }; /* { dg-warning "integer overflow in 
expression" } */
   /* { dg-error "overflow in constant expression" "constant" { target *-*-* } 
.-1 } */
-  struct t c = { DBL_MAX }; /* { dg-warning "overflow in conversion from 
.double. to .int. chages value " } */
+  struct t c = { DBL_MAX }; /* { dg-warning "overflow in conversion from 
.double. to .int. changes value " } */
   /* { dg-error "overflow in constant expression" "constant" { target *-*-* } 
.-1 } */
   /* Bad operator outside sizeof.  */
   struct s d = { 1 ? 1.0 : atan (a.d) }; /* { dg-error "is not a constant 
expression|near initialization" } */
diff --git gcc/testsuite/gcc.dg/overflow-warn-5.c 
gcc/testsuite/gcc.dg/overflow-warn-5.c
index b2c8dc31d95..1a5aa0c6059 100644
--- gcc/testsuite/gcc.dg/overflow-warn-5.c
+++ gcc/testsuite/gcc.dg/overflow-warn-5.c
@@ -3,5 +3,5 @@
 /* { dg-options "-Woverflow" } */
 
 unsigned char rx_async(unsigned char p) {
-return p & 512; /* { dg-warning "overflow in conversion from .int. to 
.unsigned char. chages value" } */
+return p & 512; /* { dg-warning "overflow in conversion from .int. to 
.unsigned char. changes value" } */
 }
diff --git gcc/testsuite/gcc.dg/overflow-warn-8.c 
gcc/testsuite/gcc.dg/overflow-warn-8.c
index ace605517dc..e76bcac5e07 100644
--- gcc/testsuite/gcc.dg/overflow-warn-8.c
+++ gcc/testsuite/gcc.dg/overflow-warn-8.c
@@ -7,7 +7,7 @@ void foo (int j)
   int i3 = 1 + INT_MAX; /* { dg-warning "integer overflow" } */
   int i4 = +1 + INT_MAX; /* { dg-warning "integer overflow" } */
   int i5 = (int)((double)1.0 + INT_MAX);
-  int i6 = (double)1.0 + INT_MAX; /* { dg-warning "overflow in conversion from 
.double. to .int. chages value" } */
+  int i6 = (double)1.0 + INT_MAX; /* { dg-warning "overflow in conversion from 
.double. to .int. changes value" } */
   int i7 = 0 ? (int)(double)1.0 + INT_MAX : 1;
   int i8 = 1 ? 1 : (int)(double)1.0 + INT_MAX;
   int i9 = j ? (int)(double)1.0 + INT_MAX : 1; /* { dg-warning "integer 
overflow" } */

Marek

[011/nnn] poly_int: DWARF locations

2017-10-23 Thread Richard Sandiford

This patch adds support for DWARF location expressions
that involve polynomial offsets.  It adds a target hook that
says how the runtime invariants used in the offsets should be
represented in DWARF.  SVE vectors have to be a multiple of
128 bits in size, so the GCC port uses the number of 128-bit
blocks minus one as the runtime invariant.  However, in DWARF,
the vector length is exposed via a pseudo "VG" register that
holds the number of 64-bit elements in a vector.  Thus:

  indeterminate 1 == (VG / 2) - 1

The hook needs to be general enough to express this.
Note that in most cases the division and subtraction fold
away into surrounding expressions.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* target.def (dwarf_poly_indeterminate_value): New hook.
* targhooks.h (default_dwarf_poly_indeterminate_value): Declare.
* targhooks.c (default_dwarf_poly_indeterminate_value): New function.
* doc/tm.texi.in (TARGET_DWARF_POLY_INDETERMINATE_VALUE): Document.
* doc/tm.texi: Regenerate.
* dwarf2out.h (build_cfa_loc, build_cfa_aligned_loc): Take the
offset as a poly_int64.
* dwarf2out.c (new_reg_loc_descr): Move later in file.  Take the
offset as a poly_int64.
(loc_descr_plus_const, loc_list_plus_const, build_cfa_aligned_loc):
Take the offset as a poly_int64.
(build_cfa_loc): Likewise.  Use loc_descr_plus_const.
(frame_pointer_fb_offset): Change to a poly_int64.
(int_loc_descriptor): Take the offset as a poly_int64.  Use
targetm.dwarf_poly_indeterminate_value for polynomial offsets.
(based_loc_descr): Take the offset as a poly_int64.
Use strip_offset_and_add to handle (plus X (const)).
Use new_reg_loc_descr instead of an open-coded version of the
previous implementation.
(mem_loc_descriptor): Handle CONST_POLY_INT.
(compute_frame_pointer_to_fb_displacement): Take the offset as a
poly_int64.  Use strip_offset_and_add to handle (plus X (const)).

Index: gcc/target.def
===
--- gcc/target.def  2017-10-23 17:01:04.215112587 +0100
+++ gcc/target.def  2017-10-23 17:01:45.057509456 +0100
@@ -4124,6 +4124,21 @@ the CFI label attached to the insn, @var
 the insn and @var{index} is @code{UNSPEC_INDEX} or @code{UNSPECV_INDEX}.",
  void, (const char *label, rtx pattern, int index), NULL)
 
+DEFHOOK
+(dwarf_poly_indeterminate_value,
+ "Express the value of @code{poly_int} indeterminate @var{i} as a DWARF\n\
+expression, with @var{i} counting from 1.  Return the number of a DWARF\n\
+register @var{R} and set @samp{*@var{factor}} and @samp{*@var{offset}} such\n\
+that the value of the indeterminate is:\n\
+@smallexample\n\
+value_of(@var{R}) / @var{factor} - @var{offset}\n\
+@end smallexample\n\
+\n\
+A target only needs to define this hook if it sets\n\
+@samp{NUM_POLY_INT_COEFFS} to a value greater than 1.",
+ unsigned int, (unsigned int i, unsigned int *factor, int *offset),
+ default_dwarf_poly_indeterminate_value)
+
 /* ??? Documenting this hook requires a GFDL license grant.  */
 DEFHOOK_UNDOC
 (stdarg_optimize_hook,
Index: gcc/targhooks.h
===
--- gcc/targhooks.h 2017-10-23 17:00:20.920834919 +0100
+++ gcc/targhooks.h 2017-10-23 17:01:45.057509456 +0100
@@ -234,6 +234,9 @@ extern int default_label_align_max_skip
 extern int default_jump_align_max_skip (rtx_insn *);
 extern section * default_function_section(tree decl, enum node_frequency freq,
  bool startup, bool exit);
+extern unsigned int default_dwarf_poly_indeterminate_value (unsigned int,
+   unsigned int *,
+   int *);
 extern machine_mode default_dwarf_frame_reg_mode (int);
 extern fixed_size_mode default_get_reg_raw_mode (int);
 extern bool default_keep_leaf_when_profiled ();
Index: gcc/targhooks.c
===
--- gcc/targhooks.c 2017-10-23 17:00:49.664349224 +0100
+++ gcc/targhooks.c 2017-10-23 17:01:45.057509456 +0100
@@ -1838,6 +1838,15 @@ default_debug_unwind_info (void)
   return UI_NONE;
 }
 
+/* Targets that set NUM_POLY_INT_COEFFS to something greater than 1
+   must define this hook.  */
+
+unsigned int
+default_dwarf_poly_indeterminate_value (unsigned int, unsigned int *, int *)
+{
+  gcc_unreachable ();
+}
+
 /* Determine the correct mode for a Dwarf frame register that represents
register REGNO.  */
 
Index: gcc/doc/tm.texi.in
===
--- gcc/doc/tm.texi.in  2017-10-23 17:00:20.918834478 +0100
+++ gcc/doc/tm.texi.in  2017-10-23 17:01:45.053515150 +0100
@@ -2553,6 +2553,8 @@ terminate the stack backtrace.  New port

[012/nnn] poly_int: fold_ctor_reference

2017-10-23 Thread Richard Sandiford

This patch changes the offset and size arguments to
fold_ctor_reference from unsigned HOST_WIDE_INT to poly_uint64.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* gimple-fold.h (fold_ctor_reference): Take the offset and size
as poly_uint64 rather than unsigned HOST_WIDE_INT.
* gimple-fold.c (fold_ctor_reference): Likewise.

Index: gcc/gimple-fold.h
===
--- gcc/gimple-fold.h   2017-10-23 16:52:20.201487839 +0100
+++ gcc/gimple-fold.h   2017-10-23 17:01:48.165079780 +0100
@@ -44,8 +44,7 @@ extern tree follow_single_use_edges (tre
 extern tree gimple_fold_stmt_to_constant_1 (gimple *, tree (*) (tree),
tree (*) (tree) = 
no_follow_ssa_edges);
 extern tree gimple_fold_stmt_to_constant (gimple *, tree (*) (tree));
-extern tree fold_ctor_reference (tree, tree, unsigned HOST_WIDE_INT,
-unsigned HOST_WIDE_INT, tree);
+extern tree fold_ctor_reference (tree, tree, poly_uint64, poly_uint64, tree);
 extern tree fold_const_aggregate_ref_1 (tree, tree (*) (tree));
 extern tree fold_const_aggregate_ref (tree);
 extern tree gimple_get_virt_method_for_binfo (HOST_WIDE_INT, tree,
Index: gcc/gimple-fold.c
===
--- gcc/gimple-fold.c   2017-10-23 16:52:20.201487839 +0100
+++ gcc/gimple-fold.c   2017-10-23 17:01:48.164081204 +0100
@@ -6365,20 +6365,25 @@ fold_nonarray_ctor_reference (tree type,
   return build_zero_cst (type);
 }
 
-/* CTOR is value initializing memory, fold reference of type TYPE and size SIZE
-   to the memory at bit OFFSET.  */
+/* CTOR is value initializing memory, fold reference of type TYPE and
+   size POLY_SIZE to the memory at bit POLY_OFFSET.  */
 
 tree
-fold_ctor_reference (tree type, tree ctor, unsigned HOST_WIDE_INT offset,
-unsigned HOST_WIDE_INT size, tree from_decl)
+fold_ctor_reference (tree type, tree ctor, poly_uint64 poly_offset,
+poly_uint64 poly_size, tree from_decl)
 {
   tree ret;
 
   /* We found the field with exact match.  */
   if (useless_type_conversion_p (type, TREE_TYPE (ctor))
-  && !offset)
+  && known_zero (poly_offset))
 return canonicalize_constructor_val (unshare_expr (ctor), from_decl);
 
+  /* The remaining optimizations need a constant size and offset.  */
+  unsigned HOST_WIDE_INT size, offset;
+  if (!poly_size.is_constant (&size) || !poly_offset.is_constant (&offset))
+return NULL_TREE;
+
   /* We are at the end of walk, see if we can view convert the
  result.  */
   if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor)) && !offset

[013/nnn] poly_int: same_addr_size_stores_p

2017-10-23 Thread Richard Sandiford

This patch makes tree-ssa-alias.c:same_addr_size_stores_p handle
poly_int sizes and offsets.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* tree-ssa-alias.c (same_addr_size_stores_p): Take the offsets and
sizes as poly_int64s rather than HOST_WIDE_INTs.

Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c2017-10-23 16:52:20.150440950 +0100
+++ gcc/tree-ssa-alias.c2017-10-23 17:01:49.579064221 +0100
@@ -2322,14 +2322,14 @@ stmt_may_clobber_ref_p (gimple *stmt, tr
address.  */
 
 static bool
-same_addr_size_stores_p (tree base1, HOST_WIDE_INT offset1, HOST_WIDE_INT 
size1,
-HOST_WIDE_INT max_size1,
-tree base2, HOST_WIDE_INT offset2, HOST_WIDE_INT size2,
-HOST_WIDE_INT max_size2)
+same_addr_size_stores_p (tree base1, poly_int64 offset1, poly_int64 size1,
+poly_int64 max_size1,
+tree base2, poly_int64 offset2, poly_int64 size2,
+poly_int64 max_size2)
 {
   /* Offsets need to be 0.  */
-  if (offset1 != 0
-  || offset2 != 0)
+  if (maybe_nonzero (offset1)
+  || maybe_nonzero (offset2))
 return false;
 
   bool base1_obj_p = SSA_VAR_P (base1);
@@ -2348,17 +2348,19 @@ same_addr_size_stores_p (tree base1, HOS
   tree memref = base1_memref_p ? base1 : base2;
 
   /* Sizes need to be valid.  */
-  if (max_size1 == -1 || max_size2 == -1
-  || size1 == -1 || size2 == -1)
+  if (!known_size_p (max_size1)
+  || !known_size_p (max_size2)
+  || !known_size_p (size1)
+  || !known_size_p (size2))
 return false;
 
   /* Max_size needs to match size.  */
-  if (max_size1 != size1
-  || max_size2 != size2)
+  if (may_ne (max_size1, size1)
+  || may_ne (max_size2, size2))
 return false;
 
   /* Sizes need to match.  */
-  if (size1 != size2)
+  if (may_ne (size1, size2))
 return false;
 
 
@@ -2386,10 +2388,9 @@ same_addr_size_stores_p (tree base1, HOS
 
   /* Check that the object size is the same as the store size.  That ensures us
  that ptr points to the start of obj.  */
-  if (!tree_fits_shwi_p (DECL_SIZE (obj)))
-return false;
-  HOST_WIDE_INT obj_size = tree_to_shwi (DECL_SIZE (obj));
-  return obj_size == size1;
+  return (DECL_SIZE (obj)
+ && poly_int_tree_p (DECL_SIZE (obj))
+ && must_eq (wi::to_poly_offset (DECL_SIZE (obj)), size1));
 }
 
 /* If STMT kills the memory reference REF return true, otherwise

[014/nnn] poly_int: indirect_refs_may_alias_p

2017-10-23 Thread Richard Sandiford

This patch makes indirect_refs_may_alias_p use ranges_may_overlap_p
rather than ranges_overlap_p.  Unlike the former, the latter can handle
negative offsets, so the fix for PR44852 should no longer be necessary.
It can also handle offset_int, so avoids unchecked truncations to
HOST_WIDE_INT.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* tree-ssa-alias.c (indirect_ref_may_alias_decl_p)
(indirect_refs_may_alias_p): Use ranges_may_overlap_p
instead of ranges_overlap_p.

Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c2017-10-23 17:01:49.579064221 +0100
+++ gcc/tree-ssa-alias.c2017-10-23 17:01:51.044974644 +0100
@@ -1135,23 +1135,13 @@ indirect_ref_may_alias_decl_p (tree ref1
 {
   tree ptr1;
   tree ptrtype1, dbase2;
-  HOST_WIDE_INT offset1p = offset1, offset2p = offset2;
-  HOST_WIDE_INT doffset1, doffset2;
 
   gcc_checking_assert ((TREE_CODE (base1) == MEM_REF
|| TREE_CODE (base1) == TARGET_MEM_REF)
   && DECL_P (base2));
 
   ptr1 = TREE_OPERAND (base1, 0);
-
-  /* The offset embedded in MEM_REFs can be negative.  Bias them
- so that the resulting offset adjustment is positive.  */
-  offset_int moff = mem_ref_offset (base1);
-  moff <<= LOG2_BITS_PER_UNIT;
-  if (wi::neg_p (moff))
-offset2p += (-moff).to_short_addr ();
-  else
-offset1p += moff.to_short_addr ();
+  offset_int moff = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT;
 
   /* If only one reference is based on a variable, they cannot alias if
  the pointer access is beyond the extent of the variable access.
@@ -1160,7 +1150,7 @@ indirect_ref_may_alias_decl_p (tree ref1
  ???  IVOPTs creates bases that do not honor this restriction,
  so do not apply this optimization for TARGET_MEM_REFs.  */
   if (TREE_CODE (base1) != TARGET_MEM_REF
-  && !ranges_overlap_p (MAX (0, offset1p), -1, offset2p, max_size2))
+  && !ranges_may_overlap_p (offset1 + moff, -1, offset2, max_size2))
 return false;
   /* They also cannot alias if the pointer may not point to the decl.  */
   if (!ptr_deref_may_alias_decl_p (ptr1, base2))
@@ -1213,18 +1203,11 @@ indirect_ref_may_alias_decl_p (tree ref1
   dbase2 = ref2;
   while (handled_component_p (dbase2))
 dbase2 = TREE_OPERAND (dbase2, 0);
-  doffset1 = offset1;
-  doffset2 = offset2;
+  HOST_WIDE_INT doffset1 = offset1;
+  offset_int doffset2 = offset2;
   if (TREE_CODE (dbase2) == MEM_REF
   || TREE_CODE (dbase2) == TARGET_MEM_REF)
-{
-  offset_int moff = mem_ref_offset (dbase2);
-  moff <<= LOG2_BITS_PER_UNIT;
-  if (wi::neg_p (moff))
-   doffset1 -= (-moff).to_short_addr ();
-  else
-   doffset2 -= moff.to_short_addr ();
-}
+doffset2 -= mem_ref_offset (dbase2) << LOG2_BITS_PER_UNIT;
 
   /* If either reference is view-converted, give up now.  */
   if (same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (ptrtype1)) != 1
@@ -1241,7 +1224,7 @@ indirect_ref_may_alias_decl_p (tree ref1
   if ((TREE_CODE (base1) != TARGET_MEM_REF
|| (!TMR_INDEX (base1) && !TMR_INDEX2 (base1)))
   && same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (dbase2)) == 1)
-return ranges_overlap_p (doffset1, max_size1, doffset2, max_size2);
+return ranges_may_overlap_p (doffset1, max_size1, doffset2, max_size2);
 
   if (ref1 && ref2
   && nonoverlapping_component_refs_p (ref1, ref2))
@@ -1313,22 +1296,10 @@ indirect_refs_may_alias_p (tree ref1 ATT
  && operand_equal_p (TMR_INDEX2 (base1),
  TMR_INDEX2 (base2), 0))
 {
-  offset_int moff;
-  /* The offset embedded in MEM_REFs can be negative.  Bias them
-so that the resulting offset adjustment is positive.  */
-  moff = mem_ref_offset (base1);
-  moff <<= LOG2_BITS_PER_UNIT;
-  if (wi::neg_p (moff))
-   offset2 += (-moff).to_short_addr ();
-  else
-   offset1 += moff.to_shwi ();
-  moff = mem_ref_offset (base2);
-  moff <<= LOG2_BITS_PER_UNIT;
-  if (wi::neg_p (moff))
-   offset1 += (-moff).to_short_addr ();
-  else
-   offset2 += moff.to_short_addr ();
-  return ranges_overlap_p (offset1, max_size1, offset2, max_size2);
+  offset_int moff1 = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT;
+  offset_int moff2 = mem_ref_offset (base2) << LOG2_BITS_PER_UNIT;
+  return ranges_may_overlap_p (offset1 + moff1, max_size1,
+  offset2 + moff2, max_size2);
 }
   if (!ptr_derefs_may_alias_p (ptr1, ptr2))
 return false;

[015/nnn] poly_int: ao_ref and vn_reference_op_t

2017-10-23 Thread Richard Sandiford

This patch changes the offset, size and max_size fields
of ao_ref from HOST_WIDE_INT to poly_int64 and propagates
the change through the code that references it.  This includes
changing the off field of vn_reference_op_struct in the same way.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* inchash.h (inchash::hash::add_poly_int): New function.
* tree-ssa-alias.h (ao_ref::offset, ao_ref::size, ao_ref::max_size):
Use poly_int64 rather than HOST_WIDE_INT.
(ao_ref::max_size_known_p): New function.
* tree-ssa-sccvn.h (vn_reference_op_struct::off): Use poly_int64_pod
rather than HOST_WIDE_INT.
* tree-ssa-alias.c (ao_ref_base): Apply get_ref_base_and_extent
to temporaries until its interface is adjusted to match.
(ao_ref_init_from_ptr_and_size): Handle polynomial offsets and sizes.
(aliasing_component_refs_p, decl_refs_may_alias_p)
(indirect_ref_may_alias_decl_p, indirect_refs_may_alias_p): Take
the offsets and max_sizes as poly_int64s instead of HOST_WIDE_INTs.
(refs_may_alias_p_1, stmt_kills_ref_p): Adjust for changes to
ao_ref fields.
* alias.c (ao_ref_from_mem): Likewise.
* tree-ssa-dce.c (mark_aliased_reaching_defs_necessary_1): Likewise.
* tree-ssa-dse.c (valid_ao_ref_for_dse, normalize_ref)
(clear_bytes_written_by, setup_live_bytes_from_ref, compute_trims)
(maybe_trim_complex_store, maybe_trim_constructor_store)
(live_bytes_read, dse_classify_store): Likewise.
* tree-ssa-sccvn.c (vn_reference_compute_hash, vn_reference_eq):
(copy_reference_ops_from_ref, ao_ref_init_from_vn_reference)
(fully_constant_vn_reference_p, valueize_refs_1): Likewise.
(vn_reference_lookup_3): Likewise.
* tree-ssa-uninit.c (warn_uninitialized_vars): Likewise.

Index: gcc/inchash.h
===
--- gcc/inchash.h   2017-10-23 17:01:43.314993320 +0100
+++ gcc/inchash.h   2017-10-23 17:01:52.303181137 +0100
@@ -57,6 +57,14 @@ hashval_t iterative_hash_hashval_t (hash
 val = iterative_hash_hashval_t (v, val);
   }
 
+  /* Add polynomial value V, treating each element as an unsigned int.  */
+  template
+  void add_poly_int (const poly_int_pod &v)
+  {
+for (unsigned int i = 0; i < N; ++i)
+  add_int (v.coeffs[i]);
+  }
+
   /* Add HOST_WIDE_INT value V.  */
   void add_hwi (HOST_WIDE_INT v)
   {
Index: gcc/tree-ssa-alias.h
===
--- gcc/tree-ssa-alias.h2017-10-23 16:52:20.058356365 +0100
+++ gcc/tree-ssa-alias.h2017-10-23 17:01:52.304179714 +0100
@@ -80,11 +80,11 @@ struct ao_ref
  the following fields are not yet computed.  */
   tree base;
   /* The offset relative to the base.  */
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
   /* The size of the access.  */
-  HOST_WIDE_INT size;
+  poly_int64 size;
   /* The maximum possible extent of the access or -1 if unconstrained.  */
-  HOST_WIDE_INT max_size;
+  poly_int64 max_size;
 
   /* The alias set of the access or -1 if not yet computed.  */
   alias_set_type ref_alias_set;
@@ -94,8 +94,18 @@ struct ao_ref
 
   /* Whether the memory is considered a volatile access.  */
   bool volatile_p;
+
+  bool max_size_known_p () const;
 };
 
+/* Return true if the maximum size is known, rather than the special -1
+   marker.  */
+
+inline bool
+ao_ref::max_size_known_p () const
+{
+  return known_size_p (max_size);
+}
 
 /* In tree-ssa-alias.c  */
 extern void ao_ref_init (ao_ref *, tree);
Index: gcc/tree-ssa-sccvn.h
===
--- gcc/tree-ssa-sccvn.h2017-10-23 16:52:20.058356365 +0100
+++ gcc/tree-ssa-sccvn.h2017-10-23 17:01:52.305178291 +0100
@@ -93,7 +93,7 @@ typedef struct vn_reference_op_struct
   /* For storing TYPE_ALIGN for array ref element size computation.  */
   unsigned align : 6;
   /* Constant offset this op adds or -1 if it is variable.  */
-  HOST_WIDE_INT off;
+  poly_int64_pod off;
   tree type;
   tree op0;
   tree op1;
Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c2017-10-23 17:01:51.044974644 +0100
+++ gcc/tree-ssa-alias.c2017-10-23 17:01:52.304179714 +0100
@@ -635,11 +635,15 @@ ao_ref_init (ao_ref *r, tree ref)
 ao_ref_base (ao_ref *ref)
 {
   bool reverse;
+  HOST_WIDE_INT offset, size, max_size;
 
   if (ref->base)
 return ref->base;
-  ref->base = get_ref_base_and_extent (ref->ref, &ref->offset, &ref->size,
-  &ref->max_size, &reverse);
+  ref->base = get_ref_base_and_extent (ref->ref, &offset, &size,
+  &max_size, &reverse);
+  ref->offset = offset;
+  ref->size = size;
+  ref->max_size = max_size;
   return ref->base;
 }
 
@@ -679,7

[016/nnn] poly_int: dse.c

2017-10-23 Thread Richard Sandiford

This patch makes RTL DSE use poly_int for offsets and sizes.
The local phase can optimise them normally but the global phase
treats them as wild accesses.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* dse.c (store_info): Change offset and width from HOST_WIDE_INT
to poly_int64.  Update commentary for positions_needed.large.
(read_info_type): Change offset and width from HOST_WIDE_INT
to poly_int64.
(set_usage_bits): Likewise.
(canon_address): Return the offset as a poly_int64 rather than
a HOST_WIDE_INT.  Use strip_offset_and_add.
(set_all_positions_unneeded, any_positions_needed_p): Use
positions_needed.large to track stores with non-constant widths.
(all_positions_needed_p): Likewise.  Take the offset and width
as poly_int64s rather than ints.  Assert that rhs is nonnull.
(record_store): Cope with non-constant offsets and widths.
Nullify the rhs of an earlier store if we can't tell which bytes
of it are needed.
(find_shift_sequence): Take the access_size and shift as poly_int64s
rather than ints.
(get_stored_val): Take the read_offset and read_width as poly_int64s
rather than HOST_WIDE_INTs.
(check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Handle
non-constant offsets and widths.

Index: gcc/dse.c
===
--- gcc/dse.c   2017-10-23 16:52:20.003305798 +0100
+++ gcc/dse.c   2017-10-23 17:01:54.249406896 +0100
@@ -244,11 +244,11 @@ struct store_info
   rtx mem_addr;
 
   /* The offset of the first byte associated with the operation.  */
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
 
   /* The number of bytes covered by the operation.  This is always exact
  and known (rather than -1).  */
-  HOST_WIDE_INT width;
+  poly_int64 width;
 
   union
 {
@@ -259,12 +259,19 @@ struct store_info
 
   struct
{
- /* A bitmap with one bit per byte.  Cleared bit means the position
-is needed.  Used if IS_LARGE is false.  */
+ /* A bitmap with one bit per byte, or null if the number of
+bytes isn't known at compile time.  A cleared bit means
+the position is needed.  Used if IS_LARGE is true.  */
  bitmap bmap;
 
- /* Number of set bits (i.e. unneeded bytes) in BITMAP.  If it is
-equal to WIDTH, the whole store is unused.  */
+ /* When BITMAP is nonnull, this counts the number of set bits
+(i.e. unneeded bytes) in the bitmap.  If it is equal to
+WIDTH, the whole store is unused.
+
+When BITMAP is null:
+- the store is definitely not needed when COUNT == 1
+- all the store is needed when COUNT == 0 and RHS is nonnull
+- otherwise we don't know which parts of the store are needed.  */
  int count;
} large;
 } positions_needed;
@@ -308,10 +315,10 @@ struct read_info_type
   int group_id;
 
   /* The offset of the first byte associated with the operation.  */
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
 
   /* The number of bytes covered by the operation, or -1 if not known.  */
-  HOST_WIDE_INT width;
+  poly_int64 width;
 
   /* The mem being read.  */
   rtx mem;
@@ -940,13 +947,18 @@ can_escape (tree expr)
OFFSET and WIDTH.  */
 
 static void
-set_usage_bits (group_info *group, HOST_WIDE_INT offset, HOST_WIDE_INT width,
+set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
 tree expr)
 {
-  HOST_WIDE_INT i;
+  /* Non-constant offsets and widths act as global kills, so there's no point
+ trying to use them to derive global DSE candidates.  */
+  HOST_WIDE_INT i, const_offset, const_width;
   bool expr_escapes = can_escape (expr);
-  if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
-for (i=offset; i -MAX_OFFSET
+  && const_offset + const_width < MAX_OFFSET)
+for (i = const_offset; i < const_offset + const_width; ++i)
   {
bitmap store1;
bitmap store2;
@@ -1080,7 +1092,7 @@ const_or_frame_p (rtx x)
 static bool
 canon_address (rtx mem,
   int *group_id,
-  HOST_WIDE_INT *offset,
+  poly_int64 *offset,
   cselib_val **base)
 {
   machine_mode address_mode = get_address_mode (mem);
@@ -1147,12 +1159,7 @@ canon_address (rtx mem,
   if (GET_CODE (address) == CONST)
address = XEXP (address, 0);
 
-  if (GET_CODE (address) == PLUS
- && CONST_INT_P (XEXP (address, 1)))
-   {
- *offset = INTVAL (XEXP (address, 1));
- address = XEXP (address, 0);
-   }
+  address = strip_offset_and_add (address, offset);
 
   if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
  && const_or_frame_p (address))
@@ -1160,8 +1167,11 @@ canon_address (rtx mem,
  group_info *group = ge

[017/nnn] poly_int: rtx_addr_can_trap_p_1

2017-10-23 Thread Richard Sandiford

This patch changes the offset and size arguments of
rtx_addr_can_trap_p_1 from HOST_WIDE_INT to poly_int64.  It also
uses a size of -1 rather than 0 to represent an unknown size and
BLKmode rather than VOIDmode to represent an unknown mode.


2017-10-23  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtlanal.c (rtx_addr_can_trap_p_1): Take the offset and size
as poly_int64s rather than HOST_WIDE_INTs.  Use a size of -1
rather than 0 to represent an unknown size.  Assert that the size
is known when the mode isn't BLKmode.
(may_trap_p_1): Use -1 for unknown sizes.
(rtx_addr_can_trap_p): Likewise.  Pass BLKmode rather than VOIDmode.

Index: gcc/rtlanal.c
===
--- gcc/rtlanal.c   2017-10-23 17:00:54.444001238 +0100
+++ gcc/rtlanal.c   2017-10-23 17:01:55.453690255 +0100
@@ -457,16 +457,17 @@ get_initial_register_offset (int from, i
references on strict alignment machines.  */
 
 static int
-rtx_addr_can_trap_p_1 (const_rtx x, HOST_WIDE_INT offset, HOST_WIDE_INT size,
+rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
   machine_mode mode, bool unaligned_mems)
 {
   enum rtx_code code = GET_CODE (x);
+  gcc_checking_assert (mode == BLKmode || known_size_p (size));
 
   /* The offset must be a multiple of the mode size if we are considering
  unaligned memory references on strict alignment machines.  */
-  if (STRICT_ALIGNMENT && unaligned_mems && GET_MODE_SIZE (mode) != 0)
+  if (STRICT_ALIGNMENT && unaligned_mems && mode != BLKmode)
 {
-  HOST_WIDE_INT actual_offset = offset;
+  poly_int64 actual_offset = offset;
 
 #ifdef SPARC_STACK_BOUNDARY_HACK
   /* ??? The SPARC port may claim a STACK_BOUNDARY higher than
@@ -477,7 +478,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
actual_offset -= STACK_POINTER_OFFSET;
 #endif
 
-  if (actual_offset % GET_MODE_SIZE (mode) != 0)
+  if (!multiple_p (actual_offset, GET_MODE_SIZE (mode)))
return 1;
 }
 
@@ -489,14 +490,14 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
   if (!CONSTANT_POOL_ADDRESS_P (x) && !SYMBOL_REF_FUNCTION_P (x))
{
  tree decl;
- HOST_WIDE_INT decl_size;
+ poly_int64 decl_size;
 
- if (offset < 0)
+ if (may_lt (offset, 0))
+   return 1;
+ if (known_zero (offset))
+   return 0;
+ if (!known_size_p (size))
return 1;
- if (size == 0)
-   size = GET_MODE_SIZE (mode);
- if (size == 0)
-   return offset != 0;
 
  /* If the size of the access or of the symbol is unknown,
 assume the worst.  */
@@ -507,9 +508,10 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
  if (!decl)
decl_size = -1;
  else if (DECL_P (decl) && DECL_SIZE_UNIT (decl))
-   decl_size = (tree_fits_shwi_p (DECL_SIZE_UNIT (decl))
-? tree_to_shwi (DECL_SIZE_UNIT (decl))
-: -1);
+   {
+ if (!poly_int_tree_p (DECL_SIZE_UNIT (decl), &decl_size))
+   decl_size = -1;
+   }
  else if (TREE_CODE (decl) == STRING_CST)
decl_size = TREE_STRING_LENGTH (decl);
  else if (TYPE_SIZE_UNIT (TREE_TYPE (decl)))
@@ -517,7 +519,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
  else
decl_size = -1;
 
- return (decl_size <= 0 ? offset != 0 : offset + size > decl_size);
+ return !known_subrange_p (offset, size, 0, decl_size);
 }
 
   return 0;
@@ -534,17 +536,14 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
 || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM]))
{
 #ifdef RED_ZONE_SIZE
- HOST_WIDE_INT red_zone_size = RED_ZONE_SIZE;
+ poly_int64 red_zone_size = RED_ZONE_SIZE;
 #else
- HOST_WIDE_INT red_zone_size = 0;
+ poly_int64 red_zone_size = 0;
 #endif
- HOST_WIDE_INT stack_boundary = PREFERRED_STACK_BOUNDARY
-/ BITS_PER_UNIT;
- HOST_WIDE_INT low_bound, high_bound;
-
- if (size == 0)
-   size = GET_MODE_SIZE (mode);
- if (size == 0)
+ poly_int64 stack_boundary = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+ poly_int64 low_bound, high_bound;
+
+ if (!known_size_p (size))
return 1;
 
  if (x == frame_pointer_rtx)
@@ -562,10 +561,10 @@ rtx_addr_can_trap_p_1 (const_rtx x, HOST
}
  else if (x == hard_frame_pointer_rtx)
{
- HOST_WIDE_INT sp_offset
+ poly_int64 sp_offset
= get_initial_register_offset (STACK_POINTER_REGNUM,
   HARD_FRAME_POINTER_REGNUM);
- HOST_WIDE_INT ap_offset
+ poly_int64 ap_offset
= get_initial_registe

1 2 3 >

1 - 100 of 225 matches

Mail list logo