Re: Backports to gcc 7.x

2017-12-07 Thread Segher Boessenkool
On Wed, Dec 06, 2017 at 04:32:38PM -0600, Kelvin Nilsen wrote:
> 
> I would like to backport the following patch to the GCC 7 branch.
> 
> PR80101: Fix ICE in store_data_bypass_p
>   https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00953.html
> 
> 
> This patch has been bootstrapped and regression tested on the
> GCC 7 branch.
> 
> Is this ok for backporting to GCC 7?

Yes please.  Thanks!


Segher


Re: [AArch64] Fix ICEs in aarch64_print_operand

2017-12-07 Thread James Greenhalgh
On Tue, Dec 05, 2017 at 05:57:37PM +, Richard Sandiford wrote:
> Three related regression fixes:
> 
> - We can't use asserts like:
> 
> gcc_assert (GET_MODE_SIZE (mode) == 16);
> 
>   in aarch64_print_operand because it could trigger for invalid user input.
> 
> - The output_operand_lossage in aarch64_print_address_internal:
> 
> output_operand_lossage ("invalid operand for '%%%c'", op);
> 
>   wasn't right because "op" is an rtx_code enum rather than the
>   prefix character.
> 
> - aarch64_print_operand_address shouldn't call output_operand_lossage
>   (because it doesn't have a prefix code) but instead fall back to
>   output_addr_const.
> 
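For concreteness, the kind of invalid user input the first point is about might look like the following (a hypothetical example, not taken from the new asm-2.c/asm-3.c tests): applying the 'y' modifier to a memory operand that is not 16 bytes wide, which previously tripped the assert and now gets an output_operand_lossage error.

  /* Hypothetical user input: %y expects a 16-byte memory operand, so an
     int operand should be rejected with "invalid operand for '%y'"
     rather than causing an ICE.  */
  void
  f (int *p)
  {
    asm volatile ("ldr x0, %y0" : : "m" (*p) : "x0");
  }
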
> Tested on aarch64-linux-gnu.  OK to install?

OK.

Thanks,
James

> 
> Thanks,
> Richard
> 
> 
> 2017-12-05  Richard Sandiford  
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_print_address_internal): Return
>   a bool success value.  Don't call output_operand_lossage here.
>   (aarch64_print_ldpstp_address): Return a bool success value.
>   (aarch64_print_operand_address): Call output_addr_const if
>   aarch64_print_address_internal fails.
>   (aarch64_print_operand): Don't assert that the mode is 16 bytes for
>   'y'; call output_operand_lossage instead.  Call output_operand_lossage
>   if aarch64_print_ldpstp_address fails.
> 
> gcc/testsuite/
>   * gcc.target/aarch64/asm-2.c: New test.
>   * gcc.target/aarch64/asm-3.c: Likewise.
> 
> Index: gcc/config/aarch64/aarch64.c
> ===
> --- gcc/config/aarch64/aarch64.c  2017-12-05 14:24:52.477015238 +
> +++ gcc/config/aarch64/aarch64.c  2017-12-05 17:54:56.466247227 +
> @@ -150,7 +150,7 @@ static bool aarch64_builtin_support_vect
>bool is_packed);
>  static machine_mode
>  aarch64_simd_container_mode (scalar_mode mode, unsigned width);
> -static void aarch64_print_ldpstp_address (FILE *f, machine_mode mode, rtx x);
> +static bool aarch64_print_ldpstp_address (FILE *, machine_mode, rtx);
>  
>  /* Major revision number of the ARM Architecture implemented by the target.  
> */
>  unsigned aarch64_architecture_version;
> @@ -5600,22 +5600,21 @@ #define buf_size 20
>{
>   machine_mode mode = GET_MODE (x);
>  
> - if (GET_CODE (x) != MEM)
> + if (GET_CODE (x) != MEM
> + || (code == 'y' && GET_MODE_SIZE (mode) != 16))
> {
>   output_operand_lossage ("invalid operand for '%%%c'", code);
>   return;
> }
>  
>   if (code == 'y')
> -   {
> - /* LDP/STP which uses a single double-width memory operand.
> -Adjust the mode to appear like a typical LDP/STP.
> -Currently this is supported for 16-byte accesses only.  */
> - gcc_assert (GET_MODE_SIZE (mode) == 16);
> - mode = DFmode;
> -   }
> +   /* LDP/STP which uses a single double-width memory operand.
> +  Adjust the mode to appear like a typical LDP/STP.
> +  Currently this is supported for 16-byte accesses only.  */
> +   mode = DFmode;
>  
> - aarch64_print_ldpstp_address (f, mode, XEXP (x, 0));
> + if (!aarch64_print_ldpstp_address (f, mode, XEXP (x, 0)))
> +   output_operand_lossage ("invalid operand prefix '%%%c'", code);
>}
>break;
>  
> @@ -5628,7 +5627,7 @@ #define buf_size 20
>  /* Print address 'x' of a memory access with mode 'mode'.
> 'op' is the context required by aarch64_classify_address.  It can either 
> be
> MEM for a normal memory access or PARALLEL for LDP/STP.  */
> -static void
> +static bool
>  aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, RTX_CODE 
> op)
>  {
>struct aarch64_address_info addr;
> @@ -5645,7 +5644,7 @@ aarch64_print_address_internal (FILE *f,
>   else
> asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
>  INTVAL (addr.offset));
> - return;
> + return true;
>  
>case ADDRESS_REG_REG:
>   if (addr.shift == 0)
> @@ -5654,7 +5653,7 @@ aarch64_print_address_internal (FILE *f,
>   else
> asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
>  reg_names [REGNO (addr.offset)], addr.shift);
> - return;
> + return true;
>  
>case ADDRESS_REG_UXTW:
>   if (addr.shift == 0)
> @@ -5663,7 +5662,7 @@ aarch64_print_address_internal (FILE *f,
>   else
> asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
>  REGNO (addr.offset) - R0_REGNUM, addr.shift);
> - return;
> + return true;
>  
>case ADDRESS_REG_SXTW:
>   if (addr.shift == 0)
> @@ -5672,7 +5671,7 @@ aarch64_print_address_internal (FILE *f,
>   else
> asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
>  REGNO (addr.offset) - R0_REGNUM, addr.shift);
> - return;
> + r

Re: [Patch][aarch64] Use IFUNCs to enable LSE instructions in libatomic on aarch64

2017-12-07 Thread James Greenhalgh
On Fri, Sep 29, 2017 at 09:29:37PM +0100, Steve Ellcey wrote:
> On Thu, 2017-09-28 at 12:31 +0100, Szabolcs Nagy wrote:
> > 
> > i think this should be improved, see below.
> 
> diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
> index d731406..92d19c6 100644
> --- a/libatomic/Makefile.am
> +++ b/libatomic/Makefile.am
> @@ -122,6 +122,10 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix 
> _$(s)_.lo,$(SIZEOBJS)))
>  
>  ## On a target-specific basis, include alternates to be selected by IFUNC.
>  if HAVE_IFUNC
> +if ARCH_AARCH64_LINUX
> +IFUNC_OPTIONS = -march=armv8.1-a
> +libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix 
> _$(s)_1_.lo,$(SIZEOBJS)))
> +endif
>  if ARCH_ARM_LINUX
>  IFUNC_OPTIONS = -march=armv7-a -DHAVE_KERNEL64
>  libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix 
> _$(s)_1_.lo,$(SIZEOBJS)))

One obvious thing I missed in the review is that this change will break
bootstrap on systems with older assemblers. Practically, that's those of
us who are holding out on Ubuntu 14.04. -march=armv8-a+lse would go back
a little further, so would be preferable, but even this won't get bootstrap
back on older systems.

Is there anything you can do to check for assembler support before turning
on IFUNCS for libatomic, or am I best to just start configuring with
--disable-gnu-indirect-function ?
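For what it's worth, one way to probe would be a configure-time compile test that only succeeds when the assembler accepts an LSE mnemonic; a hypothetical sketch (not an existing libatomic or GCC configure check):

  /* Hypothetical probe: build with the newly built compiler and
     -march=armv8-a+lse (or -march=armv8.1-a); if the assembler rejects
     the CAS mnemonic below, the IFUNC alternates should stay disabled.  */
  int
  main (void)
  {
    int expected = 0, desired = 1, mem = 0;
    asm volatile ("casal %w0, %w1, [%2]"
                  : "+r" (expected)
                  : "r" (desired), "r" (&mem)
                  : "memory");
    return mem;
  }
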

Thanks,
James



[PATCH GCC]Introduce loop interchange pass and enable it at -O3

2017-12-07 Thread Bin Cheng
Hi,
This is the overall loop interchange patch on the gimple-linterchange branch.
Note that the new pass is enabled at -O3 level by default.  Bootstrap and regtest
on x86_64 and AArch64 (ongoing).
Note that after the cost model change it is now far more conservative than the
original version.  It only interchanges 11 loops in spec2k6 (416 doesn't build at
the moment), vs ~250 for the original version.  I will collect compilation time
data, though there shouldn't be any surprise given how few loops are actually
interchanged.  I will also collect spec2k6 data; it shouldn't affect cases other
than bwaves either.
So is it OK?

Thanks,
bin
2017-12-07  Bin Cheng  
Richard Biener  

PR tree-optimization/81303
* Makefile.in (gimple-loop-interchange.o): New object file.
* common.opt (floop-interchange): Reuse the option from graphite.
* doc/invoke.texi (-floop-interchange): Ditto.  New document for
-floop-interchange and mention it for -O3.
* opts.c (default_options_table): Enable -floop-interchange at -O3.
* gimple-loop-interchange.cc: New file.
* params.def (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS): New parameter.
(PARAM_LOOP_INTERCHANGE_STRIDE_RATIO): New parameter.
* passes.def (pass_linterchange): New pass.
* timevar.def (TV_LINTERCHANGE): New time var.
* tree-pass.h (make_pass_linterchange): New declaration.
* tree-ssa-loop-ivcanon.c (create_canonical_iv): Change to external
interchange.  Record IV before/after increment in new parameters.
* tree-ssa-loop-ivopts.h (create_canonical_iv): New declaration.
* tree-vect-loop.c (vect_is_simple_reduction): Factor out reduction
path check into...
(check_reduction_path): ...New function here.
* tree-vectorizer.h (check_reduction_path): New declaration.

gcc/testsuite
2017-12-07  Bin Cheng  
Richard Biener  

PR tree-optimization/81303
* gcc.dg/tree-ssa/loop-interchange-1.c: New test.
* gcc.dg/tree-ssa/loop-interchange-1b.c: New test.
* gcc.dg/tree-ssa/loop-interchange-2.c: New test.
* gcc.dg/tree-ssa/loop-interchange-3.c: New test.
* gcc.dg/tree-ssa/loop-interchange-4.c: New test.
* gcc.dg/tree-ssa/loop-interchange-5.c: New test.
* gcc.dg/tree-ssa/loop-interchange-6.c: New test.
* gcc.dg/tree-ssa/loop-interchange-7.c: New test.
* gcc.dg/tree-ssa/loop-interchange-8.c: New test.
* gcc.dg/tree-ssa/loop-interchange-9.c: New test.
* gcc.dg/tree-ssa/loop-interchange-10.c: New test.
* gcc.dg/tree-ssa/loop-interchange-11.c: New test.
* gcc.dg/tree-ssa/loop-interchange-12.c: New test.
* gcc.dg/tree-ssa/loop-interchange-13.c: New test.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index db43fc1..3297437 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1302,6 +1302,7 @@ OBJS = \
gimple-iterator.o \
gimple-fold.o \
gimple-laddress.o \
+   gimple-loop-interchange.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index ffcbf85..6b9e4ea 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1504,8 +1504,8 @@ Common Alias(floop-nest-optimize)
 Enable loop nest transforms.  Same as -floop-nest-optimize.
 
 floop-interchange
-Common Alias(floop-nest-optimize)
-Enable loop nest transforms.  Same as -floop-nest-optimize.
+Common Report Var(flag_loop_interchange) Optimization
+Enable loop interchange on trees.
 
 floop-block
 Common Alias(floop-nest-optimize)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b8c8083..cebc465 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7401,6 +7401,7 @@ by @option{-O2} and also turns on the following 
optimization flags:
 -ftree-loop-vectorize @gol
 -ftree-loop-distribution @gol
 -ftree-loop-distribute-patterns @gol
+-floop-interchange @gol
 -fsplit-paths @gol
 -ftree-slp-vectorize @gol
 -fvect-cost-model @gol
@@ -8500,12 +8501,10 @@ Perform loop optimizations on trees.  This flag is 
enabled by default
 at @option{-O} and higher.
 
 @item -ftree-loop-linear
-@itemx -floop-interchange
 @itemx -floop-strip-mine
 @itemx -floop-block
 @itemx -floop-unroll-and-jam
 @opindex ftree-loop-linear
-@opindex floop-interchange
 @opindex floop-strip-mine
 @opindex floop-block
 @opindex floop-unroll-and-jam
@@ -8600,6 +8599,25 @@ ENDDO
 @end smallexample
 and the initialization loop is transformed into a call to memset zero.
 
+@item -floop-interchange
+@opindex floop-interchange
+Perform loop interchange outside of graphite.  This flag can improve cache
+performance on loop nest and allow further loop optimizations, like
+vectorization, to take place.  For example, the loop
+@smallexample
+for (int i = 0; i < N; i++)
+  for (int j = 0; j < N; j++)
+for (int k = 0; k < N; k++)
+  c[i][j] = c[i][j] + a[i][k]*b[k][j];
+@end smallexample
+is transformed to interchange the @code{j} and @code{k} loops.
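Illustratively, the interchanged nest would then look like this (a sketch, not the exact documentation text):

  for (int i = 0; i < N; i++)
    for (int k = 0; k < N; k++)
      for (int j = 0; j < N; j++)
        c[i][j] = c[i][j] + a[i][k]*b[k][j];

With j innermost, the accesses to c[i][j] and b[k][j] become stride-1, which is what makes the interchange profitable and enables later vectorization.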

Re: [PR 83141] Prevent SRA from removing type changing assignment

2017-12-07 Thread Richard Biener
On Thu, 7 Dec 2017, Martin Jambor wrote:

> Hi,
> 
> On Wed, Dec 06 2017, Richard Biener wrote:
> > On Wed, 6 Dec 2017, Martin Jambor wrote:
> 
> >> ...
> 
> >> Second is the testcase I described in my previous email.  When I saw
> >> 
> >>   FAIL: gcc.dg/guality/pr54970.c   -O1  line 31 a[0] == 4
> >> 
> >> At all optimization levels, I grumbled about Jakub being right again and
> >> duly decided to bite the bullet and do what he asked me to because it
> >> fixes the issue.  But if you allow me to XFAIL the guality test, I will
> >> happily remove the whole padding detection, I don't really like it
> >> either.
> >> 
> >> The debug information is apparently lost because a[0] is never used from
> >> that point on, as opposed to a[1] and a[2] for which the guality test
> >> still passes.
> >
> > XFAILing that is fine I think.
> >
> 
> Great, the updated and re-tested patch is below.  The only problem is
> that I did not figure out how to XFAIL a dg-final test only for
> optimized runs and so it now XPASSes at -O0.  Alternatively I can make
> a[0] not dead in the test, but that would hide the new regression which
> seems worse.

Works for me.  One could duplicate the test and dg-skip-if one for -O0
and one for anything besides -O0 ...
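For reference, one hypothetical way to express that split with DejaGnu directives (not part of the patch):

  /* Copy of the test run only at -O0 (keeps the passing a[0] check):
       { dg-skip-if "" { *-*-* } { "*" } { "-O0" } }
     Copy run at everything except -O0 (carries the XFAIL):
       { dg-skip-if "" { *-*-* } { "-O0" } { "" } }  */
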

> >> ...
> 
> >> But let me emphasize again that whenever correctness is the issue, the
> >> question whether an SRA recorded access comes from total scalarization
> >> or not is not important.  Users accessing the data in some other part of
> >> the function can create them too.  Users equipped with placement new can
> >> create all sorts of weird type accesses and because tree-sra is flow
> >> insensitive, they can then force scalarization to such replacements even
> >> at places where the data have wildly different types.
> >
> > Yes, but SRA will never create loads or stores for the non-total
> > scalarization case it will only choose one (better non-FP if not
> > all accesses are FP - I think compare_access_positions guarantees that) 
> > scalar register for each replacement, right?
> 
> Yes.  My point was just that with placement new, the same aggregate decl
> can contain wildly different data in two different places of a function,
> and SRA might pick some from the first place and use it in the other.
> Thus, any testcase that miscompiles with total scalarization can be
> extended to one that miscompiles without it.

I doubt that - do you have something specific in mind?
I think placement-new also emits a CLOBBER, how does
SRA treat that at the moment?

> >
> > Basically it will replace _all_ accesses of a memory piece with
> > a register instead, making that memory piece unused?
> 
> Yes.  By the way, given that we are about to consider assignments with
> type-changing MEM_REFs fragile and will not delete them, the aggregate
> will not go away and that is why I added back the bit setting to
> cannot_scalarize_away bitmap.  After all, that is exactly what the
> bitmap is for, don't bother totally scalarizing, the aggregate will not
> disappear.

Good.

> Below is the updated and quite a bit simpler patch, which has passed
> bootstrap and testing on x86_64-linux (but suffers from the XFAILs and
> XPASSes described above).

Ok.

Thanks,
Richard.

> Martin
> 
> 
> 2017-12-06  Martin Jambor  
> 
>   PR tree-optimization/83141
>   * tree-sra.c (contains_vce_or_bfcref_p): Move up in the file, also
>   test for MEM_REFs implicitly changing types with padding.  Remove
>   inline keyword.
>   (build_accesses_from_assign): Added contains_vce_or_bfcref_p checks.
> 
> testsuite/
>   * gcc.dg/tree-ssa/pr83141.c: New test.
>   * gcc.dg/guality/pr54970.c: XFAIL tests querying a[0].
> ---
>  gcc/testsuite/gcc.dg/guality/pr54970.c  | 10 +++---
>  gcc/testsuite/gcc.dg/tree-ssa/pr83141.c | 37 ++
>  gcc/tree-sra.c  | 54 
> +
>  3 files changed, 77 insertions(+), 24 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83141.c
> 
> diff --git a/gcc/testsuite/gcc.dg/guality/pr54970.c 
> b/gcc/testsuite/gcc.dg/guality/pr54970.c
> index a9b8c064d8b..1819d023e21 100644
> --- a/gcc/testsuite/gcc.dg/guality/pr54970.c
> +++ b/gcc/testsuite/gcc.dg/guality/pr54970.c
> @@ -24,23 +24,23 @@ main ()
>   /* { dg-final { gdb-test 25 "*p" "13" } } */
>asm volatile (NOP);/* { dg-final { gdb-test 25 "*q" "12" } 
> } */
>__builtin_memcpy (&a, (int [3]) { 4, 5, 6 }, sizeof (a));
> - /* { dg-final { gdb-test 31 "a\[0\]" "4" } } */
> + /* { dg-final { gdb-test 31 "a\[0\]" "4" { 
> xfail { *-*-* } } } } */
>   /* { dg-final { gdb-test 31 "a\[1\]" "5" } } */
>   /* { dg-final { gdb-test 31 "a\[2\]" "6" } } */
>   /* { dg-final { gdb-test 31 "*p" "6" } } */
>asm volatile (NOP);/* { dg-final

Re: RFC: Variable-length VECTOR_CSTs

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:11 PM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> On Thu, Nov 30, 2017 at 2:18 PM, Richard Sandiford
>>  wrote:
>>> Richard Sandiford  writes:
 Richard Biener  writes:
> On Wed, Nov 29, 2017 at 12:57 PM, Richard Sandiford
>  wrote:
>> It was clear from the SVE reviews that people were unhappy with how
>> "special" the variable-length case was.  One particular concern was
>> the use of VEC_DUPLICATE_CST and VEC_SERIES_CST, and the way that
>> that would in turn lead to different representations of VEC_PERM_EXPRs
>> with constant permute vectors, and difficulties in invoking
>> vec_perm_const_ok.
>>
>> This is an RFC for a VECTOR_CST representation that treats each
>> specific constant as one instance of an arbitrary-length sequence.
>> The representation then extends to variable-length vectors in a
>> natural way.
>>
>> As discussed on IRC, if a vector contains X*N elements for some
>> constant N and integer X>0, the main features we need are:
>>
>> 1) the ability to represent a sequence that duplicates N values
>>
>>This is useful for SLP invariants.
>>
>> 2) the ability to represent a sequence that starts with N values and
>>is followed by zeros
>>
>>This is useful for the initial value in a double or SLP reduction
>>
>> 3) the ability to represent N interleaved series
>>
>>This is useful for SLP inductions and for VEC_PERM_EXPRs.
>>
>> For (2), zero isn't necessarily special, since vectors used in an AND
>> reduction might need to fill with ones.  Also, we might need up to N
>> different fill values with mixed SLP operations; it isn't necessarily
>> safe to assume that a single fill value will always be enough.
>>
>> The same goes for (3): there's no reason in principle why the
>> steps in an SLP induction should all be the same (although they
>> do need to be at the moment).  E.g. once we support SLP on:
>>
>>   for (unsigned int i = 0; i < n; i += 2)
>> {
>>   x[i] += 4 + i;
>>   x[i + 1] += 11 + i * 3;
>> }
>>
>> we'll need {[4, 14], +, [2, 6]}.
>>
>> So the idea is to represent vectors as P interleaved patterns of the 
>> form:
>>
>>   [BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, ...]
>>
>> where the STEP is always zero (actually null) for non-integer vectors.
>> This is effectively projecting a "foreground" value of P elements
>> onto an arbitrary-length "background" sequence, where the background
>> sequence contains P parallel linear series.
>>
>> E.g. to pick an extreme and unlikely example,
>>
>>   [42, 99, 2, 20, 3, 30, 4, 40, ...]
>>
>> has 2 patterns:
>>
>>   BASE0 = 42, BASE1 = 2, STEP = 1
>>   BASE0 = 99, BASE1 = 20, STEP = 10
>>
>> The more useful cases are degenerate versions of this general case.
>>
>> As far as memory consumption goes: the number of patterns needed for a
>> fixed-length vector with 2*N elements is always at most N; in the worst
>> case, we simply interleave the first N elements with the second N 
>> elements.
>> The worst-case increase in footprint is therefore N trees for the steps.
>> In practice the footprint is usually smaller than it was before, since
>> most constants do have a pattern.
>>
>> The patch below implements this for trees.  I have patches to use the
>> same style of encoding for CONST_VECTOR and vec_perm_indices, but the
>> tree one is probably easiest to read.
>>
>> The patch only adds the representation.  Follow-on patches make more
>> use of it (and usually make things simpler; e.g. integer_zerop is no
>> longer a looping operation).
>>
>> Does this look better?
>
> Yes, the overall design looks good.  I wonder why you chose to have
> the number of patterns being a power of two?  I suppose this is
> to have the same number of elements from all patterns in the final
> vector (which is power-of-two sized)?

 Right.  The rtl and vec_perm_indices parts don't have this restriction,
 since some ports do define non-power-of-2 vectors for internal use.
 The problem is that, since VECTOR_CSTs are used by the FE, we need
 to support all valid vector lengths without blowing the 16-bit field.
 Using the same style of representation as TYPE_VECTOR_SUBPARTS seemed
 like the safest way of doing that.

> I wonder if there exists a vector where say a three-pattern
> interleaving would be smaller than a four-pattern one?

 Only in the non-power-of-2 case.

> Given you add flags for various purposes would it make sense to
> overload 'step' with a regular element to avoid the storage increase
> in case step is unnecessary?  This makes it have three elements
> which is of cour

Re: Use tree_vector_builder instead of build_vector

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:22 PM, Richard Sandiford
 wrote:
> This patch switches most build_vector calls over to tree_vector_builder,
> using explicit encodings where appropriate.  Later patches handle
> the remaining uses of build_vector.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard

>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * config/sparc/sparc.c: Include tree-vector-builder.h.
> (sparc_fold_builtin): Use tree_vector_builder instead of build_vector.
> * expmed.c: Include tree-vector-builder.h.
> (make_tree): Use tree_vector_builder instead of build_vector.
> * fold-const.c: Include tree-vector-builder.h.
> (const_binop): Use tree_vector_builder instead of build_vector.
> (const_unop): Likewise.
> (native_interpret_vector): Likewise.
> (fold_vec_perm): Likewise.
> (fold_ternary_loc): Likewise.
> * gimple-fold.c: Include tree-vector-builder.h.
> (gimple_fold_stmt_to_constant_1): Use tree_vector_builder instead
> of build_vector.
> * tree-ssa-forwprop.c: Include tree-vector-builder.h.
> (simplify_vector_constructor): Use tree_vector_builder instead
> of build_vector.
> * tree-vect-generic.c: Include tree-vector-builder.h.
> (add_rshift): Use tree_vector_builder instead of build_vector.
> (expand_vector_divmod): Likewise.
> (optimize_vector_constructor): Likewise.
> * tree-vect-loop.c: Include tree-vector-builder.h.
> (vect_create_epilog_for_reduction): Use tree_vector_builder instead
> of build_vector.  Explicitly use a stepped encoding for
> { 1, 2, 3, ... }.
> * tree-vect-slp.c: Include tree-vector-builder.h.
> (vect_get_constant_vectors): Use tree_vector_builder instead
> of build_vector.
> (vect_transform_slp_perm_load): Likewise.
> (vect_schedule_slp_instance): Likewise.
> * tree-vect-stmts.c: Include tree-vector-builder.h.
> (vectorizable_bswap): Use tree_vector_builder instead of build_vector.
> (vect_gen_perm_mask_any): Likewise.
> (vectorizable_call): Likewise.  Explicitly use a stepped encoding.
> * tree.c: (build_vector_from_ctor): Use tree_vector_builder instead
> of build_vector.
> (build_vector_from_val): Likewise.  Explicitly use a duplicate
> encoding.
>
> Index: gcc/config/sparc/sparc.c
> ===
> --- gcc/config/sparc/sparc.c2017-12-05 14:24:52.587013199 +
> +++ gcc/config/sparc/sparc.c2017-12-06 14:48:52.885162299 +
> @@ -57,6 +57,7 @@ the Free Software Foundation; either ver
>  #include "tree-pass.h"
>  #include "context.h"
>  #include "builtins.h"
> +#include "tree-vector-builder.h"
>
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -11752,14 +11753,14 @@ sparc_fold_builtin (tree fndecl, int n_a
>   tree inner_type = TREE_TYPE (rtype);
>   unsigned i;
>
> - auto_vec<tree> n_elts (VECTOR_CST_NELTS (arg0));
> + tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
>   for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
> {
>   unsigned HOST_WIDE_INT val
> = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
>   n_elts.quick_push (build_int_cst (inner_type, val << 4));
> }
> - return build_vector (rtype, n_elts);
> + return n_elts.build ();
> }
>break;
>
> @@ -11774,9 +11775,9 @@ sparc_fold_builtin (tree fndecl, int n_a
>if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
> {
>   tree inner_type = TREE_TYPE (rtype);
> - auto_vec<tree> n_elts (VECTOR_CST_NELTS (arg0));
> + tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
>   sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
> - return build_vector (rtype, n_elts);
> + return n_elts.build ();
> }
>break;
>
> @@ -11788,7 +11789,7 @@ sparc_fold_builtin (tree fndecl, int n_a
>
>if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
> {
> - auto_vec<tree> n_elts (2 * VECTOR_CST_NELTS (arg0));
> + tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
>   unsigned i;
>   for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
> {
> @@ -11796,7 +11797,7 @@ sparc_fold_builtin (tree fndecl, int n_a
>   n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
> }
>
> - return build_vector (rtype, n_elts);
> + return n_elts.build ();
> }
>break;
>
> Index: gcc/expmed.c
> ===
> --- gcc/expmed.c2017-11

Re: Use tree_vector_builder::new_unary_operation for folding

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:23 PM, Richard Sandiford
 wrote:
> This patch makes fold-const.c operate directly on the VECTOR_CST
> encoding when folding an operation that has a single VECTOR_CST input.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard
>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * fold-const.c (fold_negate_expr_1): Use tree_vector_builder and
> new_unary_operation, operating only on the encoded elements.
> (const_unop): Likewise.
> (exact_inverse): Likewise.
> (distributes_over_addition_p): New function.
> (const_binop): Use tree_vector_builder and new_unary_operation
> for combinations of VECTOR_CST and INTEGER_CST.  Operate only
> on the encoded elements unless the encoding is strided and the
> operation does not distribute over addition.
> (fold_convert_const):  Use tree_vector_builder and
> new_unary_operation.  Operate only on the encoded elements
> for truncating integer conversions, or for non-stepped encodings.
>
> Index: gcc/fold-const.c
> ===
> --- gcc/fold-const.c2017-12-06 14:48:52.887162217 +
> +++ gcc/fold-const.c2017-12-06 14:48:56.997993407 +
> @@ -566,10 +566,10 @@ fold_negate_expr_1 (location_t loc, tree
>
>  case VECTOR_CST:
>{
> -   int count = VECTOR_CST_NELTS (t), i;
> -
> -   auto_vec<tree> elts (count);
> -   for (i = 0; i < count; i++)
> +   tree_vector_builder elts;
> +   elts.new_unary_operation (type, t, true);
> +   unsigned int count = elts.encoded_nelts ();
> +   for (unsigned int i = 0; i < count; ++i)
>   {
> tree elt = fold_negate_expr (loc, VECTOR_CST_ELT (t, i));
> if (elt == NULL_TREE)
> @@ -577,7 +577,7 @@ fold_negate_expr_1 (location_t loc, tree
> elts.quick_push (elt);
>   }
>
> -   return build_vector (type, elts);
> +   return elts.build ();
>}
>
>  case COMPLEX_EXPR:
> @@ -1121,6 +1121,27 @@ int_const_binop (enum tree_code code, co
>return int_const_binop_1 (code, arg1, arg2, 1);
>  }
>
> +/* Return true if binary operation OP distributes over addition in operand
> +   OPNO, with the other operand being held constant.  OPNO counts from 1.  */
> +
> +static bool
> +distributes_over_addition_p (tree_code op, int opno)
> +{
> +  switch (op)
> +{
> +case PLUS_EXPR:
> +case MINUS_EXPR:
> +case MULT_EXPR:
> +  return true;
> +
> +case LSHIFT_EXPR:
> +  return opno == 1;
> +
> +default:
> +  return false;
> +}
> +}
> +
>  /* Combine two constants ARG1 and ARG2 under operation CODE to produce a new
> constant.  We assume ARG1 and ARG2 have the same data type, or at least
> are the same kind of constant and the same machine mode.  Return zero if
> @@ -1442,10 +1463,12 @@ const_binop (enum tree_code code, tree a
>&& TREE_CODE (arg2) == INTEGER_CST)
>  {
>tree type = TREE_TYPE (arg1);
> -  int count = VECTOR_CST_NELTS (arg1), i;
> -
> -  auto_vec<tree> elts (count);
> -  for (i = 0; i < count; i++)
> +  bool step_ok_p = distributes_over_addition_p (code, 1);
> +  tree_vector_builder elts;
> +  if (!elts.new_unary_operation (type, arg1, step_ok_p))
> +   return NULL_TREE;
> +  unsigned int count = elts.encoded_nelts ();
> +  for (unsigned int i = 0; i < count; ++i)
> {
>   tree elem1 = VECTOR_CST_ELT (arg1, i);
>
> @@ -1458,7 +1481,7 @@ const_binop (enum tree_code code, tree a
>   elts.quick_push (elt);
> }
>
> -  return build_vector (type, elts);
> +  return elts.build ();
>  }
>return NULL_TREE;
>  }
> @@ -1649,10 +1672,12 @@ const_unop (enum tree_code code, tree ty
>else if (TREE_CODE (arg0) == VECTOR_CST)
> {
>   tree elem;
> - unsigned count = VECTOR_CST_NELTS (arg0), i;
>
> - auto_vec<tree> elements (count);
> - for (i = 0; i < count; i++)
> + /* This can cope with stepped encodings because ~x == -1 - x.  */
> + tree_vector_builder elements;
> + elements.new_unary_operation (type, arg0, true);
> + unsigned int i, count = elements.encoded_nelts ();
> + for (i = 0; i < count; ++i)
> {
>   elem = VECTOR_CST_ELT (arg0, i);
>   elem = const_unop (BIT_NOT_EXPR, TREE_TYPE (type), elem);
> @@ -1661,7 +1686,7 @@ const_unop (enum tree_code code, tree ty
>   elements.quick_push (elem);
> }
>   if (i == count)
> -   return build_vector (type, elements);
> +   return elements.build ();
> }
>break;
>
> @@ -2135,10 +2160,19 @@ fold_convert_const (enum tree_code code,
>if (TREE_CODE (arg1) == VECTOR_CST
>   && TYPE_VECTOR_SUBPARTS (ty

Re: Use tree_vector_builder::new_binary_operation for folding

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:24 PM, Richard Sandiford
 wrote:
> This patch makes fold-const.c operate directly on the VECTOR_CST
> encoding when folding an operation that has two VECTOR_CST inputs.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard
>
>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * tree-vector-builder.h
> (tree_vector_builder::new_binary_operation): Declare.
> * tree-vector-builder.c
> (tree_vector_builder::new_binary_operation): New function.
> * fold-const.c (fold_relational_const): Use it.
> (const_binop): Likewise.  Check that both input vectors have
> the same number of elements, thus excluding things like WIDEN_SUM.
> Check whether it is possible to operate directly on the encodings
> of stepped inputs.
>
> Index: gcc/tree-vector-builder.h
> ===
> --- gcc/tree-vector-builder.h   2017-12-06 14:46:14.131599903 +
> +++ gcc/tree-vector-builder.h   2017-12-06 14:49:00.386854068 +
> @@ -38,6 +38,7 @@ #define GCC_TREE_VECTOR_BUILDER_H
>
>void new_vector (tree, unsigned int, unsigned int);
>bool new_unary_operation (tree, tree, bool);
> +  bool new_binary_operation (tree, tree, tree, bool);
>
>  private:
>bool equal_p (const_tree, const_tree) const;
> Index: gcc/tree-vector-builder.c
> ===
> --- gcc/tree-vector-builder.c   2017-12-06 14:46:14.131599903 +
> +++ gcc/tree-vector-builder.c   2017-12-06 14:49:00.386854068 +
> @@ -49,6 +49,53 @@ tree_vector_builder::new_unary_operation
>return true;
>  }
>
> +/* Try to start building a new vector of type TYPE that holds the result of
> +   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if
> +   the operation can handle stepped encodings directly, without having to
> +   expand the full sequence.
> +
> +   Return true if the operation is possible.  Leave the builder unchanged
> +   otherwise.  */
> +
> +bool
> +tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
> +  bool allow_stepped_p)
> +{
> +  unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type);
> +  gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))
> + && full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)));
> +  /* Conceptually we split the patterns in T1 and T2 until we have
> + an equal number for both.  Each split pattern requires the same
> + number of elements per pattern as the original.  E.g. splitting:
> +
> +   { 1, 2, 3, ... }
> +
> + into two gives:
> +
> +   { 1, 3, 5, ... }
> +   { 2, 4, 6, ... }
> +
> + while splitting:
> +
> +   { 1, 0, ... }
> +
> + into two gives:
> +
> +   { 1, 0, ... }
> +   { 0, 0, ... }.  */
> +  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
> + VECTOR_CST_NPATTERNS (t2));
> +  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
> +   VECTOR_CST_NELTS_PER_PATTERN (t2));
> +  if (!allow_stepped_p && nelts_per_pattern > 2)
> +{
> +  npatterns = full_nelts;
> +  nelts_per_pattern = 1;
> +}
> +  new_vector (type, npatterns, nelts_per_pattern);
> +  return true;
> +}
> +
>  /* Return a VECTOR_CST for the current constant.  */
>
>  tree
> Index: gcc/fold-const.c
> ===
> --- gcc/fold-const.c2017-12-06 14:48:56.997993407 +
> +++ gcc/fold-const.c2017-12-06 14:49:00.386854068 +
> @@ -1435,13 +1435,40 @@ const_binop (enum tree_code code, tree a
>  }
>
>if (TREE_CODE (arg1) == VECTOR_CST
> -  && TREE_CODE (arg2) == VECTOR_CST)
> +  && TREE_CODE (arg2) == VECTOR_CST
> +  && (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1))
> + == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2
>  {
>tree type = TREE_TYPE (arg1);
> -  int count = VECTOR_CST_NELTS (arg1), i;
> +  bool step_ok_p;
> +  if (VECTOR_CST_STEPPED_P (arg1)
> + && VECTOR_CST_STEPPED_P (arg2))
> +   /* We can operate directly on the encoding if:
> +
> + a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
> +   implies
> + (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
> +
> +  Addition and subtraction are the supported operators
> +  for which this is true.  */
> +   step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
> +  else if (VECTOR_CST_STEPPED_P (arg1))
> +   /* We can operate directly on stepped encodings if:
> +
> +a3 - a2 == a2 - a1
> +  implies:
> +(a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
>
> -  auto_vec<tree> elts (count);
> -  for (i = 0;

Re: Make build_vector static

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:26 PM, Richard Sandiford
 wrote:
> After the previous patches, the only remaining uses of build_vector are
> in the selftests in tree.c.  This patch makes it static and moves it to
> the selftest part of the file.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard
>
>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * tree.c (build_vector): Delete.
> * tree.h (build_vector): Make static and move into the self-testing
> block.
>
> Index: gcc/tree.c
> ===
> --- gcc/tree.c  2017-12-06 14:48:52.891162052 +
> +++ gcc/tree.c  2017-12-06 14:49:10.295445836 +
> @@ -1736,18 +1736,6 @@ make_vector (unsigned log2_npatterns,
>  }
>
>  /* Return a new VECTOR_CST node whose type is TYPE and whose values
> -   are given by VALS.  */
> -
> -tree
> -build_vector (tree type, vec<tree> vals MEM_STAT_DECL)
> -{
> -  gcc_assert (vals.length () == TYPE_VECTOR_SUBPARTS (type));
> -  tree_vector_builder builder (type, vals.length (), 1);
> -  builder.splice (vals);
> -  return builder.build ();
> -}
> -
> -/* Return a new VECTOR_CST node whose type is TYPE and whose values
> are extracted from V, a vector of CONSTRUCTOR_ELT.  */
>
>  tree
> @@ -14066,6 +14054,18 @@ test_labels ()
>ASSERT_FALSE (FORCED_LABEL (label_decl));
>  }
>
> +/* Return a new VECTOR_CST node whose type is TYPE and whose values
> +   are given by VALS.  */
> +
> +static tree
> +build_vector (tree type, vec<tree> vals MEM_STAT_DECL)
> +{
> +  gcc_assert (vals.length () == TYPE_VECTOR_SUBPARTS (type));
> +  tree_vector_builder builder (type, vals.length (), 1);
> +  builder.splice (vals);
> +  return builder.build ();
> +}
> +
>  /* Check that VECTOR_CST ACTUAL contains the elements in EXPECTED.  */
>
>  static void
> Index: gcc/tree.h
> ===
> --- gcc/tree.h  2017-12-06 14:46:14.133599820 +
> +++ gcc/tree.h  2017-12-06 14:49:10.296445794 +
> @@ -4044,7 +4044,6 @@ extern tree build_int_cst (tree, HOST_WI
>  extern tree build_int_cstu (tree type, unsigned HOST_WIDE_INT cst);
>  extern tree build_int_cst_type (tree, HOST_WIDE_INT);
>  extern tree make_vector (unsigned, unsigned CXX_MEM_STAT_INFO);
> -extern tree build_vector (tree, vec<tree> CXX_MEM_STAT_INFO);
> extern tree build_vector_from_ctor (tree, vec<constructor_elt, va_gc> *);
>  extern tree build_vector_from_val (tree, tree);
>  extern void recompute_constructor_flags (tree);


Re: Make gimple_build_vector take a tree_vector_builder

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:25 PM, Richard Sandiford
 wrote:
> This patch changes gimple_build_vector so that it takes a
> tree_vector_builder instead of a size and a vector of trees.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard
>
>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * vector-builder.h (vector_builder::derived): New const overload.
> (vector_builder::elt): New function.
> * tree-vector-builder.h (tree_vector_builder::type): New function.
> (tree_vector_builder::apply_step): Declare.
> * tree-vector-builder.c (tree_vector_builder::apply_step): New
> function.
> * gimple-fold.h (tree_vector_builder): Declare.
> (gimple_build_vector): Take a tree_vector_builder instead of a
> type and vector of elements.
> * gimple-fold.c (gimple_build_vector): Likewise.
> * tree-vect-loop.c (get_initial_def_for_reduction): Update call
> accordingly.
> (get_initial_defs_for_reduction): Likewise.
> (vectorizable_induction): Likewise.
>
> Index: gcc/vector-builder.h
> ===
> --- gcc/vector-builder.h2017-12-06 14:46:14.133599820 +
> +++ gcc/vector-builder.h2017-12-06 14:49:04.289693414 +
> @@ -68,6 +68,10 @@ #define GCC_VECTOR_BUILDER_H
>   given integral_p (ELT1) && integral_p (ELT2).  There is no fixed
>   choice of StepType.
>
> +  T apply_step (T base, unsigned int factor, StepType step) const;
> +
> + Return a vector element with the value BASE + FACTOR * STEP.
> +
>bool can_elide_p (T elt) const;
>
>   Return true if we can drop element ELT, even if the retained
> @@ -91,6 +95,7 @@ #define GCC_VECTOR_BUILDER_H
>unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; }
>unsigned int encoded_nelts () const;
>bool encoded_full_vector_p () const;
> +  T elt (unsigned int) const;
>
>void finalize ();
>
> @@ -163,6 +168,38 @@ vector_builder::new_vector (
>this->truncate (0);
>  }
>
> +/* Return the value of vector element I, which might or might not be
> +   encoded explicitly.  */
> +
> +template<typename T, typename Derived>
> +T
> +vector_builder<T, Derived>::elt (unsigned int i) const
> +{
> +  /* This only makes sense if the encoding has been fully populated.  */
> +  gcc_checking_assert (encoded_nelts () <= this->length ());
> +
> +  /* First handle elements that are already present in the underlying
> + vector, regardless of whether they're part of the encoding or not.  */
> +  if (i < this->length ())
> +return (*this)[i];
> +
> +  /* Identify the pattern that contains element I and work out the index of
> + the last encoded element for that pattern.  */
> +  unsigned int pattern = i % m_npatterns;
> +  unsigned int count = i / m_npatterns;
> +  unsigned int final_i = encoded_nelts () - m_npatterns + pattern;
> +  T final = (*this)[final_i];
> +
> +  /* If there are no steps, the final encoded value is the right one.  */
> +  if (m_nelts_per_pattern <= 2)
> +return final;
> +
> +  /* Otherwise work out the value from the last two encoded elements.  */
> +  T prev = (*this)[final_i - m_npatterns];
> +  return derived ()->apply_step (final, count - 2,
> +derived ()->step (prev, final));
> +}
> +
>  /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each,
> but without changing the underlying vector.  */
>
> Index: gcc/tree-vector-builder.h
> ===
> --- gcc/tree-vector-builder.h   2017-12-06 14:49:00.386854068 +
> +++ gcc/tree-vector-builder.h   2017-12-06 14:49:04.289693414 +
> @@ -45,6 +45,7 @@ #define GCC_TREE_VECTOR_BUILDER_H
>bool allow_steps_p () const;
>bool integral_p (const_tree) const;
>wide_int step (const_tree, const_tree) const;
> +  tree apply_step (tree, unsigned int, const wide_int &) const;
>bool can_elide_p (const_tree) const;
>void note_representative (tree *, tree);
>
> Index: gcc/tree-vector-builder.c
> ===
> --- gcc/tree-vector-builder.c   2017-12-06 14:49:00.386854068 +
> +++ gcc/tree-vector-builder.c   2017-12-06 14:49:04.289693414 +
> @@ -96,6 +96,16 @@ tree_vector_builder::new_binary_operatio
>return true;
>  }
>
> +/* Return a vector element with the value BASE + FACTOR * STEP.  */
> +
> +tree
> +tree_vector_builder::apply_step (tree base, unsigned int factor,
> +const wide_int &step) const
> +{
> +  return wide_int_to_tree (TREE_TYPE (base),
> +  wi::to_wide (base) + factor * step);
> +}
> +
>  /* Return a VECTOR_CST for the current constant.  */
>
>  tree
> Index: gcc/gimple-fold.h
> ===

Re: Make more use of VECTOR_CST_ENCODED_ELT

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 4:28 PM, Richard Sandiford
 wrote:
> This patch makes various bits of code operate directly on the new
> VECTOR_CST encoding, instead of using VECTOR_CST_ELT on all elements
> of the vector.
>
> Previous patches handled operations that produce a new VECTOR_CST,
> while this patch handles things like predicates.  It also makes
> print_node dump the encoding instead of the full vector that
> the encoding represents.
>
> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
> Also spot-checked on sparc64-linux-gnu.  OK to install?

Ok.

Richard.

> Thanks,
> Richard
>
>
> 2017-12-06  Richard Sandiford  
>
> gcc/
> * tree-vector-builder.h
> (tree_vector_builder::binary_encoded_nelts): Declare.
> * tree-vector-builder.c
> (tree_vector_builder::binary_encoded_nelts): New function.
> * fold-const.c (negate_expr_p): Likewise.
> (operand_equal_p, fold_checksum_tree): Likewise.
> * tree-loop-distribution.c (const_with_all_bytes_same): Likewise.
> * tree.c (integer_zerop, integer_onep, integer_all_onesp, real_zerop)
> (real_onep, real_minus_onep, add_expr, initializer_zerop): Likewise.
> (uniform_vector_p): Likewise.
> * varasm.c (const_hash_1, compare_constant): Likewise.
> * tree-ssa-ccp.c: Include tree-vector-builder.h.
> (valid_lattice_transition): Operate directly on the VECTOR_CST
> encoding.
> * ipa-icf.c: Include tree-vector-builder.h.
> (sem_variable::equals): Operate directly on the VECTOR_CST encoding.
> * print-tree.c (print_node): Print encoding of VECTOR_CSTs.
>
> Index: gcc/tree-vector-builder.h
> ===
> --- gcc/tree-vector-builder.h   2017-12-06 14:49:04.289693414 +
> +++ gcc/tree-vector-builder.h   2017-12-06 14:50:45.559564436 +
> @@ -40,6 +40,8 @@ #define GCC_TREE_VECTOR_BUILDER_H
>bool new_unary_operation (tree, tree, bool);
>bool new_binary_operation (tree, tree, tree, bool);
>
> +  static unsigned int binary_encoded_nelts (tree, tree);
> +
>  private:
>bool equal_p (const_tree, const_tree) const;
>bool allow_steps_p () const;
> Index: gcc/tree-vector-builder.c
> ===
> --- gcc/tree-vector-builder.c   2017-12-06 14:49:04.289693414 +
> +++ gcc/tree-vector-builder.c   2017-12-06 14:50:45.558564477 +
> @@ -96,6 +96,24 @@ tree_vector_builder::new_binary_operatio
>return true;
>  }
>
> +/* Return the number of elements that the caller needs to operate on in
> +   order to handle a binary operation on VECTOR_CSTs T1 and T2.  This static
> +   function is used instead of new_binary_operation if the result of the
> +   operation is not a VECTOR_CST.  */
> +
> +unsigned int
> +tree_vector_builder::binary_encoded_nelts (tree t1, tree t2)
> +{
> +  unsigned int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1));
> +  gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)));
> +  /* See new_binary_operation for details.  */
> +  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
> + VECTOR_CST_NPATTERNS (t2));
> +  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
> +   VECTOR_CST_NELTS_PER_PATTERN (t2));
> +  return MIN (npatterns * nelts_per_pattern, nelts);
> +}
> +
>  /* Return a vector element with the value BASE + FACTOR * STEP.  */
>
>  tree
> Index: gcc/fold-const.c
> ===
> --- gcc/fold-const.c2017-12-06 14:49:00.386854068 +
> +++ gcc/fold-const.c2017-12-06 14:50:45.557564518 +
> @@ -410,10 +410,10 @@ negate_expr_p (tree t)
> if (FLOAT_TYPE_P (TREE_TYPE (type)) || TYPE_OVERFLOW_WRAPS (type))
>   return true;
>
> -   int count = VECTOR_CST_NELTS (t), i;
> -
> -   for (i = 0; i < count; i++)
> - if (!negate_expr_p (VECTOR_CST_ELT (t, i)))
> +   /* Steps don't prevent negation.  */
> +   unsigned int count = vector_cst_encoded_nelts (t);
> +   for (unsigned int i = 0; i < count; ++i)
> + if (!negate_expr_p (VECTOR_CST_ENCODED_ELT (t, i)))
> return false;
>
> return true;
> @@ -2981,17 +2981,19 @@ operand_equal_p (const_tree arg0, const_
>
>case VECTOR_CST:
> {
> - unsigned i;
> + if (VECTOR_CST_LOG2_NPATTERNS (arg0)
> + != VECTOR_CST_LOG2_NPATTERNS (arg1))
> +   return 0;
>
> - if (VECTOR_CST_NELTS (arg0) != VECTOR_CST_NELTS (arg1))
> + if (VECTOR_CST_NELTS_PER_PATTERN (arg0)
> + != VECTOR_CST_NELTS_PER_PATTERN (arg1))
> return 0;
>
> - for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
> -   {
> - if (!operand_equal_p (VECTOR_CST_ELT (arg0, i),
> - 

Re: [PATCH, rs6000] Gimple folding of splat_uX

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 5:36 PM, Will Schmidt  wrote:
> Hi,
> Add support for gimple folding of splat_u{8,16,32}.
> Testcase coverage is primarily handled by existing tests
> testsuite/gcc.target/powerpc/fold-vec-splat_*.c
>
> One new test added to verify we continue to receive
> an 'invalid argument, must be a 5-bit immediate' error
> when we try to splat a non-constant value.
>
> Regtests currently running across assorted power systems.
> OK for trunk with successful results?
>
> Thanks
> -Will
>
> [gcc]
>
> 2017-12-05  Will Schmidt  
>
> * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
> early folding of splat_u{8,16,32}.
>
> [testsuite]
>
> 2017-12-05  Will Schmidt  
>
> * gcc.target/powerpc/fold-vec-splat-misc-invalid.c: New.
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 045a014..1470557 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16614,10 +16614,33 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> *gsi)
>  case VSX_BUILTIN_CMPLE_2DI:
>  case VSX_BUILTIN_CMPLE_U2DI:
>fold_compare_helper (gsi, LE_EXPR, stmt);
>return true;
>
> +/* flavors of vec_splat_[us]{8,16,32}.  */
> +case ALTIVEC_BUILTIN_VSPLTISB:
> +case ALTIVEC_BUILTIN_VSPLTISH:
> +case ALTIVEC_BUILTIN_VSPLTISW:
> +  {
> +arg0 = gimple_call_arg (stmt, 0);
> +lhs = gimple_call_lhs (stmt);
> +/* Only fold the vec_splat_*() if arg0 is constant.  */
> +if ( TREE_CODE (arg0) != INTEGER_CST)
> +  return false;

Is there a reason to not do this for non-constants?  (even not for
float constants?)
You should probably double-check there is a LHS, folding runs before DCE.

> +tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> +  TREE_INT_CST_LOW (arg0));
> +vec<constructor_elt, va_gc> *ctor_elts = NULL;
> +unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> +for (unsigned int i=0; i < n_elts ; i++)
> +  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, splat_value);
> +tree splat_tree = build_constructor (TREE_TYPE (lhs), ctor_elts);

Just use

   tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);

that would also work for non-constants btw.

> +g = gimple_build_assign (lhs, splat_tree);
> +gimple_set_location (g, gimple_location (stmt));
> +gsi_replace (gsi, g, true);
> +return true;
> +  }
> +
>  default:
>if (TARGET_DEBUG_BUILTIN)
> fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>  fn_code, fn_name1, fn_name2);
>break;
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c 
> b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> new file mode 100644
> index 000..20f5b05
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> @@ -0,0 +1,33 @@
> +/* Verify that overloaded built-ins for vec_splat_s8 and vec_splat_s16
> +   generate errors as expected when we attempt to use invalid inputs.  */
> +
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-mvsx -O2" } */
> +
> +#include 
> +
> +vector signed short
> +testss_1 (unsigned int ui)
> +{
> +  return vec_splat_s16 (ui);/* { dg-error "argument 1 must be a 5-bit signed 
> literal" } */
> +}
> +
> +vector unsigned short
> +testss_2 (signed int si)
> +{
> +  return vec_splat_u16 (si);/* { dg-error "argument 1 must be a 5-bit signed 
> literal" } */
> +}
> +
> +vector signed char
> +testsc_1 (unsigned int ui)
> +{
> +  return vec_splat_s8 (ui); /* { dg-error "argument 1 must be a 5-bit signed 
> literal" } */
> +}
> +
> +vector unsigned char
> +testsc_2 (signed int si)
> +{
> +  return vec_splat_u8 (si);/* { dg-error "argument 1 must be a 5-bit signed 
> literal" } */
> +}
> +
>
>


Re: [PATCH, rs6000] Gimple folding of splat_uX

2017-12-07 Thread Jakub Jelinek
On Thu, Dec 07, 2017 at 12:21:03PM +0100, Richard Biener wrote:
> > +if ( TREE_CODE (arg0) != INTEGER_CST)

Also watch formatting, the space after ( doesn't belong there.

> > +  return false;
> 
> Is there a reason to not do this for non-constants?  (even not for
> float constants?)
> You should probably double-check there is a LHS, folding runs before DCE.
> 
> > +tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> > +  TREE_INT_CST_LOW (arg0));

tree splat_value = fold_convert (TREE_TYPE (TREE_TYPE (lhs)), arg0);
?
TREE_INT_CST_LOW should not be used unless necessary, e.g. it can throw away
the upper bits of large constants, even if the type is wide.

Jakub


Re: [PATCH, rs6000] Gimple folding of splat_uX

2017-12-07 Thread Richard Biener
On Thu, Dec 7, 2017 at 12:26 PM, Jakub Jelinek  wrote:
> On Thu, Dec 07, 2017 at 12:21:03PM +0100, Richard Biener wrote:
>> > +if ( TREE_CODE (arg0) != INTEGER_CST)
>
> Also watch formatting, the space after ( doesn't belong there.
>
>> > +  return false;
>>
>> Is there a reason to not do this for non-constants?  (even not for
>> float constants?)
>> You should probably double-check there is a LHS, folding runs before DCE.
>>
>> > +tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
>> > +  TREE_INT_CST_LOW (arg0));
>
> tree splat_value = fold_convert (TREE_TYPE (TREE_TYPE (lhs)), arg0);
> ?
> TREE_INT_CST_LOW should not be used unless necessary, e.g. it can throw away
> the upper bits of large constants, even if the type is wide.

Indeed.  For non-constants even better use gimple_convert () which will emit
a separate stmt when necessary.
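
Putting these suggestions together, the constant-only folding body might end up roughly as follows (a sketch only, assuming a missing-LHS check is added as suggested above; not the committed patch):

  /* Sketch: fold vec_splat_[us]{8,16,32} of a constant argument.  */
  arg0 = gimple_call_arg (stmt, 0);
  lhs = gimple_call_lhs (stmt);
  if (lhs == NULL_TREE || TREE_CODE (arg0) != INTEGER_CST)
    return false;
  tree splat_value = fold_convert (TREE_TYPE (TREE_TYPE (lhs)), arg0);
  tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
  g = gimple_build_assign (lhs, splat_tree);
  gimple_set_location (g, gimple_location (stmt));
  gsi_replace (gsi, g, true);
  return true;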

Richard.

> Jakub


Re: [PATCH GCC]Introduce loop interchange pass and enable it at -O3

2017-12-07 Thread Richard Biener
On Thu, Dec 7, 2017 at 11:28 AM, Bin Cheng  wrote:
> Hi,
> This is the overall loop interchange patch on gimple-linterchange branch.  
> Note the new pass
> is enabled at -O3 level by default.  Bootstrap and regtest on x86_64 and 
> AArch64(ongoing).
> Note after cost model change it is now far more conservative than original 
> version.  It only
> interchanges 11 loops in spec2k6 (416 doesn't build at the moment), vs ~250 
> for the original
> version.  I will collect compilation time data, though there shouldn't be any 
> surprise given
> few loops are actually interchanged.  I will also collect spec2k6 data, 
> shouldn't affect cases
> other than bwaves either.
> So is it OK?

Please omit the no longer needed change to gsi_remove in
gimple-iterator.[ch].  The new
--params need documenting in invoke.texi.

Ok with those changes.

Thanks!
Richard.

> Thanks,
> bin
> 2017-12-07  Bin Cheng  
> Richard Biener  
>
> PR tree-optimization/81303
> * Makefile.in (gimple-loop-interchange.o): New object file.
> * common.opt (floop-interchange): Reuse the option from graphite.
> * doc/invoke.texi (-floop-interchange): Ditto.  New document for
> -floop-interchange and mention it for -O3.
> * opts.c (default_options_table): Enable -floop-interchange at -O3.
> * gimple-loop-interchange.cc: New file.
> * params.def (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS): New parameter.
> (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO): New parameter.
> * passes.def (pass_linterchange): New pass.
> * timevar.def (TV_LINTERCHANGE): New time var.
> * tree-pass.h (make_pass_linterchange): New declaration.
> * tree-ssa-loop-ivcanon.c (create_canonical_iv): Change to external
> interchange.  Record IV before/after increment in new parameters.
> * tree-ssa-loop-ivopts.h (create_canonical_iv): New declaration.
> * tree-vect-loop.c (vect_is_simple_reduction): Factor out reduction
> path check into...
> (check_reduction_path): ...New function here.
> * tree-vectorizer.h (check_reduction_path): New declaration.
>
> gcc/testsuite
> 2017-12-07  Bin Cheng  
> Richard Biener  
>
> PR tree-optimization/81303
> * gcc.dg/tree-ssa/loop-interchange-1.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-1b.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-2.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-3.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-4.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-5.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-6.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-7.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-8.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-9.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-10.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-11.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-12.c: New test.
> * gcc.dg/tree-ssa/loop-interchange-13.c: New test.


Re: [PATCH, rs6000] Gimple folding of splat_uX

2017-12-07 Thread Segher Boessenkool
Hi!

On Wed, Dec 06, 2017 at 10:36:58AM -0600, Will Schmidt wrote:
> Add support for gimple folding of splat_u{8,16,32}.
> Testcase coverage is primarily handled by existing tests
> testsuite/gcc.target/powerpc/fold-vec-splat_*.c
> 
> One new test added to verify we continue to receive
> an 'invalid argument, must be a 5-bit immediate' error
> when we try to splat a non-constant value.

I don't have much to add, maybe Bill does?  On the next version then I
guess.  Some more formatting stuff:

> +  arg0 = gimple_call_arg (stmt, 0);
> +  lhs = gimple_call_lhs (stmt);
> +  /* Only fold the vec_splat_*() if arg0 is constant.  */
> +  if ( TREE_CODE (arg0) != INTEGER_CST)
> +return false;
> +  tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> +TREE_INT_CST_LOW (arg0));
> +  vec<constructor_elt, va_gc> *ctor_elts = NULL;
> +  unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> +  for (unsigned int i=0; i < n_elts ; i++)

+for (unsigned int i = 0; i < n_elts; i++)

Well that's all ;-)


Segher


Re: [PATCH GCC]Introduce loop interchange pass and enable it at -O3

2017-12-07 Thread Bin.Cheng
On Thu, Dec 7, 2017 at 11:39 AM, Richard Biener
 wrote:
> On Thu, Dec 7, 2017 at 11:28 AM, Bin Cheng  wrote:
>> Hi,
>> This is the overall loop interchange patch on gimple-linterchange branch.  
>> Note the new pass
>> is enabled at -O3 level by default.  Bootstrap and regtest on x86_64 and 
>> AArch64(ongoing).
>> Note after cost model change it is now far more conservative than original 
>> version.  It only
>> interchanges 11 loops in spec2k6 (416 doesn't build at the moment), vs ~250 
>> for the original
>> version.  I will collect compilation time data, though there shouldn't be 
>> any surprise given
>> few loops are actually interchanged.  I will also collect spec2k6 data, 
>> shouldn't affect cases
>> other than bwaves either.
>> So is it OK?
>
> Please omit the no longer needed change to gsi_remove in
> gimple-iterator.[ch].  The new
> --params need documenting in invoke.texi.
Here is the updated patch.  I added documentation for the new parameters in
invoke.texi, but the original patch doesn't have any change in
gimple-iterator.[ch]?

Thanks,
bin
>
> Ok with those changes.
>
> Thanks!
> Richard.
>
>> Thanks,
>> bin
>> 2017-12-07  Bin Cheng  
>> Richard Biener  
>>
>> PR tree-optimization/81303
>> * Makefile.in (gimple-loop-interchange.o): New object file.
>> * common.opt (floop-interchange): Reuse the option from graphite.
>> * doc/invoke.texi (-floop-interchange): Ditto.  New document for
>> -floop-interchange and mention it for -O3.
>> * opts.c (default_options_table): Enable -floop-interchange at -O3.
>> * gimple-loop-interchange.cc: New file.
>> * params.def (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS): New parameter.
>> (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO): New parameter.
>> * passes.def (pass_linterchange): New pass.
>> * timevar.def (TV_LINTERCHANGE): New time var.
>> * tree-pass.h (make_pass_linterchange): New declaration.
>> * tree-ssa-loop-ivcanon.c (create_canonical_iv): Change to external
>> interchange.  Record IV before/after increment in new parameters.
>> * tree-ssa-loop-ivopts.h (create_canonical_iv): New declaration.
>> * tree-vect-loop.c (vect_is_simple_reduction): Factor out reduction
>> path check into...
>> (check_reduction_path): ...New function here.
>> * tree-vectorizer.h (check_reduction_path): New declaration.
>>
>> gcc/testsuite
>> 2017-12-07  Bin Cheng  
>> Richard Biener  
>>
>> PR tree-optimization/81303
>> * gcc.dg/tree-ssa/loop-interchange-1.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-1b.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-2.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-3.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-4.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-5.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-6.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-7.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-8.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-9.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-10.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-11.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-12.c: New test.
>> * gcc.dg/tree-ssa/loop-interchange-13.c: New test.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index db43fc1..3297437 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1302,6 +1302,7 @@ OBJS = \
gimple-iterator.o \
gimple-fold.o \
gimple-laddress.o \
+   gimple-loop-interchange.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index ffcbf85..6b9e4ea 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1504,8 +1504,8 @@ Common Alias(floop-nest-optimize)
 Enable loop nest transforms.  Same as -floop-nest-optimize.
 
 floop-interchange
-Common Alias(floop-nest-optimize)
-Enable loop nest transforms.  Same as -floop-nest-optimize.
+Common Report Var(flag_loop_interchange) Optimization
+Enable loop interchange on trees.
 
 floop-block
 Common Alias(floop-nest-optimize)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b8c8083..6a4e8aa 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7401,6 +7401,7 @@ by @option{-O2} and also turns on the following 
optimization flags:
 -ftree-loop-vectorize @gol
 -ftree-loop-distribution @gol
 -ftree-loop-distribute-patterns @gol
+-floop-interchange @gol
 -fsplit-paths @gol
 -ftree-slp-vectorize @gol
 -fvect-cost-model @gol
@@ -8500,12 +8501,10 @@ Perform loop optimizations on trees.  This flag is 
enabled by default
 at @option{-O} and higher.
 
 @item -ftree-loop-linear
-@itemx -floop-interchange
 @itemx -floop-strip-mine
 @itemx -floop-block
 @itemx -floop-unroll-and-jam
 @opindex ftree-loop-linear
-@opindex floop-interchange
 @opi

Re: [PATCH GCC]Introduce loop interchange pass and enable it at -O3

2017-12-07 Thread Richard Biener
On Thu, Dec 7, 2017 at 12:55 PM, Bin.Cheng  wrote:
> On Thu, Dec 7, 2017 at 11:39 AM, Richard Biener
>  wrote:
>> On Thu, Dec 7, 2017 at 11:28 AM, Bin Cheng  wrote:
>>> Hi,
>>> This is the overall loop interchange patch on gimple-linterchange branch.  
>>> Note the new pass
>>> is enabled at -O3 level by default.  Bootstrap and regtest on x86_64 and 
>>> AArch64(ongoing).
>>> Note after cost model change it is now far more conservative than original 
>>> version.  It only
>>> interchanges 11 loops in spec2k6 (416 doesn't build at the moment), vs ~250 
>>> for the original
>>> version.  I will collect compilation time data, though there shouldn't be 
>>> any surprise given
>>> few loops are actually interchanged.  I will also collect spec2k6 data, 
>>> shouldn't affect cases
>>> other than bwaves either.
>>> So is it OK?
>>
>> Please omit the no longer needed change to gsi_remove in
>> gimple-iterator.[ch].  The new
>> --params need documenting in invoke.texi.
> Here is the updated patch.  I added documentation for the new parameters in
> invoke.texi, but the original patch doesn't have any change in
> gimple-iterator.[ch]?

Whoops, sorry - looked at the wrong one.  The updated patch with the
missed docs fixed
is ok.

Thanks,
Richard.

> Thanks,
> bin
>>
>> Ok with those changes.
>>
>> Thanks!
>> Richard.
>>
>>> Thanks,
>>> bin
>>> 2017-12-07  Bin Cheng  
>>> Richard Biener  
>>>
>>> PR tree-optimization/81303
>>> * Makefile.in (gimple-loop-interchange.o): New object file.
>>> * common.opt (floop-interchange): Reuse the option from graphite.
>>> * doc/invoke.texi (-floop-interchange): Ditto.  New document for
>>> -floop-interchange and mention it for -O3.
>>> * opts.c (default_options_table): Enable -floop-interchange at -O3.
>>> * gimple-loop-interchange.cc: New file.
>>> * params.def (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS): New parameter.
>>> (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO): New parameter.
>>> * passes.def (pass_linterchange): New pass.
>>> * timevar.def (TV_LINTERCHANGE): New time var.
>>> * tree-pass.h (make_pass_linterchange): New declaration.
>>> * tree-ssa-loop-ivcanon.c (create_canonical_iv): Change to external
>>> interchange.  Record IV before/after increment in new parameters.
>>> * tree-ssa-loop-ivopts.h (create_canonical_iv): New declaration.
>>> * tree-vect-loop.c (vect_is_simple_reduction): Factor out reduction
>>> path check into...
>>> (check_reduction_path): ...New function here.
>>> * tree-vectorizer.h (check_reduction_path): New declaration.
>>>
>>> gcc/testsuite
>>> 2017-12-07  Bin Cheng  
>>> Richard Biener  
>>>
>>> PR tree-optimization/81303
>>> * gcc.dg/tree-ssa/loop-interchange-1.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-1b.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-2.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-3.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-4.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-5.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-6.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-7.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-8.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-9.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-10.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-11.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-12.c: New test.
>>> * gcc.dg/tree-ssa/loop-interchange-13.c: New test.


[PR81165] discount killed stmts when sizing blocks for threading

2017-12-07 Thread Alexandre Oliva
We limit the amount of copying for jump threading based on counting
stmts.  This counting is overly pessimistic, because we will very
often delete stmts as a consequence of jump threading: when the final
conditional jump of a block is removed, earlier SSA names computed
exclusively for use in that conditional are killed.  Furthermore, PHI
nodes in blocks with only two predecessors are trivially replaced with
their now-single values after threading.
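
To illustrate (a made-up example, not taken from the PR testcase):
in a function like

  int
  f (int a, int b, int n)
  {
    int t1 = a + b;   /* computed only for the comparison below */
    int t2 = t1 * 4;  /* likewise */
    if (t2 > n)       /* the conditional resolved by threading */
      return 1;
    return 0;
  }

once the copied block has its conditional resolved, t1 and t2 have no
remaining uses in the copy and are removed, so counting them against
the duplication limit overestimates the cost of threading.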

This patch scans blocks to be copied in the path constructed so far
and estimates the number of stmts that will be removed in the copies,
bumping up the stmt count limit.

Regstrapped on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?


for  gcc/ChangeLog

* tree-ssa-threadedge.c (uses_in_bb): New.
(estimate_threading_killed_stmts): New.
(estimate_threading_killed_stmts): New overload.
(record_temporary_equivalences_from_stmts_at_dest): Add path
parameter; adjust caller.  Expand limit when it's hit.

for  gcc/testsuite/ChangeLog

* gcc.dg/pr81165.c: New.
---
 gcc/testsuite/gcc.dg/pr81165.c |   59 
 gcc/tree-ssa-threadedge.c  |  189 +++-
 2 files changed, 245 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr81165.c

diff --git a/gcc/testsuite/gcc.dg/pr81165.c b/gcc/testsuite/gcc.dg/pr81165.c
new file mode 100644
index ..8508d893bed6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr81165.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not " \[/%\] " "optimized" } } */
+
+/* Testcase submitted for PR81165, with its main function removed as
+   it's turned into a compile test.  We want to make sure that all of
+   the divide/remainder computations are removed by tree optimizers.
+
+   We can figure out that we don't need to compute at runtime even the
+   condition to enter the loop: the initial i==0 would have to be
+   greater than the sum of two small unsigned values: 1U>>t1 is in the
+   range 0..1, whereas the char value is bounded by the range 0..127,
+   being 128 % a positive number (zero would invoke undefined
+   behavior, so we can assume it doesn't happen).  (We know it's
+   nonnegative because it's 10 times a number that has no more than
+   the bits for 16, 8 and 1 set.)
+
+   We don't realize that the loop is useless right away: jump
+   threading helps remove some of the complexity, particularly of the
+   computation within the loop: t1 is compared with 1, but it can
+   never be 1.  (We could assume as much, since its being 1 would
+   divide by zero, but we don't.)
+
+   If we don't enter the conditional block, t1 remains at 2; if we do,
+   it's set to either -1.  If we jump thread at the end of the
+   conditional block, we can figure out the ranges exclude 1 and the
+   jump body is completely optimized out.  However, we used to fail to
+   consider the block for jump threading due to the amount of
+   computation in it, without realizing most of it would die in
+   consequence of the threading.
+
+   We now take the dying code into account when deciding whether or
+   not to try jump threading.  That might enable us to optimize the
+   function into { if (x2 != 0 || (x1 & 1) == 0) abort (); }.  At the
+   time of this writing, with the patch, we get close, but the test on
+   x2 only gets as far as ((1 >> x2) == 0).  Without the patch, some
+   of the loop remains.  */
+
+short x0 = 15;
+
+void func (){
+  volatile int x1 = 1U;
+  volatile char x2 = 0;
+  char t0 = 0;
+  unsigned long t1 = 2LU;
+  int i = 0;
+  
+  if(1>>x2) {
+t0 = -1;
+t1 = (1&(short)(x1^8U))-1;
+  }
+
+  while(i > (int)((1U>>t1)+(char)(128%(10*(25LU&(29%x0)) {
+i += (int)(12L/(1!=(int)t1));
+  }
+
+  if (t0 != -1) __builtin_abort();
+  if (t1 != 0L) __builtin_abort();
+}
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 536c4717b725..25ccac2a3ecc 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -170,6 +170,173 @@ threadedge_valueize (tree t)
   return t;
 }
 
+/* Return how many uses of T there are within BB, as long as there
+   aren't any uses outside BB.  If there are any uses outside BB,
+   return -1 if there's at most one use within BB, or -2 if there is
+   more than one use within BB.  */
+
+static int
+uses_in_bb (tree t, basic_block bb)
+{
+  int uses = 0;
+  bool outside_bb = false;
+
+  imm_use_iterator iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
+{
+  if (is_gimple_debug (USE_STMT (use_p)))
+   continue;
+
+  if (gimple_bb (USE_STMT (use_p)) != bb)
+   outside_bb = true;
+  else
+   uses++;
+
+  if (outside_bb && uses > 1)
+   return -2;
+}
+
+  if (outside_bb)
+return -1;
+
+  return uses;
+}
+
+/* Starting from the final control flow stmt in BB, assuming it will
+   be removed, follow uses in to-be-removed stmts ba

[PATCH] -fdump-tree, -save-temps=obj & subdirs

2017-12-07 Thread Nathan Sidwell
There's an unfortunate interaction between -save-temps=obj and 
-fdump-tree- when subdirectories are in play.


Consider:
  g++ -fdump-tree-all -save-temps=obj -c -o tgt/x.o sub/x.cc
we end up with a bunch of errors of the form:
 sub/x.cc:1:0: error: could not open dump file 
‘tgt/tgt/x.046t.profile_estimate’: No such file or directory


you'll see it's added the 'tgt' sub directory twice.  The reason is that 
cc1plus is invoked as:


 /usr/libexec/gcc/x86_64-redhat-linux/7/cc1plus -E -quiet -v 
-D_GNU_SOURCE sub/x.cc -mtune=generic -march=x86-64 -ansi 
-Woverloaded-virtual -Wall -Wpointer-arith -Wwrite-strings 
-felide-constructors -fdump-tree-all -fpch-preprocess -o tgt/x.ii


to generate the preprocessed output, and then as:

 /usr/libexec/gcc/x86_64-redhat-linux/7/cc1plus -fpreprocessed tgt/x.ii 
-quiet -dumpbase tgt/x -mtune=generic -march=x86-64 -auxbase-strip 
tgt/x.o -Woverloaded-virtual -Wall -Wpointer-arith -Wwrite-strings -ansi 
-version -felide-constructors -fdump-tree-all -o tgt/x.s


for the compilation itself.  That has '-dumpbase tgt/x' and 
'-auxbase-strip tgt/x.o' passed in.


The options processing checks if dump-base is absolute, and if not 
prefixes dump_dir_name or aux_base_name's directory components.


It seems to me that the absolute path check is incomplete.  We should 
check if dump-base has /any/ directory components.  If it does, we 
shouldn't touch it.  That's what this patch does:


1) remove the absolute dir check.  That's subsumed into ...
2) look for any DIR_SEPARATOR in the dump-base.  If so, no prefixing
3 & 4) existing prefix code
5) always set dump_base_name_prefixed, so we don't repeat #2 on a 
subsequent invocation.


With this patch we now get a successful compilation:

nathans@lyta:7>egcs/trunk/obj/x86_64/gcc/xg++ -B 
egcs/trunk/obj/x86_64/gcc/ -fdump-tree-all -save-temps=obj -c -o tgt/x.o 
sub/x.cc

nathans@lyta:8>ls tgt
total 100
4 x.003t.original
4 x.004t.gimple
4 x.006t.omplower
4 x.007t.lower
4 x.009t.ehopt
4 x.010t.eh
4 x.011t.cfg
4 x.012t.ompexp
4 x.013t.printf-return-value1
4 x.019t.fixup_cfg1
4 x.020t.ssa
4 x.027t.fixup_cfg3
4 x.028t.local-fnsummary1
4 x.029t.einline
0 x.046t.profile_estimate
4 x.049t.release_ssa
4 x.050t.local-fnsummary2
4 x.087t.fixup_cfg4
4 x.218t.veclower
4 x.219t.cplxlower0
4 x.220t.switchlower
4 x.226t.resx
4 x.228t.optimized
0 x.313t.statistics
4 x.ii
4 x.o
4 x.s


ok?

nathan
--
Nathan Sidwell
2017-12-06  Nathan Sidwell  

	* opts.c (finish_options): Don't prefix dump_base_name if it
	already contains directories.

Index: gcc/opts.c
===
--- gcc/opts.c	(revision 255442)
+++ gcc/opts.c	(working copy)
@@ -698,19 +698,27 @@ finish_options (struct gcc_options *opts
   enum unwind_info_type ui_except;
 
   if (opts->x_dump_base_name
-  && ! IS_ABSOLUTE_PATH (opts->x_dump_base_name)
   && ! opts->x_dump_base_name_prefixed)
 {
-  /* First try to make OPTS->X_DUMP_BASE_NAME relative to the
-	 OPTS->X_DUMP_DIR_NAME directory.  Then try to make
-	 OPTS->X_DUMP_BASE_NAME relative to the OPTS->X_AUX_BASE_NAME
-	 directory, typically the directory to contain the object
-	 file.  */
-  if (opts->x_dump_dir_name)
+  const char *sep = opts->x_dump_base_name;
+
+  for (; *sep; sep++)
+	if (IS_DIR_SEPARATOR (*sep))
+	  break;
+
+  if (*sep)
+	/* If dump_base_path contains subdirectories, don't prepend
+	   anything.  */;
+  else if (opts->x_dump_dir_name)
+	/* We have a DUMP_DIR_NAME, prepend that.  */
 	opts->x_dump_base_name = opts_concat (opts->x_dump_dir_name,
 	  opts->x_dump_base_name, NULL);
   else if (opts->x_aux_base_name
 	   && strcmp (opts->x_aux_base_name, HOST_BIT_BUCKET) != 0)
+	/* AUX_BASE_NAME is set and is not the bit bucket.  If it
+	   contains a directory component, prepend those directories.
+	   Typically this places things in the same directory as the
+	   object file.  */
 	{
 	  const char *aux_base;
 
@@ -729,7 +737,9 @@ finish_options (struct gcc_options *opts
 	  opts->x_dump_base_name = new_dump_base_name;
 	}
 	}
-	opts->x_dump_base_name_prefixed = true;
+
+  /* It is definitely prefixed now.  */
+  opts->x_dump_base_name_prefixed = true;
 }
 
   /* Handle related options for unit-at-a-time, toplevel-reorder, and


Re: [PR81165] discount killed stmts when sizing blocks for threading

2017-12-07 Thread Richard Biener
On Thu, Dec 7, 2017 at 1:04 PM, Alexandre Oliva  wrote:
> We limit the amount of copying for jump threading based on counting
> stmts.  This counting is overly pessimistic, because we will very
> often delete stmts as a consequence of jump threading: when the final
> conditional jump of a block is removed, earlier SSA names computed
> exclusively for use in that conditional are killed.  Furthermore, PHI
> nodes in blocks with only two predecessors are trivially replaced with
> their now-single values after threading.
>
> This patch scans blocks to be copied in the path constructed so far
> and estimates the number of stmts that will be removed in the copies,
> bumping up the stmt count limit.
>
> Regstrapped on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?
>
>
> for  gcc/ChangeLog
>
> * tree-ssa-threadedge.c (uses_in_bb): New.
> (estimate_threading_killed_stmts): New.
> (estimate_threading_killed_stmts): New overload.
> (record_temporary_equivalences_from_stmts_at_dest): Add path
> parameter; adjust caller.  Expand limit when it's hit.
>
> for  gcc/testsuite/ChangeLog
>
> * gcc.dg/pr81165.c: New.
> ---
>  gcc/testsuite/gcc.dg/pr81165.c |   59 
>  gcc/tree-ssa-threadedge.c  |  189 
> +++-
>  2 files changed, 245 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr81165.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr81165.c b/gcc/testsuite/gcc.dg/pr81165.c
> new file mode 100644
> index ..8508d893bed6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr81165.c
> @@ -0,0 +1,59 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-not " \[/%\] " "optimized" } } */
> +
> +/* Testcase submitted for PR81165, with its main function removed as
> +   it's turned into a compile test.  We want to make sure that all of
> +   the divide/remainder computations are removed by tree optimizers.
> +
> +   We can figure out that we don't need to compute at runtime even the
> +   condition to enter the loop: the initial i==0 would have to be
> +   greater than the sum of two small unsigned values: 1U>>t1 is in the
> +   range 0..1, whereas the char value is bounded by the range 0..127,
> +   being 128 % a positive number (zero would invoke undefined
> +   behavior, so we can assume it doesn't happen).  (We know it's
> +   nonnegative because it's 10 times a number that has no more than
> +   the bits for 16, 8 and 1 set.)
> +
> +   We don't realize that the loop is useless right away: jump
> +   threading helps remove some of the complexity, particularly of the
> +   computation within the loop: t1 is compared with 1, but it can
> +   never be 1.  (We could assume as much, since its being 1 would
> +   divide by zero, but we don't.)
> +
> +   If we don't enter the conditional block, t1 remains at 2; if we do,
> +   it's set to either -1.  If we jump thread at the end of the
> +   conditional block, we can figure out the ranges exclude 1 and the
> +   jump body is completely optimized out.  However, we used to fail to
> +   consider the block for jump threading due to the amount of
> +   computation in it, without realizing most of it would die in
> +   consequence of the threading.
> +
> +   We now take the dying code into account when deciding whether or
> +   not to try jump threading.  That might enable us to optimize the
> +   function into { if (x2 != 0 || (x1 & 1) == 0) abort (); }.  At the
> +   time of this writing, with the patch, we get close, but the test on
> +   x2 only gets as far as ((1 >> x2) == 0).  Without the patch, some
> +   of the loop remains.  */
> +
> +short x0 = 15;
> +
> +void func (){
> +  volatile int x1 = 1U;
> +  volatile char x2 = 0;
> +  char t0 = 0;
> +  unsigned long t1 = 2LU;
> +  int i = 0;
> +
> +  if(1>>x2) {
> +t0 = -1;
> +t1 = (1&(short)(x1^8U))-1;
> +  }
> +
> +  while(i > (int)((1U>>t1)+(char)(128%(10*(25LU&(29%x0)) {
> +i += (int)(12L/(1!=(int)t1));
> +  }
> +
> +  if (t0 != -1) __builtin_abort();
> +  if (t1 != 0L) __builtin_abort();
> +}
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index 536c4717b725..25ccac2a3ecc 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -170,6 +170,173 @@ threadedge_valueize (tree t)
>return t;
>  }
>
> +/* Return how many uses of T there are within BB, as long as there
> +   aren't any uses outside BB.  If there are any uses outside BB,
> +   return -1 if there's at most one use within BB, or -2 if there is
> +   more than one use within BB.  */
> +
> +static int
> +uses_in_bb (tree t, basic_block bb)
> +{
> +  int uses = 0;
> +  bool outside_bb = false;
> +
> +  imm_use_iterator iter;
> +  use_operand_p use_p;
> +  FOR_EACH_IMM_USE_FAST (use_p, iter, t)
> +{
> +  if (is_gimple_debug (USE_STMT (use_p)))
> +   continue;
> +
> +  if (gimple_bb (USE_STMT (use_p)) != bb)
> +   

Re: [PATCH v2] Ability to remap file names in __FILE__, etc (PR other/70268)

2017-12-07 Thread Boris Kolpackov
Thanks for the review. Second revision of the patch attached (also
rebased on the current trunk). Issues that are not commented on
below have been resolved as suggested.


David Malcolm  writes:

> To my naive eyes this seems like a useful addition, but I'm hoping
> someone with more knowledge of the standards around the preprocessor
> can comment.

This would definitely be a non-standard extension to any preprocessor
standard there might be. Since it's not enabled by default I don't see
any issues though.


> Does any other compiler implement something similar?

Not that I am aware of.


> > * file-prefix-map.c: New file.
> 
> IIRC, new files should have a .cc suffix.

Hm, grepping changelogs for "New file" appears to contradict this. Can
someone confirm?


> What "owns" the memory returned by remap_macro_filename?
> 
> I see later on that this calls remap_filename, and hence the result is
> either ggc-allocated, or is the input pointer, and hence "fname" is
> temporary memory that goes away when the GC runs; build_string_literal
> takes a copy. (This is different from Hongxu Jia's patch attached to
> PR 70268, which calls xstrdup *or* returns the input pointer).

Yes. Do you see a problem with this or is this just a note? (I do see
a problem with Hongxu Jia's approach).


> I'm not a fan of the name "-fmacro-prefix-map" as it makes me wonder
> "what macros are affected?"; it doesn't immediately suggest __FILE__ to
> me (and __BASE_FILE__).
> 
> I wonder if "-f__FILE__-prefix-map" is sane and implementable?  (sorry
> to "bikeshed" this).

Yeah, "-f__FILE__-prefix-map" looks rather insane to me ;-).

I've spent some time picking the name. -fmacro-prefix-map is at least
consistent with (rather generic) -fdebug-prefix-map:

-fdebug-prefix-map - remap in 'debug' (information)
-fmacro-prefix-map - remap in 'macro' (expansions)
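
As a sketch of the intended behavior (all paths below are made up for
illustration), compiling

  gcc -c -fmacro-prefix-map=/home/user/project=. \
      /home/user/project/src/foo.c

with a translation unit like

  #include <stdio.h>

  void
  report (void)
  {
    /* "/home/user/project/src/foo.c" without the option,
       "./src/foo.c" with it.  */
    printf ("%s:%d\n", __FILE__, __LINE__);
  }

remaps the prefix in __FILE__ (and __BASE_FILE__), while debug info is
only affected if -fdebug-prefix-map is given as well.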


> You say "all the individual...options"; am I right in thinking there
> are just two of them: -fmacro-prefix-map and -fdebug-prefix-map?
> 
> If so, wouldn't it better to say "both of the options" and list them
> there.

While currently there are two, it is plausible there could be more in
the future. Specific areas that could require this:

1. C++ source_location:

   http://en.cppreference.com/w/cpp/experimental/source_location

2. C++ Modules TS support (that Nathan is working on) involves
   binary module interfaces that could embed file references.
   
This is also the reason for -ffile-prefix-map which will cover all
future such options.


> > +  error ("invalid argument %qs to %s", arg, opt);
> 
> I think both of these should be %qs, so that the option is quoted (the
> old code in final.c didn't do that, but I think it should have).

I personally disagree since an option like -ffile-prefix-map is not
easy to confuse with a language word. But I defer to your judgment.


> > +  s = (char *) ggc_alloc_atomic (name_len + map->new_len);
> > +  memcpy (s, map->new_prefix, map->new_len);
> > +  memcpy (s + map->new_len, name, name_len);
> > +  return s;
> > +}
> 
> You've moved this code from final.c, where the memory is allocated on
> stack and then copied via ggc_strdup, but with an early exit for :
> 
> > -  s = (char *) alloca (name_len + map->new_len);
> > -  memcpy (s, map->new_prefix, map->new_len);
> > -  memcpy (s + map->new_len, name, name_len);
> > -  return ggc_strdup (s);
> 
> so I guess the ggc_alloc_atomic avoids a strlen.

My code avoids the call to alloca() (which was an issue raised with
Hongxu Jia's patch). ggc_strdup() is implemented in terms of
ggc_alloc_atomic().


> > diff --git a/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
> > b/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
> > new file mode 100644
> > index 000..cf14de84a0d
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
> > @@ -0,0 +1,5 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-ffile-prefix-map==FILE-PREFIX" } */
> 
> What's up with this "=="? (as opposed to "=").

Since I cannot predict the actual path, I am remapping the empty prefix
to FILE-PREFIX, which effectively adds FILE-PREFIX to any path.


> Is this being interpreted as an argument of "=FILE-PREFIX" to the
> option?  If so what does this mean?  (and if it's a meaningful special-
> case, please can you document this in the .texi)

It's not really a special case (an empty prefix is a prefix of any path)
and is not very useful in practice. Are you sure it's a good idea to have
this noise seeing that I will have to do it for all three options?


> > +#pragma message __FILE__   /* { dg-message "FILE-PREFIX" } */
> > +#pragma message __BASE_FILE__  /* { dg-message "FILE-PREFIX" } */
> 
> Please add some before/after text to the #pragma message and to the dg-
> message so that the dg-message is verifying the exact value of
> __FILE__, and not just a substring match.

I've added a prefix but I am not sure there is much value in a suffix
since I cannot predict the exact tail of __FILE__.

Revis

[PATCH] Fix PR83296, alternate PR67769 fix

2017-12-07 Thread Richard Biener

This reverts the fix for PR67769 and installs an alternate fix
which is more targeted.  This helps preserving range info where
important (int this case for emitting a -Wstringop-overflow
warning).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-12-07  Richard Biener  

PR tree-optimization/83296
PR tree-optimization/67769
* tree-ssa-phiopt.c (conditional_replacement): Do not reset
flow sensitive info in an unrelated BB.
(value_replacement): Use reset_flow_sensitive_info.
(minmax_replacement): Reset flow sensitive info on the def
we move.  Do not reset flow sensitive info in the whole BB
we move the stmt to.
(abs_replacement): Likewise.

* g++.dg/warn/Wstringop-overflow-1.C: New testcase.

Index: gcc/tree-ssa-phiopt.c
===
--- gcc/tree-ssa-phiopt.c   (revision 255461)
+++ gcc/tree-ssa-phiopt.c   (working copy)
@@ -672,7 +672,6 @@ conditional_replacement (basic_block con
 }
 
   replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
-  reset_flow_sensitive_info_in_bb (cond_bb);
 
   /* Note that we optimized this PHI.  */
   return true;
@@ -1138,22 +1137,22 @@ value_replacement (basic_block cond_bb,
  cond_rhs, false, rhs2))
 {
   gsi = gsi_for_stmt (cond);
+  /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
+def-stmt in:
+  if (n_5 != 0)
+goto ;
+  else
+goto ;
+
+  :
+  # RANGE [0, 4294967294]
+  u_6 = n_5 + 4294967295;
+
+  :
+  # u_3 = PHI   */
+  reset_flow_sensitive_info (lhs);
   if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
{
- /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
-def-stmt in:
-if (n_5 != 0)
-  goto ;
-else
-  goto ;
-
-:
-# RANGE [0, 4294967294]
-u_6 = n_5 + 4294967295;
-
-:
-# u_3 = PHI   */
- SSA_NAME_RANGE_INFO (lhs) = NULL;
  /* If available, we can use VR of phi result at least.  */
  tree phires = gimple_phi_result (phi);
  struct range_info_def *phires_range_info
@@ -1166,7 +1165,7 @@ value_replacement (basic_block cond_bb,
   for (int i = prep_cnt - 1; i >= 0; --i)
{
  tree plhs = gimple_assign_lhs (prep_stmt[i]);
- SSA_NAME_RANGE_INFO (plhs) = NULL;
+ reset_flow_sensitive_info (plhs);
  gsi_from = gsi_for_stmt (prep_stmt[i]);
  gsi_move_before (&gsi_from, &gsi);
}
@@ -1490,6 +1489,8 @@ minmax_replacement (basic_block cond_bb,
   /* Move the statement from the middle block.  */
   gsi = gsi_last_bb (cond_bb);
   gsi_from = gsi_last_nondebug_bb (middle_bb);
+  reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from),
+ SSA_OP_DEF));
   gsi_move_before (&gsi_from, &gsi);
 }
 
@@ -1508,7 +1509,6 @@ minmax_replacement (basic_block cond_bb,
   gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
 
   replace_phi_edge_with_variable (cond_bb, e1, phi, result);
-  reset_flow_sensitive_info_in_bb (cond_bb);
 
   return true;
 }
@@ -1636,7 +1636,6 @@ abs_replacement (basic_block cond_bb, ba
 }
 
   replace_phi_edge_with_variable (cond_bb, e1, phi, result);
-  reset_flow_sensitive_info_in_bb (cond_bb);
 
   /* Note that we optimized this PHI.  */
   return true;
Index: gcc/testsuite/g++.dg/warn/Wstringop-overflow-1.C
===
--- gcc/testsuite/g++.dg/warn/Wstringop-overflow-1.C(nonexistent)
+++ gcc/testsuite/g++.dg/warn/Wstringop-overflow-1.C(working copy)
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-additional-options "-O2 -Wstringop-overflow=2" }
+
+struct S {
+char a[5];
+void (*pf)(void);
+};
+
+void f (struct S *s, int n)
+{
+  if (n < sizeof s->a + 1)
+n = sizeof s->a + 1;
+
+  __builtin_strncpy (s->a, "123456", n);   // { dg-warning "writing 6" }
+}


Re: [PATCH] avoid bogus -Wstringop-overflow for strncpy with _FORTIFY_SOURCE (PR 82646)

2017-12-07 Thread Christophe Lyon
Hi Martin,


On 6 December 2017 at 00:51, Jeff Law  wrote:
> On 12/05/2017 04:47 PM, Martin Sebor wrote:
>> PR middle-end/82646 - bogus -Wstringop-overflow with
>> -D_FORTIFY_SOURCE=2 on strncpy with range to a member array,
>>
>> The bug points out a false positive in a call to strncpy() when
>> _FORTIFY_SOURCE is defined that doesn't exist otherwise.
>>
>> The problem is that __builtin_strncpy buffer overflow checking
>> is done along with the expansion of the intrinsic in one place
>> and __builtin___strncpy_chk is handled differently in another,
>> and the two are out of sync.
>>
>> The attached patch corrects the choice of arguments used for
>> overflow detection in __builtin___strncpy_chk and aligns
>> the diagnostics between the two intrinsics.
>>
>> Martin
>>
>> gcc-82646.diff
>>
>>
>> PR tree-optimization/82646 - bogus -Wstringop-overflow with 
>> -D_FORTIFY_SOURCE=2 on strncpy with range to a member array
>>
>> gcc/ChangeLog:
>>
>>   PR tree-optimization/82646
>>   * builtins.c (maybe_emit_chk_warning): Use size as the bound for
>>   strncpy, not maxlen.
>>
>> gcc/testsuite/ChangeLog:
>>
>>   PR tree-optimization/82646
>>   * gcc.dg/builtin-stringop-chk-1.c: Adjust.
>>   * gcc.dg/builtin-stringop-chk-9.c: New test.
> OK.
>

The new test fails on 32 bits platforms (arm, x86_32, aarch64 ilp32):
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 125)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 133)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 141)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 149)

Christophe

> [ Happy to see something easy fly by that isn't SVE related :-) ]
>
> jeff


Re: Add unroll-and-jam pass v2

2017-12-07 Thread Michael Matz
Hi,

On Mon, 20 Nov 2017, Richard Biener wrote:

> > +static void
> > +fix_loop_structure (struct loop *loop, struct loop *old)
> 
> We have a function with the same name in loop-init.c so please use a 
> different name.

Renamed to merge_loop_tree.

> > +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > +{
> > +  gimple *g = gsi_stmt (gsi);
> > +  if (gimple_vdef (g))
> 
> you mention unknown side-effects above but you don't test
> gimple_has_side_effects (g)?

Hmm, I didn't think that there could be side-effects for any insn which 
doesn't have a vdef.  I forgot about throwing ones like div-by-zero.  I've 
added the check.

> > +  /* The number of iterations of the inner loop must be loop invariant
> > + with respect to the outer loop.  */
> > +  if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
> > +false, true)
> > +  || niter.cmp == ERROR_MARK
> > +  || !expr_invariant_in_loop_p (outer, niter.niter))
> > +return false;
> 
> maybe you check it elsewhere but with n_o_i_e you have to check
> for may_be_zero and assumptions as well.

The assumption is tested in number_of_iterations_exit, but indeed I have 
to check for may_be_zero, so added.

> > +/* Returns true if the distance in DDR can be determined and adjusts
> > +   the unroll factor in *UNROLL to be valid for that distance.  If this
> > +   data dep can lead to a removed memory reference, increment *REMOVED
> > +   and adjust *PROFIT_UNROLL to be the necessary unroll factor for this
> > +   to happen.  Otherwise returns false.  */
> 
> As I understand this function doesn't distinguish different *removed for
> different unroll factors so if unrolling by two would remove 10 refs and
> unrolling by four an additional two then we'd unroll by four?

Yes; I consider this okay for the time being, let's not complicate the 
cost model without some real world evidence it's necessary.

> The function seems to compute a maximum validity unroll factor (UNROLL),
> so the function name is misleading.  I guess the comment above should
> say "to make unrolling valid for that distance", at least I was confused
> by an unroll factor to be valid or not.

Renamed to adjust_unroll_factor and rewrote comment.

> > +  if (loop_depth (loop) < 2
> > + || optimize_loop_for_size_p (loop))
> 
> I think you want optimize_loop_nest_for_size (outer) here

Hmm, maybe :)  Changed to that.

> > +   unroll_factor = profit_unroll;
> > +  if (unroll_factor > 4)
> > +   unroll_factor = 4;
> 
> PARAM_UNROLL_JAM_MAX_UNROLL?

Added that.

> > + free_original_copy_tables ();
> > + outer->inner = reverse_list (outer->inner);
> 
> Maybe make tree_unroll_loop sort this in the correct way?  As written 
> you're using an implementation detail of this helper while in general 
> the loop tree is arbitrarily ordered.

I've now instead made the unroller retain the order of inner sibling loops 
and add the new sibling loops at the end of the list (and documented 
that).  So if the input sibling list is in program order (e.g. if there's 
only one inner loop) then the output after unrolling will be as well.

So I can now get rid of reverse_list.

Regstrapped on trunk on x86_64-linux, okay?


Ciao,
Michael.
commit 9445396f7af85017a70403471d82e9cb0c674f08
Author: Michael Matz 
Date:   Fri Nov 17 13:49:39 2017 +0100

Add unroll and jam pass

* gimple-loop-jam.c: New file.
* Makefile.in (OBJS): Add gimple-loop-jam.o.
* common.opt (funroll-and-jam): New option.
* opts.c (default_options_table): Add unroll-and-jam at -O3.
* params.def (PARAM_UNROLL_JAM_MIN_PERCENT): New param.
(PARAM_UNROLL_JAM_MAX_UNROLL): Ditto.
* passes.def: Add pass_loop_jam.
* timevar.def (TV_LOOP_JAM): Add.
* tree-pass.h (make_pass_loop_jam): Declare.
* cfgloop.c (flow_loop_tree_node_add): Add AT argument.
* cfgloop.h (flow_loop_tree_node_add): Adjust declaration.
* cfgloopmanip.c (duplicate_loop): Add AT argument, adjust call
to flow_loop_tree_node_add.
(duplicate_subloops, copy_loops_to): Append to sibling list.
* cfgloopmanip.h: (duplicate_loop): Adjust declaration.
* doc/invoke.texi (-funroll-and-jam): Document new option.
(unroll-jam-min-percent, unroll-jam-max-unroll): Document new params.

testsuite/
* gcc.dg/unroll-and-jam.c: New test.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index db43fc1..ad92bce 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1302,6 +1302,7 @@ OBJS = \
gimple-iterator.o \
gimple-fold.o \
gimple-laddress.o \
+   gimple-loop-jam.o \
gimple-low.o \
gimple-pretty-print.o \
gimple-ssa-backprop.o \
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index d82da97..9fd82d9 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -296,13 +296,25 @@ establish_preds (struct lo

Re: Add unroll-and-jam pass v2

2017-12-07 Thread Richard Biener
On Thu, Dec 7, 2017 at 3:11 PM, Michael Matz  wrote:
> Hi,
>
> On Mon, 20 Nov 2017, Richard Biener wrote:
>
>> > +static void
>> > +fix_loop_structure (struct loop *loop, struct loop *old)
>>
>> We have a function with the same name in loop-init.c so please use a
>> different name.
>
> Renamed to merge_loop_tree.
>
>> > +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> > +{
>> > +  gimple *g = gsi_stmt (gsi);
>> > +  if (gimple_vdef (g))
>>
>> you mention unknown side-effects above but you don't test
>> gimple_has_side_effects (g)?
>
> Hmm, I didn't think that there could be side-effects for any insn which
> doesn't have a vdef.  I forgot about throwing ones like div-by-zero.  I've
> added the check.
>
>> > +  /* The number of iterations of the inner loop must be loop invariant
>> > + with respect to the outer loop.  */
>> > +  if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
>> > +false, true)
>> > +  || niter.cmp == ERROR_MARK
>> > +  || !expr_invariant_in_loop_p (outer, niter.niter))
>> > +return false;
>>
>> maybe you check it elsewhere but with n_o_i_e you have to check
>> for may_be_zero and assumptions as well.
>
> The assumption is tested in number_of_iterations_exit, but indeed I have
> to check for may_be_zero, so added.
>
>> > +/* Returns true if the distance in DDR can be determined and adjusts
>> > +   the unroll factor in *UNROLL to be valid for that distance.  If this
>> > +   data dep can lead to a removed memory reference, increment *REMOVED
>> > +   and adjust *PROFIT_UNROLL to be the necessary unroll factor for this
>> > +   to happen.  Otherwise returns false.  */
>>
>> As I understand this function doesn't distinguish different *removed for
>> different unroll factors so if unrolling by two would remove 10 refs and
>> unrolling by four an additional two then we'd unroll by four?
>
> Yes; I consider this okay for the time being, let's not complicate the
> cost model without some real world evidence it's necessary.
>
>> The function seems to compute a maximum validity unroll factor (UNROLL),
>> so the function name is misleading.  I guess the comment above should
>> say "to make unrolling valid for that distance", at least I was confused
>> by an unroll factor to be valid or not.
>
> Renamed to adjust_unroll_factor and rewrote comment.
>
>> > +  if (loop_depth (loop) < 2
>> > + || optimize_loop_for_size_p (loop))
>>
>> I think you want optimize_loop_nest_for_size (outer) here
>
> Hmm, maybe :)  Changed to that.
>
>> > +   unroll_factor = profit_unroll;
>> > +  if (unroll_factor > 4)
>> > +   unroll_factor = 4;
>>
>> PARAM_UNROLL_JAM_MAX_UNROLL?
>
> Added that.
>
>> > + free_original_copy_tables ();
>> > + outer->inner = reverse_list (outer->inner);
>>
>> Maybe make tree_unroll_loop sort this in the correct way?  As written
>> you're using an implementation detail of this helper while in general
>> the loop tree is arbitrarily ordered.
>
> I've now instead made the unroller retain the order of inner sibling loops
> and add the new sibling loops at the end of the list (and documented
> that).  So if the input sibling list is in program order (e.g. if there's
> only one inner loop) then the output after unrolling will be as well.
>
> So I can now get rid of reverse_list.
>
> Regstrapped on trunk on x86_64-linux, okay?

Minor comments below, ok with those changes.

Thanks,
Richard.

>
> Ciao,
> Michael.
> commit 9445396f7af85017a70403471d82e9cb0c674f08
> Author: Michael Matz 
> Date:   Fri Nov 17 13:49:39 2017 +0100
>
> Add unroll and jam pass
>
> * gimple-loop-jam.c: New file.
> * Makefile.in (OBJS): Add gimple-loop-jam.o.
> * common.opt (funroll-and-jam): New option.
> * opts.c (default_options_table): Add unroll-and-jam at -O3.
> * params.def (PARAM_UNROLL_JAM_MIN_PERCENT): New param.
> (PARAM_UNROLL_JAM_MAX_UNROLL): Ditto.
> * passes.def: Add pass_loop_jam.
> * timevar.def (TV_LOOP_JAM): Add.
> * tree-pass.h (make_pass_loop_jam): Declare.
> * cfgloop.c (flow_loop_tree_node_add): Add AT argument.
> * cfgloop.h (flow_loop_tree_node_add): Adjust declaration.
> * cfgloopmanip.c (duplicate_loop): Add AT argument, adjust call
> to flow_loop_tree_node_add.
> (duplicate_subloops, copy_loops_to): Append to sibling list.
> * cfgloopmanip.h: (duplicate_loop): Adjust declaration.
> * doc/invoke.texi (-funroll-and-jam): Document new option.
> (unroll-jam-min-percent, unroll-jam-max-unroll): Document new params.
>
> testsuite/
> * gcc.dg/unroll-and-jam.c: New test.
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index db43fc1..ad92bce 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1302,6 +1302,7 @@ OBJS = \
> gimple-iterator.o \
> gimple-fold.o \
> gimple-laddress.o \
> +   

FW: [PATCH] rl78 umaxdi3 improvement

2017-12-07 Thread Sebastian Perta
Hello,

The following patch improves both speed and code size for 64-bit
unsigned max on RL78: it emits a library function call instead of
expanding inline code for the 64-bit max at every use.
The unsigned max function added to libgcc is hand-written, so it is
smaller and faster than what GCC generates.

The change can easily be seen on the following test case:
unsigned long long my_umaxdi3 (unsigned long long x, unsigned long long y)
{
  return (x > y) ? x : y;
}
I did not add this to the regression suite as it is very simple and there
are test cases in the suite which already test this, for example
gcc/testsuite/gcc.c-torture/execute/pr49039.c
Regression test is OK, tested with the following command:
make -k check-gcc RUNTESTFLAGS=--target_board=rl78-sim

Please let me know if this is OK, Thank you!
Sebastian

Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 255466)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2017-12-07  Sebastian Perta  
+
+   * config/rl78/rl78.md: New define_expand "umaxdi3".
+   
 2017-12-07  Richard Biener  
 
PR tree-optimization/83296
Index: gcc/config/rl78/rl78.md
===
--- gcc/config/rl78/rl78.md (revision 255466)
+++ gcc/config/rl78/rl78.md (working copy)
@@ -718,3 +718,13 @@
   [(set_attr "valloc" "macax")
(set_attr "is_g13_muldiv_insn" "yes")]
 )
+
+(define_expand "umaxdi3"
+ [(set (match_operand:DI  0 "nonimmediate_operand" "")
+   (umax:DI (match_operand:DI 1 "general_operand"  "")
+            (match_operand:DI 2 "general_operand"  "")))
+   ]
+  "optimize_size"
+  "rl78_emit_libcall (\"__umaxdi3\", UMAX, DImode, DImode, 3, operands);
+   DONE;"
+)
Index: libgcc/ChangeLog
===
--- libgcc/ChangeLog(revision 255466)
+++ libgcc/ChangeLog(working copy)
@@ -1,3 +1,8 @@
+2017-12-07  Sebastian Perta  
+ 
+   * config/rl78/umaxdi3.S: New assembly file.
+   * config/rl78/t-rl78: Added umaxdi3.S to LIB2ADD.
+
 2017-11-30  Michael Meissner  
 
* config/rs6000/_mulkc3.c (__mulkc3): Add forward declaration.
Index: libgcc/config/rl78/t-rl78
===
--- libgcc/config/rl78/t-rl78   (revision 255466)
+++ libgcc/config/rl78/t-rl78   (working copy)
@@ -32,7 +32,8 @@
$(srcdir)/config/rl78/fpmath-sf.S \
$(srcdir)/config/rl78/cmpsi2.S \
$(srcdir)/config/rl78/adddi3.S \
-   $(srcdir)/config/rl78/subdi3.S
+   $(srcdir)/config/rl78/subdi3.S \
+   $(srcdir)/config/rl78/umaxdi3.S
 
 LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
   _popcounthi2 _popcountsi2 \
Index: libgcc/config/rl78/umaxdi3.S
===
--- libgcc/config/rl78/umaxdi3.S(nonexistent)
+++ libgcc/config/rl78/umaxdi3.S(working copy)
@@ -0,0 +1,74 @@
+;   Copyright (C) 2017 Free Software Foundation, Inc.
+;   Contributed by Sebastian Perta.
+; 
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; 
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+; 
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; .
+
+
+#include "vregs.h"
+
+.text
+
+START_FUNC ___umaxdi3
+
+; copy first argument/operand to the output registers
+movw   ax, [sp+4]
+movw   r8, ax
+movw   ax, [sp+6]
+movw   r10, ax
+movw   ax, [sp+8]
+movw   r12, ax
+movw   ax, [sp+10]
+movw   r14, ax
+
+; use 16-bit compares from the most significant words down to the least significant ones
+movw   ax, [sp+18]
+cmpw   ax, r14
+bh $.L1
+bnz    $.L2
+
+movw   ax, [sp+16]
+cmpw   ax, r12
+bh $.L1
+bnz    $.L2
+
+movw   ax, [sp+14]
+cmpw   ax, r10
+bh $.L1
+bnz    $.L2
+
+movw   ax, [sp+12]
+cmpw   ax, r8
+bh $.L1
+ret
+
+.L1:
+; copy second argument/operand to the output registers
+movw   ax, [sp+12]
+movw   r8, ax
+movw   ax, [sp+14]
+movw   r10, ax
+movw   ax, [sp+16]
+movw   r12, ax
+movw   ax, [sp+18]
+movw   r14, ax
+.L2:
+  

Re: [001/nnn] poly_int: add poly-int.h

2017-12-07 Thread Richard Biener
On Wed, Dec 6, 2017 at 9:11 PM, Jeff Law  wrote:
> On 11/13/2017 05:04 PM, Richard Sandiford wrote:
>> Richard Sandiford  writes:
>>> Richard Sandiford  writes:
 This patch adds a new "poly_int" class to represent polynomial integers
 of the form:

   C0 + C1*X1 + C2*X2 ... + Cn*Xn

 It also adds poly_int-based typedefs for offsets and sizes of various
 precisions.  In these typedefs, the Ci coefficients are compile-time
 constants and the Xi indeterminates are run-time invariants.  The number
 of coefficients is controlled by the target and is initially 1 for all
 ports.

 Most routines can handle general coefficient counts, but for now a few
 are specific to one or two coefficients.  Support for other coefficient
 counts can be added when needed.

 The patch also adds a new macro, IN_TARGET_CODE, that can be
 set to indicate that a TU contains target-specific rather than
 target-independent code.  When this macro is set and the number of
 coefficients is 1, the poly-int.h classes define a conversion operator
 to a constant.  This allows most existing target code to work without
 modification.  The main exceptions are:

 - values passed through ..., which need an explicit conversion to a
   constant

 - ?: expression in which one arm ends up being a polynomial and the
   other remains a constant.  In these cases it would be valid to convert
   the constant to a polynomial and the polynomial to a constant, so a
   cast is needed to break the ambiguity.

 The patch also adds a new target hook to return the estimated
 value of a polynomial for costing purposes.

 The patch also adds operator<< on wide_ints (it was already defined
 for offset_int and widest_int).  I think this was originally excluded
 because >> is ambiguous for wide_int, but << is useful for converting
 bytes to bits, etc., so is worth defining on its own.  The patch also
 adds operator% and operator/ for offset_int and widest_int, since those
 types are always signed.  These changes allow the poly_int interface to
 be more predictable.

 I'd originally tried adding the tests as selftests, but that ended up
 bloating cc1 by at least a third.  It also took a while to build them
 at -O2.  The patch therefore uses plugin tests instead, where we can
 force the tests to be built at -O0.  They still run in negligible time
 when built that way.
>>>
>>> Changes in v2:
>>>
>>> - Drop the controversial known_zero etc. wrapper functions.
>>> - Fix the operator<<= bug that Martin found.
>>> - Switch from "t" to "type" in SFINAE classes (requested by Martin).
>>>
>>> Not changed in v2:
>>>
>>> - Default constructors are still empty.  I agree it makes sense to use
>>>   "= default" when we switch to C++11, but it would be dangerous for
>>>   that to make "poly_int64 x;" less defined than it is now.
>>
>> After talking about this a bit more internally, it was obvious that
>> the choice of "must" and "may" for the predicate names was a common
>> sticking point.  The idea was to match the names of alias predicates,
>> but given my track record with names ("too_empty_p" being a recently
>> questioned example :-)), I'd be happy to rename them to something else.
>> Some alternatives we came up with were:
> I didn't find the must vs may naming problematical as I was going
> through the changes.  What I did find much more difficult was
> determining if the behavior was correct when we used a "may" predicate.
> It really relies a good deal on knowing the surrounding code.
>
> In places where I knew the code reasonably well could tell without much
> surrounding context.  In other places I had to look at the code and
> deduce proper behavior in the "may" cases -- and often I resorted to
> spot checking and relying on your reputation & testing to DTRT.
>
>
>>
>> - known_eq / maybe_eq / known_lt / maybe_lt etc.
>>
>>   Some functions already use "known" and "maybe", so this would arguably
>>   be more consistent than using "must" and "may".
>>
>> - always_eq / sometimes_eq / always_lt / sometimes_lt
>>
>>   Similar to the previous one in intent.  It's just a question of which
>>   wordng is clearer.
>>
>> - forall_eq / exists_eq / forall_lt / exists_lt etc.
>>
>>   Matches the usual logic quantifiers.  This seems quite appealing,
>>   as long as it's obvious that in:
>>
>> forall_eq (v0, v1)
>>
>>   v0 and v1 themselves are already bound: if vi == ai + bi*X then
>>   what we really saying is:
>>
>> forall X, a0 + b0*X == a1 + b1*X
>>
>> Which of those sounds best?  Any other suggestions?
> I can live with any of them.  I tend to prefer one of the first two, but
> it's not a major concern for me.  So if you or others have a clear
> preference, go with it.

Whatever you do, use a consistent naming, which I guess means
using known_eq / maybe_eq?

Otherwise ok.

Richard.

>
> jeff


[RFC] Add means to split dump file into several files -- Use in lra

2017-12-07 Thread Tom de Vries

Hi,

I'm currently debugging a problem in lra, and got a bit lost in the 20k+ 
lines dump file.


I observed that:
- the lra dump file is one of the biggest ones
- lra itself consists of a loop over sub-passes.

So, I've:
- written a dump infrastructure addition that can be used within a pass
  to mark the end of the current (sub)dump file and start a next
  subdump file.
- used that infrastructure to instrument lra to dump info from
  different subpasses into separate files.
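
Roughly, a sub-pass just asks to move on to the next chunk, along
these lines (a sketch only; the exact lra_dump_bump interface is an
assumption, see the ChangeLog and patch below):

  /* At the start of a sub-pass, e.g. in lra-constraints.c.  */
  if (lra_dump_file != NULL)
    lra_dump_bump ("lra_constraints");  /* finish the current subdump,
                                           start ...NNN.lra_constraints  */

after which the pass keeps dumping to lra_dump_file as before, only
into the new file.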

Using this patch I managed to split the reload dump file into smaller bits:
...
$ wc -l *.reload.*
   3 no-scevccp-outer-10.c.276r.reload
   0 no-scevccp-outer-10.c.276r.reload.001.lra_start
   3 no-scevccp-outer-10.c.276r.reload.002.remove_scratches
2335 no-scevccp-outer-10.c.276r.reload.003.lra_constraints
1781 no-scevccp-outer-10.c.276r.reload.004.lra_create_live_ranges
 460 no-scevccp-outer-10.c.276r.reload.005.lra_inheritance
 920 no-scevccp-outer-10.c.276r.reload.006.lra_create_live_ranges
 563 no-scevccp-outer-10.c.276r.reload.007.lra_assign
 184 no-scevccp-outer-10.c.276r.reload.008.lra_undo_inheritance
 830 no-scevccp-outer-10.c.276r.reload.009.lra_create_live_ranges
   3 no-scevccp-outer-10.c.276r.reload.010.lra_coalesce
 165 no-scevccp-outer-10.c.276r.reload.011.lra_constraints
 844 no-scevccp-outer-10.c.276r.reload.012.lra_create_live_ranges
 110 no-scevccp-outer-10.c.276r.reload.013.lra_inheritance
 879 no-scevccp-outer-10.c.276r.reload.014.lra_create_live_ranges
  22 no-scevccp-outer-10.c.276r.reload.015.lra_assign
  74 no-scevccp-outer-10.c.276r.reload.016.lra_undo_inheritance
  19 no-scevccp-outer-10.c.276r.reload.017.lra_constraints
 845 no-scevccp-outer-10.c.276r.reload.018.lra_create_live_ranges
  80 no-scevccp-outer-10.c.276r.reload.019.lra_remat
  27 no-scevccp-outer-10.c.276r.reload.020.lra_spill
 866 no-scevccp-outer-10.c.276r.reload.021.lra_constraints
 830 no-scevccp-outer-10.c.276r.reload.022.lra_create_live_ranges
   0 no-scevccp-outer-10.c.276r.reload.023.lra_inheritance
 830 no-scevccp-outer-10.c.276r.reload.024.lra_create_live_ranges
  53 no-scevccp-outer-10.c.276r.reload.025.lra_assign
   5 no-scevccp-outer-10.c.276r.reload.026.lra_constraints
 370 no-scevccp-outer-10.c.276r.reload.027.lra_finishing
4137 no-scevccp-outer-10.c.276r.reload.028.lra_end
   0 no-scevccp-outer-10.c.276r.reload.029.lra_start
  27 no-scevccp-outer-10.c.276r.reload.030.remove_scratches
 553 no-scevccp-outer-10.c.276r.reload.031.lra_constraints
 188 no-scevccp-outer-10.c.276r.reload.032.lra_create_live_ranges
   8 no-scevccp-outer-10.c.276r.reload.033.lra_inheritance
 188 no-scevccp-outer-10.c.276r.reload.034.lra_create_live_ranges
  21 no-scevccp-outer-10.c.276r.reload.035.lra_assign
   3 no-scevccp-outer-10.c.276r.reload.036.lra_undo_inheritance
   5 no-scevccp-outer-10.c.276r.reload.037.lra_constraints
  99 no-scevccp-outer-10.c.276r.reload.038.lra_finishing
 515 no-scevccp-outer-10.c.276r.reload.039.lra_end
...

Notes:
- dump info from different functions is not put together
- this is on by default atm. We probably want to enable this only
  using a switch fsplit-dump or some such.
- the lra_end dump files ends with ";; Function ...", which should be in
  the next lra_start dump file. Once we enable this using a switch we
  can probably do better.

Any comments?

Thanks,
- Tom
Add dump_bump

2017-12-07  Tom de Vries  

	* dumpfile.c (DUMP_FILE_INFO): Add inits for bump and bump_name fields.
	(get_dump_file_name): Handle bump and bump_name fields.
	(gcc::dump_manager::dump_bump, dump_bump): New function.
	* dumpfile.h (struct dump_file_info): Add bump and bump_name fields.
	(dump_bump): Declare.
	* lra-assigns.c (lra_assign): Use lra_dump_bump.
	* lra-coalesce.c (lra_coalesce): Same.
	* lra-constraints.c (lra_constraints, lra_inheritance)
	(lra_undo_inheritance): Same.
	* lra-int.h (lra_dump_bump): Declare.
	* lra-lives.c (lra_create_live_ranges): Use lra_dump_bump.
	* lra-remat.c (lra_remat): Same.
	* lra-spills.c (lra_spill): Same.
	* lra.c (lra_dump_bump): New function.
	(remove_scratches, lra): Use lra_dump_bump.

---
 gcc/dumpfile.c| 47 ---
 gcc/dumpfile.h|  6 ++
 gcc/lra-assigns.c |  1 +
 gcc/lra-coalesce.c|  1 +
 gcc/lra-constraints.c |  3 +++
 gcc/lra-int.h |  1 +
 gcc/lra-lives.c   |  1 +
 gcc/lra-remat.c   |  1 +
 gcc/lra-spills.c  |  1 +
 gcc/lra.c | 12 
 10 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 658500b..47715c4 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -51,7 +51,7 @@ dump_flags_t dump_flags;
 
 #define DUMP_FILE_INFO(suffix, swtch, dkind, num) \
   {suffix, swtch, NULL, NULL, NULL, NULL, NULL, dkind, 0, 0, 0, 0, 0, num, \
-   false, false}
+   0, NULL, false, false}
 
 /*

Re: [001/nnn] poly_int: add poly-int.h

2017-12-07 Thread Jeff Law
On 12/07/2017 07:46 AM, Richard Biener wrote:
> On Wed, Dec 6, 2017 at 9:11 PM, Jeff Law  wrote:
>> On 11/13/2017 05:04 PM, Richard Sandiford wrote:
>>> Richard Sandiford  writes:
 Richard Sandiford  writes:
> This patch adds a new "poly_int" class to represent polynomial integers
> of the form:
>
>   C0 + C1*X1 + C2*X2 ... + Cn*Xn
>
> It also adds poly_int-based typedefs for offsets and sizes of various
> precisions.  In these typedefs, the Ci coefficients are compile-time
> constants and the Xi indeterminates are run-time invariants.  The number
> of coefficients is controlled by the target and is initially 1 for all
> ports.
>
> Most routines can handle general coefficient counts, but for now a few
> are specific to one or two coefficients.  Support for other coefficient
> counts can be added when needed.
>
> The patch also adds a new macro, IN_TARGET_CODE, that can be
> set to indicate that a TU contains target-specific rather than
> target-independent code.  When this macro is set and the number of
> coefficients is 1, the poly-int.h classes define a conversion operator
> to a constant.  This allows most existing target code to work without
> modification.  The main exceptions are:
>
> - values passed through ..., which need an explicit conversion to a
>   constant
>
> - ?: expression in which one arm ends up being a polynomial and the
>   other remains a constant.  In these cases it would be valid to convert
>   the constant to a polynomial and the polynomial to a constant, so a
>   cast is needed to break the ambiguity.
>
> The patch also adds a new target hook to return the estimated
> value of a polynomial for costing purposes.
>
> The patch also adds operator<< on wide_ints (it was already defined
> for offset_int and widest_int).  I think this was originally excluded
> because >> is ambiguous for wide_int, but << is useful for converting
> bytes to bits, etc., so is worth defining on its own.  The patch also
> adds operator% and operator/ for offset_int and widest_int, since those
> types are always signed.  These changes allow the poly_int interface to
> be more predictable.
>
> I'd originally tried adding the tests as selftests, but that ended up
> bloating cc1 by at least a third.  It also took a while to build them
> at -O2.  The patch therefore uses plugin tests instead, where we can
> force the tests to be built at -O0.  They still run in negligible time
> when built that way.

 Changes in v2:

 - Drop the controversial known_zero etc. wrapper functions.
 - Fix the operator<<= bug that Martin found.
 - Switch from "t" to "type" in SFINAE classes (requested by Martin).

 Not changed in v2:

 - Default constructors are still empty.  I agree it makes sense to use
   "= default" when we switch to C++11, but it would be dangerous for
   that to make "poly_int64 x;" less defined than it is now.
>>>
>>> After talking about this a bit more internally, it was obvious that
>>> the choice of "must" and "may" for the predicate names was a common
>>> sticking point.  The idea was to match the names of alias predicates,
>>> but given my track record with names ("too_empty_p" being a recently
>>> questioned example :-)), I'd be happy to rename them to something else.
>>> Some alternatives we came up with were:
>> I didn't find the must vs may naming problematical as I was going
>> through the changes.  What I did find much more difficult was
>> determining if the behavior was correct when we used a "may" predicate.
>> It really relies a good deal on knowing the surrounding code.
>>
>> In places where I knew the code reasonably well I could tell without much
>> surrounding context.  In other places I had to look at the code and
>> deduce proper behavior in the "may" cases -- and often I resorted to
>> spot checking and relying on your reputation & testing to DTRT.
>>
>>
>>>
>>> - known_eq / maybe_eq / known_lt / maybe_lt etc.
>>>
>>>   Some functions already use "known" and "maybe", so this would arguably
>>>   be more consistent than using "must" and "may".
>>>
>>> - always_eq / sometimes_eq / always_lt / sometimes_lt
>>>
>>>   Similar to the previous one in intent.  It's just a question of which
>>>   wordng is clearer.
>>>
>>> - forall_eq / exists_eq / forall_lt / exists_lt etc.
>>>
>>>   Matches the usual logic quantifiers.  This seems quite appealing,
>>>   as long as it's obvious that in:
>>>
>>> forall_eq (v0, v1)
>>>
>>>   v0 and v1 themselves are already bound: if vi == ai + bi*X then
>>>   what we really saying is:
>>>
>>> forall X, a0 + b0*X == a1 + b1*X
>>>
>>> Which of those sounds best?  Any other suggestions?
>> I can live with any of them.  I tend to prefer one of the first two, but
>> it's not a major concern for me.  So if you or others have a clear

Re: [PATCH, rs6000] Gimple folding of splat_uX

2017-12-07 Thread Will Schmidt
On Thu, 2017-12-07 at 12:21 +0100, Richard Biener wrote:
> On Wed, Dec 6, 2017 at 5:36 PM, Will Schmidt  
> wrote:
> > Hi,
> > Add support for gimple folding of splat_u{8,16,32}.
> > Testcase coverage is primarily handled by existing tests
> > testsuite/gcc.target/powerpc/fold-vec-splat_*.c
> >
> > One new test added to verify we continue to receive
> > an 'invalid argument, must be a 5-bit immediate' error
> > when we try to splat a non-constant value.
> >
> > Regtests currently running across assorted power systems.
> > OK for trunk with successful results?
> >
> > Thanks
> > -Will
> >
> > [gcc]
> >
> > 2017-12-05  Will Schmidt  
> >
> > * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support 
> > for
> > early folding of splat_u{8,16,32}.
> >
> > [testsuite]
> >
> > 2017-12-05  Will Schmidt  
> >
> > * gcc.target/powerpc/fold-vec-splat-misc-invalid.c: New.
> >
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index 045a014..1470557 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -16614,10 +16614,33 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> > *gsi)
> >  case VSX_BUILTIN_CMPLE_2DI:
> >  case VSX_BUILTIN_CMPLE_U2DI:
> >fold_compare_helper (gsi, LE_EXPR, stmt);
> >return true;
> >
> > +/* flavors of vec_splat_[us]{8,16,32}.  */
> > +case ALTIVEC_BUILTIN_VSPLTISB:
> > +case ALTIVEC_BUILTIN_VSPLTISH:
> > +case ALTIVEC_BUILTIN_VSPLTISW:
> > +  {
> > +arg0 = gimple_call_arg (stmt, 0);
> > +lhs = gimple_call_lhs (stmt);
> > +/* Only fold the vec_splat_*() if arg0 is constant.  */
> > +if ( TREE_CODE (arg0) != INTEGER_CST)
> > +  return false;
> 
> Is there a reason to not do this for non-constants?  (even not for
> float constants?)

The full regtest failed elsewhere in the suite if I didn't limit this to
just constants, which is part of why I also added the dg-error checking
test.  [Something in the area of gcc/config/rs6000/emmintrin.h
_mm_slli_epi16() which calls vec_splat_s16(__B), iirc.]  The net of that
seemed to be that I had folded something I shouldn't have.  I'll
incorporate the other suggested changes from you and Jakub and Segher,
and double-check the results.

> You should probably double-check there is a LHS, folding runs before DCE.
Anything without a LHS gets rejected early in
rs6000_gimple_fold_builtin(), so I should be covered there.  :-)


> > +tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
> > +  TREE_INT_CST_LOW (arg0));
> > +vec *ctor_elts = NULL;
> > +unsigned int n_elts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs));
> > +for (unsigned int i=0; i < n_elts ; i++)
> > +  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, splat_value);
> > +tree splat_tree = build_constructor (TREE_TYPE (lhs), ctor_elts);
> 
> Just use
> 
>tree splat_tree = build_vector_from_val (splat_value);
> 
> that would also work for non-constants btw.

Alright, thanks. :-)
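
A rough sketch of how the case might look with the review comments above
applied (illustrative only, not the committed version; note that
build_vector_from_val takes the vector type as its first argument and the
replicated element as its second):

    /* flavors of vec_splat_[us]{8,16,32}.  */
    case ALTIVEC_BUILTIN_VSPLTISB:
    case ALTIVEC_BUILTIN_VSPLTISH:
    case ALTIVEC_BUILTIN_VSPLTISW:
      {
        arg0 = gimple_call_arg (stmt, 0);
        lhs = gimple_call_lhs (stmt);
        /* Only fold when the argument is a literal constant; non-constant
           arguments keep the builtin so the usual 5-bit-literal error is
           still emitted.  */
        if (TREE_CODE (arg0) != INTEGER_CST)
          return false;
        tree splat_value = build_int_cst (TREE_TYPE (TREE_TYPE (lhs)),
                                          TREE_INT_CST_LOW (arg0));
        /* build_vector_from_val replaces the manual CONSTRUCTOR loop.  */
        tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
        g = gimple_build_assign (lhs, splat_tree);
        gimple_set_location (g, gimple_location (stmt));
        gsi_replace (gsi, g, true);
        return true;
      }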

-Will

> 
> > +g = gimple_build_assign (lhs, splat_tree);
> > +gimple_set_location (g, gimple_location (stmt));
> > +gsi_replace (gsi, g, true);
> > +return true;
> > +  }
> > +
> >  default:
> >if (TARGET_DEBUG_BUILTIN)
> > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> >  fn_code, fn_name1, fn_name2);
> >break;
> > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c 
> > b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> > new file mode 100644
> > index 000..20f5b05
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-misc-invalid.c
> > @@ -0,0 +1,33 @@
> > +/* Verify that overloaded built-ins for vec_splat_s8 and vec_splat_s16
> > +   generate errors as expected when we attempt to use invalid inputs.  */
> > +
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target powerpc_vsx_ok } */
> > +/* { dg-options "-mvsx -O2" } */
> > +
> > +#include 
> > +
> > +vector signed short
> > +testss_1 (unsigned int ui)
> > +{
> > +  return vec_splat_s16 (ui);/* { dg-error "argument 1 must be a 5-bit 
> > signed literal" } */
> > +}
> > +
> > +vector unsigned short
> > +testss_2 (signed int si)
> > +{
> > +  return vec_splat_u16 (si);/* { dg-error "argument 1 must be a 5-bit 
> > signed literal" } */
> > +}
> > +
> > +vector signed char
> > +testsc_1 (unsigned int ui)
> > +{
> > +  return vec_splat_s8 (ui); /* { dg-error "argument 1 must be a 5-bit 
> > signed literal" } */
> > +}
> > +
> > +vector unsigned char
> > +testsc_2 (signed int si)
> > +{
> > +  return vec_splat_u8 (si);/* { dg-error "argument 1 must be a 5-bit 
> > signed literal" } */
> > +}
> > +
> >
> >
> 




[PATCH] rl78 smaxdi3 improvement

2017-12-07 Thread Sebastian Perta
Hello,

The following patch improves both the speed and code size of 64-bit signed
max on RL78:
it emits a library function call instead of expanding inline code for the
64-bit max at every use.
The signed max function added to libgcc is hand-written, so it is more
efficient than what GCC generates.

The change can easily be seen on the following test case:
long long my_smaxdi3(long long x, long long y){ 
return (x > y)? x : y;
}
I did not add a new test to the regression suite as it is very simple and
there are existing tests which cover this, for example
gcc.c-torture/execute/2224-1.c and gcc.c-torture/execute/20021010-2.c
Regression test is OK, tested with the following command:
make -k check-gcc RUNTESTFLAGS=--target_board=rl78-sim

Please let me know if this is OK, Thank you!
Sebastian

Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 255467)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2017-12-07  Sebastian Perta  
+
+   * config/rl78/rl78.md: New define_expand "smaxdi3".
+   
 2017-12-07  Michael Matz  
 
Add unroll and jam pass
Index: gcc/config/rl78/rl78.md
===
--- gcc/config/rl78/rl78.md (revision 255467)
+++ gcc/config/rl78/rl78.md (working copy)
@@ -718,3 +718,13 @@
   [(set_attr "valloc" "macax")
(set_attr "is_g13_muldiv_insn" "yes")]
 )
+
+(define_expand "smaxdi3"
+ [(set (match_operand:DI  0 "nonimmediate_operand" "")
+   (smax:DI (match_operand:DI 1 "general_operand"  "")
+(match_operand:DI 2 "general_operand"  "")))
+   ]
+  "optimize_size"
+  "rl78_emit_libcall (\"__smaxdi3\", SMAX, DImode, DImode, 3, operands);
+   DONE;"
+)
Index: libgcc/ChangeLog
===
--- libgcc/ChangeLog(revision 255467)
+++ libgcc/ChangeLog(working copy)
@@ -1,3 +1,9 @@
+2017-12-07  Sebastian Perta  
+ 
+   * config/rl78/smaxdi3.S: New assembly file.
+   * config/rl78/t-rl78: Added smaxdi3.S to LIB2ADD.
+
+
 2017-11-30  Michael Meissner  
 
* config/rs6000/_mulkc3.c (__mulkc3): Add forward declaration.
Index: libgcc/config/rl78/t-rl78
===
--- libgcc/config/rl78/t-rl78   (revision 255467)
+++ libgcc/config/rl78/t-rl78   (working copy)
@@ -32,7 +32,8 @@
$(srcdir)/config/rl78/fpmath-sf.S \
$(srcdir)/config/rl78/cmpsi2.S \
$(srcdir)/config/rl78/adddi3.S \
-   $(srcdir)/config/rl78/subdi3.S
+   $(srcdir)/config/rl78/subdi3.S \
+   $(srcdir)/config/rl78/smaxdi3.S
 
 LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
   _popcounthi2 _popcountsi2 \
Index: libgcc/config/rl78/smaxdi3.S
===
--- libgcc/config/rl78/smaxdi3.S(nonexistent)
+++ libgcc/config/rl78/smaxdi3.S(working copy)
@@ -0,0 +1,76 @@
+;   Copyright (C) 2017 Free Software Foundation, Inc.
+;   Contributed by Sebastian Perta.
+; 
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; 
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+; 
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; .
+
+
+#include "vregs.h"
+
+.text
+
+START_FUNC ___smaxdi3
+
+; copy first argument/operand to the output registers
+movw   ax, [sp+4]
+movw   r8, ax
+movw   ax, [sp+6]
+movw   r10, ax
+movw   ax, [sp+8]
+movw   r12, ax
+movw   ax, [sp+10]
+movw   r14, ax
+
+; use 16-bit compares from the most significant words down to the least significant ones
+movw   ax, [sp+18]
+cmpw   ax, r14
+xor1   CY, a.7   ; first compare accounts for the
+xor1   CY, r15.7 ; sign bits of the two operands
+bh $.L1
+bnz$.L2
+
+movw   ax, [sp+16]
+cmpw   ax, r12
+bh $.L1
+bnz$.L2
+
+movw   ax, [sp+14]
+cmpw   ax, r10
+bh $.L1
+bnz$.L2
+
+movw   ax, [sp+12]
+cmpw   ax, r8
+bh $.L1
+ret
+
+.L1:
+; copy second argument/operand to the output registers
+movw   ax, [sp+12]
+movw   r8, ax
+movw   ax, [sp+14]
+movw   r1

Re: [Patch][aarch64] Use IFUNCs to enable LSE instructions in libatomic on aarch64

2017-12-07 Thread Steve Ellcey
On Thu, 2017-12-07 at 09:56 +, James Greenhalgh wrote:
> 
> One obvious thing I missed in the review is that this change will break
> bootstrap on systems with older assemblers. Practically, that's those of
> us who are holding out on Ubuntu 14.04. -march=armv8-a+lse would go back
> a little further, so would be preferable, but even this won't get bootstrap
> back on older systems.
> 
> Is there anything you can do to check for assembler support before turning
> on IFUNCS for libatomic, or am I best to just start configuring with
> --disable-gnu-indirect-function ?
> 
> Thanks,
> James

It should be possible to check for assembler support.  I will work on a
patch to do that.
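
Such a check usually boils down to trying to assemble a single LSE
instruction at configure time.  A minimal probe could look like the
following (illustrative only; the actual configure macro name and wiring
are not part of this thread):

/* Hypothetical assembler probe: this compiles only if the assembler
   accepts LSE mnemonics.  It is meant to be assembled, never executed.  */
int
main (void)
{
  asm volatile (".arch armv8-a+lse\n\tcas w0, w1, [x2]");
  return 0;
}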

Steve Ellcey
sell...@cavium.com


[C++ RFC PATCH] Fix ICE with late attributes in templates (PR c++/83300)

2017-12-07 Thread Jakub Jelinek
Hi!

save_template_attributes ignored flags; when ATTR_FLAG_TYPE_IN_PLACE
wasn't set on a type, it would happily attach the attributes to some
existing type (in this case to integer_type_node).

My first approach was to just call build_type_attribute_variant, but
that ICEs on g++.dg/cpp0x/alias-decl-59.C, because there *decl_p is an
UNDERLYING_TYPE, which the generic type_hash_canon call made by
build_type_attribute_variant doesn't like.
This patch just creates a new variant type if *decl_p is dependent.
It passes bootstrap/regtest, but I'm not really sure if we need any
kind of FE type hashing for such types (when pt.c handles it with
!processing_template_decl the type will be built using
build_type_attribute_variant and afterwards we'll have the types hashed).
So, is this enough, or do we need to do something else (and what)?

2017-12-07  Jakub Jelinek  

PR c++/83300
* decl2.c (save_template_attributes): Add flags argument, if
not ATTR_FLAG_TYPE_IN_PLACE, *decl_p is a type and we want to
modify TYPE_ATTRIBUTES, add them on type attribute variant.

* g++.dg/ext/vector33.C: New test.

--- gcc/cp/decl2.c.jj   2017-12-06 23:48:08.205147975 +0100
+++ gcc/cp/decl2.c  2017-12-07 09:39:18.539996630 +0100
@@ -1244,7 +1244,7 @@ splice_template_attributes (tree *attr_p
DECL_P.  */
 
 static void
-save_template_attributes (tree *attr_p, tree *decl_p)
+save_template_attributes (tree *attr_p, tree *decl_p, int flags)
 {
   tree *q;
 
@@ -1265,7 +1265,20 @@ save_template_attributes (tree *attr_p,
   /* Merge the late attributes at the beginning with the attribute
  list.  */
   late_attrs = merge_attributes (late_attrs, *q);
-  *q = late_attrs;
+  if (*q != late_attrs
+  && !DECL_P (*decl_p)
+  && !(flags & ATTR_FLAG_TYPE_IN_PLACE))
+{
+  if (!dependent_type_p (*decl_p))
+   *decl_p = cp_build_type_attribute_variant (*decl_p, late_attrs);
+  else
+   {
+ *decl_p = build_variant_type_copy (*decl_p);
+ TYPE_ATTRIBUTES (*decl_p) = late_attrs;
+   }
+}
+  else
+*q = late_attrs;
 
   if (!DECL_P (*decl_p) && *decl_p == TYPE_MAIN_VARIANT (*decl_p))
 {
@@ -1466,7 +1479,7 @@ cplus_decl_attributes (tree *decl, tree
   if (check_for_bare_parameter_packs (attributes))
return;
 
-  save_template_attributes (&attributes, decl);
+  save_template_attributes (&attributes, decl, flags);
 }
 
   cp_check_const_attributes (attributes);
--- gcc/testsuite/g++.dg/ext/vector33.C.jj  2017-12-07 09:10:24.227635836 
+0100
+++ gcc/testsuite/g++.dg/ext/vector33.C 2017-12-07 09:10:24.227635836 +0100
@@ -0,0 +1,10 @@
+// PR c++/83300
+// { dg-do compile { target c++11 } }
+
+template
+using T = int __attribute__((vector_size (sizeof(int) * N)));
+
+void
+f (T<4>)
+{
+}

Jakub


[PATCH] Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)

2017-12-07 Thread Jakub Jelinek
Hi!

As mentioned in the PR, the code emitted by ix86_expand_rint
doesn't work with rounding to +/- infinity.
This patch adjusts the expansion when flag_rounding_math is set to do
something that works even in that case (it should be just one insn longer).
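
A small standalone illustration of the failure mode (not part of the patch;
compile with -frounding-math or without optimization so the arithmetic is
done at run time): for x = -2.75 under FE_UPWARD, biasing the absolute value
by +2**52 rounds the magnitude up and yields -3.0, while biasing by
copysign (2**52, x) yields the correct -2.0.

#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  fesetround (FE_UPWARD);
  volatile double x = -2.75;
  volatile double two52 = 0x1p52;
  /* Old sequence: always bias the absolute value by +2**52.  */
  double old_way = __builtin_copysign ((__builtin_fabs (x) + two52) - two52, x);
  /* New sequence: bias by 2**52 carrying the sign of the input.  */
  volatile double t = __builtin_copysign (two52, x);
  double new_way = __builtin_copysign ((x + t) - t, x);
  printf ("old %g, new %g, rint should give -2\n", old_way, new_way);
  return 0;
}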

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-12-07  Joseph Myers  
Alexander Monakov  
Jakub Jelinek  

PR target/81906
* config/i386/i386.c (ix86_expand_rint): Handle flag_rounding_math.

* gcc.target/i386/pr81906.c: New test.

--- gcc/config/i386/i386.c.jj   2017-12-05 10:15:31.0 +0100
+++ gcc/config/i386/i386.c  2017-12-07 11:58:15.159881741 +0100
@@ -44255,8 +44255,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1
   emit_move_insn (op0, ireg);
 }
 
-/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
-   result in OPERAND0.  */
+/* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */
 void
 ix86_expand_rint (rtx operand0, rtx operand1)
 {
@@ -44264,11 +44263,17 @@ ix86_expand_rint (rtx operand0, rtx oper
xa = fabs (operand1);
 if (!isless (xa, 2**52))
  return operand1;
-xa = xa + 2**52 - 2**52;
+two52 = 2**52;
+if (flag_rounding_math)
+ {
+   two52 = copysign (two52, operand1);
+   xa = operand1;
+ }
+xa = xa + two52 - two52;
 return copysign (xa, operand1);
*/
   machine_mode mode = GET_MODE (operand0);
-  rtx res, xa, TWO52, mask;
+  rtx res, xa, TWO52, two52, mask;
   rtx_code_label *label;
 
   res = gen_reg_rtx (mode);
@@ -44281,8 +44286,16 @@ ix86_expand_rint (rtx operand0, rtx oper
   TWO52 = ix86_gen_TWO52 (mode);
   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
 
-  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
-  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+  two52 = TWO52;
+  if (flag_rounding_math)
+{
+  two52 = gen_reg_rtx (mode);
+  ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
+  xa = res;
+}
+
+  xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
+  xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
 
   ix86_sse_copysign_to_positive (res, xa, res, mask);
 
--- gcc/testsuite/gcc.target/i386/pr81906.c.jj  2017-12-07 11:38:06.730812658 
+0100
+++ gcc/testsuite/gcc.target/i386/pr81906.c 2017-12-07 11:38:14.488716544 
+0100
@@ -0,0 +1,37 @@
+/* PR target/81906 */
+/* { dg-do run { target *-*-linux* *-*-gnu* } } */
+/* { dg-options "-O2 -frounding-math" } */
+
+#include 
+
+int
+main ()
+{
+  #define N 12
+  double a[N] = { 2.0, 2.25, 2.5, 2.75, 3.5, -2.0, -2.25, -2.5, -2.75, -3.5, 
0x2.0p53, -0x2.0p53 };
+  double b[N], c[N], d[N], e[N];
+  double be[N] = { 2.0, 2.0, 2.0, 3.0, 4.0, -2.0, -2.0, -2.0, -3.0, -4.0, 
0x2.0p53, -0x2.0p53 };
+  double ce[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -3.0, -3.0, -3.0, -4.0, 
0x2.0p53, -0x2.0p53 };
+  double de[N] = { 2.0, 3.0, 3.0, 3.0, 4.0, -2.0, -2.0, -2.0, -2.0, -3.0, 
0x2.0p53, -0x2.0p53 };
+  double ee[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -2.0, -2.0, -2.0, -3.0, 
0x2.0p53, -0x2.0p53 };
+  asm volatile ("" : : "g" (a), "g" (be), "g" (ce), "g" (de), "g" (ee) : 
"memory");
+
+  int i;
+  fesetround (FE_TONEAREST);
+  for (i = 0; i < N; ++i)
+b[i] = __builtin_rint (a[i]);
+  fesetround (FE_DOWNWARD);
+  for (i = 0; i < N; ++i)
+c[i] = __builtin_rint (a[i]);
+  fesetround (FE_UPWARD);
+  for (i = 0; i < N; ++i)
+d[i] = __builtin_rint (a[i]);
+  fesetround (FE_TOWARDZERO);
+  for (i = 0; i < N; ++i)
+e[i] = __builtin_rint (a[i]);
+  fesetround (FE_TONEAREST);
+  for (i = 0; i < N; ++i)
+if (b[i] != be[i] || c[i] != ce[i] || d[i] != de[i] || e[i] != ee[i])
+  __builtin_abort ();
+  return 0;
+}

Jakub


[PATCH] Fix up tree-ssa/strn{cat,cpy-2}.c (PR tree-optimization/83075)

2017-12-07 Thread Jakub Jelinek
On Wed, Dec 06, 2017 at 05:30:53PM +0100, Jakub Jelinek wrote:
> On Wed, Dec 06, 2017 at 09:20:15AM -0700, Martin Sebor wrote:
> > Attached is a patch with the comment updated/simplified.
> > The tests do the job they need to do today so I just removed
> > the useless attribute but otherwise left them unchanged.  If
> > you would like to enhance them in some way please feel free.
> 
> Ok for trunk, with a minor nit.  I'll tweak the tests incrementally
> when it is in.

So here is the fix for those testcases.

They didn't test what they meant to test, because they didn't FAIL
without the patch.  That is because the bug was that the -W* option
affected code generation, so with -O2 -Wno-stringop-overflow it didn't
trigger it.
I've changed the tests to test both a separate noipa function, where the
optimizers don't know about the aliasing and string lengths from the caller
(in that case the test does more verification, including the content of the
whole buffer and the individual values of the lengths), and what you did
before in main.

Regtested on x86_64-linux and i686-linux, verified that with the
r255446 tree-ssa-strlen.c change reverted it FAILs.

Ok for trunk?

2017-12-07  Jakub Jelinek  

PR tree-optimization/83075
* gcc.dg/tree-ssa/strncpy-2.c: Use size_t instead of unsigned, add
separate function with noipa attribute to also verify behavior when
optimizers don't know the sizes and aliasing, verify resulting sizes
and array content.  Add -Wstringop-overflow to dg-options.
* gcc.dg/tree-ssa/strncat.c: Likewise.

--- gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c.jj2017-12-06 
20:11:54.0 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c   2017-12-07 13:31:32.719722416 
+0100
@@ -1,19 +1,35 @@
-/* PR tree-optimization/83075 - Invalid strncpy optimization
-   { dg-do run }
-   { dg-options "-O2 -Wno-stringop-overflow" } */
+/* PR tree-optimization/83075 - Invalid strncpy optimization */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wstringop-overflow" } */
 
-int main (void)
+typedef __SIZE_TYPE__ size_t;
+
+__attribute__((noipa)) size_t
+foo (char *p, char *q, size_t *r)
 {
-  char a[8] = "";
+  size_t n0 = __builtin_strlen (p);
+  __builtin_strncpy (q, p, n0);/* { dg-warning "specified 
bound depends on the length" } */
+  size_t n1 = __builtin_strlen (p);
+  *r = n0;
+  return n1;
+}
 
+int
+main ()
+{
+  char a[8] = "";
   __builtin_strcpy (a, "123");
-
-  unsigned n0 = __builtin_strlen (a);
-
-  __builtin_strncpy (a + 3, a, n0);
-
-  unsigned n1 = __builtin_strlen (a);
-
+  size_t n0 = __builtin_strlen (a);
+  __builtin_strncpy (a + 3, a, n0);/* { dg-warning "specified bound 
depends on the length" } */
+  size_t n1 = __builtin_strlen (a);
   if (n1 == n0)
 __builtin_abort ();
+  a[6] = '7';
+  __builtin_strcpy (a, "456");
+  size_t n2;
+  if (foo (a, a + 3, &n2) != 7 || n2 != 3)
+__builtin_abort ();
+  if (__builtin_memcmp (a, "4564567", sizeof "4564567"))
+__builtin_abort ();
+  return 0;
 }
--- gcc/testsuite/gcc.dg/tree-ssa/strncat.c.jj  2017-12-06 20:11:54.0 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/strncat.c 2017-12-07 13:31:09.568008365 
+0100
@@ -1,19 +1,35 @@
-/* PR tree-optimization/83075 - Invalid strncpy optimization
-   { dg-do run }
-   { dg-options "-O2 -Wno-stringop-overflow" } */
+/* PR tree-optimization/83075 - Invalid strncpy optimization */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wstringop-overflow" } */
 
-int main (void)
+typedef __SIZE_TYPE__ size_t;
+
+__attribute__((noipa)) size_t
+foo (char *p, char *q, size_t *r)
 {
-  char a[8] = "";
+  size_t n0 = __builtin_strlen (p);
+  __builtin_strncat (q, p, n0);/* { dg-warning "specified 
bound depends on the length" } */
+  size_t n1 = __builtin_strlen (p);
+  *r = n0;
+  return n1;
+}
 
+int
+main ()
+{
+  char a[8] = "";
   __builtin_strcpy (a, "123");
-
-  unsigned n0 = __builtin_strlen (a);
-
-  __builtin_strncat (a + 3, a, n0);
-
-  unsigned n1 = __builtin_strlen (a);
-
+  size_t n0 = __builtin_strlen (a);
+  __builtin_strncat (a + 3, a, n0);/* { dg-warning "specified bound 
depends on the length" } */
+  size_t n1 = __builtin_strlen (a);
   if (n1 == n0)
 __builtin_abort ();
+  a[6] = '7';
+  __builtin_strcpy (a, "456");
+  size_t n2;
+  if (foo (a, a + 3, &n2) != 6 || n2 != 3)
+__builtin_abort ();
+  if (__builtin_memcmp (a, "456456\0", sizeof "456456\0"))
+__builtin_abort ();
+  return 0;
 }


Jakub


[PATCH, rs6000] Add missing builtin functionality and tests

2017-12-07 Thread Carl Love
GCC Maintainers:

The following patch adds support for builtin instances that are documented
in the ABI specification but were previously missing.  The patch includes
test cases for the added builtins.

The patch has been run on:

  powerpc64le-unknown-linux-gnu (Power 8 LE)
  powerpc64le-unknown-linux-gnu (Power 8 BE)
  powerpc64le-unknown-linux-gnu (Power 9 LE)

without regressions.  

Please let me know if the following patch is acceptable.  Thanks.

  Carl Love

-

gcc/ChangeLog:

2017-12-07  Carl Love  

* config/rs6000/altivec.h (vec_extract_fp32_from_short[h|l]): Add 
#defines.
* config/rs6000/rs6000-builtin.def (VSLDOI_2DI): Add macro expansion.
* config/rs6000/rs6000-c.c (ALTIVEC_BUILTIN_VEC_UNPACK[H|L],
ALTIVEC_BUILTIN_VEC_[AND|SLL|SR|SRO]): Add expansions.
(ALTIVEC_BUILTIN_VEC_SLL): Add expansions.
* doc/extend.texi: Add documentation for the added builtins.

gcc/testsuite/ChangeLog:

2017-12-07  Carl Love  
* gcc.target/powerpc/altivec-7.c: Renamed altivec-7.h.
* gcc.target/powerpc/altivec-7.h (main): Add testcases for vec_unpackl.
Add dg-final tests for the instructions generated.
* gcc.target/powerpc/altivec-7-be.c: New file to test on big endian.
* gcc.target/powerpc/altivec-7-le.c: New file to test on little endian.
* gcc.target/powerpc/altivec-13.c (foo): Add vec_sld, vec_srl,
 vec_sro testcases. Add dg-final tests for the instructions generated.
* gcc.target/powerpc/builtins-3-p8.c (test_vsi_packs_[vui|vsi
vssi|vusi], test_vsi_packsu-[vssi|vusi|vsll|vull|vsi|vui]): Add
testcases. Add dg-final tests for new instructions.
* gcc.target/powerpc/p8vector-builtin-2.c (v[b|bs|u]char_eq,
v[b|s|i|u]int_eq, vbool_eq, v[b|s|u]int_ne, vbool_ne,
vsign_ne, vuns_ne, vbshort_ne): Add tests. Add dg-final instruction
tests.
* gcc.target/powerpc/vsx-vector-6.c: Renamed vsx-vector-6.h.
* gcc.target/powerpc/vsx-vector-6.h (vec_andc,vec_nmsub, vec_nmadd,
vec_or, vec_nor, vec_andc, vec_or, vec_andc, vec_msums): Add tests.
Add dg-final tests for the generated instructions.
* gcc.target/powerpc/builtins-3.c 
(test_sll_v[sc|uc|si]_v[sc|uc|si]_v[s|uc]):
Add tests.
---
 gcc/config/rs6000/altivec.h|   2 +
 gcc/config/rs6000/rs6000-builtin.def   |   1 +
 gcc/config/rs6000/rs6000-c.c   |  38 +
 gcc/doc/extend.texi|  50 +-
 gcc/testsuite/gcc.target/powerpc/altivec-13.c  |  69 +++-
 gcc/testsuite/gcc.target/powerpc/altivec-7-be.c|  33 
 gcc/testsuite/gcc.target/powerpc/altivec-7-le.c|  33 
 .../powerpc/{altivec-7.c => altivec-7.h}   |   9 +-
 gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c   |  77 +
 gcc/testsuite/gcc.target/powerpc/builtins-3.c  | 179 -
 .../gcc.target/powerpc/p8vector-builtin-2.c|  83 +-
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c |  32 
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c |  32 
 .../powerpc/{vsx-vector-6.c => vsx-vector-6.h} |  88 +-
 14 files changed, 708 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-7-be.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-7-le.c
 rename gcc/testsuite/gcc.target/powerpc/{altivec-7.c => altivec-7.h} (84%)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-le.c
 rename gcc/testsuite/gcc.target/powerpc/{vsx-vector-6.c => vsx-vector-6.h} 
(50%)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 068dfef2e..b58afd851 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -458,6 +458,8 @@
 
 #define vec_extract_fp_from_shorth __builtin_vec_vextract_fp_from_shorth
 #define vec_extract_fp_from_shortl __builtin_vec_vextract_fp_from_shortl
+#define vec_extract_fp32_from_shorth __builtin_vec_vextract_fp_from_shorth
+#define vec_extract_fp32_from_shortl __builtin_vec_vextract_fp_from_shortl
 
 #define scalar_extract_exp __builtin_vec_scalar_extract_exp
 #define scalar_extract_sig __builtin_vec_scalar_extract_sig
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index cfb6e55ed..a563a5d96 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -999,6 +999,7 @@ BU_ALTIVEC_3 (VSEL_1TI_UNS,   "vsel_1ti_uns",   CONST,  
vector_select_v1ti_uns)
 BU_ALTIVEC_3 (VSLDOI_16QI,"vsldoi_16qi",CONST, 
altivec_vsldoi_v16qi)
 BU_ALTIVEC_3 (VSLDOI_8HI, "vsldoi_8hi", CONST, 
altivec_vsldoi_v8hi)
 BU_ALTIVEC_3 (VSLDOI_4SI, "vsldoi_4si", CONST, 
altivec_vsldoi_v4si)
+BU_ALTIVEC_3 (VSLDOI_2DI, "

[PATCH] Further improvements for the (T)(P+A)-(T)(P+B) folding (PR sanitizer/81281)

2017-12-07 Thread Jakub Jelinek
Hi!

When committing the previous PR81281 patch, I've removed all the @@0 cases
on plus:c, used @0 instead, to make sure we don't regress.

This patch readds those where possible.  For the cases where there is
just P and A, it was mostly a matter of using @@0 and convert? instead of
convert, plus using the type of @1 instead of @0; though if @0 is an
INTEGER_CST, what we usually end up with is a (plus (convert (plus @1 @0)) @2)
where @2 negated is equal to @0, so the patch adds a simplification for that
too (a small example follows below).

For the case with P, A and B, the patch limits it to the case where either
both A and B are narrower or both are wider.
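
An illustrative (not from the testsuite) example of the @0 == INTEGER_CST
shape handled by the new simplification:

/* (long long) (p + 5) - 5 can be folded to (long long) p: the constant
   added inside the narrower type is cancelled by the constant subtracted
   outside, and signed overflow of p + 5 would be undefined anyway.  */
long long
f (int p)
{
  return (long long) (p + 5) - 5;
}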

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-12-07  Jakub Jelinek  

PR sanitizer/81281
* match.pd ((T)(P + A) - (T)P -> (T) A): Use @@0 instead of @0 and
convert? on @0 instead of convert.  Check type of @1, not @0.
Add a simplify for (T)(P + A) + Q where -Q is equal to P.
((T)P - (T)(P + A) -> -(T) A): Use @@0 instead of @0 and
convert? on @0 instead of convert.  Check type of @1, not @0.
((T)(P + A) - (T)(P + B) -> (T)A - (T)B): Use @@0 instead of @0,
only optimize if either both @1 and @2 types are narrower
precision, or both are wider or equal precision, and in the former
case only if both have undefined overflow.

* gcc.dg/pr81281-3.c: New test.

--- gcc/match.pd.jj 2017-12-07 14:00:51.083048186 +0100
+++ gcc/match.pd2017-12-07 15:17:49.132784931 +0100
@@ -1784,8 +1784,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
   /* (T)(P + A) - (T)P -> (T) A */
   (simplify
-   (minus (convert (plus:c @0 @1))
-(convert @0))
+   (minus (convert (plus:c @@0 @1))
+(convert? @0))
(if (element_precision (type) <= element_precision (TREE_TYPE (@1))
/* For integer types, if A has a smaller type
   than T the result depends on the possible
@@ -1794,10 +1794,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   However, if an overflow in P + A would cause
   undefined behavior, we can assume that there
   is no overflow.  */
-   || (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0
+   || (INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1
 (convert @1)))
   (simplify
+   (plus (convert (plus @1 INTEGER_CST@0)) INTEGER_CST@2)
+   (with { bool overflow;
+  wide_int w = wi::neg (wi::to_wide (@2), &overflow); }
+(if (wi::to_widest (@0) == widest_int::from (w, TYPE_SIGN (TREE_TYPE (@2)))
+&& (!overflow
+|| (INTEGRAL_TYPE_P (TREE_TYPE (@2))
+&& TYPE_UNSIGNED (TREE_TYPE (@2
+&& (element_precision (type) <= element_precision (TREE_TYPE (@1))
+/* For integer types, if A has a smaller type
+   than T the result depends on the possible
+   overflow in P + A.
+   E.g. T=size_t, A=(unsigned)429497295, P>0.
+   However, if an overflow in P + A would cause
+   undefined behavior, we can assume that there
+   is no overflow.  */
+|| (INTEGRAL_TYPE_P (TREE_TYPE (@1))
+&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)
+ (convert @1
+  (simplify
(minus (convert (pointer_plus @@0 @1))
 (convert @0))
(if (element_precision (type) <= element_precision (TREE_TYPE (@1))
@@ -1818,8 +1837,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
   /* (T)P - (T)(P + A) -> -(T) A */
   (simplify
-   (minus (convert @0)
-(convert (plus:c @0 @1)))
+   (minus (convert? @0)
+(convert (plus:c @@0 @1)))
(if (INTEGRAL_TYPE_P (type)
&& TYPE_OVERFLOW_UNDEFINED (type)
&& element_precision (type) <= element_precision (TREE_TYPE (@1)))
@@ -1833,8 +1852,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
However, if an overflow in P + A would cause
undefined behavior, we can assume that there
is no overflow.  */
-|| (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0
+|| (INTEGRAL_TYPE_P (TREE_TYPE (@1))
+&& TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1
  (negate (convert @1)
   (simplify
(minus (convert @0)
@@ -1862,23 +1881,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
   /* (T)(P + A) - (T)(P + B) -> (T)A - (T)B */
   (simplify
-   (minus (convert (plus:c @0 @1))
+   (minus (convert (plus:c @@0 @1))
 (convert (plus:c @0 @2)))
(if (INTEGRAL_TYPE_P (type)
&& TYPE_OVERFLOW_UNDEFINED (type)
-   && element_precision (type) <= element_precision (TREE_TYPE (@1)))
+   && element_precision (type) <= element_precision (TREE_TYPE (@1))
+   && element_precision (type) <= element_precision (TREE_TYPE (@2)))
 (with { tree utype = unsigned_type_for (type); }
  (convert (minus (convert:utype @1) (convert:utype @2
-(if (element_precision (type) <= element_pre

[committed] Tweak POINTER_DIFF_EXPR verification (PR middle-end/83164)

2017-12-07 Thread Jakub Jelinek
Hi!

As mentioned in the PR, we need to treat POINTER_DIFF_EXPR similarly to
how we treat comparisons with pointer operands (i.e. either there is a
useless conversion from one type to the other, or from the other to the
one, or the pointer type modes are the same), because we have exceptions
for pointers to void and can propagate various pointer types there.
But, if we accept all cases where TYPE_MODE is the same, then we don't
need to call useless_type_conversion_p at all, as for different TYPE_MODEs
it will always return false.
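
An illustrative example (not the PR testcase, which is included below) of
how operands of different pointer types but the same mode can reach a
POINTER_DIFF_EXPR once the conversions through void * are propagated away:

__PTRDIFF_TYPE__
f (int *p, char *q)
{
  /* Pointer conversions are useless in GIMPLE, so after forward
     propagation the subtraction may see an int * and a char * operand
     directly; both have the same TYPE_MODE.  */
  void *a = p;
  void *b = q;
  return (char *) a - (char *) b;
}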

Bootstrapped/regtested on x86_64-linux and i686-linux, preapproved by
Richard in the PR, committed to trunk.

2017-12-07  Jakub Jelinek  

PR middle-end/83164
* tree-cfg.c (verify_gimple_assign_binary): Don't require
types_compatible_p, just that TYPE_MODE is the same.

* gcc.c-torture/compile/pr83164.c: New test.

--- gcc/tree-cfg.c.jj   2017-12-06 09:16:12.0 +0100
+++ gcc/tree-cfg.c  2017-12-07 13:00:33.641456189 +0100
@@ -4007,7 +4007,9 @@ verify_gimple_assign_binary (gassign *st
   {
if (!POINTER_TYPE_P (rhs1_type)
|| !POINTER_TYPE_P (rhs2_type)
-   || !types_compatible_p (rhs1_type, rhs2_type)
+   /* Because we special-case pointers to void we allow difference
+  of arbitrary pointers with the same mode.  */
+   || TYPE_MODE (rhs1_type) != TYPE_MODE (rhs2_type)
|| TREE_CODE (lhs_type) != INTEGER_TYPE
|| TYPE_UNSIGNED (lhs_type)
|| TYPE_PRECISION (lhs_type) != TYPE_PRECISION (rhs1_type))
--- gcc/testsuite/gcc.c-torture/compile/pr83164.c.jj2017-12-07 
12:54:01.911306184 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr83164.c   2017-12-07 
12:53:41.0 +0100
@@ -0,0 +1,7 @@
+/* PR middle-end/83164 */
+
+__PTRDIFF_TYPE__
+foo (void)
+{
+  return (char *) foo - (char *) 0x1230;
+}

Jakub


Re: [PATCH] Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)

2017-12-07 Thread Uros Bizjak
On Thu, Dec 7, 2017 at 5:48 PM, Jakub Jelinek  wrote:
> Hi!
>
> As mentioned in the PR, the code emitted by ix86_expand_rint
> doesn't work with rounding to +/- infinity.
> This patch adjusts the expansion when flag_rounding_math is set to do
> something that works even in that case (it should be just one insn longer).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-12-07  Joseph Myers  
> Alexander Monakov  
> Jakub Jelinek  
>
> PR target/81906
> * config/i386/i386.c (ix86_expand_rint): Handle flag_rounding_math.
>
> * gcc.target/i386/pr81906.c: New test.

OK for trunk and release branches.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2017-12-05 10:15:31.0 +0100
> +++ gcc/config/i386/i386.c  2017-12-07 11:58:15.159881741 +0100
> @@ -44255,8 +44255,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1
>emit_move_insn (op0, ireg);
>  }
>
> -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
> -   result in OPERAND0.  */
> +/* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */
>  void
>  ix86_expand_rint (rtx operand0, rtx operand1)
>  {
> @@ -44264,11 +44263,17 @@ ix86_expand_rint (rtx operand0, rtx oper
> xa = fabs (operand1);
>  if (!isless (xa, 2**52))
>   return operand1;
> -xa = xa + 2**52 - 2**52;
> +two52 = 2**52;
> +if (flag_rounding_math)
> + {
> +   two52 = copysign (two52, operand1);
> +   xa = operand1;
> + }
> +xa = xa + two52 - two52;
>  return copysign (xa, operand1);
> */
>machine_mode mode = GET_MODE (operand0);
> -  rtx res, xa, TWO52, mask;
> +  rtx res, xa, TWO52, two52, mask;
>rtx_code_label *label;
>
>res = gen_reg_rtx (mode);
> @@ -44281,8 +44286,16 @@ ix86_expand_rint (rtx operand0, rtx oper
>TWO52 = ix86_gen_TWO52 (mode);
>label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
>
> -  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, 
> OPTAB_DIRECT);
> -  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
> +  two52 = TWO52;
> +  if (flag_rounding_math)
> +{
> +  two52 = gen_reg_rtx (mode);
> +  ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
> +  xa = res;
> +}
> +
> +  xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, 
> OPTAB_DIRECT);
> +  xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
>
>ix86_sse_copysign_to_positive (res, xa, res, mask);
>
> --- gcc/testsuite/gcc.target/i386/pr81906.c.jj  2017-12-07 11:38:06.730812658 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr81906.c 2017-12-07 11:38:14.488716544 
> +0100
> @@ -0,0 +1,37 @@
> +/* PR target/81906 */
> +/* { dg-do run { target *-*-linux* *-*-gnu* } } */
> +/* { dg-options "-O2 -frounding-math" } */
> +
> +#include 
> +
> +int
> +main ()
> +{
> +  #define N 12
> +  double a[N] = { 2.0, 2.25, 2.5, 2.75, 3.5, -2.0, -2.25, -2.5, -2.75, -3.5, 
> 0x2.0p53, -0x2.0p53 };
> +  double b[N], c[N], d[N], e[N];
> +  double be[N] = { 2.0, 2.0, 2.0, 3.0, 4.0, -2.0, -2.0, -2.0, -3.0, -4.0, 
> 0x2.0p53, -0x2.0p53 };
> +  double ce[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -3.0, -3.0, -3.0, -4.0, 
> 0x2.0p53, -0x2.0p53 };
> +  double de[N] = { 2.0, 3.0, 3.0, 3.0, 4.0, -2.0, -2.0, -2.0, -2.0, -3.0, 
> 0x2.0p53, -0x2.0p53 };
> +  double ee[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -2.0, -2.0, -2.0, -3.0, 
> 0x2.0p53, -0x2.0p53 };
> +  asm volatile ("" : : "g" (a), "g" (be), "g" (ce), "g" (de), "g" (ee) : 
> "memory");
> +
> +  int i;
> +  fesetround (FE_TONEAREST);
> +  for (i = 0; i < N; ++i)
> +b[i] = __builtin_rint (a[i]);
> +  fesetround (FE_DOWNWARD);
> +  for (i = 0; i < N; ++i)
> +c[i] = __builtin_rint (a[i]);
> +  fesetround (FE_UPWARD);
> +  for (i = 0; i < N; ++i)
> +d[i] = __builtin_rint (a[i]);
> +  fesetround (FE_TOWARDZERO);
> +  for (i = 0; i < N; ++i)
> +e[i] = __builtin_rint (a[i]);
> +  fesetround (FE_TONEAREST);
> +  for (i = 0; i < N; ++i)
> +if (b[i] != be[i] || c[i] != ce[i] || d[i] != de[i] || e[i] != ee[i])
> +  __builtin_abort ();
> +  return 0;
> +}
>
> Jakub


Re: [PATCH v2] Ability to remap file names in __FILE__, etc (PR other/70268)

2017-12-07 Thread Martin Sebor

On 12/07/2017 05:29 AM, Boris Kolpackov wrote:

Thanks for the review. Second revision of the patch attached (also
rebased on the current trunk). Issues that are not commented on
below have been resolved as suggested.


David Malcolm  writes:


To my naive eyes this seems like a useful addition, but I'm hoping
someone with more knowledge of the standards around the preprocessor
can comment.


This would definitely be a non-standard extension to any preprocessor
standard there might be. Since it's not enabled by default I don't see
any issues though.


FWIW, I agree, though the final word is of course Joseph and
Marek's as the C front end maintainers.


+  error ("invalid argument %qs to %s", arg, opt);


I think both of these should be %qs, so that the option is quoted (the
old code in final.c didn't do that, but I think it should have).


I personally disagree since an option like -ffile-prefix-map is not
easy to confuse with a language word. But I defer to your judgment.


We have been making an effort to add quoting around options
in diagnostics.  There are still many unquoted options left
but the (so far only informally agreed upon, AFAIK) goal is
to eventually quote them all.

I think adding options to the list in the Quoting section on
the DiagnosticsGuidelines Wiki would be appropriate.  Do you
agree, David?


diff --git a/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
b/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
new file mode 100644
index 000..cf14de84a0d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/ffile-prefix-map.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-ffile-prefix-map==FILE-PREFIX" } */


What's up with this "=="? (as opposed to "=").


Since I cannot predict the actual path, I am remapping empty prefix
to FILE-PREFIX which effectively adds FILE-PREFIX to any path.


IIUC, it's the same as "-ffile-prefix-map=/=/FILE-PREFIX", correct?
That seems useful to me, but I'm not sure it's as useful as it
could or might need to be in some cases.  E.g., if sources for
nightly builds are downloaded into a temporary directory with some
random name, this prepends a known prefix to the directory but it
doesn't make the whole path-name determinate.  Do you think it
would be useful to add support for globbing, as a separate
enhancement?
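
To make the intended effect concrete, a hypothetical use of the new option
(paths purely illustrative, semantics as described in this thread):

/* Compiled as
     gcc -fmacro-prefix-map=/tmp/build-XYZ=/src -c /tmp/build-XYZ/version.c
   the string below expands to "/src/version.c" instead of the temporary
   build path.  */
const char *compiled_from = __FILE__;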


--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1175,7 +1175,11 @@ Common RejectNegative Joined 
Var(common_deferred_options) Defer

 fdebug-prefix-map=
 Common Joined RejectNegative Var(common_deferred_options) Defer
-Map one directory name to another in debug information.
+-fdebug-prefix-map== Map one directory name to another in debug 
information.
+
+ffile-prefix-map=
+Common Joined RejectNegative Var(common_deferred_options) Defer


AFAIK, RejectNegative applies to options that take integer arguments.
This one takes a string so it probably doesn't belong here.


--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -287,6 +287,16 @@ When this option is given no argument, the default 
parameter value is

 Note that @code{-ftrack-macro-expansion=2} is activated by default.

+@item -fmacro-prefix-map=@var{old}=@var{new}
+@opindex fmacro-prefix-map
+When preprocessing files in directory @file{@var{old}}, expand the
+@code{__FILE__} and @code{__BASE_FILE__} macros as in @file{@var{new}}
+instead.


This could be just me, but to my eyes this sentence can be read
one of two ways: (1) files residing in directory old, or (2)
the current working directory is old while preprocessing some
files.  To make it 100% clear which is meant, I would find it
more accurate if it were phrased like this instead:

  When preprocessing files residing in directory @file{@var{old}},
  expand the @code{__FILE__} and @code{__BASE_FILE__} macros as if
  the files resided in directory @file{@var{new}} instead.

Ditto for the other options.  (I assume you chose the wording to
be consistent with the existing -fdebug-prefix-map=old=new option.
I'd find the documentation for that option clearer if it were
reworded as well.)

Martin



patch to fix PR83252 and PR80818

2017-12-07 Thread Vladimir Makarov

  The following patch fixes PR83252 and PR80818.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83252

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80818

The patch was successfully tested and bootstrapped on ppc64 and x86_64.

Committed as rev. 255471.

Jakub, the patch does not include a test case for PR83252.  Could you 
add your test to the trunk?


Index: ChangeLog
===
--- ChangeLog	(revision 255470)
+++ ChangeLog	(working copy)
@@ -1,3 +1,12 @@
+2017-12-07  Vladimir Makarov  
+
+	PR target/83252
+	PR rtl-optimization/80818
+	* lra.c (add_regs_to_insn_regno_info): Make a hard reg in CLOBBER
+	always early clobbered.
+	* lra-lives.c (process_bb_lives): Check input hard regs for early
+	clobbered non-operand hard reg.
+
 2017-12-07  Jakub Jelinek  
 
 	PR middle-end/83164
Index: lra.c
===
--- lra.c	(revision 255470)
+++ lra.c	(working copy)
@@ -1476,15 +1476,11 @@ add_regs_to_insn_regno_info (lra_insn_re
   add_regs_to_insn_regno_info (data, SET_SRC (x), insn, OP_IN, false, 0);
   break;
 case CLOBBER:
-  {
-	int code = INSN_CODE (insn);
-
-	/* We treat clobber of non-operand hard registers as early
-	   clobber (the behavior is expected from asm).  */
-	add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_OUT,
- code < 0, code < 0 ? ALL_ALTERNATIVES : 0);
-	break;
-  }
+  /* We treat clobber of non-operand hard registers as early
+	 clobber.  */
+  add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_OUT,
+   true, ALL_ALTERNATIVES);
+  break;
 case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
   add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, false, 0);
   break;
Index: lra-lives.c
===
--- lra-lives.c	(revision 255470)
+++ lra-lives.c	(working copy)
@@ -928,7 +928,18 @@ process_bb_lives (basic_block bb, int &c
   for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next)
 	if (reg->type == OP_OUT
 	&& reg_early_clobber_p (reg, n_alt) && ! reg->subreg_p)
-	  make_hard_regno_dead (reg->regno);
+	  {
+	struct lra_insn_reg *reg2;
+	
+	/* We can have early clobbered non-operand hard reg and
+	   the same hard reg as an insn input.  Don't make hard
+	   reg dead before the insns.  */
+	for (reg2 = curr_id->regs; reg2 != NULL; reg2 = reg2->next)
+	  if (reg2->type != OP_OUT && reg2->regno == reg->regno)
+		break;
+	if (reg2 == NULL)
+	  make_hard_regno_dead (reg->regno);
+	  }
 
   if (need_curr_point_incr)
 	next_program_point (curr_point, freq);


Re: [PATCH] Fix vectorizer part of PR81303

2017-12-07 Thread Bin.Cheng
On Wed, Dec 6, 2017 at 1:29 PM, Richard Biener  wrote:
>
> The following fixes a vectorization issue that appears when trying
> to vectorize the bwaves mat_times_vec kernel after interchange was
> performed by the interchange pass.  That interchange inserts the
> following code for the former reduction created by LIM store-motion:
I do observe that more cases are vectorized by this patch on AArch64.
I still want to find a way to avoid generating the cond_expr, but for the
moment I will have another patch making interchange even more conservative
for small cases, in which the new cmp/select instructions cost a lot
relative to the small loop body.
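
A rough C analogue (illustrative only) of the loop shape discussed in the
quoted mail below, where the comparison on the integer IV is invariant in
the inner loop but its 32-bit operands previously forced the wider
comparison vector type:

void
foo (double *restrict y, const double *restrict a, int m, int n)
{
  for (int l = 0; l < n; l++)
    {
      /* m != 1 is invariant in the l loop; with the patch the invariant
         comparison is widened on the scalar side, so the loop can be
         vectorized with a factor of 4 for AVX256 doubles.  */
      double t = m != 1 ? y[l] : 0.0;
      y[l] = t + a[l];
    }
}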

Thanks,
bin
>
>[local count: 161061274]:
>   # m_58 = PHI <1(10), m_84(20)>
> ...
>[local count: 912680551]:
>   # l_35 = PHI <1(13), l_57(21)>
> ...
>   y__I_lsm.113_140 = *y_139(D)[_31];
>   y__I_lsm.113_94 = m_58 != 1 ? y__I_lsm.113_140 : 0.0;
> ...
>   *y_139(D)[_31] = _101;
>
>
> so we have a COND_EXPR with a test on an integer IV m_58 with
> double values.  Note that the m_58 != 1 condition is invariant
> in the l loop.
>
> Currently we vectorize this condition using V8SImode vectors
> causing a vectorization factor of 8 and thus forcing the scalar
> path for the bwaves case (the loops have an iteration count of 5).
>
> The following patch makes the vectorizer handle invariant conditions
> in the first place and second handle widening of operands of invariant
> conditions transparently (the promotion will happen on the invariant
> scalars).  This makes it possible to use a vectorization factor of 4,
> reducing the bwaves runtime from 208s before interchange
> (via 190s after interchange) to 172s after interchange and vectorization
> with AVX256 (on a Haswell machine).
>
> For the vectorizable_condition part to work I need to avoid
> pulling apart the condition from the COND_EXPR during pattern
> detection.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> 2017-12-06  Richard Biener  
>
> PR tree-optimization/81303
> * tree-vect-stmts.c (vect_is_simple_cond): For invariant
> conditions try to create a comparison vector type matching
> the data vector type.
> (vectorizable_condition): Adjust.
> * tree-vect-patterns.c (vect_recog_mask_conversion_pattern):
> Leave invariant conditions alone in case we can vectorize those.
>
> * gcc.target/i386/vectorize9.c: New testcase.
> * gcc.target/i386/vectorize10.c: New testcase.
>
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   (revision 255438)
> +++ gcc/tree-vect-stmts.c   (working copy)
> @@ -7792,7 +7792,8 @@ vectorizable_load (gimple *stmt, gimple_
>
>  static bool
>  vect_is_simple_cond (tree cond, vec_info *vinfo,
> -tree *comp_vectype, enum vect_def_type *dts)
> +tree *comp_vectype, enum vect_def_type *dts,
> +tree vectype)
>  {
>tree lhs, rhs;
>tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
> @@ -7845,6 +7846,20 @@ vect_is_simple_cond (tree cond, vec_info
>  return false;
>
>*comp_vectype = vectype1 ? vectype1 : vectype2;
> +  /* Invariant comparison.  */
> +  if (! *comp_vectype)
> +{
> +  tree scalar_type = TREE_TYPE (lhs);
> +  /* If we can widen the comparison to match vectype do so.  */
> +  if (INTEGRAL_TYPE_P (scalar_type)
> + && tree_int_cst_lt (TYPE_SIZE (scalar_type),
> + TYPE_SIZE (TREE_TYPE (vectype
> +   scalar_type = build_nonstandard_integer_type
> + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
> +  TYPE_UNSIGNED (scalar_type));
> +  *comp_vectype = get_vectype_for_scalar_type (scalar_type);
> +}
> +
>return true;
>  }
>
> @@ -7942,7 +7957,7 @@ vectorizable_condition (gimple *stmt, gi
>else_clause = gimple_assign_rhs3 (stmt);
>
>if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
> -   &comp_vectype, &dts[0])
> +   &comp_vectype, &dts[0], vectype)
>|| !comp_vectype)
>  return false;
>
> Index: gcc/tree-vect-patterns.c
> ===
> --- gcc/tree-vect-patterns.c(revision 255438)
> +++ gcc/tree-vect-patterns.c(working copy)
> @@ -3976,6 +3976,32 @@ vect_recog_mask_conversion_pattern (vec<
>   || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS 
> (vectype2))
> return NULL;
>
> +  /* If rhs1 is invariant and we can promote it leave the COND_EXPR
> + in place, we can handle it in vectorizable_condition.  This avoids
> +unnecessary promotion stmts and increased vectorization factor.  */
> +  if (COMPARISON_CLASS_P (rhs1)
> + && INTEGRAL_TYPE_P (rhs1_type)
> + && TYPE_VECTOR_SUBPARTS (vectype1) < TYPE_VECTOR_SUBPARTS 
> (vectype2))
> +   {
> +

[PATCH] rl78 smindi3 improvement

2017-12-07 Thread Sebastian Perta
Hello,

The following patch improves both the speed and code size of 64-bit signed
min on RL78:
it emits a library function call instead of expanding inline code for the
64-bit min at every use.
The signed min function added to libgcc is hand-written, so it is more
efficient than what GCC generates.

The change can easily be seen on the following test case:
long long my_smindi3(long long x, long long y){ 
return (x < y)? x : y;
}
I did not add a new test to the regression suite as it is very simple and
there are existing tests which cover this, for example
gcc.c-torture/execute/mode-dependent-address.c and
gcc.c-torture/execute/pr68249.c
Regression test is OK, tested with the following command:
make -k check-gcc RUNTESTFLAGS=--target_board=rl78-sim

Please let me know if this is OK, Thank you!
Sebastian

Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 255468)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,8 @@
+2017-12-07  Sebastian Perta  
+
+   * config/rl78/rl78.md: New define_expand "smindi3".
+   
+
 2017-12-07  Tamar Christina  
 
PR target/82641
Index: gcc/config/rl78/rl78.md
===
--- gcc/config/rl78/rl78.md (revision 255468)
+++ gcc/config/rl78/rl78.md (working copy)
@@ -718,3 +718,13 @@
   [(set_attr "valloc" "macax")
(set_attr "is_g13_muldiv_insn" "yes")]
 )
+
+(define_expand "smindi3"
+ [(set (match_operand:DI  0 "nonimmediate_operand" "")
+   (smin:DI (match_operand:DI 1 "general_operand"  "")
+(match_operand:DI 2 "general_operand"  "")))
+   ]
+  "optimize_size"
+  "rl78_emit_libcall (\"__smindi3\", SMIN, DImode, DImode, 3, operands);
+   DONE;"
+)
Index: libgcc/ChangeLog
===
--- libgcc/ChangeLog(revision 255468)
+++ libgcc/ChangeLog(working copy)
@@ -1,3 +1,8 @@
+2017-12-07  Sebastian Perta  
+ 
+   * config/rl78/smindi3.S: New assembly file.
+   * config/rl78/t-rl78: Added smindi3.S to LIB2ADD.
+
 2017-11-30  Michael Meissner  
 
* config/rs6000/_mulkc3.c (__mulkc3): Add forward declaration.
Index: libgcc/config/rl78/t-rl78
===
--- libgcc/config/rl78/t-rl78   (revision 255468)
+++ libgcc/config/rl78/t-rl78   (working copy)
@@ -32,7 +32,8 @@
$(srcdir)/config/rl78/fpmath-sf.S \
$(srcdir)/config/rl78/cmpsi2.S \
$(srcdir)/config/rl78/adddi3.S \
-   $(srcdir)/config/rl78/subdi3.S
+   $(srcdir)/config/rl78/subdi3.S \
+   $(srcdir)/config/rl78/smindi3.S
 
 LIB2FUNCS_EXCLUDE = _clzhi2 _clzsi2 _ctzhi2 _ctzsi2 \
   _popcounthi2 _popcountsi2 \
Index: libgcc/config/rl78/smindi3.S
===
--- libgcc/config/rl78/smindi3.S(nonexistent)
+++ libgcc/config/rl78/smindi3.S(working copy)
@@ -0,0 +1,76 @@
+;   Copyright (C) 2017 Free Software Foundation, Inc.
+;   Contributed by Sebastian Perta.
+; 
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+; 
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+; 
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; .
+
+
+#include "vregs.h"
+
+.text
+
+START_FUNC ___smindi3
+
+; copy first argument/operand to the output registers
+movw   ax, [sp+4]
+movw   r8, ax
+movw   ax, [sp+6]
+movw   r10, ax
+movw   ax, [sp+8]
+movw   r12, ax
+movw   ax, [sp+10]
+movw   r14, ax
+
+; use 16-bit compares from the most significant words down to the least significant ones
+movw   ax, [sp+18]
+cmpw   ax, r14
+xor1   CY, a.7   ; first compare accounts for the
+xor1   CY, r15.7 ; sign bits of the two operands
+bc $.L1
+bnz$.L2
+
+movw   ax, [sp+16]
+cmpw   ax, r12
+bc $.L1
+bnz$.L2
+
+movw   ax, [sp+14]
+cmpw   ax, r10
+bc $.L1
+bnz$.L2
+
+movw   ax, [sp+12]
+cmpw   ax, r8
+bc $.L1
+ret
+
+.L1:
+; copy second argument/operand to the output registers
+movw   ax, [sp+12]
+movw   r8, ax
+movw   ax, [sp+14]
+movw 

Re: [PATCH] Fix up tree-ssa/strn{cat,cpy-2}.c (PR tree-optimization/83075)

2017-12-07 Thread Richard Biener
On December 7, 2017 5:55:47 PM GMT+01:00, Jakub Jelinek  
wrote:
>On Wed, Dec 06, 2017 at 05:30:53PM +0100, Jakub Jelinek wrote:
>> On Wed, Dec 06, 2017 at 09:20:15AM -0700, Martin Sebor wrote:
>> > Attached is a patch with the comment updated/simplified.
>> > The tests do the job they need to do today so I just removed
>> > the useless attribute but otherwise left them unchanged.  If
>> > you would like to enhance them in some way please feel free.
>> 
>> Ok for trunk, with a minor nit.  I'll tweak the tests incrementally
>> when it is in.
>
>So here is the fix for those testcases.
>
>They didn't test what they meant to test, because they didn't FAIL
>without the patch.  That is because the bug was that the -W* option
>affected code generation, so with -O2 -Wno-stringop-overflow it didn't
>trigger it.
>I've changed the tests to test both a separate noipa function, where the
>optimizers don't know about the aliasing and string lengths from the caller
>(in that case the test does more verification, including the content of the
>whole buffer and the individual values of the lengths), and what you did
>before in main.
>
>Regtested on x86_64-linux and i686-linux, verified that with the
>r255446 tree-ssa-strlen.c change reverted it FAILs.
>
>Ok for trunk?

OK. 

Richard. 

>2017-12-07  Jakub Jelinek  
>
>   PR tree-optimization/83075
>   * gcc.dg/tree-ssa/strncpy-2.c: Use size_t instead of unsigned, add
>   separate function with noipa attribute to also verify behavior when
>   optimizers don't know the sizes and aliasing, verify resulting sizes
>   and array content.  Add -Wstringop-overflow to dg-options.
>   * gcc.dg/tree-ssa/strncat.c: Likewise.
>
>--- gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c.jj   2017-12-06
>20:11:54.0 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c  2017-12-07
>13:31:32.719722416 +0100
>@@ -1,19 +1,35 @@
>-/* PR tree-optimization/83075 - Invalid strncpy optimization
>-   { dg-do run }
>-   { dg-options "-O2 -Wno-stringop-overflow" } */
>+/* PR tree-optimization/83075 - Invalid strncpy optimization */
>+/* { dg-do run } */
>+/* { dg-options "-O2 -Wstringop-overflow" } */
> 
>-int main (void)
>+typedef __SIZE_TYPE__ size_t;
>+
>+__attribute__((noipa)) size_t
>+foo (char *p, char *q, size_t *r)
> {
>-  char a[8] = "";
>+  size_t n0 = __builtin_strlen (p);
>+  __builtin_strncpy (q, p, n0);   /* { dg-warning "specified bound
>depends on the length" } */
>+  size_t n1 = __builtin_strlen (p);
>+  *r = n0;
>+  return n1;
>+}
> 
>+int
>+main ()
>+{
>+  char a[8] = "";
>   __builtin_strcpy (a, "123");
>-
>-  unsigned n0 = __builtin_strlen (a);
>-
>-  __builtin_strncpy (a + 3, a, n0);
>-
>-  unsigned n1 = __builtin_strlen (a);
>-
>+  size_t n0 = __builtin_strlen (a);
>+  __builtin_strncpy (a + 3, a, n0);   /* { dg-warning "specified bound
>depends on the length" } */
>+  size_t n1 = __builtin_strlen (a);
>   if (n1 == n0)
> __builtin_abort ();
>+  a[6] = '7';
>+  __builtin_strcpy (a, "456");
>+  size_t n2;
>+  if (foo (a, a + 3, &n2) != 7 || n2 != 3)
>+__builtin_abort ();
>+  if (__builtin_memcmp (a, "4564567", sizeof "4564567"))
>+__builtin_abort ();
>+  return 0;
> }
>--- gcc/testsuite/gcc.dg/tree-ssa/strncat.c.jj 2017-12-06
>20:11:54.0 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/strncat.c2017-12-07
>13:31:09.568008365 +0100
>@@ -1,19 +1,35 @@
>-/* PR tree-optimization/83075 - Invalid strncpy optimization
>-   { dg-do run }
>-   { dg-options "-O2 -Wno-stringop-overflow" } */
>+/* PR tree-optimization/83075 - Invalid strncpy optimization */
>+/* { dg-do run } */
>+/* { dg-options "-O2 -Wstringop-overflow" } */
> 
>-int main (void)
>+typedef __SIZE_TYPE__ size_t;
>+
>+__attribute__((noipa)) size_t
>+foo (char *p, char *q, size_t *r)
> {
>-  char a[8] = "";
>+  size_t n0 = __builtin_strlen (p);
>+  __builtin_strncat (q, p, n0);   /* { dg-warning "specified bound
>depends on the length" } */
>+  size_t n1 = __builtin_strlen (p);
>+  *r = n0;
>+  return n1;
>+}
> 
>+int
>+main ()
>+{
>+  char a[8] = "";
>   __builtin_strcpy (a, "123");
>-
>-  unsigned n0 = __builtin_strlen (a);
>-
>-  __builtin_strncat (a + 3, a, n0);
>-
>-  unsigned n1 = __builtin_strlen (a);
>-
>+  size_t n0 = __builtin_strlen (a);
>+  __builtin_strncat (a + 3, a, n0);   /* { dg-warning "specified bound
>depends on the length" } */
>+  size_t n1 = __builtin_strlen (a);
>   if (n1 == n0)
> __builtin_abort ();
>+  a[6] = '7';
>+  __builtin_strcpy (a, "456");
>+  size_t n2;
>+  if (foo (a, a + 3, &n2) != 6 || n2 != 3)
>+__builtin_abort ();
>+  if (__builtin_memcmp (a, "456456\0", sizeof "456456\0"))
>+__builtin_abort ();
>+  return 0;
> }
>
>
>   Jakub



Re: [PATCH] Fix up tree-ssa/strn{cat,cpy-2}.c (PR tree-optimization/83075)

2017-12-07 Thread Martin Sebor

On 12/07/2017 09:55 AM, Jakub Jelinek wrote:

On Wed, Dec 06, 2017 at 05:30:53PM +0100, Jakub Jelinek wrote:

On Wed, Dec 06, 2017 at 09:20:15AM -0700, Martin Sebor wrote:

Attached is a patch with the comment updated/simplified.
The tests do the job they need to do today so I just removed
the useless attribute but otherwise left them unchanged.  If
you would like to enhance them in some way please feel free.


Ok for trunk, with a minor nit.  I'll tweak the tests incrementally
when it is in.


So here is the fix for those testcases.

They didn't test what they meant to test, because they didn't FAIL
without the patch.  That is because the bug was that the -W* option
affected code generation, so with -O2 -Wno-stringop-overflow it didn't
trigger it.


Doh!  I suppose that underscores that the right way to write test
cases for optimization bugs is to prune warnings out of their output
rather than suppressing them via -Wno-foo.  I've done the former
for this very reason a number of times but clearly fell into the
trap of suppressing the warnings in this instance.  Thanks for
pointing it out!
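
(For the record, a minimal sketch of the pruning approach -- the test
header below is only illustrative, not one of the actual tests:)

/* { dg-do run } */
/* { dg-options "-O2 -Wall" } */
/* Keep the warning enabled so it cannot affect code generation, but
   prune it from the output so it is not counted as an excess error.  */
/* { dg-prune-output "-Wstringop-overflow" } */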


I've changed the tests to test both in a separate noipa function where
it doesn't know about the aliasing and string lengths from the caller,
in that case it does more verifications, including the content of the
whole buffer, and the individual values of the lengths,
and what you did before.


I don't have an opinion on the rest of these changes.  I do want
to make one comment about runtime tests.  I fairly regularly run
tests with cross-compilers on the build machine.  This lets me
verify that compile-only tests pass but it doesn't do anything
for tests that need to run.  In fact, with the current mixture
of all kinds of tests in the same directory, it pretty much rules
out drawing any conclusions from test results in this setup.  So
while I appreciate the additional testing done by the runtime
tests, I think ideally, having compile time only tests would be
the baseline requirement and runtime tests would be a separate
layer that would provide additional validation when possible.

Martin



Regtested on x86_64-linux and i686-linux, verified that with the
r255446 tree-ssa-strlen.c change reverted it FAILs.

Ok for trunk?

2017-12-07  Jakub Jelinek  

PR tree-optimization/83075
* gcc.dg/tree-ssa/strncpy-2.c: Use size_t instead of unsigned, add
separate function with noipa attribute to also verify behavior when
optimizers don't know the sizes and aliasing, verify resulting sizes
and array content.  Add -Wstringop-overflow to dg-options.
* gcc.dg/tree-ssa/strncat.c: Likewise.

--- gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c.jj2017-12-06 
20:11:54.0 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/strncpy-2.c   2017-12-07 13:31:32.719722416 
+0100
@@ -1,19 +1,35 @@
-/* PR tree-optimization/83075 - Invalid strncpy optimization
-   { dg-do run }
-   { dg-options "-O2 -Wno-stringop-overflow" } */
+/* PR tree-optimization/83075 - Invalid strncpy optimization */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wstringop-overflow" } */

-int main (void)
+typedef __SIZE_TYPE__ size_t;
+
+__attribute__((noipa)) size_t
+foo (char *p, char *q, size_t *r)
 {
-  char a[8] = "";
+  size_t n0 = __builtin_strlen (p);
+  __builtin_strncpy (q, p, n0);/* { dg-warning "specified bound 
depends on the length" } */
+  size_t n1 = __builtin_strlen (p);
+  *r = n0;
+  return n1;
+}

+int
+main ()
+{
+  char a[8] = "";
   __builtin_strcpy (a, "123");
-
-  unsigned n0 = __builtin_strlen (a);
-
-  __builtin_strncpy (a + 3, a, n0);
-
-  unsigned n1 = __builtin_strlen (a);
-
+  size_t n0 = __builtin_strlen (a);
+  __builtin_strncpy (a + 3, a, n0);/* { dg-warning "specified bound depends on 
the length" } */
+  size_t n1 = __builtin_strlen (a);
   if (n1 == n0)
 __builtin_abort ();
+  a[6] = '7';
+  __builtin_strcpy (a, "456");
+  size_t n2;
+  if (foo (a, a + 3, &n2) != 7 || n2 != 3)
+__builtin_abort ();
+  if (__builtin_memcmp (a, "4564567", sizeof "4564567"))
+__builtin_abort ();
+  return 0;
 }
--- gcc/testsuite/gcc.dg/tree-ssa/strncat.c.jj  2017-12-06 20:11:54.0 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/strncat.c 2017-12-07 13:31:09.568008365 
+0100
@@ -1,19 +1,35 @@
-/* PR tree-optimization/83075 - Invalid strncpy optimization
-   { dg-do run }
-   { dg-options "-O2 -Wno-stringop-overflow" } */
+/* PR tree-optimization/83075 - Invalid strncpy optimization */
+/* { dg-do run } */
+/* { dg-options "-O2 -Wstringop-overflow" } */

-int main (void)
+typedef __SIZE_TYPE__ size_t;
+
+__attribute__((noipa)) size_t
+foo (char *p, char *q, size_t *r)
 {
-  char a[8] = "";
+  size_t n0 = __builtin_strlen (p);
+  __builtin_strncat (q, p, n0);/* { dg-warning "specified bound 
depends on the length" } */
+  size_t n1 = __builtin_strlen (p);
+  *r = n0;
+  return n1;
+}

+int
+main ()
+{
+  char a[8] = "";
   __builtin_strcpy (a, "123");
-
-  unsigned n0 

[PATCH,libstdc++ testsuite] Fix hyperg failures

2017-12-07 Thread David Edelsohn
When the testcases were regenerated due to PR libstdc++/66689,
-ffp-contract=off was omitted from (at least) the hyperg
check_value.cc testcases, causing runtime regression.  This patch
restores the dg-options directive.
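
(For context, an illustrative sketch -- not part of the patch -- of why
contraction perturbs these value checks:)

/* With contraction allowed, the compiler may turn a*b + c into a fused
   multiply-add.  The fused form rounds only once, so the last bits of
   the result can differ from a separately rounded multiply followed by
   an add, which is enough to trip an exact value comparison.  */
double maybe_fused (double a, double b, double c)
{
  return a * b + c;   /* may become a single fma unless -ffp-contract=off */
}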

This returns the libstdc++ testsuite to all passing on AIX.

Bootstrapped on powerpc-ibm-aix7.2.0.0

Okay?

Thanks, David

PR libstdc++/83120
* testsuite/ext/special_functions/hyperg/check_value.cc: Add
-ffp-contract=off.
* 
testsuite/tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc:
Same.

Index: testsuite/ext/special_functions/hyperg/check_value.cc
===
--- testsuite/ext/special_functions/hyperg/check_value.cc
(revision 255468)
+++ testsuite/ext/special_functions/hyperg/check_value.cc   (working copy)
@@ -1,5 +1,5 @@
 // { dg-do run { target c++11 } }
-// { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
+// { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__ -ffp-contract=off" }
 //
 // Copyright (C) 2016-2017 Free Software Foundation, Inc.
 //
Index: 
testsuite/tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc
===
--- 
testsuite/tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc
(revision 255468)
+++ 
testsuite/tr1/5_numerical_facilities/special_functions/17_hyperg/check_value.cc
(working copy)
@@ -1,5 +1,5 @@
 // { dg-do run { target c++11 } }
-// { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__" }
+// { dg-options "-D__STDCPP_WANT_MATH_SPEC_FUNCS__ -ffp-contract=off" }
 //
 // Copyright (C) 2016-2017 Free Software Foundation, Inc.
 //


Re: [PATCH] Fix up tree-ssa/strn{cat,cpy-2}.c (PR tree-optimization/83075)

2017-12-07 Thread Jeff Law
On 12/07/2017 11:03 AM, Martin Sebor wrote:
> On 12/07/2017 09:55 AM, Jakub Jelinek wrote:
>> On Wed, Dec 06, 2017 at 05:30:53PM +0100, Jakub Jelinek wrote:
>>> On Wed, Dec 06, 2017 at 09:20:15AM -0700, Martin Sebor wrote:
 Attached is a patch with the comment updated/simplified.
 The tests do the job they need to do today so I just removed
 the useless attribute but otherwise left them unchanged.  If
 you would like to enhance them in some way please feel free.
>>>
>>> Ok for trunk, with a minor nit.  I'll tweak the tests incrementally
>>> when it is in.
>>
>> So here is the fix for those testcases.
>>
>> They didn't test what they meant to test, because they didn't FAIL
>> without the patch.  That is because the bug was that the -W* option
>> affected code generation, so with -O2 -Wno-stringop-overflow it didn't
>> trigger it.
> 
> Doh!  I suppose that underscores that the right way to write test
> cases for optimization bugs is to prune warnings out of their output
> rather than suppressing them via -Wno-foo.  I've done the former
> for this very reason a number of times but clearly fell into the
> trap of suppressing the warnings in this instance.  Thanks for
> pointing it out!
> 
>> I've changed the tests to test both in a separate noipa function where
>> it doesn't know about the aliasing and string lengths from the caller,
>> in that case it does more verifications, including the content of the
>> whole buffer, and the individual values of the lengths,
>> and what you did before.
> 
> I don't have an opinion on the rest of these changes.  I do want
> to make one comment about runtime tests.  I fairly regularly run
> tests with cross-compilers on the build machine.  This lets me
> verify that compile-only tests pass but it doesn't do anything
> for tests that need to run.  In fact, with the current mixture
> of all kinds of tests in the same directory, it pretty much rules
> out drawing any conclusions from test results in this setup.  So
> while I appreciate the additional testing done by the runtime
> tests, I think ideally, having compile time only tests would be
> the baseline requirement and runtime tests would be a separate
> layer that would provide additional validation when possible.
So note if you set up a deeper cross compilation environment (using
sysroot) you can cross-compile down to an executable.  Then you ought to
be able to use qemu's user emulation to run the resulting binary.

Alternately, for many embedded targets if you have a cross compilation
environment (including newlib) you can then use the old ISA simulator to
run execution tests.

We don't do those things much anymore, but we certainly have in the past
and knowing how to do so is valuable.

jeff


Allow _Alignas in compound literals (C11 DR#444)

2017-12-07 Thread Joseph Myers
C11 DR#444 dealt with how C11 intended to allow alignment specifiers
on struct and union members, but failed to include that in the syntax.
The final resolution of that DR also allows alignment specifiers in
type names in compound literals (in order to apply an increased
alignment to the unnamed object created by the compound literal), but
not other cases of type names.

This patch implements allowing alignment specifiers in compound
literals and adds tests for the resolution of the DR (including that
they are allowed on struct and union members, which GCC already
implemented).  Because the parser has to parse the parenthesized type
name of a compound literal before it can tell that it's a compound
literal (rather than, depending on the context, a cast expression or
sizeof (type-name) or _Alignof (type-name)), this means _Alignas
specifiers are allowed syntactically in those contexts and then an
error is given once it's known to be an invalid use (whereas _Alignas
specifiers are disallowed syntactically in other contexts where type
names can occur and a compound literal is not possible).
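
(To make the accepted and rejected forms concrete -- this sketch is
illustrative rather than taken from the new tests:)

struct s { _Alignas (8) int i; };   /* member: already accepted by GCC */

void
f (void)
{
  /* Compound literal: the DR resolution allows _Alignas here; it raises
     the alignment of the unnamed object the literal creates.  */
  int *p = &(_Alignas (64) int) { 0 };

  /* Other type names: still an error once the use is known, e.g.
     sizeof (_Alignas (64) int) is rejected.  */
  (void) p;
}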

Bootstrapped with no regressions on x86_64-pc-linux-gnu.  Applied to 
mainline.

gcc/c:
2017-12-07  Joseph Myers  

* c-decl.c (build_compound_literal): Add parameter alignas_align
and set alignment of decl if nonzero.
* c-parser.c (c_keyword_starts_typename): Allow RID_ALIGNAS.
(c_parser_declspecs): Allow RID_ALIGNAS to follow a type, like a
qualifier.
(c_parser_struct_declaration): Update syntax comment.
(c_parser_type_name): Add alignas_ok argument and pass it to
c_parser_declspecs.
(c_parser_cast_expression): Pass true to c_parser_type_name and
give error if a cast used an _Alignas specifier.
(c_parser_sizeof_expression): Pass true to c_parser_type_name and
give error if sizeof (type-name) used an _Alignas specifier.
(c_parser_alignof_expression): Pass true to c_parser_type_name and
give error if _Alignof (type-name) used an _Alignas specifier.
(c_parser_postfix_expression_after_paren_type): Check specified
alignment for a compound literal and pass it to
build_compound_literal.
* c-parser.h (c_parser_type_name): Update prototype.
* c-tree.h (build_compound_literal): Update prototype.

gcc/testsuite:
2017-12-07  Joseph Myers  

* gcc.dg/c11-align-7.c, gcc.dg/c11-align-8.c,
gcc.dg/c11-align-9.c, gcc.dg/gnu11-align-1.c: New tests.
* gcc.dg/c11-align-5.c (test): Update expected error for sizeof
case.

Index: gcc/c/c-decl.c
===
--- gcc/c/c-decl.c  (revision 255464)
+++ gcc/c/c-decl.c  (working copy)
@@ -5263,10 +5263,13 @@ mark_forward_parm_decls (void)
literal, which may be an incomplete array type completed by the
initializer; INIT is a CONSTRUCTOR at LOC that initializes the compound
literal.  NON_CONST is true if the initializers contain something
-   that cannot occur in a constant expression.  */
+   that cannot occur in a constant expression.  If ALIGNAS_ALIGN is nonzero,
+   it is the (valid) alignment for this compound literal, as specified
+   with _Alignas.  */
 
 tree
-build_compound_literal (location_t loc, tree type, tree init, bool non_const)
+build_compound_literal (location_t loc, tree type, tree init, bool non_const,
+   unsigned int alignas_align)
 {
   /* We do not use start_decl here because we have a type, not a declarator;
  and do not use finish_decl because the decl should be stored inside
@@ -5290,6 +5293,11 @@ tree
   DECL_IGNORED_P (decl) = 1;
   TREE_TYPE (decl) = type;
   c_apply_type_quals_to_decl (TYPE_QUALS (strip_array_types (type)), decl);
+  if (alignas_align)
+{
+  SET_DECL_ALIGN (decl, alignas_align * BITS_PER_UNIT);
+  DECL_USER_ALIGN (decl) = 1;
+}
   store_init_value (loc, decl, init, NULL_TREE);
 
   if (TREE_CODE (type) == ARRAY_TYPE && !COMPLETE_TYPE_P (type))
Index: gcc/c/c-parser.c
===
--- gcc/c/c-parser.c(revision 255464)
+++ gcc/c/c-parser.c(working copy)
@@ -504,6 +504,7 @@ c_keyword_starts_typename (enum rid keyword)
 case RID_ACCUM:
 case RID_SAT:
 case RID_AUTO_TYPE:
+case RID_ALIGNAS:
   return true;
 default:
   if (keyword >= RID_FIRST_INT_N
@@ -2594,7 +2595,8 @@ c_parser_declspecs (c_parser *parser, struct c_dec
 has simply forgotten a semicolon, so we exit.  */
   if ((!typespec_ok || specs->typespec_kind == ctsk_tagdef)
  && c_parser_next_tokens_start_typename (parser, la)
- && !c_parser_next_token_is_qualifier (parser))
+ && !c_parser_next_token_is_qualifier (parser)
+ && !c_parser_next_token_is_keyword (parser, RID_ALIGNAS))
break;
 
   if (c_parser_next_token_is (parser, CPP_NAME))
@@ -3225,6 +3227,7 @@ c_pa

Re: [PATCH] avoid bogus -Wstringop-overflow for strncpy with _FORTIFY_SOURCE (PR 82646)

2017-12-07 Thread Martin Sebor

On 12/07/2017 06:46 AM, Christophe Lyon wrote:

Hi Martin,


On 6 December 2017 at 00:51, Jeff Law  wrote:

On 12/05/2017 04:47 PM, Martin Sebor wrote:

PR middle-end/82646 - bogus -Wstringop-overflow with
-D_FORTIFY_SOURCE=2 on strncpy with range to a member array,

The bug points out a false positive in a call to strncpy() when
_FORTIFY_SOURCE is defined that doesn't exist otherwise.

The problem is that __builtin_strncpy buffer overflow checking
is done along with the expansion of the intrinsic in one place
and __builtin___strncpy_chk is handled differently in another,
and the two are out of sync.

The attached patch corrects the choice of arguments used for
overflow detection in __builtin___strncpy_chk and aligns
the diagnostics between the two intrinsics.

Martin

gcc-82646.diff


PR tree-optimization/82646 - bogus -Wstringop-overflow with -D_FORTIFY_SOURCE=2 
on strncpy with range to a member array

gcc/ChangeLog:

  PR tree-optimization/82646
  * builtins.c (maybe_emit_chk_warning): Use size as the bound for
  strncpy, not maxlen.

gcc/testsuite/ChangeLog:

  PR tree-optimization/82646
  * gcc.dg/builtin-stringop-chk-1.c: Adjust.
  * gcc.dg/builtin-stringop-chk-9.c: New test.

OK.



The new test fails on 32 bits platforms (arm, x86_32, aarch64 ilp32):
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 125)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 133)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 141)
FAIL: gcc.dg/builtin-stringop-chk-9.c  (test for warnings, line 149)


I believe these failures were due to bug 83296 that Richard fixed
earlier today.  With the change in my tree, the test passes for
me with the arm-linux-gnueabi cross-compiler.  Can you please
try again and let me know if the failures persist on any of your
targets?

Thanks
Martin


Re: [PATCH,libstdc++ testsuite] Fix hyperg failures

2017-12-07 Thread Jonathan Wakely

On 07/12/17 13:13 -0500, David Edelsohn wrote:

When the testcases were regenerated due to PR libstdc++/66689,
-ffp-contract=off was omitted from (at least) the hyperg
check_value.cc testcases, causing runtime regression.  This patch
restores the dg-options directive.


Ah yes, good catch.

We might also need this for x86:

// { dg-additional-options "-ffloat-store" { target { m68*-*-* || ia32 } } }



This returns the libstdc++ testsuite to all passing on AIX.

Bootstrapped on powerpc-ibm-aix7.2.0.0

Okay?


OK, thanks.



[PATCH, rs6000] Add additional builtin tests

2017-12-07 Thread Carl Love
GCC Maintainers:

The following patch adds missing test cases for various builtins.  The
patch does not make any functional changes to GCC.  

The patch has been run on:

  powerpc64le-unknown-linux-gnu (Power 8 LE)
  powerpc64le-unknown-linux-gnu (Power 8 BE)
  powerpc64le-unknown-linux-gnu (Power 9 LE)

without regressions.  

Please let me know if the following patch is acceptable.  Thanks.

  Carl Love

---

gcc/testsuite/ChangeLog:

2017-12-07  Carl Love  

* gcc.target/powerpc/altivec-12.c (main): Add tests for vec_avg.
* gcc.target/powerpc/builtins-1.c: Renamed builtins-1.h.
* gcc.target/powerpc/builtins-1.h: Add tests for vec_and,
vec_andc, vec_cpsgn, vec_merge[h|l], vec_nor, vec_or,
vec_sel, vec_xor, vec_all_[eq|ge|gt|le|lt|ne],
vec_any_[eq|ge|gt|le|lt|ne], vec_cntlz, vec_round, vec_rsqrt,
vec_rsqrte, vec_splat, vec_div, vec_mul,vec_ctf,
vec_cts, vec_ctu.
* gcc.target/powerpc/builtins-1-be.c: New file, big endian
test file for tests in builtins-1.h.
* gcc.target/powerpc/builtins-1-le.c: New file, little endian
test file for tests in builtins-1.h.
* gcc.target/powerpc/builtins-2.c (main): Add tests for vec_ctf,
vec_cts, vec_ctu.
* gcc.target/powerpc/builtins-3-runnable.c (test_result_sp): Fix printf
conversion character.
* gcc.target/powerpc/fold-vec-minmax-char.c (test[1|2|4]_min,
test[1|2]_max): Add tests for vec_max and vec_min.
* gcc.target/powerpc/fold-vec-minmax-int.c (test[4|5|7|8]_min): Add
tests for vec_min.
* gcc.target/powerpc/fold-vec-minmax-longlong.c (test[7|8|9|10]_max):
Add tests for vec_max.
* gcc.target/powerpc/fold-vec-minmax-short.c (test[4|5|7|8]_min): Add
tests for vec_min.
* gcc.target/powerpc/p8vector-builtin-8.c (foo): Add test for vec_adds,
vec_subs, vec_sum2s, vec_sum4s.
* gcc.target/powerpc/vsx-splats-p8.c: New file to test vec_splats.
* gcc.target/vsx-vector-5.c (arg1_tests_d, arg1_tests_d): Add
tests for vec_ceil, vec_floor, vec_nearbyint, vec_sqrt.
* gcc.target/vsx-vector-7.c (foo): Add tests for vec_sel.
* gcc.target/vsx-vector-abss.c: New file to test vec_abss.
---
 gcc/testsuite/gcc.target/powerpc/altivec-12.c  |  46 ++
 gcc/testsuite/gcc.target/powerpc/builtins-1-be.c   |  70 
 gcc/testsuite/gcc.target/powerpc/builtins-1-le.c   |  69 
 .../powerpc/{builtins-1.c => builtins-1.h} |  96 ---
 gcc/testsuite/gcc.target/powerpc/builtins-2.c  |  46 +-
 .../gcc.target/powerpc/builtins-3-runnable.c   |   4 +-
 .../gcc.target/powerpc/fold-vec-minmax-char.c  |  26 ++-
 .../gcc.target/powerpc/fold-vec-minmax-int.c   |  28 +++-
 .../gcc.target/powerpc/fold-vec-minmax-longlong.c  |  28 +++-
 .../gcc.target/powerpc/fold-vec-minmax-short.c |  28 +++-
 .../gcc.target/powerpc/p8vector-builtin-8.c|  46 +-
 gcc/testsuite/gcc.target/powerpc/vsx-splats-p8.c   |  69 
 gcc/testsuite/gcc.target/powerpc/vsx-vector-5.c| 180 -
 gcc/testsuite/gcc.target/powerpc/vsx-vector-7.c|  12 +-
 gcc/testsuite/gcc.target/powerpc/vsx-vector-abss.c |  94 +++
 15 files changed, 764 insertions(+), 78 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-be.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-le.c
 rename gcc/testsuite/gcc.target/powerpc/{builtins-1.c => builtins-1.h} (63%)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-splats-p8.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-abss.c

diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-12.c 
b/gcc/testsuite/gcc.target/powerpc/altivec-12.c
index 39d26940d..b0267b59b 100644
--- a/gcc/testsuite/gcc.target/powerpc/altivec-12.c
+++ b/gcc/testsuite/gcc.target/powerpc/altivec-12.c
@@ -6,10 +6,33 @@
 /* Program to test PowerPC AltiVec instructions.  */
 
 #include 
+#include 
 
 extern void abort (void);
 #define CHECK_IF(E) if(!(E)) abort()
 
+vector char sca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};
+vector char scb = {6,4,8,3,1,9,2,6,10,3,7,11,15,12,14,13};
+vector char sc_expected = {3,4,8,2,3,9,2,6,10,3,7,11,15,12,14,13};
+vector char scz;
+vector unsigned char uca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};
+vector unsigned char ucb = {6,4,8,3,1,9,2,6,10,3,7,11,15,12,14,13};
+vector unsigned char uc_expected = {3,4,8,2,3,9,2,6,10,3,7,11,15,12,14,13};
+vector char ucz;
+
+vector short int ssia = {9, 16, 25, 36};
+vector short int ssib = {-8, -27, -64, -125};
+vector short int ssiz;
+
+vector short unsigned int ssua = {9, 16, 25, 36};
+vector short unsigned int ssub = {8, 27, 64, 125};
+vector short unsigned int ssuz;
+
+vector unsigned int uia = {22, 13, 24, 39};
+vector unsigned int uib = {8, 7, 4, 15};
+vector unsign

Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Jeff Law
On 11/25/2017 05:53 PM, Martin Sebor wrote:
> On 11/22/2017 04:50 PM, Jeff Law wrote:
>> On 11/16/2017 02:29 PM, Martin Sebor wrote:
 On 10/23/2017 08:42 PM, Martin Sebor wrote:
> Attached is a reworked solution to enhance -Wrestrict while
> avoiding changing tree-vrp.c or any other VRP machinery.  Richard,
> in considering you suggestions I realized that the ao_ref struct
> isn't general enough to detect the kinds of problems I needed to
> etect (storing bit-offsets in HOST_WIDE_INT means out-of-bounds
> offsets cannot be represented or detected, leading to either false
> positives or false negatives).
>> So this seems to be a recurring theme, which makes me wonder if we
>> should have an ao_ref-like structure that deals in bytes rather than
>> bits and make it a first class citizen.   There's certainly clients that
>> work on bits and certainly clients that would prefer to work on bytes.
> 
> The class I introduced serves a different purpose than ao_ref and
> stores a lot more data.
> 
> In https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82042#c3 Richard
> says that "this [offset being HOST_WIDE_INT and storing bits] is
> a know deficiency in ao_ref 'offset' (and also size and maxsize).
> Blowing up to offset_int isn't really a good idea."
Understood.  My point was that we seem to be stumbling into the same
class of problems in more than one place.  So we may want to consider
having a class that looks like ao_ref, but which operates in byte-sized
chunks.

I don't think it should be a requirement to go forward, though.  It's
more of a long term question/concern.


>> So I realize you don't have any code to answer this question, but your
>> thoughts on how much we might loose effectiveness if we didn't do the
>> warnings within gimple_fold_builtin_, but instead broke out a
>> distinct pass to handle warnings?  My biggest design concern is the
>> warning from within the folder aspects.
> 
> With optimization enabled the folder folds things like this into
> MEM_REF which would prevent the warning unless the pass pass ran
> with early optimizations.
> 
>   struct S { char a[7], b[7]; };
> 
>   void sink (void*);
> 
>   void f (void)
>   {
>     struct S s;
>     sink (&s);
> 
>     unsigned n = sizeof s.a;
>     memcpy (s.a + 4, s.b, n);
> 
>     sink (&s);
>   }
> 
> Without optimization it isn't folded and so the memcpy call is
> emitted (but there is no warning).  The overlapping MEM_REF copy
> is safe but the overlapping memcpy call is not, so warning on it
> is helpful.
Understood.  I'm just trying to get a sense for how an implementation as
a separate pass would affect the quality of the diags we generate.  I
expect we'd lose some, the question is do we lose so many that the
ultimate result really isn't useful in practice and to balance that
against the various pros/cons of the different approaches.


> 
> I prototyped a pass over Thanksgiving for the -Wrestict code from
> builtins.c to and ran it just after the sprintf pass.  There are
> still lots of failures in the new test because it's non-trivial
> to compute the same data as builtins.c does (the data is also
> computed for -Wstringop-overflow).  I could move all that code
> to the new pass as well to clear up the failures.  I could also
> arrange for the pass to run multiple times to catch cases like
> the one above.  I suspect this would trigger some false negatives
> and/or positives due to the differences in range information, so
> it would mean some cleanup in the tests.  What I can't do without
> seriously compromising the feature is avoid calling into the new
> pass from tree-ssa-strlen.c, but that would presumably be fine.
I note that you posted that as a follow-up.  I'll take a look at it
momentarily.


> 
> With that said, and although I'm not necessarily opposed to it,
> moving all this code into its own pass would mean a non-trivial
> amount of work for what seems like a questionable benefit.  All
> it would achieve, as far as I can see, is duplicating some of
> the work that's already been done: iterating over the GIMPLE,
> testing for built-ins to handle, and extracting their arguments.
> What exactly do you hope to accomplish by moving it into its own
> pass?  (If it's a matter of keeping the warning code separate
> that can easily be done by moving it to its own file.)
By moving the warning into its own pass we get several benefits.

1. There is a general desire not to mix diagnostics and code
transformations.  Splitting it out is in line with that goal.

2. We get the flexibility to put the diagnostic pass wherever in the
pipeline makes the most sense.  We can even have early/late versions to
try and increase precision

3. Sometimes, but not always the analysis can be shared.  In that model
the underlying analysis engine is where the bulk of the work happens.
The optimizations and diagnostics are just clients of the analysis
module.  It also encourages re-use of the analysis module.  Oh how I
wish

Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Jeff Law
On 11/27/2017 05:44 AM, Richard Biener wrote:
> 
> +
> +  if (const strinfo *chksi = olddsi ? olddsi : dsi)
> +if (si
> +   && !check_bounds_or_overlap (stmt, chksi->ptr, si->ptr, NULL_TREE, 
> len))
> +  /* Avoid transforming strcpy when out-of-bounds offsets or
> +overlapping access is detected.  */
> +  return;
> 
> as I said elsewhere diagnostics should not prevent optimization.  Your warning
> code isn't optimization-grade (that is, false positives are possible).
> 
> +   if (!check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE, slen))
> + /* Avoid transforming strcat when out-of-bounds offsets or
> +overlapping access is detected.  */
> + return;
> +  }
> 
> Likewise.
> 
> +  if (!check_bounds_or_overlap (stmt, dst, sptr, dstlen, srcsize))
> + /* Avoid transforming strcat when out-of-bounds offsets or
> +overlapping access is detected.  */
> +   return;
> 
> Likewise.
I'll note that if we separate diagnostics from optimization these become
a non-issue.  The diagnostic bits simply wouldn't change code, plain and
simple... :-)

> 
> I have no strong opinion against the "code duplication" Jeff mentions with
> regard to builtin_access and friends.  The relation to ao_ref and friends
> could be documented a bit and how builtin_memref/builtin_access are
> not suitable for optimization.
At the least, documentation around not using those classes to drive any
code generation changes/decisions seems wise.  We have the same issue
around compute_objsize/check_memop_sizes for the stringop-overflow patch
if I understand those bits correctly (I've got messages to get back to
on that thread as well...)

Jeff


Re: [PATCH] Further improvements for the (T)(P+A)-(T)(P+B) folding (PR sanitizer/81281)

2017-12-07 Thread Marc Glisse

On Thu, 7 Dec 2017, Jakub Jelinek wrote:


When committing the previous PR81281 patch, I've removed all the @@0 cases
on plus:c, used @0 instead, to make sure we don't regress.

This patch readds those where possible.  For the cases where there is
just P and A, it was mostly a matter of @@0 and convert? instead of convert
plus using type from @1 instead of @0, though if @0 is INTEGER_CST, what we
usually end up with is a (plus (convert (plus @1 @0) @2) where @2 negated
is equal to @0, so the patch adds a simplification for that too.


There may be a bit of overlap with "(A +- CST1) +- CST2 -> A + CST3" 
elsewhere in the file. Do you think there is a convenient way to 
generalize it so it also covers this case, or does it look better to keep 
them separate? (I haven't had time to study your recent patches yet, so I 
don't have an opinion)
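
(For concreteness, a minimal sketch of the kind of source the existing
constant fold covers -- the function name is only illustrative:)

/* (A + CST1) + CST2 is already combined into a single A + CST3,
   e.g. the two additions below become "a + 8".  */
int
add_consts (int a)
{
  return (a + 3) + 5;
}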


--
Marc Glisse


Re: [PATCHv3] Add a warning for invalid function casts

2017-12-07 Thread Bernd Edlinger
On 12/06/17 23:35, Jason Merrill wrote:
> On Fri, Dec 1, 2017 at 7:42 AM, Bernd Edlinger
>  wrote:
>> this version of the patch improves the heuristic check to take the
>> target hook into account, to handle cases correctly when both or only
>> one parameter is _not_ promoted to int.
> 
> In looking at this, I discovered that the argument to
> promote_prototypes should be the function type, not the parameter
> type; the existing uses in the C++ front end were wrong.
> 

Bah, sorry.

Yes, it looks like there are at least two more target hooks that change
the type promotion rules later on: targetm.calls.promote_function_mode
and PROMOTE_MODE.

In the Ada FE we pass NULL_TREE to promote_prototypes, which seems to
ask whether the target wants type promotion in principle.  So it returns
true if some function may promote, and false if no function promotes.
At least this is how the sh-target handles this parameter.
This is BTW the only target that uses the argument of this callback.

So I would think for the purpose of this warning the following check
should be sufficient:

   if (INTEGRAL_TYPE_P (t1)
   && INTEGRAL_TYPE_P (t2)
   && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)
   && (TYPE_UNSIGNED (t1) == TYPE_UNSIGNED (t2)
   || !targetm.calls.promote_prototypes (NULL_TREE)
   || TYPE_PRECISION (t1) >= TYPE_PRECISION (integer_type_node)))
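
(To illustrate the kind of case this distinguishes -- a hedged sketch,
the names are mine, not from the patch:)

/* Two function types whose parameters differ only in signedness at
   sub-int precision.  Whether such a cast is harmless depends on
   whether the target promotes the argument to int first (and thus
   sign- or zero-extends it differently), which is what the
   promote_prototypes test above tries to capture.  */
int takes_schar (signed char);
typedef int (*fp_uchar) (unsigned char);
fp_uchar p = (fp_uchar) takes_schar;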


What do you think?
Is the patch OK with this change?


Thanks
Bernd.


Re: [PR80693] drop value of parallel SETs dropped by combine

2017-12-07 Thread Alexandre Oliva
On Jul  7, 2017, Segher Boessenkool  wrote:

> I meant, just double check if your new
> code does the correct thing for the set count.

Sorry this took me so long to get back to.  Even this was difficult for
me to answer for sure, then and now.

We don't (and can't?) know whether the REG_UNUSED note originally
pertained to a clobber or a set, but I think that doesn't matter: unless
we've reused the REG in i2 as a scratch, I think we should decrement the
set count, because we used to have a SET or a CLOBBER that is now gone.

Looking back at the issue, I realized we should keep/add the REG_UNUSED
note to i2, if the reg is mentioned in i2, possibly turned into REG_DEAD
if i2 is referenced but not set.

Still, I'm concerned I haven't caught all of the cases in which
adjusting REG_N_SETS would be needed: we might have dropped multiple
SETs of the same pseudo, each with its own REG_UNUSED note (say, from
all of i3, i2, i1, and i0), and the current logic will only decrement
REG_N_SETS once, and only if i2 no longer sets the register.

I'm also concerned that the logic for when the REG is set or referenced
in i3 is incomplete: references in i3 might have come from any of the
previous insns, even if intervening sets of the same register were
substituted and thus removed.  Consider the following nightmarish scenario:

i0: (parallel [(set (reg CC) (op1 (reg A)))
   (set (reg A) (plus (reg A) (const_int 1)))])
(REG_UNUSED (reg A))
i1: (set (reg A) (ne (reg CC) (const_int 0)))
(REG_DEAD (reg CC))
i2: (parallel [(set (reg CC) (op2 (reg A)))
   (set (reg A) (plus (reg A) (const_int 1)))])
(REG_UNUSED (reg A)))
i3: (set (reg A) (eq (reg CC) (const_int 0)))
(REG_DEAD (reg CC))

we might turn that into say:

i2: (set (reg CC) (op3 (reg A)))
(REG_DEAD (reg A))
i3: (set (reg A) (op4 (reg CC)))
(REG_DEAD (reg CC))

and now we'd have to somehow figure out that we're to discount the two
unused sets of reg A, those from i0 and i2, and to turn either
REG_UNUSED note about reg A into a REG_DEAD note to be placed at i2.  A
is set at i3, so combine should record its new value, but if it's
computed in terms of the scratch CC and a much-older A, will we get the
correct value?  Or is the value unchanged because it's the output of the
latest insn?

Now, consider this slightly simpler scenario (trying to combine only the
first 3 insns):

i0: nil
i1: (parallel [(set (reg CC) (op1 (reg A)))
   (set (reg A) (plus (reg A) (const_int 1)))])
(REG_UNUSED (reg A))
i2: (set (reg A) (ne (reg CC) (const_int 0)))
(REG_DEAD (reg CC))
i3: (parallel [(set (reg CC) (op2 (reg A)))
   (set (reg A) (plus (reg A) (const_int 1)))])
(REG_UNUSED (reg A)))

this might combine into:

i2: (set (reg A) (op5 (reg A)))
i3: (set (reg CC) (op6 (reg A)))
(REG_DEAD (reg A))

and now we have removed 3 sets to A, but added 1 by splitting within
combine using A as scratch.  Would we then have to figure out that for
each of the REG_UNUSED notes pertaining to A we have to drop the
REG_N_SETS count by 1, although A remains used in i3, and set and used
in i2?  I don't see how.

I see that combine would record the value for reg A at i2 in this case,
but would it express it in terms of which earlier value of reg A?
Shouldn't we have reset it while placing notes in this case too?

> It wasn't obvious to me (this code is horribly complicated).  Whether
> all existing code is correct...  it's probably best not to look too
> closely :-/

You're right about its being horribly complicated.

Maybe I should go about it incrementally.

> If you have a patch you feel confident in, could you post it again
> please?

So let me tell you how I feel about this.  It has waited long enough,
and there are at least 3 bugs known to be fixed by the first very simple
patch below.  The catch is that it doesn't adjust REG_N_SETS at all (we
didn't before the patch, and that didn't seem to hurt too much).  I've
regstrapped this successfully on x86_64-linux-gnu and i686-linux-gnu.

--->cut<---
When combine drops a REG_UNUSED SET in a parallel, we have to clear
cached values, so that, even if the REGs remain used (e.g. because
they were referenced in the used SET_SRC), we will not use properties
of the dropped modified value as if they applied to the preserved
original one.

We fail to adjust REG_N_SETS.

for  gcc/ChangeLog

PR rtl-optimization/80693
PR rtl-optimization/81019
PR rtl-optimization/81020
* combine.c (distribute_notes): Reset any REG_UNUSED REGs that
are not mentioned in i3.  Place the REG_UNUSED note on i2,
possibly modified to REG_DEAD, if it did not originate in i3.

for  gcc/testsuite/ChangeLog

PR rtl-optimization/80693
PR rtl-optimization/81019
PR rtl-optimization/81020
* gcc.dg/pr80693.c: New.
* gcc.dg/pr81019.c: New.
---
 gcc/combine.c  |   40 
 gcc/testsuit

Re: [PR59319] output friends in debug info

2017-12-07 Thread Alexandre Oliva
On Apr  7, 2017, Alexandre Oliva  wrote:

> On Mar 21, 2017, Alexandre Oliva  wrote:
>> On Jan 27, 2017, Alexandre Oliva  wrote:
>>> On Oct 19, 2016, Alexandre Oliva  wrote:
 On Sep 23, 2016, Alexandre Oliva  wrote:
> On Aug 30, 2016, Alexandre Oliva  wrote:
>> Handling non-template friends is kind of easy, [...]
> Ping?
 Ping?  (conflicts resolved, patch refreshed and retested)
>>> Ping?  (trivial conflicts resolved)
>> Ping?  https://gcc.gnu.org/ml/gcc-patches/2017-01/msg02112.html
> Ping?
Ping? (refreshed, retested)

[PR59319] output friends in debug info

Handling non-template friends is kind of easy, but it required a bit
of infrastructure in dwarf2out to avoid (i) forcing debug info for
unused types or functions: DW_TAG_friend DIEs are only emitted if
their DW_AT_friend DIE is emitted, and (ii) creating DIEs for such
types or functions just to have them discarded at the end.  To this
end, I introduced a list (vec, actually) of types with friends,
processed at the end of the translation unit, and a list of
DW_TAG_friend DIEs that, when we're pruning unused types, reference
DIEs that are still not known to be used, revisited after we finish
deciding all other DIEs, so that we prune DIEs that would have
referenced pruned types or functions.

Handling template friends turned out to be trickier: there's no
representation in DWARF for templates.  I decided to give debuggers as
much information as possible, enumerating all specializations of
friend templates and outputting DW_TAG_friend DIEs referencing them as
well.  I considered marking those as DW_AT_artificial, to indicate
they're not explicitly stated in the source code, but in the end we
decided that was not useful.  The greatest challenge was to enumerate
all specializations of a template.  It looked trivial at first, given
DECL_TEMPLATE_INSTANTIATIONS, but it won't list specializations of
class-scoped functions and of nested templates.  For other templates,
I ended up writing code to look for specializations in the hashtables
of decl or type specializations.  That's not exactly efficient, but it
gets the job done.


for gcc/ChangeLog

PR debug/59319
* dwarf2out.c (class_types_with_friends): New.
(gen_friend_tags_for_type, gen_friend_tags): New.
(gen_member_die): Record class types with friends.
(deferred_marks): New.
(prune_unused_types_defer_undecided_mark_p): New.
(prune_unused_types_defer_mark): New.
(prune_unused_types_deferred_walk): New.
(prune_unused_types_walk): Defer DW_TAG_friend.
(prune_unused_types): Check deferred marks is empty on entry,
empty it after processing.
(dwarf2out_finish): Generate friend tags.
(dwarf2out_early_finish): Likewise.
* langhooks-def.h (LANG_HOOKS_GET_FRIENDS): New.
(LANG_HOOKS_FOR_TYPES_INITIALIZER): Add it.
* langhooks.h (lang_hooks_for_types): Add get_friends.
* hooks.c (hook_tree_const_tree_int_null): New.
* hooks.h (hook_tree_const_tree_int_null): Declare.

for gcc/cp/ChangeLog

PR debug/59319
* cp-objcp-common.c (cp_get_friends): New.
* cp-objcp-common.h (cp_get_friends): Declare.
(LANG_HOOKS_GET_FRIENDS): Override.
* cp-tree.h (enumerate_friend_specializations): Declare.
* pt.c (optimize_friend_specialization_lookup_p): New.
(retrieve_friend_specialization): New.
(enumerate_friend_specializations): New.
(register_specialization): Update DECL_TEMPLATE_INSTANTIATIONS
for functions, even after definition, if we are emitting debug
info.

for gcc/testsuite/ChangeLog

PR debug/59319
* g++.dg/debug/dwarf2/friend-1.C: New.
* g++.dg/debug/dwarf2/friend-2.C: New.
* g++.dg/debug/dwarf2/friend-3.C: New.
* g++.dg/debug/dwarf2/friend-4.C: New.
* g++.dg/debug/dwarf2/friend-5.C: New.
* g++.dg/debug/dwarf2/friend-6.C: New.
* g++.dg/debug/dwarf2/friend-7.C: New.
* g++.dg/debug/dwarf2/friend-8.C: New.
* g++.dg/debug/dwarf2/friend-9.C: New.
* g++.dg/debug/dwarf2/friend-10.C: New.
* g++.dg/debug/dwarf2/friend-11.C: New.
* g++.dg/debug/dwarf2/friend-12.C: New.
* g++.dg/debug/dwarf2/friend-13.C: New.
* g++.dg/debug/dwarf2/friend-14.C: New.
* g++.dg/debug/dwarf2/friend-15.C: New.
* g++.dg/debug/dwarf2/friend-16.C: New.
* g++.dg/debug/dwarf2/friend-17.C: New.
* g++.dg/debug/dwarf2/friend-18.C: New.
---
 gcc/cp/cp-objcp-common.c  |  106 ++
 gcc/cp/cp-objcp-common.h  |4 +
 gcc/cp/cp-tree.h  |1 
 gcc/cp/pt.c   |  195 +
 gcc/dwarf2out.c   |  165 +
 gcc/hooks.c   |7 +
 gcc/hooks.h   |

Re: [PATCH] handle non-constant offsets in -Wstringop-overflow (PR 77608)

2017-12-07 Thread Jeff Law
On 12/01/2017 11:06 AM, Martin Sebor wrote:
> On 12/01/2017 01:26 AM, Jeff Law wrote:
>> On 11/30/2017 01:30 PM, Martin Sebor wrote:
>>> On 11/22/2017 05:03 PM, Jeff Law wrote:
 On 11/21/2017 12:07 PM, Martin Sebor wrote:
> On 11/21/2017 09:55 AM, Jeff Law wrote:
>> On 11/19/2017 04:28 PM, Martin Sebor wrote:
>>> On 11/18/2017 12:53 AM, Jeff Law wrote:
 On 11/17/2017 12:36 PM, Martin Sebor wrote:
[ Lots of snipping ]
>>
>> In fact, if I look at how we handle expand_builtin_mempcpy we have:
>>
>>  /* Avoid expanding mempcpy into memcpy when the call is determined
>>  to overflow the buffer.  This also prevents the same overflow
>>  from being diagnosed again when expanding memcpy.  */
>>   if (!check_memop_sizes (exp, dest, src, len))
>>     return NULL_RTX;
>>
>> While that's not strictly meant to be an optimization, it is in effect
>> changing the code we generate based on the return value of
>> check_memop_sizes, which comes from compute_objsize.  Thankfully, the
>> worst that happens here is we'll fail to turn the mempcpy into a memcpy
>> if compute_objsize/check_memop_sizes returns a value that is not
>> strictly correct.  But I think it highlights how easy it is to end up
>> having code generation changing based on the results of compute_objsize.
> 
> I think you have misread the code (which is easy to do), so I'm
> not sure it does highlight it.
Just to be clear, I'm convinced the code should DTRT as-written and that
its behavior is not changed by your patch.

My point is that these routines should not generally be used to
influence code generation/optimization decisions.  In an ideal world we
could express and enforce that rule.  But we don't live in that ideal
world and as a result it's relatively easy to use those routines to
influence code generation decisions without even knowing it.


So again, anytime we have something like what we see above where we call
check_memop_sizes/compute_objsize and the result is used to select
between paths that change code generation/optimization we need a
comment.  We also need a comment in compute_objsize to discourage its
use in any contexts where the result affects code generation/optimization.

Again, the code is safe as-is and not affected by your patch.  I'm just
asking for a comment for those functions and at any site where those
functions are used to influence code generation/optimization decisions.


[ More snipping..  Hopefully not losing too much context.. ]
>> SO would you rather go with baking the inexact nature of the return
>> value into the API or the pair of comments noted above?
> 
> I added the comment to compute_objsize in the last iteration
> of the patch.
> 
> As I explained above, expand_builtin_mempcpy isn't affected by
> this patch.  The comments above and within in the function
> already explain what what happens when a buffer overflow is
> detected and why so I'm not sure what else to say there.  If
> I misunderstood your suggestion please clarify.
The comment I'm looking for in expand_builtin_mempcpy would be something
like this:

/* Policy does not generally allow using compute_objsize (which
   is used internally by check_memop_size) to change code generation
   or drive optimization decisions.

   In this instance it is safe because the code we generate has
   the same semantics regardless of the return value of
   check_memop_sizes.   Exactly the same amount of data is copied
   and the return value is exactly the same in both cases.

   Furthermore, check_memop_size always uses mode 0 for the call to
   compute_objsize, so the imprecise nature of compute_objsize is
   avoided.  */



> 
> The other change you are suggesting means restoring the false
> negatives in Object Size modes 2 and 3 where my patch detected
> true positives.  Here's an example to demonstrate the effect.
> With my original patch, the overflow in both functions below
> is diagnosed with all -Wstringop-overflow=N arguments.  With
> the change you asked for, only the memcpy overflow is diagnosed
> with all arguments.  The strncpy overflow is only diagnosed
> with N=1 and 2, and suppressed with N=3 and N=4.  That's clearly
> a worse outcome, but in the interest of moving forward I've made
> the change.  I think the overall improvement is worthwhile despite
> this flaw.
So given the comments about restrictions in how compute_objsize is used,
I don't think we need to make the change I originally asked for.  Go
with whatever you think is best WRT handling of that bound computation.

Either computation of that bound has a degree of imprecision due to
range involvement and makes the result unsuitable for influencing code
generation or optimization.

In the compute_objsize comment I'd use

"The function is intended for diagnostics and should not be used to
influence code generation or optimization."

I think that's slightly better.


OK with either computation and the updated comment for compute_objsize.
Please add a comment to the mempcpy 

Re: [PATCH, GCC/testsuite] Improve fstack_protector effective target

2017-12-07 Thread Jeff Law
On 11/30/2017 09:40 AM, Thomas Preudhomme wrote:
> Hi,
> 
> Effective target fstack_protector fails to return an error for
> newlib-based targets (such as arm-none-eabi targets) which do not
> support stack protector. This is due to the test being too simplistic for
> stack protection code to be generated by GCC: it does not contain a
> local buffer and does not read unknown input.
> 
> This commit adds a small local buffer with a copy of the filename to
> trigger stack protector code to be generated. The filename is used
> instead of the full path so as to ensure the size will fit in the local
> buffer.
> 
> ChangeLog entry is as follows:
> 
> *** gcc/testsuite/ChangeLog ***
> 
> 2017-11-28  Thomas Preud'homme  
> 
> * lib/target-supports.exp (check_effective_target_fstack_protector):
> Copy filename in local buffer to trigger stack protection.
> 
> Testing: Ran gcc.dg/pr38616 on arm-none-eabi and arm-linux-gnueabihf,
> the former is now UNSUPPORTED while the latter continues to PASS.
> 
> Is this ok for stage3?
OK.
jeff


Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Jeff Law
On 11/29/2017 04:36 PM, Martin Sebor wrote:
> I've finished reimplementing the patch as a standalone pass.
> In the attached revision I also addressed your comments below
> as well as Richard's to allowing the strlen optimizations even
> for overlapping accesses.
> 
> While beefing up the tests I found a few minor issues that
> I also fixed (false negatives).
> 
> The fallout wasn't quite as bad as I thought, mainly thanks
> to the narrow API for the checker.
So still reading through this, but wanted to start with a question I hope
you can answer quickly.

In terms of coverage -- did we lose much in terms of cases that were
diagnosed in the original version, but aren't in this version?

jeff


Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Martin Sebor

On 12/07/2017 02:14 PM, Jeff Law wrote:

On 11/29/2017 04:36 PM, Martin Sebor wrote:

I've finished reimplementing the patch as a standalone pass.
In the attached revision I also addressed your comments below
as well as Richard's to allowing the strlen optimizations even
for overlapping accesses.

While beefing up the tests I found a few minor issues that
I also fixed (false negatives).

The fallout wasn't quite as bad as I thought, mainly thanks
to the narrow API for the checker.

So still reading through this, but wanted to start with a question I hope
you can answer quickly.

In terms of coverage -- did we lose much in terms of cases that were
diagnosed in the original version, but aren't in this version?


I'm quite pleased to say that with the pass in the right place
(after vrp) the coverage is the same.

Martin


Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Jeff Law
On 12/07/2017 02:28 PM, Martin Sebor wrote:
> On 12/07/2017 02:14 PM, Jeff Law wrote:
>> On 11/29/2017 04:36 PM, Martin Sebor wrote:
>>> I've finished reimplementing the patch as a standalone pass.
>>> In the attached revision I also addressed your comments below
>>> as well as Richard's to allowing the strlen optimizations even
>>> for overlapping accesses.
>>>
>>> While beefing up the tests I found a few minor issues that
>>> I also fixed (false negatives).
>>>
>>> The fallout wasn't quite as bad as I thought, mainly thanks
>>> to the narrow API for the checker.
>> So still reading through this, but wanted to start with a question I hope
>> you can answer quickly.
>>
>> In terms of coverage -- did we lose much in terms of cases that were
>> diagnosed in the original version, but aren't in this version?
> 
> I'm quite pleased to say that with the pass in the right place
> (after vrp) the coverage is the same.
That's awesome.
Jeff


Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Jeff Law
On 11/29/2017 04:36 PM, Martin Sebor wrote:
> I've finished reimplementing the patch as a standalone pass.
> In the attached revision I also addressed your comments below
> as well as Richard's to allowing the strlen optimizations even
> for overlapping accesses.
> 
> While beefing up the tests I found a few minor issues that
> I also fixed (false negatives).
> 
> The fallout wasn't quite as bad as I thought, mainly thanks
> to the narrow API for the checker.
> 
> Syncing up with the latest trunk has led to some more changes
> in tree-ssa-strlen.
> 
> I've retested the patch with GDB and Glibc with the same results
> as before.
> 
> The patch seems sizable (over 3KLOC without tests) but it's worth
> noting that most of the complexity is actually not in determining
> whether or not an overlap exists (that's quite simple) but rather
> in computing its offset and size to mention in the warnings and
> making sure the information is meaningful to the user even when
> ranges are involved.  All the subtly different forms of warnings
> also contribute substantially to the overall size.
> 
> Martin
[ Huge snip. ]

> 
> gcc-78918.diff
> 
> 
> PR tree-optimization/78918 - missing -Wrestrict on memcpy copying over self
> 
> gcc/c-family/ChangeLog:
> 
>   PR tree-optimization/78918
>   * c-common.c (check_function_restrict): Avoid checking built-ins.
>   * c.opt (-Wrestrict): Include in -Wall.
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/78918
>   * Makefile.in (OBJS): Add gimple-ssa-warn-restrict.o.
>   * builtins.c (check_sizes): Rename...
>   (check_access): ...to this.  Rename function arguments for clarity.
>   (check_memop_sizes): Adjust names.
>   (expand_builtin_memchr, expand_builtin_memcpy): Same.
>   (expand_builtin_memmove, expand_builtin_mempcpy): Same.
>   (expand_builtin_strcat, expand_builtin_stpncpy): Same.
>   (check_strncat_sizes, expand_builtin_strncat): Same.
>   (expand_builtin_strncpy, expand_builtin_memset): Same.
>   (expand_builtin_bzero, expand_builtin_memcmp): Same.
>   (expand_builtin_memory_chk, maybe_emit_chk_warning): Same.
>   (maybe_emit_sprintf_chk_warning): Same.
>   (expand_builtin_strcpy): Adjust.
>   (expand_builtin_stpcpy): Same.
>   (expand_builtin_with_bounds): Detect out-of-bounds accesses
>   in pointer-checking forms of memcpy, memmove, and mempcpy.
>   (gcall_to_tree_minimal, max_object_size): Define new functions.
>   * builtins.h (max_object_size): Declare.
>   * calls.c (alloc_max_size): Call max_object_size instead of
>   hardcoding ssizetype limit.
>   (get_size_range): Handle new argument.
>   * calls.h (get_size_range): Add a new argument.
>   * cfgexpand.c (expand_call_stmt): Propagate no-warning bit.
>   * doc/invoke.texi (-Wrestrict): Adjust, add example.
>   * gimple-fold.c (gimple_fold_builtin_memory_op): Detect overlapping
>   operations.
>   (gimple_fold_builtin_memory_chk): Same.
>   (gimple_fold_builtin_stxcpy_chk): New function.
>   * gimple-ssa-warn-restrict.c: New source.
>   * gimple-ssa-warn-restrict.h: New header.
>   * gimple.c (gimple_build_call_from_tree): Propagate location.
>   * passes.def (pass_warn_restrict): Add new pass.
>   * tree-pass.h (make_pass_warn_restrict): Declare.
>   * tree-ssa-strlen.c (handle_builtin_strcpy): Detect overlapping
>   operations.
>   (handle_builtin_strcat): Same.
>   (strlen_optimize_stmt): Rename...
>   (strlen_check_and_optimize_stmt): ...to this.  Handle strncat,
>   stpncpy, strncpy, and their checking forms.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/78918
>   * c-c++-common/Warray-bounds.c: New test.
>   * c-c++-common/Warray-bounds-2.c: New test.
>   * c-c++-common/Warray-bounds-3.c: New test.
>   * c-c++-common/Wrestrict-2.c: New test.
>   * c-c++-common/Wrestrict.c: New test.
>   * c-c++-common/Wrestrict.s: New test.
>   * c-c++-common/Wsizeof-pointer-memaccess1.c: Adjust
>   * c-c++-common/Wsizeof-pointer-memaccess2.c: Same.
>   * g++.dg/torture/Wsizeof-pointer-memaccess1.C: Same.
>   * g++.dg/torture/Wsizeof-pointer-memaccess2.C: Same.
>   * gcc.dg/memcpy-6.c: New test.
>   * gcc.dg/pr69172.c: Adjust.
>   * gcc.dg/pr79223.c: Same.
>   * gcc.dg/Wrestrict-2.c: New test.
>   * gcc.dg/Wrestrict.c: New test.
>   * gcc.dg/Wsizeof-pointer-memaccess1.c
>   * gcc.target/i386/chkp-stropt-17.c: New test.
>   * gcc.dg/torture/Wsizeof-pointer-memaccess1.c: Adjust.
> 
> @@ -3874,32 +3885,32 @@ check_strncat_sizes (tree exp, tree objsize)
>   size_one_node)
>: NULL_TREE);
>  
> -  /* Strncat copies at most MAXLEN bytes and always appends the terminating
> +  /* Strncat copies at most MAXREAD bytes and always appends the terminating
Nit.  Use "strncat" rather than "Strncat", even when starting a
sentence.  I saw thi

Re: [SFN+LVU+IEPM v4 1/9] [SFN] adjust RTL insn-walking API

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> This patch removes unused RTL functions, introduces alternate ones for
> use in a later SFN patch, and regroups other related functions so that
> they appear in a more consistent order.
> 
> for  gcc/ChangeLog
> 
>   * emit-rtl.c (next_nondebug_insn, prev_nondebug_insn): Reorder.
>   (next_nonnote_nondebug_insn, prev_nonnote_nondebug_insn): Reorder.
>   (next_nonnote_nondebug_insn_bb): New.
>   (prev_nonnote_nondebug_insn_bb): New.
>   (prev_nonnote_insn_bb, next_nonnote_insn_bb): Remove.
>   * rtl.h (prev_nonnote_insn_bb, next_nonnote_insn_bb): Remove decls.
>   (prev_nonnote_nondebug_insn_bb): Declare.
>   (next_nonnote_nondebug_insn_bb): Declare.
>   * cfgbuild.c (find_bb_boundaries): Adjust to skip debug insns.
>   * cfgrtl.c (get_last_bb_insn): Likewise.
>   * lra.c (push_insns): Likewise.
OK.  Seems like this ought to go in immediately rather than waiting on
the full kit to be ack'd.

jeff


Re: [SFN+LVU+IEPM v4 2/9] [SFN] boilerplate changes in preparation to introduce nonbind markers

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> This patch introduces a number of new macros and functions that will
> be used to distinguish between different kinds of debug stmts, insns
> and notes, namely, preexisting debug bind ones and to-be-introduced
> nonbind markers.
> 
> In a seemingly mechanical way, it adjusts several uses of the macros
> and functions, so that they refer to narrower categories when
> appropriate.
> 
> These changes, by themselves, should not have any visible effect in
> the compiler behavior, since the upcoming debug markers are never
> created with this patch alone.
> 
> for  gcc/ChangeLog
> 
>   * gimple.h (enum gimple_debug_subcode): Add
>   GIMPLE_DEBUG_BEGIN_STMT.
>   (gimple_debug_begin_stmt_p): New.
>   (gimple_debug_nonbind_marker_p): New.
>   * tree.h (MAY_HAVE_DEBUG_MARKER_STMTS): New.
>   (MAY_HAVE_DEBUG_BIND_STMTS): Renamed from
>   (MAY_HAVE_DEBUG_STMTS): ... this.  Check both.
>   * insn-notes.def (BEGIN_STMT): New.
>   * rtl.h (MAY_HAVE_DEBUG_MARKER_INSNS): New.
>   (MAY_HAVE_DEBUG_BIND_INSNS): Renamed from
>   (MAY_HAVE_DEBUG_INSNS): ... this.  Check both.
>   (NOTE_MARKER_LOCATION, NOTE_MARKER_P): New.
>   (DEBUG_BIND_INSN_P, DEBUG_MARKER_INSN_P): New.
>   (INSN_DEBUG_MARKER_KIND): New.
>   (GEN_RTX_DEBUG_MARKER_BEGIN_STMT_PAT): New.
>   (INSN_VAR_LOCATION): Check for VAR_LOCATION.
>   (INSN_VAR_LOCATION_PTR): New.
>   * cfgexpand.c (expand_debug_locations): Handle debug bind insns
>   only.
>   (expand_gimple_basic_block): Likewise.  Emit debug temps for TER
>   deps only if debug bind insns are enabled.
>   (pass_expand::execute): Avoid deep TER and expand
>   debug locations for debug bind insns only.
>   * cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Narrow
>   debug stmts special handling down to debug bind stmts.
>   * combine.c (try_combine): Narrow debug insns special handling
>   down to debug bind insns.
>   * cse.c (delete_trivially_dead_insns): Handle debug bindings.
>   Narrow debug insns preexisting special handling down to debug
>   bind insns.
>   * dce.c (rest_of_handle_ud_dce): Narrow debug insns special
>   handling down to debug bind insns.
>   * function.c (instantiate_virtual_regs): Skip debug markers,
>   adjust handling of debug binds.
>   * gimple-ssa-backprop.c (backprop::prepare_change): Try debug
>   temp insertion iff MAY_HAVE_DEBUG_BIND_STMTS.
>   * haifa-sched.c (schedule_insn): Narrow special handling of debug
>   insns to debug bind insns.
>   * ipa-param-manipulation.c (ipa_modify_call_arguments): Narrow
>   special handling of debug stmts to debug bind stmts.
>   * ipa-split.c (split_function): Likewise.
>   * ira.c (combine_and_move_insns): Adjust debug bind insns only.
>   * loop-unroll.c (apply_opt_in_copies): Adjust tests on bind
>   debug insns.
>   * reg-stack.c (convert_regs_1): Use DEBUG_BIND_INSN_P.
>   * regrename.c (build_def_use): Likewise.
>   * regcprop.c (copyprop_hardreg_forward_1): Likewise.
>   (pass_cprop_hardreg): Narrow special casing of debug insns to
>   debug bind insns.
>   * regstat.c (regstat_init_n_sets_and_refs): Likewise.
>   * reload1.c (reload): Likewise.
>   * sese.c (sese_insert_phis_for_liveouts): Narrow special
>   casing of debug stmts to debug bind stmts.
>   * shrink-wrap.c (move_insn_for_shrink_wrap): Likewise.
>   * ssa-iterators.h (num_imm_uses): Likewise.
>   * tree-cfg.c (gimple_merge_blocks): Narrow special casing of
>   debug stmts to debug bind stmts.
>   * tree-inline.c (tree_function_versioning): Narrow special casing
>   of debug stmts to debug bind stmts.
>   * tree-loop-distribution.c (generate_loops_for_partition):
>   Narrow special casing of debug stmts to debug bind stmts.
>   * tree-sra.c (analyze_access_subtree): Narrow special casing
>   of debug stmts to debug bind stmts.
>   * tree-ssa-dce.c (remove_dead_stmt): Narrow special casing of debug
>   stmts to debug bind stmts.
>   * tree-ssa-loop-ivopts.c (remove_unused_ivs): Narrow special
>   casing of debug stmts to debug bind stmts.
>   * tree-ssa-reassoc.c (reassoc_remove_stmt): Likewise.
>   * tree-ssa-tail-merge.c (tail_merge_optimize): Narrow special
>   casing of debug stmts to debug bind stmts.
>   * tree-ssa-threadedge.c (propagate_threaded_block_debug_info):
>   Likewise.
>   * tree-ssa.c (flush_pending_stmts): Narrow special casing of
>   debug stmts to debug bind stmts.
>   (gimple_replace_ssa_lhs): Likewise.
>   (insert_debug_temp_for_var_def): Likewise.
>   (insert_debug_temps_for_defs): Likewise.
>   (reset_debug_uses): Likewise.
>   * tree-ssanames.c (release_ssa_name_fn): Likewise.
>   * tree-vect-loop-manip.c (adjust_debug_stmts_now): Likewise.
>   (adjust_debug_stmt

Re: [001/nnn] poly_int: add poly-int.h

2017-12-07 Thread Richard Sandiford
Jeff Law  writes:
> On 12/07/2017 07:46 AM, Richard Biener wrote:
>> On Wed, Dec 6, 2017 at 9:11 PM, Jeff Law  wrote:
>>> On 11/13/2017 05:04 PM, Richard Sandiford wrote:
 Richard Sandiford  writes:
> Richard Sandiford  writes:
>> This patch adds a new "poly_int" class to represent polynomial integers
>> of the form:
>>
>>   C0 + C1*X1 + C2*X2 ... + Cn*Xn
>>
>> It also adds poly_int-based typedefs for offsets and sizes of various
>> precisions.  In these typedefs, the Ci coefficients are compile-time
>> constants and the Xi indeterminates are run-time invariants.  The number
>> of coefficients is controlled by the target and is initially 1 for all
>> ports.
>>
>> Most routines can handle general coefficient counts, but for now a few
>> are specific to one or two coefficients.  Support for other coefficient
>> counts can be added when needed.
>>
>> The patch also adds a new macro, IN_TARGET_CODE, that can be
>> set to indicate that a TU contains target-specific rather than
>> target-independent code.  When this macro is set and the number of
>> coefficients is 1, the poly-int.h classes define a conversion operator
>> to a constant.  This allows most existing target code to work without
>> modification.  The main exceptions are:
>>
>> - values passed through ..., which need an explicit conversion to a
>>   constant
>>
>> - ?: expression in which one arm ends up being a polynomial and the
>>   other remains a constant.  In these cases it would be valid to convert
>>   the constant to a polynomial and the polynomial to a constant, so a
>>   cast is needed to break the ambiguity.
>>
>> The patch also adds a new target hook to return the estimated
>> value of a polynomial for costing purposes.
>>
>> The patch also adds operator<< on wide_ints (it was already defined
>> for offset_int and widest_int).  I think this was originally excluded
>> because >> is ambiguous for wide_int, but << is useful for converting
>> bytes to bits, etc., so is worth defining on its own.  The patch also
>> adds operator% and operator/ for offset_int and widest_int, since those
>> types are always signed.  These changes allow the poly_int interface to
>> be more predictable.
>>
>> I'd originally tried adding the tests as selftests, but that ended up
>> bloating cc1 by at least a third.  It also took a while to build them
>> at -O2.  The patch therefore uses plugin tests instead, where we can
>> force the tests to be built at -O0.  They still run in negligible time
>> when built that way.
>
> Changes in v2:
>
> - Drop the controversial known_zero etc. wrapper functions.
> - Fix the operator<<= bug that Martin found.
> - Switch from "t" to "type" in SFINAE classes (requested by Martin).
>
> Not changed in v2:
>
> - Default constructors are still empty.  I agree it makes sense to use
>   "= default" when we switch to C++11, but it would be dangerous for
>   that to make "poly_int64 x;" less defined than it is now.

 After talking about this a bit more internally, it was obvious that
 the choice of "must" and "may" for the predicate names was a common
 sticking point.  The idea was to match the names of alias predicates,
 but given my track record with names ("too_empty_p" being a recently
 questioned example :-)), I'd be happy to rename them to something else.
 Some alternatives we came up with were:
>>> I didn't find the must vs may naming problematical as I was going
>>> through the changes.  What I did find much more difficult was
>>> determining if the behavior was correct when we used a "may" predicate.
>>> It really relies a good deal on knowing the surrounding code.
>>>
>>> In places where I knew the code reasonably well could tell without much
>>> surrounding context.  In other places I had to look at the code and
>>> deduce proper behavior in the "may" cases -- and often I resorted to
>>> spot checking and relying on your reputation & testing to DTRT.
>>>
>>>

 - known_eq / maybe_eq / known_lt / maybe_lt etc.

   Some functions already use "known" and "maybe", so this would arguably
   be more consistent than using "must" and "may".

 - always_eq / sometimes_eq / always_lt / sometimes_lt

   Similar to the previous one in intent.  It's just a question of which
   wordng is clearer.

 - forall_eq / exists_eq / forall_lt / exists_lt etc.

   Matches the usual logic quantifiers.  This seems quite appealing,
   as long as it's obvious that in:

 forall_eq (v0, v1)

   v0 and v1 themselves are already bound: if vi == ai + bi*X then
   what we really saying is:

 forall X, a0 + b0*X == a1 + b1*X

 Which of those sounds best?  Any other suggestions?
>>> I can live with an

Re: [SFN+LVU+IEPM v4 3/9] [SFN] not-quite-boilerplate changes in preparation to introduce nonbind markers

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> This patch adjusts numerous parts of the compiler that would
> malfunction should they find debug markers at points where they may be
> introduced.  The changes purport to allow the compiler to pass
> bootstrap-debug-lean (-fcompare-debug in stage3) at various
> optimization levels, as well as bootstrap-debug-lib (-fcompare-debug
> for target libraries), even after the compiler is changed so that
> debug markers are introduced in code streams at spots where earlier
> debug stmts, insns and notes wouldn't normally appear.
> 
> This patch depends on an earlier SFN boilerplate patch, and on another
> SFN patch that introduces new RTL insn-walking functions.
> 
> for  gcc/ChangeLog
> 
>   * cfgcleanup.c (delete_unreachable_blocks): Use alternate
>   block removal order if MAY_HAVE_DEBUG_BIND_INSNS.
>   * cfgexpand.c (label_rtx_for_bb): Skip debug insns.
>   * cfgrtl.c (try_redirect_by_replacing_jump): Skip debug insns.
>   (rtl_tidy_fallthru_edge): Likewise.
>   (rtl_verify_fallthru): Likewise.
>   (rtl_verify_bb_layout): Likewise.
>   (skip_insns_after_block): Likewise.
>   (duplicate_insn_chain): Use DEBUG_BIND_INSN_P.
>   * dwarf2out.c: Include print-rtl.h.
>   (dwarf2out_next_real_insn): New.
>   (dwarf2out_var_location): Call it.  Disregard begin stmt markers.
>   Dump debug binds in asm comments.
>   * gimple-iterator.c (gimple_find_edge_insert_loc): Skip debug stmts.
>   * gimple-iterator.h (gsi_start_bb_nondebug): Remove; adjust
>   callers to use gsi_start_nondebug_bb instead.
>   (gsi_after_labels): Skip gimple debug stmts.
>   (gsi_start_nondebug): New.
>   * gimple-low.c (gimple_seq_may_fallthru): Take last nondebug stmt.
>   * gimple.h (gimple_seq_last_nondebug_stmt): New.
>   * gimplify.c (last_stmt_in_scope): Skip debug stmts.
>   (collect_fallthrough_labels): Likewise.
>   (should_warn_for_implicit_fallthrough): Likewise.
>   (warn_implicit_fallthrough_r): Likewise.
>   (expand_FALLTHROUGH_r): Likewise.
>   * graphite-isl-ast-to-gimple.c (gsi_insert_earliest): Adjust.
>   (graphite_copy_stmts_from_block): Skip nonbind markers.
>   * haifa-sched.c (sched_extend_bb): Skip debug insns.
>   * ipa-icf-gimple.c (func_checker::compare_bb): Adjust.
>   * jump.c (clean_barriers): Skip debug insns.
>   * omp-expand.c (expand_parallel_call): Skip debug insns.
>   (expand_cilk_for_call): Likewise.
>   (expand_task_call): Likewise.
>   (remove_exit_barrier): Likewise.
>   (expand_omp_taskreg): Likewise.
>   (expand_omp_for_init_counts): Likewise.
>   (expand_omp_for_generic): Likewise.
>   (expand_omp_for_static_nochunk): Likewise.
>   (expand_omp_for_static_chunk): Likewise.
>   (expand_cilk_for): Likewise.
>   (expand_omp_simd): Likewise.
>   (expand_omp_taskloop_for_outer): Likewise.
>   (expand_omp_taskloop_for_inner): Likewise.
>   (expand_oacc_for): Likewise.
>   (expand_omp_sections): Likewise.
>   (expand_omp_single): Likewise.
>   (expand_omp_synch): Likewise.
>   (expand_omp_atomic_load): Likewise.
>   (expand_omp_atomic_store): Likewise.
>   (expand_omp_atomic_fetch_op): Likewise.
>   (expand_omp_atomic_pipeline): Likewise.
>   (expand_omp_atomic_mutex): Likewise.
>   (expand_omp_target): Likewise.
>   (grid_expand_omp_for_loop): Likewise.
>   (grid_expand_target_grid_body): Likewise.
>   (build_omp_regions_1): Likewise.
>   * omp-low.c (check_combined_parallel): Skip debug stmts.
>   * postreload.c (fixup_debug_insns): Skip nonbind debug insns.
>   * regcprop.c (find_oldest_value_reg): Ensure REGNO is not a pseudo.
>   * sese.c (sese_trivially_empty_bb_p): Call is_gimple_debug in
>   test.
>   * tree-cfg.c (make_blocks_1): Skip debug stmts.
>   (make_edges): Likewise.
>   (cleanup_dead_labels): Likewise.
>   (gimple_can_merge_blocks_p): Likewise.
>   (stmt_starts_bb_p): Likewise.
>   (gimple_block_label): Likewise.
>   (gimple_redirect_edge_and_branch): Likewise.
>   * tree-cfgcleanup.c (remove_forwarder_block): Rearrange skipping
>   of debug stmts.
>   (execute_cleanup_cfg_post_optimizing): Dump enumerated decls with
>   TDF_SLIM.
>   * tree-pretty-print (print_declaration): Omit initializer in slim
>   dumps.
>   * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark begin stmt
>   markers.
>   (eliminate_unnecessary_stmts): Stabilize block removal order.
>   * tree-ssa-tail-merge.c (find_duplicate): Skip debug stmts.
>   * var-tracking.c (get_first_insn): New.
>   (vt_emit_notes): Call it.
>   (vt_initialize): Walk any insns before the first BB.
>   (delete_debug_insns): Likewise.
OK.

As I read through this my first thought is that we're way too loose with
how we find the first/last statement by not taking into account

Re: [SFN+LVU+IEPM v4 4/9] [SFN] stabilize find_bb_boundaries

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> If find_bb_boundaries is given a block with zero or one nondebug insn
> besides debug insns, it shouldn't purge dead edges, because without
> debug insns we wouldn't purge them at that point.  Doing so may change
> the order in which edges are processed, and ultimately lead to
> different transformations to the CFG and then to different
> optimizations.
> 
> We shouldn't, however, retain debug insns after control flow insns, so
> if we find debug insns after a single insn that happens to be a
> control flow insn, do the debug insn cleanups, but still refrain from
> purging dead edges at that point.
> 
> 
> for  gcc/ChangeLog
> 
>   * cfgbuild.c (find_bb_boundaries): Don't purge dead edges if,
>   without debug insns, we wouldn't, but clean up debug insns
>   after a control flow insn nevertheless.
OK.  Seems to me like it's independent of the rest of the work and
should go in immediately.

jeff


Re: [001/nnn] poly_int: add poly-int.h

2017-12-07 Thread Jeff Law
On 12/07/2017 03:38 PM, Richard Sandiford wrote:

>> So I think that's the final ack on this series.
> 
> Thanks to both of you, really appreciate it!
Sorry it took so long.

> 
>> Richard S. can you confirm?  I fully expect the trunk has moved some
>> and the patches will need adjustments -- consider adjustments which
>> work in a manner similar to the patches to date pre-approved.
> 
> Yeah, that's now all of the poly_int patches.  I still owe you replies
> to some of them -- I'll get to that as soon as I can.
NP.  I don't think any of the questions were all that significant.
Those which were I think you already responded to.

> 
> I'll make the name changes and propagate through the series and then
> commit this first patch.  I was thinking that for the rest it would
> make sense to commit them individually, with individual testing of
> each patch, so that it's easier to bisect.  I'll try to make sure
> I don't repeat the merge mistake in the machine-mode series.
> 
> I think it'd also make sense to divide the commits up into groups rather
> than do them all at once, since it's easier to do the individual testing
> that way.  Does that sound OK?
Your call on the best way to stage in.

jeff


Re: [SFN+LVU+IEPM v4 6/9] [SFN] Introduce -gstatement-frontiers option, enable debug markers

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> Introduce a command line option to enable statement frontiers, enabled
> by default in optimized builds with DWARF2+ debug information.
> 
> This patch depends on an earlier patch that completed the
> infrastructure for debug markers, and on another patch that turns -g
> into a negatable option prefix.
> 
> gcc/ChangeLog
> 
>   * common.opt (gstatement-frontiers): New, setting
>   debug_nonbind_markers_p.
>   * rtl.h (MAY_HAVE_DEBUG_MARKER_INSNS): Activate.
>   * toplev.c (process_options): Autodetect value for debug statement
>   frontiers option.
>   * tree.h (MAY_HAVE_DEBUG_MARKER_STMTS): Activate.
>   * doc/invoke.texi (gstatement-frontiers, gno-statement-frontiers): New.
OK once all prereqs are ack'd.

jeff


Re: [SFN+LVU+IEPM v4 8/9] [IEPM] Introduce debug hook for inline entry point markers

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> The inline_entry hook will be given a definition in a later patch.
> 
> for  gcc/ChangeLog
> 
>   * debug.h (gcc_debug_hooks): Add inline_entry.
>   * dbxout.c (dbx_debug_hooks, xcoff_debug_hooks): Likewise.
>   * debug.c (do_nothing_debug_hooks): Likewise.
>   * vmsdbgout.c (vmsdbg_debug_hooks): Likewise.
>   * dwarf2out.c (dwarf2_debug_hooks): Likewise.
>   (dwarf2_lineno_debug_hooks): Likewise.
OK.
jeff


Re: [patch] prevent .cfi_personality/.cfi_lsda on !dwarf eh configurations

2017-12-07 Thread Jeff Law
On 11/30/2017 02:18 AM, Olivier Hainque wrote:
> Hello,
> 
> Bootstrap is currently broken with Ada on x86_64-windows using SEH exceptions,
> with several stage2/stage3 object comparison failures like
> 
>   Bootstrap comparison failure!
>   gcc/ada/ali-util.o differs
>   gcc/ada/ali.o differs
>   ...
> 
> (PR 81470)
> 
> The differences come from variations between -g and -g0 on the placement of
> data attached to LDFCM labels, in particular those related to the emission of
> .cfi_personality and .cfi_lsda by dwarf2out_do_cfi_startproc.
> 
> These aren't needed for SEH anyway, so the proposed patch just prevents the
> generation of those .cfi notes if the target eh format isn't dwarf2.
> 
> We have used this for a couple of years on gcc-6, it cures the same
> bootstrap failure on gcc-7, and bootstraps + regression-tests fine
> with mainline on x86_64-linux.
> 
> OK to commit ?
> 
> 2017-11-30  Tristan Gingold  
> 
> * dwarf2out.c (dwarf2out_do_cfi_startproc): Only emit
> .cfi_personality or .cfi_lsda if the eh data format is dwarf2.
> 
OK.
jeff


Re: [PATCH 06/10] [ARC] Update legitimate constant hook.

2017-12-07 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-11-27 12:09:55 
+0100]:

> From: Claudiu Zissulescu 
> 
> Make sure we check the constants in all cases.
> 
> gcc/
> 2017-10-14  Claudiu Zissulescu  
> 
>   * config/arc/arc.c (arc_legitimate_constant_p): Always check all
>   constants.
> 
> testsuite/
> 2017-10-14  Claudiu Zissulescu  
> 
>   * gcc.target/arc/tls-1.c: New test.

Looks good.

Thanks,
Andrew


> ---
>  gcc/config/arc/arc.c |  6 --
>  gcc/testsuite/gcc.target/arc/tls-1.c | 26 ++
>  2 files changed, 26 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/tls-1.c
> 
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 4d7a282..42ea921 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -6185,12 +6185,6 @@ arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx 
> frame)
>  bool
>  arc_legitimate_constant_p (machine_mode mode, rtx x)
>  {
> -  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
> -return false;
> -
> -  if (!flag_pic && mode != Pmode)
> -return true;
> -
>switch (GET_CODE (x))
>  {
>  case CONST:
> diff --git a/gcc/testsuite/gcc.target/arc/tls-1.c 
> b/gcc/testsuite/gcc.target/arc/tls-1.c
> new file mode 100644
> index 000..3f7a6d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arc/tls-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target tls } */
> +/* { dg-options "-O3 -std=gnu99" } */
> +
> +/* Check if addressing the `pos` member of struct is done via tls
> +   mechanism.  */
> +
> +struct callchain_cursor {
> +  int last;
> +  long long pos;
> +} __thread a;
> +void fn1(struct callchain_cursor *p1)
> +{
> +  p1->pos++;
> +}
> +
> +extern void fn3 (void);
> +
> +void fn2(void) {
> +  struct callchain_cursor *b = &a;
> +  while (1) {
> +fn3();
> +fn1(b);
> +  }
> +}
> +/* { dg-final { scan-assembler "r25,@a@tpoff" } } */
> -- 
> 1.9.1
> 


Re: [PATCH 07/10] [ARC][FIX] Consider command line ffixed- option.

2017-12-07 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-11-27 12:09:56 
+0100]:

> From: claziss 
> 
> Track which regs are set fixed/call saved/call used from the command line.
> Do not try to override their properties if the user says otherwise.
> 
> gcc/
> 2017-06-08  Claudiu Zissulescu  
> 
>   * config/arc/arc.c (overrideregs): New variable.
>   (arc_override_options): Track fixed/call saved/call options.
>   (arc_conditional_register_usage): Check against overrideregs
>   variable whenever we change register properties.

Looks good.

Thanks,
Andrew


> ---
>  gcc/config/arc/arc.c | 60 
> +---
>  1 file changed, 48 insertions(+), 12 deletions(-)
> 
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 42ea921..0eeeb42 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -79,6 +79,9 @@ typedef struct GTY (()) _arc_jli_section
>  
>  static arc_jli_section *arc_jli_sections = NULL;
>  
> +/* Track which regs are set fixed/call saved/call used from commnad line.  */
> +HARD_REG_SET overrideregs;
> +
>  /* Maximum size of a loop.  */
>  #define ARC_MAX_LOOP_LENGTH 4095
>  
> @@ -1144,6 +1147,30 @@ arc_override_options (void)
> }
>}
>  
> +  CLEAR_HARD_REG_SET (overrideregs);
> +  if (common_deferred_options)
> +{
> +  vec<cl_deferred_option> v =
> + *((vec<cl_deferred_option> *) common_deferred_options);
> +  int reg, nregs, j;
> +
> +  FOR_EACH_VEC_ELT (v, i, opt)
> + {
> +   switch (opt->opt_index)
> + {
> + case OPT_ffixed_:
> + case OPT_fcall_used_:
> + case OPT_fcall_saved_:
> +   if ((reg = decode_reg_name_and_count (opt->arg, &nregs)) >= 0)
> + for (j = reg;  j < reg + nregs; j++)
> +   SET_HARD_REG_BIT (overrideregs, j);
> +   break;
> + default:
> +   break;
> + }
> + }
> +}
> +
>/* Set cpu flags accordingly to architecture/selected cpu.  The cpu
>   specific flags are set in arc-common.c.  The architecture forces
>   the default hardware configurations in, regardless what command
> @@ -1673,14 +1700,20 @@ arc_conditional_register_usage (void)
>/* For ARCv2 the core register set is changed.  */
>strcpy (rname29, "ilink");
>strcpy (rname30, "r30");
> -  call_used_regs[30] = 1;
> -  fixed_regs[30] = 0;
> -
> -  arc_regno_reg_class[30] = WRITABLE_CORE_REGS;
> -  SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], 30);
> -  SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], 30);
> -  SET_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], 30);
> -  SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], 30);
> +
> +  if (!TEST_HARD_REG_BIT (overrideregs, 30))
> + {
> +   /* No user interference.  Set the r30 to be used by the
> +  compiler.  */
> +   call_used_regs[30] = 1;
> +   fixed_regs[30] = 0;
> +
> +   arc_regno_reg_class[30] = WRITABLE_CORE_REGS;
> +   SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], 30);
> +   SET_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], 30);
> +   SET_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], 30);
> +   SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], 30);
> + }
> }
>  
>if (TARGET_MUL64_SET)
> @@ -1935,11 +1968,14 @@ arc_conditional_register_usage (void)
>  SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], 
> ACCL_REGNO);
>  SET_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], 
> ACCH_REGNO);
>  
> - /* Allow the compiler to freely use them.  */
> -fixed_regs[ACCL_REGNO] = 0;
> -fixed_regs[ACCH_REGNO] = 0;
> +/* Allow the compiler to freely use them.  */
> +if (!TEST_HARD_REG_BIT (overrideregs, ACCL_REGNO))
> +  fixed_regs[ACCL_REGNO] = 0;
> +if (!TEST_HARD_REG_BIT (overrideregs, ACCH_REGNO))
> +  fixed_regs[ACCH_REGNO] = 0;
>  
> -arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
> +if (!fixed_regs[ACCH_REGNO] && !fixed_regs[ACCL_REGNO])
> +  arc_hard_regno_modes[ACC_REG_FIRST] = D_MODES;
>}
>  }
>  
> -- 
> 1.9.1
> 


Re: [PATCH 09/10] [ARC] Update (u)maddsidi patterns.

2017-12-07 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-11-27 12:09:58 
+0100]:

> From: claziss 
> 
> The accumulator registers are freely used by the compiler. However,
> a number of instructions make intrinsic use of these registers.
> Update the patterns to tell the compiler which ones do.
> 
> gcc/
> 2017-09-19  Claudiu Zissulescu  
> 
>   * config/arc/arc.md (maddsidi4, maddsidi4_split): Update pattern.
>   (umaddsidi4, umaddsidi4_split): Likewise.
> 
> gcc/testsuite
> 2017-09-19  Claudiu Zissulescu  
> 
>   * gcc.target/arc/tumaddsidi4.c: New test.

Looks good.

Thanks,
Andrew


> ---
>  gcc/config/arc/arc.md  | 32 
> ++
>  gcc/testsuite/gcc.target/arc/tumaddsidi4.c | 14 +
>  2 files changed, 42 insertions(+), 4 deletions(-)
>  create mode 100755 gcc/testsuite/gcc.target/arc/tumaddsidi4.c
> 
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 42c6a23..155ee6c 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -6175,13 +6175,25 @@ archs4xd, archs4xd_slow, core_3"
>[(set_attr "length" "0")])
>  
>  ;; MAC and DMPY instructions
> -(define_insn_and_split "maddsidi4"
> +(define_expand "maddsidi4"
> +  [(match_operand:DI 0 "register_operand" "")
> +   (match_operand:SI 1 "register_operand" "")
> +   (match_operand:SI 2 "extend_operand"   "")
> +   (match_operand:DI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +  "{
> +   emit_insn (gen_maddsidi4_split (operands[0], operands[1], operands[2], 
> operands[3]));
> +   DONE;
> +  }")
> +
> +(define_insn_and_split "maddsidi4_split"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (plus:DI
>(mult:DI
> (sign_extend:DI (match_operand:SI 1 "register_operand" "%r"))
> (sign_extend:DI (match_operand:SI 2 "extend_operand" "ri")))
> -  (match_operand:DI 3 "register_operand" "r")))]
> +  (match_operand:DI 3 "register_operand" "r")))
> +   (clobber (reg:DI ARCV2_ACC))]
>"TARGET_PLUS_DMPY"
>"#"
>"TARGET_PLUS_DMPY && reload_completed"
> @@ -6263,13 +6275,25 @@ archs4xd, archs4xd_slow, core_3"
> (set_attr "predicable" "no")
> (set_attr "cond" "nocond")])
>  
> -(define_insn_and_split "umaddsidi4"
> +(define_expand "umaddsidi4"
> +  [(match_operand:DI 0 "register_operand" "")
> +   (match_operand:SI 1 "register_operand" "")
> +   (match_operand:SI 2 "extend_operand"   "")
> +   (match_operand:DI 3 "register_operand" "")]
> +  "TARGET_PLUS_DMPY"
> +  "{
> +   emit_insn (gen_umaddsidi4_split (operands[0], operands[1], operands[2], 
> operands[3]));
> +   DONE;
> +  }")
> +
> +(define_insn_and_split "umaddsidi4_split"
>[(set (match_operand:DI 0 "register_operand" "=r")
>   (plus:DI
>(mult:DI
> (zero_extend:DI (match_operand:SI 1 "register_operand" "%r"))
> (zero_extend:DI (match_operand:SI 2 "extend_operand" "ri")))
> -  (match_operand:DI 3 "register_operand" "r")))]
> +  (match_operand:DI 3 "register_operand" "r")))
> +   (clobber (reg:DI ARCV2_ACC))]
>"TARGET_PLUS_DMPY"
>"#"
>"TARGET_PLUS_DMPY && reload_completed"
> diff --git a/gcc/testsuite/gcc.target/arc/tumaddsidi4.c 
> b/gcc/testsuite/gcc.target/arc/tumaddsidi4.c
> new file mode 100755
> index 000..40d2b33
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arc/tumaddsidi4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mcpu=archs -O1 -mmpy-option=plus_dmpy" } */
> +
> +/* Check how we generate umaddsidi4 patterns.  */
> +long a;
> +long long b;
> +unsigned c, d;
> +
> +void fn1(void)
> +{
> +  b = d * (long long)c + a;
> +}
> +
> +/* { dg-final { scan-assembler "macu 0,r" } } */
> -- 
> 1.9.1
> 


[Patch][Aarch64] Fix aarch64 libatomic build with older binutils

2017-12-07 Thread Steve Ellcey
James,

Here is a patch that will turn off the use of IFUNC and the LSE
instructions in libatomic if the compiler/assembler toolchain does not
understand the '-march=armv8-a+lse' option (changed from
-march=armv8.1-a).  Rather than check the assembler directly, I used
the existing ACX_PROG_CC_WARNING_OPTS macro to test this.  It makes
the GCC being built pass the option in question to the assembler; if
the assembler complains, enable_aarch64_lse is left unset, and so
try_ifunc is not set either.
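
To be clear about what the alternate objects buy us: when they are
compiled with -march=armv8-a+lse, the __atomic built-ins can expand
directly to the LSE instructions.  A minimal illustration (my example,
not part of the patch; it assumes LSE code generation for the built-in):

/* With -march=armv8-a+lse this can compile to an LSE instruction such
   as ldaddal, instead of a load-exclusive/store-exclusive retry loop.  */
int
fetch_add_one (int *p)
{
  return __atomic_fetch_add (p, 1, __ATOMIC_SEQ_CST);
}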

Steve Ellcey
sell...@cavium.com


2017-12-07  Steve Ellcey  

* Makefile.am (IFUNC_OPTIONS): Change aarch64
option from -march=armv8.1-a to -march=armv8-a+lse.
* configure.ac (*aarch64*): Check to see if
compiler understands -march=armv8-a+lse option.
* configure.tgt (*aarch64*): Only set try_ifunc
if compiler understands -march=armv8-a+lse option.
* Makefile.in: Regenerate.
* configure: Regenerate.
* aclocal.m4: Regenerate.

diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index ec36c8e..21fd1be 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -123,7 +123,7 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix _$(s)_.lo,$(SIZEOBJS)))
 ## On a target-specific basis, include alternates to be selected by IFUNC.
 if HAVE_IFUNC
 if ARCH_AARCH64_LINUX
-IFUNC_OPTIONS	 = -march=armv8.1-a
+IFUNC_OPTIONS	 = -march=armv8-a+lse
 libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
 endif
 if ARCH_ARM_LINUX
diff --git a/libatomic/configure.ac b/libatomic/configure.ac
index 6b11f21..cee0198 100644
--- a/libatomic/configure.ac
+++ b/libatomic/configure.ac
@@ -157,6 +157,12 @@ AC_MSG_CHECKING([for thread model used by GCC])
 target_thread_file=`$CC -v 2>&1 | sed -n 's/^Thread model: //p'`
 AC_MSG_RESULT([$target_thread_file])
 
+case "$target" in
+ *aarch64*)
+ACX_PROG_CC_WARNING_OPTS([-march=armv8-a+lse],[enable_aarch64_lse])
+;;
+esac
+
 # Get target configury.
 . ${srcdir}/configure.tgt
 if test -n "$UNSUPPORTED"; then
diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index 388ae95..2f3bcc9 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -44,7 +44,9 @@ case "${target_cpu}" in
 	ARCH=aarch64
 	case "${target}" in
 	aarch64*-*-linux*)
-		try_ifunc=yes
+		if test -n "$enable_aarch64_lse"; then
+		try_ifunc=yes
+		fi
 		;;
 	esac
 	;;


Re: [SFN+LVU+IEPM v4 5/9] [SFN] introduce statement frontier notes, still disabled

2017-12-07 Thread Jeff Law
On 11/09/2017 07:34 PM, Alexandre Oliva wrote:
> This patch completes the infrastructure for the introduction of
> statement frontiers in C-family languages.
> 
> It brings in all the code remaining code needed to introduce and
> transform begin stmt trees, gimple stmts, insns and notes, and
> ultimately use them to generate the is_stmt column in DWARF2+ line
> number tables/programs, however none of it is activated: the option
> that would do so will be introduced in a subsequent patch.
> 
> This patch depends on an earlier patch with not-quite-boilerplate
> changes towards SFN.
> 
> for  gcc/c-family/ChangeLog
> 
>   * c-semantics.c (pop_stmt_list): Move begin stmt marker into
>   subsequent statement list.
> 
> for  gcc/c/ChangeLog
> 
>   * c-objc-common.h (LANG_HOOKS_EMITS_BEGIN_STMT): Redefine as true.
>   * c-parser.c (add_debug_begin_stmt): New.
>   (c_parser_declaration_or_fndef): Call it.
>   (c_parser_compound_statement_nostart): Likewise.
>   (c_parser_statement_after_labels): Likewise.
>   * c-typeck.c (c_finish_stmt_expr): Skip begin stmt markers.
> 
> for  gcc/cp/ChangeLog
> 
>   * constexpr.c (check_constexpr_ctor_body_1): Skip begin stmt
>   markers.
>   (constexpr_fn_retval): Likewise.
>   (potential_constant_expression_1): Likewise.
>   (cxx_eval_statement_list): Check that a begin stmt marker is
>   not used as the value of a statement list.
>   (cxx_eval_constant_expression): Return begin stmt markers
>   unchanged.
>   * cp-array-notation.c (stmt_location): New.
>   (cp_expand_cond_array_notations): Use it.
>   * cp-objcp-common.h (LANG_HOOKS_EMITS_BEGIN_STMT): Redefine as true.
>   * parser.c (add_debug_begin_stmt): New.
>   (cp_parser_statement): Call it.
>   * pt.c (tsubst_copy): Handle begin stmt markers.
> 
> for  gcc/ChangeLog
> 
>   * cfgexpand.c (expand_gimple_basic_block): Handle begin stmt
>   markers.  Integrate source bind into debug stmt expand loop.
>   (pass_expand::execute): Check debug marker limit.  Avoid deep
>   TER and expand debug locations for debug bind insns only.
>   * cse.c (insn_live_p): Keep nonbind markers and debug bindings
>   followed by them.
>   * df-scan.c (df_insn_delete): Accept out-of-block debug insn.
>   * final.c (reemit_insn_block_notes): Take current block from
>   nonbind markers.  Declare note where it's first set.
>   (final_scan_insn): Handle begin stmt notes.  Emit is_stmt according to
>   begin stmt markers if enabled.
>   (notice_source_line): Handle nonbind markers.  Fail if their
>   location is unknown or that of builtins.
>   (rest_of_handle_final): Convert begin stmt markers to notes if
>   var-tracking didn't run.
>   (rest_of_clean_state): Skip begin stmt markers.
>   * gimple-pretty-print.c (dump_gimple_debug): Handle begin stmt
>   markers.
>   * function.c (allocate_struct_function): Set begin_stmt_markers.
>   * function.h (struct function): Add debug_marker_count counter
>   and debug_nonbind_markers flag.
>   * gimple-iterator.c (gsi_remove): Adjust debug_marker_count.
>   * gimple-low.c (lower_function_body): Adjust
>   debug_nonbind_markers.
>   (lower_stmt): Drop or skip gimple debug stmts.
>   (lower_try_catch): Skip debug stmts.
>   * gimple.c (gimple_build_debug_begin_stmt): New.
>   (gimple_copy): Increment debug_marker_count if copying one.
>   * gimple.h (gimple_build_debug_begin_stmt): Declare.
>   * gimplify.c (rexpr_location): New.
>   (rexpr_has_location): New.
>   (warn_switch_unreachable_r): Handle gimple debug stmts.
>   (shortcut_cond_r): Call expr_location.
>   (find_goto): New.
>   (find_goto_label): New.
>   (shortcut_cond_expr): Call expr_has_location, expr_location, and
>   find_goto_label.
>   (gimplify_cond_expr): Call find_goto_label, expr_has_location, and
>   expr_location.
>   (gimplify_expr): Handle begin stmt markers.  Reject debug expr decls.
>   * langhooks-def.h (LANG_HOOKS_EMITS_BEGIN_STMT): New.  Add to...
>   (LANG_HOOKS_INITIALIZER): ... this.
>   * langhooks.h (struct lang_hooks): Add emits_begin_stmt.
>   * lra-constraints.c (inherit_reload_reg): Tolerate between-blocks
>   debug insns.
>   (update_ebb_live_info): Skip debug insn markers.
>   * lra.c (debug_insn_static_data): Rename to...
>   (debug_bind_static_data): ... this.
>   (debug_marker_static_data): New.
>   (lra_set_insn_recog_data): Select one of the above depending
>   on debug insn kind.
>   (lra_update_insn_regno_info): Don't assume debug insns have
>   freqs.
>   (push_insns): Skip debug insns.
>   * lto-streamer-in.c (input_function): Drop debug stmts
>   depending on active options.  Adjust debug_nonbind_markers.
>   * params.def (PARAM_MAX_DEBUG_MARKER_COUNT): New.
>   * print-rtl.c (rtx_writer::print_rtx

[PATCH][RFA][P1 PR tree-optimization/83298] Avoid over-optimistic result range for COND_EXPR

2017-12-07 Thread Jeff Law

So the underlying issue here is quite simple.  Given something like

x = (cond) ? res1 : res2;

EVRP analysis will compute the resultant range using vrp_meet of the
ranges for res1 and res2.  Seems pretty natural.

vrp_meet makes optimistic assumptions if either range is VR_UNDEFINED
and will set the resultant range to the range of the other operand.

Some callers explicitly mention this is the desired behavior (PHI
processing).  Other callers avoid calling vrp_meet when one of the
ranges is VR_UNDEFINED and do something sensible
(extract_range_from_unary_expr, extract_range_from_binary_expr_1).

extract_range_from_cond_expr neither mentions that it wants the
optimistic behavior nor does it avoid calling vrp_meet with a
VR_UNDEFINED range.  It naturally seems to fit in better with the other
extract_range_from_* routines.

I'm not at all familiar with the ipa-cp bits, but from a quick look they
also seems to fall into the extract_* camp.


Anyway, normally in a domwalk the only place where we're going to see
VR_UNDEFINED would be in the PHI nodes.  It's one of the nice properties
of a domwalk :-)

However, for jump threading we look past the dominance frontier;
furthermore, we do not currently record ranges for statements we process
as part of the jump threading.  But we do try to extract the range each
statement generates -- we're primarily looking for cases where the
statement generates a singleton range.

While the plan does include recording ranges as we look past the
dominance frontier, I strongly believe some serious code cleanup in DOM
and jump threading needs to happen first.  So I don't want to go down
that path for gcc-8.

So we're kind-of stuck with the fact that we might query for a resultant
range when one or more input operands may not have recorded range
information.  Thankfully that's easily resolved by making
extract_range_from_cond_expr work like the other range extraction
routines and avoid calling vrp_meet when one or more operands is
VR_UNDEFINED.
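
To make the optimistic behavior concrete, here is a tiny standalone
model of the meet described above (an illustration only -- this is not
GCC's value_range code, and the names are made up):

#include <stdio.h>

struct range { int defined; int lo, hi; };

/* Optimistic meet: if either operand is "undefined" (i.e. not computed
   yet), the result is simply the other operand.  */
static struct range
meet (struct range a, struct range b)
{
  if (!a.defined)
    return b;
  if (!b.defined)
    return a;
  struct range r = { 1,
                     a.lo < b.lo ? a.lo : b.lo,
                     a.hi > b.hi ? a.hi : b.hi };
  return r;
}

int
main (void)
{
  struct range res1 = { 0, 0, 0 };   /* not yet processed: undefined */
  struct range res2 = { 1, 1, 1 };   /* known to be [1, 1] */

  /* For "x = cond ? res1 : res2" the optimistic meet claims x is [1, 1],
     even though res1 might turn out to be, say, [0, 0] once its defining
     statement is processed.  Returning "varying" instead is the
     conservatively correct answer.  */
  struct range x = meet (res1, res2);
  printf ("[%d, %d]\n", x.lo, x.hi);
  return 0;
}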

Bootstrapped and regression tested on x86_64.  OK for the trunk?

Jeff

PR tree-optimization/83298
* vr-values.c (vr_values::extract_range_from_cond_expr): Do not
call vrp_meet if one of the input operands is VR_UNDEFINED.

PR tree-optimization/83298
* gcc.c-torture/execute/pr83298.c: New test.

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr83298.c 
b/gcc/testsuite/gcc.c-torture/execute/pr83298.c
new file mode 100644
index 000..0e51ababf5c
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr83298.c
@@ -0,0 +1,11 @@
+
+int a, b, c = 1;
+
+int main ()
+{
+  for (; b < 1; b++)
+;
+  if (!(c * (a < 1))) 
+__builtin_abort ();
+  return 0; 
+}
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 9352e120d9d..ee5ae3c6a27 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -912,6 +912,23 @@ vr_values::extract_range_from_cond_expr (value_range *vr, 
gassign *stmt)
   else
 set_value_range_to_varying (&vr1);
 
+  /* If either range is VR_UNDEFINED, vrp_meet will make the optimistic
+ choice and use the range of the other operand as the result range.
+
+ Other users of vrp_meet either explicitly filter the calls for
+ this case, or they do not care (PHI processing where unexecutable
+ edges are explicitly expected to be ignored).
+
+ Like most other callers, we can not generally tolerate the optimistic
+ choice here.  Consider jump threading where we're looking into a
+ non-dominated block and thus may not necessarily have processed the
+ ranges for statements within that non-dominated block.  */
+  if (vr0.type == VR_UNDEFINED || vr1.type == VR_UNDEFINED)
+{
+  set_value_range_to_varying (vr);
+  return;
+}
+
   /* The resulting value range is the union of the operand ranges */
   copy_value_range (vr, &vr0);
   vrp_meet (vr, &vr1);


Re: [PING 2][PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-12-07 Thread Martin Sebor

On 12/07/2017 03:23 PM, Jeff Law wrote:

On 11/29/2017 04:36 PM, Martin Sebor wrote:

I've finished reimplementing the patch as a standalone pass.
In the attached revision I also addressed your comments below
as well as Richard's to allowing the strlen optimizations even
for overlapping accesses.

While beefing up the tests I found a few minor issues that
I also fixed (false negatives).

The fallout wasn't quite as bad as I thought, mainly thanks
to the narrow API for the checker.

Syncing up with the latest trunk has led to some more changes
in tree-ssa-strlen.

I've retested the patch with GDB and Glibc with the same results
as before.

The patch seems sizable (over 3KLOC without tests) but it's worth
noting that most of the complexity is actually not in determining
whether or not an overlap exists (that's quite simple) but rather
in computing its offset and size to mention in the warnings and
making sure the information is meaningful to the user even when
ranges are involved.  All the subtly different forms of warnings
also contribute substantially to the overall size.
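
To give a flavor of the simplest case the checker handles (my own
example, not one of the new tests):

#include <string.h>

char buf[8] = "abcdefg";

void
copy_over_self (void)
{
  /* Source and destination overlap, so this is the kind of call the
     new pass warns about, mentioning the overlapping offsets and
     size.  */
  memcpy (buf + 1, buf, 4);
}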

Martin

[ Huge snip. ]



gcc-78918.diff


PR tree-optimization/78918 - missing -Wrestrict on memcpy copying over self

gcc/c-family/ChangeLog:

PR tree-optimization/78918
* c-common.c (check_function_restrict): Avoid checking built-ins.
* c.opt (-Wrestrict): Include in -Wall.

gcc/ChangeLog:

PR tree-optimization/78918
* Makefile.in (OBJS): Add gimple-ssa-warn-restrict.o.
* builtins.c (check_sizes): Rename...
(check_access): ...to this.  Rename function arguments for clarity.
(check_memop_sizes): Adjust names.
(expand_builtin_memchr, expand_builtin_memcpy): Same.
(expand_builtin_memmove, expand_builtin_mempcpy): Same.
(expand_builtin_strcat, expand_builtin_stpncpy): Same.
(check_strncat_sizes, expand_builtin_strncat): Same.
(expand_builtin_strncpy, expand_builtin_memset): Same.
(expand_builtin_bzero, expand_builtin_memcmp): Same.
(expand_builtin_memory_chk, maybe_emit_chk_warning): Same.
(maybe_emit_sprintf_chk_warning): Same.
(expand_builtin_strcpy): Adjust.
(expand_builtin_stpcpy): Same.
(expand_builtin_with_bounds): Detect out-of-bounds accesses
in pointer-checking forms of memcpy, memmove, and mempcpy.
(gcall_to_tree_minimal, max_object_size): Define new functions.
* builtins.h (max_object_size): Declare.
* calls.c (alloc_max_size): Call max_object_size instead of
hardcoding ssizetype limit.
(get_size_range): Handle new argument.
* calls.h (get_size_range): Add a new argument.
* cfgexpand.c (expand_call_stmt): Propagate no-warning bit.
* doc/invoke.texi (-Wrestrict): Adjust, add example.
* gimple-fold.c (gimple_fold_builtin_memory_op): Detect overlapping
operations.
(gimple_fold_builtin_memory_chk): Same.
(gimple_fold_builtin_stxcpy_chk): New function.
* gimple-ssa-warn-restrict.c: New source.
* gimple-ssa-warn-restrict.h: New header.
* gimple.c (gimple_build_call_from_tree): Propagate location.
* passes.def (pass_warn_restrict): Add new pass.
* tree-pass.h (make_pass_warn_restrict): Declare.
* tree-ssa-strlen.c (handle_builtin_strcpy): Detect overlapping
operations.
(handle_builtin_strcat): Same.
(strlen_optimize_stmt): Rename...
(strlen_check_and_optimize_stmt): ...to this.  Handle strncat,
stpncpy, strncpy, and their checking forms.

gcc/testsuite/ChangeLog:

PR tree-optimization/78918
* c-c++-common/Warray-bounds.c: New test.
* c-c++-common/Warray-bounds-2.c: New test.
* c-c++-common/Warray-bounds-3.c: New test.
* c-c++-common/Wrestrict-2.c: New test.
* c-c++-common/Wrestrict.c: New test.
* c-c++-common/Wrestrict.s: New test.
* c-c++-common/Wsizeof-pointer-memaccess1.c: Adjust
* c-c++-common/Wsizeof-pointer-memaccess2.c: Same.
* g++.dg/torture/Wsizeof-pointer-memaccess1.C: Same.
* g++.dg/torture/Wsizeof-pointer-memaccess2.C: Same.
* gcc.dg/memcpy-6.c: New test.
* gcc.dg/pr69172.c: Adjust.
* gcc.dg/pr79223.c: Same.
* gcc.dg/Wrestrict-2.c: New test.
* gcc.dg/Wrestrict.c: New test.
* gcc.dg/Wsizeof-pointer-memaccess1.c
* gcc.target/i386/chkp-stropt-17.c: New test.
* gcc.dg/torture/Wsizeof-pointer-memaccess1.c: Adjust.

@@ -3874,32 +3885,32 @@ check_strncat_sizes (tree exp, tree objsize)
size_one_node)
 : NULL_TREE);

-  /* Strncat copies at most MAXLEN bytes and always appends the terminating
+  /* Strncat copies at most MAXREAD bytes and always appends the terminating

Nit.  Use "strncat" rather than "Strncat", even when starting a
sentence.  I saw this elsewhere.  You can fix these in a follow-u

[PATCH] Add srodata section support to riscv port.

2017-12-07 Thread Jim Wilson
This adds srodata section support to the RISC-V port, putting small read-only
data in the .srodata section instead of the .sdata section.  There is already
code to put small read-only rtx in .srodata* instead of .rodata*.  This
does the same for small read-only trees to be consistent.
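
For illustration (my example, not from the patch): with the default
small data limit, a small constant object like the one below is the
kind of read-only tree that should now land in .srodata rather than
.sdata:

/* Eight bytes, so within the default -msmall-data-limit, and read-only;
   it should be categorized as SECCAT_SRODATA and placed in .srodata.  */
static const int scale[2] = { 3, 5 };

int
apply (int i, int x)
{
  return scale[i & 1] * x;
}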

Tested with a rv32gc/ilp32 make check.  There were no regressions.  Committed.

2017-12-07  Andrew Waterman  

gcc/
* config/riscv/riscv.c (TARGET_ASM_SELECT_SECTION): New define.
(TARGET_HAVE_SRODATA_SECTION): New define.
(riscv_select_section): New function.
---
 gcc/config/riscv/riscv.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index c7283d02141..cfab2e77686 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3042,6 +3042,22 @@ riscv_in_small_data_p (const_tree x)
   return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
 }
 
+/* Switch to the appropriate section for output of DECL.  */
+
+static section *
+riscv_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+  switch (categorize_decl_for_section (decl, reloc))
+{
+case SECCAT_SRODATA:
+  return get_named_section (decl, ".srodata", reloc);
+
+default:
+  return default_elf_select_section (decl, reloc, align);
+}
+}
+
 /* Return a section for X, handling small data. */
 
 static section *
@@ -4320,6 +4336,12 @@ riscv_constant_alignment (const_tree exp, HOST_WIDE_INT 
align)
 #undef TARGET_IN_SMALL_DATA_P
 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
 
+#undef TARGET_HAVE_SRODATA_SECTION
+#define TARGET_HAVE_SRODATA_SECTION true
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION riscv_select_section
+
 #undef TARGET_ASM_SELECT_RTX_SECTION
 #define TARGET_ASM_SELECT_RTX_SECTION  riscv_elf_select_rtx_section
 
-- 
2.14.1



[patch 1/3] [nios2] fix code size regressions with constant integer addresses

2017-12-07 Thread Sandra Loosemore

My series of patches from earlier this fall

https://gcc.gnu.org/ml/gcc-patches/2017-10/msg01309.html

caused code size regressions in cases where constant integers are used 
as addresses; specifically, LRA was getting confused and trying to 
generate a stack spill because it couldn't figure out how to match the 
insn constraints.  My solution for this problem is to generalize what I 
did previously to handle 32-bit integer constant addresses as well as 
32-bit symbolic constants.  Much of this patch is just renaming some 
functions to reflect that they've been made more general now.


I also special-cased integer constant addresses that fit in the 16-bit 
range to use the newish r0-relative addressing mode.
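
For example (an illustration in the spirit of the new tests in part 3,
not taken from them verbatim): a store to a fixed I/O address in the
16-bit range can now use r0 as the base instead of first materializing
the constant address in a register:

#define DEVREG ((volatile int *) 0x210)

void
set_devreg (int x)
{
  *DEVREG = x;   /* can be emitted as "stw rX, 528(r0)" */
}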


I've checked this in.  There are new test cases to go with this in part 3.

-Sandra
2017-12-07  Sandra Loosemore  

	gcc/
	* config/nios2/nios2.c (nios2_symbolic_constant_allowed):
	Rename to...
	(nios2_large_constant_allowed): ...this.  Adjust uses.
	(nios2_plus_symbolic_constant_p): Rename to...
	(nios2_plus_large_constant_p): ...this.  Adjust uses.
	(nios2_legitimate_address_p): Correct CONST_INT handling.
	(nios2_symbolic_memory_operand_p): Rename to...
	(nios2_large_constant_memory_operand_p): ...this.  Adjust uses.
	(nios2_large_constant_p): Check for large constant integers too.
	(nios2_split_large_constant): Handle constant integers.
	(nios2_split_symbolic_memory_operand): Rename to...
	(nios2_split_large_constant_memory_operand): ...this.  Adjust uses.
	(nios2_legitimize_constant_address): Handle constant integers.
	(r0rel_constant_p): Handle small constant integers.
	(nios2_print_operand_address): Handle r0-relative integer addresses.
	* config/nios2/nios2-protos.h: Adjust for renamed functions.
	* config/nios2/nios2.md: Adjust for renamed functions.
Index: gcc/config/nios2/nios2.c
===
--- gcc/config/nios2/nios2.c	(revision 255266)
+++ gcc/config/nios2/nios2.c	(working copy)
@@ -2009,12 +2009,13 @@ nios2_validate_compare (machine_mode mod
 
 /* Addressing modes and constants.  */
 
-/* Symbolic constants are split into high/lo_sum pairs during the 
-   split1 pass.  After that, they are not considered legitimate addresses.
+/* Symbol references and other 32-bit constants are split into
+   high/lo_sum pairs during the split1 pass.  After that, they are not
+   considered legitimate addresses.
This function returns true if in a pre-split context where these
constants are allowed.  */
 static bool
-nios2_symbolic_constant_allowed (void)
+nios2_large_constant_allowed (void)
 {
   /* The reload_completed check is for the benefit of
  nios2_asm_output_mi_thunk and perhaps other places that try to
@@ -2046,13 +2047,13 @@ nios2_symbolic_constant_p (rtx x)
 }
 
 /* Return true if X is an expression of the form 
-   (PLUS reg symbolic_constant).  */
+   (PLUS reg large_constant).  */
 static bool
-nios2_plus_symbolic_constant_p (rtx x)
+nios2_plus_large_constant_p (rtx x)
 {
   return (GET_CODE (x) == PLUS
 	  && REG_P (XEXP (x, 0))
-	  && nios2_symbolic_constant_p (XEXP (x, 1)));
+	  && nios2_large_constant_p (XEXP (x, 1)));
 }
 
 /* Implement TARGET_LEGITIMATE_CONSTANT_P.  */
@@ -2122,7 +2123,7 @@ nios2_valid_addr_expr_p (rtx base, rtx o
 	  && nios2_regno_ok_for_base_p (REGNO (base), strict_p)
 	  && (offset == NULL_RTX
 	  || nios2_valid_addr_offset_p (offset)
-	  || (nios2_symbolic_constant_allowed () 
+	  || (nios2_large_constant_allowed () 
 		  && nios2_symbolic_constant_p (offset))
 	  || nios2_unspec_reloc_p (offset)));
 }
@@ -2146,12 +2147,16 @@ nios2_legitimate_address_p (machine_mode
 
   /* Else, fall through.  */
 case LABEL_REF:
-  if (nios2_symbolic_constant_allowed () 
+  if (nios2_large_constant_allowed () 
 	  && nios2_symbolic_constant_p (operand))
 	return true;
+  return false;
 
-  /* Else, fall through.  */
 case CONST_INT:
+  if (r0rel_constant_p (operand))
+	return true;
+  return nios2_large_constant_allowed ();
+
 case CONST_DOUBLE:
   return false;
 
@@ -2213,9 +2218,9 @@ nios2_address_cost (rtx address,
 		addr_space_t as ATTRIBUTE_UNUSED, 
 		bool speed ATTRIBUTE_UNUSED)
 {
-  if (nios2_plus_symbolic_constant_p (address))
+  if (nios2_plus_large_constant_p (address))
 return COSTS_N_INSNS (1);
-  if (nios2_symbolic_constant_p (address))
+  if (nios2_large_constant_p (address))
 {
   if (GET_CODE (address) == CONST)
 	return COSTS_N_INSNS (1);
@@ -2225,10 +2230,10 @@ nios2_address_cost (rtx address,
   return COSTS_N_INSNS (0);
 }
 
-/* Return true if X is a MEM whose address expression involves a symbolic
+/* Return true if X is a MEM whose address expression involves a large (32-bit)
constant.  */
 bool
-nios2_symbolic_memory_operand_p (rtx x)
+nios2_large_constant_memory_operand_p (rtx x)
 {
   rtx addr;
 
@@ -2236,8 +2241,8 @@ nios2_symbolic_memory_operand_p (rtx x)
 return false;
   addr = XEXP (x, 0);

[patch 2/3] [nios2] add splitters for io-variant memory access insns

2017-12-07 Thread Sandra Loosemore
I previously neglected to add address splitters for the io-variant 
memory read and write insns, as I did for all the other insns that take 
memory operands.  In the examples I looked at, I found there was still 
valid code coming out, but it seemed like an accident that it was 
falling through to a case that handled it.  I've committed this patch to 
fix it properly.


-Sandra
2017-12-07  Sandra Loosemore  

	gcc/
	* config/nios2/nios2.md (ldio): Add splitter for memory
	operand.
	(ldio_signed): Likewise.
	(stio): Likewise.
	* config/nios2/predicates.md (ldstio_memory_operand): Allow
	SMALL_INT12 constant integer operand.
Index: gcc/config/nios2/nios2.md
===
--- gcc/config/nios2/nios2.md	(revision 255266)
+++ gcc/config/nios2/nios2.md	(working copy)
@@ -318,12 +318,18 @@
 (define_mode_attr bhw [(QI "b") (HI "h") (SI "w")])
 (define_mode_attr bhw_uns [(QI "bu") (HI "hu") (SI "w")])
 
-(define_insn "ldio"
+(define_insn_and_split "ldio"
   [(set (match_operand:BHW 0 "register_operand" "=r")
 (unspec_volatile:BHW
   [(match_operand:BHW 1 "ldstio_memory_operand" "w")] UNSPECV_LDXIO))]
   ""
   "ldio\\t%0, %1"
+  "nios2_large_constant_memory_operand_p (operands[1])"
+  [(set (match_dup 0) 
+(unspec_volatile:BHW [(match_dup 1)] UNSPECV_LDXIO))]
+  {
+operands[1] = nios2_split_large_constant_memory_operand (operands[1]);
+  }
   [(set_attr "type" "ld")])
 
 (define_expand "ldio"
@@ -337,21 +343,32 @@
   DONE;
 })
 
-(define_insn "ldio_signed"
+(define_insn_and_split "ldio_signed"
   [(set (match_operand:SI 0 "register_operand" "=r")
 (sign_extend:SI
   (unspec_volatile:BH
 [(match_operand:BH 1 "ldstio_memory_operand" "w")] UNSPECV_LDXIO)))]
   ""
   "ldio\\t%0, %1"
+  "nios2_large_constant_memory_operand_p (operands[1])"
+  [(set (match_dup 0) 
+(sign_extend:SI (unspec_volatile:BH [(match_dup 1)] UNSPECV_LDXIO)))]
+  {
+operands[1] = nios2_split_large_constant_memory_operand (operands[1]);
+  }
   [(set_attr "type" "ld")])
 
-(define_insn "stio"
+(define_insn_and_split "stio"
   [(set (match_operand:BHW 0 "ldstio_memory_operand" "=w")
 (unspec_volatile:BHW
   [(match_operand:BHW 1 "reg_or_0_operand" "rM")] UNSPECV_STXIO))]
   ""
   "stio\\t%z1, %0"
+  "nios2_large_constant_memory_operand_p (operands[0])"
+  [(set (match_dup 0) (unspec_volatile:BHW [(match_dup 1)] UNSPECV_STXIO))]
+  {
+operands[0] = nios2_split_large_constant_memory_operand (operands[0]);
+  }
   [(set_attr "type" "st")])
 
 
Index: gcc/config/nios2/predicates.md
===
--- gcc/config/nios2/predicates.md	(revision 255266)
+++ gcc/config/nios2/predicates.md	(working copy)
@@ -146,6 +146,8 @@
 return (REG_P (XEXP (addr, 0))
 && CONST_INT_P (XEXP (addr, 1))
&& SMALL_INT12 (INTVAL (XEXP (addr, 1))));
+  else if (CONST_INT_P (addr))
+return SMALL_INT12 (INTVAL (addr));
   return false;
 }
   return memory_operand (op, mode);


[3/3] [nios2] new test cases

2017-12-07 Thread Sandra Loosemore
I've checked in these new constant-integer address test cases to go with 
the previous two patches in this series.


-Sandra
2017-12-07  Sandra Loosemore  

	gcc/testsuite/
	* gcc.target/nios2/const-addr-1.c: New.
	* gcc.target/nios2/const-addr-2.c: New.
	* gcc.target/nios2/const-addr-3.c: New.
Index: gcc/testsuite/gcc.target/nios2/const-addr-1.c
===
--- gcc/testsuite/gcc.target/nios2/const-addr-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/nios2/const-addr-1.c	(working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "stw\tr., 12816\\(r\[2-9\]\\)" 1 } } */
+/* { dg-final { scan-assembler-times "stw\tr., 12816\\(r0\\)" 1 } } */
+/* { dg-final { scan-assembler-times "stw\tr., 528\\(r0\\)" 1 } } */
+
+/* These functions should not spill to stack.  */
+/* { dg-final { scan-assembler-not "addi\tsp, sp" } } */
+/* { dg-final { scan-assembler-not "spdeci" } } */
+
+#define addr1 ((volatile int *) 0x43210)
+#define addr2 ((volatile int *) 0x3210)
+#define addr3 ((volatile int *) 0x210)
+
+#define SET(l,r) (*(l) = (r))
+
+void foo1 (int x) { SET (addr1, x); }
+void foo2 (int x) { SET (addr2, x); }
+void foo3 (int x) { SET (addr3, x); }
Index: gcc/testsuite/gcc.target/nios2/const-addr-2.c
===
--- gcc/testsuite/gcc.target/nios2/const-addr-2.c	(nonexistent)
+++ gcc/testsuite/gcc.target/nios2/const-addr-2.c	(working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=r1 -mno-cdx -mno-bmx -O2" } */
+/* { dg-final { scan-assembler-times "stwio\tr., 12816\\(r\[2-9\]\\)" 1 } } */
+/* { dg-final { scan-assembler-times "stwio\tr., 12816\\(r0\\)" 1 } } */
+/* { dg-final { scan-assembler-times "stwio\tr., 528\\(r0\\)" 1 } } */
+
+/* These functions should not spill to stack.  */
+/* { dg-final { scan-assembler-not "addi\tsp, sp" } } */
+
+#define addr1 ((volatile int *) 0x43210)
+#define addr2 ((volatile int *) 0x3210)
+#define addr3 ((volatile int *) 0x210)
+
+#define SET(l,r) __builtin_stwio ((l), (r))
+
+void foo1 (int x) { SET (addr1, x); }
+void foo2 (int x) { SET (addr2, x); }
+void foo3 (int x) { SET (addr3, x); }
Index: gcc/testsuite/gcc.target/nios2/const-addr-3.c
===
--- gcc/testsuite/gcc.target/nios2/const-addr-3.c	(nonexistent)
+++ gcc/testsuite/gcc.target/nios2/const-addr-3.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=r2 -mno-cdx -mno-bmx -O2" } */
+/* { dg-final { scan-assembler-times "stwio\tr., 0\\(r" 2 } } */
+/* { dg-final { scan-assembler-times "stwio\tr., 528\\(r0\\)" 1 } } */
+
+/* These functions should not spill to stack.  */
+/* { dg-final { scan-assembler-not "addi\tsp, sp" } } */
+
+/* On R2, stwio takes only a 12-bit displacement so foo1 and foo2 need
+   to use register indirect addressing.  */
+
+#define addr1 ((volatile int *) 0x43210)
+#define addr2 ((volatile int *) 0x3210)
+#define addr3 ((volatile int *) 0x210)
+
+#define SET(l,r) __builtin_stwio ((l), (r))
+
+void foo1 (int x) { SET (addr1, x); }
+void foo2 (int x) { SET (addr2, x); }
+void foo3 (int x) { SET (addr3, x); }


Re: [PATCH][i386,AVX] Enable VAES support [2/5]

2017-12-07 Thread Kirill Yukhin
Hello Julia,
On 08 Nov 12:32, Koval, Julia wrote:
> Hi, this patch enables the VAESDEC instruction from the VAES ISA set, defined here: 
> https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
> 
> Ok for trunk?
Patch is OK. I've checked it in.
> Thanks,
> Julia

--
Thanks, K