[nvptx] Incorrect %retval usage in C++ code

2015-08-17 Thread Thomas Schwinge
Hi!

I observed that for a (slowly increasing?) number of C++ testcases
(gcc/testsuite/g++.*/), their nvptx compilation fails as follows:

spawn [...]/build-gcc/gcc/testsuite/g++/../../xg++ 
-B[...]/build-gcc/gcc/testsuite/g++/../../ 
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/nsdmi4.C 
-fno-diagnostics-show-caret -fdiagnostics-color=never 
--sysroot=[...]/install/nvptx-none -fmessage-length=0 -std=c++11 
-pedantic-errors -Wno-long-long -DNO_LABEL_VALUES -DNO_TRAMPOLINES -isystem 
[...]/build-gcc/nvptx-none/./newlib/targ-include -isystem 
[...]/source-gcc/newlib/libc/include -B[...]/build-gcc/nvptx-none/./newlib/ 
-L[...]/build-gcc/nvptx-none/./newlib -mmainkernel -lm -o ./nsdmi4.exe
ptxas /tmp/cclwz6Zm.o, line 52; error   : Arguments mismatch for 
instruction 'mov'
ptxas /tmp/cclwz6Zm.o, line 52; error   : Unknown symbol '%retval'
ptxas /tmp/cclwz6Zm.o, line 52; error   : Label expected for forward 
reference of '%retval'
ptxas fatal   : Ptx assembly aborted due to errors
nvptx-as: ptxas returned 255 exit status
compiler exited with status 1

Reduced from g++.dg/cpp0x/nsdmi4.C:

$ < source-gcc/gcc/testsuite/g++.dg/cpp0x/nsdmi4_.C cat
struct A
{
  A() { }
  A(const A&) { }
};

A f() { return A(); }
$ build-gcc/gcc/xg++ -Bbuild-gcc/gcc/ 
source-gcc/gcc/testsuite/g++.dg/cpp0x/nsdmi4_.C -std=c++11 -S
$ < nsdmi4_.s c++filt
// BEGIN PREAMBLE
.version 3.1
.target sm_30
.address_size 64
// END PREAMBLE

// BEGIN FUNCTION DECL: A::A()
.func A::A()(.param.u64 %in_ar1);
// BEGIN FUNCTION DEF: A::A()
.func A::A()(.param.u64 %in_ar1)
{
.reg.u64 %ar1;
.reg.u64 %hr10;
.reg.u64 %r22;
.reg.u64 %frame;
.local.align 8 .b8 %farray[8];
cvta.local.u64 %frame, %farray;
ld.param.u64 %ar1, [%in_ar1];
mov.u64 %r22, %ar1;
st.u64  [%frame], %r22;
ret;
}
// BEGIN GLOBAL FUNCTION DECL: f()
.visible .func f()(.param.u64 %in_ar1);
// BEGIN GLOBAL FUNCTION DEF: f()
.visible .func f()(.param.u64 %in_ar1)
{
.reg.u64 %ar1;
.reg.u64 %hr10;
.reg.u64 %r22;
.reg.u64 %r23;
ld.param.u64 %ar1, [%in_ar1];
mov.u64 %r22, %ar1;
mov.u64 %r23, %r22;
{
.param.u64 %out_arg0;
st.param.u64 [%out_arg0], %r23;
call A::A(), (%out_arg0);
}
mov.u64 %retval, %r22;
ret;
}

Notice the stray %retval usage very near the end.


Note that before r226901, »[PR64164] Drop copyrename, use coalescible
partition as base when optimizing.«, this test case did set up a %frame,
and did not assign to %retval:

@@ -31,21 +31,18 @@
.reg.u32 %r23;
.reg.u64 %r24;
.reg.u64 %r25;
-   .reg.u64 %frame;
-   .local.align 8 .b8 %farray[8];
-   cvta.local.u64 %frame, %farray;
 ld.param.u64 %ar1, [%in_ar1];
mov.u64 %r24, %ar1;
-   st.u64  [%frame], %r24;
ld.global.u32   %r22, [c];
add.u32 %r23, %r22, 1;
st.global.u32   [c], %r23;
-   ld.u64  %r25, [%frame];
+   mov.u64 %r25, %r24;
{
.param.u64 %out_arg0;
st.param.u64 [%out_arg0], %r25;
call _ZN1AC1Ev, (%out_arg0);
}
+   mov.u64 %retval, %r24;
ret;
}

But I don't think that this recent commit is directly related to the
problem at hand, but it just exposes it some more: before that recent
commit, there have already been test cases failing with the same stray
%retval usage.

I suspect that this mov is incorrectly generated by
gcc/function.c:expand_function_end, but can't tell if something's wrong
in there, or rather in the nvptx backend's NVPTX_RETURN_REGNUM handling
(which I can't claim to really understand), and as I'm unlikely to spend
more time on this before leaving for vacations soon, I wanted to dump my
state now.  Maybe one of you has an idea about this.


Also, I guess the following cleanup (untested) is in order:

diff --git gcc/config/nvptx/nvptx.h gcc/config/nvptx/nvptx.h
index afe4fcd..d846ec3 100644
--- gcc/config/nvptx/nvptx.h
+++ gcc/config/nvptx/nvptx.h
@@ -103,8 +103,8 @@ enum reg_class
 #define N_REG_CLASSES (int) LIM_REG_CLASSES
 
 #define REG_CLASS_NAMES {\
-"RETURN_REG",\
 "NO_REGS",   \
+"RETURN_REG",\
 "ALL_REGS" }
 
 #define REG_CLASS_CONTENTS \
@@ -119,7 +119,7 @@ enum reg_class
 
 #define GENERAL_REGS ALL_REGS
 
-#define REGN

Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Christophe Lyon
On 14 August 2015 at 20:57, Alexandre Oliva  wrote:
> On Aug 11, 2015, Patrick Marlier  wrote:
>
>> On Mon, Aug 10, 2015 at 5:14 PM, Jeff Law  wrote:
>>> On 08/10/2015 02:23 AM, James Greenhalgh wrote:
>
 For what it is worth, I bootstrapped and tested the consolidated patch
 on arm-none-linux-gnueabihf and aarch64-none-linux-gnu with trunk at
 r226516 over the weekend, and didn't see any new issues.
>
> Thanks!
>
>> Especially as the bug reporter, I am impressed how a slight problem
>> can lead to such a patch! ;)
>> Thanks a lot Alexandre!
>
> You're welcome.  I'm glad it appears to be working to everyone's
> satisfaction now.  I've just committed it as r226901, with only a
> context adjustment to account for a change in use_register_for_decl in
> function.c.  /me crosses fingers :-)
>
> Here's the patch as checked in:
>

Hi,

Since this was committed (r226901), I can see that the compiler build
fails for armeb targets, when building libgcc:
In file included from
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.c:55:0:
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.c:
In function '__gnu_addha3':
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.h:450:31:
internal compiler error: in simplify_subreg, at simplify-rtx.c:5790
 #define FIXED_OP(OP,MODE,NUM) __gnu_ ## OP ## MODE ## NUM
   ^
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.h:460:30:
note: in expansion of macro 'FIXED_OP'
 #define FIXED_ADD_TEMP(NAME) FIXED_OP(add,NAME,3)
  ^
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.h:492:19:
note: in expansion of macro 'FIXED_ADD_TEMP'
 #define FIXED_ADD FIXED_ADD_TEMP(MODE_NAME_S)
   ^
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgcc/fixed-bit.c:59:1:
note: in expansion of macro 'FIXED_ADD'
 FIXED_ADD (FIXED_C_TYPE a, FIXED_C_TYPE b)
 ^
0xa4bbc3 simplify_subreg(machine_mode, rtx_def*, machine_mode, unsigned int)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/simplify-rtx.c:5790
0xa4bbc3 simplify_subreg(machine_mode, rtx_def*, machine_mode, unsigned int)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/simplify-rtx.c:5790
0xa4ce2d simplify_gen_subreg(machine_mode, rtx_def*, machine_mode, unsigned int)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/simplify-rtx.c:6013
0xa4ce2d simplify_gen_subreg(machine_mode, rtx_def*, machine_mode, unsigned int)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/simplify-rtx.c:6013
0x784385 move_block_from_reg(int, rtx_def*, int)
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:1536
0x784385 move_block_from_reg(int, rtx_def*, int)
/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/expr.c:1536
0x7e165d assign_parm_setup_block

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:3076
0x7e165d assign_parm_setup_block

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:3076
0x7e813a assign_parms

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:3805
0x7e813a assign_parms

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:3805
0x7e8f2e expand_function_start(tree_node*)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:5234
0x7e8f2e expand_function_start(tree_node*)

/tmp/4972337_7.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:5234

Christophe.

> for  gcc/ChangeLog
>
> PR rtl-optimization/64164
> PR bootstrap/66978
> PR middle-end/66983
> PR rtl-optimization/67000
> PR middle-end/67034
> PR middle-end/67035
> * Makefile.in (OBJS): Drop tree-ssa-copyrename.o.
> * tree-ssa-copyrename.c: Removed.
> * opts.c (default_options_table): Drop -ftree-copyrename.  Add
> -ftree-coalesce-vars.
> * passes.def: Drop all occurrences of pass_rename_ssa_copies.
> * common.opt (ftree-copyrename): Ignore.
> (ftree-coalesce-inlined-vars): Likewise.
> * doc/invoke.texi: Remove the ignored options above.
> * gimple-expr.h (gimple_can_coalesce_p): Move declaration
> * tree-ssa-coalesce.h: ... here.
> * tree-ssa-uncprop.c: Include tree-ssa-coalesce.h and other
> headers required by it.
> * gimple-expr.c (gimple_can_coalesce_p): Allow coalescing
> across variables when flag_tree_coalesce_vars.  Check register
> use and promoted modes to allow coalescing.  Do not coalesce
> maybe-byref parms with SSA_NAMEs of other variables, or
> anonymous SSA_NAMEs.  Moved to tree-ssa-coalesce.c.
> * tree-ssa-live.c (struct tree_int_map_hasher): Move along
> with its member functions to tree-ssa-coalesce.c

Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Andreas Schwab
Alexandre Oliva  writes:

> Would you be so kind as to give it a spin on a m68k native?  TIA,

I tried it on ia64, and it falls flat on the floor.

../../../libgcc/config/ia64/unwind-ia64.c: In function ‘_Unwind_SetGR’:
../../../libgcc/config/ia64/unwind-ia64.c:1683:1: internal compiler error: 
Segmentation fault
 _Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
 ^
0x41807edf crash_signal
../../gcc/toplev.c:352
0x40d0ed60 parm_in_unassigned_mem_p
../../gcc/function.c:2940
0x40d23e8f assign_parm_setup_stack
../../gcc/function.c:3473
0x40d2b43f assign_parms
../../gcc/function.c:3830
0x40d2e24f expand_function_start(tree_node*)
../../gcc/function.c:5254
0x407bdabf execute
../../gcc/cfgexpand.c:6187

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Andreas Schwab
Andreas Schwab  writes:

> Alexandre Oliva  writes:
>
>> Would you be so kind as to give it a spin on a m68k native?  TIA,
>
> I tried it on ia64, and it falls flat on the floor.

It fixes the m68k failures, though.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Kyrill Tkachov

Hi Alexandre,

On 17/08/15 03:56, Alexandre Oliva wrote:

On Aug 16, 2015, Andreas Schwab  wrote:


Alexandre Oliva  writes:

On Aug 15, 2015, Andreas Schwab  wrote:


FAIL: gcc.target/aarch64/target_attr_crypto_ice_1.c (internal compiler error)
In file included from
/opt/gcc/gcc-20150815/gcc/testsuite/gcc.target/aarch64/target_attr_crypto_ice_1.c:4:0:

Are you sure this is a regression introduced by my patch?

Yes, it reintroduces the ICE.

Ugh.  I see this testcase was introduced very recently, so presumably it
wasn't present in the tree that James Greenhalgh tested and confirmed
there were no regressions.


Yeah, I introduced it as part of the SWITCHABLE_TARGET
work for aarch64. A bit of a mid-air collision :(


The hack in aarch64-builtins.c looks risky IMHO.  Changing the mode of a
decl after RTL is assigned to it (or to its SSA partitions) seems fishy.
The assert is doing just what it was supposed to do.  The only surprise
to me is that it didn't catch this unexpected and unsupported change
before.

Presumably if we just dropped the assert in expand_expr_real_1, this
case would work just fine, although the unsignedp bit would be
meaningless and thus confusing, since the subreg isn't about a
promotion, but about reflecting the mode change that was made from under
us.

May I suggest that you guys find (or introduce) other means to change
the layout and mode of the decl *before* RTL is assigned to the params?
I think this would save us a ton of trouble down the road.  Just think
how much trouble you'd get if the different modes had different calling
conventions, alignment requirements, valid register assignments, or
anything that might make coalescing their SSA names with those of other
variables invalid.


I'm not familiar with the intricacies in this area but
I'll have a look.
Perhaps we can somehow re-layout the SIMD types when
switching from a non-simd to a simd target...
Can you, or Andreas please file a PR so we don't forget?

Thanks,
Kyrill


RE: [PING][Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-08-17 Thread David Sherwood
Hi Richard,

Thanks for the reply. I'd chosen to add new expressions as this seemed more
consistent with the existing MAX_EXPR and MIN_EXPR tree codes. In addition it
would seem to provide more opportunities for optimisation than a target-specific
builtin implementation would. I accept that optimisation opportunities will
be more limited for strict math compilation, but that it was still worth having
them. Also, if we did map it to builtins then the scalar version would go
through the optabs and the vector version would go through the target's builtin
expansion, which doesn't seem very consistent.

Regards,
David.

> -Original Message-
> From: Richard Biener [mailto:richard.guent...@gmail.com]
> Sent: 13 August 2015 12:10
> To: David Sherwood
> Cc: GCC Patches
> Subject: Re: [PING][Patch] Add support for IEEE-conformant versions of scalar 
> fmin* and fmax*
> 
> On Thu, Aug 13, 2015 at 12:11 PM, David Sherwood  
> wrote:
> > Hi,
> >
> > Sorry to bother people again. Is this OK to go now?
> 
> Hmm, why don't you go the vectorized function call path for this,
> implementing the builtin_vectorized_function target hook?
> 
> Richard.
> 
> > Thanks!
> > David.
> >
> >> >
> >> > > On Mon, 29 Jun 2015, David Sherwood wrote:
> >> > >
> >> > > > Hi,
> >> > > >
> >> > > > I have added new STRICT_MAX_EXPR and STRICT_MIN_EXPR expressions to 
> >> > > > support the
> >> > > > IEEE versions of fmin and fmax. This is done by recognising the math 
> >> > > > library
> >> > > > "fmax" and "fmin" builtin functions in a similar way to how this is 
> >> > > > done for
> >> > > > -ffast-math. This also allows us to vectorise the IEEE max/min 
> >> > > > functions for
> >> > > > targets that support it, for example aarch64/aarch32.
> >> > >
> >> > > This patch is missing documentation.  You need to document the new insn
> >> > > patterns in md.texi and the new tree codes in generic.texi.
> >> >
> >> > Hi, I've uploaded a new patch with the documentation. Hope this is ok.
> >>
> >> In various places where you refer to one operand being NaN, I think you
> >> mean one operand being a *quiet* NaN (if one is a signaling NaN - only
> >> supported by GCC if -fsignaling-nans - the IEEE minNum and maxNum
> >> operations raise "invalid" and return a quiet NaN).
> >
> > Hi, I have a new patch that hopefully addresses the documentation issues.
> >
> > Thanks,
> > David.
> >
> > ChangeLog:
> >
> > 2015-07-15  David Sherwood  
> >
> > gcc/
> > * builtins.c (integer_valued_real_p): Add STRICT_MIN_EXPR and
> > STRICT_MAX_EXPR.
> > (fold_builtin_fmin_fmax): For strict math, convert builtin fmin and
> > fmax to STRICT_MIN_EXPR and STRICT_MAX_EXPR, respectively.
> > * expr.c (expand_expr_real_2): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
> > * fold-const.c (const_binop): Likewise.
> > (fold_binary_loc, tree_binary_nonnegative_warnv_p): Likewise.
> > (tree_binary_nonzero_warnv_p): Likewise.
> > * optabs.h (strict_minmax_support): Declare.
> > * optabs.def: Add new optabs strict_max_optab/strict_min_optab.
> > * optabs.c (optab_for_tree_code): Return new optabs for STRICT_MIN_EXPR
> > and STRICT_MAX_EXPR.
> > (strict_minmax_support): New function.
> > * real.c (real_arithmetic): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
> > * tree.def: Likewise.
> > * tree.c (associative_tree_code, commutative_tree_code): Likewise.
> > * tree-cfg.c (verify_expr): Likewise.
> > (verify_gimple_assign_binary): Likewise.
> > * tree-inline.c (estimate_operator_cost): Likewise.
> > * tree-pretty-print.c (dump_generic_node, op_code_prio): Likewise.
> > (op_symbol_code): Likewise.
> > gcc/config:
> > * aarch64/aarch64.md: New pattern.
> > * aarch64/aarch64-simd.md: Likewise.
> > * aarch64/iterators.md: New unspecs, iterators.
> > * arm/iterators.md: New iterators.
> > * arm/unspecs.md: New unspecs.
> > * arm/neon.md: New pattern.
> > * arm/vfp.md: Likewise.
> > gcc/doc:
> > * generic.texi: Add STRICT_MAX_EXPR and STRICT_MIN_EXPR.
> > * md.texi: Add strict_min and strict_max patterns.
> > gcc/testsuite
> > * gcc.target/aarch64/maxmin_strict.c: New test.
> > * gcc.target/arm/maxmin_strict.c: New test.





Re: arm memcpy of aligned data

2015-08-17 Thread Kyrill Tkachov


On 16/08/15 20:01, Mike Stump wrote:

On Jun 15, 2015, at 7:30 AM, Kyrill Tkachov  wrote:

On 29/05/15 11:15, Kyrill Tkachov wrote:

On 29/05/15 10:08, Kyrill Tkachov wrote:

Hi Mike,

On 28/05/15 22:15, Mike Stump wrote:

So, the arm memcpy code of aligned data isn’t as good as it can be.

void *memcpy(void *dest, const void *src, unsigned int n);

void foo(char *dst, int i) {
 memcpy (dst, &i, sizeof (i));
}

generates horrible code, but if we are willing to notice that the src or the
destination is aligned, we can do much better:

$ ./cc1 -fschedule-fusion -fdump-tree-all-all -da -march=armv7ve 
-mcpu=cortex-m4 -fomit-frame-pointer -quiet -O2 /tmp/t.c -o t.s
$ cat t.s
[ … ]
foo:
@ args = 0, pretend = 0, frame = 4
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
sub sp, sp, #4
str r1, [r0]@ unaligned
add sp, sp, #4

I think there's something to do with cpu tuning here as well.

That being said, I do think this is a good idea.
I'll give it a test.

The patch passes bootstrap and testing ok and I've seen it
improve codegen in a few places in SPEC.
I've added a testcase all marked up.

Mike, I'll commit the attached patch in 24 hours unless somebody objects.

Was this ever applied?


Sorry, slipped through the cracks.
Committed with r226935.

Thanks,
Kyrill





Re: [PATCH GCC]Improve bound information in loop niter analysis

2015-08-17 Thread Bin.Cheng
Thanks for all your reviews.

On Fri, Aug 14, 2015 at 4:17 PM, Richard Biener
 wrote:
> On Tue, Jul 28, 2015 at 11:36 AM, Bin Cheng  wrote:
>> Hi,
>> Loop niter computes inaccurate bound information for different loops.  This
>> patch is to improve it by using loop initial condition in
>> determine_value_range.  Generally, loop niter is computed by subtracting
>> start var from end var in loop exit condition.  Moreover, loop bound is
>> computed using value range information of both start and end variables.
>> Basic idea of this patch is to check if loop initial condition implies more
>> range information for both start/end variables.  If yes, we refine range
>> information and use that to compute loop bound.
>> With this improvement, more accurate loop bound information is computed for
>> test cases added by this patch.
>
> +  c0 = fold_convert (type, c0);
> +  c1 = fold_convert (type, c1);
> +
> +  if (operand_equal_p (var, c0, 0))
>
> I believe that if c0 is not already of type 'type', operand_equal_p will
> never succeed.
It's quite a specific case, targeting the comparison between var and its
range bounds.  Given that c0 is in the form "var + offc0", the
comparison "var + offc0 != range bounds" doesn't carry any useful
information.  Maybe useless type conversions could be handled here,
though that might be even more of a corner case.

>
> (side-note: we should get rid of the GMP use, that's expensive and now we
> have wide-int available which should do the trick as well)
>
> + /* Case of comparing with the bounds of the type.  */
> + if (TYPE_MIN_VALUE (type)
> + && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
> +   cmp = GT_EXPR;
> + if (TYPE_MAX_VALUE (type)
> + && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
> +   cmp = LT_EXPR;
>
> don't use TYPE_MIN/MAX_VALUE.  Instead use the types precision
> and all wide_int operations (see match.pd wi::max_value use).
Done.

>
> +  else if (!operand_equal_p (var, varc0, 0))
> +goto end_2;
>
> ick - goto.  We need something like an auto_mpz class with a destructor.
Label end_2 removed.

>
> struct auto_mpz
> {
>   auto_mpz () { mpz_init (m_val); }
>   ~auto_mpz () { mpz_clear (m_val); }
>   mpz& operator() { return m_val; }
>   mpz m_val;
> };
>
>> Is it OK?
>
> I see the code follows existing practice in niter analysis even though
> my overall plan was to transition its copying of value-range related
> optimizations to use VRP infrastructure.
Yes, I think it's easy to push it to the VRP infrastructure.  Actually,
judging from the name of the function, it's more VRP related.  For now, the
function is called only by bound_difference, which is far fewer calls than
VRP queries would make.  We would need a cache facility in VRP, otherwise it
would be expensive.

>
> I'm still ok with improving the existing code on the basis that I won't
> get to that for GCC 6.
>
> So - ok with the TYPE_MIN/MAX_VALUE change suggested above.
>
> Refactoring with auto_mpz welcome.
That will be an independent patch, so I skipped it in this one.

New version attached.  Bootstrap and test on x86_64.

Thanks,
bin
>
> Thanks,
> RIchard.
>
>> Thanks,
>> bin
>>
>> 2015-07-28  Bin Cheng  
>>
>> * tree-ssa-loop-niter.c (refine_value_range_using_guard): New.
>> (determine_value_range): Call refine_value_range_using_guard for
>> each loop initial condition to improve value range.
>>
>> gcc/testsuite/ChangeLog
>> 2015-07-28  Bin Cheng  
>>
>> * gcc.dg/tree-ssa/loop-bound-1.c: New test.
>> * gcc.dg/tree-ssa/loop-bound-3.c: New test.
>> * gcc.dg/tree-ssa/loop-bound-5.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-bound-3.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/loop-bound-3.c(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-bound-3.c(revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i > l; i -= 1)
+{
+  sum += a[i];
+}
+
+  return sum;
+}
+
+/* Check loop niter bound information.  */
+/* { dg-final { scan-tree-dump "bounded by 254" "ivopts" } } */
+/* { dg-final { scan-tree-dump-not "bounded by 255" "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-bound-5.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/loop-bound-5.c(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-bound-5.c(revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i > 0; i -= 1)
+{
+  sum += a[i];
+}
+
+  return sum;
+}
+
+/* Check loop niter bound information.  */
+/* { dg-final { scan-tree-dump "bounded by 254" "ivopts" } } */
+/* { dg-final { scan-tree-dump-not "bounded 

Re: c-family/c-pretty-print.c - fix for 'restrict' qualifiers

2015-08-17 Thread Marek Polacek
On Sun, Aug 16, 2015 at 06:14:18PM -0700, Gary Funck wrote:
> 
> While reviewing some code, I noticed that the logic for
> pretty-printing 'restrict' qualifiers is likely missing a
> statement that sets 'previous'.
> 
> OK to commit?
> 
> 2015-08-16  Gary Funck  
> 
> * c-pretty-print.c (pp_c_cv_qualifiers):
> Set 'previous' for restrict qualifiers.
> 
> Index: c-pretty-print.c
> ===
> --- c-pretty-print.c(revision 226928)
> +++ c-pretty-print.c(working copy)
> @@ -207,16 +207,17 @@ pp_c_cv_qualifiers (c_pretty_printer *pp
>  }
>  
>if (qualifiers & TYPE_QUAL_RESTRICT)
>  {
>if (previous)
>  pp_c_whitespace (pp);
>pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
>? "restrict" : "__restrict__"));
> +  previous = true;

No, I don't think this assignment is missing here.  The restrict qualifier
is printed last so we don't need to mark that we've printed something.

Actually, the whole "previous" flag seems to be redundant; pp_c_ws_string
calls pp_c_maybe_whitespace so it prints a whitespace if necessary.

So I suggest the following instead (haven't tested it yet).

2015-08-17  Marek Polacek  

* c-pretty-print.c (pp_c_cv_qualifiers): Remove code dealing
with whitespaces before qualifier names.

diff --git gcc/c-family/c-pretty-print.c gcc/c-family/c-pretty-print.c
index 90f8c3d..e2809cf 100644
--- gcc/c-family/c-pretty-print.c
+++ gcc/c-family/c-pretty-print.c
@@ -173,7 +173,6 @@ void
 pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type)
 {
   const char *p = pp_last_position_in_text (pp);
-  bool previous = false;
 
   if (!qualifiers)
 return;
@@ -185,34 +184,14 @@ pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, 
bool func_type)
 pp_c_whitespace (pp);
 
   if (qualifiers & TYPE_QUAL_ATOMIC)
-{
-  pp_c_ws_string (pp, "_Atomic");
-  previous = true;
-}
-
+pp_c_ws_string (pp, "_Atomic");
   if (qualifiers & TYPE_QUAL_CONST)
-{
-  if (previous)
-pp_c_whitespace (pp);
-  pp_c_ws_string (pp, func_type ? "__attribute__((const))" : "const");
-  previous = true;
-}
-
+pp_c_ws_string (pp, func_type ? "__attribute__((const))" : "const");
   if (qualifiers & TYPE_QUAL_VOLATILE)
-{
-  if (previous)
-pp_c_whitespace (pp);
-  pp_c_ws_string (pp, func_type ? "__attribute__((noreturn))" : 
"volatile");
-  previous = true;
-}
-
+pp_c_ws_string (pp, func_type ? "__attribute__((noreturn))" : "volatile");
   if (qualifiers & TYPE_QUAL_RESTRICT)
-{
-  if (previous)
-pp_c_whitespace (pp);
-  pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
-  ? "restrict" : "__restrict__"));
-}
+pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
+? "restrict" : "__restrict__"));
 }
 
 /* Pretty-print T using the type-cast notation '( type-name )'.  */

Marek


RE: [PATCH GCC]Improve bound information in loop niter analysis

2015-08-17 Thread Ajit Kumar Agarwal
All:

Does the logic that calculates the loop bound information through value range
analysis use the post-dominator and dominator info?  The iteration branches,
instead of the loop exit condition, can be calculated through post-dominator
info.  If a node in the loop has two successors and post-dominates the two
successors, then the iteration branch can be that same node.

For All the nodes L in the Loop B
If (L1, L2  belongs to successors of (L) && L1,L2 belongs to PosDom(Header of 
Loop))
{
  I = I union L1
}

Thus "I" will hold the set of all iteration branches.  This will handle more
cases of loop bound information, which will be accurate through the exact
iteration counts in the known cases, along with value range information, where
the condition is not at the loop exits but at other nodes in the loop.

Thanks & Regards
Ajit
 

-Original Message-
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On 
Behalf Of Bin.Cheng
Sent: Monday, August 17, 2015 3:32 PM
To: Richard Biener
Cc: Bin Cheng; GCC Patches
Subject: Re: [PATCH GCC]Improve bound information in loop niter analysis

Thanks for all your reviews.

On Fri, Aug 14, 2015 at 4:17 PM, Richard Biener  
wrote:
> On Tue, Jul 28, 2015 at 11:36 AM, Bin Cheng  wrote:
>> Hi,
>> Loop niter computes inaccurate bound information for different loops.  
>> This patch is to improve it by using loop initial condition in 
>> determine_value_range.  Generally, loop niter is computed by 
>> subtracting start var from end var in loop exit condition.  Moreover, 
>> loop bound is computed using value range information of both start and end 
>> variables.
>> Basic idea of this patch is to check if loop initial condition 
>> implies more range information for both start/end variables.  If yes, 
>> we refine range information and use that to compute loop bound.
>> With this improvement, more accurate loop bound information is 
>> computed for test cases added by this patch.
>
> +  c0 = fold_convert (type, c0);
> +  c1 = fold_convert (type, c1);
> +
> +  if (operand_equal_p (var, c0, 0))
>
> I believe that if c0 is not already of type 'type', operand_equal_p will
> never succeed.
It's quite a specific case, targeting the comparison between var and its range
bounds.  Given that c0 is in the form "var + offc0", the comparison "var + offc0
!= range bounds" doesn't carry any useful information.  Maybe useless type
conversions could be handled here, though that might be even more of a corner case.

>
> (side-note: we should get rid of the GMP use, that's expensive and now 
> we have wide-int available which should do the trick as well)
>
> + /* Case of comparing with the bounds of the type.  */
> + if (TYPE_MIN_VALUE (type)
> + && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
> +   cmp = GT_EXPR;
> + if (TYPE_MAX_VALUE (type)
> + && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
> +   cmp = LT_EXPR;
>
> don't use TYPE_MIN/MAX_VALUE.  Instead use the types precision and all 
> wide_int operations (see match.pd wi::max_value use).
Done.

>
> +  else if (!operand_equal_p (var, varc0, 0))
> +goto end_2;
>
> ick - goto.  We need something like an auto_mpz class with a destructor.
Label end_2 removed.

>
> struct auto_mpz
> {
>   auto_mpz () { mpz_init (m_val); }
>   ~auto_mpz () { mpz_clear (m_val); }
>   mpz& operator() { return m_val; }
>   mpz m_val;
> };
>
>> Is it OK?
>
> I see the code follows existing practice in niter analysis even though 
> my overall plan was to transition its copying of value-range related 
> optimizations to use VRP infrastructure.
Yes, I think it's easy to push it to VRP infrastructure.  Actually from the 
name of the function, it's more vrp related.  For now, the function is called 
only by bound_difference, not so many as vrp queries.  We need cache facility 
in vrp otherwise it would be expensive.

>
> I'm still ok with improving the existing code on the basis that I 
> won't get to that for GCC 6.
>
> So - ok with the TYPE_MIN/MAX_VALUE change suggested above.
>
> Refactoring with auto_mpz welcome.
That will be an independent patch, so I skipped it in this one.

New version attached.  Bootstrap and test on x86_64.

Thanks,
bin
>
> Thanks,
> RIchard.
>
>> Thanks,
>> bin
>>
>> 2015-07-28  Bin Cheng  
>>
>> * tree-ssa-loop-niter.c (refine_value_range_using_guard): New.
>> (determine_value_range): Call refine_value_range_using_guard for
>> each loop initial condition to improve value range.
>>
>> gcc/testsuite/ChangeLog
>> 2015-07-28  Bin Cheng  
>>
>> * gcc.dg/tree-ssa/loop-bound-1.c: New test.
>> * gcc.dg/tree-ssa/loop-bound-3.c: New test.
>> * gcc.dg/tree-ssa/loop-bound-5.c: New test.


Re: [PATCH] [PING] [PR libitm/61164] Remove redefinition of glibc internal macro __always_inline

2015-08-17 Thread Gleb Fotengauer-Malinovskiy
On Sun, Aug 16, 2015 at 07:35:17PM +0200, Torvald Riegel wrote:
> On Thu, 2015-06-11 at 14:36 +0300, Gleb Fotengauer-Malinovskiy wrote:
> > On Fri, May 15, 2015 at 03:04:27PM +0200, Torvald Riegel wrote:
> > > On Wed, 2015-05-06 at 17:54 +0300, Gleb Fotengauer-Malinovskiy wrote:
> > > > 2015-05-06  Gleb Fotengauer-Malinovskiy  
> > > > 
> > > > PR libitm/61164
> > > > * local_atomic (__always_inline): Rename to...
> > > > (__libitm_always_inline): ... this.
> > > 
> > > OK.  Thanks.
> > 
> > You are welcome.
> > 
> > It seems still not applied, AFAICS.
> 
> I forgot to ask you at Cauldron whether you have completed a copyright
> assignment agreement?

No, I didn't. I'm in progress.

>  I don't think this is small enough to be a
> trivial patch.

"A regular series of repeated changes, such as renaming a symbol, is not
legally significant even if the symbol has to be renamed in many places."
https://www.gnu.org/prep/maintain/html_node/Legally-Significant.html#Legally-Significant

-- 
glebfm


signature.asc
Description: PGP signature


Re: [PATCH] [Ada] Make the stack non-executable in GNAT tools

2015-08-17 Thread Arnaud Charlet
> Due to PR67205, the deeply nested instantiations require trampolines,
> which in turn requires an executable stack for the GNAT tools on
> architectures such as x86_64.
> 
> Bootstrapped on x86_64-redhat-linux-gnu, and make check-ada
> reports no unexpected failures.
> 
> Okay for trunk?

Yes.

Arno


RE: [PATCH GCC]Improve bound information in loop niter analysis

2015-08-17 Thread Ajit Kumar Agarwal
Oops, there was a typo: L1 was typed where L was meant.
Here is the corrected one.

For All the nodes L in the Loop B
If (L1, L2  belongs to successors of (L) && L1,L2 belongs to PosDom(Header of 
Loop)) {
  I = I union L;
}

Thanks & Regards
Ajit
-Original Message-
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On 
Behalf Of Ajit Kumar Agarwal
Sent: Monday, August 17, 2015 4:19 PM
To: Bin.Cheng; Richard Biener
Cc: Bin Cheng; GCC Patches; Vinod Kathail; Shail Aditya Gupta; Vidhumouli 
Hunsigida; Nagaraju Mekala
Subject: RE: [PATCH GCC]Improve bound information in loop niter analysis

All:

Does the logic that calculates the loop bound information through Value Range 
Analysis use the post-dominator and dominator info? The iteration branches, 
rather than only the loop exit condition, can be calculated from the 
post-dominator info. If a node in the loop has two successors and post-dominates 
both of them, then the iteration branch can be that same node.

For All the nodes L in the Loop B
If (L1, L2  belongs to successors of (L) && L1,L2 belongs to PosDom(Header of 
Loop)) {
  I = I union L1
}

Thus "I" will hold the set of all iteration branches. This will handle more 
cases where accurate loop bound information can be derived (from exact 
iteration counts, where known, together with value range information), namely 
cases where the condition is not on the loop exits but on other nodes in the loop.

Thanks & Regards
Ajit
 

-Original Message-
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On 
Behalf Of Bin.Cheng
Sent: Monday, August 17, 2015 3:32 PM
To: Richard Biener
Cc: Bin Cheng; GCC Patches
Subject: Re: [PATCH GCC]Improve bound information in loop niter analysis

Thanks for all your reviews.

On Fri, Aug 14, 2015 at 4:17 PM, Richard Biener  
wrote:
> On Tue, Jul 28, 2015 at 11:36 AM, Bin Cheng  wrote:
>> Hi,
>> Loop niter computes inaccurate bound information for different loops.  
>> This patch is to improve it by using loop initial condition in 
>> determine_value_range.  Generally, loop niter is computed by 
>> subtracting start var from end var in loop exit condition.  Moreover, 
>> loop bound is computed using value range information of both start and end 
>> variables.
>> Basic idea of this patch is to check if loop initial condition 
>> implies more range information for both start/end variables.  If yes, 
>> we refine range information and use that to compute loop bound.
>> With this improvement, more accurate loop bound information is 
>> computed for test cases added by this patch.
>
> +  c0 = fold_convert (type, c0);
> +  c1 = fold_convert (type, c1);
> +
> +  if (operand_equal_p (var, c0, 0))
>
> I believe if c0 is not already of type 'type', operand_equal_p will never 
> succeed.
It's quite a specific case targeting comparison between var and its range 
bounds.  Given c0 is in form of "var + offc0", then the comparison "var + offc0 
!= range bounds" doesn't have any useful information.  Maybe useless type 
conversion can be handled here though, it might be even corner case.

>
> (side-note: we should get rid of the GMP use, that's expensive and now 
> we have wide-int available which should do the trick as well)
>
> + /* Case of comparing with the bounds of the type.  */
> + if (TYPE_MIN_VALUE (type)
> + && operand_equal_p (c1, TYPE_MIN_VALUE (type), 0))
> +   cmp = GT_EXPR;
> + if (TYPE_MAX_VALUE (type)
> + && operand_equal_p (c1, TYPE_MAX_VALUE (type), 0))
> +   cmp = LT_EXPR;
>
> don't use TYPE_MIN/MAX_VALUE.  Instead use the types precision and all 
> wide_int operations (see match.pd wi::max_value use).
Done.

>
> +  else if (!operand_equal_p (var, varc0, 0))
> +goto end_2;
>
> ick - goto.  We need something like an auto_mpz class with a destructor.
Label end_2 removed.

>
> struct auto_mpz
> {
>   auto_mpz () { mpz_init (m_val); }
>   ~auto_mpz () { mpz_clear (m_val); }
>   mpz& operator() { return m_val; }
>   mpz m_val;
> };
>
>> Is it OK?
>
> I see the code follows existing practice in niter analysis even though 
> my overall plan was to transition its copying of value-range related 
> optimizations to use VRP infrastructure.
Yes, I think it's easy to push it to VRP infrastructure.  Actually from the 
name of the function, it's more vrp related.  For now, the function is called 
only by bound_difference, not so many as vrp queries.  We need cache facility 
in vrp otherwise it would be expensive.

>
> I'm still ok with improving the existing code on the basis that I 
> won't get to that for GCC 6.
>
> So - ok with the TYPE_MIN/MAX_VALUE change suggested above.
>
> Refactoring with auto_mpz welcome.
That will be an independent patch, so I skipped it in this one.

New version attached.  Bootstrap and test on x86_64.

Thanks,
bin
>
> Thanks,
> RIchard.
>
>> Thanks,
>> bin
>>
>> 2015-07-28  Bin Cheng  
>>
>> * tree-ssa-loop-niter.c (refine_value_range_using

[PATCH][4/N] Introduce new inline functions for GET_MODE_UNIT_SIZE and GET_MODE_UNIT_PRECISION

2015-08-17 Thread David Sherwood
Hi,

This is the last patch in the series. It follows on from:

[PATCH][3/N] Replace the pattern GET_MODE_BITSIZE (GET_MODE_INNER (m))
with GET_MODE_UNIT_BITSIZE (m)

As a simple optimisation, introduce new inline functions for GET_MODE_UNIT_SIZE
and GET_MODE_UNIT_PRECISION in machmode.h so that we can reduce two inline
calls, i.e. GET_MODE_INNER and GET_MODE_SIZE, into one.

Tested:
aarch64 and aarch64_be - no regressions in gcc testsuite
x86_64 - bootstrap build, no testsuite regressions
arm-none-eabi - no regressions in gcc testsuite

Good to go?
David.

ChangeLog:

2015-07-17  David Sherwood  

gcc/
* genmodes.c (emit_mode_unit_size_inline): New function.
(emit_mode_unit_precision_inline): New function.
(emit_insn_modes_h): Emit new #define.  Emit new functions.
(emit_mode_unit_size): New function.
(emit_mode_unit_precision): New function.
(emit_mode_adjustments): Add mode_unit_size adjustments.
(emit_insn_modes_c): Emit new arrays.
* machmode.h (GET_MODE_UNIT_SIZE, GET_MODE_UNIT_PRECISION): Update to
use new inline methods.


mode_inner4.patch
Description: Binary data


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Alexandre Oliva
On Aug 17, 2015, Andreas Schwab  wrote:

> Alexandre Oliva  writes:
>> Would you be so kind as to give it a spin on a m68k native?  TIA,

> I tried it on ia64, and it falls flat on the floor.

Doh, I see a logic flaw in the patch I posted.  The hunk in
assign_parm_setup_stack that looked like this:

+ if (from_expand)
+   gcc_assert (GET_MODE (from_expand) == GET_MODE (data->entry_parm));
+ else if (!parm_in_unassigned_mem_p (parm, from_expand))
+   data->stack_parm = from_expand;

should look like this:

+ if (from_expand)
+   gcc_assert (GET_MODE (from_expand) == GET_MODE (data->entry_parm));
+ if (from_expand && !parm_in_unassigned_mem_p (parm, from_expand))
+   data->stack_parm = from_expand;

I'll give it some more testing before submitting a formal patch.

Meanwhile, thanks for confirming the m68k issues are fixed by that one;
this one shouldn't regress them; it would only fix the unintended crashes.

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Alexandre Oliva
On Aug 17, 2015, Christophe Lyon  wrote:

> Since this was committed (r226901), I can see that the compiler build
> fails for armeb targets, when building libgcc:

Any chance you could get me a preprocessed testcase for this failure, please?

Thanks in advance,

-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: c-family/c-pretty-print.c - fix for 'restrict' quliafiers

2015-08-17 Thread Gary Funck
On 08/17/15 12:06:08, Marek Polacek wrote:
> No, I don't think this assignment is missing here.  The restrict qualifier
> is printed last so we don't need to mark that we've printed something.
> 
> Actually, the whole "previous" flag seems to be redundant; pp_c_ws_string
> calls pp_c_maybe_whitespace so it prints a whitespace if necessary.

OK.  I'm not familiar with this code, so please go ahead
and make the changes as needed.

- Gary


Re: c-family/c-pretty-print.c - fix for 'restrict' quliafiers

2015-08-17 Thread Marek Polacek
On Mon, Aug 17, 2015 at 12:06:08PM +0200, Marek Polacek wrote:
> On Sun, Aug 16, 2015 at 06:14:18PM -0700, Gary Funck wrote:
> > 
> > While reviewing some code, I noticed that the logic for
> > pretty-printing 'restrict' qualifiers is likely missing a
> > statement that sets 'previous'.
> > 
> > OK to commit?
> > 
> > 2015-08-16  Gary Funck  
> > 
> > * c-pretty-print.c (pp_c_cv_qualifiers):
> > Set 'previous' for restrict qualifiers.
> > 
> > Index: c-pretty-print.c
> > ===
> > --- c-pretty-print.c(revision 226928)
> > +++ c-pretty-print.c(working copy)
> > @@ -207,16 +207,17 @@ pp_c_cv_qualifiers (c_pretty_printer *pp
> >  }
> >  
> >if (qualifiers & TYPE_QUAL_RESTRICT)
> >  {
> >if (previous)
> >  pp_c_whitespace (pp);
> >pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
> >? "restrict" : "__restrict__"));
> > +  previous = true;
> 
> No, I don't think this assignment is missing here.  The restrict qualifier
> is printed last so we don't need to mark that we've printed something.
> 
> Actually, the whole "previous" flag seems to be redundant; pp_c_ws_string
> calls pp_c_maybe_whitespace so it prints a whitespace if necessary.
> 
> So I suggest the following instead (haven't tested it yet).

Now regtested/bootstrapped on x86_64-linux.  Jason/Joseph, ok?

> 2015-08-17  Marek Polacek  
> 
>   * c-pretty-print.c (pp_c_cv_qualifiers): Remove code dealing
>   with whitespaces before qualifier names.
> 
> diff --git gcc/c-family/c-pretty-print.c gcc/c-family/c-pretty-print.c
> index 90f8c3d..e2809cf 100644
> --- gcc/c-family/c-pretty-print.c
> +++ gcc/c-family/c-pretty-print.c
> @@ -173,7 +173,6 @@ void
>  pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type)
>  {
>const char *p = pp_last_position_in_text (pp);
> -  bool previous = false;
>  
>if (!qualifiers)
>  return;
> @@ -185,34 +184,14 @@ pp_c_cv_qualifiers (c_pretty_printer *pp, int 
> qualifiers, bool func_type)
>  pp_c_whitespace (pp);
>  
>if (qualifiers & TYPE_QUAL_ATOMIC)
> -{
> -  pp_c_ws_string (pp, "_Atomic");
> -  previous = true;
> -}
> -
> +pp_c_ws_string (pp, "_Atomic");
>if (qualifiers & TYPE_QUAL_CONST)
> -{
> -  if (previous)
> -pp_c_whitespace (pp);
> -  pp_c_ws_string (pp, func_type ? "__attribute__((const))" : "const");
> -  previous = true;
> -}
> -
> +pp_c_ws_string (pp, func_type ? "__attribute__((const))" : "const");
>if (qualifiers & TYPE_QUAL_VOLATILE)
> -{
> -  if (previous)
> -pp_c_whitespace (pp);
> -  pp_c_ws_string (pp, func_type ? "__attribute__((noreturn))" : 
> "volatile");
> -  previous = true;
> -}
> -
> +pp_c_ws_string (pp, func_type ? "__attribute__((noreturn))" : 
> "volatile");
>if (qualifiers & TYPE_QUAL_RESTRICT)
> -{
> -  if (previous)
> -pp_c_whitespace (pp);
> -  pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
> -? "restrict" : "__restrict__"));
> -}
> +pp_c_ws_string (pp, (flag_isoc99 && !c_dialect_cxx ()
> +  ? "restrict" : "__restrict__"));
>  }
>  
>  /* Pretty-print T using the type-cast notation '( type-name )'.  */
> 
>   Marek

Marek


Re: [PATCH 8/15][AArch64] Add support for float16x{4,8}_t vectors/builtins

2015-08-17 Thread James Greenhalgh
On Tue, Aug 04, 2015 at 12:13:15PM +0100, Alan Lawrence wrote:
> > Bootstrapped + check-gcc on aarch64-none-linux-gnu.
> > 
> > gcc/ChangeLog:
> > 
> > * config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): Support
> > V4HFmode and V8HFmode.
> > (aarch64_split_simd_move): Add case for V8HFmode.
> > * config/aarch64/aarch64-builtins.c (v4hf_UP, v8hf_UP): Define.
> > (aarch64_simd_builtin_std_type): Handle HFmode.
> > (aarch64_init_simd_builtin_types): Include Float16x4_t and Float16x8_t.
> > 
> > * config/aarch64/aarch64-simd.md (mov, aarch64_get_lane,
> > aarch64_ld1, aarch64_st1 > (aarch64_be_ld1, aarch64_be_st1): Use VALLDI_F16 iterator.
> > 
> > * config/aarch64/aarch64-simd-builtin-types.def: Add Float16x4_t,
> > Float16x8_t.
> > 
> > * config/aarch64/aarch64-simd-builtins.def (ld1, st1): Use VALL_F16.
> > * config/aarch64/arm_neon.h (float16x4_t, float16x8_t, float16_t):
> > New typedefs.
> > (vget_lane_f16, vgetq_lane_f16, vset_lane_f16, vsetq_lane_f16,
> > vld1_f16, vld1q_f16, vst1_f16, vst1q_f16, vst1_lane_f16,
> > vst1q_lane_f16): New.
> > * config/aarch64/iterators.md (VD, VQ, VQ_NO2E): Add vectors of HFmode.
> > (VALLDI_F16, VALL_F16): New.
> > (Vmtype, VEL, VCONQ, VHALF, V_TWO_ELEM, V_THREE_ELEM, V_FOUR_ELEM, q):
> > Add cases for V4HF and V8HF.
> > (VDBL, VRL2, VRL3, VRL4): Add V4HF case.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/abi/mangle-neon-aarch64.C: Add cases for float16x4_t and
> > float16x8_t.
> > * gcc.target/aarch64/vset_lane_1.c: Likewise.
> > * gcc.target/aarch64/vld1-vst1_1.c: Likewise.
> > * gcc.target/aarch64/vld1_lane.c: Likewise.
> > 

OK.

Thanks,
James




Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate

2015-08-17 Thread James Greenhalgh
On Thu, Aug 06, 2015 at 05:28:34PM +0100, Alan Lawrence wrote:
> Alan Lawrence wrote:
>  > James Greenhalgh wrote:
>  >> Hi Alan,
>  >>
>  >> The arm_neon.h portion of this patch does not apply after Charles' recent
>  >> changes. Could you please rebase and resubmit the patch for review?
>  >>
>  >> Thanks,
>  >> James
>  >
>  > Ah, indeed, thanks. Here's a rebased version, using Charles' new versions 
> of
>  > __(LD|ST)[234]_LANE_FUNC. I'll follow with a patch adding corresponding
>  > lane_f16_indices tests in a separate email.
>  >
>  > (Changelog as before)
>  >
>  > Bootstrapped + check-gcc on aarch64-none-linux-gnu.
> 
> 
> Here, in fact. gcc/ChangeLog:
> 
>   * config/aarch64/aarch64.c (aarch64_split_simd_combine): Add V4HFmode.
>   * config/aarch64/aarch64-builtins.c (VAR13, VAR14): New.
>   (aarch64_scalar_builtin_types, aarch64_init_simd_builtin_scalar_types):
>   Add __builtin_aarch64_simd_hf.
>   * config/aarch64/arm_neon.h (float16x4x2_t, float16x8x2_t,
>   float16x4x3_t, float16x8x3_t, float16x4x4_t, float16x8x4_t,
>   vcombine_f16, vst2_lane_f16, vst2q_lane_f16, vst3_lane_f16,
>   vst3q_lane_f16, vst4_lane_f16, vst4q_lane_f16, vld2_f16, vld2q_f16,
>   vld3_f16, vld3q_f16, vld4_f16, vld4q_f16, vld2_dup_f16, vld2q_dup_f16,
>   vld3_dup_f16, vld3q_dup_f16, vld4_dup_f16, vld4q_dup_f16,
>   vld2_lane_f16, vld2q_lane_f16, vld3_lane_f16, vld3q_lane_f16,
>   vld4_lane_f16, vld4q_lane_f16, vst2_f16, vst2q_f16, vst3_f16,
>   vst3q_f16, vst4_f16, vst4q_f16, vcreate_f16): New.
> 
>   * config/aarch64/iterators.md (VALLDIF, Vtype, Vetype, Vbtype,
>   V_cmp_result, v_cmp_result): Add cases for V4HF and V8HF.
>   (VDC, Vdbl): Add V4HF.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/vldN_1.c: Add float16x4_t and float16x8_t cases.
>   * gcc.target/aarch64/vldN_dup_1.c: Likewise.
>   * gcc.target/aarch64/vldN_lane_1.c: Likewise.
>  (main): update orig_data to avoid float16 NaN on bigendian.

OK, but clean up the stray newline

   

> @@ -15974,6 +16086,19 @@ vld4q_u64 (const uint64_t * __a)
>return ret;
>  }
>  
> +__extension__ static __inline float16x8x4_t __attribute__ 
> ((__always_inline__))
> +vld4q_f16 (const float16_t * __a)
> +{
> +  float16x8x4_t ret;
> +  __builtin_aarch64_simd_xi __o;
> +  __o = __builtin_aarch64_ld4v8hf (__a);
> +  ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
> +  ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
> +  ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
> +  ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
> +  return ret;
> +}
> +
>  __extension__ static __inline float32x4x4_t __attribute__ 
> ((__always_inline__))
>  vld4q_f32 (const float32_t * __a)
>  {
> @@ -16035,6 +16160,18 @@ vld2_dup_s32 (const int32_t * __a)
>return ret;
>  }
>  
> +

Here.

> +__extension__ static __inline float16x4x2_t __attribute__ 
> ((__always_inline__))
> +vld2_dup_f16 (const float16_t * __a)
> +{
> +  float16x4x2_t ret;
> +  __builtin_aarch64_simd_oi __o;
> +  __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
> +  ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
> +  ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
> +  return ret;
> +}
> +
>  __extension__ static __inline float32x2x2_t __attribute__ 
> ((__always_inline__))
>  vld2_dup_f32 (const float32_t * __a)
>  {

Thanks,
James


Re: [PATCH][ARM/AArch64 Testsuite] Add float16 lane_indices tests (was: Re: [PATCH 9/15][AArch64] vld{2,3,4}{,_lane,_dup}, vcombine, vcreate)

2015-08-17 Thread James Greenhalgh
On Tue, Aug 04, 2015 at 12:07:21PM +0100, Alan Lawrence wrote:
> James Greenhalgh wrote:
> > Hi Alan,
> > 
> > The arm_neon.h portion of this patch does not apply after Charles' recent
> > changes. Could you please rebase and resubmit the patch for review?
> > 
> > Thanks,
> > James
> 
> These are straightforward copies of the corresponding uint16 tests, with 
> appropriate substitutions uint->float and u16->f16. As per the existing 
> tests, 
> these are xfailed on ARM targets, pending further work on PR/63870.

OK.

Thanks,
James

> 
> Cross-tested on aarch64-none-elf.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/advsimd-intrinsics/vld2_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vld3_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vld4_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst2_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst3_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst4_lane_indices_1.c: New.
>   * gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_indices_1.c: New.




Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Christophe Lyon
On 17 August 2015 at 13:58, Alexandre Oliva  wrote:
> On Aug 17, 2015, Christophe Lyon  wrote:
>
>> Since this was committed (r226901), I can see that the compiler build
>> fails for armeb targets, when building libgcc:
>
> Any chance you could get me a preprocessed testcase for this failure, please?
>
Yes, here it is, attached.

My gcc is configured with:
--target=armeb-linux-gnueabihf --with-mode=arm --with-cpu=cortex-a9
--with-fpu=neon

Thanks,

Christophe.

> Thanks in advance,
>
> --
> Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


fixed-bit.i.xz
Description: application/force-download


[PATCH, PR 67133] Always change gimple fntype in cgraph_edge::redirect_call_stmt_to_callee

2015-08-17 Thread Martin Jambor
Hi,

even though PR 67133 has been avoided by a different patch, I believe
the patch below is the correct fix.  It modifies the function that
changes call statements according to call graph edges so that it
changes the fntype of the call statements also when
combined_args_to_skip is NULL.  This code path is taken for example
when a call is redirected to __builtin_unreachable and then the type
of the callee function is likely to mismatch with fntype of the
statement, which can confuse the compiler later on.

If we agree it is a good idea, I'd like to also propose a patch
making the gimple verifier check whether fntypes of direct call
statements match the types of the callee (or at least that they have
the same number of same-typed arguments).

The patch has been bootstrapped and tested on x86_64-linux, the
testcase is already checked in.  OK for trunk?

Thanks,

Martin


2015-08-17  Martin Jambor  

PR middle-end/67133
* cgraph.c (redirect_call_stmt_to_callee): Set gimple call fntype also
when redirecting without removing any parameters.

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 22a9852..5e5b308 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1461,6 +1461,7 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
 {
   new_stmt = e->call_stmt;
   gimple_call_set_fndecl (new_stmt, e->callee->decl);
+  gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl));
   update_stmt_fn (DECL_STRUCT_FUNCTION (e->caller->decl), new_stmt);
 }
 


top-level configure.ac: factor the libgomp check for posix-like OS

2015-08-17 Thread Gary Funck

I'm working on a patch set for GUPC, and as part of that work,
I may have a couple changes to trunk that will improve the fit
with the GUPC changes.  Here's one in configure.ac.

At the moment, there is a check to see if $enable_libgomp
is not set, followed by a case statement which adds
libgomp to $noconfigdirs on non POSIX-like OS's.

We'd like to re-use that logic for libgupc,
which has a similar requirement and propose this
re-factoring for trunk.

2015-08-17  Gary Funck  

* configure.ac (noconfigdirs): Factor libgomp logic testing for
POSIX-like host OS.
* configure: Re-generate.

Index: configure.ac
===
--- configure.ac(revision 226928)
+++ configure.ac(working copy)
@@ -529,9 +529,8 @@ if test x$enable_static_libjava != xyes
 fi
 AC_SUBST(EXTRA_CONFIGARGS_LIBJAVA)
 
-# Enable libgomp by default on hosted POSIX systems, and a few others.
-if test x$enable_libgomp = x ; then
-case "${target}" in
+posix_like_os="yes"
+case "${target}" in
 *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
;;
 *-*-netbsd* | *-*-freebsd* | *-*-openbsd* | *-*-dragonfly*)
@@ -543,9 +542,14 @@ if test x$enable_libgomp = x ; then
 nvptx*-*-*)
;;
 *)
-   noconfigdirs="$noconfigdirs target-libgomp"
-   ;;
-esac
+posix_like_os="no"
+;;
+esac
+
+# Enable libgomp by default on POSIX hosted systems.
+if test x$enable_libgomp = x && test $posix_like_os = "no" ; then
+# Disable libgomp on non POSIX hosted systems.
+noconfigdirs="$noconfigdirs target-libgomp"
 fi
 
 # Disable libatomic on unsupported systems.



Re: [PATCH, PR 67133] Always change gimple fntype in cgraph_edge::redirect_call_stmt_to_callee

2015-08-17 Thread Jan Hubicka
> Hi,
> 
> even though PR 67133 has been avoided by a different patch, I believe
> the patch below is the correct fix.  It modifies the function that
> changes call statements according to call graph edges so that it
> changes the fntype of the call statements also when
> combined_args_to_skip is NULL.  This code path is taken for example
> when a call is redirected to __builtin_unreachable and then the type
> of the callee function is likely to mismatch with fntype of the
> statement, which can confuse the compiler later on.
> 
> If we agree it is a good idea, I'd like to also propose a patch
> making the gimple verifier check whether fntypes of direct call
> statements match the types of the callee (or at least that they have
> the same number of same-typed arguments).
> 
> The patch has been bootstrapped and tested on x86_64-linux, the
> testcase is already checked in.  OK for trunk?
> 
> Thanks,
> 
> Martin
> 
> 
> 2015-08-17  Martin Jambor  
> 
>   PR middle-end/67133
>   * cgraph.c (redirect_call_stmt_to_callee): Set gimple call fntype also
>   when redirecting without removing any parameters.

This makes sense in the case of __builtin_unreachable.  I wonder if we have
some problems in cases where LTO or indirect call code places in incompatible
declaration.

Patch is fine with me, but I would like Richi to have final word on this one.

Honza
> 
> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> index 22a9852..5e5b308 100644
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -1461,6 +1461,7 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
>  {
>new_stmt = e->call_stmt;
>gimple_call_set_fndecl (new_stmt, e->callee->decl);
> +  gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl));
>update_stmt_fn (DECL_STRUCT_FUNCTION (e->caller->decl), new_stmt);
>  }
>  


Re: Forwarding -foffload=[...] from the driver (compile-time) to libgomp (run-time) (was: [PATCH 2/n] OpenMP 4.0 offloading infrastructure: LTO streaming)

2015-08-17 Thread Martin Jambor
Hi,

On Fri, Aug 14, 2015 at 03:19:26PM +0200, Ilya Verbin wrote:
> 2015-08-14 11:47 GMT+02:00 Thomas Schwinge :
> > On Wed, 5 Aug 2015 18:09:04 +0300, Ilya Verbin  wrote:
> >> > > @@ -1095,6 +1092,8 @@ GOMP_target (int device, void (*fn) (void *), 
> >> > > const void *unused,
> >> > >  return gomp_target_fallback (fn, hostaddrs);
> >> > >
> >> > >void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
> >> > > +  if (fn_addr == NULL)
> >> > > +return gomp_target_fallback (fn, hostaddrs);
> >
> > Is that reliable?  Consider the following scenario, with f1 and f2
> > implemented in separate TUs:
> >
> > #pragma omp target data [map clauses]
> > {
> >   f1([...]);
> >   f2([...]);
> > }
> >
> > Consider that in f1 we have a OpenMP target region with offloading data
> > available, and in f2 we have a OpenMP target region without offloading
> > data available.  In this case, the GOMP_target in f1 will execute on the
> > offloading target, but the GOMP_target in f2 will resort to host fallback
> > -- and we then likely have data inconsistencies, as the data specified by
> > the map clauses is not synchronized between host and device.
> >
> > Admittedly, this is user error (inconsistent set of offloading functions
> > available -- need either all, or none), but in such a scenario probably
> > we should be doing a better job (at detecting this).  (Note, I'm not sure
> > whether my current patch actually does any better.)  ;-)
> 
> You're right. That's why I didn't send this patch for review yet.
> My current plan is as follows:
> * Use this approach for architectures with shared memory, since it
> allows mixing host and target functions.

Great, please keep me posted on these changes.

Thanks!

Martin

> * For non-shared memory, at the first splay tree lookup:
> ** If target fn is not found, run the whole program in host-fallback mode.
> ** If it's found, then all target fns must exist. I.e. if some
> tgt_addr (not first) is NULL, then libgomp will issue an error as it
> does now.
> 
>   -- Ilya


Re: [PING][Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-08-17 Thread Richard Biener
On Mon, Aug 17, 2015 at 11:29 AM, David Sherwood  wrote:
> Hi Richard,
>
> Thanks for the reply. I'd chosen to add new expressions as this seemed more
> consistent with the existing MAX_EXPR and MIN_EXPR tree codes. In addition it
> would seem to provide more opportunities for optimisation than a 
> target-specific
> builtin implementation would. I accept that optimisation opportunities will
> be more limited for strict math compilation, but that it was still worth 
> having
> them. Also, if we did map it to builtins then the scalar version would go
> through the optabs and the vector version would go through the target's 
> builtin
> expansion, which doesn't seem very consistent.

On another note ISTR you can't associate STRICT_MIN/MAX_EXPR and thus
you can't vectorize anyway?  (strict IEEE behavior is about NaNs, correct?)

Richard.

> Regards,
> David.
>
>> -Original Message-
>> From: Richard Biener [mailto:richard.guent...@gmail.com]
>> Sent: 13 August 2015 12:10
>> To: David Sherwood
>> Cc: GCC Patches
>> Subject: Re: [PING][Patch] Add support for IEEE-conformant versions of 
>> scalar fmin* and fmax*
>>
>> On Thu, Aug 13, 2015 at 12:11 PM, David Sherwood  
>> wrote:
>> > Hi,
>> >
>> > Sorry to bother people again. Is this OK to go now?
>>
>> Hmm, why don't you go the vectorized function call path for this,
>> implementing the builtin_vectorized_function target hook?
>>
>> Richard.
>>
>> > Thanks!
>> > David.
>> >
>> >> >
>> >> > > On Mon, 29 Jun 2015, David Sherwood wrote:
>> >> > >
>> >> > > > Hi,
>> >> > > >
>> >> > > > I have added new STRICT_MAX_EXPR and STRICT_MIN_EXPR expressions to 
>> >> > > > support the
>> >> > > > IEEE versions of fmin and fmax. This is done by recognising the 
>> >> > > > math library
>> >> > > > "fmax" and "fmin" builtin functions in a similar way to how this is 
>> >> > > > done for
>> >> > > > -ffast-math. This also allows us to vectorise the IEEE max/min 
>> >> > > > functions for
>> >> > > > targets that support it, for example aarch64/aarch32.
>> >> > >
>> >> > > This patch is missing documentation.  You need to document the new 
>> >> > > insn
>> >> > > patterns in md.texi and the new tree codes in generic.texi.
>> >> >
>> >> > Hi, I've uploaded a new patch with the documentation. Hope this is ok.
>> >>
>> >> In various places where you refer to one operand being NaN, I think you
>> >> mean one operand being a *quiet* NaN (if one is a signaling NaN - only
>> >> supported by GCC if -fsignaling-nans - the IEEE minNum and maxNum
>> >> operations raise "invalid" and return a quiet NaN).
>> >
>> > Hi, I have a new patch that hopefully addresses the documentation issues.
>> >
>> > Thanks,
>> > David.
>> >
>> > ChangeLog:
>> >
>> > 2015-07-15  David Sherwood  
>> >
>> > gcc/
>> > * builtins.c (integer_valued_real_p): Add STRICT_MIN_EXPR and
>> > STRICT_MAX_EXPR.
>> > (fold_builtin_fmin_fmax): For strict math, convert builtin fmin and
>> > fmax to STRICT_MIN_EXPR and STRICT_MAX_EXPR, respectively.
>> > * expr.c (expand_expr_real_2): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
>> > * fold-const.c (const_binop): Likewise.
>> > (fold_binary_loc, tree_binary_nonnegative_warnv_p): Likewise.
>> > (tree_binary_nonzero_warnv_p): Likewise.
>> > * optabs.h (strict_minmax_support): Declare.
>> > * optabs.def: Add new optabs strict_max_optab/strict_min_optab.
>> > * optabs.c (optab_for_tree_code): Return new optabs for STRICT_MIN_EXPR
>> > and STRICT_MAX_EXPR.
>> > (strict_minmax_support): New function.
>> > * real.c (real_arithmetic): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
>> > * tree.def: Likewise.
>> > * tree.c (associative_tree_code, commutative_tree_code): Likewise.
>> > * tree-cfg.c (verify_expr): Likewise.
>> > (verify_gimple_assign_binary): Likewise.
>> > * tree-inline.c (estimate_operator_cost): Likewise.
>> > * tree-pretty-print.c (dump_generic_node, op_code_prio): Likewise.
>> > (op_symbol_code): Likewise.
>> > gcc/config:
>> > * aarch64/aarch64.md: New pattern.
>> > * aarch64/aarch64-simd.md: Likewise.
>> > * aarch64/iterators.md: New unspecs, iterators.
>> > * arm/iterators.md: New iterators.
>> > * arm/unspecs.md: New unspecs.
>> > * arm/neon.md: New pattern.
>> > * arm/vfp.md: Likewise.
>> > gcc/doc:
>> > * generic.texi: Add STRICT_MAX_EXPR and STRICT_MIN_EXPR.
>> > * md.texi: Add strict_min and strict_max patterns.
>> > gcc/testsuite
>> > * gcc.target/aarch64/maxmin_strict.c: New test.
>> > * gcc.target/arm/maxmin_strict.c: New test.
>
>
>


Re: [PATCH, PR 67133] Always change gimple fntype in cgraph_edge::redirect_call_stmt_to_callee

2015-08-17 Thread Richard Biener
On Mon, Aug 17, 2015 at 3:47 PM, Jan Hubicka  wrote:
>> Hi,
>>
>> even though PR 67133 has been avoided by a different patch, I believe
>> the patch below is the correct fix.  It modifies the function that
>> changes call statements according to call graph edges so that it
>> changes the fntype of the call statements also when
>> combined_args_to_skip is NULL.  This code path is taken for example
>> when a call is redirected to __builtin_unreachable and then the type
>> of the callee function is likely to mismatch with fntype of the
>> statement, which can confuse the compiler later on.
>>
>> If we agree it is a good idea, I'd like to also propose a patch
>> making the gimple verifier check whether fntypes of direct call
>> statements match the types of the callee (or at least that they have
>> the same number of same-typed arguments).
>>
>> The patch has been bootstrapped and tested on x86_64-linux, the
>> testcase is already checked in.  OK for trunk?
>>
>> Thanks,
>>
>> Martin
>>
>>
>> 2015-08-17  Martin Jambor  
>>
>>   PR middle-end/67133
>>   * cgraph.c (redirect_call_stmt_to_callee): Set gimple call fntype also
>>   when redirecting without removing any parameters.
>
> This makes sense in the case of __builtin_unreachable.  I wonder if we have
> some problems in cases where LTO or indirect call code places in incompatible
> declaration.
>
> Patch is fine with me, but I would like Richi to have final word on this one.

I don't like it too much - you'll scribble over users ABI choice here.
It's better
to guard inspectors of the call properly to _not_ expect actual arguments
according to the ABI.

Richard.

> Honza
>>
>> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
>> index 22a9852..5e5b308 100644
>> --- a/gcc/cgraph.c
>> +++ b/gcc/cgraph.c
>> @@ -1461,6 +1461,7 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
>>  {
>>new_stmt = e->call_stmt;
>>gimple_call_set_fndecl (new_stmt, e->callee->decl);
>> +  gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl));
>>update_stmt_fn (DECL_STRUCT_FUNCTION (e->caller->decl), new_stmt);
>>  }
>>


[PATCH] Fix PR67221

2015-08-17 Thread Richard Biener

This fixes PR67221.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-08-17  Richard Biener  

PR tree-optimization/67221
* tree-ssa-sccvn.c (visit_phi): Keep all-TOP args TOP.
(sccvn_dom_walker::before_dom_children): Mark backedges of
non-executable blocks as not executable.

* gcc.dg/torture/pr67221.c: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 226934)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -3271,6 +3277,11 @@ visit_phi (gimple phi)
break;
  }
   }
+  
+  /* If none of the edges was executable or all incoming values are
+ undefined keep the value-number at VN_TOP.  */
+  if (sameval == VN_TOP)
+return set_ssa_val_to (PHI_RESULT (phi), VN_TOP);
 
   /* First see if it is equivalent to a phi node in this block.  We prefer
  this as it allows IV elimination - see PRs 66502 and 67167.  */
@@ -4463,7 +4498,7 @@ sccvn_dom_walker::before_dom_children (b
   reachable |= (e->flags & EDGE_EXECUTABLE);
 
   /* If the block is not reachable all outgoing edges are not
- executable.  */
+ executable.  Neither are incoming edges with src dominated by us.  */
   if (!reachable)
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -4472,6 +4507,18 @@ sccvn_dom_walker::before_dom_children (b
 
   FOR_EACH_EDGE (e, ei, bb->succs)
e->flags &= ~EDGE_EXECUTABLE;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+   {
+ if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "Marking backedge from BB %d into "
+"unreachable BB %d as not executable\n",
+e->src->index, bb->index);
+ e->flags &= ~EDGE_EXECUTABLE;
+   }
+   }
   return;
 }
 
Index: gcc/testsuite/gcc.dg/torture/pr67221.c
===
--- gcc/testsuite/gcc.dg/torture/pr67221.c  (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr67221.c  (working copy)
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+
+int a, b;
+
+int
+fn1 (int p)
+{
+  return 0 == 0 ? p : 0;
+}
+
+void
+fn2 ()
+{
+  int c = 1, d[1] = { 1 };
+lbl:
+  for (;;)
+{
+  int e;
+  c ? 0 : 0 / c;
+  c = 0;
+  if (fn1 (d[0]))
+   break;
+  for (e = 0; e < 1; e++)
+   for (c = 1; b;)
+ {
+   if (a)
+ break;
+   goto lbl;
+ }
+}
+}


Re: c-family/c-pretty-print.c - fix for 'restrict' qualifiers

2015-08-17 Thread Jason Merrill

OK.

Jason


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Andrew Pinski
On Mon, Aug 17, 2015 at 5:20 PM, Kyrill Tkachov
 wrote:
> Hi Alexandre,
>
> On 17/08/15 03:56, Alexandre Oliva wrote:
>>
>> On Aug 16, 2015, Andreas Schwab  wrote:
>>
>>> Alexandre Oliva  writes:

 On Aug 15, 2015, Andreas Schwab  wrote:

> FAIL: gcc.target/aarch64/target_attr_crypto_ice_1.c (internal compiler
> error)
> In file included from
>
> /opt/gcc/gcc-20150815/gcc/testsuite/gcc.target/aarch64/target_attr_crypto_ice_1.c:4:0:

 Are you sure this is a regression introduced by my patch?
>>>
>>> Yes, it reintroduces the ICE.
>>
>> Ugh.  I see this testcase was introduced very recently, so presumably it
>> wasn't present in the tree that James Greenhalgh tested and confirmed
>> there were no regressions.
>
>
> Yeah, I introduced it as part of the SWITCHABLE_TARGET
> work for aarch64. A bit of a mid-air collision :(
>
>> The hack in aarch64-builtins.c looks risky IMHO.  Changing the mode of a
>> decl after RTL is assigned to it (or to its SSA partitions) seems fishy.
>> The assert is doing just what it was supposed to do.  The only surprise
>> to me is that it didn't catch this unexpected and unsupported change
>> before.
>>
>> Presumably if we just dropped the assert in expand_expr_real_1, this
>> case would work just fine, although the unsignedp bit would be
>> meaningless and thus confusing, since the subreg isn't about a
>> promotion, but about reflecting the mode change that was made from under
>> us.
>>
>> May I suggest that you guys find (or introduce) other means to change
>> the layout and mode of the decl *before* RTL is assigned to the params?
>> I think this would save us a ton of trouble down the road.  Just think
>> how much trouble you'd get if the different modes had different calling
>> conventions, alignment requirements, valid register assignments, or
>> anything that might make coalescing their SSA names with those of other
>> variables invalid.
>>
> I'm not familiar with the intricacies in this area but
> I'll have a look.
> Perhaps we can somehow re-layout the SIMD types when
> switching from a non-simd to a simd target...
> Can you, or Andreas please file a PR so we don't forget?

How does x86 handle this case?  Because it should be handling this case somehow.

Thanks,
Andrew


>
> Thanks,
> Kyrill


[RFC][Scalar masks 1/x] Introduce GEN_MASK_EXPR.

2015-08-17 Thread Ilya Enkovich
Hi,

This patch starts a series introducing scalar masks support in the vectorizer.
It was discussed at the recent Cauldron and an overview of the changes is available here:
https://gcc.gnu.org/wiki/cauldron2015?action=AttachFile&do=view&target=Vectorization+for+Intel+AVX-512.pdf.
  Here is a short list of the changes introduced by this series:

 - Add new tree expr to produce scalar masks in a vectorized code
 - Fix-up if-conversion to use bool predicates instead of integer masks
 - Disable some bool patterns to avoid bool to int conversion where masks can 
be used
 - Support bool operands in vectorization factor computation
 - Support scalar masks in MASK_LOAD, MASK_STORE and VEC_COND_EXPR by adding 
new optabs
 - Support vectorization for statements which are now not transformed by bool 
patterns
 - Add target support (hooks, optabs, expands)

This patch introduces the GEN_MASK_EXPR code.  Initially I wanted to use a
comparison as an operand for it, directly mapping it into an AVX-512 comparison
instruction.  But the feedback was to simplify the new code's semantics and use it
for converting vectors into scalar masks.  Therefore, if we want to compare two
vectors into a scalar mask, we use two statements:

  vect.18_87 = vect__5.13_81 > vect__6.16_86;
  mask__ifc__23.17_88 = GEN_MASK <vect.18_87>;
 
Trying it in practice I found it producing worse code. The problem is that on
the target the first comparison is expanded into two instructions: cmp with mask
result + masked move to get a vector. GEN_MASK is then expanded into another
comparison with a zero vector.  Thus I get two comparisons + move instead of a
single comparison and have to optimize this out on the target side (current
optimizers can't handle it).  That's actually what I wanted to avoid.  For now
I changed GEN_MASK_EXPR to get a vector value as an operand but didn't change
the expand pattern, which has four operands: two vectors to compare + cmp
operator + result.  On expand I try to detect that GEN_MASK uses the result of a
comparison and thus avoid double comparison generation.

Patch series is not actually fully finished yet.  I still have several type 
conversion tests not being vectorized and it wasn't widely tested.  That's what 
I'm working on now.

I will be glad to receive any comments.

Thanks,
Ilya
--
2015-08-17  Ilya Enkovich  

* expr.c (expand_expr_real_2): Support GEN_MASK_EXPR.
* gimple-pretty-print.c (dump_unary_rhs): Likewise.
* gimple.c (get_gimple_rhs_num_ops): Likewise.
* optabs.c: Include gimple.h.
(vector_compare_rtx): Add OPNO arg.
(get_gen_mask_icode): New.
(expand_gen_mask_expr_p): New.
(expand_gen_mask_expr): New.
(expand_vec_cond_expr): Adjust vector_compare_rtx call.
* optabs.def (gen_mask_optab): New.
(gen_masku_optab): New.
* optabs.h (expand_gen_mask_expr_p): New.
(expand_gen_mask_expr): New.
* tree-cfg.c (verify_gimple_assign_unary): Support GEN_MASK_EXPR.
* tree-inline.c (estimate_operator_cost): Likewise.
* tree-pretty-print.c (dump_generic_node): Likewise.
* tree-ssa-operands.c (get_expr_operands): Likewise.
* tree.def (GEN_MASK_EXPR): New.


diff --git a/gcc/expr.c b/gcc/expr.c
index 31b4573..8af5926 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9180,6 +9180,10 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode 
tmode,
return temp;
   }
 
+case GEN_MASK_EXPR:
+  target = expand_gen_mask_expr (type, treeop0, target);
+  return target;
+
 case VEC_COND_EXPR:
   target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
   return target;
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 53900dd..ac25b79 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -300,6 +300,12 @@ dump_unary_rhs (pretty_printer *buffer, gassign *gs, int 
spc, int flags)
   pp_greater (buffer);
   break;
 
+case GEN_MASK_EXPR:
+  pp_string (buffer, "GEN_MASK <");
+  dump_generic_node (buffer, rhs, spc, flags, false);
+  pp_greater (buffer);
+  break;
+
 default:
   if (TREE_CODE_CLASS (rhs_code) == tcc_declaration
  || TREE_CODE_CLASS (rhs_code) == tcc_constant
diff --git a/gcc/gimple.c b/gcc/gimple.c
index cca328a..93caf01 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2005,7 +2005,8 @@ get_gimple_rhs_num_ops (enum tree_code code)
: ((SYM) == TRUTH_AND_EXPR  \
   || (SYM) == TRUTH_OR_EXPR
\
   || (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS  \
-   : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS
\
+   : ((SYM) == TRUTH_NOT_EXPR  \
+  || (SYM) == GEN_MASK_EXPR) ? GIMPLE_UNARY_RHS\
: ((SYM) == COND_EXPR   \
   || (SYM) ==

[Scalar masks 2/x] Use bool masks in if-conversion

2015-08-17 Thread Ilya Enkovich
Hi,

This patch introduces a new vectorizer hook use_scalar_mask_p which affects
code generated by the if-conversion pass (and affects patterns in later patches).

Thanks,
Ilya
--
2015-08-17  Ilya Enkovich  

* doc/tm.texi (TARGET_VECTORIZE_USE_SCALAR_MASK_P): New.
* doc/tm.texi.in: Regenerated.
* target.def (use_scalar_mask_p): New.
* tree-if-conv.c: Include target.h.
(predicate_mem_writes): Don't convert boolean predicates into
integer when scalar masks are used.


diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2383fb9..a124489 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4233,6 +4233,8 @@ address;  but often a machine-dependent strategy can 
generate better code.
 
 @hook TARGET_VECTORIZE_DESTROY_COST_DATA
 
+@hook TARGET_VECTORIZE_USE_SCALAR_MASK_P
+
 @hook TARGET_VECTORIZE_BUILTIN_TM_LOAD
 
 @hook TARGET_VECTORIZE_BUILTIN_TM_STORE
diff --git a/gcc/target.def b/gcc/target.def
index 4edc209..0975bf3 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1855,6 +1855,15 @@ DEFHOOK
  (void *data),
  default_destroy_cost_data)
 
+/* Target function to check scalar masks support.  */
+DEFHOOK
+(use_scalar_mask_p,
+ "This hook returns 1 if vectorizer should use scalar masks instead of "
+ "vector ones for MASK_LOAD, MASK_STORE and VEC_COND_EXPR.",
+ bool,
+ (void),
+ hook_bool_void_false)
+
 HOOK_VECTOR_END (vectorize)
 
 #undef HOOK_PREFIX
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 291e602..73dcecd 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -122,6 +122,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-codes.h"
 #include "optabs.h"
 #include "tree-hash-traits.h"
+#include "target.h"
 
 /* List of basic blocks in if-conversion-suitable order.  */
 static basic_block *ifc_bbs;
@@ -2082,15 +2083,24 @@ predicate_mem_writes (loop_p loop)
  mask = vect_masks[index];
else
  {
-   masktype = build_nonstandard_integer_type (bitsize, 1);
-   mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
-   mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
-   cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
-  is_gimple_condexpr,
-  NULL_TREE,
-  true, GSI_SAME_STMT);
-   mask = fold_build_cond_expr (masktype, unshare_expr (cond),
-mask_op0, mask_op1);
+   if (targetm.vectorize.use_scalar_mask_p ())
+ {
+   masktype = boolean_type_node;
+   mask = unshare_expr (cond);
+ }
+   else
+ {
+   masktype = build_nonstandard_integer_type (bitsize, 1);
+   mask_op0 = build_int_cst (masktype, swap ? 0 : -1);
+   mask_op1 = build_int_cst (masktype, swap ? -1 : 0);
+   cond = force_gimple_operand_gsi_1 (&gsi,
+  unshare_expr (cond),
+  is_gimple_condexpr,
+  NULL_TREE,
+  true, GSI_SAME_STMT);
+   mask = fold_build_cond_expr (masktype, unshare_expr (cond),
+mask_op0, mask_op1);
+ }
mask = ifc_temp_var (masktype, mask, &gsi);
/* Save mask and its size for further use.  */
vect_sizes.safe_push (bitsize);


[committed] config/i386/i386.c: Remove fibheap.h.

2015-08-17 Thread Aldy Hernandez

There are no uses of this include file in the x86 backend.

For that matter, apparently there are no uses of libiberty's fibheap
implementation anywhere (well, at least in GCC, binutils, and gdb.  I
don't know if there are other users in the wild).  FWIW, GCC has its own
implementation in C++.


Approved by rth offline.

Tested by building all-gcc (no bootstrap) on x86-64 Linux.

Aldy
commit 51a695246906459748d0114c75b2ad84f932f299
Author: Aldy Hernandez 
Date:   Mon Aug 17 09:22:42 2015 -0700

* config/i386/i386.c: Remove include of fibheap.h.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 51eca52..05fa5e1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -73,7 +73,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "cselib.h"
 #include "debug.h"
 #include "sched-int.h"
-#include "fibheap.h"
 #include "opts.h"
 #include "diagnostic.h"
 #include "dumpfile.h"


[Scalar masks 3/x] Support scalar masks in MASK_LOAD, MASK_STORE and VEC_COND_EXPR

2015-08-17 Thread Ilya Enkovich
Hi,

This patch adds scalar masks support for MASK_LOAD, MASK_STORE and 
VEC_COND_EXPR.  Each one gets new optab for scalar mask case and optab is 
chosen depending on operands type.  For VEC_COND_EXPR it's actually unclear 
which mask to check in case comparison is used as the first operand.  Thus I 
consider VEC_COND_EXPR with scalar mask just always use SSA_NAME or a constant 
as the first operand.  Otherwise old optab is used.  I think it's OK because 
existing vcond_mask_optab is already extended to handle 512-bit vectors.  
Alternatively both optabs may be checked in such cases.

Any comments?

Thanks,
Ilya
--
gcc/ChangeLog:

2015-08-17  Ilya Enkovich  

* internal-fn.c (expand_MASK_LOAD): Support scalar mask.
(expand_MASK_STORE): Likewise.
* optabs.c (get_vcond_icode): Likewise.
(expand_vec_cond_expr_p): Likewise.
(expand_vec_cond_expr): Likewise.
(can_vec_mask_load_store_p): Add SCALAR_MASK arg.
* optabs.h (can_vec_mask_load_store_p): Likewise.
* optabs.def (smaskload_optab): New.
(smaskstore_optab): New.
(vcond_mask_optab): New.
* tree-cfg.c (verify_gimple_assign_ternary): Support scalar mask.
* tree-if-conv.c (ifcvt_can_use_mask_load_store): Adjust
can_vec_mask_load_store_p call.
* tree-vect-stmts.c (vectorizable_mask_load_store): Support scalar mask.
(vectorizable_assignment): Skip scalars.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
(vect_is_simple_cond): Support scalar mask.
(vectorizable_condition): Likewise.


diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index e785946..5f8c21a 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -1869,6 +1869,7 @@ expand_MASK_LOAD (gcall *stmt)
   struct expand_operand ops[3];
   tree type, lhs, rhs, maskt;
   rtx mem, target, mask;
+  optab tab;
 
   maskt = gimple_call_arg (stmt, 2);
   lhs = gimple_call_lhs (stmt);
@@ -1885,7 +1886,10 @@ expand_MASK_LOAD (gcall *stmt)
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
   create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (optab_handler (maskload_optab, TYPE_MODE (type)), 3, ops);
+
+  tab = (VECTOR_TYPE_P (TREE_TYPE (maskt)))
+? maskload_optab : smaskload_optab;
+  expand_insn (optab_handler (tab, TYPE_MODE (type)), 3, ops);
 }
 
 static void
@@ -1894,6 +1898,7 @@ expand_MASK_STORE (gcall *stmt)
   struct expand_operand ops[3];
   tree type, lhs, rhs, maskt;
   rtx mem, reg, mask;
+  optab tab;
 
   maskt = gimple_call_arg (stmt, 2);
   rhs = gimple_call_arg (stmt, 3);
@@ -1908,7 +1913,10 @@ expand_MASK_STORE (gcall *stmt)
   create_fixed_operand (&ops[0], mem);
   create_input_operand (&ops[1], reg, TYPE_MODE (type));
   create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (optab_handler (maskstore_optab, TYPE_MODE (type)), 3, ops);
+
+  tab = (VECTOR_TYPE_P (TREE_TYPE (maskt)))
+? maskstore_optab : smaskstore_optab;
+  expand_insn (optab_handler (tab, TYPE_MODE (type)), 3, ops);
 }
 
 static void
diff --git a/gcc/optabs.c b/gcc/optabs.c
index bf466ca..4c5e683 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6870,7 +6870,9 @@ static inline enum insn_code
 get_vcond_icode (machine_mode vmode, machine_mode cmode, bool uns)
 {
   enum insn_code icode = CODE_FOR_nothing;
-  if (uns)
+  if (!VECTOR_MODE_P (cmode))
+icode = optab_handler (vcond_mask_optab, vmode);
+  else if (uns)
 icode = convert_optab_handler (vcondu_optab, vmode, cmode);
   else
 icode = convert_optab_handler (vcond_optab, vmode, cmode);
@@ -6886,8 +6888,9 @@ expand_vec_cond_expr_p (tree value_type, tree cmp_op_type)
 {
   machine_mode value_mode = TYPE_MODE (value_type);
   machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
-  if (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode)
-  || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode)
+  if ((VECTOR_MODE_P (cmp_op_mode)
+   && (GET_MODE_SIZE (value_mode) != GET_MODE_SIZE (cmp_op_mode)
+  || GET_MODE_NUNITS (value_mode) != GET_MODE_NUNITS (cmp_op_mode)))
   || get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
  TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing)
 return false;
@@ -6909,6 +6912,7 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree 
op1, tree op2,
   bool unsignedp;
   tree op0a, op0b;
   enum tree_code tcode;
+  bool masked = false;
 
   if (COMPARISON_CLASS_P (op0))
 {
@@ -6916,6 +6920,8 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree 
op1, tree op2,
   op0b = TREE_OPERAND (op0, 1);
   tcode = TREE_CODE (op0);
 }
+  else if (!VECTOR_TYPE_P (TREE_TYPE (op0)))
+masked = true;
   else
 {
   /* Fake op0 < 0.  */
@@ -6924,28 +6930,51 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, 
tree op

Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-08-17 Thread Alexander Monakov
> >> Perhaps add a comment that GOT slots are 64-bit on x32?
> >>
> >
> > Good idea.  I will update my patch.
> >
> 
> How about this?
> 
> 
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index bf8a21d..216dee6 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx 
> callarg1,
>   fnaddr);
>   }
>fnaddr = gen_const_mem (Pmode, fnaddr);
> +  /* Pmode may not be the same as word_mode for x32, which

I think 'Pmode is not the same as word_mode on x32' is more appropriate here.

> + doesn't support indirect branch va 32-bit memory slot.

Typo: s/va/via.

Thanks.
Alexander


Re: RFC: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-08-17 Thread H.J. Lu
On Mon, Aug 17, 2015 at 10:08 AM, Alexander Monakov  wrote:
>> >> Perhaps add a comment that GOT slots are 64-bit on x32?
>> >>
>> >
>> > Good idea.  I will update my patch.
>> >
>>
>> How about this?
>>
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index bf8a21d..216dee6 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -25690,6 +25690,10 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx 
>> callarg1,
>>   fnaddr);
>>   }
>>fnaddr = gen_const_mem (Pmode, fnaddr);
>> +  /* Pmode may not be the same as word_mode for x32, which
>
> I think 'Pmode is not the same as word_mode on x32' is more appropriate here.

"-maddress-mode=long -mx32" makes Pmode == word_mode.

>> + doesn't support indirect branch va 32-bit memory slot.
>
> Typo: s/va/via.
>

Fixed.

Here is the updated patch.

Thanks.

-- 
H.J.
From 04258b418d2ea105249d371a06805122d8953816 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sun, 16 Aug 2015 04:46:20 -0700
Subject: [PATCH] Properly handle -fno-plt in ix86_expand_call

prepare_call_address in calls.c is the wrong place to handle -fno-plt.
We shouldn't force function address into register and hope that load
function address via GOT and indirect call via register will be folded
into indirect call via GOT, which doesn't always happen.  Also non-PIC
case can only be handled in backend.  Instead, backend should expand
external function call into indirect call via GOT for -fno-plt.

This patch reverts -fno-plt in prepare_call_address and handles it in
ix86_expand_call.  Other backends may need similar changes to support
-fno-plt.  Alternately, we can introduce a target hook to indicate
whether an external function should be called via register for -fno-plt
so that i386 backend can disable it in prepare_call_address.

gcc/

	PR target/67215
	* calls.c (prepare_call_address): Don't handle -fno-plt here.
	* config/i386/i386.c (ix86_expand_call): Generate indirect call
	via GOT for -fno-plt.  Support indirect call via GOT for x32.

gcc/testsuite/

	PR target/67215
	* gcc.target/i386/pr67215-1.c: New test.
	* gcc.target/i386/pr67215-2.c: Likewise.
---
 gcc/calls.c   | 12 --
 gcc/config/i386/i386.c| 71 ---
 gcc/testsuite/gcc.target/i386/pr67215-1.c | 20 +
 gcc/testsuite/gcc.target/i386/pr67215-2.c | 20 +
 4 files changed, 95 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr67215-2.c

diff --git a/gcc/calls.c b/gcc/calls.c
index 5636725..7cce9be 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -203,18 +203,6 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
 	   && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
 	  ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
 	  : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic
-	   && fndecl_or_type
-	   && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-	   && (!flag_plt
-	   || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-	   && !targetm.binds_local_p (fndecl_or_type))
-{
-  /* This is done only for PIC code.  There is no easy interface to force the
-	 function address into GOT for non-PIC case.  non-PIC case needs to be
-	 handled specially by the backend.  */
-  funexp = force_reg (Pmode, funexp);
-}
   else if (! sibcallp)
 {
   if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e1..ac9a6c4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25649,21 +25649,54 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
   /* Static functions and indirect calls don't need the pic register.  Also,
 	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
 	 it an indirect call.  */
+  rtx addr = XEXP (fnaddr, 0);
   if (flag_pic
-	  && (!TARGET_64BIT
-	  || (ix86_cmodel == CM_LARGE_PIC
-		  && DEFAULT_ABI != MS_ABI))
-	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-	  && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
-	  && flag_plt
-	  && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
-	  || !lookup_attribute ("noplt",
-		 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))
+	  && GET_CODE (addr) == SYMBOL_REF
+	  && !SYMBOL_REF_LOCAL_P (addr))
 	{
-	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
-	  if (ix86_use_pseudo_pic_reg ())
-	emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
-			pic_offset_table_rtx);
+	  if (flag_plt
+	  && (SYMBOL_REF_DECL (addr) == NULL_TREE
+		  || !lookup_attribute ("noplt",
+	DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)
+	{
+	  if (!TARGET_64BIT
+		  || (ix86_cmodel == CM_LARGE_PIC
+		  && DEFAULT_ABI != MS_ABI))
+		{
+		  use_reg (&use, gen_rtx_REG (Pmode,
+	 

Re: [PATCH] Fix middle-end/67133, part 1

2015-08-17 Thread Jeff Law

On 08/14/2015 02:46 PM, Marek Polacek wrote:

Then in isolate-paths in find_explicit_erroneous_behaviour we're walking the
stmts in bb 2 and we find a null dereference, so 
insert_trap_and_remove_trailing_statements
comes in play and turns bb 2 into:

   <bb 2>:
   ...
   SR.5_10 = MEM[(const struct A &)0B];
   __builtin_trap ();

i.e. it removes the defining statement for c_13.  Then 
find_explicit_erroneous_behaviour
walks over bb 3, hits the fn1 (&D.2434.OutBufCur, &b, c_13); statement, and
ICEs on the c_13 argument: it's a released SSA name with NULL TREE_TYPE.

The question now is what to do with that.  Skip SSA_NAME_IN_FREE_LIST?  That
sounds weird.  Note that we're going to remove bb 3 and bb 4 anyway...

Jeez, looking at the code N years later, I feel like a complete idiot. Of
course that's not going to work.

We certainly don't want to peek at SSA_NAME_IN_FREE_LIST.


Yeh, I thought as much.


I wonder if we should be walking in backwards dominator order to avoid these
effects.  Or maybe just ignoring any BB with no preds.  I'll ponder those
over the weekend.


I suppose both ought to work.  Or at least theoretically we could run e.g.
cleanup_cfg to prune the IR after we've inserted trap and removed trailing
stmts so that it gets rid of unreachable bbs.  Would that make sense?

Anyway, if you think of how would you like to solve this I can take a crack
at it next week.
The funny thing here is we remove the statements after the trap to avoid 
this exact situation!


I think the problem with schemes that either change the order of block 
processing, or which ignore some blocks are going to run into issues. 
By walking blocks and statements in a backwards order, we address 99% of 
the problems, including uses in PHIs in a direct successor block.


What's not handled is a use in a PHI at the frontier of a subgraph that 
becomes unreachable.  We'd have to do the usual unreachable block 
analysis to catch and handle those properly.


I don't particularly like that idea

But in walking through all that, I think I've stumbled on a simpler 
solution.  Specifically do as a little as possible and let the standard 
mechanisms clean things up :-)


1. Delete the code that removes instructions after the trap.

2. Split the block immediately after the trap and remove the edge
   from the original block (with the trap) to the new block.


Then let the standard mechanisms handle things when that pass is complete.

By setting cfg_altered, we'll  get unreachable code removal which will 
capture most of the intended effect.  DCE fires a couple more passes 
down in the pipeline to pick up the remaining tidbits.


Do you want to try and tackle this?

jeff




[gomp4] CXX parsing of routine

2015-08-17 Thread Nathan Sidwell
I've applied this patch to fix the C++ parsing of the routine directive.  As 
with the C parser, this was constructing a list of names to apply the directive 
to, rather than simply resolving the name in the scope of the directive.  With 
C++ this is even more interesting than with C, because names are not simple 
identifiers, but may involve scope operators.


I also added checking to the C parser for use/defn before application, as well 
as duplicate application.


testcases coming up next ...

nathan
2015-08-17  Nathan Sidwell  

	c/
	* c-parser.c (c_finish_oacc_routine): Add is_defn arg and adjust
	all callers.  Detect duplicate, post-use or post-defn application.

	cp/
	* parser.h (struct cp_parser): Change oacc_routine field
	tree. Remove named_oacc_routines field.
	* parser.c (cp_ensure_no_omp_declare_simd): Remove oacc routine
	checking, move to ...
	(cp_ensure_no_oacc_routine): ... here.  New function, adjust all
	callers.
	(cp_parser_late_parsing_oacc_routine): Delete.
	(cp_parser_new): Adjust.
	(cp_parser_linkage_specification): Call cp_ensure_no_oacc_routine.
	(cp_finalize_oacc_routine): Reimplement.  Adjust all callers.
	(cp_parser_late_return_type_opt): Remove oacc routine handling.
	(cp_parser_omp_declare_simd): Remove oacc routine handling. Adjust
	callers.
	(cp_parser_finish_oacc_routine): New.
	(cp_parser_oacc_routine): Reimplement.

	testsuite/
	* c-c++-common/goacc/routine-2.c: Insert declaration.

	libgomp/
	* testsuite/libgomp.c-c++-common/routine-2.c: Insert declaration.

Index: gcc/c/c-parser.c
===
--- gcc/c/c-parser.c	(revision 226912)
+++ gcc/c/c-parser.c	(working copy)
@@ -1762,7 +1762,7 @@ finish_oacc_declare (tree fnbody, tree d
 
 
 static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec);
-static void c_finish_oacc_routine (c_parser *, tree, tree);
+static void c_finish_oacc_routine (c_parser *, tree, tree, bool);
 
 /* Parse a declaration or function definition (C90 6.5, 6.7.1, C99
6.7, 6.9.1).  If FNDEF_OK is true, a function definition is
@@ -2020,7 +2020,8 @@ c_parser_declaration_or_fndef (c_parser
 	c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE,
    omp_declare_simd_clauses);
 	  if (oacc_routine_clauses)
-	c_finish_oacc_routine (parser, NULL_TREE, oacc_routine_clauses);
+	c_finish_oacc_routine (parser, NULL_TREE,
+   oacc_routine_clauses, false);
 	  c_parser_skip_to_end_of_block_or_statement (parser);
 	  return;
 	}
@@ -2117,9 +2118,6 @@ c_parser_declaration_or_fndef (c_parser
 		  || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
 		c_finish_omp_declare_simd (parser, d, NULL_TREE,
 	   omp_declare_simd_clauses);
-
-		  if (oacc_routine_clauses)
-		c_finish_oacc_routine (parser, d, oacc_routine_clauses);
 		}
 	  else
 		{
@@ -2133,14 +2131,14 @@ c_parser_declaration_or_fndef (c_parser
 		  || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
 		c_finish_omp_declare_simd (parser, d, NULL_TREE,
 	   omp_declare_simd_clauses);
-		  if (oacc_routine_clauses)
-		c_finish_oacc_routine (parser, d, oacc_routine_clauses);
-
+		  
 		  start_init (d, asm_name, global_bindings_p ());
 		  init_loc = c_parser_peek_token (parser)->location;
 		  init = c_parser_initializer (parser);
 		  finish_init ();
 		}
+	  if (oacc_routine_clauses)
+		c_finish_oacc_routine (parser, d, oacc_routine_clauses, false);
 	  if (d != error_mark_node)
 		{
 		  maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
@@ -2186,8 +2184,8 @@ c_parser_declaration_or_fndef (c_parser
 		temp_pop_parm_decls ();
 		}
 	  if (oacc_routine_clauses)
-		c_finish_oacc_routine (parser, d, oacc_routine_clauses);
-
+		c_finish_oacc_routine (parser, d, oacc_routine_clauses, false);
+	  
 	  if (d)
 		finish_decl (d, UNKNOWN_LOCATION, NULL_TREE,
 			 NULL_TREE, asm_name);
@@ -2298,10 +2296,10 @@ c_parser_declaration_or_fndef (c_parser
 	  || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
 	c_finish_omp_declare_simd (parser, current_function_decl, NULL_TREE,
    omp_declare_simd_clauses);
-
   if (oacc_routine_clauses)
 	c_finish_oacc_routine (parser, current_function_decl,
-  oacc_routine_clauses);
+			   oacc_routine_clauses, true);
+
 
   DECL_STRUCT_FUNCTION (current_function_decl)->function_start_locus
 	= c_parser_peek_token (parser)->location;
@@ -13279,6 +13277,10 @@ c_parser_oacc_parallel (location_t loc,
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SEQ)			\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_BIND))
 
+/* Parse an OpenACC routine directive.  For named directives, we apply
+   immediately to the named function.  For unnamed ones we then parse
+   a declaration or definition, which must be for a function.  */
+
 static void
 c_parser_oacc_routine (c_parser *parser, enum pragma_context context)
 {
@@ -13325,24 +13327,36 @@ c_parser_oacc_routine (c_parser *parser,
   clauses = tree_cons (c_head, 

Re: [PATCH] Fix middle-end/67133, part 1

2015-08-17 Thread Marek Polacek
On Mon, Aug 17, 2015 at 11:31:57AM -0600, Jeff Law wrote:
> The funny thing here is we remove the statements after the trap to avoid
> this exact situation!
> 
> I think the problem with schemes that either change the order of block
> processing, or which ignore some blocks are going to run into issues. By
> walking blocks and statements in a backwards order, we address 99% of the
> problems, including uses in PHIs in a direct successor block.
> 
> What's not handled is a use in a PHI at the frontier of a subgraph that
> becomes unreachable.  We'd have to do the usual unreachable block analysis
> to catch and handle those properly.
> 
> I don't particularly like that idea
> 
> But in walking through all that, I think I've stumbled on a simpler
> solution.  Specifically do as a little as possible and let the standard
> mechanisms clean things up :-)
> 
> 1. Delete the code that removes instructions after the trap.
> 
> 2. Split the block immediately after the trap and remove the edge
>from the original block (with the trap) to the new block.
> 
> 
> THen let the standard mechanisms handle things when that pass is complete.
> 
> By setting cfg_altered, we'll  get unreachable code removal which will
> capture most of the intended effect.  DCE fires a couple more passes down in
> the pipeline to pick up the remaining tidbits.

Ok, thanks.
 
> Do you want to try and tackle this?

Sure.  I should have a patch tomorrow :-).

Marek


[PATCH] Fix ICE with bogus posix_memalign call (PR middle-end/67222)

2015-08-17 Thread Marek Polacek
Here we were crashing on an invalid call to posix_memalign.  The code in
lower_builtin_posix_memalign assumed that the call had valid arguments.
The reason the C FE doesn't reject this code is, in short, that
int  () is compatible with int  (void **, size_t, size_t) and we
use the former -- so convert_arguments doesn't complain.

So I think let's validate the arguments in lower_stmt.  I decided to
give an error if we see an invalid usage of posix_memalign, since
other code (e.g. alias machinery) assumes correct arguments as well.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-08-17  Marek Polacek  

PR middle-end/67222
* gimple-low.c: Include "builtins.h".
(lower_stmt): Validate arguments of posix_memalign.

* gcc.dg/torture/pr67222.c: New test.

diff --git gcc/gimple-low.c gcc/gimple-low.c
index d4697e2..03194f0 100644
--- gcc/gimple-low.c
+++ gcc/gimple-low.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 #include "gimple-low.h"
 #include "tree-nested.h"
+#include "builtins.h"
 
 /* The differences between High GIMPLE and Low GIMPLE are the
following:
@@ -345,10 +346,22 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data 
*data)
data->cannot_fallthru = false;
return;
  }
-   else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_POSIX_MEMALIGN
-&& flag_tree_bit_ccp)
+   else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_POSIX_MEMALIGN)
  {
-   lower_builtin_posix_memalign (gsi);
+   if (gimple_call_num_args (stmt) != 3
+   || !validate_gimple_arglist (dyn_cast  (stmt),
+POINTER_TYPE, INTEGER_TYPE,
+INTEGER_TYPE, VOID_TYPE))
+ {
+   error_at (gimple_location (stmt), "invalid arguments "
+ "to %qD", decl);
+   gsi_next (gsi);
+   return;
+ }
+   if (flag_tree_bit_ccp)
+ lower_builtin_posix_memalign (gsi);
+   else
+ gsi_next (gsi);
return;
  }
  }
diff --git gcc/testsuite/gcc.dg/torture/pr67222.c 
gcc/testsuite/gcc.dg/torture/pr67222.c
index e69de29..cf39aa1 100644
--- gcc/testsuite/gcc.dg/torture/pr67222.c
+++ gcc/testsuite/gcc.dg/torture/pr67222.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-implicit-function-declaration" } */
+
+void
+foo (void **p)
+{
+  posix_memalign (); /* { dg-error "invalid arguments" } */
+  posix_memalign (p); /* { dg-error "invalid arguments" } */
+  posix_memalign (0); /* { dg-error "invalid arguments" } */
+  posix_memalign (p, 1); /* { dg-error "invalid arguments" } */
+  posix_memalign (p, "foo"); /* { dg-error "invalid arguments" } */
+  posix_memalign ("gnu", "gcc"); /* { dg-error "invalid arguments" } */
+  posix_memalign (1, p); /* { dg-error "invalid arguments" } */
+  posix_memalign (1, 2); /* { dg-error "invalid arguments" } */
+  posix_memalign (1, 2, 3); /* { dg-error "invalid arguments" } */
+  posix_memalign (p, p, p); /* { dg-error "invalid arguments" } */
+  posix_memalign (p, "qui", 3); /* { dg-error "invalid arguments" } */
+  posix_memalign (p, 1, 2);
+}

Marek


Re: C++ PATCH for c++/67104 (wrong handling of array and constexpr)

2015-08-17 Thread Jason Merrill

On 08/12/2015 01:32 PM, Jason Merrill wrote:

cxx_eval_array_reference was assuming that the CONSTRUCTOR for an array
has one entry per array element, in order.  But
cxx_eval_store_expression doesn't try to create such a CONSTRUCTOR, and
other places use RANGE_EXPRs, so we need to be prepared to handle these
cases.


Thinking more about this, I noticed that fold uses binary search for 
ARRAY_REF and we could do the same here.  But for that to work we need 
cxx_eval_store_expression to keep the CONSTRUCTOR sorted.  The first 
patch implements this.


The second patch fixes the error for referring to an uninitialized 
element of an array in the initializer for another element.


Tested x86_64-pc-linux-gnu, applying to trunk.


commit 7e7b7605c6bb6e186624833187b7cc14541828cb
Author: Jason Merrill 
Date:   Thu Aug 13 22:30:19 2015 +0100

	PR c++/67104
	* constexpr.c (array_index_cmp, find_array_ctor_elt): New.
	(cxx_eval_array_reference, cxx_eval_store_expression): Use them.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 2aef631..8172ac8 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -1663,6 +1663,90 @@ cxx_eval_conditional_expression (const constexpr_ctx *ctx, tree t,
    jump_target);
 }
 
+/* Returns less than, equal to, or greater than zero if KEY is found to be
+   less than, to match, or to be greater than the constructor_elt's INDEX.  */
+
+static int
+array_index_cmp (tree key, tree index)
+{
+  gcc_assert (TREE_CODE (key) == INTEGER_CST);
+
+  switch (TREE_CODE (index))
+{
+case INTEGER_CST:
+  return tree_int_cst_compare (key, index);
+case RANGE_EXPR:
+  {
+	tree lo = TREE_OPERAND (index, 0);
+	tree hi = TREE_OPERAND (index, 1);
+	if (tree_int_cst_lt (key, lo))
+	  return -1;
+	else if (tree_int_cst_lt (hi, key))
+	  return 1;
+	else
+	  return 0;
+  }
+default:
+  gcc_unreachable ();
+}
+}
+
+/* Returns the index of the constructor_elt of ARY which matches DINDEX, or -1
+   if none.  If INSERT is true, insert a matching element rather than fail.  */
+
+static HOST_WIDE_INT
+find_array_ctor_elt (tree ary, tree dindex, bool insert = false)
+{
+  if (tree_int_cst_sgn (dindex) < 0)
+return -1;
+
+  unsigned HOST_WIDE_INT i = tree_to_uhwi (dindex);
+  vec *elts = CONSTRUCTOR_ELTS (ary);
+  unsigned HOST_WIDE_INT len = vec_safe_length (elts);
+
+  unsigned HOST_WIDE_INT end = len;
+  unsigned HOST_WIDE_INT begin = 0;
+
+  /* If the last element of the CONSTRUCTOR has its own index, we can assume
+ that the same is true of the other elements and index directly.  */
+  if (end > 0)
+{
+  tree cindex = (*elts)[end-1].index;
+  if (TREE_CODE (cindex) == INTEGER_CST
+	  && compare_tree_int (cindex, end-1) == 0)
+	{
+	  if (i < end)
+	return i;
+	  else
+	begin = end;
+	}
+}
+
+  /* Otherwise, find a matching index by means of a binary search.  */
+  while (begin != end)
+{
+  unsigned HOST_WIDE_INT middle = (begin + end) / 2;
+
+  int cmp = array_index_cmp (dindex, (*elts)[middle].index);
+  if (cmp < 0)
+	end = middle;
+  else if (cmp > 0)
+	begin = middle + 1;
+  else
+	return middle;
+}
+
+  if (insert)
+{
+  constructor_elt e = { dindex, NULL_TREE };
+  vec_safe_insert (CONSTRUCTOR_ELTS (ary), end, e);
+  return end;
+}
+
+  return -1;
+}
+
+
 /* Subroutine of cxx_eval_constant_expression.
Attempt to reduce a reference to an array slot.  */
 
@@ -1708,36 +1792,26 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree t,
 }
 
   i = tree_to_shwi (index);
-  bool found = true;
-  if (TREE_CODE (ary) == CONSTRUCTOR && len
-  && (TREE_CODE (CONSTRUCTOR_ELT (ary, len-1)->index) == RANGE_EXPR
-	  || compare_tree_int (CONSTRUCTOR_ELT (ary, len-1)->index, len-1)))
+  if (i < 0)
 {
-  /* The last element doesn't match its position in the array; this must be
-	 a sparse array from cxx_eval_store_expression.  So iterate.  */
-  found = false;
-  vec *v = CONSTRUCTOR_ELTS (ary);
-  constructor_elt *e;
-  for (unsigned ix = 0; vec_safe_iterate (v, ix, &e); ++ix)
-	{
-	  if (TREE_CODE (e->index) == RANGE_EXPR)
-	{
-	  tree lo = TREE_OPERAND (e->index, 0);
-	  tree hi = TREE_OPERAND (e->index, 1);
-	  if (tree_int_cst_le (lo, index) && tree_int_cst_le (index, hi))
-		found = true;
-	}
-	  else if (tree_int_cst_equal (e->index, index))
-	found = true;
-	  if (found)
-	{
-	  i = ix;
-	  break;
-	}
-	}
+  if (!ctx->quiet)
+	error ("negative array subscript");
+  *non_constant_p = true;
+  return t;
 }
 
-  if (i >= len || !found)
+  bool found;
+  if (TREE_CODE (ary) == CONSTRUCTOR)
+{
+  HOST_WIDE_INT ix = find_array_ctor_elt (ary, index);
+  found = (ix >= 0);
+  if (found)
+	i = ix;
+}
+  else
+found = (i < len);
+
+  if (!found)
 {
   if (tree_int_cst_lt (index, array_type_nelts_top (TREE_TYPE (ary
 	{
@@ -1766,13 +1840,6 @@ cxx

C++ PATCH for c++/67244 (ICE with nested lambda)

2015-08-17 Thread Jason Merrill
We were forgetting to register capture proxies for new captures during 
template instantiation.


Tested x86_64-pc-linux-gnu, applying to trunk and 5.
commit 6dbfe4dd24e6f001f17e8ea4cf1aa58f4b22ebc8
Author: Jason Merrill 
Date:   Mon Aug 17 13:59:52 2015 -0400

	PR c++/67244
	* pt.c (tsubst_copy_and_build): Call insert_pending_capture_proxies.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index ecd86e4..b84bda4 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -16344,6 +16344,8 @@ tsubst_copy_and_build (tree t,
 
 	LAMBDA_EXPR_THIS_CAPTURE (r) = NULL_TREE;
 
+	insert_pending_capture_proxies ();
+
 	RETURN (build_lambda_object (r));
   }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested5.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested5.C
new file mode 100644
index 000..3ebdf3b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested5.C
@@ -0,0 +1,29 @@
+// PR c++/67244
+// { dg-do compile { target c++11 } }
+
+class A {
+public:
+  int operator*();
+};
+template 
+void searchGen(int, int, T, Predicate p4) {
+  p4(0);
+}
+template  struct B;
+template 
+struct B {
+  static void exec() { MetaFunction::template exec; }
+};
+template  void forEachType() {
+  B::exec;
+}
+namespace {
+struct C {
+  template  void exec() {
+A __trans_tmp_1;
+const auto target = *__trans_tmp_1;
+searchGen(0, 0, 0, [=](T) { [=] { target; }; });
+  }
+};
+}
+void C_A_T_C_HT_E_S_T75() { forEachType; }


Minor C++ PATCH to check_noexcept_r

2015-08-17 Thread Jason Merrill
For a while, on the concepts branch check_noexcept_r was seeing a 
CALL_EXPR where the fn operand was not a pointer or reference to 
function, which caused trouble.  Let's add an assert for that.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 4fbc043d90cbc3820e1eeb381d709c2631b7daeb
Author: Jason Merrill 
Date:   Wed Aug 5 21:37:23 2015 -0400

	* except.c (check_noexcept_r): Assert that fn is POINTER_TYPE_P.

diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index 6c36646..4f06f52 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1155,7 +1155,9 @@ check_noexcept_r (tree *tp, int * /*walk_subtrees*/, void * /*data*/)
  We could use TREE_NOTHROW (t) for !TREE_PUBLIC fns, though... */
   tree fn = (code == AGGR_INIT_EXPR
 		 ? AGGR_INIT_EXPR_FN (t) : CALL_EXPR_FN (t));
-  tree type = TREE_TYPE (TREE_TYPE (fn));
+  tree type = TREE_TYPE (fn);
+  gcc_assert (POINTER_TYPE_P (type));
+  type = TREE_TYPE (type);
 
   STRIP_NOPS (fn);
   if (TREE_CODE (fn) == ADDR_EXPR)


[C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Paolo Carlini

Hi,

the bug is very clear: in C++11 we reject the testcase because 
null_ptr_cst_p returns 'true' for 'false', whereas [conv.ptr] is 
carefully worded in terms of integer literals not boolean literals. The 
obvious fix, using == INTEGER_TYPE instead of CP_INTEGRAL_TYPE_P, 
appears to work well.


Tested x86_64-linux.

Thanks,
Paolo.

///

/cp
2015-08-17  Paolo Carlini  

PR c++/67216
* call.c (null_ptr_cst_p): In C++11 return 'false' for 'false'.

/testsuite
2015-08-17  Paolo Carlini  

PR c++/67216
* g++.dg/cpp0x/nullptr34.C: New.
* g++.dg/warn/Wconversion2.C: Adjust.
* g++.dg/warn/Wnull-conversion-1.C: Likewise.
Index: cp/call.c
===
--- cp/call.c   (revision 226939)
+++ cp/call.c   (working copy)
@@ -524,22 +524,34 @@ struct z_candidate {
 bool
 null_ptr_cst_p (tree t)
 {
+  tree type = TREE_TYPE (t);
+
   /* [conv.ptr]
 
  A null pointer constant is an integral constant expression
  (_expr.const_) rvalue of integer type that evaluates to zero or
  an rvalue of type std::nullptr_t. */
-  if (NULLPTR_TYPE_P (TREE_TYPE (t)))
+  if (NULLPTR_TYPE_P (type))
 return true;
-  if (CP_INTEGRAL_TYPE_P (TREE_TYPE (t)))
+
+  if (cxx_dialect >= cxx11)
 {
   /* Core issue 903 says only literal 0 is a null pointer constant.  */
-  if (cxx_dialect < cxx11)
-   t = fold_non_dependent_expr (t);
+  if (TREE_CODE (type) == INTEGER_TYPE)
+   {
+ STRIP_NOPS (t);
+ if (integer_zerop (t) && !TREE_OVERFLOW (t))
+   return true;
+   }
+}
+  else if (CP_INTEGRAL_TYPE_P (type))
+{
+  t = fold_non_dependent_expr (t);
   STRIP_NOPS (t);
   if (integer_zerop (t) && !TREE_OVERFLOW (t))
return true;
 }
+
   return false;
 }
 
Index: testsuite/g++.dg/cpp0x/nullptr34.C
===
--- testsuite/g++.dg/cpp0x/nullptr34.C  (revision 0)
+++ testsuite/g++.dg/cpp0x/nullptr34.C  (working copy)
@@ -0,0 +1,17 @@
+// PR c++/67216
+// { dg-do compile { target c++11 } }
+
+struct s {
+s( long ) {}
+};
+
+struct t {
+t( void * ) {}
+};
+
+void foo(s) {}
+void foo(t) {}
+
+int main() {
+foo(false);
+}
Index: testsuite/g++.dg/warn/Wconversion2.C
===
--- testsuite/g++.dg/warn/Wconversion2.C(revision 226939)
+++ testsuite/g++.dg/warn/Wconversion2.C(working copy)
@@ -1,3 +1,4 @@
 // { dg-options "-Wconversion-null" }
 void foo(const char *); 
-void bar() { foo(false); } // { dg-warning "pointer type for argument" }
+void bar() { foo(false); } // { dg-warning "pointer type for argument" "" { 
target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 3 }
Index: testsuite/g++.dg/warn/Wnull-conversion-1.C
===
--- testsuite/g++.dg/warn/Wnull-conversion-1.C  (revision 226939)
+++ testsuite/g++.dg/warn/Wnull-conversion-1.C  (working copy)
@@ -6,10 +6,13 @@
 void func1(int* ptr);
 
 void func2() {
-  int* t = false; // { dg-warning "converting 'false' to pointer" }
+  int* t = false; // { dg-warning "converting 'false' to pointer" 
"" { target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 9 }
   int* p;
-  p = false;  // { dg-warning "converting 'false' to pointer" }
+  p = false;  // { dg-warning "converting 'false' to pointer" 
"" { target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 12 }
   int* r = sizeof(char) / 2;  // { dg-error "invalid conversion from" "" { 
target c++11 } }
-  func1(false);   // { dg-warning "converting 'false' to pointer" }
+  func1(false);   // { dg-warning "converting 'false' to pointer" 
"" { target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 15 }
   int i = NULL;   // { dg-warning "converting to non-pointer" }
 }
Index: testsuite/g++.old-deja/g++.other/null3.C
===
--- testsuite/g++.old-deja/g++.other/null3.C(revision 226939)
+++ testsuite/g++.old-deja/g++.other/null3.C(working copy)
@@ -2,5 +2,6 @@
 
 void x()
 {
- int* p = 1==0;// { dg-warning "converting 'false' to pointer" }
+ int* p = 1==0;// { dg-warning "converting 'false' to pointer" "" { 
target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 5 } 
 }


Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Jason Merrill

On 08/17/2015 02:40 PM, Paolo Carlini wrote:

the bug is very clear: in C++11 we reject the testcase because
null_ptr_cst_p returns 'true' for 'false', whereas [conv.ptr] is
carefully worded in terms of integer literals not boolean literals. The
obvious fix, using == INTEGER_TYPE instead of CP_INTEGRAL_TYPE_P,
appears to work well.


OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

Jason



Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Paolo Carlini

Hi,

On 08/17/2015 08:50 PM, Jason Merrill wrote:

On 08/17/2015 02:40 PM, Paolo Carlini wrote:

the bug is very clear: in C++11 we reject the testcase because
null_ptr_cst_p returns 'true' for 'false', whereas [conv.ptr] is
carefully worded in terms of integer literals not boolean literals. The
obvious fix, using == INTEGER_TYPE instead of CP_INTEGRAL_TYPE_P,
appears to work well.

OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

You are not alone ;) No, not trivially, without we ICE on pr51313.C.

Thanks,
Paolo.


Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Jason Merrill

On 08/17/2015 02:52 PM, Paolo Carlini wrote:

On 08/17/2015 08:50 PM, Jason Merrill wrote:

OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

You are not alone ;) No, not trivially, without we ICE on pr51313.C.


Hmm, that testcase is ill-formed.  We ought to reject it, though not ICE.

Jason



Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Paolo Carlini

Hi,

On 08/17/2015 08:59 PM, Jason Merrill wrote:

On 08/17/2015 02:52 PM, Paolo Carlini wrote:

On 08/17/2015 08:50 PM, Jason Merrill wrote:

OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

You are not alone ;) No, not trivially, without we ICE on pr51313.C.


Hmm, that testcase is ill-formed.  We ought to reject it, though not ICE.

Yes, now I see. At the moment however, I have no idea where that 
NOP_EXPR is coming from and whether it would be safe to assume in 
null_ptr_cst_p that one can occur only due to a bug elsewhere... Note 
that integer_zerop calls STRIP_NOPS right at the beginning anyway and 
the ICE comes from TREE_OVERFLOW.


Anyway, about the substance of pr51313.C, is it Ok with you if I add an 
xfailed dg-error to it and investigate it separately?


Thanks!
Paolo.


Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Jason Merrill

On 08/17/2015 03:20 PM, Paolo Carlini wrote:

On 08/17/2015 08:59 PM, Jason Merrill wrote:

On 08/17/2015 02:52 PM, Paolo Carlini wrote:

On 08/17/2015 08:50 PM, Jason Merrill wrote:

OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

You are not alone ;) No, not trivially, without we ICE on pr51313.C.


Hmm, that testcase is ill-formed.  We ought to reject it, though not ICE.

Yes, now I see. At the moment however, I have no idea where that
NOP_EXPR is coming from and whether it would be safe to assume in
null_ptr_cst_p that one can occur only due to a bug elsewhere...


I imagine it's coming from built-in folding of isdigit, which is not a bug.


Note
that integer_zerop calls STRIP_NOPS right at the beginning anyway and
the ICE comes from TREE_OVERFLOW.


So I guess null_ptr_cst_p should check for INTEGER_CST before calling 
integer_zerop.



Anyway, about the substance of pr51313.C, is it Ok with you if I add an
xfailed dg-error to it and investigate it separately?


Please also reopen 51313 if you do that.

Jason



[gomp4] lock/unlock internal fn

2015-08-17 Thread Nathan Sidwell
I've committed this patch to add a new pair of internal functions.  These will 
be used in implementing reductions.


They'll be emitted around reduction finalization, and implement the locking 
required for the general case of combining reduction values.  They may be 
transformed in the oacc_xform pass, and the default behaviour is to delete them, 
if there is no RTL expander.  For PTX we delete them if they are at the vector 
level.


This avoids needing machine-specific builtins to expand to, and thus should 
result in less backend code duplication.


nathan
2015-08-17  Nathan Sidwell  

	* target.def (lock_unlock): New GOACC hook.
	* targhooks.h (default_goacc_lock_unlock): Declare.
	* doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Add.
	* doc/tm.texi: Rebuilt.
	* internal-fn.def (GOACC_LOCK, GOACC_UNLOCK): New.
	* internal-fn.c (expand_GOACC_LOCK, expand_GOACC_UNLOCK): New.
	* omp-low.c (execute_oacc_transform): Add lock/unlock handling.
	(default_goacc_lock_unlock): New.
	* config/nvptx/nvptx-protos.h (nvptx_expand_oacc_lock_unlock): Declare.
	* config/nvptx/nvptx.md (UNSPECV_UNLOCK): Delete.
	(oacc_lock, oacc_unlock): New expanders.
	(nvptx_spinlock, nvptx_spinunlock): Use UNSPECV_LOCK.
	* config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): New.
	(nvptx_expand_lock_unlock): Delete.
	(nvptx_expand_lock, nvptx_expand_unlock): Delete.
	(nvptx_expand_work_red_addr): Fixup address generation.
	(enum nvptx_types): Delete NT_VOID_UINT.
	(builtins): Delete nvptx_lock and nvptx_unlock.
	(nvptx_init_builtins): Adjust.
	(nvptx_xform_lock_unlock): New.
	(TARGET_GOACC_LOCK_UNLOCK): Override.
	
Index: gcc/config/nvptx/nvptx-protos.h
===
--- gcc/config/nvptx/nvptx-protos.h	(revision 226951)
+++ gcc/config/nvptx/nvptx-protos.h	(working copy)
@@ -34,6 +34,7 @@ extern const char *nvptx_section_for_dec
 #ifdef RTX_CODE
 extern void nvptx_expand_oacc_fork (rtx);
 extern void nvptx_expand_oacc_join (rtx);
+extern void nvptx_expand_oacc_lock_unlock (rtx, bool);
 extern void nvptx_expand_call (rtx, rtx);
 extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
Index: gcc/config/nvptx/nvptx.md
===
--- gcc/config/nvptx/nvptx.md	(revision 226951)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -61,7 +61,6 @@
 
 (define_c_enum "unspecv" [
UNSPECV_LOCK
-   UNSPECV_UNLOCK
UNSPECV_CAS
UNSPECV_XCHG
UNSPECV_BARSYNC
@@ -1366,6 +1365,26 @@
   return asms[INTVAL (operands[1])];
 })
 
+(define_expand "oacc_lock"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+		(match_operand:SI 1 "const_int_operand" "")]
+		   UNSPECV_LOCK)]
+  ""
+{
+  nvptx_expand_oacc_lock_unlock (operands[0], true);
+  DONE;
+})
+
+(define_expand "oacc_unlock"
+  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")
+		(match_operand:SI 1 "const_int_operand" "")]
+		   UNSPECV_LOCK)]
+  ""
+{
+  nvptx_expand_oacc_lock_unlock (operands[0], false);
+  DONE;
+})
+
 (define_insn "nvptx_fork"
   [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
 		   UNSPECV_FORK)]
@@ -1576,7 +1595,7 @@
[(parallel
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 			(match_operand:SI 1 "const_int_operand" "i")]
-		   UNSPECV_UNLOCK)
+		   UNSPECV_LOCK)
   (match_operand:SI 2 "register_operand" "=R")
   (match_operand:BI 3 "register_operand" "=R")
   (label_ref (match_operand 4 "" ""))])]
@@ -1586,7 +1605,7 @@
 (define_insn "nvptx_spinunlock"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")
 		  (match_operand:SI 1 "const_int_operand" "i")]
-		  UNSPECV_UNLOCK)
+		  UNSPECV_LOCK)
 (match_operand:SI 2 "register_operand" "=R")]
""
"atom%R1.exch.b32 %2,%0,0;")
Index: gcc/config/nvptx/nvptx.c
===
--- gcc/config/nvptx/nvptx.c	(revision 226951)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -1164,6 +1164,39 @@ nvptx_expand_oacc_join (rtx mode)
   emit_insn (gen_nvptx_joining (mode));
 }
 
+/* Expander for reduction locking and unlocking.  We expect SRC to be
+   gang or worker level.  */
+
+void
+nvptx_expand_oacc_lock_unlock (rtx src, bool lock)
+{
+  unsigned HOST_WIDE_INT kind;
+  rtx pat;
+  
+  kind = INTVAL (src) == GOMP_DIM_GANG ? LOCK_GLOBAL : LOCK_SHARED;
+  lock_used[kind] = true;
+
+  rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
+  rtx space = GEN_INT (lock_space[kind]);
+  rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+  rtx tmp = gen_reg_rtx (SImode);
+
+  if (!lock)
+emit_insn (barrier);
+  if (lock)
+{
+  rtx_code_label *label = gen_label_rtx ();
+
+  LABEL_NUSES (label)++;
+  pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label);
+}
+  else
+pat = gen_nvptx_spinunlock (mem, space, tmp);
+  emit_insn (pa

Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Paolo Carlini

Hi,

On 08/17/2015 09:26 PM, Jason Merrill wrote:

On 08/17/2015 03:20 PM, Paolo Carlini wrote:

On 08/17/2015 08:59 PM, Jason Merrill wrote:

On 08/17/2015 02:52 PM, Paolo Carlini wrote:

On 08/17/2015 08:50 PM, Jason Merrill wrote:

OK. I wonder if we can also drop the STRIP_NOPs on the C++11 path.

You are not alone ;) No, not trivially, without we ICE on pr51313.C.


Hmm, that testcase is ill-formed.  We ought to reject it, though not 
ICE.

Yes, now I see. At the moment however, I have no idea where that
NOP_EXPR is coming from and whether it would be safe to assume in
null_ptr_cst_p that one can occur only due to a bug elsewhere...


I imagine it's coming from built-in folding of isdigit, which is not a 
bug.



Note
that integer_zerop calls STRIP_NOPS right at the beginning anyway and
the ICE comes from TREE_OVERFLOW.


So I guess null_ptr_cst_p should check for INTEGER_CST before calling 
integer_zerop.

Yeah, and like in many other places! Thanks. Thus I'm finishing testing 
the below, which handles correctly pr51313.C too. Note, I'm 
intentionally leaving the old c++98 path completely alone.


Thanks,
Paolo.
Index: cp/call.c
===
--- cp/call.c   (revision 226952)
+++ cp/call.c   (working copy)
@@ -524,22 +524,34 @@ struct z_candidate {
 bool
 null_ptr_cst_p (tree t)
 {
+  tree type = TREE_TYPE (t);
+
   /* [conv.ptr]
 
  A null pointer constant is an integral constant expression
  (_expr.const_) rvalue of integer type that evaluates to zero or
  an rvalue of type std::nullptr_t. */
-  if (NULLPTR_TYPE_P (TREE_TYPE (t)))
+  if (NULLPTR_TYPE_P (type))
 return true;
-  if (CP_INTEGRAL_TYPE_P (TREE_TYPE (t)))
+
+  if (cxx_dialect >= cxx11)
 {
   /* Core issue 903 says only literal 0 is a null pointer constant.  */
-  if (cxx_dialect < cxx11)
-   t = fold_non_dependent_expr (t);
+  if (TREE_CODE (type) == INTEGER_TYPE)
+   {
+ if (TREE_CODE (t) == INTEGER_CST && integer_zerop (t)
+ && !TREE_OVERFLOW (t))
+   return true;
+   }
+}
+  else if (CP_INTEGRAL_TYPE_P (type))
+{
+  t = fold_non_dependent_expr (t);
   STRIP_NOPS (t);
   if (integer_zerop (t) && !TREE_OVERFLOW (t))
return true;
 }
+
   return false;
 }
 
Index: cp/pt.c
===
--- cp/pt.c (revision 226952)
+++ cp/pt.c (working copy)
@@ -22671,7 +22671,8 @@ resolve_typename_type (tree type, bool only_curren
 return type;
   /* If SCOPE isn't the template itself, it will not have a valid
  TYPE_FIELDS list.  */
-  if (same_type_p (scope, CLASSTYPE_PRIMARY_TEMPLATE_TYPE (scope)))
+  if (CLASS_TYPE_P (scope)
+  && same_type_p (scope, CLASSTYPE_PRIMARY_TEMPLATE_TYPE (scope)))
 /* scope is either the template itself or a compatible instantiation
like X, so look up the name in the original template.  */
 scope = CLASSTYPE_PRIMARY_TEMPLATE_TYPE (scope);
Index: testsuite/g++.dg/cpp0x/decltype63.C
===
--- testsuite/g++.dg/cpp0x/decltype63.C (revision 0)
+++ testsuite/g++.dg/cpp0x/decltype63.C (working copy)
@@ -0,0 +1,7 @@
+// PR c++/63693
+// { dg-do compile { target c++11 } }
+
+template
+class C{
+ T t;
+ decltype(t)::a::  // { dg-error "expected" }
Index: testsuite/g++.dg/cpp0x/nullptr34.C
===
--- testsuite/g++.dg/cpp0x/nullptr34.C  (revision 0)
+++ testsuite/g++.dg/cpp0x/nullptr34.C  (working copy)
@@ -0,0 +1,17 @@
+// PR c++/67216
+// { dg-do compile { target c++11 } }
+
+struct s {
+s( long ) {}
+};
+
+struct t {
+t( void * ) {}
+};
+
+void foo(s) {}
+void foo(t) {}
+
+int main() {
+foo(false);
+}
Index: testsuite/g++.dg/cpp0x/pr51313.C
===
--- testsuite/g++.dg/cpp0x/pr51313.C(revision 226952)
+++ testsuite/g++.dg/cpp0x/pr51313.C(working copy)
@@ -14,5 +14,5 @@ extern ostream cout;
 
 int main()
 {
-  cout << isdigit(0);
+  cout << isdigit(0);  // { dg-error "invalid conversion" }
 }
Index: testsuite/g++.dg/warn/Wconversion2.C
===
--- testsuite/g++.dg/warn/Wconversion2.C(revision 226952)
+++ testsuite/g++.dg/warn/Wconversion2.C(working copy)
@@ -1,3 +1,4 @@
 // { dg-options "-Wconversion-null" }
 void foo(const char *); 
-void bar() { foo(false); } // { dg-warning "pointer type for argument" }
+void bar() { foo(false); } // { dg-warning "pointer type for argument" "" { 
target { ! c++11 } } }
+// { dg-error "cannot convert" "" { target c++11 } 3 }
Index: testsuite/g++.dg/warn/Wnull-conversion-1.C
===
--- testsuite/g++.dg/warn/Wnull-conversion-1.C  (revision 226952)
+++ testsuite/g++.dg/warn/Wnull-conversion-1.C  (working copy)
@@ -6,10 +6,1

[middle-end,patch] Making __builtin_signbit type-generic

2015-08-17 Thread FX
Attached patch makes __builtin_signbit type-generic in the middle-end (PR 
36757).
Error message will be issued (from gcc/c-family/c-common.c) if argument is not 
real (or too few, or too many).
gcc_assert() is used in expand_builtin_signbit() in cases that should be 
unreachable (failure to expand inline).

Tested on x86_64-apple-darwin14, OK to commit?

FX


gcc/

2015-08-17  Francois-Xavier Coudert  

PR middle-end/36757
* builtins.c (expand_builtin_signbit): Add asserts to make sure
we can expand BUILT_IN_SIGNBIT inline.
* builtins.def (BUILT_IN_SIGNBIT): Make type-generic.
* doc/extend.texi: Document the type-generic __builtin_signbit.

gcc/c-family/

2015-08-17  Francois-Xavier Coudert  

PR middle-end/36757
* c-common.c (check_builtin_function_arguments): Add check
for BUILT_IN_SIGNBIT argument.

gcc/testsuite/

2015-08-17  Francois-Xavier Coudert  

PR middle-end/36757
* gcc.dg/builtins-error.c: Add checks for __builtin_signbit.
* gcc.dg/tg-tests.h: Add checks for __builtin_signbit.



signbit.diff
Description: Binary data


[PATCH] Specify the type of scop->region

2015-08-17 Thread Aditya Kumar
From: Aditya Kumar 

Changing the type of scop::region from void* to sese, as this is
the only type assigned to scop::region for now. No functional changes intended.
Passes regtest and bootstrap.

gcc/ChangeLog:

2015-08-17  Aditya Kumar  

* graphite-poly.c: Change type of region from void* to sese.
* graphite-poly.h (struct scop): Changing the type of scop::region
from void* to sese. Change accessor macro accordingly.
* graphite-sese-to-poly.c (extract_affine_chrec): Use accessor macro.



---
 gcc/graphite-poly.c | 2 +-
 gcc/graphite-poly.h | 8 
 gcc/graphite-sese-to-poly.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c
index dd4fcee..78b5d12 100644
--- a/gcc/graphite-poly.c
+++ b/gcc/graphite-poly.c
@@ -422,7 +422,7 @@ debug_pdr (poly_dr_p pdr, int verbosity)
 /* Creates a new SCOP containing REGION.  */
 
 scop_p
-new_scop (void *region)
+new_scop (sese region)
 {
   scop_p scop = XNEW (struct scop);
 
diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h
index 062d927..4ca5d1f 100644
--- a/gcc/graphite-poly.h
+++ b/gcc/graphite-poly.h
@@ -1345,7 +1345,7 @@ lst_remove_all_before_excluding_pbb (lst_p loop, 
poly_bb_p pbb, bool before)
 struct scop
 {
   /* A SCOP is defined as a SESE region.  */
-  void *region;
+  sese region;
 
   /* Number of parameters in SCoP.  */
   graphite_dim_t nb_params;
@@ -1390,14 +1390,14 @@ struct scop
 };
 
 #define SCOP_BBS(S) (S->bbs)
-#define SCOP_REGION(S) ((sese) S->region)
+#define SCOP_REGION(S) (S->region)
 #define SCOP_CONTEXT(S) (NULL)
 #define SCOP_ORIGINAL_SCHEDULE(S) (S->original_schedule)
 #define SCOP_TRANSFORMED_SCHEDULE(S) (S->transformed_schedule)
 #define SCOP_SAVED_SCHEDULE(S) (S->saved_schedule)
 #define POLY_SCOP_P(S) (S->poly_scop_p)
 
-extern scop_p new_scop (void *);
+extern scop_p new_scop (sese);
 extern void free_scop (scop_p);
 extern void free_scops (vec );
 extern void print_generated_program (FILE *, scop_p);
@@ -1414,7 +1414,7 @@ extern bool graphite_legal_transform (scop_p);
 /* Set the region of SCOP to REGION.  */
 
 static inline void
-scop_set_region (scop_p scop, void *region)
+scop_set_region (scop_p scop, sese region)
 {
   scop->region = region;
 }
diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index fdcc790..0c97eba 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -604,7 +604,7 @@ extract_affine_chrec (scop_p s, tree e, __isl_take 
isl_space *space)
   isl_pw_aff *lhs = extract_affine (s, CHREC_LEFT (e), isl_space_copy (space));
   isl_pw_aff *rhs = extract_affine (s, CHREC_RIGHT (e), isl_space_copy 
(space));
   isl_local_space *ls = isl_local_space_from_space (space);
-  unsigned pos = sese_loop_depth ((sese) s->region, get_chrec_loop (e)) - 1;
+  unsigned pos = sese_loop_depth (SCOP_REGION (s), get_chrec_loop (e)) - 1;
   isl_aff *loop = isl_aff_set_coefficient_si
 (isl_aff_zero_on_domain (ls), isl_dim_in, pos, 1);
   isl_pw_aff *l = isl_pw_aff_from_aff (loop);
-- 
2.1.0.243.g30d45f7



Re: [PATCH] Specify the type of scop->region

2015-08-17 Thread Tobias Grosser

On 08/17/2015 10:30 PM, Aditya Kumar wrote:

From: Aditya Kumar 

Changing the type of scop::region from void* to sese, as this is
the only type assigned to scop::region for now. No functional changes intended.
Passes regtest and bootstrap.



LGTM.

Tobias


[gomp4] openacc routine tests

2015-08-17 Thread Nathan Sidwell
I've committed this new testcase for openacc routines.  It showed up some 
discrepancies in the diagnostics from the C and C++ FE's, hence the changes 
there to unify the wording.  I also fixed up a couple of the libgomp tests to 
avoid an uninteresting warning (which tickles a defect in the offloading 
machinery not propagating the diagnostic formatting options).


nathan
2015-08-17  Nathan Sidwell  

	c/
	* c-parser.c (c_finish_oacc_routine): Add named parameter.  Adjust
	all callers.
	(c_parser_declaration_or_fndef): Add missing c_finish_oacc_routine
	call.
	(c_parser_oacc_routine): Adjust error messages to match C++ FE.

	cp/
	* parser.c (cp_parser_finish_oacc_routine): Adjust to match
	errors from C FE.

	testsuite/
	* c-c++-common/goacc/routine-5.c: New.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Avoid
	uninteresting warning.
	* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise.

Index: gcc/c/c-parser.c
===
--- gcc/c/c-parser.c	(revision 226952)
+++ gcc/c/c-parser.c	(working copy)
@@ -1762,7 +1762,7 @@ finish_oacc_declare (tree fnbody, tree d
 
 
 static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec);
-static void c_finish_oacc_routine (c_parser *, tree, tree, bool);
+static void c_finish_oacc_routine (c_parser *, tree, tree, bool, bool);
 
 /* Parse a declaration or function definition (C90 6.5, 6.7.1, C99
6.7, 6.9.1).  If FNDEF_OK is true, a function definition is
@@ -1903,6 +1903,9 @@ c_parser_declaration_or_fndef (c_parser
 	  pedwarn (here, 0, "empty declaration");
 	}
   c_parser_consume_token (parser);
+  if (oacc_routine_clauses)
+	c_finish_oacc_routine (parser, NULL_TREE,
+			   oacc_routine_clauses, false, false);
   return;
 }
 
@@ -2021,7 +2024,7 @@ c_parser_declaration_or_fndef (c_parser
    omp_declare_simd_clauses);
 	  if (oacc_routine_clauses)
 	c_finish_oacc_routine (parser, NULL_TREE,
-   oacc_routine_clauses, false);
+   oacc_routine_clauses, false, false);
 	  c_parser_skip_to_end_of_block_or_statement (parser);
 	  return;
 	}
@@ -2138,7 +2141,8 @@ c_parser_declaration_or_fndef (c_parser
 		  finish_init ();
 		}
 	  if (oacc_routine_clauses)
-		c_finish_oacc_routine (parser, d, oacc_routine_clauses, false);
+		c_finish_oacc_routine (parser, d, oacc_routine_clauses,
+   false, false);
 	  if (d != error_mark_node)
 		{
 		  maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
@@ -2184,7 +2188,8 @@ c_parser_declaration_or_fndef (c_parser
 		temp_pop_parm_decls ();
 		}
 	  if (oacc_routine_clauses)
-		c_finish_oacc_routine (parser, d, oacc_routine_clauses, false);
+		c_finish_oacc_routine (parser, d, oacc_routine_clauses,
+   false, false);
 	  
 	  if (d)
 		finish_decl (d, UNKNOWN_LOCATION, NULL_TREE,
@@ -2298,7 +2303,7 @@ c_parser_declaration_or_fndef (c_parser
    omp_declare_simd_clauses);
   if (oacc_routine_clauses)
 	c_finish_oacc_routine (parser, current_function_decl,
-			   oacc_routine_clauses, true);
+			   oacc_routine_clauses, false, true);
 
 
   DECL_STRUCT_FUNCTION (current_function_decl)->function_start_locus
@@ -13306,7 +13311,8 @@ c_parser_oacc_routine (c_parser *parser,
 	  decl = lookup_name (token->value);
 	  if (!decl)
 	{
-	  error_at (token->location, "%qE undeclared", token->value);
+	  error_at (token->location, "%qE has not been declared",
+			token->value);
 	  decl = error_mark_node;
 	}
 	  c_parser_consume_token (parser);
@@ -13327,7 +13333,7 @@ c_parser_oacc_routine (c_parser *parser,
   clauses = tree_cons (c_head, clauses, NULL_TREE);
   
   if (decl)
-c_finish_oacc_routine (parser, decl, clauses, false);
+c_finish_oacc_routine (parser, decl, clauses, true, false);
   else
 c_parser_declaration_or_fndef (parser, true, false, false, false,
    true, NULL, vNULL, clauses);
@@ -13339,14 +13345,16 @@ c_parser_oacc_routine (c_parser *parser,
 
 static void
 c_finish_oacc_routine (c_parser *ARG_UNUSED (parser),
-		   tree fndecl, tree clauses, bool is_defn)
+		   tree fndecl, tree clauses, bool named, bool is_defn)
 {
   location_t loc = OMP_CLAUSE_LOCATION (TREE_PURPOSE (clauses));
 
   if (!fndecl || TREE_CODE (fndecl) != FUNCTION_DECL)
 {
   if (fndecl != error_mark_node)
-	error_at (loc, "%<#pragma acc routine%> does not refer to a function");
+	error_at (loc, "%<#pragma acc routine%> %s",
+		  named ? "does not refer to a function"
+		  : "not followed by function");
   return;
 }
 
Index: gcc/cp/parser.c
===
--- gcc/cp/parser.c	(revision 226952)
+++ gcc/cp/parser.c	(working copy)
@@ -34318,24 +34318,19 @@ cp_parser_finish_oacc_routine (cp_parser
 {
   location_t loc  = OMP_CLAUSE_LOCATION (TREE_PURPOSE (clauses));
 
-  if (!fndecl)
-{
-  error ("%<#pragma oacc routine%> not immediately followed by "
-

Re: [C++ Patch] PR 67216 ("false is still a null pointer constant")

2015-08-17 Thread Jason Merrill

+  if (TREE_CODE (type) == INTEGER_TYPE)
+   {
+ if (TREE_CODE (t) == INTEGER_CST && integer_zerop (t)
+ && !TREE_OVERFLOW (t))
+   return true;
+   }


Let's fold those conditions together.  :)

OK.

Jason


[PATCH 0/3] xtensa: libgcc: fixes for stack unwinding

2015-08-17 Thread Max Filippov
Hi Sterling,

these three patches fix bits of xtensa libgcc related to stack unwinding.
They fix a number of uClibc-ng NPTL thread cancellation and cleanup tests,
particularly uClibc-ng 1.0.5 tests
  nptl/tst-cancelx4,
  nptl/tst-cancelx16,
  nptl/tst-cancelx18,
  nptl/tst-cancelx20,
  nptl/tst-cancelx21,
  nptl/tst-cleanupx1,
  nptl/tst-cleanupx3,
  nptl/tst-oncex3,
  nptl/tst-oncex4.

Max Filippov (3):
  xtensa: reimplement register spilling
  xtensa: use unwind-dw2-fde-dip instead of unwind-dw2-fde
  xtensa: fix _Unwind_GetCFA

 libgcc/config/xtensa/lib2funcs.S | 30 +++---
 libgcc/config/xtensa/t-windowed  |  2 +-
 libgcc/config/xtensa/unwind-dw2-xtensa.c |  2 +-
 3 files changed, 25 insertions(+), 9 deletions(-)

-- 
1.8.1.4



[PATCH 1/3] xtensa: reimplement register spilling

2015-08-17 Thread Max Filippov
Spilling windowed registers in userspace is much easier, more portable,
less error-prone and just as effective as doing it in the kernel. Now that
the register spilling syscall is considered obsolete in the xtensa linux
kernel, replace it with CALL12 followed by a series of ENTRY in libgcc.

2015-08-18  Max Filippov  
libgcc/
* config/xtensa/lib2funcs.S (__xtensa_libgcc_window_spill): Use
CALL12 followed by series of ENTRY to spill windowed registers.
(__xtensa_nonlocal_goto): Call __xtensa_libgcc_window_spill
instead of making linux spill syscall.
---
 libgcc/config/xtensa/lib2funcs.S | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/libgcc/config/xtensa/lib2funcs.S b/libgcc/config/xtensa/lib2funcs.S
index 4d451c8..ef0703f 100644
--- a/libgcc/config/xtensa/lib2funcs.S
+++ b/libgcc/config/xtensa/lib2funcs.S
@@ -34,10 +34,29 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
.global __xtensa_libgcc_window_spill
.type   __xtensa_libgcc_window_spill,@function
 __xtensa_libgcc_window_spill:
-   entry   sp, 32
-   movi    a2, 0
-   syscall
+   entry   sp, 48
+#if XCHAL_NUM_AREGS > 16
+   call12  1f
+   retw
+   .align  4
+1:
+   .rept   (XCHAL_NUM_AREGS - 24) / 12
+   _entry  sp, 48
+   mov a12, a0
+   .endr
+   _entry  sp, 16
+#if XCHAL_NUM_AREGS % 12 == 0
+   mov a4, a4
+#elif XCHAL_NUM_AREGS % 12 == 4
+   mov a8, a8
+#elif XCHAL_NUM_AREGS % 12 == 8
+   mov a12, a12
+#endif
retw
+#else
+   mov a8, a8
+   retw
+#endif
.size   __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill
 #endif
 
@@ -61,10 +80,7 @@ __xtensa_nonlocal_goto:
entry   sp, 32
 
/* Flush registers.  */
-   mov a5, a2
-   movi    a2, 0
-   syscall
-   mov a2, a5
+   call8   __xtensa_libgcc_window_spill
 
/* Because the save area for a0-a3 is stored one frame below
   the one identified by a2, the only way to restore those
-- 
1.8.1.4



[PATCH 3/3] xtensa: fix _Unwind_GetCFA

2015-08-17 Thread Max Filippov
Returning context->cfa in _Unwind_GetCFA makes CFA point one stack frame
higher than what was actually used by code at context->ra. This results
in an invalid CFA value in signal frames and premature unwinding completion
in the forced unwinding used by uClibc NPTL thread cancellation.
Returning context->sp from _Unwind_GetCFA makes all CFA values valid and
matching code that used them.

2015-08-18  Max Filippov  
libgcc/
* config/xtensa/unwind-dw2-xtensa.c (_Unwind_GetCFA): Return
context->sp instead of context->cfa.
---
 libgcc/config/xtensa/unwind-dw2-xtensa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/config/xtensa/unwind-dw2-xtensa.c 
b/libgcc/config/xtensa/unwind-dw2-xtensa.c
index 82b0e63..8e579c7 100644
--- a/libgcc/config/xtensa/unwind-dw2-xtensa.c
+++ b/libgcc/config/xtensa/unwind-dw2-xtensa.c
@@ -130,7 +130,7 @@ _Unwind_GetGR (struct _Unwind_Context *context, int index)
 _Unwind_Word
 _Unwind_GetCFA (struct _Unwind_Context *context)
 {
-  return (_Unwind_Ptr) context->cfa;
+  return (_Unwind_Ptr) context->sp;
 }
 
 /* Overwrite the saved value for register INDEX in CONTEXT with VAL.  */
-- 
1.8.1.4



[PATCH 2/3] xtensa: use unwind-dw2-fde-dip instead of unwind-dw2-fde

2015-08-17 Thread Max Filippov
This allows having exception cleanup code in binaries that don't
register their unwind tables.

2015-08-18  Max Filippov  
libgcc/
* config/xtensa/t-windowed (LIB2ADDEH): Replace unwind-dw2-fde
with unwind-dw2-fde-dip.
---
 libgcc/config/xtensa/t-windowed | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgcc/config/xtensa/t-windowed b/libgcc/config/xtensa/t-windowed
index 7d9e9db..a99156c 100644
--- a/libgcc/config/xtensa/t-windowed
+++ b/libgcc/config/xtensa/t-windowed
@@ -1,2 +1,2 @@
 LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
-   $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+   $(srcdir)/unwind-dw2-fde-dip.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
-- 
1.8.1.4



[MIPS,PATCH,committed] Fix mips.exp logic when disabling features removed in R6

2015-08-17 Thread Matthew Fortune
When switching up from an older ISA to MIPSR6 some features need to be
disabled. The support for this added in rev r225813 missed the fact
that both the $isa and $isa_rev variables are referenced in this code
so both must get redefined to their (potentially) new values. Without
this patch a number of tests will be reported as unsupported as it is
a TCL error not a test error.

Committed.

Thanks,
Matthew

gcc/testsuite/
* gcc.target/mips/mips.exp (mips-dg-options): Redefine isa
which is used in later code.
---
 gcc/testsuite/gcc.target/mips/mips.exp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/mips/mips.exp 
b/gcc/testsuite/gcc.target/mips/mips.exp
index 1ac22a5..42e7fff 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -1196,6 +1196,7 @@ proc mips-dg-options { args } {
 
 # Re-calculate the isa_rev for use in the abi handling code below
 set arch [mips_option options arch]
+set isa [mips_arch_info $arch isa]
 set isa_rev [mips_arch_info $arch isa_rev]
 
 # Set an appropriate ABI, handling dependencies between the pre-abi
-- 
2.2.1



[gomp4] oacc function cleanup

2015-08-17 Thread Nathan Sidwell
As I suspected, once the C & C++ parsers are treating the OpenACC routine 
directive properly, we don't need to mention it in the common attribute table.


committed to gomp4 branch

nathan
2015-08-17  Nathan Sidwell  

	* c-common.c (c_common_attribute_table): Remove oacc function.

Index: gcc/c-family/c-common.c
===
--- gcc/c-family/c-common.c	(revision 226911)
+++ gcc/c-family/c-common.c	(working copy)
@@ -823,7 +823,6 @@ const struct attribute_spec c_common_att
   { "bnd_instrument", 0, 0, true, false, false,
 			  handle_bnd_instrument, false },
   { "oacc declare",   0, -1, true,  false, false, NULL, false },
-  { "oacc function",  0, -1, true,  false, false, NULL, false },
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 


Re: [PATCH 2/3] xtensa: use unwind-dw2-fde-dip instead of unwind-dw2-fde

2015-08-17 Thread augustine.sterl...@gmail.com
On Mon, Aug 17, 2015 at 2:59 PM, Max Filippov  wrote:
> This allows having exception cleanup code in binaries that don't
> register their unwind tables.
>
> 2015-08-18  Max Filippov  
> libgcc/
> * config/xtensa/t-windowed (LIB2ADDEH): Replace unwind-dw2-fde
> with unwind-dw2-fde-dip.

Approved.


Re: [PATCH 1/3] xtensa: reimplement register spilling

2015-08-17 Thread augustine.sterl...@gmail.com
On Mon, Aug 17, 2015 at 2:59 PM, Max Filippov  wrote:
> 2015-08-18  Max Filippov  
> libgcc/
> * config/xtensa/lib2funcs.S (__xtensa_libgcc_window_spill): Use
> CALL12 followed by series of ENTRY to spill windowed registers.
> (__xtensa_nonlocal_goto): Call __xtensa_libgcc_window_spill
> instead of making linux spill syscall.

Approved.


Re: [PATCH 3/3] xtensa: fix _Unwind_GetCFA

2015-08-17 Thread augustine.sterl...@gmail.com
On Mon, Aug 17, 2015 at 2:59 PM, Max Filippov  wrote:
> 2015-08-18  Max Filippov  
> libgcc/
> * config/xtensa/unwind-dw2-xtensa.c (_Unwind_GetCFA): Return
> context->sp instead of context->cfa.

Approved.


Re: [PATCH 1/3] xtensa: reimplement register spilling

2015-08-17 Thread Max Filippov
On Tue, Aug 18, 2015 at 3:50 AM, augustine.sterl...@gmail.com
 wrote:
> On Mon, Aug 17, 2015 at 2:59 PM, Max Filippov  wrote:
>> 2015-08-18  Max Filippov  
>> libgcc/
>> * config/xtensa/lib2funcs.S (__xtensa_libgcc_window_spill): Use
>> CALL12 followed by series of ENTRY to spill windowed registers.
>> (__xtensa_nonlocal_goto): Call __xtensa_libgcc_window_spill
>> instead of making linux spill syscall.
>
> Approved.

Applied whole series to trunk. Thank you!

-- Max


Re: [PR64164] drop copyrename, integrate into expand

2015-08-17 Thread Alexandre Oliva
On Aug 17, 2015, Christophe Lyon  wrote:

> On 17 August 2015 at 13:58, Alexandre Oliva  wrote:
>> On Aug 17, 2015, Christophe Lyon  wrote:
>> 
>>> Since this was committed (r226901), I can see that the compiler build
>>> fails for armeb targets, when building libgcc:
>> 
>> Any chance you could get me a preprocessed testcase for this failure, please?
>> 
> Yes, here it is, attached.

Thanks.

This patch fixes this particular case.  I'll also add this configuration
to the cross build tests I'm going to rerun shortly, before submitting a
followup formally, to see whether other non-MEM mems need to be handled
explicitly.


--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3017,6 +3017,11 @@ assign_parm_setup_block (struct assign_parm_data_all 
*all,
   else if (size == 0)
;
 
+  /* MEM may be a REG if coalescing assigns the param's partition
+to a pseudo.  */
+  else if (REG_P (mem))
+   emit_move_insn (mem, entry_parm);
+
   /* If SIZE is that of a mode no bigger than a word, just use
 that mode's store operation.  */
   else if (size <= UNITS_PER_WORD)


-- 
Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: top-level configure.ac: factor the libgomp check for posix-like OS

2015-08-17 Thread Thomas Schwinge
Hi!

On Mon, 17 Aug 2015 06:46:32 -0700, Gary Funck  wrote:
> I'm working on a patch set for GUPC, and as part of that work,
> I may have a couple changes to trunk that will improve the fit
> with the GUPC changes.  Here's one in configure.ac.
> 
> At the moment, there is a check to see if $enable_libgomp
> is not set, followed by a case statement which adds
> libgomp to $noconfigdirs on non POSIX-like OS's.

Hmm...

> We'd like to re-use that logic for libgupc,
> which has a similar requirement and propose this
> re-factoring for trunk.

> --- configure.ac(revision 226928)
> +++ configure.ac(working copy)
> @@ -529,9 +529,8 @@ if test x$enable_static_libjava != xyes
>  fi
>  AC_SUBST(EXTRA_CONFIGARGS_LIBJAVA)
>  
> -# Enable libgomp by default on hosted POSIX systems, and a few others.
> -if test x$enable_libgomp = x ; then
> -case "${target}" in
> +posix_like_os="yes"
> +case "${target}" in
>  *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
> ;;
>  *-*-netbsd* | *-*-freebsd* | *-*-openbsd* | *-*-dragonfly*)
> @@ -543,9 +542,14 @@ if test x$enable_libgomp = x ; then
>  nvptx*-*-*)
> ;;
>  *)
> -   noconfigdirs="$noconfigdirs target-libgomp"
> -   ;;
> -esac
> +posix_like_os="no"
> +;;
> +esac

Even if applicable regarding the libgomp configuration (because
nvptx-none has its own libgomp "port": libgomp/config/nvptx/), it seems a
bit strange to qualify nvptx-none as a POSIX-like system.

> +
> +# Enable libgomp by default on POSIX hosted systems.
> +if test x$enable_libgomp = x && test $posix_like_os = "no" ; then
> +# Disable libgomp on non POSIX hosted systems.
> +noconfigdirs="$noconfigdirs target-libgomp"
>  fi

So, we'll have to see whether that applies to libgupc for nvptx-none,
too.


Grüße,
 Thomas


signature.asc
Description: PGP signature