[PATCH] Remove XFAIL from gcc.dg/tree-ssa/20030814-6.c

2016-04-05 Thread Richard Biener

This removes the XFAIL by fixing the testcase - it can never be
optimized in the desired way because the write to type.alias_set
_does_ kill the previous load.  But that's because the testcase
bogusly simplified the tree inheritance.

Fixed.  Also disabled FRE so this really tests DOM.  (all the
20030814-x testcases have the same issue, but this one is exposed
by an artificial XPASS I was seeing with a local patch).

Installed.

Richard.

2016-04-05  Richard Biener  

* gcc.dg/tree-ssa/20030814-6.c: Fix testcase, disable FRE,
remove XFAIL.

Index: gcc/testsuite/gcc.dg/tree-ssa/20030814-6.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/20030814-6.c  (revision 234705)
+++ gcc/testsuite/gcc.dg/tree-ssa/20030814-6.c  (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-dom2" } */
+/* { dg-options "-O1 -fno-tree-fre -fdump-tree-dom2" } */
 
 extern void abort (void);
 union tree_node;
@@ -15,6 +15,7 @@ struct tree_common
 };
 struct tree_type
 {
+  struct tree_common common;
   double alias_set;
 };
 union tree_node
@@ -39,6 +40,4 @@ foo (t, set)
 }
 /* There should be precisely one load of common.code.  If there is
more than one, then the dominator optimizations failed.  */
-/* ??? Will fail until we properly distinguish member stores.  At
-   present the write to type.alias_set kills the previous load.  */
-/* { dg-final { scan-tree-dump-times "common.code" 1 "dom2" { xfail *-*-* } } 
} */
+/* { dg-final { scan-tree-dump-times "common.code" 1 "dom2" } } */


Re: [PATCH PR69489/01]Improve tree ifcvt by storing/tracking DR against its innermost loop behavior if possible

2016-04-05 Thread Richard Biener
On Mon, Apr 4, 2016 at 4:14 PM, Bin.Cheng  wrote:
> On Mon, Apr 4, 2016 at 2:07 PM, Richard Biener
>  wrote:
>> On Thu, Mar 31, 2016 at 6:43 PM, Bin.Cheng  wrote:
>>> On Tue, Mar 29, 2016 at 9:37 AM, Richard Biener
>>>  wrote:
 On Mon, Mar 28, 2016 at 9:57 PM, Bin.Cheng  wrote:
> Sorry, Should have replied to gcc-patches list.
>
> Thanks,
> bin
>
> -- Forwarded message --
> From: "Bin.Cheng" 
> Date: Tue, 29 Mar 2016 03:55:04 +0800
> Subject: Re: [PATCH PR69489/01]Improve tree ifcvt by storing/tracking
> DR against its innermost loop behavior if possible
> To: Richard Biener 
>
> On 3/17/16, Richard Biener  wrote:
>> On Wed, Mar 16, 2016 at 5:17 PM, Bin.Cheng  wrote:
>>> On Wed, Mar 16, 2016 at 12:20 PM, Richard Biener
>>>  wrote:
>>
>> It is an alternative to adding a hook to get_references_in_stmt and
>> probably "easier".
>>

 Index: tree-if-conv.c
 ===
 --- tree-if-conv.c  (revision 234215)
 +++ tree-if-conv.c  (working copy)
 @@ -1235,6 +1220,38 @@ if_convertible_loop_p_1 (struct loop *lo

for (i = 0; refs->iterate (i, &dr); i++)
  {
 +  tree *refp = &DR_REF (dr);
 +  while ((TREE_CODE (*refp) == COMPONENT_REF
 + && TREE_OPERAND (*refp, 2) == NULL_TREE)
 +|| TREE_CODE (*refp) == IMAGPART_EXPR
 +|| TREE_CODE (*refp) == REALPART_EXPR)
 +   refp = &TREE_OPERAND (*refp, 0);
 +  if (refp != &DR_REF (dr))
 +   {
 + tree saved_base = *refp;
 + *refp = integer_zero_node;
 +
 + if (DR_INIT (dr))
 +   {
 + tree poffset;
 + int punsignedp, preversep, pvolatilep;
 + machine_mode pmode;
 + HOST_WIDE_INT pbitsize, pbitpos;
 + get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos,
 &poffset,
 +  &pmode, &punsignedp, &preversep,
 &pvolatilep,
 +  false);
 + gcc_assert (poffset == NULL_TREE);
 +
 + DR_INIT (dr)
 +   = wide_int_to_tree (ssizetype,
 +   wi::sub (DR_INIT (dr),
 +pbitpos / BITS_PER_UNIT));
 +   }
 +
 + *refp = saved_base;
 + DR_REF (dr) = *refp;
 +   }
>>> It looks to me like the code is trying to resolve the difference between
>>> two (or more) component references, which is DR_INIT in the code.  But
>>> DR_INIT is not the only thing that needs to be handled.  For a structure
>>> containing two sub-arrays, DR_OFFSET may be different too.
>>
>> Yes, but we can't say that if
>>
>>   a->a[i]
>>
>> doesn't trap that then
>>
>>   a->b[i]
>>
>> doesn't trap either.  We can only "strip" outermost
>> non-variable-offset components.
>>
>> But maybe I'm missing what example you are thinking of.
> Hmm, this was the case I meant.  What I don't understand is that the
> current code logic does infer trap information for a.b[i] from a.a[i].
> Given the example below:
> struct str
> {
>   int a[10];
>   int b[20];
>   char c;
> };
>
> void bar (struct str *);
> int foo (int x, int n)
> {
>   int i;
>   struct str s;
>   bar (&s);
>   for (i = 0; i < n; i++)
> {
>   s.a[i] = s.b[i];
>   if (x > i)
> s.b[i] = 0;
> }
>   bar (&s);
>   return 0;
> }
> The loop is convertible because of the code below in function
> ifcvt_memrefs_wont_trap:
>
>   /* If a is unconditionally accessed then ... */
>   if (DR_RW_UNCONDITIONALLY (*master_dr))
> {
>   /* an unconditional read won't trap.  */
>   if (DR_IS_READ (a))
> return true;
>
>   /* an unconditional write won't trap if the base is written
>  to unconditionally.  */
>   if (base_master_dr
>   && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
> return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
>   else
> {
>   /* or the base is known to be not readonly.  */
>   tree base_tree = get_base_address (DR_REF (a));
>   if (DECL_P (base_tree)
>   && decl_binds_to_current_def_p (base_tree)
>   && ! TREE_READONLY (base_tree))
> return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES);
> }
> }
> It is the main object '&s' that is recorded in base_master_d

Re: [PATCH] Fix up AVX512 andnot (PR target/70525)

2016-04-05 Thread Uros Bizjak
On Mon, Apr 4, 2016 at 7:44 PM, Jakub Jelinek  wrote:
> Hi!
>
> This patch fixes various *andnot3* issues.  There are two issues on
> the ISA side that make stuff harder for andnot: there are no VPANDNB and
> VPANDNW instructions, and while there used to be just VPANDN instruction
> in AVX/AVX2, there is only VPANDND and VPANDNQ in EVEX.
> The patch changes:
> 1) simplifies asserts, TARGET_AVX512VL implies both TARGET_AVX2 and
>TARGET_SSE2, so asserts like TARGET_AVX2 || TARGET_AVX512VL make no
>sense
> 2) for V32HImode/V64QImode it emits vpandnq instruction, rather than
>vpandn that fails to assemble
> 3) the *andnot3 pattern clearly wasn't expecting subst, but
>as it used (copy-paste?)  in the template, it actually
>was substed, which is wrong - we can't implement V64QImode or V32HImode
>masking of andnot (well, not in a single instruction); checked
>this was the only case of  used in define_insn
>without ; for V*[SD]Imode *andnot3_mask pattern
>should DTRT
> 4) the *andnot3_mask pattern makes no sense, for similar reasons
>- VPANDNB and VPANDNW are not in the ISA, not even with AVX512-BW
> 5) formatting fixes
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-04-04  Jakub Jelinek  
>
> PR target/70525
> * config/i386/sse.md (*andnot3): Simplify assertions.
> Use vpandn for V16SI/V8DImode, vpandnq for
> V32HI/V64QImode, don't use , fix up formatting.
> (*andnot3_mask): Remove insn with VI12_AVX512VL iterator.
>
> * gcc.target/i386/pr70525.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2016-04-01 17:21:31.0 +0200
> +++ gcc/config/i386/sse.md  2016-04-04 14:42:06.296867515 +0200
> @@ -11377,45 +11377,46 @@ (define_insn "*andnot3"
>  case MODE_XI:
>gcc_assert (TARGET_AVX512F);
>  case MODE_OI:
> -  gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
> +  gcc_assert (TARGET_AVX2);
>  case MODE_TI:
> -  gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
> +  gcc_assert (TARGET_SSE2);
>switch (mode)
> -  {
> -case V16SImode:
> -case V8DImode:
> -  if (TARGET_AVX512F)
> -  {
> -tmp = "pandn";
> -break;
> -  }
> -case V8SImode:
> -case V4DImode:
> -case V4SImode:
> -case V2DImode:
> -  if (TARGET_AVX512VL)
> -  {
> -tmp = "pandn";
> -break;
> -  }
> -default:
> -  tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
> -  }
> +   {
> +   case V64QImode:
> +   case V32HImode:
> + /* There is no vpandnb or vpandnw instruction, nor vpandn for
> +512-bit vectors. Use vpandnq instead.  */
> + tmp = "pandnq";
> + break;
> +   case V16SImode:
> +   case V8DImode:
> + tmp = "pandn";
> + break;
> +   case V8SImode:
> +   case V4DImode:
> +   case V4SImode:
> +   case V2DImode:
> + tmp = TARGET_AVX512VL ? "pandn" : "pandn";
> + break;
> +   default:
> + tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
> + break;
> +   }
>break;
>
> -   case MODE_V16SF:
> +case MODE_V16SF:
>gcc_assert (TARGET_AVX512F);
> -   case MODE_V8SF:
> +case MODE_V8SF:
>gcc_assert (TARGET_AVX);
> -   case MODE_V4SF:
> +case MODE_V4SF:
>gcc_assert (TARGET_SSE);
>
>tmp = "andnps";
>break;
>
> -   default:
> +default:
>gcc_unreachable ();
> -   }
> +}
>
>switch (which_alternative)
>  {
> @@ -11423,7 +11424,7 @@ (define_insn "*andnot3"
>ops = "%s\t{%%2, %%0|%%0, %%2}";
>break;
>  case 1:
> -  ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, 
> %%2}";
> +  ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>break;
>  default:
>gcc_unreachable ();
> @@ -11471,21 +11472,6 @@ (define_insn "*andnot3_mask"
>"vpandn\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
>[(set_attr "type" "sselog")
> (set_attr "prefix" "evex")
> -   (set_attr "mode" "")])
> -
> -(define_insn "*andnot3_mask"
> -  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
> -   (vec_merge:VI12_AVX512VL
> - (and:VI12_AVX512VL
> -   (not:VI12_AVX512VL
> - (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
> -   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
> - (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
> - (match_operand: 4 "register_operand" "Yk")))]
> -  "TARGET_AVX512BW"
> -  "vpandn\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
> -  [(set_attr "type" "sselog")
> -   (set_attr "prefix" "evex")
> (set_attr "mode" "")])
>
>  (define_expand "3"
> --- gcc/testsuite/gcc.target/i386/pr70525.c.jj  2016-04-04 15:13:23.417615588 
> +0200
> +++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.0 
> +0

[PATCH] Fix PR70499

2016-04-05 Thread Richard Biener

The following patch fixes an SSA rewrite issue in
gimple_regimplify_operands (some bigger rewrite is IMHO necessary
here or in the caller in this case, the inliner, but that's not
appropriate now or for branches).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2016-04-05  Richard Biener  

PR middle-end/70499
* gimplify-me.c (gimple_regimplify_operands): Do not rewrite
non-register type temporaries into SSA.

* g++.dg/torture/pr70499.C: New testcase.

Index: gcc/gimplify-me.c
===
*** gcc/gimplify-me.c   (revision 234705)
--- gcc/gimplify-me.c   (working copy)
*** gimple_regimplify_operands (gimple *stmt
*** 299,305 
  if (need_temp)
{
  tree temp = create_tmp_reg (TREE_TYPE (lhs));
! if (gimple_in_ssa_p (cfun))
temp = make_ssa_name (temp);
  gimple_set_lhs (stmt, temp);
  post_stmt = gimple_build_assign (lhs, temp);
--- 299,306 
  if (need_temp)
{
  tree temp = create_tmp_reg (TREE_TYPE (lhs));
! if (gimple_in_ssa_p (cfun)
! && is_gimple_reg_type (TREE_TYPE (lhs)))
temp = make_ssa_name (temp);
  gimple_set_lhs (stmt, temp);
  post_stmt = gimple_build_assign (lhs, temp);
Index: gcc/testsuite/g++.dg/torture/pr70499.C
===
*** gcc/testsuite/g++.dg/torture/pr70499.C  (revision 0)
--- gcc/testsuite/g++.dg/torture/pr70499.C  (working copy)
***
*** 0 
--- 1,39 
+ // { dg-do compile }
+ // { dg-additional-options "-w -Wno-psabi" }
+ // { dg-additional-options "-mavx" { target x86_64-*-* i?86-*-* } }
+ 
+ typedef double __m256d __attribute__ ((__vector_size__ (32), __may_alias__));
+ 
+ struct SIMD {
+   __m256d data;
+   SIMD() {};
+   SIMD (double val) { }
+   SIMD(__m256d _data) { data = _data; }
+   SIMD operator* (SIMD a) { return a; }
+ };
+ 
+ struct Foo {
+   SIMD val;
+   SIMD dval[2];
+   __attribute__((__always_inline__)) SIMD & Value() throw() { return val; }
+   __attribute__((__always_inline__)) Foo operator* ( const Foo & y) throw() 
+ {
+   Foo res;
+   SIMD hx;
+   SIMD hy;
+   res.Value() = hx*hy;
+   res.dval[0] = hx*hy;
+   return res;
+ }
+ };
+ 
+ template  
+ __attribute__((__always_inline__)) inline void inlineFunc(Tx hx[]) {
+ Tx x = hx[0], y = hx[1];
+ Tx lam[1] = (x*y);
+ }
+ 
+ void FooBarFunc () {
+   Foo adp[2];
+   inlineFunc (adp);
+ }


[PATCH: RL78] Optimize libgcc routines using clrw and clrb

2016-04-05 Thread Kaushik Phatak
Hi,
Please find below a patch that optimizes libgcc routines for the RL78 target.

This is similar to my earlier patch submitted here,
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00415.html

The patch optimizes the loading of an immediate value of 0x00 by
using the clrw or clrb instruction.
The patch replaces the movw/mov instructions with the smaller clrw/clrb
instructions.
The clrw and clrb instructions generate only 1 byte of opcode, compared to
3 or 2 bytes for movw and mov.

Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.

Best Regards,
Kaushik

p.s. Kindly ignore any disclaimers at end of this e-mail as they are 
auto-inserted.
Apologies for the same.

2016-04-06  Kaushik Phatak 

* config/rl78/bit-count.S: Use clrw/clrb where possible.
* config/rl78/cmpsi2.S: Likewise.
* config/rl78/divmodhi.S: Likewise.
* config/rl78/divmodsi.S: Likewise.
* config/rl78/fpbit-sf.S: Likewise.
* config/rl78/fpmath-sf.S: Likewise.
* config/rl78/mulsi3.S: Likewise.

Index: libgcc/config/rl78/bit-count.S
===
--- libgcc/config/rl78/bit-count.S  (revision 3174)
+++ libgcc/config/rl78/bit-count.S  (working copy)
@@ -139,7 +139,7 @@
xor1cy, a.5
xor1cy, a.6
xor1cy, a.7
-   movwax, #0
+   clrwax
bnc $1f
incwax
 1:
@@ -190,7 +190,7 @@
movwax, sp
addwax, #4
movwhl, ax
-   mov a, #0
+   clrba
 1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
 
mov x, a
-   mov a, #0
+   clrba
movwr8, ax
ret 
 END_FUNC   ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
 
;; They differ.  Subtract *S2 from *S1 and return as the result.
mov x, a
-   mov a, #0
-   mov r9, #0
+   clrba
+   clrbr9
subwax, r8
 1:
movwr8, ax
Index: libgcc/config/rl78/divmodhi.S
===
--- libgcc/config/rl78/divmodhi.S   (revision 3174)
+++ libgcc/config/rl78/divmodhi.S   (working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
movwhl, ax
-   movwax, #0
+   clrwax
subwax, [hl]
movw[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===
--- libgcc/config/rl78/divmodsi.S   (revision 3174)
+++ libgcc/config/rl78/divmodsi.S   (working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
movwhl, ax
-   movwax, #0
+   clrwax
subwax, [hl]
movw[hl], ax
-   movwax, #0
+   clrwax
sknc
decwax
subwax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===
--- libgcc/config/rl78/fpbit-sf.S   (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S   (working copy)
@@ -117,7 +117,7 @@
call$!__int_iszero
bnz $2f
;; At this point, both args are zero.
-   mov a, #0
+   clrba
ret
 
 2:
@@ -151,7 +151,7 @@
bc  $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
 
-   mov a, #0
+   clrba
ret
 
 xbig_cmpsf:; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movwr10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrbr8
mov r10, #0x8000
ret

@@ -302,10 +302,10 @@
clr1a.7
call$!__int_fixunssfsi
 
-   movwax, #0
+   clrwax
subwax, r8
movwr8, ax
-   movwax, #0
+   clrwax
 sknc
 decwax
 subwax, r10
@@ -410,7 +410,7 @@
set1a.7
 
;; Clear B:C:R12:R13
-   movwbc, #0
+   clrwbc
movwr12, #0
 
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
;; If negative convert to positive ...
movwhl, ax
-   movwax, #0
+   clrwax
subwax, bc
movwbc, ax
-   movwax, #0
+   clrwax
sknc
decwax
subwax, hl
@@ -533,7 +533,7 @@
bnz $1f
movwax, bc
cmpwax, #0
-   movwax, #0
+   clrwax
bnz $1f
 
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===
--- libgcc/config/rl78/fpmath-sf.S  

[RFC] PR70117, ppc long double isinf

2016-04-05 Thread Alan Modra
This patch fixes the incompatibility between GNUlib's 107 bit
precision LDBL_MAX for IBM extended precision and gcc's 106 bit
LDBL_MAX used to test for Inf, by just testing the high double for inf
and nan.  This agrees with the ABI which has stated for many years
that IBM extended precision "does not fully support the IEEE special
numbers NaN and INF.  These values are encoded in the high-order
double value only.  The low-order value is not significant".

I've also changed the test for nan, and both the inf test and the
subnormal test in isnormal, to just use the high double.  Changing the
subnormal test *does* allow a small range of values to be seen as
normal that previously would be rejected in a test of the whole long
double against 2**-969.  Which is why I'm making this an RFC rather
than a patch submission.

What is "subnormal" for an IBM extended precision number, anyway?  I
think the only definition that makes sense is in terms of precision.
We can't say a long double is subnormal if the low double is
subnormal, because numbers like (1.0 + 0x1p-1074) are representable
with the high double properly rounded and are clearly not close to
zero or losing precision.  So "subnormal" for IBM extended precision
is a number that has less than 106 bits of precision.  That would be
at a magnitude of less than 2**-969.  You can see that
  (0x1p-969 + 0x1p-1074)  = 0x1.008p-969
still has 106 bits of precision.  (0x1p-1074 is the smallest double
distinct from zero, and of course is subnormal.)  However,
  (0x1p-969 + -0x1p-1074) = 0x1.ffp-970
has only 105 bits of precision, if I'm counting correctly.

So testing just the high double in isnormal() returns true for a range
of 105 bit precision values, from (0x1p-969 - 0x1p-1023) to 
(0x1p-969 - 0x1p-1074).  The question is whether I should make the
isnormal() code quite nasty in order to give the right answer.
Probably yes, in which case this post becomes an explanation for why
the lower bound test in isnormal() needs to be a long double test.
Or probably better in terms of emitted code, can I get at both of the
component doubles of an IBM long double at the tree level?
VIEW_CONVERT_EXPR to a complex double perhaps?

PR target/70117
* builtins.c (fold_builtin_classify): For IBM extended precision,
look at just the high-order double to test for NaN.
(fold_builtin_interclass_mathfn): Similarly for Inf, and range
test for IBM extended precision isnormal.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 9368ed0..ed27d57 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -7529,6 +7529,9 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
fndecl, tree arg)
 
   mode = TYPE_MODE (TREE_TYPE (arg));
 
+  const struct real_format *fmt = FLOAT_MODE_FORMAT (mode);
+  bool is_ibm_extended = fmt->pnan < fmt->p;
+
   /* If there is no optab, try generic code.  */
   switch (DECL_FUNCTION_CODE (fndecl))
 {
@@ -7538,10 +7541,18 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
fndecl, tree arg)
   {
/* isinf(x) -> isgreater(fabs(x),DBL_MAX).  */
tree const isgr_fn = builtin_decl_explicit (BUILT_IN_ISGREATER);
-   tree const type = TREE_TYPE (arg);
+   tree type = TREE_TYPE (arg);
REAL_VALUE_TYPE r;
char buf[128];
 
+   if (is_ibm_extended)
+ {
+   /* NaN and INF are encoded in the high-order double value
+  only.  The low-order value is not significant.  */
+   type = double_type_node;
+   mode = DFmode;
+   arg = fold_build1_loc (loc, NOP_EXPR, type, arg);
+ }
get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
real_from_string (&r, buf);
result = build_call_expr (isgr_fn, 2,
@@ -7554,10 +7565,18 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
fndecl, tree arg)
   {
/* isfinite(x) -> islessequal(fabs(x),DBL_MAX).  */
tree const isle_fn = builtin_decl_explicit (BUILT_IN_ISLESSEQUAL);
-   tree const type = TREE_TYPE (arg);
+   tree type = TREE_TYPE (arg);
REAL_VALUE_TYPE r;
char buf[128];
 
+   if (is_ibm_extended)
+ {
+   /* NaN and INF are encoded in the high-order double value
+  only.  The low-order value is not significant.  */
+   type = double_type_node;
+   mode = DFmode;
+   arg = fold_build1_loc (loc, NOP_EXPR, type, arg);
+ }
get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
real_from_string (&r, buf);
result = build_call_expr (isle_fn, 2,
@@ -7578,15 +7597,28 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
fndecl, tree arg)
   islessequal(fabs(x),DBL_MAX).  */
tree const isle_fn = builtin_decl_explicit (BUILT_IN_ISLESSEQUAL);
tree const isge_fn = builtin_decl_explicit (BUILT_IN_ISGREATEREQUAL);
-   tree const type = TREE_TYPE (arg);
+   tre

Re: [PATCH] Fix PR70484, RTL DSE using wrong dependence check

2016-04-05 Thread Richard Biener
On Mon, 4 Apr 2016, Jakub Jelinek wrote:

> On Mon, Apr 04, 2016 at 11:24:41AM +0200, Richard Biener wrote:
> > On Fri, 1 Apr 2016, Bernd Schmidt wrote:
> > 
> > > On 04/01/2016 11:08 AM, Richard Biener wrote:
> > > > {
> > > > ! if (canon_true_dependence (s_info->mem,
> > > > !GET_MODE (s_info->mem),
> > > > !s_info->mem_addr,
> > > > !mem, mem_addr))
> > > > {
> > > >   s_info->rhs = NULL;
> > > >   s_info->const_rhs = NULL;
> > > > --- 1609,1617 
> > > >the value of store_info.  If it is, set the rhs to NULL to
> > > >keep it from being used to remove a load.  */
> > > > {
> > > > ! if (canon_output_dependence (s_info->mem, true,
> > > > !  mem, GET_MODE (mem),
> > > > !  mem_addr))
> > > > {
> > > >   s_info->rhs = NULL;
> > > >   s_info->const_rhs = NULL;
> > > 
> > > I think the patch is ok, but there is a comment in that function which
> > > references canon_true_dependence; that should also be fixed up.
> > 
> > Done, though I don't understand it at all ... if alias-set was supposed
> > to be zero for all cases we call canon_true_dependence then the issue
> > wouldn't have happened.  Maybe there were times where passing mem_addr
> > == NULL_RTX to canon_true_dependence caused it to bail out?
> > 
> > Not sure how to adjust that comment now, maybe it would be valid
> > to simply remove the if (spill_alias_set) case and always use
> > the else case?
> 
> I believe all of the spill_alias_set stuff has been dead for many years.
> In 4.4, a call to dse_record_singleton_alias_set has been removed
> (supposedly related to introduction of IRA).  Then in 4.8 you've
> removed the dse_record_singleton_alias_set function, later on Lawrence
> removed other small bits of this.
> E.g. the alias_set_out argument from canon_address, all of spill_alias_set
> handling, alias_set field, clear_alias_set_lookup, clear_alias_mode_holder,
> clear_alias_group, clear_alias_mode_table are all dead IMHO.

True.  By simple constant propagation I can remove a lot of code.

I'm going to bootstrap / test the following - is this ok for trunk
now (I'm going to write a better changelog).

Thanks,
Richard.

2016-04-05  Richard Biener  

* dse.c:  Remove dead code.

Index: gcc/dse.c
===
--- gcc/dse.c   (revision 234736)
+++ gcc/dse.c   (working copy)
@@ -242,9 +242,6 @@ struct store_info
   /* Canonized MEM address for use by canon_true_dependence.  */
   rtx mem_addr;
 
-  /* If this is non-zero, it is the alias set of a spill location.  */
-  alias_set_type alias_set;
-
   /* The offset of the first and byte before the last byte associated
  with the operation.  */
   HOST_WIDE_INT begin, end;
@@ -306,9 +303,6 @@ struct read_info_type
   /* The id of the mem group of the base address.  */
   int group_id;
 
-  /* If this is non-zero, it is the alias set of a spill location.  */
-  alias_set_type alias_set;
-
   /* The offset of the first and byte after the last byte associated
  with the operation.  If begin == end == 0, the read did not have
  a constant offset.  */
@@ -576,19 +570,6 @@ static object_allocator
 
 static deferred_change *deferred_change_list = NULL;
 
-/* The group that holds all of the clear_alias_sets.  */
-static group_info *clear_alias_group;
-
-/* The modes of the clear_alias_sets.  */
-static htab_t clear_alias_mode_table;
-
-/* Hash table element to look up the mode for an alias set.  */
-struct clear_alias_mode_holder
-{
-  alias_set_type alias_set;
-  machine_mode mode;
-};
-
 /* This is true except if cfun->stdarg -- i.e. we cannot do
this for vararg functions because they play games with the frame.  */
 static bool stores_off_frame_dead_at_return;
@@ -596,7 +577,6 @@ static bool stores_off_frame_dead_at_ret
 /* Counter for stats.  */
 static int globally_deleted;
 static int locally_deleted;
-static int spill_deleted;
 
 static bitmap all_blocks;
 
@@ -613,22 +593,6 @@ static unsigned int current_position;
 */
 
 
-/* Find the entry associated with ALIAS_SET.  */
-
-static struct clear_alias_mode_holder *
-clear_alias_set_lookup (alias_set_type alias_set)
-{
-  struct clear_alias_mode_holder tmp_holder;
-  void **slot;
-
-  tmp_holder.alias_set = alias_set;
-  slot = htab_find_slot (clear_alias_mode_table, &tmp_holder, NO_INSERT);
-  gcc_assert (*slot);
-
-  return (struct clear_alias_mode_holder *) *slot;
-}
-
-
 /* Hashtable callbacks for maintaining the "bases" field of
store_group_info, given that the addresses are function invariants.  */
 
@@ -665,37 +629,13 @@ get_group_info (rtx base)
   group_info *gi;
   group_info **slot;
 
-  if 

Re: [PATCH] Fix PR31096

2016-04-05 Thread Hurugalawadi, Naveen
Hi,

>> Looks like you are turning x*-1 < y*-1 into x < y [...]

diff --git a/gcc/match.pd b/gcc/match.pd
index c0ed305..e073e9f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -894,7 +894,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && tree_nop_conversion_p (type, TREE_TYPE (@1)))
   (convert (bit_and (bit_not @1) @0
 
-
+/* Fold A * 10 == B * 10 into A == B.  naveen*/
+(for cmp (tcc_comparison)
+ (simplify
+  (cmp (mult:cs @0 INTEGER_CST@1) (mult:cs @2 INTEGER_CST@1))
+   (cmp @0 @2)))
 
 /* ((X inner_op C0) outer_op C1)
With X being a tree where value_range has reasoned certain bits to always be
diff --git a/gcc/testsuite/gcc.dg/pr31096.c b/gcc/testsuite/gcc.dg/pr31096.c
new file mode 100644
index 000..1c464db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr31096.c
@@ -0,0 +1,17 @@
+/* PR middle-end/31096 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" }  */
+
+int
+f (int a, int b)
+{
+  return a * 10 == b * 10;
+}
+
+int
+f1 (int a, int b)
+{
+  return a == b;
+}
+
+/* { dg-final { scan-tree-dump-not " * 10" "optimized" } } */


[PING 3] [PATCH] Fix PR 31531: A microoptimization of isnegative of signed integer

2016-04-05 Thread Hurugalawadi, Naveen
Hi,

Please review the patch at the following link and let me know
if there should be any modifications in it:-

https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01035.html

Thanks,
Naveen


Re: Scan for parallelization of the oacc kernels test-cases in gfortran.dg/goacc

2016-04-05 Thread Tom de Vries

On 18/03/16 13:37, Thomas Schwinge wrote:

Hi!

On Wed, 9 Mar 2016 10:17:28 +0100, Tom de Vries  wrote:

[Should have cited

instead of the C/C++ tests]



Retested on current trunk.

Committed, minus the kernels-parallel-loop-data-enter-exit.f95 test.


Is there a reason why you omitted the following tree scanning tests (as
done for C/C++, and also present for Fortran on gomp-4_0-branch)?


I think that was a question of trying to avoid interaction between:
- the tests I was committing and
- removing the dependency of openacc kernels on
  -ftree-parallelize-loops=
which were sort of happening in parallel.


(Note
that I had to XFAIL gfortran.dg/goacc/kernels-loop-n.f95.)


Right. I remember looking into this before, and classified it as the 
openacc version of PR68787 - fipa-pta to interpret restrict.


Now that we'll have an xfail for it, I've filed it as PR70545 - 
'[openacc] gfortran.dg/goacc/kernels-loop-n.f95 not parallelized'.



 OK to commit?



Yes please.

Thanks,
- Tom


commit f0294eeb30ef285c3930b975ccbc1b6d7052cc03
Author: Thomas Schwinge 
Date:   Fri Mar 18 12:52:37 2016 +0100

 Scan for parallelization of the oacc kernels test-cases in 
gfortran.dg/goacc

gcc/testsuite/
* gfortran.dg/goacc/kernels-loop-2.f95: Scan for parallelization.
* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
* gfortran.dg/goacc/kernels-loop.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-n.f95: Likewise, XFAILed.
---
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 | 2 ++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95| 1 +
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 | 2 ++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95   | 2 ++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95   | 2 ++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95  | 2 ++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 | 7 +++
  gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   | 2 ++
  8 files changed, 20 insertions(+)

diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 5cc2e8b..865f7a6 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -40,3 +40,5 @@ end program main
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 
"optimized" } }
+
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index d1bfc70..c9f3a62 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -47,3 +47,4 @@ end program main
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 
"optimized" } }

+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index feac7b2..3361607 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -46,3 +46,5 @@ end program main
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 
"optimized" } }
+
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 632983f..5ba56fb 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -44,3 +44,5 @@ end program main
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
  ! { dg-final { scan-tree-dump-ti

Re: [PATCH] Fix PR31096

2016-04-05 Thread Marc Glisse

On Tue, 5 Apr 2016, Hurugalawadi, Naveen wrote:


Hi,


Looks like you are turning x*-1 < y*-1 into x

Please find attached the modified patch that works on integer
constant values.

Please review the patch and let me know if this is okay?


-1 is an integer constant, so that's still invalid. It is also invalid for 
unsigned. The :s are useless since the output is a single insn.


--
Marc Glisse


Re: [PATCH] Fix PR70484, RTL DSE using wrong dependence check

2016-04-05 Thread Jakub Jelinek
On Tue, Apr 05, 2016 at 11:05:44AM +0200, Richard Biener wrote:
> True.  By simple constant propagation I can remove a lot of code.
> 
> I'm going to bootstrap / test the following - is this ok for trunk
> now (I'm going to write a better changelog).

LGTM with better Changelog, though I have small nits:

> -  else if ((s_info->group_id == group_id)
> -&& (s_info->cse_base == base))
> +  if ((s_info->group_id == group_id)
> +   && (s_info->cse_base == base))

+  if (s_info->group_id == group_id && s_info->cse_base == base)

instead, please.

> +   if (store_info->group_id >= 0)
> + {
> +   group_info *group
> = rtx_group_vec[store_info->group_id];

The formatting looks wrong and
  group_info *group = rtx_group_vec[store_info->group_id];
fits now on one line.

> @@ -3086,30 +2886,6 @@ scan_reads_nospill (insn_info_t insn_inf

Please also rename the
dse_step2_nospill
scan_stores_nospill
scan_reads_nospill
dse_step5_nospill
functions to s/_nospill//g and adjust all their uses (and perhaps function
comments).

Jakub


Re: [RFC] PR70117, ppc long double isinf

2016-04-05 Thread Richard Biener
On Tue, Apr 5, 2016 at 10:33 AM, Alan Modra  wrote:
> This patch fixes the incompatibility between GNUlib's 107 bit
> precision LDBL_MAX for IBM extended precision and gcc's 106 bit
> LDBL_MAX used to test for Inf, by just testing the high double for inf
> and nan.  This agrees with the ABI which has stated for many years
> that IBM extended precision "does not fully support the IEEE special
> numbers NaN and INF.  These values are encoded in the high-order
> double value only.  The low-order value is not significant".
>
> I've also changed the test for nan, and both the inf test and the
> subnormal test in isnormal, to just use the high double.  Changing the
> subnormal test *does* allow a small range of values to be seen as
> normal that previously would be rejected in a test of the whole long
> double against 2**-969.  Which is why I'm making this an RFC rather
> than a patch submission.
>
> What is "subnormal" for an IBM extended precision number, anyway?  I
> think the only definition that makes sense is in terms of precision.
> We can't say a long double is subnormal if the low double is
> subnormal, because numbers like (1.0 + 0x1p-1074) are representable
> with the high double properly rounded and are clearly not close to
> zero or losing precision.  So "subnormal" for IBM extended precision
> is a number that has less than 106 bits of precision.  That would be
> at a magnitude of less than 2**-969.  You can see that
>   (0x1p-969 + 0x1p-1074)  = 0x1.008p-969
> still has 106 bits of precision.  (0x1p-1074 is the smallest double
> distinct from zero, and of course is subnormal.)  However,
>   (0x1p-969 + -0x1p-1074) = 0x1.ffp-970
> has only 105 bits of precision, if I'm counting correctly.
>
> So testing just the high double in isnormal() returns true for a range
> of 105 bit precision values, from (0x1p-969 - 0x1p-1023) to
> (0x1p-969 - 0x1p-1074).  The question is whether I should make the
> isnormal() code quite nasty in order to give the right answer.
> Probably yes, in which case this post becomes an explanation for why
> the lower bound test in isnormal() needs to be a long double test.
> Or probably better in terms of emitted code, can I get at both of the
> component doubles of an IBM long double at the tree level?
> VIEW_CONVERT_EXPR to a complex double perhaps?

Yes, that would work I think, the other variant would be a
BIT_FIELD_REF (but watch out for endianness?).

In general the patch looks like a good approach to me but can we
hide that

> +  const struct real_format *fmt = FLOAT_MODE_FORMAT (mode);
> +  bool is_ibm_extended = fmt->pnan < fmt->p;

in a function somewhere in real.[ch]?

Thanks,
Richard.

> PR target/70117
> * builtins.c (fold_builtin_classify): For IBM extended precision,
> look at just the high-order double to test for NaN.
> (fold_builtin_interclass_mathfn): Similarly for Inf, and range
> test for IBM extended precision isnormal.
>
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 9368ed0..ed27d57 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -7529,6 +7529,9 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
> fndecl, tree arg)
>
>mode = TYPE_MODE (TREE_TYPE (arg));
>
> +  const struct real_format *fmt = FLOAT_MODE_FORMAT (mode);
> +  bool is_ibm_extended = fmt->pnan < fmt->p;
> +
>/* If there is no optab, try generic code.  */
>switch (DECL_FUNCTION_CODE (fndecl))
>  {
> @@ -7538,10 +7541,18 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
> fndecl, tree arg)
>{
> /* isinf(x) -> isgreater(fabs(x),DBL_MAX).  */
> tree const isgr_fn = builtin_decl_explicit (BUILT_IN_ISGREATER);
> -   tree const type = TREE_TYPE (arg);
> +   tree type = TREE_TYPE (arg);
> REAL_VALUE_TYPE r;
> char buf[128];
>
> +   if (is_ibm_extended)
> + {
> +   /* NaN and INF are encoded in the high-order double value
> +  only.  The low-order value is not significant.  */
> +   type = double_type_node;
> +   mode = DFmode;
> +   arg = fold_build1_loc (loc, NOP_EXPR, type, arg);
> + }
> get_max_float (REAL_MODE_FORMAT (mode), buf, sizeof (buf));
> real_from_string (&r, buf);
> result = build_call_expr (isgr_fn, 2,
> @@ -7554,10 +7565,18 @@ fold_builtin_interclass_mathfn (location_t loc, tree 
> fndecl, tree arg)
>{
> /* isfinite(x) -> islessequal(fabs(x),DBL_MAX).  */
> tree const isle_fn = builtin_decl_explicit (BUILT_IN_ISLESSEQUAL);
> -   tree const type = TREE_TYPE (arg);
> +   tree type = TREE_TYPE (arg);
> REAL_VALUE_TYPE r;
> char buf[128];
>
> +   if (is_ibm_extended)
> + {
> +   /* NaN and INF are encoded in the high-order double value
> +  only.  The low-order value is not significant.  */
> +   type = double_type_node;
> +   mode 

Re: [PING 5, PATCH] PR/68089: C++-11: Ignore "alignas(0)".

2016-04-05 Thread Dominik Vogt
On Mon, Jan 04, 2016 at 12:33:21PM +0100, Dominik Vogt wrote:
> On Fri, Jan 01, 2016 at 05:53:08PM -0700, Martin Sebor wrote:
> > On 12/31/2015 04:50 AM, Dominik Vogt wrote:
> > >The attached patch fixes C++-11 handling of "alignas(0)" which
> > >should be ignored but currently generates an error message.  A
> > >test case is included; the patch has been tested on S390x.  Since
> > >it's a language issue it should be independent of the backend
> > >used.
> > 
> > The patch doesn't handle value-dependent expressions(*).
> 
> > It
> > seems that the problem is in handle_aligned_attribute() calling
> > check_user_alignment() with the second argument (ALLOW_ZERO)
> > set to false.  Calling it with true fixes the problem and handles
> > value-dependent expressions (I haven't done any more testing beyond
> > that).
> 
> Like the attached patch?  (Passes the testsuite on s390x.)
> 
> But wouldn't an "aligned" attribute be added, allowing the backend
> to possibly generate an error or a warning?
> 
> > Also, in the test, I noticed the definition of the first struct
> > is missing the terminating semicolon.
> 
> Yeah.

> gcc/c-family/ChangeLog
> 
>   PR/69089
>   * c-common.c (handle_aligned_attribute): Allow 0 as an argument to the
>   "aligned" attribute.
> 
> gcc/testsuite/ChangeLog
> 
>   PR/69089
>   * g++.dg/cpp0x/alignas5.C: New test.

> >From 2461293b9070da74950fd0ae055d1239cc69ce67 Mon Sep 17 00:00:00 2001
> From: Dominik Vogt 
> Date: Wed, 30 Dec 2015 15:08:52 +0100
> Subject: [PATCH] C++-11: Ignore "alignas(0)" instead of generating an
>  error message.
> 
> This is required by the C++-11 standard.
> ---
>  gcc/c-family/c-common.c   |  2 +-
>  gcc/testsuite/g++.dg/cpp0x/alignas5.C | 29 +
>  2 files changed, 30 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/alignas5.C
> 
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index 653d1dc..9eb25a9 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -7804,7 +7804,7 @@ handle_aligned_attribute (tree *node, tree ARG_UNUSED 
> (name), tree args,
>else if (TYPE_P (*node))
>  type = node, is_type = 1;
>  
> -  if ((i = check_user_alignment (align_expr, false)) == -1
> +  if ((i = check_user_alignment (align_expr, true)) == -1
>|| !check_cxx_fundamental_alignment_constraints (*node, i, flags))
>  *no_add_attrs = true;
>else if (is_type)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/alignas5.C 
> b/gcc/testsuite/g++.dg/cpp0x/alignas5.C
> new file mode 100644
> index 000..f3252a9
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/alignas5.C
> @@ -0,0 +1,29 @@
> +// PR c++/69089
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-Wno-attributes" }
> +
> +alignas (0) int valid1;
> +alignas (1 - 1) int valid2;
> +struct Tvalid
> +{
> +  alignas (0) int i;
> +  alignas (2 * 0) int j;
> +};
> +
> +alignas (-1) int invalid1; /* { dg-error "not a positive power of 2" } */
> +alignas (1 - 2) int invalid2; /* { dg-error "not a positive power of 2" } */
> +struct Tinvalid
> +{
> +  alignas (-1) int i; /* { dg-error "not a positive power of 2" } */
> +  alignas (2 * 0 - 1) int j; /* { dg-error "not a positive power of 2" } */
> +};
> +
> +template  struct TNvalid1 { alignas (N) int i; };
> +TNvalid1<0> SNvalid1;
> +template  struct TNvalid2 { alignas (N) int i; };
> +TNvalid2<1 - 1> SNvalid2;
> +
> +template  struct TNinvalid1 { alignas (N) int i; }; /* { dg-error 
> "not a positive power of 2" } */
> +TNinvalid1<-1> SNinvalid1;
> +template  struct TNinvalid2 { alignas (N) int i; }; /* { dg-error 
> "not a positive power of 2" } */
> +TNinvalid2<1 - 2> SNinvalid2;
> -- 
> 2.3.0
> 



Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany




Re: [PATCH] Fix PR70484, RTL DSE using wrong dependence check

2016-04-05 Thread Richard Biener
On Tue, 5 Apr 2016, Jakub Jelinek wrote:

> On Tue, Apr 05, 2016 at 11:05:44AM +0200, Richard Biener wrote:
> > True.  By simple constant propagation I can remove a lot of code.
> > 
> > I'm going to bootstrap / test the following - is this ok for trunk
> > now (I'm going to write a better changelog).
> 
> LGTM with better Changelog, though I have small nits:
> 
> > -  else if ((s_info->group_id == group_id)
> > -  && (s_info->cse_base == base))
> > +  if ((s_info->group_id == group_id)
> > + && (s_info->cse_base == base))
> 
> +  if (s_info->group_id == group_id && s_info->cse_base == base)
> 
> instead, please.
> 
> > + if (store_info->group_id >= 0)
> > +   {
> > + group_info *group
> >   = rtx_group_vec[store_info->group_id];
> 
> The formatting looks wrong and
> group_info *group = rtx_group_vec[store_info->group_id];
> fits now on one line.
> 
> > @@ -3086,30 +2886,6 @@ scan_reads_nospill (insn_info_t insn_inf
> 
> Please also rename the
> dse_step2_nospill
> scan_stores_nospill
> scan_reads_nospill
> dse_step5_nospill
> functions to s/_nospill//g and adjust all their uses (and perhaps function
> comments).

Like this?

Thanks,
Richard.

2016-04-05  Richard Biener  

* dse.c (struct store_info): Remove alias_set member.
(struct read_info_type): Likewise.
(clear_alias_group, clear_alias_mode_table, clear_alias_mode_holder,
spill_deleted, clear_alias_set_lookup): Remove.
(get_group_info): Remove dead base == NULL_RTX case.
(dse_step0): Remove initialization of removed variables.
(delete_dead_store_insn): Remove alias set dumping.
(free_read_records): Remove alias_set handling.
(canon_address): Remove alias_set_out parameter.
(record_store): Remove spill_alias_set, it's always zero.
(check_mem_read_rtx): Likewise.
(dse_step2): Rename from ...
(dse_step2_nospill): ... this.  Adjust.
(scan_stores): Rename from ...
(scan_stores_nospill): ... this.
(scan_reads): Rename from ...
(scan_reads_nospill): ... this.
(scan_stores_spill, scan_reads_spill): Remove.
(dse_step3_scan): Remove for_spills argument which is always false.
(dse_step3): Likewise.
(dse_step5): Rename from ...
(dse_step5_nospill): ... this.  Remove alias_set handling.
(rest_of_handle_dse): Adjust.

Index: gcc/dse.c
===
--- gcc/dse.c   (revision 234736)
+++ gcc/dse.c   (working copy)
@@ -242,9 +242,6 @@ struct store_info
   /* Canonized MEM address for use by canon_true_dependence.  */
   rtx mem_addr;
 
-  /* If this is non-zero, it is the alias set of a spill location.  */
-  alias_set_type alias_set;
-
   /* The offset of the first and byte before the last byte associated
  with the operation.  */
   HOST_WIDE_INT begin, end;
@@ -306,9 +303,6 @@ struct read_info_type
   /* The id of the mem group of the base address.  */
   int group_id;
 
-  /* If this is non-zero, it is the alias set of a spill location.  */
-  alias_set_type alias_set;
-
   /* The offset of the first and byte after the last byte associated
  with the operation.  If begin == end == 0, the read did not have
  a constant offset.  */
@@ -576,19 +570,6 @@ static object_allocator
 
 static deferred_change *deferred_change_list = NULL;
 
-/* The group that holds all of the clear_alias_sets.  */
-static group_info *clear_alias_group;
-
-/* The modes of the clear_alias_sets.  */
-static htab_t clear_alias_mode_table;
-
-/* Hash table element to look up the mode for an alias set.  */
-struct clear_alias_mode_holder
-{
-  alias_set_type alias_set;
-  machine_mode mode;
-};
-
 /* This is true except if cfun->stdarg -- i.e. we cannot do
this for vararg functions because they play games with the frame.  */
 static bool stores_off_frame_dead_at_return;
@@ -596,7 +577,6 @@ static bool stores_off_frame_dead_at_ret
 /* Counter for stats.  */
 static int globally_deleted;
 static int locally_deleted;
-static int spill_deleted;
 
 static bitmap all_blocks;
 
@@ -613,22 +593,6 @@ static unsigned int current_position;
 */
 
 
-/* Find the entry associated with ALIAS_SET.  */
-
-static struct clear_alias_mode_holder *
-clear_alias_set_lookup (alias_set_type alias_set)
-{
-  struct clear_alias_mode_holder tmp_holder;
-  void **slot;
-
-  tmp_holder.alias_set = alias_set;
-  slot = htab_find_slot (clear_alias_mode_table, &tmp_holder, NO_INSERT);
-  gcc_assert (*slot);
-
-  return (struct clear_alias_mode_holder *) *slot;
-}
-
-
 /* Hashtable callbacks for maintaining the "bases" field of
store_group_info, given that the addresses are function invariants.  */
 
@@ -665,37 +629,13 @@ get_group_info (rtx base)
   group_info *gi;
   group_info **slot;
 
-  if (b

Re: a patch for PR68695

2016-04-05 Thread Kyrill Tkachov

Hi all,

On 01/04/16 21:43, Jakub Jelinek wrote:

On Fri, Apr 01, 2016 at 04:26:41PM -0400, Vladimir Makarov wrote:

I've noticed that after this patch, 2 tests regress (PASS -> FAIL) on arm:
   gcc.dg/ira-shrinkwrap-prep-2.c scan-rtl-dump pro_and_epilogue
"Performing shrink-wrapping"
   gcc.dg/pr10474.c scan-rtl-dump pro_and_epilogue "Performing shrink-wrapping"


I've checked the generated code.  RA with the patch generates a better code
for the both tests. So shrink wrap optimization failed. The final code has 1
insn less for the both tests when the patch is applied.

I guess it is wrong to write quality tests based on expected code generated
before any optimization.  It makes sense if we provide the same input.  LLVM
testsuite is mostly such tests as they have a readable IR.  GCC
unfortunately has no serialized and readable IR. On the other hand LLVM
lacks integrated testing.

So I'd mark these tests as XFAIL or remove arm from the DejaGnu target in the
tests.

FYI, those 2 tests also now FAIL on ppc64{,le}-linux in addition to
armv7hl-linux-gnueabi.


So for the test gcc.dg/pr10474.c on arm with -marm -O3 before this patch we
perform shrink-wrapping:
cmp     r0, #0
bxeq    lr
push    {r4, lr}
mov     r4, r0
...

And after the patch we don't:
push    {r4, lr}
subs    r4, r0, #0
popeq   {r4, pc}
...

The assembly after the "..." is identical.

So the resulting code is indeed shorter, though there is an
extra stack push and pop on the early return path.
A similar effect appears on gcc.dg/ira-shrinkwrap-prep-2.c.

I think both codegen decisions are valid though one
could argue that the new codegen is more appropriate for
-Os rather than -O3. If you agree then this is indeed a regression.
Though if so, it looks like a shrink-wrapping deficiency exposed by
this patch, rather than caused by it.

Jakub, do you happen to have the before and after codegen for these tests
on ppc64? I wonder if the effect is more clearcut there.

Thanks,
Kyrill



Jakub





Re: [PATCH] Fix PR70484, RTL DSE using wrong dependence check

2016-04-05 Thread Jakub Jelinek
On Tue, Apr 05, 2016 at 11:48:29AM +0200, Richard Biener wrote:
> Like this?

Yeah, thanks.

> 2016-04-05  Richard Biener  
> 
>   * dse.c (struct store_info): Remove alias_set member.
>   (struct read_info_type): Likewise.
>   (clear_alias_group, clear_alias_mode_table, clear_alias_mode_holder,
>   spill_deleted, clear_alias_set_lookup): Remove.
>   (get_group_info): Remove dead base == NULL_RTX case.
>   (dse_step0): Remove initialization of removed variables.
>   (delete_dead_store_insn): Remove alias set dumping.
>   (free_read_records): Remove alias_set handling.
>   (canon_address): Remove alias_set_out parameter.
>   (record_store): Remove spill_alias_set, it's always zero.
>   (check_mem_read_rtx): Likewise.
>   (dse_step2): Rename from ...
>   (dse_step2_nospill): ... this.  Adjust.
>   (scan_stores): Rename from ...
>   (scan_stores_nospill): ... this.
>   (scan_reads): Rename from ...
>   (scan_reads_nospill): ... this.
>   (scan_stores_spill, scan_reads_spill): Remove.
>   (dse_step3_scan): Remove for_spills argument which is always false.
>   (dse_step3): Likewise.
>   (dse_step5): Rename from ...
>   (dse_step5_nospill): ... this.  Remove alias_set handling.
>   (rest_of_handle_dse): Adjust.

Jakub


[PING][PATCH] Remove incorrect warning for kernels copy clause

2016-04-05 Thread Tom de Vries

On 24/03/16 17:59, Tom de Vries wrote:

Hi,

This patch fixes an incorrect warning for the oacc copy clause.

Consider this test-case:
...
void
foo (void)
{
   int i;

#pragma acc kernels
   {
 i = 1;
   }
}
...


When compiling with -fopenacc -Wuninitialized, we get an 'is used
uninitialized' warning for variable 'i', which is confusing given that
'i' is not used, but only set in the kernels region.

The warning occurs because there's an implicit copy(i) clause on the
kernels region, and that copy generates a read of i before the region,
and a write to i in the region.

The patch silences the warning by marking the variable in the copy
clause with TREE_NO_WARNING.

Build and reg-tested with goacc.exp, gomp.exp and target-libgomp.

OK for trunk if bootstrap and reg-test succeeds?



Ping.

Thanks,
- Tom


0001-Remove-incorrect-warning-for-kernels-copy-clause.patch


Remove incorrect warning for kernels copy clause

2016-03-24  Tom de Vries  

* omp-low.c (lower_omp_target): Set TREE_NO_WARNING for oacc copy
clause.

* c-c++-common/goacc/uninit-copy-clause.c: New test.
* gfortran.dg/goacc/uninit-copy-clause.f95: New test.

---
  gcc/omp-low.c  |  6 +++-
  .../c-c++-common/goacc/uninit-copy-clause.c| 38 ++
  .../gfortran.dg/goacc/uninit-copy-clause.f95   | 29 +
  3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3fd6eb3..d107961 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -16083,7 +16083,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
|| map_kind == GOMP_MAP_POINTER
|| map_kind == GOMP_MAP_TO_PSET
|| map_kind == GOMP_MAP_FORCE_DEVICEPTR)
- gimplify_assign (avar, var, &ilist);
+ {
+   if (is_gimple_omp_oacc (ctx->stmt))
+ TREE_NO_WARNING (var) = 1;
+   gimplify_assign (avar, var, &ilist);
+ }
avar = build_fold_addr_expr (avar);
gimplify_assign (x, avar, &ilist);
if ((GOMP_MAP_COPY_FROM_P (map_kind)
diff --git a/gcc/testsuite/c-c++-common/goacc/uninit-copy-clause.c 
b/gcc/testsuite/c-c++-common/goacc/uninit-copy-clause.c
new file mode 100644
index 000..b3cc445
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/uninit-copy-clause.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wuninitialized" } */
+
+void
+foo (void)
+{
+  int i;
+
+#pragma acc kernels
+  {
+i = 1;
+  }
+
+}
+
+void
+foo2 (void)
+{
+  int i;
+
+#pragma acc kernels copy (i)
+  {
+i = 1;
+  }
+
+}
+
+void
+foo3 (void)
+{
+  int i;
+
+#pragma acc kernels copyin(i)
+  {
+i = 1;
+  }
+
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/uninit-copy-clause.f95 
b/gcc/testsuite/gfortran.dg/goacc/uninit-copy-clause.f95
new file mode 100644
index 000..b2aae1d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/uninit-copy-clause.f95
@@ -0,0 +1,29 @@
+! { dg-do compile }
+! { dg-additional-options "-Wuninitialized" }
+
+subroutine foo
+  integer :: i
+
+  !$acc kernels
+  i = 1
+  !$acc end kernels
+
+end subroutine foo
+
+subroutine foo2
+  integer :: i
+
+  !$acc kernels copy (i)
+  i = 1
+  !$acc end kernels
+
+end subroutine foo2
+
+subroutine foo3
+  integer :: i
+
+  !$acc kernels copyin (i)
+  i = 1
+  !$acc end kernels
+
+end subroutine foo3





[PING][PATCH] Remove incorrect warning for parallel firstprivate clause

2016-04-05 Thread Tom de Vries

On 24/03/16 18:02, Tom de Vries wrote:

Hi,

This patch fixes an incorrect warning for the oacc firstprivate clause.

Consider this test-case:
...
void
foo (void)
{
   int i;

#pragma acc parallel
   {
 i = 1;
   }
}
...


When compiling with -fopenacc -Wuninitialized, we get an 'is used
uninitialized' warning for variable 'i', which is confusing given that
'i' is not used, but only set in the parallel region.

The warning occurs because there's an implicit firstprivate(i) clause on
the parallel region, and that firstprivate clause generates a read of i
before the region, and a write to i in the region.

The patch silences the warning by marking the variable in the
firstprivate clause with TREE_NO_WARNING.

Build and reg-tested with goacc.exp, gomp.exp and target-libgomp.

OK for trunk if bootstrap and reg-test succeeds?


Ping.

Thanks,
- Tom


0002-Remove-incorrect-warning-for-parallel-firstprivate-clause.patch


Remove incorrect warning for parallel firstprivate clause

2016-03-24  Tom de Vries  

* omp-low.c (lower_omp_target): Set TREE_NO_WARNING for oacc
firstprivate clause.

* c-c++-common/goacc/uninit-firstprivate-clause.c: New test.
* gfortran.dg/goacc/uninit-firstprivate-clause.f95: New test.

---
  gcc/omp-low.c  |  5 -
  .../goacc/uninit-firstprivate-clause.c | 25 ++
  .../goacc/uninit-firstprivate-clause.f95   | 18 
  3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d107961..41eb3c8 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -16068,7 +16068,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
  {
gcc_assert (is_gimple_omp_oacc (ctx->stmt));
if (!is_reference (var))
- var = build_fold_addr_expr (var);
+ {
+   TREE_NO_WARNING (var) = 1;
+   var = build_fold_addr_expr (var);
+ }
else
  talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
gimplify_assign (x, var, &ilist);
diff --git a/gcc/testsuite/c-c++-common/goacc/uninit-firstprivate-clause.c 
b/gcc/testsuite/c-c++-common/goacc/uninit-firstprivate-clause.c
new file mode 100644
index 000..3d3a03e
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/uninit-firstprivate-clause.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wuninitialized" } */
+
+void
+foo (void)
+{
+  int i;
+
+#pragma acc parallel
+  {
+i = 1;
+  }
+}
+
+
+void
+foo2 (void)
+{
+  int i;
+
+#pragma acc parallel firstprivate (i)
+  {
+i = 1;
+  }
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/uninit-firstprivate-clause.f95 
b/gcc/testsuite/gfortran.dg/goacc/uninit-firstprivate-clause.f95
new file mode 100644
index 000..c18765b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/uninit-firstprivate-clause.f95
@@ -0,0 +1,18 @@
+! { dg-do compile }
+! { dg-additional-options "-Wuninitialized" }
+
+subroutine test
+  INTEGER :: i
+
+  !$acc parallel
+  i = 1
+  !$acc end parallel
+end subroutine test
+
+subroutine test2
+  INTEGER :: i
+
+  !$acc parallel firstprivate (i)
+  i = 1
+  !$acc end parallel
+end subroutine test2





Re: [v3 PATCH] PR libstdc++/70437

2016-04-05 Thread Jonathan Wakely

On 04/04/16 21:45 +0300, Ville Voutilainen wrote:

And yes, -ENOPATCH.

On 4 April 2016 at 21:42, Ville Voutilainen  wrote:

Tested on Linux-PPC64.

2016-04-04  Ville Voutilainen  

 PR libstdc++/70437
 * include/bits/stl_pair.h (_ConstructiblePair,
_ImplicitlyConvertiblePair, _MoveConstructiblePair,
_ImplicitlyMoveConvertiblePair): Add shortcut conditions
for same-type cases.
* testsuite/20_util/pair/70437.cc: New.


Thanks for the fix.


diff --git a/libstdc++-v3/include/bits/stl_pair.h 
b/libstdc++-v3/include/bits/stl_pair.h
index 7057030..206553a 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -90,29 +90,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
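  template <typename _T1, typename _T2, typename _U1, typename _U2>
  constexpr bool _ConstructiblePair()
  {
-    return __and_<is_constructible<_T1, const _U1&>,
-                  is_constructible<_T2, const _U2&>>::value;
+    return __and_<__or_<is_same<typename decay<_T1>::type,
+                                typename decay<_U1>::type>,
+                        is_constructible<_T1, const _U1&>>,
+                  __or_<is_same<typename decay<_T2>::type,
+                                typename decay<_U2>::type>,
+                        is_constructible<_T2, const _U2&>>>::value;
  }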
  }


I wonder if we want an __is_samey trait that checks if two decayed
types are the same.

More seriously, a comment might be useful to explain that although
these "concepts" return true for samey types, that is just to prevent 
is_constructible from getting into a mess with incomplete types, and

actually for samey types one of the special member functions might end
up being chosen by overload resolution instead.

Did I get that right? If so, I definitely think it's worth a comment,
as I for one won't remember the details in a few months!



Re: [PR c/68966] Restore atomic builtins usage in libstdc++-v3

2016-04-05 Thread Jonathan Wakely

On 21/03/16 17:01 +0100, Thomas Schwinge wrote:

Hi!

On Mon, 21 Mar 2016 15:01:49 +, Jonathan Wakely  wrote:

On 21/03/16 13:08 +0100, Thomas Schwinge wrote:
>Per my (admittedly, not in-depth) reading of libstdc++-v3 source code,
>the _GLIBCXX_ATOMIC_BUILTINS conditional is only used in combination with
>the _Atomic_word data type, which in
>libstdc++-v3/doc/xml/manual/concurrency_extensions.xml is described as "a
>signed integral type" (so, matching the semantics as clarified by your
>patch).  That makes sense: it's used to keep reference counts, for
>example.  So, it seems sound to just remove the bool atomics check.

I agree that it doesn't make any sense to check whether atomics work
for bool when we only care about them for _Atomic_word, however ...


(Please review that it really is used only for that; I have only done a
quick scan of the libstdc++-v3 sources.)


My own checking agreed.


This would change the value of _GLIBCXX_ATOMIC_BUILTINS for any target
which was already failing the check for bool but passing it for the
other types. We would now switch to using atomic builtins where we
previously didn't use them, which could be a problem. I don't know if
there are any targets that would be affected, and if it would cause an
actual problem.


Assuming there are no other reasons that could have caused the bool
atomics checks to fail


A target without 1-byte atomics might fail the bool checks, but pass
the int and short ones.


(under the condition that the short and int ones
did and still do succeed), my patch just restores the state of a few
months ago, before Martin's bool atomics warning patch got committed.
So, I think it is safe to commit.


Would leaving the bool check in place, but just removing the
__atomic_fetch_add() part be better? It should still fix the
regression, but is less likely to change behaviour for targets that
were never using the builtins.


Yes, we could do that, but while I have not verified this, I assume that
it's very unlikely that there exists a configuration where the bool
atomics checks already used to fail but the short and int ones did and
still do succeed.  Anyway, that's not my decision to make.  ;-)


Well I guess it's mine, and this is a fairly serious regression (is it
tracked in Bugzilla anywhere?) so the patch is OK for trunk.



[PATCH][GCC 4.9][PR sanitizer/70474] Fix ASan failures on new Darwin platforms (OS X 10.11+, iOS 9.0+).

2016-04-05 Thread Maxim Ostapenko

Hi,

this is adapted patch from 
https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00803.html that fixes 
exactly the same issue (ASan fails on newer Darwin platforms) for GCC 4.9.4.
Regtested and bootstrapped on x86_64-unknown-linux-gnu by myself and on 
x86_64-apple-darwin15.2.0 by Dominique, OK for gcc-4_9-branch?


-Maxim
libsanitizer/ChangeLog:

2016-04-05  Maxim Ostapenko  

	PR sanitizer/70474
	* asan/asan_mac.cc (GetMacosVersionInternal): Cherry pick
	upstream r241487, 221379 and r224315.
	(void MaybeReexec): Cherry pick upstream r241487.
	* asan/asan_mac.h (enum MacosVersion): Cherry pick upstream r224315.

diff --git a/libsanitizer/asan/asan_mac.cc b/libsanitizer/asan/asan_mac.cc
index 8d01843..36e287c 100644
--- a/libsanitizer/asan/asan_mac.cc
+++ b/libsanitizer/asan/asan_mac.cc
@@ -70,7 +70,12 @@ MacosVersion GetMacosVersionInternal() {
 case '1': return MACOS_VERSION_LION;
 case '2': return MACOS_VERSION_MOUNTAIN_LION;
 case '3': return MACOS_VERSION_MAVERICKS;
-default: return MACOS_VERSION_UNKNOWN;
+case '4': return MACOS_VERSION_YOSEMITE;
+default:
+  if (IsDigit(version[1]))
+return MACOS_VERSION_UNKNOWN_NEWER;
+  else
+return MACOS_VERSION_UNKNOWN;
   }
 }
 default: return MACOS_VERSION_UNKNOWN;
@@ -136,6 +141,23 @@ void LeakyResetEnv(const char *name, const char *name_value) {
   }
 }
 
+bool DyldNeedsEnvVariable() {
+// If running on OS X 10.11+ or iOS 9.0+, dyld will interpose even if
+// DYLD_INSERT_LIBRARIES is not set.
+
+#if SANITIZER_IOSSIM
+  // GetMacosVersion will not work for the simulator, whose kernel version
+  // is tied to the host. Use a weak linking hack for the simulator.
+  // This API was introduced in the same version of the OS as the dyld
+  // optimization.
+
+  // Check for presence of a symbol that is available on OS X 10.11+, iOS 9.0+.
+  return (dlsym(RTLD_NEXT, "mach_memory_info") == nullptr);
+#else
+  return (GetMacosVersion() <= MACOS_VERSION_YOSEMITE);
+#endif
+}
+
 void MaybeReexec() {
   if (!flags()->allow_reexec) return;
   // Make sure the dynamic ASan runtime library is preloaded so that the
@@ -148,8 +170,9 @@ void MaybeReexec() {
   uptr old_env_len = dyld_insert_libraries ?
   internal_strlen(dyld_insert_libraries) : 0;
   uptr fname_len = internal_strlen(info.dli_fname);
-  if (!dyld_insert_libraries ||
-  !REAL(strstr)(dyld_insert_libraries, info.dli_fname)) {
+  bool lib_is_in_env =
+  dyld_insert_libraries && REAL(strstr)(dyld_insert_libraries, info.dli_fname);
+  if (DyldNeedsEnvVariable() && !lib_is_in_env) {
 // DYLD_INSERT_LIBRARIES is not set or does not contain the runtime
 // library.
 char program_name[1024];
@@ -180,6 +203,10 @@ void MaybeReexec() {
 }
 execv(program_name, *_NSGetArgv());
   } else {
+
+if (!lib_is_in_env)
+  return;
+
 // DYLD_INSERT_LIBRARIES is set and contains the runtime library.
 if (old_env_len == fname_len) {
   // It's just the runtime library name - fine to unset the variable.
diff --git a/libsanitizer/asan/asan_mac.h b/libsanitizer/asan/asan_mac.h
index 2d1d4b0..efa6eb5 100644
--- a/libsanitizer/asan/asan_mac.h
+++ b/libsanitizer/asan/asan_mac.h
@@ -41,7 +41,9 @@ enum MacosVersion {
   MACOS_VERSION_SNOW_LEOPARD,
   MACOS_VERSION_LION,
   MACOS_VERSION_MOUNTAIN_LION,
-  MACOS_VERSION_MAVERICKS
+  MACOS_VERSION_MAVERICKS,
+  MACOS_VERSION_YOSEMITE,
+  MACOS_VERSION_UNKNOWN_NEWER
 };
 
 // Used by asan_malloc_mac.cc and asan_mac.cc


Re: [v3 PATCH] PR libstdc++/70437

2016-04-05 Thread Ville Voutilainen
On 5 April 2016 at 13:53, Jonathan Wakely  wrote:
> I wonder if we want an __is_samey trait that checks if two decayed
> types are the same.

If such checks become more common, then yes. For now, perhaps not.

> More seriously, a comment might be useful to explain that although
> these "concepts" return true for samey types, that is just to prevent
> is_constructible from getting into a mess with incomplete types, and
> actually for samey types one of the special member functions might end
> up being chosen by overload resolution instead.
>
> Did I get that right? If so, I definitely think it's worth a comment,
> as I for one won't remember the details in a few months!

How about the attached new patch? I just added a comment at the top of these
"concept utilities". In general, there's an unfortunate amount of such trickery
needed to get pair and tuple right as far as their constraints go, to protect
the innocent overloads from getting input that they can't cope with. Such
constraints are evaluated during overload resolution, and in some cases that
evaluation will be done even for overloads that will certainly not be chosen,
so they have to be prepared for input that is hard to digest. That's one of the
reasons why 'if constexpr' will be a godsend, but I should not digress
there right now. :)


70437.diff2
Description: Binary data


Re: [RFC] introduce --param max-lto-partition for having an upper bound on partition size

2016-04-05 Thread Prathamesh Kulkarni
On 4 April 2016 at 19:44, Jan Hubicka  wrote:
>
>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
>> index 9eb63c2..bc0c612 100644
>> --- a/gcc/lto/lto-partition.c
>> +++ b/gcc/lto/lto-partition.c
>> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)
>>varpool_order.qsort (varpool_node_cmp);
>>
>>/* Compute partition size and create the first partition.  */
>> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))
>> +fatal_error (input_location, "min partition size cannot be greater than 
>> max partition size");
>> +
>>partition_size = total_size / n_lto_partitions;
>>if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
>>  partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
>> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))
>> +{
>> +  n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);
>> +  if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))
>> + n_lto_partitions++;
>> +  partition_size = total_size / n_lto_partitions;
>> +}
>
> lto_balanced_map actually works in a way that looks for cheapest cutpoint in 
> range
> 3/4*parittion_size to 2*partition_size and picks the cheapest range.
> Setting partition_size to this value will thus not cause partitioner to 
> produce smaller
> partitions only.  I suppose modify the conditional:
>
>   /* Partition is too large, unwind into step when best cost was reached 
> and
>  start new partition.  */
>   if (partition->insns > 2 * partition_size)
>
> and/or in the code above set the partition_size to half of 
> total_size/max_size.
>
> I know this is somewhat sloppy.  This was really just first cut implementation
> many years ago. I expected to reimplement it marter soon, but then there was
> never really a need for it (I am trying to avoid late IPA optimizations so the
> partitioning decisions should mostly affect compile time performance only).
> If ARM is more sensitive for partitining, perhaps it would make sense to try 
> to
> look for something smarter.
>
>> +
>>npartitions = 1;
>>partition = new_partition ("");
>>if (symtab->dump_file)
>> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
>> index 9dd513f..294b8a4 100644
>> --- a/gcc/lto/lto.c
>> +++ b/gcc/lto/lto.c
>> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)
>>timevar_pop (TV_WHOPR_WPA);
>>
>>timevar_push (TV_WHOPR_PARTITIONING);
>> +
>> +  if (flag_lto_partition != LTO_PARTITION_BALANCED
>> +  && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)
>> +fatal_error (input_location, "--param max-lto-partition should only"
>> +  " be used with balanced partitioning\n");
>> +
>
> I think we should wire in resonable MAX_PARTITION_SIZE default.  THe value you
> found experimentally may be a good start. For that reason we can't really
> refuse a value when !LTO_PARTITION_BALANCED.  Just document it as parameter 
> for
> balanced partitioning only and add a parameter to lto_balanced_map specifying 
> whether
> this param should be honored (because the same path is used for partitioning 
> to one partition)
>
> Otherwise the patch looks good to me modulo missing documentation.
Thanks for the review. I have updated the patch.
Does this version look OK ?
I had randomly chosen 10000, not sure if that's an appropriate value
for the default.

I have a silly question about partitioning: does placing a caller and its
callee in separate partitions hamper IPA optimizations?  For instance, if the
callee is supposed to be inlined into the caller, would inlining still take
place if they end up in separate partitions?
I tried a trivial example with -flto-partition=max, which created 3
partitions for 3 functions (bar, foo and main), and it was still able to
inline bar into foo and foo into main.  I am not sure how that happens.
I thought ltrans could perform transformations on functions only within
a single partition and not across partitions?

Thanks,
Prathamesh
>
> Honza
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9e54bb7..f0de7ec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9477,6 +9477,11 @@ Size of minimal partition for WHOPR (in estimated 
instructions).
 This prevents expenses of splitting very small programs into too many
 partitions.
 
+@item lto-max-partition
+Size of max partition for WHOPR (in estimated instructions).
+to provide an upper bound for individual size of partition.
+Meant to be used only with balanced partitioning.
+
 @item cxx-max-namespaces-for-diagnostic-help
 The maximum number of namespaces to consult for suggestions when C++
 name lookup fails for an identifier.  The default is 1000.
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..d385dd9 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -447,7 +447,7 @@ add_sorted_nodes (vec &next_nodes, 
ltrans_partition partition)
and in-partition calls was reached.  */
 
 vo

Re: [PATCH][GCC 4.9][PR sanitizer/70474] Fix ASan failures on new Darwin platforms (OS X 10.11+, iOS 9.0+).

2016-04-05 Thread Jakub Jelinek
On Tue, Apr 05, 2016 at 02:05:12PM +0300, Maxim Ostapenko wrote:
> this is adapted patch from
> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00803.html that fixes exactly
> the same issue (ASan fails on newer Darwin platforms) for GCC 4.9.4.
> Regtested and bootstrapped on x86_64-unknown-linux-gnu by myself and on
> x86_64-apple-darwin15.2.0 by Dominique, OK for gcc-4_9-branch?

Ok.

> libsanitizer/ChangeLog:
> 
> 2016-04-05  Maxim Ostapenko  
> 
>   PR sanitizer/70474
>   * asan/asan_mac.cc (GetMacosVersionInternal): Cherry pick
>   upstream r241487, 221379 and r224315.
>   (void MaybeReexec): Cherry pick upstream r241487.
>   * asan/asan_mac.h (enum MacosVersion): Cherry pick upstream r224315.

Jakub


Re: [PATCH] Fix detection of setrlimit in libstdc++ testsuite

2016-04-05 Thread Jonathan Wakely

This patch fixes an obscure cross-testing problem that crashed (OOMed) our 
boards at Linaro.  Several tests in libstdc++ (e.g., [1]) limit themselves to 
some reasonable amount of RAM and then try to allocate 32 gigs.  Unfortunately, 
the configure test that checks for the presence of setrlimit is rather strange: 
if the target is native, then try to compile a file with a call to setrlimit -- 
if compilation succeeds, then use setrlimit, otherwise, ignore setrlimit.  The 
strange part is that the compilation check is done only for native targets, as 
if cross-toolchains can't generate working executables.  [This is rather odd, 
and I might be missing some underlying caveat.]


I went spelunking, and the IS_NATIVE check has been there since
r70167, which replaced:

 if test  x"$GLIBCXX_IS_CROSS_COMPILING" = xfalse; then
   # Do checks for memory limit functions.
   GLIBCXX_CHECK_SETRLIMIT

That arrived in r68067, but that seems to be just a refactoring, and I
got lost tracking it further.

So there has been a similar check since at least 2003.


Therefore, when testing a cross toolchain, the test [1] still tries to allocate 
32GB of RAM with no setrlimit restrictions.  On most targets that people use 
for cross-testing this is not an issue because either
- the target is 32-bit, so there is no 32GB user-space to speak of, or
- the target board has small amount of RAM and no swap, so allocation 
immediately fails, or
- the target board has plenty of RAM, so allocating 32GB is not an issue.

However, if one is testing on a 64-bit board with 16GB of RAM and 16GB of swap, 
then one gets into an obscure near-OOM swapping condition.  This is exactly the 
case with cross-testing aarch64-linux-gnu toolchains on APM Mustang.

The attached patch removes "native" restriction from configure test for 
setrlimit.  This enables setrlimit restrictions on the testsuite, and the test [1] 
expectedly fails to allocate 32GB due to setrlimit restriction.

I have tested it on x86_64-linux-gnu and i686-linux-gnu native toolchains, and 
aarch64-linux-gnu and arm-linux-gnueabi[hf] cross-toolchains with no 
regressions [*].

OK to commit?


This issue has been present for well over a decade so it doesn't seem
critical to fix in stage4, but as it only affects the testsuite I am
OK with the change if the RMs have no objections.



Re: [v3 PATCH] PR libstdc++/70437

2016-04-05 Thread Jonathan Wakely

On 05/04/16 14:07 +0300, Ville Voutilainen wrote:

On 5 April 2016 at 13:53, Jonathan Wakely  wrote:

I wonder if we want an __is_samey trait that checks if two decayed
types are the same.


If such checks become more common, then yes. For now, perhaps not.


We already do it in packaged_task, function, any, and optional.

I'll look into doing that in stage 1.


More seriously, a comment might be useful to explain that although
these "concepts" return true for samey types, that is just to prevent
is_constructible from getting into a mess with incomplete types, and
actually for samey types one of the special member functions might end
up being chosen by overload resolution instead.

Did I get that right? If so, I definitely think it's worth a comment,
as I for one won't remember the details in a few months!


How about the attached new patch?


Great - OK for trunk, thanks.


I just added a comment at the top of these
"concept utilities". In general, there's an unfortunate amount of such trickery
needed to get pair and tuple right as far as their constraints go, to protect
the innocent overloads from getting input that they can't cope with, as such
constraints are evaluated during overload resolution, and in some cases that
evaluation will be done even for overloads that will certainly not be
chosen, but
they have to be prepared for input that is hard to digest. That's one of the
reasons why 'if constexpr' will be a godsend, but I should not digress
there right now. :)


I'd sell a kidney for `if constexpr` right now ;-)



Re: [RFC] introduce --param max-lto-partition for having an upper bound on partition size

2016-04-05 Thread Richard Biener
On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

> On 4 April 2016 at 19:44, Jan Hubicka  wrote:
> >
> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> >> index 9eb63c2..bc0c612 100644
> >> --- a/gcc/lto/lto-partition.c
> >> +++ b/gcc/lto/lto-partition.c
> >> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)
> >>varpool_order.qsort (varpool_node_cmp);
> >>
> >>/* Compute partition size and create the first partition.  */
> >> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))
> >> +fatal_error (input_location, "min partition size cannot be greater 
> >> than max partition size");
> >> +
> >>partition_size = total_size / n_lto_partitions;
> >>if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
> >>  partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
> >> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))
> >> +{
> >> +  n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);
> >> +  if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))
> >> + n_lto_partitions++;
> >> +  partition_size = total_size / n_lto_partitions;
> >> +}
> >
> > lto_balanced_map actually works in a way that looks for cheapest cutpoint 
> > in range
> > 3/4*parittion_size to 2*partition_size and picks the cheapest range.
> > Setting partition_size to this value will thus not cause partitioner to 
> > produce smaller
> > partitions only.  I suppose modify the conditional:
> >
> >   /* Partition is too large, unwind into step when best cost was 
> > reached and
> >  start new partition.  */
> >   if (partition->insns > 2 * partition_size)
> >
> > and/or in the code above set the partition_size to half of 
> > total_size/max_size.
> >
> > I know this is somewhat sloppy.  This was really just first cut 
> > implementation
> > many years ago. I expected to reimplement it marter soon, but then there was
> > never really a need for it (I am trying to avoid late IPA optimizations so 
> > the
> > partitioning decisions should mostly affect compile time performance only).
> > If ARM is more sensitive for partitining, perhaps it would make sense to 
> > try to
> > look for something smarter.
> >
> >> +
> >>npartitions = 1;
> >>partition = new_partition ("");
> >>if (symtab->dump_file)
> >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> >> index 9dd513f..294b8a4 100644
> >> --- a/gcc/lto/lto.c
> >> +++ b/gcc/lto/lto.c
> >> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)
> >>timevar_pop (TV_WHOPR_WPA);
> >>
> >>timevar_push (TV_WHOPR_PARTITIONING);
> >> +
> >> +  if (flag_lto_partition != LTO_PARTITION_BALANCED
> >> +  && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)
> >> +fatal_error (input_location, "--param max-lto-partition should only"
> >> +  " be used with balanced partitioning\n");
> >> +
> >
> > I think we should wire in resonable MAX_PARTITION_SIZE default.  THe value 
> > you
> > found experimentally may be a good start. For that reason we can't really
> > refuse a value when !LTO_PARTITION_BALANCED.  Just document it as parameter 
> > for
> > balanced partitioning only and add a parameter to lto_balanced_map 
> > specifying whether
> > this param should be honored (because the same path is used for 
> > partitioning to one partition)
> >
> > Otherwise the patch looks good to me modulo missing documentation.
> Thanks for the review. I have updated the patch.
> Does this version look OK ?
> I had randomly chosen 1, not sure if that's an appropriate value
> for default.

I think it's way too small.  This is roughly the number of GIMPLE stmts
(thus roughly the number of instructions).  So with, say, an 8-byte
instruction format it is on the order of 80kB.  You'd want to have a
default of at least several ten times large-unit-insns (also 10000).
I'd choose something like 1000000 (one million).  I find the lto-min-partition
number quite small as well (and would up it by a factor of 10).

Richard.

> I have a silly question about partitioning: Does it hamper
> transformations on ipa optimizations if caller and
> callee get placed in separate partitions ? For instance if callee is
> supposed to be inlined
> into caller, would inlining still take place if callee and caller get
> placed in separate partitions ?
> I tried with a trivial example with -flto-partition=max
> which created 3 partitions for 3 functions (bar, foo and main), and it was
> able to inline bar into foo and foo into main.  I am not sure how that 
> happens.
> I thought ltrans can perform transformations on functions only within
> a single partition
> and not across partitions ?
> 
> Thanks,
> Prathamesh
> >
> > Honza
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


[PATCH] Fix PR70526

2016-04-05 Thread Richard Biener

SRA uses bogus alias sets for access replacements if an alias-punning
MEM_REF is based on a decl.  The following patch fixes this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

The testcase is a GCC 6 regression but the issue is older.

Richard.

2016-04-05  Richard Biener  

PR tree-optimization/70526
* tree-sra.c (build_ref_for_offset): Use prev_base to
extract the alias pointer type.

* g++.dg/torture/pr70526.C: New testcase.

Index: gcc/tree-sra.c
===
*** gcc/tree-sra.c  (revision 234736)
--- gcc/tree-sra.c  (working copy)
*** build_ref_for_offset (location_t loc, tr
*** 1673,1679 
  }
else
  {
!   off = build_int_cst (reference_alias_ptr_type (base),
   base_offset + offset / BITS_PER_UNIT);
base = build_fold_addr_expr (unshare_expr (base));
  }
--- 1673,1679 
  }
else
  {
!   off = build_int_cst (reference_alias_ptr_type (prev_base),
   base_offset + offset / BITS_PER_UNIT);
base = build_fold_addr_expr (unshare_expr (base));
  }
Index: gcc/testsuite/g++.dg/torture/pr70526.C
===
*** gcc/testsuite/g++.dg/torture/pr70526.C  (revision 0)
--- gcc/testsuite/g++.dg/torture/pr70526.C  (working copy)
***
*** 0 
--- 1,35 
+ // { dg-do run }
+ 
+ typedef unsigned uint32_t;
+ 
+ template
+ struct AlignedStorage2
+ {
+   char mBytes[sizeof(T)];
+ 
+   const T* addr() const { return reinterpret_cast(mBytes); }
+   T* addr() { return reinterpret_cast(mBytes); }
+ };
+ 
+ struct Register {
+ uint32_t reg_;
+ };
+ 
+ class TypedOrValueRegister
+ {
+   AlignedStorage2 typed;
+   __attribute__((noinline)) Register& dataTyped() { return *typed.addr(); }
+ public:
+   TypedOrValueRegister(Register reg)
+ {
+   dataTyped() = reg;
+ }
+   Register typedReg() const { return *typed.addr(); }
+ };
+ 
+ int main() {
+ Register reg = { 10u };
+ if (TypedOrValueRegister(reg).typedReg().reg_ != 10)
+   __builtin_abort();
+ return 0;
+ }


Re: [RFC] introduce --param max-lto-partition for having an upper bound on partition size

2016-04-05 Thread Prathamesh Kulkarni
On 5 April 2016 at 16:58, Richard Biener  wrote:
> On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:
>
>> On 4 April 2016 at 19:44, Jan Hubicka  wrote:
>> >
>> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
>> >> index 9eb63c2..bc0c612 100644
>> >> --- a/gcc/lto/lto-partition.c
>> >> +++ b/gcc/lto/lto-partition.c
>> >> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)
>> >>varpool_order.qsort (varpool_node_cmp);
>> >>
>> >>/* Compute partition size and create the first partition.  */
>> >> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE 
>> >> (MAX_PARTITION_SIZE))
>> >> +fatal_error (input_location, "min partition size cannot be greater 
>> >> than max partition size");
>> >> +
>> >>partition_size = total_size / n_lto_partitions;
>> >>if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
>> >>  partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
>> >> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))
>> >> +{
>> >> +  n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);
>> >> +  if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))
>> >> + n_lto_partitions++;
>> >> +  partition_size = total_size / n_lto_partitions;
>> >> +}
>> >
>> > lto_balanced_map actually works in a way that looks for cheapest cutpoint 
>> > in range
>> > 3/4*parittion_size to 2*partition_size and picks the cheapest range.
>> > Setting partition_size to this value will thus not cause partitioner to 
>> > produce smaller
>> > partitions only.  I suppose modify the conditional:
>> >
>> >   /* Partition is too large, unwind into step when best cost was 
>> > reached and
>> >  start new partition.  */
>> >   if (partition->insns > 2 * partition_size)
>> >
>> > and/or in the code above set the partition_size to half of 
>> > total_size/max_size.
>> >
>> > I know this is somewhat sloppy.  This was really just first cut 
>> > implementation
>> > many years ago. I expected to reimplement it marter soon, but then there 
>> > was
>> > never really a need for it (I am trying to avoid late IPA optimizations so 
>> > the
>> > partitioning decisions should mostly affect compile time performance only).
>> > If ARM is more sensitive for partitining, perhaps it would make sense to 
>> > try to
>> > look for something smarter.
>> >
>> >> +
>> >>npartitions = 1;
>> >>partition = new_partition ("");
>> >>if (symtab->dump_file)
>> >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
>> >> index 9dd513f..294b8a4 100644
>> >> --- a/gcc/lto/lto.c
>> >> +++ b/gcc/lto/lto.c
>> >> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)
>> >>timevar_pop (TV_WHOPR_WPA);
>> >>
>> >>timevar_push (TV_WHOPR_PARTITIONING);
>> >> +
>> >> +  if (flag_lto_partition != LTO_PARTITION_BALANCED
>> >> +  && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)
>> >> +fatal_error (input_location, "--param max-lto-partition should only"
>> >> +  " be used with balanced partitioning\n");
>> >> +
>> >
>> > I think we should wire in resonable MAX_PARTITION_SIZE default.  THe value 
>> > you
>> > found experimentally may be a good start. For that reason we can't really
>> > refuse a value when !LTO_PARTITION_BALANCED.  Just document it as 
>> > parameter for
>> > balanced partitioning only and add a parameter to lto_balanced_map 
>> > specifying whether
>> > this param should be honored (because the same path is used for 
>> > partitioning to one partition)
>> >
>> > Otherwise the patch looks good to me modulo missing documentation.
>> Thanks for the review. I have updated the patch.
>> Does this version look OK ?
>> I had randomly chosen 1, not sure if that's an appropriate value
>> for default.
>
> I think it's way too small.  This is roughly the number of GIMPLE stmts
> (thus roughly the number of instructions).  So with say a 8 byte
> instruction format it is on the order of 80kB.  You'd want to have a
> default of at least several ten times of large-unit-insns (also 1).
> I'd choose sth like 100 (one million).  I find the lto-min-partition
> number quite small as well (and up it by a factor of 10).
Done in this version.
Is it OK after bootstrap+test ?

Thanks,
Prathamesh
>
> Richard.
>
>> I have a silly question about partitioning: Does it hamper
>> transformations on ipa optimizations if caller and
>> callee get placed in separate partitions ? For instance if callee is
>> supposed to be inlined
>> into caller, would inlining still take place if callee and caller get
>> placed in separate partitions ?
>> I tried with a trivial example with -flto-partition=max
>> which created 3 partitions for 3 functions (bar, foo and main), and it was
>> able to inline bar into foo and foo into main.  I am not sure how that 
>> happens.
>> I thought ltrans can perform transformations on functions only within
>> a single partition
>> and not across partitions ?
>>
>> Thanks,
>> Prathamesh
>> >
>> > Honza
>>
>
> --
> Richard Biener 
> SUSE L

Re: [RFC] introduce --param max-lto-partition for having an upper bound on partition size

2016-04-05 Thread Richard Biener
On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

> On 5 April 2016 at 16:58, Richard Biener  wrote:
> > On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:
> >
> >> On 4 April 2016 at 19:44, Jan Hubicka  wrote:
> >> >
> >> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> >> >> index 9eb63c2..bc0c612 100644
> >> >> --- a/gcc/lto/lto-partition.c
> >> >> +++ b/gcc/lto/lto-partition.c
> >> >> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)
> >> >>varpool_order.qsort (varpool_node_cmp);
> >> >>
> >> >>/* Compute partition size and create the first partition.  */
> >> >> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE 
> >> >> (MAX_PARTITION_SIZE))
> >> >> +fatal_error (input_location, "min partition size cannot be greater 
> >> >> than max partition size");
> >> >> +
> >> >>partition_size = total_size / n_lto_partitions;
> >> >>if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
> >> >>  partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
> >> >> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))
> >> >> +{
> >> >> +  n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);
> >> >> +  if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))
> >> >> + n_lto_partitions++;
> >> >> +  partition_size = total_size / n_lto_partitions;
> >> >> +}
> >> >
> >> > lto_balanced_map actually works in a way that looks for cheapest 
> >> > cutpoint in range
> >> > 3/4*parittion_size to 2*partition_size and picks the cheapest range.
> >> > Setting partition_size to this value will thus not cause partitioner to 
> >> > produce smaller
> >> > partitions only.  I suppose modify the conditional:
> >> >
> >> >   /* Partition is too large, unwind into step when best cost was 
> >> > reached and
> >> >  start new partition.  */
> >> >   if (partition->insns > 2 * partition_size)
> >> >
> >> > and/or in the code above set the partition_size to half of 
> >> > total_size/max_size.
> >> >
> >> > I know this is somewhat sloppy.  This was really just first cut 
> >> > implementation
> >> > many years ago. I expected to reimplement it marter soon, but then there 
> >> > was
> >> > never really a need for it (I am trying to avoid late IPA optimizations 
> >> > so the
> >> > partitioning decisions should mostly affect compile time performance 
> >> > only).
> >> > If ARM is more sensitive for partitining, perhaps it would make sense to 
> >> > try to
> >> > look for something smarter.
> >> >
> >> >> +
> >> >>npartitions = 1;
> >> >>partition = new_partition ("");
> >> >>if (symtab->dump_file)
> >> >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> >> >> index 9dd513f..294b8a4 100644
> >> >> --- a/gcc/lto/lto.c
> >> >> +++ b/gcc/lto/lto.c
> >> >> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)
> >> >>timevar_pop (TV_WHOPR_WPA);
> >> >>
> >> >>timevar_push (TV_WHOPR_PARTITIONING);
> >> >> +
> >> >> +  if (flag_lto_partition != LTO_PARTITION_BALANCED
> >> >> +  && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)
> >> >> +fatal_error (input_location, "--param max-lto-partition should 
> >> >> only"
> >> >> +  " be used with balanced partitioning\n");
> >> >> +
> >> >
> >> > I think we should wire in resonable MAX_PARTITION_SIZE default.  THe 
> >> > value you
> >> > found experimentally may be a good start. For that reason we can't really
> >> > refuse a value when !LTO_PARTITION_BALANCED.  Just document it as 
> >> > parameter for
> >> > balanced partitioning only and add a parameter to lto_balanced_map 
> >> > specifying whether
> >> > this param should be honored (because the same path is used for 
> >> > partitioning to one partition)
> >> >
> >> > Otherwise the patch looks good to me modulo missing documentation.
> >> Thanks for the review. I have updated the patch.
> >> Does this version look OK ?
> >> I had randomly chosen 1, not sure if that's an appropriate value
> >> for default.
> >
> > I think it's way too small.  This is roughly the number of GIMPLE stmts
> > (thus roughly the number of instructions).  So with say a 8 byte
> > instruction format it is on the order of 80kB.  You'd want to have a
> > default of at least several ten times of large-unit-insns (also 1).
> > I'd choose sth like 100 (one million).  I find the lto-min-partition
> > number quite small as well (and up it by a factor of 10).
> Done in this version.

I'd do that separately.

Please no default parameter for lto_balanced_map (), instead change
all callers.

> Is it OK after bootstrap+test ?

Note that this is for stage1 only.  I'll leave approval to Honza
(also verification of the default max param - not sure if for example
chromium or firefox should/will be split to more than 32 partitions
with the patch)

Richard.

> Thanks,
> Prathamesh
> >
> > Richard.
> >
> >> I have a silly question about partitioning: Does it hamper
> >> transformations on ipa optimizations if caller and
> >> callee get placed 

Re: [wwwdocs] [1/3] projects/cxx-status.html -- introduce global CSS for tables

2016-04-05 Thread Jason Merrill

Thanks a lot!  One thing:

On 04/02/2016 06:19 AM, Gerald Pfeifer wrote:

@@ -390,6 +389,7 @@
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2118.html";>N2118
GCC 
4.3
__cpp_rvalue_references >= 200610
+  
  
  
Rvalue references for 
*this


This doesn't seem to have been needed; now when I look at the page I see 
an extra tiny cell at the end of this row.


Jason


Re: [PATCH] Fix PR c++/70452 (regression in C++ parsing performance)

2016-04-05 Thread Jason Merrill

OK, thanks.

Jason


[patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-05 Thread Jonathan Wakely

This makes the execute_native_thread_routine* helpers 'static' rather
than using an unnamed namespace, because the latter doesn't give them
internal linkage due to them being extern "C".

Tested x86_64-linux, committed to trunk. Backports to gcc-5 and
gcc-4.9 to follow soon.

commit 84538059d9d8608c550e11627b28c5ff34fb2edd
Author: Jonathan Wakely 
Date:   Tue Apr 5 14:17:57 2016 +0100

Ensure std::thread helpers have internal linkage

	PR libstdc++/70503
	* src/c++11/thread.cc (execute_native_thread_routine,
	execute_native_thread_routine_compat): Give internal linkage.
	* testsuite/30_threads/thread/70503.cc: New test.

diff --git a/libstdc++-v3/src/c++11/thread.cc b/libstdc++-v3/src/c++11/thread.cc
index d2a7dc3..a027fcf 100644
--- a/libstdc++-v3/src/c++11/thread.cc
+++ b/libstdc++-v3/src/c++11/thread.cc
@@ -71,9 +71,9 @@ static inline int get_nprocs()
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
-  namespace
+  extern "C"
   {
-extern "C" void*
+static void*
 execute_native_thread_routine(void* __p)
 {
   thread::_State_ptr __t{ static_cast(__p) };
@@ -95,7 +95,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 }
 
 #if _GLIBCXX_THREAD_ABI_COMPAT
-extern "C" void*
+static void*
 execute_native_thread_routine_compat(void* __p)
 {
   thread::_Impl_base* __t = static_cast(__p);
@@ -121,7 +121,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
   return nullptr;
 }
 #endif
-  }
+  } // extern "C"
 
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
diff --git a/libstdc++-v3/testsuite/30_threads/thread/70503.cc b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
new file mode 100644
index 000..1763ac2
--- /dev/null
+++ b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
@@ -0,0 +1,33 @@
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-do link }
+// { dg-options " -std=gnu++11 -static" { target *-*-*gnu* } }
+// { dg-require-cstdint "" }
+// { dg-require-gthreads "" }
+
+extern "C" {
+  void execute_native_thread_routine(void);
+  void execute_native_thread_routine_compat(void);
+}
+
+int main()
+{
+  execute_native_thread_routine(); // { dg-error "undefined reference" }
+  execute_native_thread_routine_compat(); // { dg-error "undefined reference" }
+}
+// { dg-prune-output "collect2: error: ld returned" }


Re: [wwwdocs] [1/3] projects/cxx-status.html -- introduce global CSS for tables

2016-04-05 Thread Gerald Pfeifer
On Tue, 5 Apr 2016, Jason Merrill wrote:
>> href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2118.html";>N2118
>> GCC 
>> 4.3
>> __cpp_rvalue_references >= 200610
>> +  
>>   
> 
> This doesn't seem to have been needed; now when I look at the page I see 
> an extra tiny cell at the end of this row.

You are right, Jason.  There was one instance, where I had to add
an empty cell.  This was not it, and must have sneaked in somehow.

Good catch! Fixed thusly.

Gerald

Index: projects/cxx-status.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/projects/cxx-status.html,v
retrieving revision 1.7
diff -u -r1.7 cxx-status.html
--- projects/cxx-status.html4 Apr 2016 04:50:05 -   1.7
+++ projects/cxx-status.html5 Apr 2016 15:35:57 -
@@ -380,7 +380,6 @@
   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2118.html";>N2118
   GCC 4.3
   __cpp_rvalue_references >= 200610
-  
 
 
   Rvalue references for *this


Re: [PING][PATCH] Remove incorrect warning for parallel firstprivate clause

2016-04-05 Thread Jakub Jelinek
On Tue, Apr 05, 2016 at 12:17:16PM +0200, Tom de Vries wrote:
> On 24/03/16 18:02, Tom de Vries wrote:
> >Remove incorrect warning for parallel firstprivate clause
> >
> >2016-03-24  Tom de Vries  
> >
> > * omp-low.c (lower_omp_target): Set TREE_NO_WARNING for oacc
> > firstprivate clause.
> >
> > * c-c++-common/goacc/uninit-firstprivate-clause.c: New test.
> > * gfortran.dg/goacc/uninit-firstprivate-clause.f95: New test.
> >
> >---
> >  gcc/omp-low.c  |  5 -
> >  .../goacc/uninit-firstprivate-clause.c | 25 
> > ++
> >  .../goacc/uninit-firstprivate-clause.f95   | 18 
> >  3 files changed, 47 insertions(+), 1 deletion(-)
> >
> >diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> >index d107961..41eb3c8 100644
> >--- a/gcc/omp-low.c
> >+++ b/gcc/omp-low.c
> >@@ -16068,7 +16068,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
> >omp_context *ctx)
> >   {
> > gcc_assert (is_gimple_omp_oacc (ctx->stmt));
> > if (!is_reference (var))
> >-  var = build_fold_addr_expr (var);
> >+  {
> >+TREE_NO_WARNING (var) = 1;
> >+var = build_fold_addr_expr (var);
> >+  }

IMHO it should be done only if var is is_gimple_reg (var), otherwise all
that happens on the caller side is that you take the address of the actual
variable.  Also, I think it would be better to do this only
for implicit firstprivate (and map) clauses, if somebody uses explicit
firstprivate on a var, I think it is better to warn if the var is
uninitialized, the user can then use private clause instead.

BTW, some undesirable warnings are also on OpenMP code (I'm adding
TREE_NO_WARNING already in case of shared clause), I've filed PR70550
to track this and will attach there a patch soon.

Jakub


[wwwdocs] [5/3] projects/cxx-status.html -- introduce global CSS for tables

2016-04-05 Thread Gerald Pfeifer
Somehow four cases of inline styles had remained, which did not
actually make a difference anymore, but still can be simplified.

Applied.

Gerald

Remove four final, and superfluous, cases of style="text-align:center;".

Index: projects/cxx-status.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/projects/cxx-status.html,v
retrieving revision 1.7
diff -u -r1.7 cxx-status.html
--- projects/cxx-status.html4 Apr 2016 04:50:05 -   1.7
+++ projects/cxx-status.html5 Apr 2016 15:34:45 -
@@ -174,14 +174,7 @@
 is experimental. No attempt will be made to maintain
 backward compatibility with implementations of features that do not reflect
 the final standard.
-
+
   
 
   Technical Specification
@@ -193,14 +186,14 @@
 
   Concepts
   http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4377.pdf";>N4377
-   6 
+  6
   -fconcepts
__cpp_concepts >= 201507 
 
 
   Transactional Memory
   http://www.open-std.org/Jtc1/sc22/wg21/docs/papers/2015/n4514.pdf";>N4514
-   6 (no 
atomic_cancel)
+  6 (no atomic_cancel)
   -fgnu-tm
__cpp_transactional_memory >= 201505 
 
@@ -670,7 +662,7 @@
 
 
 
-  Concurrency
+  Concurrency
 
 
 
@@ -742,7 +734,7 @@
 
 
 
-  C99 Features in C++11
+  C99 Features in C++11
 
 
 


[PATCH, cpp] Fix pr61817 and 69391

2016-04-05 Thread Richard Henderson
These two related PRs are all about remembering where a macro is expanded. 
Worse, we've got two competing goals -- the real location of the expansion, for 
__LINE__, and the virtual location of the expansion, for diagnostics.


There seems to be no way to unify the two competing goals.  If we simply "fix" 
the first, we break the second.  Therefore, I resort to passing down both 
locations.


Ok?


r~
* internal.h (_cpp_builtin_macro_text): Update decl.
* macro.c (_cpp_builtin_macro_text): Accept location for __LINE__.
(builtin_macro): Accept a second location for __LINE__.
(enter_macro_context): Compute both virtual and real expansion
locations for the macro.

* gcc.dg/pr61817.c: New test.
* gcc.dg/pr69391-1.c: New test.
* gcc.dg/pr69391-2.c: New test.


diff --git a/gcc/testsuite/gcc.dg/pr61817.c b/gcc/testsuite/gcc.dg/pr61817.c
new file mode 100644
index 000..4230485
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr61817.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c11 -ftrack-macro-expansion=0" } */
+
+#define A(x) _Static_assert(x, #x)
+#define F(x, y, z) a = __LINE__, b = x ## y, c = z
+
+enum {
+#line 10
+F
+ (
+  __LI,
+  NE__,
+  __LINE__
+  )
+};
+
+A(a == 15);
+A(b == 15);
+A(c == 15);
diff --git a/gcc/testsuite/gcc.dg/pr69391-1.c b/gcc/testsuite/gcc.dg/pr69391-1.c
new file mode 100644
index 000..15e49dc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr69391-1.c
@@ -0,0 +1,12 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftrack-macro-expansion=0" } */
+#define STR_I(X) #X
+#define STR(X) STR_I(X)
+#define LINE STR(__LINE__) STR(__LINE__)
+int main()
+{
+  const char *s = LINE;
+  if (s[0] != '8' || s[1] != '8')
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/pr69391-2.c b/gcc/testsuite/gcc.dg/pr69391-2.c
new file mode 100644
index 000..7d2faae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr69391-2.c
@@ -0,0 +1,12 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftrack-macro-expansion=1" } */
+#define STR_I(X) #X
+#define STR(X) STR_I(X)
+#define LINE STR(__LINE__) STR(__LINE__)
+int main()
+{
+  const char *s = LINE;
+  if (s[0] != '8' || s[1] != '8')
+__builtin_abort ();
+  return 0;
+}
diff --git a/libcpp/internal.h b/libcpp/internal.h
index bafd480..9ce8707 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -626,7 +626,8 @@ extern bool _cpp_save_parameter (cpp_reader *, cpp_macro *, 
cpp_hashnode *,
 extern bool _cpp_arguments_ok (cpp_reader *, cpp_macro *, const cpp_hashnode *,
   unsigned int);
 extern const unsigned char *_cpp_builtin_macro_text (cpp_reader *,
-cpp_hashnode *);
+cpp_hashnode *,
+source_location = 0);
 extern int _cpp_warn_if_unused_macro (cpp_reader *, cpp_hashnode *, void *);
 extern void _cpp_push_token_context (cpp_reader *, cpp_hashnode *,
 const cpp_token *, unsigned int);
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 759fbe7..c251553 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -93,7 +93,8 @@ struct macro_arg_saved_data {
 
 static int enter_macro_context (cpp_reader *, cpp_hashnode *,
const cpp_token *, source_location);
-static int builtin_macro (cpp_reader *, cpp_hashnode *, source_location);
+static int builtin_macro (cpp_reader *, cpp_hashnode *,
+ source_location, source_location);
 static void push_ptoken_context (cpp_reader *, cpp_hashnode *, _cpp_buff *,
 const cpp_token **, unsigned int);
 static void push_extended_tokens_context (cpp_reader *, cpp_hashnode *,
@@ -229,7 +230,8 @@ static const char * const monthnames[] =
 /* Helper function for builtin_macro.  Returns the text generated by
a builtin macro. */
 const uchar *
-_cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node)
+_cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node,
+source_location loc)
 {
   const uchar *result = NULL;
   linenum_type number = 1;
@@ -319,11 +321,14 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode 
*node)
 case BT_SPECLINE:
   /* If __LINE__ is embedded in a macro, it must expand to the
 line of the macro's invocation, not its definition.
-Otherwise things like assert() will not work properly.  */
-  number = linemap_get_expansion_line (pfile->line_table,
-  CPP_OPTION (pfile, traditional)
-  ? pfile->line_table->highest_line
-  : pfile->cur_token[-1].src_loc);
+Otherwise things like assert() will not work properly.
+See WG14 N1911, WG21 N4220 sec 6.5, and PR 61861.  */
+  if (CPP_OP

Re: [wwwdocs] [1/3] projects/cxx-status.html -- introduce global CSS for tables

2016-04-05 Thread Gerald Pfeifer
On Sat, 2 Apr 2016, Gerald Pfeifer wrote:
> Index: gcc.css
> ===
:
> +/* C++ status tables. */
> +table.cxxstatus th, td { border: 1px solid gray; }

Intuition is not always a good guide.  The above applied this
style to all <td> elements.  Fixed thusly.

Gerald

Index: gcc.css
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc.css,v
retrieving revision 1.37
diff -u -r1.37 gcc.css
--- gcc.css 4 Apr 2016 04:50:04 -   1.37
+++ gcc.css 5 Apr 2016 16:18:29 -
@@ -65,7 +65,7 @@
 blockquote.mail div:nth-child(2) { border-left: solid blue; padding-left: 4pt; 
}
 
 /* C++ status tables. */
-table.cxxstatus th, td { border: 1px solid gray; }
+table.cxxstatus th, table.cxxstatus td { border: 1px solid gray; }
 table.cxxstatus td:nth-child(3) { text-align:center; }
 table.cxxstatus tr.separator { background: #f2f2f9; }
 


[PATCH, i386]: Fix PR70510, invalid %-code with -mavx512bw -masm=intel when emitting vpbroadcast

2016-04-05 Thread Uros Bizjak
2016-04-05  Uros Bizjak  

PR target/70510
* config/i386/sse.md (iptr): Add V64QI, V32HI, V16SI and V8DI modes.

testsuite/ChangeLog:

2016-04-05  Uros Bizjak  

PR target/70510
* gcc.target/i386/pr70510.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline, will be backported to release branches.

Uros.
Index: config/i386/sse.md
===
--- config/i386/sse.md  (revision 234749)
+++ config/i386/sse.md  (working copy)
@@ -705,7 +705,8 @@
 
 ;; Pointer size override for scalar modes (Intel asm dialect)
 (define_mode_attr iptr
-  [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
+  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
+   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
(V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
(V8SF "k") (V4DF "q")
(V4SF "k") (V2DF "q")
Index: testsuite/gcc.target/i386/pr70510.c
===
--- testsuite/gcc.target/i386/pr70510.c (nonexistent)
+++ testsuite/gcc.target/i386/pr70510.c (working copy)
@@ -0,0 +1,14 @@
+/* PR target/70510 */
+/* { dg-do assemble { target avx512bw } } */
+/* { dg-require-effective-target masm_intel } */
+/* { dg-options "-Og -mavx512bw -masm=intel" } */
+
+typedef int V __attribute__ ((vector_size (64)));
+
+V
+foo (V u, V v)
+{
+  v[0] |= v[u[0]];
+  u /= ((V)v)[0];
+  return u;
+}


[PATCH] Limit some match.pd conversion folding to GIMPLE (PR c++/70336)

2016-04-05 Thread Jakub Jelinek
Hi!

As the testcase shows, the folding which previously has been done
only in tree-ssa-forwprop.c breaks some cases with explicit casts
in -Wconversion, so this patch limits that folding to GIMPLE only.
Unfortunately that breaks a few cases in builtin-isinf_sign-1.c
test at -O0, but IMNSHO expecting all the folding at -O0 is wrong,
outside of constexpr and initializers we shouldn't guarantee any
folding.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-04-05  Jakub Jelinek  

PR c++/70336
* match.pd (nested int casts): Limit to GIMPLE.

* c-c++-common/pr70336.c: New test.
* gcc.dg/torture/builtin-isinf_sign-1.c (foo): Guard tests
no longer optimized away at -O0 with #ifdef __OPTIMIZE__.

--- gcc/match.pd.jj 2016-03-23 19:25:56.0 +0100
+++ gcc/match.pd2016-04-04 12:39:32.535498926 +0200
@@ -1554,7 +1554,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* A truncation to an unsigned type (a zero-extension) should be
canonicalized as bitwise and of a mask.  */
-(if (final_int && inter_int && inside_int
+(if (GIMPLE /* PR70336: doing this in GENERIC breaks -Wconversion.  */
+&& final_int && inter_int && inside_int
 && final_prec == inside_prec
 && final_prec > inter_prec
 && inter_unsignedp)
--- gcc/testsuite/c-c++-common/pr70336.c.jj 2016-04-04 12:42:48.056806268 
+0200
+++ gcc/testsuite/c-c++-common/pr70336.c2016-04-04 12:44:19.343549096 
+0200
@@ -0,0 +1,37 @@
+/* PR c++/70336 */
+/* { dg-do compile } */
+/* { dg-options "-Wconversion" } */
+
+void
+f1 (unsigned char * x, int y, int z)
+{
+  x[z / 8] |= (unsigned char) (0x80 >> y); /* { dg-bogus "may alter its 
value" } */
+}
+
+unsigned char
+f2 (unsigned char x, int y)
+{
+  x = x | (unsigned char) (0x80 >> y); /* { dg-bogus "may alter its 
value" } */
+  return x;
+}
+
+unsigned char
+f3 (unsigned char x, int y)
+{
+  x = x | (unsigned char) (y & 255);   /* { dg-bogus "may alter its 
value" } */
+  return x;
+}
+
+unsigned char
+f4 (unsigned char x, unsigned char y)
+{
+  x = x | (unsigned char) (y & 255);   /* { dg-bogus "may alter its 
value" } */
+  return x;
+}
+
+unsigned char
+f5 (unsigned char x, int y)
+{
+  x = (unsigned char) (y & 255);   /* { dg-bogus "may alter its 
value" } */
+  return x;
+}
--- gcc/testsuite/gcc.dg/torture/builtin-isinf_sign-1.c.jj  2008-09-05 
12:54:28.0 +0200
+++ gcc/testsuite/gcc.dg/torture/builtin-isinf_sign-1.c 2016-04-05 
16:13:40.649320108 +0200
@@ -24,6 +24,7 @@ foo (float f, double d, long double ld)
   != (__builtin_isinf(ld) ? (__builtin_signbitl(ld) ? -1 : 1) : 0))
 link_error (__LINE__);
 
+#ifdef __OPTIMIZE__
   /* In boolean contexts, GCC will fold the inner conditional
  expression to 1.  So isinf_sign folds to plain isinf.  */
 
@@ -33,6 +34,7 @@ foo (float f, double d, long double ld)
 link_error (__LINE__);
   if ((_Bool)__builtin_isinf_sign(ld) != (__builtin_isinf(ld) != 0))
 link_error (__LINE__);
+#endif
 
   if ((__builtin_isinf_sign(f) != 0) != (__builtin_isinf(f) != 0))
 link_error (__LINE__);

Jakub


[PATCH] Fix REE with vector modes (PR rtl-optimization/70542)

2016-04-05 Thread Jakub Jelinek
Hi!

As mentioned in PR64286 already, unlike integral modes, vector modes
don't have the property that lowpart subreg of sign/zero extended
value contains the original non-extended bits, so if we touch some
definition, we really have to change all uses.  Except that we really
don't have infrastructure/code to make sure we change either none or
all within a single transaction, and there are lots of reasons why it could
fail, more with copy_needed cases as in the testcase below, where we have:
  (set (reg:V8HI xmm6) (reg:V8HI xmm10))
  (set (reg:V8SI xmm3) (sign_extend:V8SI (reg:V8HI xmm6)))
...
// in another bb later on
  (set (reg:V8SI ...) (sign_extend:V8SI (reg:V8HI xmm6)))
(why CSE has not managed to optimize this is strange, something to look
into for GCC 7).  Without this patch we optimize the first two insns into:
  (set (reg:V8SI xmm3) (sign_extend:V8HI xmm10))
  (set (reg:V8SI xmm6) (reg:V8SI xmm3))
but then fail to tweak the last insn, because the definition has been
already modified and it is another copy_needed case.

So, IMHO without big changes we really can't guarantee all or nothing,
and thus this patch changes the PR64286 fix to be more conservative,
verify the def is used just in a single non-debug insn, which we can then
safely modify.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-04-05  Jakub Jelinek  

PR rtl-optimization/70542
* ree.c (add_removable_extension): For VECTOR_MODE_P punt
if there are any uses other than insn or debug insns.

* gcc.dg/torture/pr70542.c: New test.
* gcc.target/i386/avx2-pr70542.c: New test.

--- gcc/ree.c.jj2016-01-04 14:55:54.0 +0100
+++ gcc/ree.c   2016-04-05 14:38:08.219486904 +0200
@@ -1025,11 +1025,11 @@ add_removable_extension (const_rtx expr,
return;
  }
/* For vector mode extensions, ensure that all uses of the
-  XEXP (src, 0) register are the same extension (both code
-  and to which mode), as unlike integral extensions lowpart
-  subreg of the sign/zero extended register are not equal
-  to the original register, so we have to change all uses or
-  none.  */
+  XEXP (src, 0) register are in insn or debug insns, as unlike
+  integral extensions lowpart subreg of the sign/zero extended
+  register are not equal to the original register, so we have
+  to change all uses or none and the current code isn't able
+  to change them all at once in one transaction.  */
else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
  {
if (idx == 0)
@@ -1046,15 +1046,7 @@ add_removable_extension (const_rtx expr,
break;
  }
rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
-   const_rtx use_set;
-   if (use_insn == insn || DEBUG_INSN_P (use_insn))
- continue;
-   if (!(use_set = single_set (use_insn))
-   || !REG_P (SET_DEST (use_set))
-   || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
-   || GET_CODE (SET_SRC (use_set)) != code
-   || !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
-XEXP (src, 0)))
+   if (use_insn != insn && !DEBUG_INSN_P (use_insn))
  {
idx = -1U;
break;
--- gcc/testsuite/gcc.dg/torture/pr70542.c.jj   2016-04-05 13:05:53.925334083 
+0200
+++ gcc/testsuite/gcc.dg/torture/pr70542.c  2016-04-05 13:05:35.0 
+0200
@@ -0,0 +1,31 @@
+/* PR rtl-optimization/70542 */
+/* { dg-do run } */
+
+int a[113], d[113];
+short b[113], c[113], e[113];
+
+int
+main ()
+{
+  int i;
+  long j;
+  for (i = 0; i < 113; ++i)
+{
+  a[i] = -636544305;
+  b[i] = -31804;
+}
+  for (j = 1; j <= 112; ++j)
+{
+  c[j] = b[j] >> ((a[j] & 1587842570) - 1510214139);
+  if (a[j])
+   d[j] = j;
+  e[j] = 7 << ((2312631697 - b[j]) - 2312663500);
+}
+  asm volatile ("" : : : "memory");
+  if (c[0] || d[0] || e[0])
+__builtin_abort ();
+  for (i = 1; i <= 112; ++i)
+if (c[i] != -1 || d[i] != i || e[i] != 14)
+  __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr70542.c.jj 2016-04-05 
13:06:29.154855716 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-pr70542.c2016-04-05 
13:07:01.779412722 +0200
@@ -0,0 +1,16 @@
+/* PR tree-optimization/70542 */
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/torture/pr70542.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}

Jakub


Re: [PATCH] Fix REE with vector modes (PR rtl-optimization/70542)

2016-04-05 Thread Jeff Law

On 04/05/2016 10:56 AM, Jakub Jelinek wrote:

Hi!

As mentioned in PR64286 already, unlike integral modes, vector modes
don't have the property that lowpart subreg of sign/zero extended
value contains the original non-extended bits, so if we touch some
definition, we really have to change all uses.  Except that we really
don't have infrastructure/code to make sure we change either none or
all within a single transaction, and there is lots of reasons why it could
fail, more with copy_needed cases as in the testcase below, where we have:
   (set (reg:V8HI xmm6) (reg:V8HI xmm10))
   (set (reg:V8SI xmm3) (sign_extend:V8SI (reg:V8HI xmm6)))
...
// in another bb later on
   (set (reg:V8SI ...) (sign_extend:V8SI (reg:V8HI xmm6)))
(why CSE has not managed to optimize this is strange, something to look
for GCC7).  Without this patch we optimize the first two insns into:
   (set (reg:V8SI xmm3) (sign_extend:V8HI xmm10))
   (set (reg:V8SI xmm6) (reg:V8SI xmm3))
but then fail to tweak the last insn, because the definition has been
already modified and it is another copy_needed case.

So, IMHO without big changes we really can't guarantee all or nothing,
and thus this patch changes the PR64286 fix to be more conservative,
verify the def is used just in a single non-debug insn, which we can then
safely modify.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-04-05  Jakub Jelinek  

PR rtl-optimization/70542
* ree.c (add_removable_extension): For VECTOR_MODE_P punt
if there are any uses other than insn or debug insns.

* gcc.dg/torture/pr70542.c: New test.
* gcc.target/i386/avx2-pr70542.c: New test.

OK.
jeff



Re: [PATCH] Fix PR70509 (wrong code with extract from a v64qi)

2016-04-05 Thread Jakub Jelinek
On Mon, Apr 04, 2016 at 11:16:48PM +0200, Zdenek Sojka wrote:
> gcc/Changelog:
> 
> 2016-04-04  Zdenek Sojka  
> 
>     PR tree-optimization/70509
>     * tree-ssa-forwprop.c (simplify_bitfield_ref): Use bitsize_int () instead 
> of the vector base type for index.
> 
> 
> gcc/testsuite/Changelog:
> 
> 2016-04-04  Zdenek Sojka  
> 
>     PR tree-optimization/70509
>     * gcc.target/i386/avx512bw-pr70509.c: New.

I've tested this on x86_64-linux and i686-linux (including running
the testcase under sde64 emulation), and committed to trunk.
This has been small enough, but if you plan to provide more fixes
(beyond your very much appreciated bugreporting), we'll need a copyright
assignment (unless you already have it).

I've also committed following patch Marc mentioned in the PR, as obvious
after bootstrap/regtest on x86_64-linux and i686-linux, haven't managed
to create a testcase that would be miscompiled though, so it is for now
just a theoretical fix.

2016-04-05  Marc Glisse  
Jakub Jelinek  

PR tree-optimization/70509
* simplify-rtx.c (simplify_binary_operation_1) :
Shift HOST_WIDE_INT_1U instead of 1.

--- gcc/simplify-rtx.c.jj   2016-03-03 18:21:02.0 +0100
+++ gcc/simplify-rtx.c  2016-04-05 16:29:44.502306175 +0200
@@ -3665,7 +3665,7 @@ simplify_binary_operation_1 (enum rtx_co
  for (int i = 0; i < XVECLEN (trueop1, 0); i++)
{
  rtx j = XVECEXP (trueop1, 0, i);
- if (sel & (1 << UINTVAL (j)))
+ if (sel & (HOST_WIDE_INT_1U << UINTVAL (j)))
all_operand1 = false;
  else
all_operand0 = false;


Jakub


Re: [patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-05 Thread Jonathan Wakely

On 05/04/16 14:37 +0100, Jonathan Wakely wrote:

   Ensure std::thread helpers have internal linkage
   
   	PR libstdc++/70503

* src/c++11/thread.cc (execute_native_thread_routine,
execute_native_thread_routine_compat): Give internal linkage.
* testsuite/30_threads/thread/70503.cc: New test.


This small tweak to the new test checks that -static works before
trying to run it.

Tested x86_64-linux, with and without static glibc installed.

Committed to trunk.
commit 1fdc9bbe912281ba6d573fc28c53817402656e57
Author: Jonathan Wakely 
Date:   Tue Apr 5 18:18:36 2016 +0100

	* testsuite/30_threads/thread/70503.cc: Require -static to work.

diff --git a/libstdc++-v3/testsuite/30_threads/thread/70503.cc b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
index 1763ac2..950d754 100644
--- a/libstdc++-v3/testsuite/30_threads/thread/70503.cc
+++ b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
@@ -19,6 +19,7 @@
 // { dg-options " -std=gnu++11 -static" { target *-*-*gnu* } }
 // { dg-require-cstdint "" }
 // { dg-require-gthreads "" }
+// { dg-require-effective-target static }
 
 extern "C" {
   void execute_native_thread_routine(void);


Re: [PATCH, cpp] Fix pr61817 and 69391

2016-04-05 Thread Manuel López-Ibáñez

On 05/04/16 17:22, Richard Henderson wrote:

These two related PRs are all about remembering where a macro is expanded.
Worse, we've got two competing goals -- the real location of the expansion, for
__LINE__, and the virtual location of the expansion, for diagnostics.

There seems to be no way to unify the two competing goals.  If we simply "fix"
the first, we break the second.  Therefore, I resort to passing down both
locations.



+++ b/gcc/testsuite/gcc.dg/pr61817.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c11 -ftrack-macro-expansion=0" } */
+


Why use -ftrack-macro-expansion=0? This should work with =1, which is also the 
default, no?


Cheers,

Manuel.


Re: [PR c/68966] Restore atomic builtins usage in libstdc++-v3

2016-04-05 Thread Jonathan Wakely

On 05/04/16 12:01 +0100, Jonathan Wakely wrote:

Well I guess it's mine, and this is a fairly serious regression (is it
tracked in Bugzilla anywhere?) so the patch is OK for trunk.


This is now https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70554


Re: [PATCH, cpp] Fix pr61817 and 69391

2016-04-05 Thread Richard Henderson
On 04/05/2016 11:03 AM, Manuel López-Ibáñez wrote:
> Why use -ftrack-macro-expansion=0?

That's the point of the PR -- we were producing totally bogus results.


r~



Re: [PR c/68966] Restore atomic builtins usage in libstdc++-v3

2016-04-05 Thread Jonathan Wakely

On 05/04/16 19:08 +0100, Jonathan Wakely wrote:

On 05/04/16 12:01 +0100, Jonathan Wakely wrote:

Well I guess it's mine, and this is a fairly serious regression (is it
tracked in Bugzilla anywhere?) so the patch is OK for trunk.


This is now https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70554


I've committed this smaller change, which still tests atomics for
bool, but simply removes the uses of __atomic_fetch_add for bool.

During stage 1 we can revisit whether we want to do something more
invasive and stop testing for atomics that we don't actually need.

Tested ppc64le-linux, x86_64-linux, committed to trunk.


commit 7591475c21a34bbcce6d225ea7640d35022fb8ff
Author: Jonathan Wakely 
Date:   Tue Apr 5 19:16:05 2016 +0100

Restore atomic builtins usage in libstdc++-v3

	PR libstdc++/70554
	* acinclude.m4 (GLIBCXX_ENABLE_ATOMIC_BUILTINS): Don't test
	__atomic_fetch_add for bool.
	* configure: Regenerate.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 95df24a..b0f88cb 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3290,7 +3290,7 @@ AC_DEFUN([GLIBCXX_ENABLE_ATOMIC_BUILTINS], [
atomic_type c1;
atomic_type c2;
atomic_type c3(0);
-   __atomic_fetch_add(&c1, c2, __ATOMIC_RELAXED);
+   // N.B. __atomic_fetch_add is not supported for bool.
__atomic_compare_exchange_n(&c1, &c2, c3, true, __ATOMIC_ACQ_REL,
    __ATOMIC_RELAXED);
__atomic_test_and_set(&c1, __ATOMIC_RELAXED);
@@ -3375,7 +3375,7 @@ int main()
   atomic_type c1;
   atomic_type c2;
   atomic_type c3(0);
-  __atomic_fetch_add(&c1, c2, __ATOMIC_RELAXED);
+  // N.B. __atomic_fetch_add is not supported for bool.
   __atomic_compare_exchange_n(&c1, &c2, c3, true, __ATOMIC_ACQ_REL,
 			  __ATOMIC_RELAXED);
   __atomic_test_and_set(&c1, __ATOMIC_RELAXED);


[PATCH, rs6000] Add support for int versions of vec_adde

2016-04-05 Thread Bill Seurer
This patch adds support for the signed and unsigned int versions of the
vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI
OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1).  Many of
the builtins are missing, and this is the first of a series of patches
to add them.

There aren't instructions for the int versions of vec_adde so the
output code is built from other built-ins that do have instructions
which in this case is just two vec_adds.

The new test cases are executable tests which verify that the generated
code produces expected values.  C macros were used so that the same
test case could be used for both the signed and unsigned versions.  An
extra executable test case is also included to ensure that the modified
support for the __int128 versions of vec_adde is not broken.  The same
test case could not be used for both int and __int128 because of some
differences in loading and storing the vectors.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

[gcc]

2016-04-06  Bill Seurer  

* config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a
special case builtin.
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins,
altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE
from altivec_overloaded_builtins structure.  Add support for it to
altivec_resolve_overloaded_builtin function.
* config/rs6000/rs6000.c (altivec_init_builtins): Add definition
for __builtin_vec_adde.

[gcc/testsuite]

2016-04-06  Bill Seurer  

* gcc.target/powerpc/vec-adde.c: New test.
* gcc.target/powerpc/vec-adde-int128.c: New test.

Index: gcc/config/rs6000/rs6000-builtin.def
===
--- gcc/config/rs6000/rs6000-builtin.def(revision 234745)
+++ gcc/config/rs6000/rs6000-builtin.def(working copy)
@@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO
before we get to the point about classifying the builtin type.  */
 
 /* 3 argument Altivec overloaded builtins.  */
-BU_ALTIVEC_OVERLOAD_3 (ADDE,  "adde")
 BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec")
 BU_ALTIVEC_OVERLOAD_3 (MADD,   "madd")
 BU_ALTIVEC_OVERLOAD_3 (MADDS,  "madds")
@@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P,   "vcmpgt_p")
 BU_ALTIVEC_OVERLOAD_P (VCMPGE_P,   "vcmpge_p")
 
 /* Overloaded Altivec builtins that are handled as special cases.  */
+BU_ALTIVEC_OVERLOAD_X (ADDE,  "adde")
 BU_ALTIVEC_OVERLOAD_X (CTF,   "ctf")
 BU_ALTIVEC_OVERLOAD_X (CTS,   "cts")
 BU_ALTIVEC_OVERLOAD_X (CTU,   "ctu")
Index: gcc/config/rs6000/rs6000-c.c
===
--- gcc/config/rs6000/rs6000-c.c(revision 234745)
+++ gcc/config/rs6000/rs6000-c.c(working copy)
@@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa
 RS6000_BTI_unsigned_V1TI, 0 },
   { ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ,
 RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
-  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
-RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
-RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
-  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
-RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
   { ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ,
 RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
 RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
@@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores");
 warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \
 assignment for unaligned loads and stores");
 
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+{
+  /* vec_adde needs to be special cased because there is no instruction
+ for the {un}signed int version */
+  if (nargs != 3)
+   {
+ error ("vec_adde only accepts 3 arguments");
+ return error_mark_node;
+   }
+
+  tree arg0 = (*arglist)[0];
+  tree arg0_type = TREE_TYPE (arg0);
+  tree arg1 = (*arglist)[1];
+  tree arg1_type = TREE_TYPE (arg1);
+  tree arg2 = (*arglist)[2];
+  tree arg2_type = TREE_TYPE (arg2);
+
+  /* All 3 arguments must be vectors of (signed or unsigned) (int or
+ __int128) and the types must match */
+  if ((arg0_type != arg1_type) || (arg1_type != arg2_type))
+   goto bad; 
+  if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+   goto bad; 
+
+  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+   {
+ /* for {un}signed ints, 
+vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */
+ case SImode:
+   {
+ vec *params = make_tree_vector();
+ vec_safe_push (params, arg0);
+ vec_safe_push (params, arg1);
+ tree call = altivec_re

Re: [C++ PATCH] PR 70501, ICE in verify ctor sanity

2016-04-05 Thread Jason Merrill

On 04/04/2016 01:26 PM, Nathan Sidwell wrote:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70501

This fixes 70501.  The cause is an omission in typeck when converting a
scalar operand to a vector.  We use build_vector_from_val, which can
return a CONSTRUCTOR.  We fail to wrap that CONSTRUCTOR in a TARGET_EXPR.

The ICE arises because at the point we meet that CONSTRUCTOR during the
constexpr processing, the currently active object under construction is
that for the result of the <= operator, which has type vector-of-bool,
rather than vector-of-int.  (Thus this problem arises in other vector
ops, but goes mostly undetected because the result type is the same as
the operand type.)


It's not clear to me that we really need a TARGET_EXPR for vector 
values.  Since one element of a vector can't refer to another, we don't 
need the ctx->ctor handling.  Perhaps we should handle vectors like we 
do PMF types in cxx_eval_bare_aggregate?


Jason



Re: [PATCH] Limit some match.pd conversion folding to GIMPLE (PR c++/70336)

2016-04-05 Thread Richard Biener
On April 5, 2016 6:47:20 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>As the testcase shows, the folding which previously has been done
>only in tree-ssa-forwprop.c breaks some cases with explicit casts
>in -Wconversion, so this patch limits that folding to GIMPLE only.
>Unfortunately that breaks a few cases in builtin-isinf_sign-1.c
>test at -O0, but IMNSHO expecting all the folding at -O0 is wrong,
>outside of constexpr and initializers we shouldn't guarantee any
>folding.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard.

>2016-04-05  Jakub Jelinek  
>
>   PR c++/70336
>   * match.pd (nested int casts): Limit to GIMPLE.
>
>   * c-c++-common/pr70336.c: New test.
>   * gcc.dg/torture/builtin-isinf_sign-1.c (foo): Guard tests
>   no longer optimized away at -O0 with #ifdef __OPTIMIZE__.
>
>--- gcc/match.pd.jj2016-03-23 19:25:56.0 +0100
>+++ gcc/match.pd   2016-04-04 12:39:32.535498926 +0200
>@@ -1554,7 +1554,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> 
> /* A truncation to an unsigned type (a zero-extension) should be
>canonicalized as bitwise and of a mask.  */
>-(if (final_int && inter_int && inside_int
>+(if (GIMPLE /* PR70366: doing this in GENERIC breaks -Wconversion.
> */
>+   && final_int && inter_int && inside_int
>&& final_prec == inside_prec
>&& final_prec > inter_prec
>&& inter_unsignedp)
>--- gcc/testsuite/c-c++-common/pr70336.c.jj2016-04-04
>12:42:48.056806268 +0200
>+++ gcc/testsuite/c-c++-common/pr70336.c   2016-04-04 12:44:19.343549096
>+0200
>@@ -0,0 +1,37 @@
>+/* PR c++/70336 */
>+/* { dg-do compile } */
>+/* { dg-options "-Wconversion" } */
>+
>+void
>+f1 (unsigned char * x, int y, int z)
>+{
>+  x[z / 8] |= (unsigned char) (0x80 >> y);/* { dg-bogus "may alter
>its value" } */
>+}
>+
>+unsigned char
>+f2 (unsigned char x, int y)
>+{
>+  x = x | (unsigned char) (0x80 >> y);/* { dg-bogus "may 
>alter its
>value" } */
>+  return x;
>+}
>+
>+unsigned char
>+f3 (unsigned char x, int y)
>+{
>+  x = x | (unsigned char) (y & 255);  /* { dg-bogus "may alter its
>value" } */
>+  return x;
>+}
>+
>+unsigned char
>+f4 (unsigned char x, unsigned char y)
>+{
>+  x = x | (unsigned char) (y & 255);  /* { dg-bogus "may alter its
>value" } */
>+  return x;
>+}
>+
>+unsigned char
>+f5 (unsigned char x, int y)
>+{
>+  x = (unsigned char) (y & 255);  /* { dg-bogus "may alter its 
>value"
>} */
>+  return x;
>+}
>--- gcc/testsuite/gcc.dg/torture/builtin-isinf_sign-1.c.jj 2008-09-05
>12:54:28.0 +0200
>+++ gcc/testsuite/gcc.dg/torture/builtin-isinf_sign-1.c2016-04-05
>16:13:40.649320108 +0200
>@@ -24,6 +24,7 @@ foo (float f, double d, long double ld)
>  != (__builtin_isinf(ld) ? (__builtin_signbitl(ld) ? -1 : 1) : 0))
> link_error (__LINE__);
> 
>+#ifdef __OPTIMIZE__
>   /* In boolean contexts, GCC will fold the inner conditional
>  expression to 1.  So isinf_sign folds to plain isinf.  */
> 
>@@ -33,6 +34,7 @@ foo (float f, double d, long double ld)
> link_error (__LINE__);
>   if ((_Bool)__builtin_isinf_sign(ld) != (__builtin_isinf(ld) != 0))
> link_error (__LINE__);
>+#endif
> 
>   if ((__builtin_isinf_sign(f) != 0) != (__builtin_isinf(f) != 0))
> link_error (__LINE__);
>
>   Jakub




Re: Do not optimize some polymorphic calls with -fsanitize=undefined

2016-04-05 Thread Jan Hubicka
Hi,
this is the patch I committed to address the feedback.

Regtested on x86_64-linux

Honza

* ipa-devirt.c (maybe_record_node): Fix comment; use
SANITIZE_UNREACHABLE instead of SANITIZE_UNDEFINED.

* g++.dg/ipa/devirt-51.C: Use -fsanitize=unreachable.
Index: ipa-devirt.c
===
--- ipa-devirt.c(revision 234761)
+++ ipa-devirt.c(working copy)
@@ -2438,9 +2438,9 @@ maybe_record_node (vec  &
 {
   gcc_assert (!target_node->global.inlined_to);
   gcc_assert (target_node->real_symbol_p ());
-  /* When sanitizing, do not asume that cxa_pure_virutal is not called
+  /* When sanitizing, do not assume that __cxa_pure_virtual is not called
 by valid program.  */
-  if (flag_sanitize & SANITIZE_UNDEFINED)
+  if (flag_sanitize & SANITIZE_UNREACHABLE)
;
   /* Only add pure virtual if it is the only possible target.  This way
 we will preserve the diagnostics about pure virtual called in many
Index: testsuite/g++.dg/ipa/devirt-51.C
===
--- testsuite/g++.dg/ipa/devirt-51.C(revision 234761)
+++ testsuite/g++.dg/ipa/devirt-51.C(working copy)
@@ -2,7 +2,7 @@
variant.  Either keeping virtual call or optimizing to cxa_pure_virtual
is fine.  */
 /* { dg-do compile } */
-/* { dg-options "-O2 -fsanitize=undefined -fdump-tree-optimized"  } */
+/* { dg-options "-O2 -fsanitize=unreachable -fdump-tree-optimized"  } */
 namespace {
   struct B {
 B* self;


Re: [C++ PATCH] PR 70501, ICE in verify ctor sanity

2016-04-05 Thread Nathan Sidwell

On 04/05/16 12:40, Jason Merrill wrote:


It's not clear to me that we really need a TARGET_EXPR for vector values.  Since
one element of a vector can't refer to another, we don't need the ctx->ctor
handling.  Perhaps we should handle vectors like we do PMF types in
cxx_eval_bare_aggregate?


That may be abstractly better, but we do currently wrap constructors in 
target_exprs for vector compound_literals (which is what I was following).  See 
the get_target_expr_sfinae  calls in finish_compound_literal for instance.  That 
happens for  the '(v4si){(0, 0)}' subexpression of the testcase.


nathan


[PATCH] C++/70512 Fix may_alias canonical type mismatch

2016-04-05 Thread Nathan Sidwell

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70512

this fixes the canonical type mismatch of 70512.  As surmised we don't correct 
the ptr-to and ref-to types created during the structure parse when 
late-applying the may_alias attribute.


This patch adjusts fixup_attribute_variants to walk the ptr-to and ref-to 
lists setting the TYPE_REF_CAN_ALIAS_ALL flag.  We also walk those lists for the 
main variant itself.


It wasn't clear to me whether decl_attributes (attribs.c) should mark the ptr-to 
and ref-to lists of the type having the may_alias attribute applied in place?


By construction, we'll adjust the appropriate canonical types, IIUC.

ok?

nathan
2016-04-04  Nathan Sidwell  

	PR c++/70512
	* class.c (fixup_may_alias): New.
	(fixup_attribute_variants): Call it.

	* g++.dg/ext/attribute-may-alias-5.C: New.

Index: cp/class.c
===
--- cp/class.c	(revision 234715)
+++ cp/class.c	(working copy)
@@ -1978,6 +1978,21 @@ fixup_type_variants (tree t)
 }
 }
 
+/* KLASS is a class that we're applying may_alias to after the body is
+   parsed.  Fixup any POINTER_TO and REFERENCE_TO types.  The
+   canonical type(s) will be implicitly updated.  */
+
+static void
+fixup_may_alias (tree klass)
+{
+  tree t;
+
+  for (t = TYPE_POINTER_TO (klass); t; t = TYPE_NEXT_PTR_TO (t))
+TYPE_REF_CAN_ALIAS_ALL (t) = true;
+  for (t = TYPE_REFERENCE_TO (klass); t; t = TYPE_NEXT_REF_TO (t))
+TYPE_REF_CAN_ALIAS_ALL (t) = true;
+}
+
 /* Early variant fixups: we apply attributes at the beginning of the class
definition, and we need to fix up any variants that have already been
made via elaborated-type-specifier so that check_qualified_type works.  */
@@ -1993,6 +2008,10 @@ fixup_attribute_variants (tree t)
   tree attrs = TYPE_ATTRIBUTES (t);
   unsigned align = TYPE_ALIGN (t);
   bool user_align = TYPE_USER_ALIGN (t);
+  bool may_alias = lookup_attribute ("may_alias", attrs);
+
+  if (may_alias)
+fixup_may_alias (t);
 
   for (variants = TYPE_NEXT_VARIANT (t);
variants;
@@ -2007,6 +2026,8 @@ fixup_attribute_variants (tree t)
   else
 	TYPE_USER_ALIGN (variants) = user_align;
   TYPE_ALIGN (variants) = valign;
+  if (may_alias)
+	fixup_may_alias (variants);
 }
 }
 
Index: testsuite/g++.dg/ext/attribute-may-alias-5.C
===
--- testsuite/g++.dg/ext/attribute-may-alias-5.C	(nonexistent)
+++ testsuite/g++.dg/ext/attribute-may-alias-5.C	(working copy)
@@ -0,0 +1,9 @@
+// PR c++/70512
+
+struct S 
+{
+  S& operator= (int)
+  {
+return *this;
+  }
+} __attribute__ ((__may_alias__));


Re: [PATCH] C++/70512 Fix may_alias canonical type mismatch

2016-04-05 Thread Jason Merrill

OK.

Jason


Re: [AArch64] Emit square root using the Newton series

2016-04-05 Thread Evandro Menezes

On 04/05/16 13:37, Wilco Dijkstra wrote:

I can't get any of these to work... Not only do I get a large number of 
collisions and duplicated
code between these patches, when I try to resolve them, all I get is crashes 
whenever I try
to use sqrt (even rsqrt stopped working). Do you have a patchset that applies 
cleanly so I can
try all approximation routines?


Hi, Wilco.

The original patches should be independent of each other, so indeed they 
duplicate code.


This patch suite should be suitable for testing.

HTH

--
Evandro Menezes

>From cbc2b62f7df5c3e2fef2a24157b1bdd1a6de191b Mon Sep 17 00:00:00 2001
From: Evandro Menezes 
Date: Mon, 4 Apr 2016 14:02:24 -0500
Subject: [PATCH 3/3] Emit division using the Newton series

2016-04-04  Evandro Menezes  
Wilco Dijkstra 

gcc/
	* config/aarch64/aarch64-tuning-flags.def
	* config/aarch64/aarch64-protos.h
	(tune_params): Add new member "approx_div_modes".
	(aarch64_emit_approx_div): Declare new function.
	* config/aarch64/aarch64.c
	(generic_tunings): New member "approx_div_modes".
	(cortexa35_tunings): Likewise.
	(cortexa53_tunings): Likewise.
	(cortexa57_tunings): Likewise.
	(cortexa72_tunings): Likewise.
	(exynosm1_tunings): Likewise.
	(thunderx_tunings): Likewise.
	(xgene1_tunings): Likewise.
	(aarch64_emit_approx_div): Define new function.
	* config/aarch64/aarch64.md ("div<mode>3"): New expansion.
	* config/aarch64/aarch64-simd.md ("div<mode>3"): Likewise.
	* config/aarch64/aarch64.opt (-mlow-precision-div): Add new option.
	* doc/invoke.texi (-mlow-precision-div): Describe new option.
---
 gcc/config/aarch64/aarch64-protos.h |  2 +
 gcc/config/aarch64/aarch64-simd.md  | 14 +-
 gcc/config/aarch64/aarch64.c| 85 +
 gcc/config/aarch64/aarch64.md   | 19 +++--
 gcc/config/aarch64/aarch64.opt  |  5 +++
 gcc/doc/invoke.texi | 10 +
 6 files changed, 130 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 85ad796..649faf7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -244,6 +244,7 @@ struct tune_params
   } autoprefetcher_model;
 
   unsigned int extra_tuning_flags;
+  unsigned int approx_div_modes;
   unsigned int approx_sqrt_modes;
   unsigned int approx_rsqrt_modes;
 };
@@ -390,6 +391,7 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 void aarch64_save_restore_target_globals (tree);
 bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
+bool aarch64_emit_approx_div (rtx, rtx, rtx);
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 47ccb18..7e99e16 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1509,7 +1509,19 @@
   [(set_attr "type" "neon_fp_mul_")]
 )
 
-(define_insn "div3"
+(define_expand "div3"
+ [(set (match_operand:VDQF 0 "register_operand")
+   (div:VDQF (match_operand:VDQF 1 "general_operand")
+		 (match_operand:VDQF 2 "register_operand")))]
+ "TARGET_SIMD"
+{
+  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
+DONE;
+
+  operands[1] = force_reg (mode, operands[1]);
+})
+
+(define_insn "*div3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
(div:VDQF (match_operand:VDQF 1 "register_operand" "w")
 		 (match_operand:VDQF 2 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4af2175..74310e8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -417,6 +417,7 @@ static const struct tune_params generic_tunings =
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  (AARCH64_APPROX_NONE),	/* approx_div_modes.  */
   (AARCH64_APPROX_NONE),	/* approx_sqrt_modes.  */
   (AARCH64_APPROX_NONE)	/* approx_rsqrt_modes.  */
 };
@@ -444,6 +445,7 @@ static const struct tune_params cortexa35_tunings =
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  (AARCH64_APPROX_NONE),	/* approx_div_modes.  */
   (AARCH64_APPROX_NONE),	/* approx_sqrt_modes.  */
   (AARCH64_APPROX_NONE)	/* approx_rsqrt_modes.  */
 };
@@ -471,6 +473,7 @@ static const struct tune_params cortexa53_tunings =
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  (AARCH64_APPROX_NONE),	/* approx_div_modes.  */
   (AARCH64_APPROX_NONE),	/* approx_sqrt_modes.  */
   (AARCH64_APPROX_NONE)	/* approx_rsqrt_modes.  */
 };
@@ -498,6 +501,7 @@ static const struct tune_params cortexa57_tunings =
   0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA

Re: a patch for PR68695

2016-04-05 Thread Segher Boessenkool
On Tue, Apr 05, 2016 at 10:48:58AM +0100, Kyrill Tkachov wrote:
> So for the test gcc.dg/pr10474.c on arm with -marm -O3 before this patch we
> perform shrink-wrapping:
>         cmp     r0, #0
>         bxeq    lr
>         push    {r4, lr}
>         mov     r4, r0
> ...
> 
> And after the patch we don't:
>         push    {r4, lr}
>         subs    r4, r0, #0
>         popeq   {r4, pc}
> ...
> 
> The assembly after the "..." is identical.
> 
> So the resulting code is indeed shorter, though there is an
> extra stack push and pop on the early return path.
> A similar effect appears on gcc.dg/ira-shrinkwrap-prep-2.c.

The "new" code is better if there is no shrink-wrapping.  We can probably
teach prepare_shrink_wrap to do the extra register move if that will allow
us to wrap more.

> Though if so, it looks like a shrink-wrapping deficiency exposed by
> this patch, rather than caused by it.

Yes, and mostly a testcase problem even.

> Jakub, do you happen to have the before and after codegen for these tests
> on ppc64? I wonder if the effect is more clearcut there.

RTL before shrink-wrapping would be useful, too.


Segher


Re: [committed] Fix linux blk-merge boot problem on hppa

2016-04-05 Thread John David Anglin
On 2016-03-17, at 7:38 PM, John David Anglin wrote:

> The attached patch fixes a problem causing block/blk-merge.c in the linux 
> kernel to be miscompiled.  As a result,
> block segments were not properly split and boot failed since linux 4.3.
> 
> The problem was found by a regression search.  The patch reverts a change in 
> the handling of the Q and T
> constraints.
> 
> Tested on hppa-unknown-linux-gnu, hppa2.0w-hp-hpux11.11 and 
> hppa64-hp-hpux11.11.  Committed to
> trunk, 4.9 and 5 branches.


Unfortunately, this change caused a regression in the handling of the Q 
constraint used in the dbra and movb
patterns in pa.md.  Spills with large offsets are now rejected causing the 
following error:

(jump_insn 61 59 62 4 (parallel [
(set (pc)
(if_then_else (eq (reg:SI 28 %r28)
(const_int 0 [0]))
(label_ref:SI 60)
(pc)))
(set (reg/v/f:SI 100 [ fp ])
(reg:SI 28 %r28))
]) Renviron.c:180 225 {*pa.md:9010}
 (expr_list:REG_DEAD (reg:SI 28 %r28)
(int_list:REG_BR_PROB 2164 (nil)))
 -> 60)
Renviron.c:207:1: internal compiler error: in find_reloads, at reload.c:3862
0x555be3 _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
../../src/gcc/rtl-error.c:110
0x5347ef find_reloads(rtx_insn*, int, int, int, short*)
../../src/gcc/reload.c:3862
0x54ad43 calculate_needs_all_insns
../../src/gcc/reload1.c:1540
0x54ad43 reload(rtx_insn*, int)
../../src/gcc/reload1.c:1028
0x425c0f do_reload
../../src/gcc/ira.c:5454
0x425c0f execute
../../src/gcc/ira.c:5613

The attached change fixes this ICE.  Tested on hppa-unknown-linux-gnu, 
hppa2.0w-hp-hpux11.11
and hppa64-hp-hpux11.11.  Committed to trunk, 4.9 and 5 branches.

Dave
--
John David Anglin   dave.ang...@bell.net


2016-04-05  John David Anglin  

* config/pa/predicates.md (integer_store_memory_operand): Accept
REG+D operands with a large offset when reload_in_progress is true.
(floating_point_store_memory_operand): Likewise.

Index: config/pa/predicates.md
===
--- config/pa/predicates.md (revision 234702)
+++ config/pa/predicates.md (working copy)
@@ -301,6 +301,9 @@
 
   if (reg_plus_base_memory_operand (op, mode))
 {
+  if (reload_in_progress)
+   return true;
+
   /* Extract CONST_INT operand.  */
   if (GET_CODE (op) == SUBREG)
op = SUBREG_REG (op);
@@ -335,6 +338,9 @@
 
   if (reg_plus_base_memory_operand (op, mode))
 {
+  if (reload_in_progress)
+   return true;
+
   /* Extract CONST_INT operand.  */
   if (GET_CODE (op) == SUBREG)
op = SUBREG_REG (op);


Re: openacc reference reductions

2016-04-05 Thread Cesar Philippidis
On 02/22/2016 07:34 AM, Cesar Philippidis wrote:
> Ping. This patch still needs a review.

Ping. I've attached a rebased version of this patch. The omp-low.c bits
haven't changed, but the test cases have since Thomas has been merging
some of them from trunk. I separated the omp-low.c changes because the
test cases are relatively large.

This patch fixes PR70533 and PR70535. Basically it teaches
lower_oacc_reductions how to cope with reference-type variables
(PR70533) and not to remap the reduction variables on parallel
constructs (PR70535).

Is it OK for trunk?

Cesar

> On 02/09/2016 08:17 AM, Cesar Philippidis wrote:
>> On 02/09/2016 07:33 AM, Nathan Sidwell wrote:
>>> While I've not looked at the rest of the patch, this bit stood out:
>>>
 +static bool
 +is_oacc_parallel_reduction (tree var, omp_context *ctx)
 +{
 +  if (!is_oacc_parallel (ctx))
 +return false;
 +
 +  tree clauses = gimple_omp_target_clauses (ctx->stmt);
 +
 +  /* Don't install a local copy of the decl if it used
 + inside a acc parallel reduction.  */
>>>
>>> ^^ comment is misleading -- this routine's not installing anything
>>>
 +  if (is_oacc_parallel (ctx))
>>>
>>> ^^ already checked above.
>>>
 +for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
 +  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
 +  && OMP_CLAUSE_DECL (c) == var)
 +return true;
 +
 +  return false;
 +}
 +
>>
>> Thanks for catching that. Those are artifacts from when this code used
>> to be located exclusively in scan_sharing_clauses. I've updated the
>> patch with those changes.
>>
>> Cesar
>>
> 

2016-04-05  Cesar Philippidis  

	gcc/
	* omp-low.c (is_oacc_parallel_reduction): New function.
	(scan_sharing_clauses): Use it to prevent installing local variables
	for those used in acc parallel reductions.
	(lower_rec_input_clauses): Remove dead code.
	(lower_oacc_reductions): Add support for reference reductions.
	(lower_reduction_clauses): Remove dead code.
	(lower_omp_target): Don't remap variables appearing in acc parallel
	reductions.

	gcc/testsuite/
	* gfortran.dg/goacc/reduction-promotions.f90: Add more coverage.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Adjust test.
	* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Adjust test.
	* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
	* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
	* testsuite/libgomp.oacc-fortran/reduction-1.f90: Adjust test.
	* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3fd6eb3..fa2d318 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -309,6 +309,25 @@ is_oacc_kernels (omp_context *ctx)
 	  == GF_OMP_TARGET_KIND_OACC_KERNELS));
 }
 
+/* Return true if CTX correspon

Re: [PATCH, rs6000] Add support for int versions of vec_adde

2016-04-05 Thread David Edelsohn
On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer  wrote:
> This patch adds support for the signed and unsigned int versions of the
> vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI
> OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1).  There are
> many of the builtins that are missing and this is the first of a series
> of patches to add them.
>
> There aren't instructions for the int versions of vec_adde so the
> output code is built from other built-ins that do have instructions
> which in this case is just two vec_adds.
>
> The new test cases are executable tests which verify that the generated
> code produces expected values.  C macros were used so that the same
> test case could be used for both the signed and unsigned versions.  An
> extra executable test case is also included to ensure that the modified
> support for the __int128 versions of vec_adde is not broken.  The same
> test case could not be used for both int and __int128 because of some
> differences in loading and storing the vectors.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions.  Is this ok for trunk?
>
> [gcc]
>
> 2016-04-06  Bill Seurer  
>
> * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a
> special case builtin.
> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins,
> altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE
> from altivec_overloaded_builtins structure.  Add support for it to
> altivec_resolve_overloaded_builtin function.
> * config/rs6000/rs6000.c (altivec_init_builtins): Add definition
> for __builtin_vec_adde.
>
> [gcc/testsuite]
>
> 2016-04-06  Bill Seurer  
>
> * gcc.target/powerpc/vec-adde.c: New test.
> * gcc.target/powerpc/vec-adde-int128.c: New test.
>
> Index: gcc/config/rs6000/rs6000-builtin.def
> ===
> --- gcc/config/rs6000/rs6000-builtin.def(revision 234745)
> +++ gcc/config/rs6000/rs6000-builtin.def(working copy)
> @@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO
> before we get to the point about classifying the builtin type.  */
>
>  /* 3 argument Altivec overloaded builtins.  */
> -BU_ALTIVEC_OVERLOAD_3 (ADDE,  "adde")
>  BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec")
>  BU_ALTIVEC_OVERLOAD_3 (MADD,   "madd")
>  BU_ALTIVEC_OVERLOAD_3 (MADDS,  "madds")
> @@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P,   "vcmpgt_p")
>  BU_ALTIVEC_OVERLOAD_P (VCMPGE_P,   "vcmpge_p")
>
>  /* Overloaded Altivec builtins that are handled as special cases.  */
> +BU_ALTIVEC_OVERLOAD_X (ADDE,  "adde")
>  BU_ALTIVEC_OVERLOAD_X (CTF,   "ctf")
>  BU_ALTIVEC_OVERLOAD_X (CTS,   "cts")
>  BU_ALTIVEC_OVERLOAD_X (CTU,   "ctu")
> Index: gcc/config/rs6000/rs6000-c.c
> ===
> --- gcc/config/rs6000/rs6000-c.c(revision 234745)
> +++ gcc/config/rs6000/rs6000-c.c(working copy)
> @@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa
>  RS6000_BTI_unsigned_V1TI, 0 },
>{ ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ,
>  RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
> -  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
> -RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
> -RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
> -  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
> -RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
>{ ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ,
>  RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
>  RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
> @@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores");
>  warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use 
> \
>  assignment for unaligned loads and stores");
>
> +  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
> +{
> +  /* vec_adde needs to be special cased because there is no instruction
> + for the {un}signed int version */

End comment sentence with period and two spaces.

> +  if (nargs != 3)
> +   {
> + error ("vec_adde only accepts 3 arguments");
> + return error_mark_node;
> +   }
> +
> +  tree arg0 = (*arglist)[0];
> +  tree arg0_type = TREE_TYPE (arg0);
> +  tree arg1 = (*arglist)[1];
> +  tree arg1_type = TREE_TYPE (arg1);
> +  tree arg2 = (*arglist)[2];
> +  tree arg2_type = TREE_TYPE (arg2);
> +
> +  /* All 3 arguments must be vectors of (signed or unsigned) (int or
> + __int128) and the types must match */

Same.

> +  if ((arg0_type != arg1_type) || (arg1_type != arg2_type))
> +   goto bad;
> +  if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> +   goto bad;
> +
> +  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> +   {
> + /*