date:20160512

Re: [PATCH] vinsertps XMM16-XMM31 fixes

2016-05-12 Thread Kirill Yukhin

Hi,
On 09 May 18:45, Jakub Jelinek wrote:
> Hi!
> 
> vinsertps is already in AVX512F, so we can use v constraints
> freely.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

--
Thanks, K
> 
> 2016-05-09  Jakub Jelinek  
> 
>   * config/i386/sse.md (*vec_setv4sf_sse4_1, sse4_1_insertps): Use v
>   constraint instead of x in avx alternatives.  Use maybe_evex instead
>   of vex prefix.
> 
>   * gcc.target/i386/avx512vl-vinsertps-1.c: New test.
> 
> --- gcc/config/i386/sse.md.jj 2016-05-09 12:34:58.839865460 +0200
> +++ gcc/config/i386/sse.md2016-05-09 13:15:55.400130875 +0200
> @@ -6657,11 +6657,11 @@ (define_insn "vec_set_0"
>  
>  ;; A subset is vec_setv4sf.
>  (define_insn "*vec_setv4sf_sse4_1"
> -  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
> +  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
>   (vec_merge:V4SF
> (vec_duplicate:V4SF
> - (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
> -   (match_operand:V4SF 1 "register_operand" "0,0,x")
> + (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
> +   (match_operand:V4SF 1 "register_operand" "0,0,v")
> (match_operand:SI 3 "const_int_operand")))]
>"TARGET_SSE4_1
> && ((unsigned) exact_log2 (INTVAL (operands[3]))
> @@ -6684,13 +6684,13 @@ (define_insn "*vec_setv4sf_sse4_1"
> (set_attr "prefix_data16" "1,1,*")
> (set_attr "prefix_extra" "1")
> (set_attr "length_immediate" "1")
> -   (set_attr "prefix" "orig,orig,vex")
> +   (set_attr "prefix" "orig,orig,maybe_evex")
> (set_attr "mode" "V4SF")])
>  
>  (define_insn "sse4_1_insertps"
> -  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
> - (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
> -   (match_operand:V4SF 1 "register_operand" "0,0,x")
> +  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
> + (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
> +   (match_operand:V4SF 1 "register_operand" "0,0,v")
> (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
>UNSPEC_INSERTPS))]
>"TARGET_SSE4_1"
> @@ -6718,7 +6718,7 @@ (define_insn "sse4_1_insertps"
> (set_attr "prefix_data16" "1,1,*")
> (set_attr "prefix_extra" "1")
> (set_attr "length_immediate" "1")
> -   (set_attr "prefix" "orig,orig,vex")
> +   (set_attr "prefix" "orig,orig,maybe_evex")
> (set_attr "mode" "V4SF")])
>  
>  (define_split
> --- gcc/testsuite/gcc.target/i386/avx512vl-vinsertps-1.c.jj   2016-05-09 
> 13:10:08.277794535 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-vinsertps-1.c  2016-05-09 
> 13:13:51.788792211 +0200
> @@ -0,0 +1,39 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +__m128
> +f1 (__m128 a, __m128 b)
> +{
> +  register __m128 c __asm ("xmm16") = a;
> +  asm volatile ("" : "+v" (c));
> +  c = _mm_insert_ps (c, b, 1);
> +  asm volatile ("" : "+v" (c));
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler "vinsertps\[^\n\r\]*xmm16" } } */
> +
> +__v4sf
> +f2 (__v4sf a, float b)
> +{
> +  register __v4sf c __asm ("xmm17") = a;
> +  asm volatile ("" : "+v" (c));
> +  c[1] = b;
> +  asm volatile ("" : "+v" (c));
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler "vinsertps\[^\n\r\]*xmm17" } } */
> +
> +__v4sf
> +f3 (__v4sf a, float b)
> +{
> +  register float c __asm ("xmm18") = b;
> +  asm volatile ("" : "+v" (c));
> +  a[1] = c;
> +  return a;
> +}
> +
> +/* { dg-final { scan-assembler "vinsertps\[^\n\r\]*xmm18" } } */
> 
>   Jakub

Re: [PATCH] vinsertps XMM16-XMM31 fixes

2016-05-12 Thread Kirill Yukhin

On 09 May 18:47, Jakub Jelinek wrote:
> Hi!
> 
> The testcases show that we emit AVX512BW instructions even when
> AVX512BW is disabled.  Additionally, two of the 4 patterns were using
> weirdo constraint for the output (x instead of v, while they used v for
> input).
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok
> for trunk?
OK.

--
Thanks, K
> 
> 2016-05-09  Jakub Jelinek  
> 
>   PR target/71019
>   * config/i386/sse.md (_packssdw,
>   _packusdw): Make sure EVEX encoded insn
>   is not emitted unless TARGET_AVX512BW.
>   (_packuswb, _packsswb):
>   Likewise.  For TARGET_AVX512BW, use "=v" constraint instead of "=x"
>   for the result operand.
> 
>   * gcc.target/i386/avx512vl-pack-1.c: New test.
>   * gcc.target/i386/avx512vl-pack-2.c: New test.
>   * gcc.target/i386/avx512bw-pack-2.c: New test.
> 
> --- gcc/config/i386/sse.md.jj 2016-05-09 11:38:36.0 +0200
> +++ gcc/config/i386/sse.md2016-05-09 12:34:58.839865460 +0200
> @@ -11500,54 +11500,57 @@ (define_expand "vec_pack_trunc_"
>  })
>  
>  (define_insn "_packsswb"
> -  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
> +  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
>   (vec_concat:VI1_AVX512
> (ss_truncate:
> - (match_operand: 1 "register_operand" "0,v"))
> + (match_operand: 1 "register_operand" "0,x,v"))
> (ss_truncate:
> - (match_operand: 2 "vector_operand" "xBm,vm"]
> + (match_operand: 2 "vector_operand" "xBm,xm,vm"]
>"TARGET_SSE2 &&  && "
>"@
> packsswb\t{%2, %0|%0, %2}
> +   vpacksswb\t{%2, %1, %0|%0, %1, %2}
> vpacksswb\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512bw")
> (set_attr "type" "sselog")
> -   (set_attr "prefix_data16" "1,*")
> -   (set_attr "prefix" "orig,maybe_evex")
> +   (set_attr "prefix_data16" "1,*,*")
> +   (set_attr "prefix" "orig,,evex")
> (set_attr "mode" "")])
>  
>  (define_insn "_packssdw"
> -  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
> +  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
>   (vec_concat:VI2_AVX2
> (ss_truncate:
> - (match_operand: 1 "register_operand" "0,v"))
> + (match_operand: 1 "register_operand" "0,x,v"))
> (ss_truncate:
> - (match_operand: 2 "vector_operand" "xBm,vm"]
> + (match_operand: 2 "vector_operand" "xBm,xm,vm"]
>"TARGET_SSE2 &&  && "
>"@
> packssdw\t{%2, %0|%0, %2}
> +   vpackssdw\t{%2, %1, %0|%0, %1, %2}
> vpackssdw\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512bw")
> (set_attr "type" "sselog")
> -   (set_attr "prefix_data16" "1,*")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix_data16" "1,*,*")
> +   (set_attr "prefix" "orig,,evex")
> (set_attr "mode" "")])
>  
>  (define_insn "_packuswb"
> -  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
> +  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
>   (vec_concat:VI1_AVX512
> (us_truncate:
> - (match_operand: 1 "register_operand" "0,v"))
> + (match_operand: 1 "register_operand" "0,x,v"))
> (us_truncate:
> - (match_operand: 2 "vector_operand" "xBm,vm"]
> + (match_operand: 2 "vector_operand" "xBm,xm,vm"]
>"TARGET_SSE2 &&  && "
>"@
> packuswb\t{%2, %0|%0, %2}
> +   vpackuswb\t{%2, %1, %0|%0, %1, %2}
> vpackuswb\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "isa" "noavx,avx")
> +  [(set_attr "isa" "noavx,avx,avx512bw")
> (set_attr "type" "sselog")
> -   (set_attr "prefix_data16" "1,*")
> -   (set_attr "prefix" "orig,vex")
> +   (set_attr "prefix_data16" "1,*,*")
> +   (set_attr "prefix" "orig,,evex")
> (set_attr "mode" "")])
>  
>  (define_insn "avx512bw_interleave_highv64qi"
> @@ -14572,21 +14575,22 @@ (define_insn "_mpsadbw"
> (set_attr "mode" "")])
>  
>  (define_insn "_packusdw"
> -  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
> +  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
>   (vec_concat:VI2_AVX2
> (us_truncate:
> - (match_operand: 1 "register_operand" "0,0,v"))
> + (match_operand: 1 "register_operand" "0,0,x,v"))
> (us_truncate:
> - (match_operand: 2 "vector_operand" 
> "YrBm,*xBm,vm"]
> + (match_operand: 2 "vector_operand" 
> "YrBm,*xBm,xm,vm"]
>"TARGET_SSE4_1 &&  && "
>"@
> packusdw\t{%2, %0|%0, %2}
> packusdw\t{%2, %0|%0, %2}
> +   vpackusdw\t{%2, %1, %0|%0, %1, %2}
> vpackusdw\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "isa" "noavx,noavx,avx")
> +  [(set_attr "isa" "noavx,noavx,avx,avx512bw")
> (set_attr "type" "sselog")
> (set_attr "prefix_extra" "1")
> -   (set_attr "prefix" "orig,orig,maybe_evex")
> +   (set_attr "prefix" "orig,orig,,evex")
> (set_attr "mode" "")])
>

[PATCH] PR71060

2016-05-12 Thread Richard Biener


The PR shows that data-dependence analysis is too strict when making sure
that dr_indices of two DRs are compatible enough to be fed into the
dependence machinery.  The important part is that the structure of the
object needs to be the same which is ensured by type equality.  Then
of course they need to be based on the same address.  Things like whether
the object was referenced via a restrict pointer or with special
alignment don't matter for the dependence analysis step using
dr_indices.

Thus the following patch improves this bit in dependence analysis,
not yet fixing the underlying issue of the "miscompare" which is
if-conversion dropping restrict info when building masked load/stores.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2016-05-12  Richard Biener  

PR tree-optimization/71060
* tree-data-ref.c (initialize_data_dependence_relation): Do not
require exact match of DR_BASE_OBJECT but only matching address and
type.

Index: gcc/tree-data-ref.c
===
*** gcc/tree-data-ref.c (revision 236158)
--- gcc/tree-data-ref.c (working copy)
*** initialize_data_dependence_relation (str
*** 1538,1545 
  }
  
/* If the references do not access the same object, we do not know
!  whether they alias or not.  */
!   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
  {
DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;
--- 1538,1550 
  }
  
/* If the references do not access the same object, we do not know
!  whether they alias or not.  We do not care about TBAA or alignment
!  info so we can use OEP_ADDRESS_OF to avoid false negatives.
!  But the accesses have to use compatible types as otherwise the
!  built indices would not match.  */
!   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 
OEP_ADDRESS_OF)
!   || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
! TREE_TYPE (DR_BASE_OBJECT (b
  {
DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;

Re: [PATCH] Improve XMM16-XMM31 handling in vpinsr*

2016-05-12 Thread Kirill Yukhin

Hi,
On 09 May 18:51, Jakub Jelinek wrote:
> Hi!
> 
> vpinsr{b,w} are AVX512BW, vpinsr{d,q} are AVX512DQ.
> This patch makes us use v constraint instead of x in those
> cases.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

--
Thanks, K
> 
> 2016-05-09  Jakub Jelinek  
> 
>   * config/i386/sse.md (pinsr_evex_isa): New mode attr.
>   (_pinsr): Add 2 alternatives with
>   v constraints instead of x and  isa attribute.
> 
>   * gcc.target/i386/avx512bw-vpinsr-1.c: New test.
>   * gcc.target/i386/avx512dq-vpinsr-1.c: New test.
>   * gcc.target/i386/avx512vl-vpinsr-1.c: New test.

Re: [PATCH] Improve sse2_loadld

2016-05-12 Thread Kirill Yukhin

Hi,
On 09 May 18:52, Jakub Jelinek wrote:
> Hi!
> 
> I hope this pattern actually shouldn't be used for AVX512*, because
> vpinsr should match instead, but just in case it doesn't, all the insns
> involved are in AVX512F.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

--
Thanks, K
> 
> 2016-05-09  Jakub Jelinek  
> 
>   * config/i386/sse.md (sse2_loadld): Use v instead of x
>   constraint in alternatives 0,1,4.

Re: [PATCH] vec_extract XMM16-XMM17 improvements

2016-05-12 Thread Kirill Yukhin

Hi,
On 09 May 18:55, Jakub Jelinek wrote:
> Hi!
> 
> vpextr{b,w} are in AVX512BW, so is vpsrldq, and vpextr{d,q} are in
> AVX512DQ.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.

--
Thanks, K
> 
> 2016-05-09  Jakub Jelinek  
> 
>   * config/i386/i386.md (isa): Add x64_avx512dq, enable if
>   TARGET_64BIT && TARGET_AVX512DQ.
>   * config/i386/sse.md (*vec_extract): Add avx512bw alternatives.
>   (*vec_extract_zext): Add avx512bw alternative.
>   (*vec_extract_0, *vec_extractv4si_0_zext,
>   *vec_extractv2di_0_sse): Use v constraint instead of x constraint.
>   (*vec_extractv4si): Add avx512dq and avx512bw alternatives.
>   (*vec_extractv4si_zext): Add avx512dq alternative.
>   (*vec_extractv2di_1): Add x64_avx512dq and avx512bw alternatives,
>   use v instead of x constraint in other alternatives where possible.
> 
>   * gcc.target/i386/avx512bw-vpextr-1.c: New test.
>   * gcc.target/i386/avx512dq-vpextr-1.c: New test.

Re: [PATCH] vec_extract XMM16-XMM17 improvements

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 10:30:53AM +0300, Kirill Yukhin wrote:
> Hi,
> On 09 May 18:55, Jakub Jelinek wrote:
> > Hi!
> > 
> > vpextr{b,w} are in AVX512BW, so is vpsrldq, and vpextr{d,q} are in
> > AVX512DQ.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> OK.

Note, this patch depends on the
https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00393.html
patch.  You've acked an earlier version of that patch, but not
the one with the new constraint.
Is that also ok for trunk?

> > 2016-05-09  Jakub Jelinek  
> > 
> > * config/i386/i386.md (isa): Add x64_avx512dq, enable if
> > TARGET_64BIT && TARGET_AVX512DQ.
> > * config/i386/sse.md (*vec_extract): Add avx512bw alternatives.
> > (*vec_extract_zext): Add avx512bw alternative.
> > (*vec_extract_0, *vec_extractv4si_0_zext,
> > *vec_extractv2di_0_sse): Use v constraint instead of x constraint.
> > (*vec_extractv4si): Add avx512dq and avx512bw alternatives.
> > (*vec_extractv4si_zext): Add avx512dq alternative.
> > (*vec_extractv2di_1): Add x64_avx512dq and avx512bw alternatives,
> > use v instead of x constraint in other alternatives where possible.
> > 
> > * gcc.target/i386/avx512bw-vpextr-1.c: New test.
> > * gcc.target/i386/avx512dq-vpextr-1.c: New test.

Jakub

Re: [PATCH] vec_extract XMM16-XMM17 improvements

2016-05-12 Thread Kirill Yukhin

On 12 May 10:11, Jakub Jelinek wrote:
> On Thu, May 12, 2016 at 10:30:53AM +0300, Kirill Yukhin wrote:
> > Hi,
> > On 09 May 18:55, Jakub Jelinek wrote:
> > > Hi!
> > > 
> > > vpextr{b,w} are in AVX512BW, so is vpsrldq, and vpextr{d,q} are in
> > > AVX512DQ.
> > > 
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > OK.
> 
> Note, this patch depends on the
> https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00393.html
> patch.  You've acked an earlier version of that patch, but not
> the one with the new constraint.
> Is that also ok for trunk?
Yes.
> 
> > > 2016-05-09  Jakub Jelinek  
> > > 
> > >   * config/i386/i386.md (isa): Add x64_avx512dq, enable if
> > >   TARGET_64BIT && TARGET_AVX512DQ.
> > >   * config/i386/sse.md (*vec_extract): Add avx512bw alternatives.
> > >   (*vec_extract_zext): Add avx512bw alternative.
> > >   (*vec_extract_0, *vec_extractv4si_0_zext,
> > >   *vec_extractv2di_0_sse): Use v constraint instead of x constraint.
> > >   (*vec_extractv4si): Add avx512dq and avx512bw alternatives.
> > >   (*vec_extractv4si_zext): Add avx512dq alternative.
> > >   (*vec_extractv2di_1): Add x64_avx512dq and avx512bw alternatives,
> > >   use v instead of x constraint in other alternatives where possible.
> > > 
> > >   * gcc.target/i386/avx512bw-vpextr-1.c: New test.
> > >   * gcc.target/i386/avx512dq-vpextr-1.c: New test.
> 
>   Jakub

Re: Simple bitop reassoc in match.pd

2016-05-12 Thread Richard Biener

On Wed, May 11, 2016 at 7:56 PM, Marc Glisse  wrote:
> On Wed, 11 May 2016, Jeff Law wrote:
>
>>> We could also simplify (int)(_Bool)x to x using VRP information that x
>>> is in [0, 1], but apparently when VRP replaces x==0 with y=x^1,(_Bool)y,
>>> it does not compute a range for the new variable y, and by the time the
>>> next VRP pass comes, it is too late.
>>
>> Seems like a clear oversight.
>
>
> In get_value_range, there is:
>   /* If we query the range for a new SSA name return an unmodifiable
> VARYING.
>  We should get here at most from the substitute-and-fold stage which
>  will never try to change values.  */
> so this is a known limitation.
>
> We could try to change that (XRESIZEVEC, memset(0) on the new elements,
> update num_vr_values to the new num_ssa_names, at this point vr_value should
> be replaced with a vector).
>
> We could also use set_range_info and make simplify_conversion_using_ranges
> use get_range_info instead of get_value_range. Might even move the whole
> function to match.pd then ;-)

Yeah - note that VRP already calls set_range_info before simplifying
stmts.  It's just that substitute_and_fold doesn't apply fold_stmt (and
thus match.pd) to all stmts; it only applies the pass-specific "fold"
(vrp_fold_stmt) to all stmts.

Richard.

> --
> Marc Glisse

Re: [Patch ARM/AArch64 09/11] Add missing vrnd{,a,m,n,p,x} tests.

2016-05-12 Thread Jiong Wang




On 11/05/16 14:23, Christophe Lyon wrote:

2016-05-02  Christophe Lyon  

* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c: New.
* gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c: New.

Change-Id: Iab5f98dc4b15f9a2f61b622a9f62b207872f1737

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
new file mode 100644
index 000..5f492d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-add-options arm_v8_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc180, 0xc170 };
+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc180, 0xc170,
+  0xc160, 0xc150 };
+
+#define INSN vrnd
+#define TEST_MSG "VRND"
+
+#include "vrndX.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
new file mode 100644
index 000..629240d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
@@ -0,0 +1,43 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+  /* vector_res = vrndX (vector), then store the result.  */
+#define TEST_VRND2(INSN, Q, T1, T2, W, N)  \
+  VECT_VAR (vector_res, T1, W, N) =\
+INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N));   \
+vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N),\
+  VECT_VAR (vector_res, T1, W, N))
+
+  /* Two auxiliary macros are necessary to expand INSN.  */
+#define TEST_VRND1(INSN, Q, T1, T2, W, N)  \
+  TEST_VRND2 (INSN, Q, T1, T2, W, N)
+
+#define TEST_VRND(Q, T1, T2, W, N) \
+  TEST_VRND1 (INSN, Q, T1, T2, W, N)
+
+  DECL_VARIABLE (vector, float, 32, 2);
+  DECL_VARIABLE (vector, float, 32, 4);
+
+  DECL_VARIABLE (vector_res, float, 32, 2);
+  DECL_VARIABLE (vector_res, float, 32, 4);
+
+  clean_results ();
+
+  VLOAD (vector, buffer, , float, f, 32, 2);
+  VLOAD (vector, buffer, q, float, f, 32, 4);
+
+  TEST_VRND ( , float, f, 32, 2);
+  TEST_VRND (q, float, f, 32, 4);
+
+  CHECK_FP (TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int
+main (void)
+{
+  FNNAME (INSN) ();
+  return 0;
+}



Hi Christophe,

  I have a question about how the test inputs are selected.

  For example, vrndm rounds to integral towards minus infinity, while
vrnda rounds to nearest with ties to even; have these differences been tested?


  Thanks.

Regards,
Jiong

Re: Allow embedded timestamps by C/C++ macros to be set externally (3)

2016-05-12 Thread Bernd Schmidt


On 05/12/2016 02:36 AM, Dhole wrote:

+  error_at (input_location, "environment variable SOURCE_DATE_EPOCH must "
+   "expand to a non-negative integer less than or equal to %wd",
+   MAX_SOURCE_DATE_EPOCH);



+/* The value (as a unix timestamp) corresponds to date
+   "Dec 31 9999 23:59:59 UTC", which is the latest date that __DATE__ and
+   __TIME__ can store.  */
+#define MAX_SOURCE_DATE_EPOCH 253402300799


This should use HOST_WIDE_INT_C to make sure we match %wd in the error
output, and to make sure we don't get any "too large for an integer" warnings.



+  struct tm *tb = NULL;

[...]

+  snprintf (source_date_epoch, 21, "%llu", (unsigned long long) tb);


That seems like the wrong thing to print.


diff --git a/gcc/testsuite/gcc.dg/cpp/source_date_epoch-2.c 
b/gcc/testsuite/gcc.dg/cpp/source_date_epoch-2.c
new file mode 100644
index 000..4211552
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/source_date_epoch-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-set-compiler-env-var SOURCE_DATE_EPOCH "AAA" } */
+
+int
+main(void)
+{
+  __builtin_printf ("%s %s\n", __DATE__, __TIME__); /* { dg-error "environment variable 
SOURCE_DATE_EPOCH must expand to a non-negative integer less than or equal to 253402300799" 
"Invalid SOURCE_DATE_EPOCH not reported" } */


You can shorten the string you look for, like just "SOURCE_DATE_EPOCH 
must expand". People generally also skip the second arg to dg-error.



+  __builtin_printf ("%s %s\n", __DATE__, __TIME__); /* { dg-bogus "environment variable 
SOURCE_DATE_EPOCH must expand to a non-negative integer less than or equal to 253402300799" 
"Invalid SOURCE_DATE_EPOCH reported twice" }  */


I would have expected no dg- directive at all on this line. Without one, 
any message should be reported as an excess error by the framework.



@@ -874,6 +906,10 @@ if { [info procs saved-dg-test] == [list] } {
if [info exists set_target_env_var] {
unset set_target_env_var
}
+   if [info exists set_compiler_env_var] {
+   restore-compiler-env-var
+   unset set_compiler_env_var
+   }


Shouldn't we also clear saved_compiler_env_var to keep that from growing?


@@ -389,9 +390,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned 
char *cpp_flags,
   enum cpp_ttype type;
   unsigned char add_flags = 0;
   enum overflow_type overflow = OT_NONE;
-  time_t source_date_epoch = get_source_date_epoch ();

-  cpp_init_source_date_epoch (parse_in, source_date_epoch);
+  cpp_init_source_date_epoch (parse_in);

   timevar_push (TV_CPP);
  retry:


I just spotted this - why is this initialization here and not in say 
init_c_lex? Or skip the call into libcpp and just put it in 
cpp_create_reader.



diff --git a/libcpp/macro.c b/libcpp/macro.c
index c2a8376..55e53bf 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -358,9 +358,13 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode 
*node,
  struct tm *tb = NULL;

  /* Set a reproducible timestamp for __DATE__ and __TIME__ macro
-usage if SOURCE_DATE_EPOCH is defined.  */
- if (pfile->source_date_epoch != (time_t) -1)
-tb = gmtime (&pfile->source_date_epoch);
+if SOURCE_DATE_EPOCH is defined.  */
+ if (pfile->source_date_epoch == (time_t) -2
+ && pfile->cb.get_source_date_epoch != NULL)
+ pfile->source_date_epoch = pfile->cb.get_source_date_epoch(pfile);


Formatting.


Bernd

Re: libgomp: Make GCC 5 OpenACC offloading executables work

2016-05-12 Thread Bernd Schmidt


On 05/11/2016 06:02 PM, Thomas Schwinge wrote:

I conceptually agree to that.  (If we're serious about that, then we can
remove more code, such as the legacy libgomp entry point itself -- a
"missing symbol: [...]" is still vaguely better than a SIGSEGV.)  Yet,
what I fixed here, is just what Jakub and Nathan agreed upon in
:
"GCC 5 compiled offloaded OpenACC/PTX code will always do host fallback".
Currently such code will always result in a SIGSEGV, which the patch
fixes.  (And, given that we now have this patch, it seems "unfair" to
"wait until someone actually reports that in bugzilla".)


I'll defer to Jakub. Don't want to block an existing patch if you really 
want to apply it, I just think we should go in the other direction of 
removing this fallback support eventually.



Bernd

Re: [PATCH][ARM] PR target/70830: Avoid POP-{reglist}^ when returning from interrupt handlers

2016-05-12 Thread Kyrill Tkachov


Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00395.html

Thanks,
Kyrill

On 05/05/16 12:50, Kyrill Tkachov wrote:

Hi all,

In this PR we deal with some fallout from the conversion to unified assembly.
We now end up emitting instructions like:
  pop {r0,r1,r2,r3,pc}^
which is not legal. We have to use an LDM form.

There are bugs in two arm.c functions: output_return_instruction and 
arm_output_multireg_pop.

In output_return_instruction the buggy hunk from the conversion was:
  else
-   if (TARGET_UNIFIED_ASM)
  sprintf (instr, "pop%s\t{", conditional);
-   else
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

The code was already very obscurely structured and arguably the bug was latent.
It emitted POP only when TARGET_UNIFIED_ASM was on, and since
TARGET_UNIFIED_ASM was on only for Thumb, we never went down this path in
interrupt handling code, since the interrupt attribute is only available for
ARM code. After the removal of TARGET_UNIFIED_ASM we ended up using POP
unconditionally. So this patch adds a check for IS_INTERRUPT and outputs the
appropriate LDM form.

In arm_output_multireg_pop the buggy hunk was:
-  if ((regno_base == SP_REGNUM) && TARGET_THUMB)
+  if ((regno_base == SP_REGNUM) && update)
 {
-  /* Output pop (not stmfd) because it has a shorter encoding.  */
-  gcc_assert (update);
   sprintf (pattern, "pop%s\t{", conditional);
 }

Again, the POP was guarded on TARGET_THUMB and so would never be taken on 
interrupt handling
routines. This patch guards that with the appropriate check on interrupt return.

Also, there are a couple of bugs in the 'else' branch of that 'if':
* The "ldmfd%s" was output without a '\t' at the end which meant that the base 
register
name would be concatenated with the 'ldmfd', creating invalid assembly.

* The logic:

  if (regno_base == SP_REGNUM)
  /* update is never true here, hence there is no need to handle
 pop here.  */
sprintf (pattern, "ldmfd%s", conditional);

  if (update)
sprintf (pattern, "ldmia%s\t", conditional);
  else
sprintf (pattern, "ldm%s\t", conditional);

Meant that for "regno_base == SP_REGNUM && !update" we'd end up printing
"ldmfd%sldm%s\t" to pattern. I didn't manage to reproduce that condition
though, so maybe it can't ever occur.
This patch fixes both these issues nevertheless.

I've added the testcase from the PR to catch the fix in 
output_return_instruction.
The testcase doesn't catch the bugs in arm_output_multireg_pop, but the 
existing tests
gcc.target/arm/interrupt-1.c and gcc.target/arm/interrupt-2.c would have caught 
them
if only they were assemble tests rather than just compile. So this patch makes 
them
assembly tests (and reverts the scan-assembler checks for the correct LDM 
pattern).

Bootstrapped and tested on arm-none-linux-gnueabihf.
Ok for trunk and GCC 6?

Thanks,
Kyrill

2016-05-05  Kyrylo Tkachov  

PR target/70830
* config/arm/arm.c (arm_output_multireg_pop): Avoid POP instruction
when popping the PC and within an interrupt handler routine.
Add missing tab to output of "ldmfd".
(output_return_instruction): Output LDMFD with SP update rather
than POP when returning from interrupt handler.

2016-05-05  Kyrylo Tkachov  

PR target/70830
* gcc.target/arm/interrupt-1.c: Change dg-compile to dg-assemble.
Add -save-temps to dg-options.
Scan for ldmfd rather than pop instruction.
* gcc.target/arm/interrupt-2.c: Likewise.
* gcc.target/arm/pr70830.c: New test.

Re: [PATCH][ARM] PR target/70830: Avoid POP-{reglist}^ when returning from interrupt handlers

2016-05-12 Thread Ramana Radhakrishnan

On Thu, May 5, 2016 at 12:50 PM, Kyrill Tkachov
 wrote:
> Hi all,
>
> In this PR we deal with some fallout from the conversion to unified
> assembly.
> We now end up emitting instructions like:
>   pop {r0,r1,r2,r3,pc}^
> which is not legal. We have to use an LDM form.
>
> There are bugs in two arm.c functions: output_return_instruction and
> arm_output_multireg_pop.
>
> In output_return_instruction the buggy hunk from the conversion was:
>   else
> -   if (TARGET_UNIFIED_ASM)
>   sprintf (instr, "pop%s\t{", conditional);
> -   else
> - sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
>
> The code was already very obscurely structured and arguably the bug was
> latent.
> It emitted POP only when TARGET_UNIFIED_ASM was on, and since
> TARGET_UNIFIED_ASM was on
> only for Thumb, we never went down this path in interrupt handling code, since
> the interrupt
> attribute is only available for ARM code. After the removal of
> TARGET_UNIFIED_ASM we ended up
> using POP unconditionally. So this patch adds a check for IS_INTERRUPT and
> outputs the
> appropriate LDM form.
>
> In arm_output_multireg_pop the buggy hunk was:
> -  if ((regno_base == SP_REGNUM) && TARGET_THUMB)
> +  if ((regno_base == SP_REGNUM) && update)
>  {
> -  /* Output pop (not stmfd) because it has a shorter encoding.  */
> -  gcc_assert (update);
>sprintf (pattern, "pop%s\t{", conditional);
>  }
>
> Again, the POP was guarded on TARGET_THUMB and so would never be taken on
> interrupt handling
> routines. This patch guards that with the appropriate check on interrupt
> return.
>
> Also, there are a couple of bugs in the 'else' branch of that 'if':
> * The "ldmfd%s" was output without a '\t' at the end which meant that the
> base register
> name would be concatenated with the 'ldmfd', creating invalid assembly.
>
> * The logic:
>
>   if (regno_base == SP_REGNUM)
>   /* update is never true here, hence there is no need to handle
>  pop here.  */
> sprintf (pattern, "ldmfd%s", conditional);
>
>   if (update)
> sprintf (pattern, "ldmia%s\t", conditional);
>   else
> sprintf (pattern, "ldm%s\t", conditional);
>
> Meant that for "regno_base == SP_REGNUM && !update" we'd end up printing
> "ldmfd%sldm%s\t"
> to pattern. I didn't manage to reproduce that condition though, so maybe it
> can't ever occur.
> This patch fixes both these issues nevertheless.
>
> I've added the testcase from the PR to catch the fix in
> output_return_instruction.
> The testcase doesn't catch the bugs in arm_output_multireg_pop, but the
> existing tests
> gcc.target/arm/interrupt-1.c and gcc.target/arm/interrupt-2.c would have
> caught them
> if only they were assemble tests rather than just compile. So this patch
> makes them
> assembly tests (and reverts the scan-assembler checks for the correct LDM
> pattern).
>
> Bootstrapped and tested on arm-none-linux-gnueabihf.
> Ok for trunk and GCC 6?
>
> Thanks,
> Kyrill
>
> 2016-05-05  Kyrylo Tkachov  
>
> PR target/70830
> * config/arm/arm.c (arm_output_multireg_pop): Avoid POP instruction
> when popping the PC and within an interrupt handler routine.
> Add missing tab to output of "ldmfd".
> (output_return_instruction): Output LDMFD with SP update rather
> than POP when returning from interrupt handler.
>
> 2016-05-05  Kyrylo Tkachov  
>
> PR target/70830
> * gcc.target/arm/interrupt-1.c: Change dg-compile to dg-assemble.
> Add -save-temps to dg-options.
> Scan for ldmfd rather than pop instruction.
> * gcc.target/arm/interrupt-2.c: Likewise.
> * gcc.target/arm/pr70830.c: New test.


OK for affected branches and trunk  - thanks for fixing this and sorry
about the breakage.

Ramana

Re: [PATCH, libgomp] Rewire OpenACC async

2016-05-12 Thread Jakub Jelinek

On Tue, Mar 29, 2016 at 05:48:25PM +0800, Chung-Lin Tang wrote:
> I've updated this patch for trunk (as attached), and re-tested without
> regressions. This patch is still a fix for 
> libgomp.oacc-c-c++-common/asyncwait-1.c,
> which FAILs right now.
> 
> ChangeLog is still as before. Is this okay for trunk?

Mostly ok for trunk, but as it is an ABI incompatible change for the plugin
interface (affecting OpenACC capable plugins only), I think you just should
rename the plugin callback you add the argument to, so that
  || !DLSYM_OPT (openacc.register_async_cleanup,
 openacc_register_async_cleanup)
would fail when trying to load GCC 6.x nvptx plugin from GCC 7.x libgomp
or vice versa.

Jakub

Re: [PATCH, libgomp] Fix deadlock in acc_set_device_type

2016-05-12 Thread Jakub Jelinek

On Mon, Mar 28, 2016 at 05:45:42PM +0800, Chung-Lin Tang wrote:
> Hi Jakub, there's a path for deadlock on acc_device_lock when going
> through the acc_set_device_type() OpenACC library function.
> Basically, the gomp_init_targets_once() function should not be
> called with that held. The attached patch moves it appropriately.
> 
> Also in this patch, there are several cases in acc_* functions
> where gomp_init_targets_once() is guarded by a test of
> !cached_base_dev. Since that function already uses pthread_once() to
> call gomp_target_init(), and technically cached_base_dev
> is protected by acc_device_lock, the cleanest way should be to
> simply drop those "if(!cached_base_dev)" tests.
> 
> Tested libgomp without regressions on an nvptx offloaded system,
> is this okay for trunk?

Ok, with ChangeLog nits:
> 
> 2016-03-28  Chung-Lin Tang  
> 
> * oacc-init.c (acc_init): Remove !cached_base_dev condition on call to
> gomp_init_targets_once().
> (acc_set_device_type): Remove !cached_base_dev condition on call to
> gomp_init_targets_once(), move call to before acc_device_lock acquire,
> to avoid deadlock.
> (acc_get_device_num): Remove !cached_base_dev condition on call to
> gomp_init_targets_once().
> (acc_set_device_num): Likewise.

Please just use gomp_init_targets_once instead of gomp_init_targets_once()
in the ChangeLog.

> Index: oacc-init.c
> ===
> --- oacc-init.c   (revision 234502)
> +++ oacc-init.c   (working copy)
> @@ -433,8 +433,7 @@ goacc_attach_host_thread_to_device (int ord)
>  void
>  acc_init (acc_device_t d)
>  {
> -  if (!cached_base_dev)
> -gomp_init_targets_once ();
> +  gomp_init_targets_once ();
>  
>gomp_mutex_lock (&acc_device_lock);
>  
> @@ -498,11 +497,10 @@ acc_set_device_type (acc_device_t d)
>struct gomp_device_descr *base_dev, *acc_dev;
>struct goacc_thread *thr = goacc_thread ();
>  
> +  gomp_init_targets_once ();
> +
>gomp_mutex_lock (&acc_device_lock);
>  
> -  if (!cached_base_dev)
> -gomp_init_targets_once ();
> -
>cached_base_dev = base_dev = resolve_device (d, true);
>acc_dev = &base_dev[goacc_device_num];
>  
> @@ -563,8 +561,7 @@ acc_get_device_num (acc_device_t d)
>if (d >= _ACC_device_hwm)
>  gomp_fatal ("unknown device type %u", (unsigned) d);
>  
> -  if (!cached_base_dev)
> -gomp_init_targets_once ();
> +  gomp_init_targets_once ();
>  
>gomp_mutex_lock (&acc_device_lock);
>dev = resolve_device (d, true);
> @@ -584,8 +581,7 @@ acc_set_device_num (int ord, acc_device_t d)
>struct gomp_device_descr *base_dev, *acc_dev;
>int num_devices;
>  
> -  if (!cached_base_dev)
> -gomp_init_targets_once ();
> +  gomp_init_targets_once ();
>  
>if (ord < 0)
>  ord = goacc_device_num;


Jakub

[PATCH, PR tree-optimization/71006] Fix vectype computation for COND_EXPR

2016-05-12 Thread Ilya Enkovich

Hi,

Currently we have code in vect_determine_vectorization_factor to
compute the vectype for mask producers.  It wasn't meant to be used for
COND_EXPR assignments, but it is now used for them in some cases,
resulting in a wrong vectype.

Bootstrapped and regtested for x86_64-pc-linux-gnu.  OK for trunk?

Thanks,
Ilya
--
gcc/

2016-05-12  Ilya Enkovich  

PR tree-optimization/71006
* tree-vect-loop.c (vect_determine_vectorization_factor): Don't
consider COND_EXPR as a mask producer.

gcc/testsuite/

2016-05-12  Ilya Enkovich  

PR tree-optimization/71006
* gcc.dg/pr71006.c: New test.


diff --git a/gcc/testsuite/gcc.dg/pr71006.c b/gcc/testsuite/gcc.dg/pr71006.c
new file mode 100644
index 000..2b45aa0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71006.c
@@ -0,0 +1,16 @@
+/* PR target/71006 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -ftree-vectorize" } */
+
+unsigned char uu, gu, e2;
+
+void
+fs (void)
+{
+  char *nq = (char *)&gu, *k4 = (char *)&gu;
+  while (*k4 < 1)
+{
+  uu += (*nq != 0 || e2 != 0);
+  ++*k4;
+}
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index da98211..d673c67 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -437,7 +437,9 @@ vect_determine_vectorization_factor (loop_vec_info 
loop_vinfo)
  /* Bool ops don't participate in vectorization factor
 computation.  For comparison use compared types to
 compute a factor.  */
- if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+ if (TREE_CODE (scalar_type) == BOOLEAN_TYPE
+ && is_gimple_assign (stmt)
+ && gimple_assign_rhs_code (stmt) != COND_EXPR)
{
  if (STMT_VINFO_RELEVANT_P (stmt_info))
mask_producers.safe_push (stmt_info);

Re: [PATCH 1/4, libgomp] Resolve deadlock on plugin exit (Ping x2)

2016-05-12 Thread Jakub Jelinek

On Wed, May 11, 2016 at 02:49:03PM +0800, Chung-Lin Tang wrote:
> Ping x2

These are again plugin ABI incompatible changes, so you need to arrange
for the 6.1 plugins to be rejected by trunk libgomp with the changes
and vice versa.

Jakub

Re: [PATCH] Fix crash with --help=^ (PR driver/71063)

2016-05-12 Thread Marek Polacek

On Wed, May 11, 2016 at 10:50:55PM +0200, Jakub Jelinek wrote:
> On Wed, May 11, 2016 at 10:40:36PM +0200, Marek Polacek wrote:
> > We crashed when given --help=^ and Kyrill explained why in the PR
> > ().  The following
> > seems as good a fix as any, I think.
> > 
> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> > 
> > 2016-05-11  Marek Polacek  
> > 
> > PR driver/71063
> > * opts.c (common_handle_option): Detect missing argument for --help^.
> > 
> > * gcc.dg/opts-7.c: New test.
> 
> Ok.  But while touching this, can you fix the formatting around too?
> space between * and a, or ++ a, or & exclude_flags?

Happy to.  I've fixed the whole OPT__help_ case:

Bootstrapped/regtested on x86_64-linux, applying to trunk.

2016-05-12  Marek Polacek  

PR driver/71063
* opts.c (common_handle_option): Detect missing argument for --help=^.

* gcc.dg/opts-7.c: New test.

diff --git gcc/opts.c gcc/opts.c
index 0f9431a..63d41ca 100644
--- gcc/opts.c
+++ gcc/opts.c
@@ -1595,7 +1595,7 @@ common_handle_option (struct gcc_options *opts,
 
 case OPT__help_:
   {
-   const char * a = arg;
+   const char *a = arg;
unsigned int include_flags = 0;
/* Note - by default we include undocumented options when listing
   specific classes.  If you only want to see documented options
@@ -1612,11 +1612,11 @@ common_handle_option (struct gcc_options *opts,
   arg = [^]{word}[,{arg}]
   word = {optimizers|target|warnings|undocumented|
   params|common|}  */
-   while (* a != 0)
+   while (*a != 0)
  {
static const struct
{
- const char * string;
+ const char *string;
  unsigned int flag;
}
specifics[] =
@@ -1631,19 +1631,24 @@ common_handle_option (struct gcc_options *opts,
  { "common", CL_COMMON },
  { NULL, 0 }
};
-   unsigned int * pflags;
-   const char * comma;
+   unsigned int *pflags;
+   const char *comma;
unsigned int lang_flag, specific_flag;
unsigned int len;
unsigned int i;
 
-   if (* a == '^')
+   if (*a == '^')
  {
-   ++ a;
-   pflags = & exclude_flags;
+   ++a;
+   if (*a == '\0')
+ {
+   error_at (loc, "missing argument to %qs", "--help=^");
+   break;
+ }
+   pflags = &exclude_flags;
  }
else
- pflags = & include_flags;
+ pflags = &include_flags;
 
comma = strchr (a, ',');
if (comma == NULL)
@@ -1680,7 +1685,7 @@ common_handle_option (struct gcc_options *opts,
if (specific_flag != 0)
  {
if (lang_flag == 0)
- * pflags |= specific_flag;
+ *pflags |= specific_flag;
else
  {
/* The option's argument matches both the start of a
@@ -1689,7 +1694,7 @@ common_handle_option (struct gcc_options *opts,
   specified "--help=c", but otherwise we have to issue
   a warning.  */
if (strncasecmp (a, "c", len) == 0)
- * pflags |= lang_flag;
+ *pflags |= lang_flag;
else
  warning_at (loc, 0,
  "--help argument %q.*s is ambiguous, "
@@ -1698,7 +1703,7 @@ common_handle_option (struct gcc_options *opts,
  }
  }
else if (lang_flag != 0)
- * pflags |= lang_flag;
+ *pflags |= lang_flag;
else
  warning_at (loc, 0,
  "unrecognized argument to --help= option: %q.*s",
diff --git gcc/testsuite/gcc.dg/opts-7.c gcc/testsuite/gcc.dg/opts-7.c
index e69de29..c54d0b8 100644
--- gcc/testsuite/gcc.dg/opts-7.c
+++ gcc/testsuite/gcc.dg/opts-7.c
@@ -0,0 +1,6 @@
+/* PR driver/71063 */
+/* Test we don't ICE.  */
+/* { dg-do compile } */
+/* { dg-options "--help=^" } */
+
+/* { dg-error "missing argument to" "" { target *-*-* } 0 } */

Marek

Re: [PATCH] Better location info for "incomplete type" error msg (PR c/70756)

2016-05-12 Thread Marek Polacek

Ping.

On Thu, May 05, 2016 at 04:22:15PM +0200, Marek Polacek wrote:
> On Wed, May 04, 2016 at 11:52:39AM -0400, Jason Merrill wrote:
> > On Wed, May 4, 2016 at 9:00 AM, Marek Polacek  wrote:
> > > On Tue, May 03, 2016 at 08:05:47PM -0400, Jason Merrill wrote:
> > >> Looks good.
> > >>
> > >> But I don't see a C++ testcase; can the test go into c-c++-common?
> > >
> > > Sadly, no.  As of now, the patch doesn't improve things for C++ (?).  
> > > Seems
> > > we'd need to pass better locations down to pointer_int_sum / 
> > > size_in_bytes.
> > > It cascades :(.
> > 
> > Sure.  But can you fix that, too, while you're thinking about it?
> > Passing the location to cp_pointer_int_sum and pointer_diff seems
> > pretty simple.
> 
> That's true, that was pretty simple, actually.  And while at it, I also
> added a location parameter to cp_build_modify_expr.  With that, we generate
> better diagnostics even for C++, so I could move the test to c-c++-common.
> And I also added another test, this time with -Wpointer-arith diagnostics,
> which this patch improves as well.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2016-05-05  Marek Polacek  
> 
>   PR c/70756
>   * c-common.c (pointer_int_sum): Call size_in_bytes_loc instead of
>   size_in_bytes and pass LOC to it.
> 
>   * c-decl.c (build_compound_literal): Pass LOC down to
>   c_incomplete_type_error.
>   * c-tree.h (require_complete_type): Adjust declaration.
>   (c_incomplete_type_error): Likewise.
>   * c-typeck.c (require_complete_type): Add location parameter, pass it
>   down to c_incomplete_type_error.
>   (c_incomplete_type_error): Add location parameter, pass it down to
>   error_at.
>   (build_component_ref): Pass location down to c_incomplete_type_error.
>   (default_conversion): Pass location down to require_complete_type.
>   (build_array_ref): Likewise.
>   (build_function_call_vec): Likewise.
>   (convert_arguments): Likewise.
>   (build_unary_op): Likewise.
>   (build_c_cast): Likewise.
>   (build_modify_expr): Likewise.
>   (convert_for_assignment): Likewise.
>   (c_finish_omp_clauses): Likewise.
> 
>   * call.c (build_new_op_1): Pass LOC to cp_build_modify_expr.
>   * cp-tree.h (cp_build_modify_expr): Update declaration.
>   (cxx_incomplete_type_error, cxx_incomplete_type_diagnostic): New inline
>   overloads.
>   * cp-ubsan.c (cp_ubsan_dfs_initialize_vtbl_ptrs): Pass INPUT_LOCATION to
>   cp_build_modify_expr.
>   * decl2.c (set_guard): Likewise.
>   (handle_tls_init): Likewise.
>   * init.c (perform_member_init): Likewise.
>   (expand_virtual_init): Likewise.
>   (build_new_1): Likewise.
>   (build_vec_delete_1): Likewise.
>   (get_temp_regvar): Likewise.
>   (build_vec_init): Likewise.
>   * method.c (do_build_copy_assign): Likewise.
>   (assignable_expr): Likewise.
>   * semantics.c (finish_omp_for): Likewise.
>   * typeck.c (cp_build_binary_op): Pass LOCATION to pointer_diff and
>   cp_pointer_int_sum.
>   (cp_pointer_int_sum): Add location parameter.  Pass it down to
>   pointer_int_sum.
>   (pointer_diff): Add location parameter.  Use it.
>   (build_modify_expr): Pass location down to cp_build_modify_expr.
>   (cp_build_modify_expr): Add location parameter.  Use it.
>   (build_x_modify_expr): Pass location down to cp_build_modify_expr.
>   * typeck2.c (cxx_incomplete_type_diagnostic,
>   cxx_incomplete_type_error): Add location parameter.
> 
>   * langhooks-def.h (lhd_incomplete_type_error): Adjust declaration.
>   * langhooks.c (lhd_incomplete_type_error): Add location parameter.
>   * langhooks.h (incomplete_type_error): Likewise.
>   * tree.c (size_in_bytes_loc): Renamed from size_in_bytes.  Add location
>   parameter, pass it down to incomplete_type_error.
>   * tree.h (size_in_bytes): New inline overload.
>   (size_in_bytes_loc): Renamed from size_in_bytes.
> 
>   * c-c++-common/pr70756-2.c: New test.
>   * c-c++-common/pr70756.c: New test.
> 
> diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
> index 63a18c8..150bdb2 100644
> --- gcc/c-family/c-common.c
> +++ gcc/c-family/c-common.c
> @@ -4269,7 +4269,7 @@ pointer_int_sum (location_t loc, enum tree_code 
> resultcode,
>size_exp = integer_one_node;
>  }
>else
> -size_exp = size_in_bytes (TREE_TYPE (result_type));
> +size_exp = size_in_bytes_loc (loc, TREE_TYPE (result_type));
>  
>/* We are manipulating pointer values, so we don't need to warn
>   about relying on undefined signed overflow.  We disable the
> diff --git gcc/c/c-decl.c gcc/c/c-decl.c
> index 7094efc..48fa65c 100644
> --- gcc/c/c-decl.c
> +++ gcc/c/c-decl.c
> @@ -5112,7 +5112,7 @@ build_compound_literal (location_t loc, tree type, tree 
> init, bool non_const)
>  
>if (type == error_mark_node || !COMPLETE_TYPE_P (type))
>  {
> -

Re: [PATCH, PR tree-optimization/71006] Fix vectype computation for COND_EXPR

2016-05-12 Thread Richard Biener

On Thu, May 12, 2016 at 12:19 PM, Ilya Enkovich  wrote:
> Hi,
>
> Currently we have a code in vect_determine_vectorization_factor to
> compute vectype for mask producers.  It wasn't meant to be used for
> EXPR_COND assignments but it is used now in some cases causing wrong
> resulting vectype.
>
> Bootstrapped and regtested for x86_64-pc-linux-gnu.  OK for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2016-05-12  Ilya Enkovich  
>
> PR tree-optimization/71006
> * tree-vect-loop.c (vect_determine_vectorization_factor): Don't
> consider COND_EXPR as a mask producer.
>
> gcc/testsuite/
>
> 2016-05-12  Ilya Enkovich  
>
> PR tree-optimization/71006
> * gcc.dg/pr71006.c: New test.
>
>
> diff --git a/gcc/testsuite/gcc.dg/pr71006.c b/gcc/testsuite/gcc.dg/pr71006.c
> new file mode 100644
> index 000..2b45aa0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr71006.c
> @@ -0,0 +1,16 @@
> +/* PR target/71006 */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -ftree-vectorize" } */
> +
> +unsigned char uu, gu, e2;
> +
> +void
> +fs (void)
> +{
> +  char *nq = (char *)&gu, *k4 = (char *)&gu;
> +  while (*k4 < 1)
> +{
> +  uu += (*nq != 0 || e2 != 0);
> +  ++*k4;
> +}
> +}
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index da98211..d673c67 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -437,7 +437,9 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   /* Bool ops don't participate in vectorization factor
>  computation.  For comparison use compared types to
>  compute a factor.  */
> - if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> + if (TREE_CODE (scalar_type) == BOOLEAN_TYPE
> + && is_gimple_assign (stmt)
> + && gimple_assign_rhs_code (stmt) != COND_EXPR)
> {
>   if (STMT_VINFO_RELEVANT_P (stmt_info))
> mask_producers.safe_push (stmt_info);

Re: [PATCH, RFC] Introduce -fsanitize=use-after-scope

2016-05-12 Thread Jakub Jelinek

On Wed, May 11, 2016 at 02:54:01PM +0200, Martin Liška wrote:
> On 05/06/2016 02:22 PM, Jakub Jelinek wrote:
> > On Fri, May 06, 2016 at 01:04:30PM +0200, Martin Liška wrote:
> >> I started working on the patch a couple of months ago, basically after
> >> a brief discussion with Jakub on IRC.
> >>
> >> I'm sending the initial version which can successfully run instrumented
> >> tramp3d, postgresql server and Inkscape. It catches the basic set of
> >> examples which are added in following patch.
> >>
> >> The implementation is quite straightforward and works in the following steps:
> >>
> >> 1) Every local variable stack slot is poisoned at the very beginning of a 
> >> function (RTL emission)
> >> 2) In gimplifier, once we spot a DECL_EXPR, a variable is unpoisoned (by 
> >> emitting ASAN_MARK builtin)
> >> and the variable is marked as addressable
> > 
> > Not all vars have DECL_EXPRs though.

Just random comments from quick skim, need to find enough spare time to
actually try it and see how it works.

> Yeah, I've spotted one interesting example which is part of LLVM's testsuite:
> 
> struct IntHolder {
>   int val;
> };
> 
> const IntHolder *saved;
> 
> void save(const IntHolder &holder) {
>   saved = &holder;
> }
> 
> int main(int argc, char *argv[]) {
>   save({10});
>   int x = saved->val;  // BOOM
>   return x;
> }
> 
> It would be also good to handle such temporaries. Any suggestions how to 
> handle that in gimplifier?

Dunno, guess you need to do something in the FE for it already (talk to
Jason?).  At least in *.original dump there is already:
  <) >;
int x = (int) saved->val;
  return <retval> = x;
and the info on where the D.2263 temporary goes out of scope is lost.

> Apart from that, second version of the patch changes:
> + fixed issues with missing stack unpoisoning; currently, I mark all 
> VAR_DECLs that
> are in ASAN_MARK internal fns and stack prologue/epilogue is emitted just for 
> these vars
> + removed unneeded hunks (tree-vect-patterns.c and asan_poisoning.cc)
> + LABEL unpoisoning code makes stable sort for variables that were already 
> used in the context
> + stack poisoning hasn't worked for -O1+ due to following guard in asan.c
>  /* Automatic vars in the current function will be always accessible.  */
> + direct shadow memory poisoning/unpoisoning code is introduced - in both 
> scenarios (RTL and GIMPLE),
> I would appreciate feedback if storing multiple bytes is fine? What is the 
> maximum memory wide
> store mode supported by a target? How can I get such information?
> + the maximum object size handled by a direct emission is guarded by 
> use-after-scope-direct-emission-threshold
> parameter; initial value (256B) should maximally emit store of 32B

Would be better if user visible param was in bytes rather than bits IMHO.

> Yeah, depends because of:
> 
> static inline bool
> asan_sanitize_use_after_scope (void)
> {
>   return ((flag_sanitize & SANITIZE_ADDRESS_USE_AFTER_SCOPE)
> == SANITIZE_ADDRESS_USE_AFTER_SCOPE
> && flag_stack_reuse == SR_NONE
> && ASAN_STACK);
> }
> 
> Where ASAN_STACK comes from params.h.

I'd prefer to just prototype the function in the header and define it in
asan.c or some other source file.  Or maybe split it, do the important case
(flag_sanitize check) inline and call an out-of-line function for the rest.
Why do you check flag_stack_reuse?  I thought you'd arrange for it to be
different when -fsanitize=use-after-scope?

> @@ -243,6 +243,11 @@ static unsigned HOST_WIDE_INT asan_shadow_offset_value;
>  static bool asan_shadow_offset_computed;
>  static vec sanitized_sections;
>  
> +/* Set of variable declarations that are going to be guarded by
> +   use-after-scope sanitizer.  */
> +
> +static hash_set  asan_handled_variables(13);

Not sure about the formatting here, don't we use xxx<T> instead of xxx <T>
(i.e. no space before the template argument list)?  And I'd expect space before (.
> @@ -1020,6 +1020,91 @@ asan_function_start (void)
>current_function_funcdef_no);
>  }
>  
> +/* Return number of shadow bytes that are occupied by a local variable
> +   of SIZE bytes.  */
> +
> +static unsigned HOST_WIDE_INT
> +get_shadow_memory_size (unsigned HOST_WIDE_INT size)
> +{
> +  /* Round up size of object.  */
> +  unsigned HOST_WIDE_INT r;
> +  if ((r = size % BITS_PER_UNIT) != 0)
> +size += BITS_PER_UNIT - r;

Isn't there a ROUND_UP macro?

Jakub

Re: [PATCH, libgomp] Rewire OpenACC async

2016-05-12 Thread Thomas Schwinge

Hi!

On Thu, 12 May 2016 12:02:58 +0200, Jakub Jelinek  wrote:
> ABI incompatible change for the plugin
> interface (affecting OpenACC capable plugins only), I think you just should
> rename the plugin callback you add the argument to, so that
>   || !DLSYM_OPT (openacc.register_async_cleanup,
>  openacc_register_async_cleanup)
> would fail when trying to load GCC 6.x nvptx plugin from GCC 7.x libgomp
> or vice versa.

Hmm, as far as I remember, we had previously agreed that libgomp plugin
ABI changes are not of any concern, given that libgomp and its plugins
will always be built from the same sources, at the same time, and so
their ABIs will always correspond?  Discussed before in
,
for example.

Grüße
 Thomas

Re: [PATCH] Fix crash with --help=^ (PR driver/71063)

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 12:35:58PM +0200, Marek Polacek wrote:
> 2016-05-12  Marek Polacek  
> 
>   PR driver/71063
>   * opts.c (common_handle_option): Detect missing argument for --help^.
> 
>   * gcc.dg/opts-7.c: New test.

Thanks.

Jakub

Re: [PATCH, libgomp] Rewire OpenACC async

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 12:47:18PM +0200, Thomas Schwinge wrote:
> Hi!
> 
> On Thu, 12 May 2016 12:02:58 +0200, Jakub Jelinek  wrote:
> > ABI incompatible change for the plugin
> > interface (affecting OpenACC capable plugins only), I think you just should
> > rename the plugin callback you add the argument to, so that
> >   || !DLSYM_OPT (openacc.register_async_cleanup,
> >  openacc_register_async_cleanup)
> > would fail when trying to load GCC 6.x nvptx plugin from GCC 7.x libgomp
> > or vice versa.
> 
> Hmm, as far as I remember, we had previously agreed that libgomp plugin
> ABI changes are not of any concern, given that libgomp and its plugins
> will always be built from the same sources, at the same time, and so
> their ABIs will always correspond?  Discussed before in
> ,
> for example.

I thought the agreement was that it is ok not to support mixing of
different libgomp and plugin versions, but we should make sure that we
refuse to load the plugin in case of mismatch, instead of silently crashing.
Of course, changes in unreleased compiler versions are fine.

So, I'm not asking for compatibility in that 6.x nvptx plugin should still
work with 7.x libgomp and vice versa, but that it would be ignored or
diagnosed if somebody mixes it.

Jakub

[PATCH] Fix PR71062

2016-05-12 Thread Richard Biener


It was noted that we now simplify pointer equality tests against
restrict qualified pointers, which we can't do according to
Joseph's reading of the fine print in the C standard.

Fixed as follows.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2016-05-12  Richard Biener  

PR tree-optimization/71062
* tree-ssa-alias.h (struct pt_solution): Add vars_contains_restrict
field.
* tree-ssa-structalias.c (set_uids_in_ptset): Set vars_contains_restrict
if the var is a restrict tag.
* tree-ssa-alias.c (ptrs_compare_unequal): If vars_contains_restrict
do not disambiguate pointers against it.
(dump_points_to_solution): Re-structure and adjust for new
vars_contains_restrict flag.
* gimple-pretty-print.c (pp_points_to_solution): Likewise.

* gcc.dg/torture/pr71062.c: New testcase.

Index: gcc/tree-ssa-alias.h
===
*** gcc/tree-ssa-alias.h(revision 236159)
--- gcc/tree-ssa-alias.h(working copy)
*** struct GTY(()) pt_solution
*** 47,53 
   includes memory at address NULL.  */
unsigned int null : 1;
  
- 
/* Nonzero if the vars bitmap includes a variable included in 'nonlocal'.  
*/
unsigned int vars_contains_nonlocal : 1;
/* Nonzero if the vars bitmap includes a variable included in 'escaped'.  */
--- 47,52 
*** struct GTY(()) pt_solution
*** 55,60 
--- 54,62 
/* Nonzero if the vars bitmap includes a anonymous heap variable that
   escaped the function and thus became global.  */
unsigned int vars_contains_escaped_heap : 1;
+   /* Nonzero if the vars bitmap includes a anonymous variable used to
+  represent storage pointed to by a restrict qualified pointer.  */
+   unsigned int vars_contains_restrict : 1;
  
/* Set of variables that this pointer may point to.  */
bitmap vars;
Index: gcc/tree-ssa-alias.c
===
*** gcc/tree-ssa-alias.c(revision 236159)
--- gcc/tree-ssa-alias.c(working copy)
*** ptrs_compare_unequal (tree ptr1, tree pt
*** 363,376 
else if (obj1 && TREE_CODE (ptr2) == SSA_NAME)
  {
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr2);
!   if (!pi)
return false;
return !pt_solution_includes (&pi->pt, obj1);
  }
else if (TREE_CODE (ptr1) == SSA_NAME && obj2)
  {
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr1);
!   if (!pi)
return false;
return !pt_solution_includes (&pi->pt, obj2);
  }
--- 363,379 
else if (obj1 && TREE_CODE (ptr2) == SSA_NAME)
  {
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr2);
!   /* We may not use restrict to optimize pointer comparisons.
!  See PR71062.  So we have to assume that restrict-pointed-to
!may be in fact obj1.  */
!   if (!pi || pi->pt.vars_contains_restrict)
return false;
return !pt_solution_includes (&pi->pt, obj1);
  }
else if (TREE_CODE (ptr1) == SSA_NAME && obj2)
  {
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr1);
!   if (!pi || pi->pt.vars_contains_restrict)
return false;
return !pt_solution_includes (&pi->pt, obj2);
  }
*** dump_points_to_solution (FILE *file, str
*** 521,537 
fprintf (file, ", points-to vars: ");
dump_decl_set (file, pt->vars);
if (pt->vars_contains_nonlocal
! && pt->vars_contains_escaped_heap)
!   fprintf (file, " (nonlocal, escaped heap)");
!   else if (pt->vars_contains_nonlocal
!  && pt->vars_contains_escaped)
!   fprintf (file, " (nonlocal, escaped)");
!   else if (pt->vars_contains_nonlocal)
!   fprintf (file, " (nonlocal)");
!   else if (pt->vars_contains_escaped_heap)
!   fprintf (file, " (escaped heap)");
!   else if (pt->vars_contains_escaped)
!   fprintf (file, " (escaped)");
  }
  }
  
--- 524,554 
fprintf (file, ", points-to vars: ");
dump_decl_set (file, pt->vars);
if (pt->vars_contains_nonlocal
! || pt->vars_contains_escaped
! || pt->vars_contains_escaped_heap
! || pt->vars_contains_restrict)
!   {
! const char *comma = "";
! fprintf (file, " (");
! if (pt->vars_contains_nonlocal)
!   {
! fprintf (file, "nonlocal");
! comma = ", ";
!   }
! if (pt->vars_contains_escaped)
!   {
! fprintf (file, "%sescaped", comma);
! comma = ", ";
!   }
! if (pt->vars_contains_escaped_heap)
!   {
! fprintf (file, "%sescaped heap", comma);
! comma = ", ";
!   }
! if (pt->vars_contains_restrict)
!   fprintf (file, "%srestrict", comma);
! fprintf (file, ")");
!

[PATCH] Fix PR71059

2016-05-12 Thread Richard Biener


The following works around the issue that PRE inserts expressions into
the VN hashes without having a representative for them.  First the
patch avoids inserting fully constant expressions and second it deals
with the above by assigning a representative once VN insertion needs one.

The way PRE uses the VN hashes here probably needs some (big) overhaul
but the following should get us past the failure point in a reasonable
way as well.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2016-05-11  Richard Biener  

PR tree-optimization/71059
* tree-ssa-pre.c (phi_translate_1): Fully fold translated
nary before looking up or entering the expression into the VN
hashes.
* tree-ssa-sccvn.c (vn_nary_build_or_lookup): Fix comment typo.
Make sure to re-use NARYs without result as inserted by
phi-translation.

* gcc.dg/torture/pr71059.c: New testcase.

Index: gcc/tree-ssa-pre.c
===
*** gcc/tree-ssa-pre.c  (revision 236069)
--- gcc/tree-ssa-pre.c  (working copy)
*** phi_translate_1 (pre_expr expr, bitmap_s
*** 1464,1469 
--- 1464,1475 
pre_expr constant;
unsigned int new_val_id;
  
+   PRE_EXPR_NARY (expr) = newnary;
+   constant = fully_constant_expression (expr);
+   PRE_EXPR_NARY (expr) = nary;
+   if (constant != expr)
+ return constant;
+ 
tree result = vn_nary_op_lookup_pieces (newnary->length,
newnary->opcode,
newnary->type,
*** phi_translate_1 (pre_expr expr, bitmap_s
*** 1478,1487 
if (nary)
  {
PRE_EXPR_NARY (expr) = nary;
-   constant = fully_constant_expression (expr);
-   if (constant != expr)
- return constant;
- 
new_val_id = nary->value_id;
get_or_alloc_expression_id (expr);
  }
--- 1484,1489 
*** phi_translate_1 (pre_expr expr, bitmap_s
*** 1495,1503 
 &newnary->op[0],
 result, new_val_id);
PRE_EXPR_NARY (expr) = nary;
-   constant = fully_constant_expression (expr);
-   if (constant != expr)
- return constant;
get_or_alloc_expression_id (expr);
  }
add_to_value (new_val_id, expr);
--- 1497,1502 
Index: gcc/testsuite/gcc.dg/torture/pr71059.c
===
*** gcc/testsuite/gcc.dg/torture/pr71059.c  (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr71059.c  (working copy)
***
*** 0 
--- 1,15 
+ /* { dg-do compile } */
+ 
+ short a, c;
+ union {
+ unsigned f0;
+ unsigned short f1;
+ } b;
+ volatile int d;
+ short fn1(short p1) { return p1 + a; }
+ void fn2()
+ {
+   b.f0 = 0;
+   for (;; b.f0 = fn1(b.f0))
+ (c && b.f1) || d;
+ }
Index: gcc/tree-ssa-sccvn.c
===
*** gcc/tree-ssa-sccvn.c(revision 236159)
--- gcc/tree-ssa-sccvn.c(working copy)
*** vn_nary_build_or_lookup (code_helper rco
*** 1632,1638 
  {
tree result = NULL_TREE;
/* We will be creating a value number for
!ROCDE (OPS...).
   So first simplify and lookup this expression to see if it
   is already available.  */
mprts_hook = vn_lookup_simplify_result;
--- 1632,1638 
  {
tree result = NULL_TREE;
/* We will be creating a value number for
!RCODE (OPS...).
   So first simplify and lookup this expression to see if it
   is already available.  */
mprts_hook = vn_lookup_simplify_result;
*** vn_nary_build_or_lookup (code_helper rco
*** 1682,1687 
--- 1682,1697 
gimple_seq_add_stmt_without_update (&VN_INFO (result)->expr,
  new_stmt);
VN_INFO (result)->needs_insertion = true;
+   /* ???  PRE phi-translation inserts NARYs without corresponding
+  SSA name result.  Re-use those but set their result according
+to the stmt we just built.  */
+   vn_nary_op_t nary = NULL;
+   vn_nary_op_lookup_stmt (new_stmt, &nary);
+   if (nary)
+   {
+ gcc_assert (nary->result == NULL_TREE);
+ nary->result = gimple_assign_lhs (new_stmt);
+   }
/* As all "inserted" statements are singleton SCCs, insert
 to the valid table.  This is strictly needed to
 avoid re-generating new value SSA_NAMEs for the same
*** vn_nary_build_or_lookup (code_helper rco
*** 1689,1695 
 optimistic table gets cleared after each iteration).
 We do not need to insert into t

Re: [PATCH, PR tree-optimization/71006] Fix vectype computation for COND_EXPR

2016-05-12 Thread Ilya Enkovich

2016-05-12 13:38 GMT+03:00 Richard Biener :
> On Thu, May 12, 2016 at 12:19 PM, Ilya Enkovich  
> wrote:
>> Hi,
>>
>> Currently we have a code in vect_determine_vectorization_factor to
>> compute vectype for mask producers.  It wasn't meant to be used for
>> EXPR_COND assignments but it is used now in some cases causing wrong
>> resulting vectype.
>>
>> Bootstrapped and regtested for x86_64-pc-linux-gnu.  OK for trunk?
>
> Ok.

Is it also OK to backport this to gcc-6-branch after proper testing?

Thanks,
Ilya

>
> Thanks,
> Richard.
>
>> Thanks,
>> Ilya

Re: [PATCH] [ARC] Use GOTOFFPC relocation for pc-relative accesses.

2016-05-12 Thread Claudiu Zissulescu


PING

On 02/05/16 15:50, Claudiu Zissulescu wrote:

This patch makes pc-relative accesses safer by using the @pcl
syntax.  This new syntax generates a pc-relative relocation which will
be handled by the assembler.

OK to apply?
Claudiu

gcc/
2016-05-02  Claudiu Zissulescu  
Joern Rennecke  

* config/arc/arc.c (arc_print_operand_address): Handle pc-relative
addresses.
(arc_needs_pcl_p): Add GOTOFFPC.
(arc_legitimate_pic_addr_p): Likewise.
(arc_output_pic_addr_const): Likewise.
(arc_legitimize_pic_address): Generate a pc-relative address using
GOTOFFPC.
(arc_output_libcall): Use @pcl syntax.
(arc_delegitimize_address_0): Delegitimize ARC_UNSPEC_GOTOFFPC.
* config/arc/arc.md ("unspec"): Add ARC_UNSPEC_GOTOFFPC.
(*movsi_insn): Use @pcl syntax.
(doloop_begin_i): Likewise.
---
  gcc/config/arc/arc.c  | 53 ---
  gcc/config/arc/arc.md |  6 --
  2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 49edc0a..c0aa075 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -3528,7 +3528,8 @@ arc_print_operand_address (FILE *file , rtx addr)
 || XINT (c, 1) == UNSPEC_TLS_IE))
|| (GET_CODE (c) == PLUS
&& GET_CODE (XEXP (c, 0)) == UNSPEC
-   && (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF)))
+   && (XINT (XEXP (c, 0), 1) == UNSPEC_TLS_OFF
+   || XINT (XEXP (c, 0), 1) == ARC_UNSPEC_GOTOFFPC)))
  {
arc_output_pic_addr_const (file, c, 0);
break;
@@ -4636,6 +4637,7 @@ arc_needs_pcl_p (rtx x)
  switch (XINT (x, 1))
{
case ARC_UNSPEC_GOT:
+  case ARC_UNSPEC_GOTOFFPC:
case UNSPEC_TLS_GD:
case UNSPEC_TLS_IE:
return true;
@@ -4698,9 +4700,10 @@ arc_legitimate_pic_addr_p (rtx addr)
|| XVECLEN (addr, 0) != 1)
  return false;

-  /* Must be one of @GOT, @GOTOFF, @tlsgd, tlsie.  */
+  /* Must be one of @GOT, @GOTOFF, @GOTOFFPC, @tlsgd, tlsie.  */
if (XINT (addr, 1) != ARC_UNSPEC_GOT
&& XINT (addr, 1) != ARC_UNSPEC_GOTOFF
+  && XINT (addr, 1) != ARC_UNSPEC_GOTOFFPC
&& XINT (addr, 1) != UNSPEC_TLS_GD
&& XINT (addr, 1) != UNSPEC_TLS_IE)
  return false;
@@ -4917,26 +4920,15 @@ arc_legitimize_pic_address (rtx orig, rtx oldx)
else if (!flag_pic)
return orig;
else if (CONSTANT_POOL_ADDRESS_P (addr) || SYMBOL_REF_LOCAL_P (addr))
-   {
- /* This symbol may be referenced via a displacement from the
-PIC base address (@GOTOFF).  */
+   return gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+ ARC_UNSPEC_GOTOFFPC));

- /* FIXME: if we had a way to emit pc-relative adds that
-don't create a GOT entry, we could do without the use of
-the gp register.  */
- crtl->uses_pic_offset_table = 1;
- pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
- pat = gen_rtx_CONST (Pmode, pat);
- pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat);
-   }
-  else
-   {
- /* This symbol must be referenced via a load from the
-Global Offset Table (@GOTPC).  */
- pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
- pat = gen_rtx_CONST (Pmode, pat);
- pat = gen_const_mem (Pmode, pat);
-   }
+  /* This symbol must be referenced via a load from the Global
+Offset Table (@GOTPC).  */
+  pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
+  pat = gen_rtx_CONST (Pmode, pat);
+  pat = gen_const_mem (Pmode, pat);

if (oldx == NULL)
oldx = gen_reg_rtx (Pmode);
@@ -4952,6 +4944,7 @@ arc_legitimize_pic_address (rtx orig, rtx oldx)
  if (GET_CODE (addr) == UNSPEC)
{
  /* Check that the unspec is one of the ones we generate?  */
+ return orig;
}
  else
gcc_assert (GET_CODE (addr) == PLUS);
@@ -5105,6 +5098,9 @@ arc_output_pic_addr_const (FILE * file, rtx x, int code)
case ARC_UNSPEC_GOTOFF:
  suffix = "@gotoff";
  break;
+   case ARC_UNSPEC_GOTOFFPC:
+ suffix = "@pcl",   pcrel = true;
+ break;
case ARC_UNSPEC_PLT:
  suffix = "@plt";
  break;
@@ -5389,6 +5385,7 @@ arc_legitimate_constant_p (machine_mode mode, rtx x)
  {
  case ARC_UNSPEC_PLT:
  case ARC_UNSPEC_GOTOFF:
+ case ARC_UNSPEC_GOTOFFPC:
  case ARC_UNSPEC_GOT:
  case UNSPEC_TLS_GD:
  case UNSPEC_TLS_IE:
@@ -7648,7 +7645,7 @@ arc_output_libcall (const char *fname)
   || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
  {
if (flag_pic)
-

Re: [PATCHv2 0/7] ARC: Add support for nps400 variant

2016-05-12 Thread Claudiu Zissulescu


On 03/05/16 12:56, Andrew Burgess wrote:

* Claudiu Zissulescu  [2016-05-02 09:02:16 
+]:


Please also consider addressing the following warnings that were introduced:

mainline/gcc/gcc/config/arc/arc.md:888: warning: source missing a mode?
mainline/gcc/gcc/config/arc/arc.md:906: warning: source missing a mode?
mainline/gcc/gcc/config/arc/arc.md:921: warning: source missing a mode?
mainline/gcc/gcc/config/arc/arc.md:6146: warning: source missing a mode?



Here's a revised fixup patch that includes addressing these 4
warnings.

Thanks,
Andrew

---

gcc/arc: New peephole2 and little endian arc test fixes

Resolve some test failures introduced for little endian arc as a result
of the recent arc/nps400 additions.

There's a new peephole2 optimisation to merge together two zero_extracts
in order that the movb instruction can be used.

Source operand modes filled in for 3 instruction patterns and a
peephole2 optimisation, to silence build warnings.

One of the test cases is extended so that the test does something
meaningful in both big and little endian arc mode.

Other tests have their expected results updated to reflect improvements
in other areas of GCC.

gcc/ChangeLog:

* config/arc/arc.md (movb peephole2): New peephole2 to merge two
zero_extract operations to allow a movb to occur.
(*tst_bitfield_tst): Add mode to source operand.
(*tst_bitfield_asr): Likewise.
(*tst_bitfield): Likewise.
(bitops peephole2): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/arc/movb-1.c: Update little endian arc results.
* gcc.target/arc/movb-2.c: Likewise.
* gcc.target/arc/movb-5.c: Likewise.
* gcc.target/arc/movh_cl-1.c: Extend test to cover little endian
arc.
---
  gcc/ChangeLog|  9 +
  gcc/config/arc/arc.md| 22 ++
  gcc/testsuite/ChangeLog  |  8 
  gcc/testsuite/gcc.target/arc/movb-1.c|  2 +-
  gcc/testsuite/gcc.target/arc/movb-2.c|  2 +-
  gcc/testsuite/gcc.target/arc/movb-5.c|  2 +-
  gcc/testsuite/gcc.target/arc/movh_cl-1.c | 11 +++
  7 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index c61107f..96c1e77 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -879,7 +879,7 @@
  ; since this is about constants, reload shouldn't care.
  (define_insn "*tst_bitfield_tst"
[(set (match_operand:CC_ZN 0 "cc_set_register" "")
-   (match_operator 4 "zn_compare_operator"
+   (match_operator:CC_ZN 4 "zn_compare_operator"
  [(zero_extract:SI
 (match_operand:SI 1 "register_operand"  "c")
 (match_operand:SI 2 "const_int_operand" "n")
@@ -897,7 +897,7 @@
  ; Likewise for asr.f.
  (define_insn "*tst_bitfield_asr"
[(set (match_operand:CC_ZN 0 "cc_set_register" "")
-   (match_operator 4 "zn_compare_operator"
+   (match_operator:CC_ZN 4 "zn_compare_operator"
  [(zero_extract:SI
 (match_operand:SI 1 "register_operand"  "c")
 (match_operand:SI 2 "const_int_operand" "n")
@@ -912,7 +912,7 @@

  (define_insn "*tst_bitfield"
[(set (match_operand:CC_ZN 0 "cc_set_register" "")
-   (match_operator 5 "zn_compare_operator"
+   (match_operator:CC_ZN 5 "zn_compare_operator"
  [(zero_extract:SI
 (match_operand:SI 1 "register_operand" "%Rcqq,c,  c,Rrq,c")
 (match_operand:SI 2 "const_int_operand""N,N,  n,Cbn,n")
@@ -6128,7 +6128,7 @@
(zero_extract:SI (match_dup 1)
 (match_dup 2)
 (match_operand:SI 4 "const_int_operand" "")))
-   (set (match_dup 1) (match_operand 8))
+   (set (match_dup 1) (match_operand:SI 8))
 (set (zero_extract:SI (match_dup 0)
 (match_operand:SI 5 "const_int_operand" "")
 (match_operand:SI 6 "const_int_operand" ""))
@@ -6144,6 +6144,20 @@
   (zero_extract:SI (match_dup 1) (match_dup 5) (match_dup 
7)))])
 (match_dup 1)])

+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+(zero_extract:SI (match_dup 0)
+(match_operand:SI 1 "const_int_operand" "")
+(match_operand:SI 2 "const_int_operand" "")))
+   (set (zero_extract:SI (match_operand:SI 3 "register_operand" "")
+(match_dup 1)
+ (match_dup 2))
+   (match_dup 0))]
+  "TARGET_NPS_BITOPS
+   && !reg_overlap_mentioned_p (operands[0], operands[3])"
+  [(set (zero_extract:SI (match_dup 3) (match_dup 1) (match_dup 2))
+(zero_extract:SI (match_dup 0) (match_dup 1) (match_dup 2)))])
+
  ;; include the arc-FPX instructions
  (include "fpx.md")

diff --git a/gcc/testsuite/gcc.target/arc/movb-1.c 
b/gcc/testsuite/gcc.target/arc/movb-1.c
index 65d4ba4..94d9f5f 100644
--- a/gcc/testsuite/gcc.target/arc/movb-1.c
+++ b/gcc/testsuit

Re: [PATCH 3/3] Enhance dumps of IVOPTS

2016-05-12 Thread Martin Liška

On 05/10/2016 03:16 PM, Bin.Cheng wrote:
> Another way is to remove the use of id for struct iv_inv_expr_ent once
> for all.  We can change iv_ca.used_inv_expr and cost_pair.inv_expr_id
> to pointers, and rename iv_inv_expr_ent.id to count and use this to
> record reference number in iv_ca.  This if-statement on dump_file can
> be saved.  Also I think it simplifies current code a bit.  For now,
> there are id <-> struct maps for different structures in IVOPT which
> make it not straightforward.

Hi.

I'm sending the second version of the patch. I tried to follow your advice, but
because an iv_inv_expr_ent can simultaneously belong to multiple iv_cas,
putting a counter in iv_inv_expr_ent does not work. Instead of that, I've
decided to replace used_inv_expr with a hash_map that contains the used inv_exprs
and where the value of the map is the # of usages.

Further questions:
+ iv_inv_expr_ent::id can be now removed as it's used just for purpose of dumps
Group 0:
  cand  costscaled  freqcompl.  depends on
  5 2   2.001.000   
  6 4   4.001.001inv_expr:0
  7 4   4.001.001inv_expr:1
  8 4   4.001.001inv_expr:2

That can be replaced with print_generic_expr, but I think using ids makes the 
dump
output more clear.

+ As check_GNU_style.sh reported multiple 8 spaces issues in hunks I've 
touched, I decided
to fix all 8 spaces issues. Hope it's fine.

I'm going to test the patch.
Thoughts?

Martin
>From ce02c80c053c2a8a63ce6e87f5779a8dc5f470ee Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 25 Apr 2016 14:29:01 +0200
Subject: [PATCH 3/3] Enhance dumps of IVOPTS

gcc/ChangeLog:

2016-05-12  Martin Liska  

	* tree-ssa-loop-ivopts.c (avg_loop_niter): Fix coding style.
	(struct cost_pair): Replace inv_expr_id with direct pointer
	to a iv_inv_expr_ent.
	(struct iv_inv_expr_ent): Add comment for struct fields.
	(struct iv_ca): Remove used_inv_exprs and replace it with a
	hash_map called used_inv_exprs.
	(niter_for_exit): Fix coding style.
	(determine_base_object): Likewise.
	(alloc_iv): Likewise.
	(find_interesting_uses_outside): Likewise.
	(add_candidate_1): Likewise.
	(add_standard_iv_candidates): Likewise.
	(set_group_iv_cost): Use inv_expr instead of inv_expr_id.
	(prepare_decl_rtl): Fix coding style.
	(get_address_cost): Likewise.
	(get_shiftadd_cost): Likewise.
	(force_expr_to_var_cost): Likewise.
	(compare_aff_trees): Likewise.
	(get_expr_id): Return iv_inv_expr_ent * instead of inv_expr_id.
	(get_loop_invariant_expr_id): Likewise.
	(get_computation_cost_at):
	(get_computation_cost): Replace usage of inv_expr_id (int) with
	inv_expr (iv_inv_expr_ent *).
	(determine_group_iv_cost_generic): Likewise.
	(determine_group_iv_cost_address): Likewise.
	(iv_period): Fix coding style.
	(iv_elimination_compare_lt): Likewise.
	(may_eliminate_iv): Likewise.
	(determine_group_iv_cost_cond): Replace usage of inv_expr_id (int) with
	inv_expr (iv_inv_expr_ent *).
	(determine_group_iv_costs): Likewise.
	(iv_ca_recount_cost): Use used_inv_exprs to determine # of
	used invariant expressions.
	(iv_ca_set_remove_invariants): Fix coding style.
	(iv_ca_set_no_cp): Use newly added hash_map.
	(iv_ca_set_add_invariants): Likewise.
	(iv_ca_set_cp): Likewise.
	(iv_ca_new): Initialize the newly added hash_map.
	(iv_ca_free): Delete it.
	(iv_ca_dump): Fix coding style and dump used invariant
	expressions.
	(iv_ca_extend): Fix coding style.
	(try_add_cand_for): Likewise.
	(create_new_ivs): Display information about # of avg niters and
	# of used invariant expressions.
	(rewrite_use_compare): Fix coding style.

gcc/ChangeLog:

gcc/testsuite/ChangeLog:

2016-04-29  Martin Liska  

	* g++.dg/tree-ssa/ivopts-3.C: Change test-case to follow
	the new format of dump output.
---
 gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C |   2 +-
 gcc/tree-ssa-loop-ivopts.c   | 378 ---
 2 files changed, 201 insertions(+), 179 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
index 6194e9d..eb72581 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C
@@ -72,4 +72,4 @@ int main ( int , char** ) {
 
 // Verify that on x86_64 and i?86 we use a single IV for the innermost loop
 
-// { dg-final { scan-tree-dump "Selected IV set for loop \[0-9\]* at \[^ \]*:64, 1 IVs" "ivopts" { target x86_64-*-* i?86-*-* } } }
+// { dg-final { scan-tree-dump "Selected IV set for loop \[0-9\]* at \[^ \]*:64, 3 avg niters, 1 expressions, 1 IVs" "ivopts" { target x86_64-*-* i?86-*-* } } }
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 17af590..5a48db2 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -130,7 +130,7 @@ avg_loop_niter (struct loop *loop)
 {
   niter = max_stmt_executions_int (loop);
   if (niter == -1 || niter > AVG_LOOP_NITER (loop))
-return AVG_LOOP_NITER (loop);
+	return AVG_LOOP_NITER (loop);
 }

Re: [PATCH, PR tree-optimization/71006] Fix vectype computation for COND_EXPR

2016-05-12 Thread Richard Biener

On Thu, May 12, 2016 at 1:25 PM, Ilya Enkovich  wrote:
> 2016-05-12 13:38 GMT+03:00 Richard Biener :
>> On Thu, May 12, 2016 at 12:19 PM, Ilya Enkovich  
>> wrote:
>>> Hi,
>>>
>>> Currently we have code in vect_determine_vectorization_factor to
>>> compute vectype for mask producers.  It wasn't meant to be used for
>>> COND_EXPR assignments but it is used now in some cases, causing a wrong
>>> resulting vectype.
>>>
>>> Bootstrapped and regtested for x86_64-pc-linux-gnu.  OK for trunk?
>>
>> Ok.
>
> Is it also OK for porting into gcc-6-branch after proper testing?

Yes.

Richard.

> Thanks,
> Ilya
>
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> Ilya

Re: [PATCH, DOC] Document ASAN_OPTIONS="halt_on_error" env variable.

2016-05-12 Thread Martin Liška

On 05/11/2016 04:56 PM, Jakub Jelinek wrote:
> I think it better should say that:
> Even if a recovery mode is turned on the compiler side, it needs to be also
> enabled on the runtime library side, otherwise the failures are still fatal.
> The runtime library defaults to ... and this can be overridden through ...
> or so.
> 
>   Jakub

Thanks for the hint, I've just installed v3 as r236172.

Martin
>From 12336cb48bafffca7330e815d831d9c8ab1d0396 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Wed, 11 May 2016 16:05:49 +0200
Subject: [PATCH] Document ASAN_OPTIONS="halt_on_error" env variable.

gcc/ChangeLog:

2016-05-11  Martin Liska  

	* doc/invoke.texi: Explain connection between -fsanitize-recover=address
	and ASAN_OPTIONS="halt_on_error=1".
---
 gcc/doc/invoke.texi | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a54a0af..c126644 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9998,6 +9998,14 @@ for which this feature is experimental.
 accepted, the former enables recovery for all sanitizers that support it,
 the latter disables recovery for all sanitizers that support it.
 
+Even if a recovery mode is turned on the compiler side, it needs to be also
+enabled on the runtime library side, otherwise the failures are still fatal.
+The runtime library defaults to @code{halt_on_error=0} for
+ThreadSanitizer and UndefinedBehaviorSanitizer, while default value for
+AddressSanitizer is @code{halt_on_error=1}. This can overridden through
+the following environment variables: @env{ASAN_OPTIONS}, @env{TSAN_OPTIONS},
+@env{UBSAN_OPTIONS}.
+
 Syntax without explicit @var{opts} parameter is deprecated.  It is equivalent to
 @smallexample
 -fsanitize-recover=undefined,float-cast-overflow,float-divide-by-zero
-- 
2.8.2

Re: [PATCH, DOC] Document ASAN_OPTIONS="halt_on_error" env variable.

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 02:36:44PM +0200, Martin Liška wrote:
> On 05/11/2016 04:56 PM, Jakub Jelinek wrote:
> > I think it better should say that:
> > Even if a recovery mode is turned on the compiler side, it needs to be also
> > enabled on the runtime library side, otherwise the failures are still fatal.
> > The runtime library defaults to ... and this can be overridden through ...
> > or so.
> 
> Thanks for the hint, I've just installed v3 as r236172.
> 
> Martin

> >From 12336cb48bafffca7330e815d831d9c8ab1d0396 Mon Sep 17 00:00:00 2001
> From: marxin 
> Date: Wed, 11 May 2016 16:05:49 +0200
> Subject: [PATCH] Document ASAN_OPTIONS="halt_on_error" env variable.
> 
> gcc/ChangeLog:
> 
> 2016-05-11  Martin Liska  
> 
>   * doc/invoke.texi: Explain connection between -fsanitize-recover=address
>   and ASAN_OPTIONS="halt_on_error=1".
> ---
>  gcc/doc/invoke.texi | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index a54a0af..c126644 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -9998,6 +9998,14 @@ for which this feature is experimental.
>  accepted, the former enables recovery for all sanitizers that support it,
>  the latter disables recovery for all sanitizers that support it.
>  
> +Even if a recovery mode is turned on the compiler side, it needs to be also
> +enabled on the runtime library side, otherwise the failures are still fatal.
> +The runtime library defaults to @code{halt_on_error=0} for
> +ThreadSanitizer and UndefinedBehaviorSanitizer, while default value for
> +AddressSanitizer is @code{halt_on_error=1}. This can overridden through

s/can/can be/

> +the following environment variables: @env{ASAN_OPTIONS}, @env{TSAN_OPTIONS},
> +@env{UBSAN_OPTIONS}.

I think it isn't obvious that one needs to put halt_on_error=0 or
halt_on_error=1 into those options and what to do if you need multiple
options in there.

Jakub

Re: [Patch ARM/AArch64 09/11] Add missing vrnd{,a,m,n,p,x} tests.

2016-05-12 Thread Christophe Lyon

On 12 May 2016 at 10:45, Jiong Wang  wrote:
>
>
> On 11/05/16 14:23, Christophe Lyon wrote:
>>
>> 2016-05-02  Christophe Lyon  
>>
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c: New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc:
>> New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c:
>> New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c:
>> New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c:
>> New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c:
>> New.
>> * gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c:
>> New.
>>
>> Change-Id: Iab5f98dc4b15f9a2f61b622a9f62b207872f1737
>>
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
>> new file mode 100644
>> index 000..5f492d4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-require-effective-target arm_v8_neon_ok } */
>> +/* { dg-add-options arm_v8_neon } */
>> +
>> +#include 
>> +#include "arm-neon-ref.h"
>> +#include "compute-ref-data.h"
>> +
>> +/* Expected results.  */
>> +VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc180, 0xc170 };
>> +VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc180, 0xc170,
>> +  0xc160, 0xc150 };
>> +
>> +#define INSN vrnd
>> +#define TEST_MSG "VRND"
>> +
>> +#include "vrndX.inc"
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
>> new file mode 100644
>> index 000..629240d
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
>> @@ -0,0 +1,43 @@
>> +#define FNNAME1(NAME) exec_ ## NAME
>> +#define FNNAME(NAME) FNNAME1 (NAME)
>> +
>> +void FNNAME (INSN) (void)
>> +{
>> +  /* vector_res = vrndX (vector), then store the result.  */
>> +#define TEST_VRND2(INSN, Q, T1, T2, W, N)  \
>> +  VECT_VAR (vector_res, T1, W, N) =\
>> +INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N));   \
>> +vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N),\
>> +  VECT_VAR (vector_res, T1, W, N))
>> +
>> +  /* Two auxliary macros are necessary to expand INSN.  */
>> +#define TEST_VRND1(INSN, Q, T1, T2, W, N)  \
>> +  TEST_VRND2 (INSN, Q, T1, T2, W, N)
>> +
>> +#define TEST_VRND(Q, T1, T2, W, N) \
>> +  TEST_VRND1 (INSN, Q, T1, T2, W, N)
>> +
>> +  DECL_VARIABLE (vector, float, 32, 2);
>> +  DECL_VARIABLE (vector, float, 32, 4);
>> +
>> +  DECL_VARIABLE (vector_res, float, 32, 2);
>> +  DECL_VARIABLE (vector_res, float, 32, 4);
>> +
>> +  clean_results ();
>> +
>> +  VLOAD (vector, buffer, , float, f, 32, 2);
>> +  VLOAD (vector, buffer, q, float, f, 32, 4);
>> +
>> +  TEST_VRND ( , float, f, 32, 2);
>> +  TEST_VRND (q, float, f, 32, 4);
>> +
>> +  CHECK_FP (TEST_MSG, float, 32, 2, PRIx32, expected, "");
>> +  CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, "");
>> +}
>> +
>> +int
>> +main (void)
>> +{
>> +  FNNAME (INSN) ();
>> +  return 0;
>> +}
>>
>
> Hi Christophe,
>
>   I have a question on how test inputs are selected?
>
>   For example vrndm is round to integral, towards minus infinity while vrnda
> is to nearest with ties to even, has these differences been tested?
>

Hi Jiong,

For this particular case, no, I didn't specifically choose input values to check
these differences.

This can be done as a follow-up?

Thanks,

Christophe

>   Thanks.
>
> Regards,
> Jiong

Re: [PATCH 3/3] Enhance dumps of IVOPTS

2016-05-12 Thread Bin.Cheng

On Thu, May 12, 2016 at 1:13 PM, Martin Liška  wrote:
> On 05/10/2016 03:16 PM, Bin.Cheng wrote:
>> Another way is to remove the use of id for struct iv_inv_expr_ent once
>> for all.  We can change iv_ca.used_inv_expr and cost_pair.inv_expr_id
>> to pointers, and rename iv_inv_expr_ent.id to count and use this to
>> record reference number in iv_ca.  This if-statement on dump_file can
>> be saved.  Also I think it simplifies current code a bit.  For now,
>> there are id <-> struct maps for different structures in IVOPT which
>> make it not straightforward.
>
> Hi.
>
> I'm sending the second version of the patch. I tried to follow your advice, but
> because an iv_inv_expr_ent can simultaneously belong to multiple iv_cas,
> putting a counter in iv_inv_expr_ent does not work. Instead of that, I've
> decided to replace used_inv_expr with a hash_map that contains the used inv_exprs
> and where the value of the map is the # of usages.
>
> Further questions:
> + iv_inv_expr_ent::id can be now removed as it's used just for purpose of 
> dumps
> Group 0:
>   cand  costscaled  freqcompl.  depends on
>   5 2   2.001.000
>   6 4   4.001.001inv_expr:0
>   7 4   4.001.001inv_expr:1
>   8 4   4.001.001inv_expr:2
>
> That can be replaced with print_generic_expr, but I think using ids makes the 
> dump
> output more clear.
I am okay with keeping id.  Could you please dump all inv_exprs in a
single section like
:
inv_expr 0: print_generic_expr
inv_expr 1: ...

Then only dump the id afterwards?

>
> + As check_GNU_style.sh reported multiple 8 spaces issues in hunks I've 
> touched, I decided
> to fix all 8 spaces issues. Hope it's fine.
>
> I'm going to test the patch.
> Thoughts?

Some comments on the patch embedded.

>
> +/* Forward declaration.  */
Not necessary.
> +struct iv_inv_expr_ent;
> +

>
>  /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
>
> -static int
> +static iv_inv_expr_ent *
>  get_expr_id (struct ivopts_data *data, tree expr)
We are not returning id any more, maybe rename to record_inv_expr or else.

>  {
>struct iv_inv_expr_ent ent;
> @@ -4806,13 +4809,13 @@ get_expr_id (struct ivopts_data *data, tree expr)
>ent.hash = iterative_hash_expr (expr, 0);
>slot = data->inv_expr_tab->find_slot (&ent, INSERT);
>if (*slot)
> -return (*slot)->id;
> +return *slot;
>
>*slot = XNEW (struct iv_inv_expr_ent);
>(*slot)->expr = expr;
>(*slot)->hash = ent.hash;
>(*slot)->id = data->max_inv_expr_id++;
> -  return (*slot)->id;
> +  return *slot;
This could be changed to
  if (!*slot)
{
  //new and insert
}
  return *slot;
>  }
>
>  /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
> @@ -4820,10 +4823,10 @@ get_expr_id (struct ivopts_data *data, tree expr)
> ADDRESS_P is a flag indicating if the expression is for address
> computation.  */
>
> -static int
> +static iv_inv_expr_ent *
>  get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
> -tree cbase, HOST_WIDE_INT ratio,
> -bool address_p)
> +tree cbase, HOST_WIDE_INT ratio,
> +bool address_p)
Rename function name here too.
>  {

> @@ -5988,9 +5992,9 @@ determine_group_iv_costs (struct ivopts_data *data)
>if (group->cost_map[j].depends_on)
>  bitmap_print (dump_file,
>group->cost_map[j].depends_on, "","");
> -  if (group->cost_map[j].inv_expr_id != -1)
> +if (group->cost_map[j].inv_expr != NULL)
>  fprintf (dump_file, " inv_expr:%d",
> - group->cost_map[j].inv_expr_id);
> + group->cost_map[j].inv_expr->id);
Dump inv_expr in another column thus it won't appear under depends_on
in dump.  Also make it preceding depends_on which is a bitmap.

While we are on this one before the other two, could you please make
this independent so it can be committed after rework?

Thanks,
bin

>
> Martin

Re: [PATCH] Improve vec_concatv?sf*

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 04:39:52PM +0300, Kirill Yukhin wrote:
> > --- gcc/config/i386/sse.md.jj   2016-05-04 14:36:08.0 +0200
> > +++ gcc/config/i386/sse.md  2016-05-04 15:16:44.180894303 +0200
> > @@ -6415,12 +6415,12 @@ (define_insn "avx512f_vec_dup_1"
> >  ;; unpcklps with register source since it is shorter.
> >  (define_insn "*vec_concatv2sf_sse4_1"
> >[(set (match_operand:V2SF 0 "register_operand"
> > - "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
> > + "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
> > (vec_concat:V2SF
> >   (match_operand:SF 1 "nonimmediate_operand"
> > - "  0, 0,x, 0,0, x,m, 0 , m")
> > + "  0, 0,v, 0,0, v,m, 0 , m")
> >   (match_operand:SF 2 "vector_move_operand"
> > - " Yr,*x,x, m,m, m,C,*ym, C")))]
> > + " Yr,*x,v, m,m, m,C,*ym, C")))]
> >"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> >"@
> > unpcklps\t{%2, %0|%0, %2}
> Looks like we were wrong here.
> We need to use Yv constraint for vunpcklps since this
> insn is available for AVX-512VL only.
> 
> Like this:
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index d77227a..7d71640 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -6546,12 +6546,12 @@
>  ;; unpcklps with register source since it is shorter.
>  (define_insn "*vec_concatv2sf_sse4_1"
>[(set (match_operand:V2SF 0 "register_operand"
> - "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
> + "=Yr,*x,Yv,Yr,*x,v,v,*y ,*y")
> (vec_concat:V2SF
>   (match_operand:SF 1 "nonimmediate_operand"
> - "  0, 0,v, 0,0, v,m, 0 , m")
> + "  0, 0,Yv, 0,0, v,m, 0 , m")
>   (match_operand:SF 2 "vector_move_operand"
> - " Yr,*x,v, m,m, m,C,*ym, C")))]
> + " Yr,*x,Yv, m,m, m,C,*ym, C")))]
>"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>"@
> unpcklps\t{%2, %0|%0, %2}
> 
> Will check in to main trunk after bootstrap/regtest.

I'm not sure about the Yv on the operand 0, I think without AVX512VL
HARD_REGNO_MODE_OK will disallow V2SFmode regs in XMM16+ (but, this
is MMX-ish mode, so maybe we don't allow it ever in XMM16+).
On the SFmode operands side, you're right, HARD_REGNO_MODE_OK allows
SFmode in XMM16+ even for only AVX512F.

Jakub

Re: [PATCH] Improve vec_concatv?sf*

2016-05-12 Thread Kirill Yukhin

Hi Jakub,
On 04 May 21:44, Jakub Jelinek wrote:
> Hi!
> 
> Another pair of define_insns.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-05-04  Jakub Jelinek  
> 
>   * config/i386/sse.md (*vec_concatv2sf_sse4_1, *vec_concatv4sf): Use
>   v instead of x in vex or maybe_vex alternatives, use
>   maybe_evex instead of vex in prefix.
> 
> --- gcc/config/i386/sse.md.jj 2016-05-04 14:36:08.0 +0200
> +++ gcc/config/i386/sse.md2016-05-04 15:16:44.180894303 +0200
> @@ -6415,12 +6415,12 @@ (define_insn "avx512f_vec_dup_1"
>  ;; unpcklps with register source since it is shorter.
>  (define_insn "*vec_concatv2sf_sse4_1"
>[(set (match_operand:V2SF 0 "register_operand"
> -   "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
> +   "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
>   (vec_concat:V2SF
> (match_operand:SF 1 "nonimmediate_operand"
> -   "  0, 0,x, 0,0, x,m, 0 , m")
> +   "  0, 0,v, 0,0, v,m, 0 , m")
> (match_operand:SF 2 "vector_move_operand"
> -   " Yr,*x,x, m,m, m,C,*ym, C")))]
> +   " Yr,*x,v, m,m, m,C,*ym, C")))]
>"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>"@
> unpcklps\t{%2, %0|%0, %2}
Looks like we were wrong here.
We need to use Yv constraint for vunpcklps since this
insn is available for AVX-512VL only.

Like this:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d77227a..7d71640 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6546,12 +6546,12 @@
 ;; unpcklps with register source since it is shorter.
 (define_insn "*vec_concatv2sf_sse4_1"
   [(set (match_operand:V2SF 0 "register_operand"
- "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
+ "=Yr,*x,Yv,Yr,*x,v,v,*y ,*y")
(vec_concat:V2SF
  (match_operand:SF 1 "nonimmediate_operand"
- "  0, 0,v, 0,0, v,m, 0 , m")
+ "  0, 0,Yv, 0,0, v,m, 0 , m")
  (match_operand:SF 2 "vector_move_operand"
- " Yr,*x,v, m,m, m,C,*ym, C")))]
+ " Yr,*x,Yv, m,m, m,C,*ym, C")))]
   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
unpcklps\t{%2, %0|%0, %2}

Will check in to main trunk after bootstrap/regtest.

--
Thanks, K

[PATCH] Add dg-require-atomic-builtins to test

2016-05-12 Thread Jonathan Wakely


PR libstdc++/71081
* testsuite/experimental/memory_resource/1.cc: Require atomics.

Don't run the test on targets that need libatomic for accessing the
default memory resource. Tested x86_64-linux, committed to trunk.
commit ad31ffda12215c1934c9c3a99092e55f7344239b
Author: Jonathan Wakely 
Date:   Thu May 12 14:06:22 2016 +0100

Add dg-require-atomic-builtins to test

PR libstdc++/71081
* testsuite/experimental/memory_resource/1.cc: Require atomics.

diff --git a/libstdc++-v3/testsuite/experimental/memory_resource/1.cc 
b/libstdc++-v3/testsuite/experimental/memory_resource/1.cc
index 38cbd27..22d4e0d 100644
--- a/libstdc++-v3/testsuite/experimental/memory_resource/1.cc
+++ b/libstdc++-v3/testsuite/experimental/memory_resource/1.cc
@@ -1,4 +1,5 @@
 // { dg-options "-std=gnu++14" }
+// { dg-require-atomic-builtins }
 
 // Copyright (C) 2015-2016 Free Software Foundation, Inc.
 //

Re: [PATCH, RFC] Introduce -fsanitize=use-after-scope

2016-05-12 Thread Martin Liška

On 05/12/2016 12:41 PM, Jakub Jelinek wrote:
> On Wed, May 11, 2016 at 02:54:01PM +0200, Martin Liška wrote:
>> On 05/06/2016 02:22 PM, Jakub Jelinek wrote:
>>> On Fri, May 06, 2016 at 01:04:30PM +0200, Martin Liška wrote:
 I've started working on the patch a couple of months ago, basically after
 a brief discussion with Jakub on IRC.

 I'm sending the initial version which can successfully run instrumented
 tramp3d, postgresql server and Inkscape. It catches the basic set of
 examples which are added in following patch.

 The implementation is quite straightforward and works in the following steps:

 1) Every local variable stack slot is poisoned at the very beginning of a 
 function (RTL emission)
 2) In gimplifier, once we spot a DECL_EXPR, a variable is unpoisoned (by 
 emitting ASAN_MARK builtin)
 and the variable is marked as addressable
>>>
>>> Not all vars have DECL_EXPRs though.
> 
> Just random comments from quick skim, need to find enough spare time to
> actually try it and see how it works.
> 
>> Yeah, I've spotted one interesting example which is part of LLVM's testsuite:
>>
>> struct IntHolder {
>>   int val;
>> };
>>
>> const IntHolder *saved;
>>
>> void save(const IntHolder &holder) {
>>   saved = &holder;
>> }
>>
>> int main(int argc, char *argv[]) {
>>   save({10});
>>   int x = saved->val;  // BOOM
>>   return x;
>> }
>>
>> It would be also good to handle such temporaries. Any suggestions how to 
>> handle that in gimplifier?
> 
> Dunno, guess you need to do something in the FE for it already (talk to
> Jason?).  At least in *.original dump there is already:
>   <   save ((const struct IntHolder &) &TARGET_EXPR ) >;
> int x = (int) saved->val;
>   return  = x;
> and the info on where the D.2263 temporary goes out of scope is lost.

Thanks for the sample, I will ask Jason to help me with that.

> 
>> Apart from that, second version of the patch changes:
>> + fixed issues with missing stack unpoisoning; currently, I mark all 
>> VAR_DECLs that
>> are in ASAN_MARK internal fns and stack prologue/epilogue is emitted just 
>> for these vars
>> + removed unneeded hunks (tree-vect-patterns.c and asan_poisoning.cc)
>> + LABEL unpoisoning code makes stable sort for variables that were already 
>> used in the context
>> + stack poisoning hasn't worked for -O1+ due to following guard in asan.c
>>  /* Automatic vars in the current function will be always accessible.  */
>> + direct shadow memory poisoning/unpoisoning code is introduced - in both 
>> scenarios (RTL and GIMPLE),
>> I would appreciate feedback if storing multiple bytes is fine? What is the 
>> maximum memory wide
>> store mode supported by a target? How can I get such information?
>> + the maximum object size handled by a direct emission is guarded by 
>> use-after-scope-direct-emission-threshold
>> parameter; initial value (256B) should maximally emit store of 32B
> 
> Would be better if user visible param was in bytes rather than bits IMHO.
> 

Well, the size of an object is in bytes, but we map every 8 bytes of real
memory (yeah, that's configurable; I'm quite curious whether
ASAN_SHADOW_SHIFT is really respected everywhere) to
a single byte in shadow memory, so the division by 8 is needed.

>> Yeah, depends because of:
>>
>> static inline bool
>> asan_sanitize_use_after_scope (void)
>> {
>>   return ((flag_sanitize & SANITIZE_ADDRESS_USE_AFTER_SCOPE)
>>== SANITIZE_ADDRESS_USE_AFTER_SCOPE
>>&& flag_stack_reuse == SR_NONE
>>&& ASAN_STACK);
>> }
>>
>> Where ASAN_STACK comes from params.h.
> 
> I'd prefer just prototype the function in the header and define in asan.c
> or some other source file.  Or maybe split it, do the important case
> (flag_sanitize check) inline and call out of line function for the rest.
> Why do you check flag_stack_reuse?  I thought you'd arrange for it to be
> different when -fsanitize=use-after-scope?

Right, the sanitization does not relate to flag_stack, so by removing the
dependency
we can remove the need to include params.h in various places.

> 
>> @@ -243,6 +243,11 @@ static unsigned HOST_WIDE_INT asan_shadow_offset_value;
>>  static bool asan_shadow_offset_computed;
>>  static vec sanitized_sections;
>>  
>> +/* Set of variable declarations that are going to be guarded by
>> +   use-after-scope sanitizer.  */
>> +
>> +static hash_set  asan_handled_variables(13);
> 
> Not sure about the formatting here, don't we use xxx instead of xxx 
> ?  And I'd expect space before (.

Yeah, done.

>> @@ -1020,6 +1020,91 @@ asan_function_start (void)
>>   current_function_funcdef_no);
>>  }
>>  
>> +/* Return number of shadow bytes that are occupied by a local variable
>> +   of SIZE bytes.  */
>> +
>> +static unsigned HOST_WIDE_INT
>> +get_shadow_memory_size (unsigned HOST_WIDE_INT size)
>> +{
>> +  /* Round up size of object.  */
>> +  unsigned HOST_WIDE_INT r;
>> +  if ((r = size % BITS_PER_UNIT) != 0)
>> +size += BITS_PER_UN

Re: [PATCH] Improve other 13 define_insns

2016-05-12 Thread Kirill Yukhin

Hi Jakub,
On 04 May 21:43, Jakub Jelinek wrote:
> Hi!
> 
> This patch tweaks more define_insns at once, again all the insns
> should be already in AVX512F or AVX512VL.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-05-04  Jakub Jelinek  
> 
>   * config/i386/sse.md (sse_shufps_, sse_storehps, sse_loadhps,
>   sse_storelps, sse_movss, avx2_vec_dup, avx2_vec_dupv8sf_1,
>   sse2_shufpd_, sse2_storehpd, sse2_storelpd, sse2_loadhpd,
>   sse2_loadlpd, sse2_movsd): Use v instead of x in vex or maybe_vex
>   alternatives, use maybe_evex instead of vex in prefix.
> 
>  ;; Avoid combining registers from different units in a single alternative,
>  ;; see comment above inline_secondary_memory_needed function in i386.c
>  (define_insn "sse2_storehpd"
> -  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
> +  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
>   (vec_select:DF
> -   (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
> +   (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")
Same (as [1]) here.
Testing this fix:
@@ -8426,7 +8426,7 @@
 ;; Avoid combining registers from different units in a single alternative,
 ;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_storehpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
(vec_select:DF
  (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")


[1] - https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00908.html

--
Thanks, K

Re: [PATCH] [rtlfe] Barebones implementation of "__RTL"; next steps?

2016-05-12 Thread David Malcolm

On Wed, 2016-05-11 at 11:57 +0200, Richard Biener wrote:
> On Wed, May 11, 2016 at 3:31 AM, Trevor Saunders <
> tbsau...@tbsaunde.org> wrote:
> > On Tue, May 10, 2016 at 05:01:00PM -0400, David Malcolm wrote:
> > > [CCing Prasad since this may be useful for his gimple FE work, by
> > > replacing "rtl" with "gimple" in the patch]
> > > 
> > > On Mon, 2016-05-09 at 11:44 +0200, Richard Biener wrote:
> > > > On Wed, May 4, 2016 at 10:49 PM, David Malcolm <
> > > > dmalc...@redhat.com>
> > > > wrote:
> > > 
> > > > > This patch kit introduces an RTL frontend, for the purpose
> > > > > > of unit testing: primarily for unit testing of RTL passes, and
> > > > > possibly for unit testing of .md files.
> > > > > 
> > > > > It's very much a work-in-progress; I'm posting it now to get
> > > > > feedback.
> > > 
> > > [...snip...]
> > > 
> > > > > * The RTL frontend doesn't have any knowledge of the name of
> > > > > the
> > > > > function,
> > > > >   of parameters, types, locals, globals, etc.  It creates a
> > > > > single
> > > > > function.
> > > > >   The function is currently hardcoded to have this signature:
> > > > > 
> > > > >  int test_1 (int, int, int);
> > > > > 
> > > > > >   since there's no syntax for specifying otherwise, and we need
> > > > > to
> > > > > provide
> > > > >   a FUNCTION_DECL tree when building a function object (by
> > > > > calling
> > > > >   allocate_struct_function).
> > > > > 
> > > > > * Similarly, there are no types beyond the built-in ones; all
> > > > > expressions
> > > > >   are treated as being of type int.  I suspect that this
> > > > > approach
> > > > >   will be too simplistic when it comes to e.g. aliasing.
> > > > 
> > > > To address this and the previous issue I suggest to implement
> > > > the RTL
> > > > FE
> > > > similar to how I proposed the GIMPLE FE - piggy-back on the C
> > > > FE and
> > > > thus
> > > > allow
> > > > 
> > > > int __RTL foo (int a, int b) // gets you function decl and
> > > > param
> > > > decls
> > > > {
> > > >  (insn ...)
> > > > ...
> > > > 
> > > > }
> > > > 
> > > > int main()
> > > > {
> > > >   if (foo (1) != 0)
> > > > abort ();
> > > > }
> > > > 
> > > > That would also allow dg-do run testcases and not rely solely
> > > > on
> > > > scanning
> > > > RTL dumps.
> > > 
> > > The following is an attempt at implementing this, by adding a new
> > > "__RTL" keyword, and detecting it in the C frontend, switching
> > > to a custom parser for the function body.
> > > 
> > > Does this look like the kind of thing you had in mind for the
> > > RTL and gimple "frontends"?
> 
> Yes!
> 
> > > Wiring this up to the existing RTL parser might be awkward: the
> > > existing RTL parser in read-md.c etc works at the level of
> > > characters (read_char and unread_char from a FILE *), whereas the
> > > C frontend is in terms of tokens.
> > > 
> > > I have a patch that ports the RTL parser to using libcpp for
> > > location-tracking, and another that updates it to use libcpp
> > > for diagnostics.  This adds more dependency on a build-time
> > > libcpp to the gen* tools.  Both patches are currently messy.
> > > Potentially I could build on them and attempt to update the RTL
> > > parser further, to use libcpp's tokenizer.
> > > 
> > > Does that general approach sound sane?  In particular:
> > > - is it sane to eliminate errors.c in favor of building
> > > diagnostics*.c for the build machine as well as the host machine?
> > > - is it sane to rewrite the read-md.c/read-rtl.c code to
> > > a token-based approach, using libcpp?
> > > 
> > > Alternatively, the shorter term approach would be to kludge
> > > in reading from a FILE * in read-md.c based on where the
> > > C parser has got to, with a hybrid of the two approaches
> > > (character-based vs token-based).
> > 
> > Another option is to make read-md.c use tokens, but instead of
> > building
> > libcpp for the build machine write a new token parser that is text
> > compatible with the libcpp one, but just enough to do what read
> > -md.c
> > needs.  However that seems silly, and suggests just using libcpp is
> > the
> > sane thing to do :)
> > 
> > > Thoughts?
> > 
> > I'm not aware of any pitfalls in using libcpp in build tools,
> > though it
> > does seem slightly unfortunate to need to build so much build tool
> > stuff.
> > 
> > Thinking about this I wonder if libcpp would be useful in gengtype
> > to
> > get around some of the weirdness with headers (and maybe even
> > languages?) but that doesn't need to be thought about now.
> 
> genmatch also uses libcpp, it's really convenient for the diagnostics
> as well.
> 
> That said, another kludge would be to simply use cpp_token_as_text
> (see genmatch.c:c_expr::gen_transform), write the whole function
> to a temporary file and parse that back in with read_md ;)
> 
> In my mind sth to continue prototyping is more important than to
> clean
> this piece up right now.

Thanks.  

Given that I have a semi-working "rtl1" and "make check-rtl", I've
moved the

Re: [PATCH] Improve vec_concatv?sf*

2016-05-12 Thread Kirill Yukhin

On 12 May 15:55, Jakub Jelinek wrote:
> On Thu, May 12, 2016 at 04:39:52PM +0300, Kirill Yukhin wrote:
> > > --- gcc/config/i386/sse.md.jj 2016-05-04 14:36:08.0 +0200
> > > +++ gcc/config/i386/sse.md2016-05-04 15:16:44.180894303 +0200
> > > @@ -6415,12 +6415,12 @@ (define_insn "avx512f_vec_dup_1"
> > >  ;; unpcklps with register source since it is shorter.
> > >  (define_insn "*vec_concatv2sf_sse4_1"
> > >[(set (match_operand:V2SF 0 "register_operand"
> > > -   "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
> > > +   "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
> > >   (vec_concat:V2SF
> > > (match_operand:SF 1 "nonimmediate_operand"
> > > -   "  0, 0,x, 0,0, x,m, 0 , m")
> > > +   "  0, 0,v, 0,0, v,m, 0 , m")
> > > (match_operand:SF 2 "vector_move_operand"
> > > -   " Yr,*x,x, m,m, m,C,*ym, C")))]
> > > +   " Yr,*x,v, m,m, m,C,*ym, C")))]
> > >"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> > >"@
> > > unpcklps\t{%2, %0|%0, %2}
> > Looks like we were wrong here.
> > We need to use Yv constraint for vunpcklps since this
> > insn is available for AVX-512VL only.
> > 
> > Like this:
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index d77227a..7d71640 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -6546,12 +6546,12 @@
> >  ;; unpcklps with register source since it is shorter.
> >  (define_insn "*vec_concatv2sf_sse4_1"
> >[(set (match_operand:V2SF 0 "register_operand"
> > - "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
> > + "=Yr,*x,Yv,Yr,*x,v,v,*y ,*y")
> > (vec_concat:V2SF
> >   (match_operand:SF 1 "nonimmediate_operand"
> > - "  0, 0,v, 0,0, v,m, 0 , m")
> > + "  0, 0,Yv, 0,0, v,m, 0 , m")
> >   (match_operand:SF 2 "vector_move_operand"
> > - " Yr,*x,v, m,m, m,C,*ym, C")))]
> > + " Yr,*x,Yv, m,m, m,C,*ym, C")))]
> >"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> >"@
> > unpcklps\t{%2, %0|%0, %2}
> > 
> > Will check in to main trunk after bootstrap/regtest.
> 
> I'm not sure about the Yv on the operand 0, I think without AVX512VL
> HARD_REGNO_MODE_OK will disallow V2SFmode regs in XMM16+ (but, this
> is MMX-ish mode, so maybe we don't allow it ever in XMM16+).
> On the SFmode operands side, you're right, HARD_REGNO_MODE_OK allows
> SFmode in XMM16+ even for only AVX512F.
Agreed.
> 
>   Jakub

Re: [PATCH] Better location info for "incomplete type" error msg (PR c/70756)

2016-05-12 Thread Jason Merrill


OK, thanks.

Jason

Re: libgomp: Make GCC 5 OpenACC offloading executables work

2016-05-12 Thread Nathan Sidwell


On 05/11/16 12:02, Thomas Schwinge wrote:


I conceptually agree to that.  (If we're serious about that, then we can
remove more code, such as the legacy libgomp entry point itself -- a
"missing symbol: [...]" is still vaguely better than a SIGSEGV.)  Yet,
what I fixed here, is just what Jakub and Nathan agreed upon in
:


Well, that email appears to be from September, and this patch is more complex 
than the linker versioning script I had anticipated.  If it's taken this long to 
create a patch, either it's very hard, or it's not a priority.  Given 6.1 is 
released, it also seems to have failed to catch the train.


nathan

Re: PATCH: PR target/70738: Add -mgeneral-regs-only option

2016-05-12 Thread Sandra Loosemore


On 05/11/2016 11:02 AM, H.J. Lu wrote:

On Tue, May 10, 2016 at 1:02 PM, Sandra Loosemore
 wrote:


Again, this sounds like implementor-speak, and there are grammatical errors
(noun/verb disagreement, missing articles).  Do users of this attribute need
to know what instructions the compiler is emitting?  We already say above
that it causes GCC to generate suitable entry and exit sequences.



It was done on purpose since this document also serves as
the spec for compiler implementers.


But readers of the user documentation are users, not compiler 
implementors, so the patch for the manual needs to have a different focus.



Here is a patch to add
-mgeneral-regs-only option to x86 backend.   We can update the
spec for interrupt handlers to recommend compiling interrupt handlers
with -mgeneral-regs-only option and add a note for compiler
implementers.

OK for trunk if there is no regression?


I can't comment on the code patch, but for the documentation part:


@@ -24242,6 +24242,12 @@ opcodes, to mitigate against certain forms of attack. 
At the moment,
 this option is limited in what it can do and should not be relied
 on to provide serious protection.

+@item -mgeneral-regs-only
+@opindex mgeneral-regs-only
+Generate code which uses only the general-purpose registers.  This will


s/which/that/


+prevent the compiler from using floating-point, vector, mask and bound


s/will prevent/prevents/


+registers, but will not impose any restrictions on the assembler.


Maybe you mean to say "does not restrict use of those registers in 
inline assembly code"?  In any case, please get rid of the future tense 
here, too.



+
 @end table

 These @samp{-m} switches are supported in addition to the above


-Sandra

Re: Simple bitop reassoc in match.pd

2016-05-12 Thread Marc Glisse


On Thu, 12 May 2016, Richard Biener wrote:

Yeah - note that VRP already calls set_range_info before simplifying 
stmts.  It's just that substitute_and_fold doesn't apply fold_stmt (and 
thus match.pd) to all stmts but it only applies the pass specific "fold" 
(vrp_fold_stmt) to all stmts.


Just to be sure: is it on purpose that VRP doesn't apply fold_stmt? 
The restriction makes sense, it is just that it may yield a bit of 
duplication. We already indirectly use get_range_info in match.pd and may 
miss out on opportunities that only occur in branches during the VRP pass.


--
Marc Glisse

[PATCH 1/3] Indirect inlining of targets from references of global constants

2016-05-12 Thread Martin Jambor

Hi,

the patch below implements deducing aggregate contents from pointers
to constant variables for inlining and IPA-CP, which finally makes us
perform the optimization requested in
https://gcc.gnu.org/ml/gcc/2014-07/msg00240.html. It also lays down
the basis for doing optimization requested in PR 69708 but two
additional small patches are required for that.

This means we do not give up if we can't use AA to prove that
the memory in question has not been clobbered since invocation of the
function but only mark this fact in the indirect_call_info.  Later on
we still use this information if we know that the parameter in
question points to a constant variable.

If this is deemed a good approach, we will probably want to add a
similar bit to inlining conditions.

Bootstrapped, lto-bootstrapped and tested on x86_64-linux. OK for
trunk?

Thanks,

Martin


2016-05-11  Martin Jambor  

PR ipa/69708
* cgraph.h (cgraph_indirect_call_info): New field
guaranteed_unmodified.
* ipa-cp.c (ipa_get_indirect_edge_target_1): Also pass parameter value
to ipa_find_agg_cst_for_param, check guaranteed_unmodified when
appropriate.
* ipa-inline-analysis.c (evaluate_conditions_for_known_args): Also
pass the parameter value to ipa_find_agg_cst_for_param.
* ipa-prop.c (ipa_load_from_parm_agg): New parameter
guaranteed_unmodified, store AA results there instead of bailing out
if present.
(ipa_note_param_call): Also initialize guaranteed_unmodified flag.
(ipa_analyze_indirect_call_uses): Also set guaranteed_unmodified flag.
(find_constructor_constant_at_offset): New function.
(ipa_find_agg_cst_from_init): Likewise.
(ipa_find_agg_cst_for_param): Also search for aggregate values in
static initializers of constants, report back through a new parameter
from_global_constant if that was the case.
(try_make_edge_direct_simple_call): Also pass parameter value to
ipa_find_agg_cst_for_param, check guaranteed_unmodified when
appropriate.
(ipa_write_indirect_edge_info): Stream new flag guaranteed_unmodified.
(ipa_read_indirect_edge_info): Likewise.
* ipa-prop.h (ipa_find_agg_cst_for_param): Update declaration.
(ipa_load_from_parm_agg): Likewise.

testsuite/
* gcc.dg/ipa/iinline-cstagg-1.c: New test.
* gcc.dg/ipa/ipcp-cstagg-1.c: Likewise.
* gcc.dg/ipa/ipcp-cstagg-2.c: Likewise.
* gcc.dg/ipa/ipcp-cstagg-3.c: Likewise.
* gcc.dg/ipa/ipcp-cstagg-4.c: Likewise.
---
 gcc/cgraph.h|   9 +-
 gcc/ipa-cp.c|  26 ++--
 gcc/ipa-inline-analysis.c   |   3 +-
 gcc/ipa-prop.c  | 180 
 gcc/ipa-prop.h  |  13 +-
 gcc/testsuite/gcc.dg/ipa/iinline-cstagg-1.c |  37 ++
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-1.c|  42 +++
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-2.c|  46 +++
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-3.c|  58 +
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-4.c|  64 ++
 10 files changed, 440 insertions(+), 38 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/iinline-cstagg-1.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-1.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-3.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-4.c

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 8ad9f45..ecafe63 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1579,9 +1579,14 @@ struct GTY(()) cgraph_indirect_call_info
   unsigned agg_contents : 1;
   /* Set when this is a call through a member pointer.  */
   unsigned member_ptr : 1;
-  /* When the previous bit is set, this one determines whether the destination
- is loaded from a parameter passed by reference. */
+  /* When the agg_contents bit is set, this one determines whether the
+ destination is loaded from a parameter passed by reference. */
   unsigned by_ref : 1;
+  /* When the agg_contents bit is set, this one determines whether we can
+ deduce from the function body that the loaded value from the reference is
+ never modified between the invocation of the function and the load
+ point.  */
+  unsigned guaranteed_unmodified : 1;
   /* For polymorphic calls this specify whether the virtual table pointer
  may have changed in between function entry and the call.  */
   unsigned vptr_changed : 1;
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 5900d4d..2183da0 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1999,9 +1999,9 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
 
   if (ie->indirect_info->agg_contents)
{
- if (agg_reps)
+ t = NULL;
+ if (agg_reps && ie->indirect_info->guaranteed_unmodified)
{
- t = NUL

[PATCH 2/3] Const parameters are always unmodified

2016-05-12 Thread Martin Jambor

Hi,

this patch simply makes parm_preserved_before_stmt_p consider all
const PARM_DECLs constant and does not invoke AA walking on them
(really the DECLs themselves, not the memory they might point to).

Bootstrapped and lto-bootstrapped and tested on x86_64-linux.  OK for
trunk?

Thanks,

Martin


2016-05-10  Martin Jambor  

* ipa-prop.c (parm_preserved_before_stmt_p): Return true for loads
from TREE_READONLY parameters.
---
 gcc/ipa-prop.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index afbc32b..7d869ed 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -803,6 +803,11 @@ parm_preserved_before_stmt_p (struct ipa_func_body_info 
*fbi, int index,
   bool modified = false;
   ao_ref refd;
 
+  tree base = get_base_address (parm_load);
+  gcc_assert (TREE_CODE (base) == PARM_DECL);
+  if (TREE_READONLY (base))
+return true;
+
   /* FIXME: FBI can be NULL if we are being called from outside
  ipa_node_analysis or ipcp_transform_function, which currently happens
  during inlining analysis.  It would be great to extend fbi's lifetime and
-- 
2.8.2

[PATCH 3/3] Allow constant global VAR_DECLs in constant jump functions

2016-05-12 Thread Martin Jambor

Hi,

the following patch adds the final step necessary to perform
optimization requested in PR 69708, i.e do indirect inlining of a
function passed by value in a structure.  It allows jump functions to
be aggregate global constant VAR_DECLs, which enables the
constructor-walking code introduced in the first patch of the series
to deduce aggregate contents from it.  IPA-CP expects jump-functions
to be scalars, and they indeed need be for processing arithmetic
jump-functions, but this patch allows any tree for the simple ones.

Bootstrapped, lto-bootstrapped and tested on x86_64.  OK for trunk?

Thanks,

Martin


2016-05-11  Martin Jambor  

PR ipa/69708
* ipa-cp.c (ipa_get_jf_pass_through_result): Allow non-ip constant
input for NOP_EXPR pass-through functions.
* ipa-prop.c (ipa_compute_jump_functions_for_edge): Allow
aggregate global constant VAR_DECLs in constant jump functions.

testsuite/
* gcc.dg/ipa/iinline-cstagg-2.c: New test.
* gcc.dg/ipa/ipcp-cstagg-5.c: Likewise.
* gcc.dg/ipa/ipcp-cstagg-6.c: Likewise.
* gcc.dg/ipa/ipcp-cstagg-7.c: Likewise.
---
 gcc/ipa-cp.c|  3 +-
 gcc/ipa-prop.c  |  5 ++-
 gcc/testsuite/gcc.dg/ipa/iinline-cstagg-2.c | 30 +
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-5.c| 37 
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-6.c| 43 +++
 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-7.c| 65 +
 6 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/iinline-cstagg-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-5.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-6.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-7.c

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 2183da0..8caa973 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1026,9 +1026,10 @@ ipa_get_jf_pass_through_result (struct ipa_jump_func 
*jfunc, tree input)
 {
   tree restype, res;
 
-  gcc_checking_assert (is_gimple_ip_invariant (input));
   if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
 return input;
+  if (!is_gimple_ip_invariant (input))
+return NULL_TREE;
 
   if (TREE_CODE_CLASS (ipa_get_jf_pass_through_operation (jfunc))
   == tcc_comparison)
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 7d869ed..ead8267 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -1674,7 +1674,10 @@ ipa_compute_jump_functions_for_edge (struct 
ipa_func_body_info *fbi,
   else
gcc_assert (!jfunc->alignment.known);
 
-  if (is_gimple_ip_invariant (arg))
+  if (is_gimple_ip_invariant (arg)
+ || (TREE_CODE (arg) == VAR_DECL
+ && is_global_var (arg)
+ && TREE_READONLY (arg)))
ipa_set_jf_constant (jfunc, arg, cs);
   else if (!is_gimple_reg_type (TREE_TYPE (arg))
   && TREE_CODE (arg) == PARM_DECL)
diff --git a/gcc/testsuite/gcc.dg/ipa/iinline-cstagg-2.c 
b/gcc/testsuite/gcc.dg/ipa/iinline-cstagg-2.c
new file mode 100644
index 000..546db87
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/iinline-cstagg-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-inline-details -fno-early-inlining 
-fno-ipa-sra -fno-ipa-cp" } */
+
+typedef struct S
+{
+  int add_offset;
+  int (*call)(int);
+} S;
+
+static int
+bar (const S f, int x)
+{
+  x = f.call (x);
+  return x;
+}
+
+static int
+thisisthetarget (int x)
+{
+  return x * x;
+}
+
+int
+outerfunction (int x)
+{
+  return bar ((S){16, thisisthetarget}, x);
+}
+
+
+/* { dg-final { scan-ipa-dump "thisisthetarget\[^\\n\]*inline copy in 
outerfunction"  "inline"  } } */
diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-5.c 
b/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-5.c
new file mode 100644
index 000..56d544e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-5.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details" } */
+
+typedef struct S
+{
+  int add_offset;
+  int (*call)(int);
+} S;
+
+extern const S *es;
+
+static int  __attribute__((noinline))
+foo (const S f, int x)
+{
+  es = &f; /* This disables IPA-SRA */
+  x = f.call(x+f.add_offset);
+  x = f.call(x);
+  x = f.call(x);
+  return x;
+}
+
+static int
+sq (int x)
+{
+  return x * x;
+}
+
+static const S s = {16, sq};
+
+int
+h (int x)
+{
+  return foo (s, x);
+}
+
+/* { dg-final { scan-ipa-dump "Discovered an indirect call to a known target" 
"cp" } } */
+/* { dg-final { scan-ipa-dump-times "Discovered an indirect call to a known 
target" 3 "cp" } } */
diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-6.c 
b/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-6.c
new file mode 100644
index 000..7891082
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-cstagg-6.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details" } */
+
+typedef struct S
+{
+  int add_offset;
+  int (*call)(int

[PATCH] Fix ASAN bootstrap (uninitialized variable warning)

2016-05-12 Thread Martin Liška

Hi.

Following patch is needed to survive --with-build-config=bootstrap-asan:

../../gcc/tree-vect-patterns.c: In function ‘gimple* 
vect_recog_mask_conversion_pattern(vec*, tree_node**, tree_node**)’:
../../gcc/tree-vect-patterns.c:3612:34: error: ‘lhs’ may be used uninitialized 
in this function [-Werror=maybe-uninitialized]
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  ^~~

Ready after it finishes regression tests?
Thanks,
Martin
>From 808bcc26553c7046676d62be4e3a64e6687bf1a2 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 12 May 2016 18:17:27 +0200
Subject: [PATCH] Fix ASAN bootstrap (uninitialized variable warning)

gcc/ChangeLog:

2016-05-12  Martin Liska  

	* tree-vect-patterns.c (vect_recog_mask_conversion_pattern):
	Initialize a variable with default value.
---
 gcc/tree-vect-patterns.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index d08b454..cc8c445 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -3570,7 +3570,8 @@ vect_recog_mask_conversion_pattern (vec *stmts, tree *type_in,
 {
   gimple *last_stmt = stmts->pop ();
   enum tree_code rhs_code;
-  tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
+  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
+  tree vectype1, vectype2;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
   stmt_vec_info pattern_stmt_info;
   vec_info *vinfo = stmt_vinfo->vinfo;
-- 
2.8.2

Re: [PATCH 3/3] Enhance dumps of IVOPTS

2016-05-12 Thread Martin Liška

On 05/12/2016 03:51 PM, Bin.Cheng wrote:
> On Thu, May 12, 2016 at 1:13 PM, Martin Liška  wrote:
>> On 05/10/2016 03:16 PM, Bin.Cheng wrote:
>>> Another way is to remove the use of id for struct iv_inv_expr_ent once
>>> for all.  We can change iv_ca.used_inv_expr and cost_pair.inv_expr_id
>>> to pointers, and rename iv_inv_expr_ent.id to count and use this to
>>> record reference number in iv_ca.  This if-statement on dump_file can
>>> be saved.  Also I think it simplifies current code a bit.  For now,
>>> there are id <-> struct maps for different structures in IVOPT which
>>> make it not straightforward.
>>
>> Hi.
>>
>> I'm sending the second version of the patch. I tried to follow your advice, but
>> because an iv_inv_expr_ent can simultaneously belong to multiple iv_cas,
>> putting a counter into iv_inv_expr_ent does not work. Instead of that, I've
>> decided to replace used_inv_expr with a hash_map that contains the used inv_exprs
>> and where the value of the map is the number of usages.
>>
>> Further questions:
>> + iv_inv_expr_ent::id can be now removed as it's used just for purpose of 
>> dumps
>> Group 0:
>>   cand  costscaled  freqcompl.  depends on
>>   5 2   2.001.000
>>   6 4   4.001.001inv_expr:0
>>   7 4   4.001.001inv_expr:1
>>   8 4   4.001.001inv_expr:2
>>
>> That can be replaced with print_generic_expr, but I think using ids makes 
>> the dump
>> output more clear.
> I am okay with keeping id.  Could you please dump all inv_exprs in a
> single section like
> :
> inv_expr 0: print_generic_expr
> inv_expr 1: ...
> 
> Then only dump the id afterwards?
> 

Sure, it would be definitely better:

The new dump format looks:

:
inv_expr 0: sudoku_351(D) + (sizetype) S.833_774 * 4
inv_expr 1: sudoku_351(D) + ((sizetype) S.833_774 * 4 + 
18446744073709551580)
inv_expr 2: sudoku_351(D) + ((sizetype) S.833_774 + 72) * 4
inv_expr 3: sudoku_351(D) + ((sizetype) S.833_774 + 81) * 4
inv_expr 4: &A.832 + (sizetype) _377 * 4
inv_expr 5: &A.832 + ((sizetype) _377 * 4 + 18446744073709551612)
inv_expr 6: &A.832 + ((sizetype) _377 + 8) * 4
inv_expr 7: &A.832 + ((sizetype) _377 + 9) * 4

:
Group 0:
  cand  costscaled  freqcompl.  depends on

...

Improved to:
  cost: 27 (complexity 2)
  cand_cost: 11
  cand_group_cost: 10 (complexity 2)
  candidates: 3, 5
   group:0 --> iv_cand:5, cost=(2,0)
   group:1 --> iv_cand:5, cost=(4,1)
   group:2 --> iv_cand:5, cost=(4,1)
   group:3 --> iv_cand:3, cost=(0,0)
   group:4 --> iv_cand:3, cost=(0,0)
  invariants 1, 6
  invariant expressions 6, 3

The only question here is that, as used_inv_exprs are stored in a hash_map,
the order of dumped invariants would not be stable. Is that a problem?

>>
>> + As check_GNU_style.sh reported multiple 8 spaces issues in hunks I've 
>> touched, I decided
>> to fix all 8 spaces issues. Hope it's fine.
>>
>> I'm going to test the patch.
>> Thoughts?
> 
> Some comments on the patch embedded.
> 
>>
>> +/* Forward declaration.  */
> Not necessary.
>> +struct iv_inv_expr_ent;
>> +

I think it's needed because struct cost_pair uses a pointer to iv_inv_expr_ent.

> 
>>
>>  /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
>>
>> -static int
>> +static iv_inv_expr_ent *
>>  get_expr_id (struct ivopts_data *data, tree expr)
> We are not returning id any more, maybe rename to record_inv_expr or else.

Done.

> 
>>  {
>>struct iv_inv_expr_ent ent;
>> @@ -4806,13 +4809,13 @@ get_expr_id (struct ivopts_data *data, tree expr)
>>ent.hash = iterative_hash_expr (expr, 0);
>>slot = data->inv_expr_tab->find_slot (&ent, INSERT);
>>if (*slot)
>> -return (*slot)->id;
>> +return *slot;
>>
>>*slot = XNEW (struct iv_inv_expr_ent);
>>(*slot)->expr = expr;
>>(*slot)->hash = ent.hash;
>>(*slot)->id = data->max_inv_expr_id++;
>> -  return (*slot)->id;
>> +  return *slot;
> This could be changed to
>   if (!*slot)
> {
>   //new and insert
> }
>   return *slot;

Also done.

>>  }
>>
>>  /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
>> @@ -4820,10 +4823,10 @@ get_expr_id (struct ivopts_data *data, tree expr)
>> ADDRESS_P is a flag indicating if the expression is for address
>> computation.  */
>>
>> -static int
>> +static iv_inv_expr_ent *
>>  get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
>> -tree cbase, HOST_WIDE_INT ratio,
>> -bool address_p)
>> +tree cbase, HOST_WIDE_INT ratio,
>> +bool address_p)
> Rename function name here too.
>>  {
> 

Likewise.

>> @@ -5988,9 +5992,9 @@ determine_group_iv_costs (struct ivopts_data *data)
>>if (group->cost_map[j].depends_on)
>>  bitmap_print (dump_file,
>>group->cost_map[j].depends_on, "","");
>> -  if (group->cost_map[j].inv_expr_id != -1)
>> +if (group->cost_map[j].inv_expr != NULL)
>>

Re: Simple bitop reassoc in match.pd

2016-05-12 Thread Richard Biener

On May 12, 2016 6:02:47 PM GMT+02:00, Marc Glisse  wrote:
>On Thu, 12 May 2016, Richard Biener wrote:
>
>> Yeah - note that VRP already calls set_range_info before simplifying 
>> stmts.  It's just that substitute_and_fold doesn't apply fold_stmt
>(and 
>> thus match.pd) to all stmts but it only applies the pass specific
>"fold" 
>> (vrp_fold_stmt) to all stmts.
>
>Just to be sure: is the fact that VRP doesn't apply fold_stmt on
>purpose? 

The propagator only folds stmts that had operands replaced (that doesn't enable 
all simplifications as match.pd patterns cover more than one statement).

>The restriction makes sense, it is just that it may yield a bit of 
>duplication. We already indirectly use get_range_info in match.pd and
>may 
>miss out on opportunities that only occur in branches during the VRP
>pass.

Yes.  The pass-specific fold is called on each stmt.  Maybe we can omit the 
propagator's folding if the pass-specific folding applied.

Richard.

[Patch, lra] relax the restriction on subreg reload for wide mode

2016-05-12 Thread Jiong Wang


PR70904 is caused by improper reload for wide modes.

For a mode with size bigger than ptr_mode, there is unlikely to be a "mov"
between two registers with different classes, but there normally will be a
"mov" which transfers an element of a vector register into a general
register, and this normally will be a subreg which should be reloaded as
a whole.  This is particularly likely to be triggered when
-fno-split-wide-types is specified.

x86-64/aarch64 bootstrap OK and no regression on check-gcc/g++ on both.

Committed as r236181. It's reviewed and approved in the PR.
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70904)

2016-05-12  Jiong Wang  
gcc/
  PR target/70904
  * lra-constraints.c (process_addr_reg): Relax the restriction on
  subreg reload for wide mode.


Index: gcc/lra-constraints.c
===
--- gcc/lra-constraints.c   (revision 236171)
+++ gcc/lra-constraints.c   (working copy)
@@ -1307,7 +1307,21 @@
 
   subreg_p = GET_CODE (*loc) == SUBREG;

   if (subreg_p)
-loc = &SUBREG_REG (*loc);
+{
+  reg = SUBREG_REG (*loc);
+  mode = GET_MODE (reg);
+
+  /* For mode with size bigger than ptr_mode, there unlikely to be "mov"
+between two registers with different classes, but there normally will
+be "mov" which transfers element of vector register into the general
+register, and this normally will be a subreg which should be reloaded
+as a whole.  This is particularly likely to be triggered when
+-fno-split-wide-types specified.  */
+  if (in_class_p (reg, cl, &new_class)
+ || GET_MODE_SIZE (mode) <= GET_MODE_SIZE (ptr_mode))
+   loc = &SUBREG_REG (*loc);
+}
+
   reg = *loc;
   mode = GET_MODE (reg);
   if (! REG_P (reg))

Re: Thoughts on memcmp expansion (PR43052)

2016-05-12 Thread Bernd Schmidt


On 05/02/2016 03:14 PM, Richard Biener wrote:


I think it fits best in tree-ssa-strlen.c:strlen_optimize_stmt for the moment.


I've done this in this version. Note that this apparently means it won't 
be run at -O unlike the previous version.


The code moved to tree-ssa-strlen.c is nearly identical to the previous 
version, with the exception of a new line that copies the contents of 
gimple_call_use_set to the newly constructed call. Without this, I found 
a testcase where we can miss a DSE opportunity since we think memcmp_eq 
can access anything.


In the by_pieces_code, I've gone ahead and progressed the C++ conversion 
a little further by making subclasses of op_by_pieces_d. Now the 
{move,store,compare}_by_pieces functions generally just make an object 
of the appropriate type and call its run method. I've also converted 
store_by_pieces to this infrastructure to eliminate more duplication.


I've verified that if you disable the strlenopt code, you get identical 
code generation before and after the patch on x86_64-linux and arm-eabi. 
With the strlenopt enabled, it finds memcmp optimization opportunities 
on both targets (more on x86 due to !SLOW_UNALIGNED_ACCESS). On arm, 
there is a question mark over whether the autoinc support in the 
by_pieces code is really a good idea, but that's unchanged from before 
and can be addressed later.
Microbenchmarks on x86 suggest that the new by-pieces expansion is a 
whole lot faster than calling memcmp (about a factor of 3 in my tests).


Bootstrapped and tested on x86_64-linux. The testcase failed at -O (now 
changed to -O2), and there was a failure in vrp47.c which also seems to 
appear in gcc-testresults, so I think it's unrelated. Still, I'll make 
another run against a new baseline. Ok for trunk if that all comes back 
clean?



Bernd
	PR tree-optimization/52171
	* builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere.
	(expand_builtin_memcmp): New arg RESULT_EQ.  All callers changed.
	Look for constant strings.  Move some code to emit_block_cmp_hints
	and use it.
	* builtins.def (BUILT_IN_MEMCMP_EQ): New.
	* defaults.h (COMPARE_MAX_PIECES): New macro.
	* expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs.
	(move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove.
	(clear_by_pieces_1): Don't declare.  Move definition before use.
	(can_do_by_pieces): New static function.
	(can_move_by_pieces): Use it.  Return bool.
	(by_pieces_ninsns): Renamed from move_by_pieces_ninsns.  New arg
	OP.  All callers changed.  Handle COMPARE_BY_PIECES.
	(class pieces_addr): New.
	(pieces_addr::pieces_addr, pieces_addr::decide_autoinc,
	pieces_addr::adjust, pieces_addr::increment_address,
	pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member
	functions for it.
	(class op_by_pieces_d): New.
	(op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member
	functions for it.
	(class move_by_pieces_d, class compare_by_pieces_d,
	class store_by_pieces_d): New subclasses of op_by_pieces_d.
	(move_by_pieces_d::prepare_mode, move_by_pieces_d::generate,
	move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode,
	store_by_pieces_d::generate, store_by_pieces_d::finish_endp,
	compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode,
	compare_by_pieces_d::finish_mode): New member functions.
	(compare_by_pieces, emit_block_cmp_via_cmpmem): New static
	functions.
	(expand_cmpstrn_or_cmpmem): Moved here from builtins.c.
	(emit_block_cmp_hints): New function.
	(move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just
	use the newly defined classes.
	* expr.h (by_pieces_constfn): New typedef.
	(can_store_by_pieces, store_by_pieces): Use it in arg declarations.
	(emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare.
	(move_by_pieces_ninsns): Don't declare.
	(can_move_by_pieces): Change return value to bool.
	* target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs.
	(compare_by_pieces_branch_ratio): New hook.
	* target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES.
	(by_pieces_ninsns): Declare.
	* targhooks.c (default_use_by_pieces_infrastructure_p): Handle
	COMPARE_BY_PIECES.
	(default_compare_by_pieces_branch_ratio): New function.
	* targhooks.h (default_compare_by_pieces_branch_ratio): Declare.
	* doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document.
	* doc/tm.texi: Regenerate.
	* tree-ssa-strlen.c: Include "builtins.h".
	(handle_builtin_memcmp): New static function.
	(strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP.
	* tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq.

testsuite/
	PR tree-optimization/52171
	* gcc.dg/pr52171.c: New test.
	* gcc.target/i386/pr52171.c: New test.

Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 236113)
+++ gcc/builtins.c	(working copy)
@@ -3671,53 +3671,24 @@ expand_cmpstr (insn_code icode, rtx targ
   return NULL_RTX;
 }
 
-/* Try to expand cmpstrn or cmpmem operatio

Re: PATCH: PR target/70738: Add -mgeneral-regs-only option

2016-05-12 Thread H.J. Lu

On Thu, May 12, 2016 at 8:46 AM, Sandra Loosemore
 wrote:
> On 05/11/2016 11:02 AM, H.J. Lu wrote:
>>
>> On Tue, May 10, 2016 at 1:02 PM, Sandra Loosemore
>>  wrote:
>>>
>>>
>>> Again, this sounds like implementor-speak, and there are grammatical
>>> errors
>>> (noun/verb disagreement, missing articles).  Do users of this attribute
>>> need
>>> to know what instructions the compiler is emitting?  We already say above
>>> that it causes GCC to generate suitable entry and exit sequences.
>>>
>>
>> It was done on purpose since this document also serves as
>> the spec for compiler implementers.
>
>
> But readers of the user documentation are users, not compiler implementors,
> so the patch for the manual needs to have a different focus.

That is why I suggested to add a note for compiler implementers
instead.

>> Here is a patch to add
>> -mgeneral-regs-only option to x86 backend.   We can update
>> spec for interrupt handle to recommend compiling interrupt handler
>> with -mgeneral-regs-only option and add a note for compiler
>> implementers.
>>
>> OK for trunk if there is no regression?
>
>
> I can't comment on the code patch, but for the documentation part:
>
>> @@ -24242,6 +24242,12 @@ opcodes, to mitigate against certain forms of
>> attack. At the moment,
>>  this option is limited in what it can do and should not be relied
>>  on to provide serious protection.
>>
>> +@item -mgeneral-regs-only
>> +@opindex mgeneral-regs-only
>> +Generate code which uses only the general-purpose registers.  This will
>
>
> s/which/that/
>
>> +prevent the compiler from using floating-point, vector, mask and bound
>
>
> s/will prevent/prevents/
>
>> +registers, but will not impose any restrictions on the assembler.
>
>
> Maybe you mean to say "does not restrict use of those registers in inline
> assembly code"?  In any case, please get rid of the future tense here, too.

I changed it to

---
@item -mgeneral-regs-only
@opindex mgeneral-regs-only
Generate code that uses only the general-purpose registers.  This
prevents the compiler from using floating-point, vector, mask and bound
registers.
---

Thanks.

-- 
H.J.

[PATCH, i386]: Fix gcc.target/i386/pr70027.c FAIL for x32 -fpic

2016-05-12 Thread Uros Bizjak

Hello!

We have to pass word_mode (DImode for x86_64 target) call operand to
ix86_output_call_insn, otherwise invalid DWORD_PTR prefixed address is
generated in x32 case.

2016-05-12  Uros Bizjak  

* config/i386/i386.md (*call_got_x32): Change operand 0 to
DImode before it is passed to ix86_output_call_insn.
(*call_value_got_x32): Ditto for operand 1.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9bd19ab..f6bb69b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11934,7 +11934,10 @@
   (match_operand:SI 0 "GOT_memory_operand" "Bg")))
 (match_operand 1))]
   "TARGET_X32"
-  "* return ix86_output_call_insn (insn, operands[0]);"
+{
+  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
+  return ix86_output_call_insn (insn, fnaddr);
+}
   [(set_attr "type" "call")])
 
 ;; Since sibcall never returns, we can only use call-clobbered register
@@ -12127,7 +12130,10 @@
  (match_operand:SI 1 "GOT_memory_operand" "Bg")))
  (match_operand 2)))]
   "TARGET_X32"
-  "* return ix86_output_call_insn (insn, operands[1]);"
+{
+  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
+  return ix86_output_call_insn (insn, fnaddr);
+}
   [(set_attr "type" "callv")])
 
 ;; Since sibcall never returns, we can only use call-clobbered register

Re: [PATCH] Improve other 13 define_insns

2016-05-12 Thread Jakub Jelinek

On Thu, May 12, 2016 at 05:20:02PM +0300, Kirill Yukhin wrote:
> > 2016-05-04  Jakub Jelinek  
> > 
> > * config/i386/sse.md (sse_shufps_, sse_storehps, sse_loadhps,
> > sse_storelps, sse_movss, avx2_vec_dup, avx2_vec_dupv8sf_1,
> > sse2_shufpd_, sse2_storehpd, sse2_storelpd, sse2_loadhpd,
> > sse2_loadlpd, sse2_movsd): Use v instead of x in vex or maybe_vex
> > alternatives, use maybe_evex instead of vex in prefix.
> > 
> >  ;; Avoid combining registers from different units in a single alternative,
> >  ;; see comment above inline_secondary_memory_needed function in i386.c
> >  (define_insn "sse2_storehpd"
> > -  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
> > +  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
> > (vec_select:DF
> > - (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
> > + (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")
> Same (as [1]) here.
> Testing this fix:
> @@ -8426,7 +8426,7 @@
>  ;; Avoid combining registers from different units in a single alternative,
>  ;; see comment above inline_secondary_memory_needed function in i386.c
>  (define_insn "sse2_storehpd"
> -  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
> +  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
> (vec_select:DF
>   (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")
> 
> 

Sorry for that, yes, this is needed.

Jakub

Re: [PATCH][ARM] PR target/70830: Avoid POP-{reglist}^ when returning from interrupt handlers

2016-05-12 Thread Christophe Lyon

On 12 May 2016 at 11:48, Ramana Radhakrishnan  wrote:
> On Thu, May 5, 2016 at 12:50 PM, Kyrill Tkachov
>  wrote:
>> Hi all,
>>
>> In this PR we deal with some fallout from the conversion to unified
>> assembly.
>> We now end up emitting instructions like:
>>   pop {r0,r1,r2,r3,pc}^
>> which is not legal. We have to use an LDM form.
>>
>> There are bugs in two arm.c functions: output_return_instruction and
>> arm_output_multireg_pop.
>>
>> In output_return_instruction the buggy hunk from the conversion was:
>>   else
>> -   if (TARGET_UNIFIED_ASM)
>>   sprintf (instr, "pop%s\t{", conditional);
>> -   else
>> - sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
>>
>> The code was already very obscurely structured and arguably the bug was
>> latent.
>> It emitted POP only when TARGET_UNIFIED_ASM was on, and since
>> TARGET_UNIFIED_ASM was on
>> only for Thumb, we never went down this path interrupt handling code, since
>> the interrupt
>> attribute is only available for ARM code. After the removal of
>> TARGET_UNIFIED_ASM we ended up
>> using POP unconditionally. So this patch adds a check for IS_INTERRUPT and
>> outputs the
>> appropriate LDM form.
>>
>> In arm_output_multireg_pop the buggy hunk was:
>> -  if ((regno_base == SP_REGNUM) && TARGET_THUMB)
>> +  if ((regno_base == SP_REGNUM) && update)
>>  {
>> -  /* Output pop (not stmfd) because it has a shorter encoding.  */
>> -  gcc_assert (update);
>>sprintf (pattern, "pop%s\t{", conditional);
>>  }
>>
>> Again, the POP was guarded on TARGET_THUMB and so would never be taken on
>> interrupt handling
>> routines. This patch guards that with the appropriate check on interrupt
>> return.
>>
>> Also, there are a couple of bugs in the 'else' branch of that 'if':
>> * The "ldmfd%s" was output without a '\t' at the end which meant that the
>> base register
>> name would be concatenated with the 'ldmfd', creating invalid assembly.
>>
>> * The logic:
>>
>>   if (regno_base == SP_REGNUM)
>>   /* update is never true here, hence there is no need to handle
>>  pop here.  */
>> sprintf (pattern, "ldmfd%s", conditional);
>>
>>   if (update)
>> sprintf (pattern, "ldmia%s\t", conditional);
>>   else
>> sprintf (pattern, "ldm%s\t", conditional);
>>
>> Meant that for "regno == SP_REGNUM && !update" we'd end up printing
>> "ldmfd%sldm%s\t"
>> to pattern. I didn't manage to reproduce that condition though, so maybe it
>> can't ever occur.
>> This patch fixes both these issues nevertheless.
>>
>> I've added the testcase from the PR to catch the fix in
>> output_return_instruction.
>> The testcase doesn't catch the bugs in arm_output_multireg_pop, but the
>> existing tests
>> gcc.target/arm/interrupt-1.c and gcc.target/arm/interrupt-2.c would have
>> caught them
>> if only they were assemble tests rather than just compile. So this patch
>> makes them
>> assembly tests (and reverts the scan-assembler checks for the correct LDM
>> pattern).
>>
>> Bootstrapped and tested on arm-none-linux-gnueabihf.
>> Ok for trunk and GCC 6?
>>

Hi Kyrill,

Did you test --with-mode=thumb?
When using arm mode, I see regressions:

  gcc.target/arm/neon-nested-apcs.c (test for excess errors)
  gcc.target/arm/nested-apcs.c (test for excess errors)

Christophe

>> Thanks,
>> Kyrill
>>
>> 2016-05-05  Kyrylo Tkachov  
>>
>> PR target/70830
>> * config/arm/arm.c (arm_output_multireg_pop): Avoid POP instruction
>> when popping the PC and within an interrupt handler routine.
>> Add missing tab to output of "ldmfd".
>> (output_return_instruction): Output LDMFD with SP update rather
>> than POP when returning from interrupt handler.
>>
>> 2016-05-05  Kyrylo Tkachov  
>>
>> PR target/70830
>> * gcc.target/arm/interrupt-1.c: Change dg-compile to dg-assemble.
>> Add -save-temps to dg-options.
>> Scan for ldmfd rather than pop instruction.
>> * gcc.target/arm/interrupt-2.c: Likewise.
>> * gcc.target/arm/pr70830.c: New test.
>
>
> OK for affected branches and trunk  - thanks for fixing this and sorry
> about the breakage.
>
> Ramana

New Finnish PO file for 'cpplib' (version 6.1.0)

2016-05-12 Thread Translation Project Robot

Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'cpplib' has been submitted
by the Finnish team of translators.  The file is available at:

http://translationproject.org/latest/cpplib/fi.po

(This file, 'cpplib-6.1.0.fi.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

http://translationproject.org/latest/cpplib/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

http://translationproject.org/domain/cpplib.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.

Contents of PO file 'cpplib-6.1.0.fi.po'

2016-05-12 Thread Translation Project Robot



cpplib-6.1.0.fi.po.gz
Description: Binary data
The Translation Project robot, in the
name of your translation coordinator.

[PATCH, i386]: Additional fix for PR62599 with -mcmodel=medium -fpic

2016-05-12 Thread Uros Bizjak

Hello!

testsuite/gcc.target/i386/pr61599-{1,2}.c testcases expose a failure
with -mcmodel=medium -fpic, where:

/tmp/ccfpoxHY.o: In function `bar':
pr61599-2.c:(.text+0xe): relocation truncated to fit: R_X86_64_PC32
against symbol `a' defined in LARGE_COMMON section in /tmp/ccKTKST2.o
collect2: error: ld returned 1 exit status
compiler exited with status 1

CM_MEDIUM_PIC code model assumes that code+got/plt fits in a 31 bit
region, data is unlimited. Based on these assumptions, code should be
accessed via R_X86_64_GOT64.

Attached patch uses UNSPEC_GOT instead of UNSPEC_GOTPCREL also for
CM_MEDIUM_PIC.

2016-05-12  Uros Bizjak  

PR target/61599
* config/i386/i386.c (legitimize_pic_address): Do not use
UNSPEC_GOTPCREL for CM_MEDIUM_PIC code model.

Patch was bootstrapped on x86_64-linux-gnu and regression tested with
-mcmodel=medium -fpic.

Jakub, H.J., do you have any comments on the patch?

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 236182)
+++ config/i386/i386.c  (working copy)
@@ -15475,7 +15475,7 @@ legitimize_pic_address (rtx orig, rtx reg)
  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
}
-  else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
+  else if (TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC)
{
  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
UNSPEC_GOTPCREL);

Re: [PATCH/AARCH64/ILP32] Fix unwinding (libgcc)

2016-05-12 Thread Andrew Pinski

On Wed, Apr 27, 2016 at 2:13 PM, Andrew Pinski  wrote:
> Hi,
>   AARCH64 ILP32 is like x32 where UNITS_PER_WORD > sizeof(void*) so we
> need to define REG_VALUE_IN_UNWIND_CONTEXT for ILP32.  This fixes
> unwinding through the signal handler.  This is independent of the ABI
> which Linux kernel uses to store the registers.
>
> OK?  Bootstrapped and tested on aarch64 with no regressions.

Ping.

>
> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * config/aarch64/value-unwind.h: New file.
> * config.host (aarch64*-*-*): Add aarch64/value-unwind.h to tm_file.

[PATCH] c++/60760 - arithmetic on null pointers should not be allowed in constant expressions

2016-05-12 Thread Martin Sebor


Attached is a resubmission of the patch for c++/60760 originally
submitted late in the 6.0 cycle along with a patch for c++/67376.
Since c++/60760 was not a regression, it was decided that it
would be safer to defer the fix until after the 6.1.0 release.

While retesting this patch I was happy to notice that it also
fixes another bug: c++/71091 - constexpr reference bound to a null
pointer dereference accepted.

Martin
PR c++/60760 - arithmetic on null pointers should not be allowed
   in constant expressions
PR c++/71091 - constexpr reference bound to a null pointer dereference
   accepted

gcc/testsuite/ChangeLog:
2016-05-12  Martin Sebor  

	PR c++/60760
	PR c++/71091
	* g++.dg/cpp0x/constexpr-nullptr-2.C: New test.
	* g++.dg/ubsan/pr63956.C: Adjust.

gcc/cp/ChangeLog:
2016-05-12  Martin Sebor  

	PR c++/60760
	PR c++/71091
	* constexpr.c (cxx_eval_call_expression): Add argument.
	(cxx_eval_unary_expression): Same.
	(cxx_eval_conditional_expression): Same.
	(cxx_eval_array_reference): Same.
	(cxx_fold_indirect_ref): Same.
	(cxx_eval_statement_list): Same.
	(cxx_eval_loop_expr): Same.
	(cxx_eval_binary_expression): Same.  Detect and reject invalid uses
	of null pointers.
	(cxx_eval_component_reference): Same.
	(cxx_eval_constant_expression): Same.
	(cxx_eval_indirect_ref): Add argument.  Detect invalid uses of null
	pointers without rejecting them here.
	(cxx_eval_outermost_constant_expr): Adjust.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 6054d1a..3821ad0 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -918,7 +918,8 @@ struct constexpr_ctx {
 static GTY (()) hash_table *constexpr_call_table;

 static tree cxx_eval_constant_expression (const constexpr_ctx *, tree,
-	  bool, bool *, bool *, tree * = NULL);
+	  bool, bool *, bool *, bool * = NULL,
+	  tree * = NULL);

 /* Compute a hash value for a constexpr call representation.  */

@@ -1491,7 +1492,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
 	  tree jump_target = NULL_TREE;
 	  cxx_eval_constant_expression (&ctx_with_save_exprs, body,
 	lval, non_constant_p, overflow_p,
-	&jump_target);
+	NULL, &jump_target);

 	  if (DECL_CONSTRUCTOR_P (fun))
 	/* This can be null for a subobject constructor call, in
@@ -1716,20 +1717,21 @@ cxx_eval_unary_expression (const constexpr_ctx *ctx, tree t,
 static tree
 cxx_eval_binary_expression (const constexpr_ctx *ctx, tree t,
 			bool /*lval*/,
-			bool *non_constant_p, bool *overflow_p)
+			bool *non_constant_p, bool *overflow_p,
+			bool *nullptr_p)
 {
   tree r = NULL_TREE;
   tree orig_lhs = TREE_OPERAND (t, 0);
   tree orig_rhs = TREE_OPERAND (t, 1);
   tree lhs, rhs;
   lhs = cxx_eval_constant_expression (ctx, orig_lhs, /*lval*/false,
-  non_constant_p, overflow_p);
+  non_constant_p, overflow_p, nullptr_p);
   /* Don't VERIFY_CONSTANT here, it's unnecessary and will break pointer
  subtraction.  */
   if (*non_constant_p)
 return t;
   rhs = cxx_eval_constant_expression (ctx, orig_rhs, /*lval*/false,
-  non_constant_p, overflow_p);
+  non_constant_p, overflow_p, nullptr_p);
   if (*non_constant_p)
 return t;

@@ -1751,6 +1753,15 @@ cxx_eval_binary_expression (const constexpr_ctx *ctx, tree t,
 		   || null_member_pointer_value_p (rhs)))
 	r = constant_boolean_node (!is_code_eq, type);
 }
+  if (code == POINTER_PLUS_EXPR && !*non_constant_p
+  && tree_int_cst_equal (lhs, null_pointer_node))
+{
+  if (!ctx->quiet)
+	error ("arithmetic involving null pointer %qE", lhs);
+  if (nullptr_p)
+	*nullptr_p = true;
+  return t;
+}

   if (r == NULL_TREE)
 r = fold_binary_loc (loc, code, type, lhs, rhs);
@@ -1791,11 +1802,11 @@ cxx_eval_conditional_expression (const constexpr_ctx *ctx, tree t,
 return cxx_eval_constant_expression (ctx, TREE_OPERAND (t, 2),
 	 lval,
 	 non_constant_p, overflow_p,
-	 jump_target);
+	 NULL, jump_target);
   return cxx_eval_constant_expression (ctx, TREE_OPERAND (t, 1),
    lval,
    non_constant_p, overflow_p,
-   jump_target);
+   NULL, jump_target);
 }

 /* Returns less than, equal to, or greater than zero if KEY is found to be
@@ -2066,7 +2077,8 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree t,
 static tree
 cxx_eval_component_reference (const constexpr_ctx *ctx, tree t,
 			  bool lval,
-			  bool *non_constant_p, bool *overflow_p)
+			  bool *non_constant_p, bool *overflow_p,
+			  bool *nullptr_p)
 {
   unsigned HOST_WIDE_INT i;
   tree field;
@@ -2075,7 +2087,14 @@ cxx_eval_component_reference (const constexpr_ctx *ctx, tree t,
   tree orig_whole = TREE_OPERAND (t, 0);
   tree whole = cxx_eval_constant_expression (ctx, orig_whole,
 	 lval,
-	 non_constant_p, overflow_p);
+	 non_constant_p, overflow_p,
+	 nullptr_p);
+  if (nullptr_p && *nullptr_p)
+{
+  if (!ctx->quiet)
+	error ("%qE d

Re: [PATCH, i386]: Additional fix for PR62599 with -mcmodel=medium -fpic

2016-05-12 Thread H.J. Lu

On Thu, May 12, 2016 at 3:02 PM, Uros Bizjak  wrote:
> Hello!
>
> testsuite/gcc.target/i386/pr61599-{1,2}.c testcases expose a failure
> with -mcmodel -fpic, where:
>
> /tmp/ccfpoxHY.o: In function `bar':
> pr61599-2.c:(.text+0xe): relocation truncated to fit: R_X86_64_PC32
> against symbol `a' defined in LARGE_COMMON section in /tmp/ccKTKST2.o
> collect2: error: ld returned 1 exit status
> compiler exited with status 1
>
> CM_MEDIUM_PIC code model assumes that code+got/plt fits in a 31 bit
> region, data is unlimited. Based on these assumptions, code should be
> accessed via R_X86_64_GOT64.
>
> Attached patch uses UNSPEC_GOT instead of UNSPEC_GOTPCREL also for
> CM_MEDIUM_PIC.
>
> 2016-05-12  Uros Bizjak  
>
> PR target/61599
> * config/i386/i386.c (legitimize_pic_address): Do not use
> UNSPEC_GOTPCREL for CM_MEDIUM_PIC code model.
>
> Patch was bootstrapped on x86_64-linux-gnu and regression tested with
> -mcmodel=medium -fpic.
>
> Jakub, H.J., do you have any comments on the patch?


I prefer this patch.


-- 
H.J.
diff --git a/ld/plugin.c b/ld/plugin.c
index bf66f52..01d76dd 100644
--- a/ld/plugin.c
+++ b/ld/plugin.c
@@ -674,7 +674,21 @@ get_symbols (const void *handle, int nsyms, struct ld_plugin_symbol *syms,
 	 syms[n].name, FALSE, FALSE, TRUE);
   if (!blhe)
 	{
-	  res = LDPR_UNKNOWN;
+	  /* This symbol is defined and referenced only in IR.  */
+	  switch (syms[n].def)
+	{
+	default:
+	  abort ();
+	case LDPK_UNDEF:
+	case LDPK_WEAKUNDEF:
+	  res = LDPR_UNDEF;
+	  break;
+	case LDPK_DEF:
+	case LDPK_WEAKDEF:
+	case LDPK_COMMON:
+	  res = LDPR_PREVAILING_DEF_IRONLY;
+	  break;
+	}
 	  goto report_symbol;
 	}

Re: [PATCH, i386]: Additional fix for PR61599 with -mcmodel=medium -fpic

2016-05-12 Thread H.J. Lu

On Thu, May 12, 2016 at 4:19 PM, H.J. Lu  wrote:
> On Thu, May 12, 2016 at 3:02 PM, Uros Bizjak  wrote:
>> Hello!
>>
>> testsuite/gcc.target/i386/pr61599-{1,2}.c testcases expose a failure
>> with -mcmodel=medium -fpic, where:
>>
>> /tmp/ccfpoxHY.o: In function `bar':
>> pr61599-2.c:(.text+0xe): relocation truncated to fit: R_X86_64_PC32
>> against symbol `a' defined in LARGE_COMMON section in /tmp/ccKTKST2.o
>> collect2: error: ld returned 1 exit status
>> compiler exited with status 1
>>
>> CM_MEDIUM_PIC code model assumes that code+got/plt fits in a 31 bit
>> region, data is unlimited. Based on these assumptions, code should be
>> accessed via R_X86_64_GOT64.
>>
>> Attached patch uses UNSPEC_GOT instead of UNSPEC_GOTPCREL also for
>> CM_MEDIUM_PIC.
>>
>> 2016-05-12  Uros Bizjak  
>>
>> PR target/61599
>> * config/i386/i386.c (legitimize_pic_address): Do not use
>> UNSPEC_GOTPCREL for CM_MEDIUM_PIC code model.
>>
>> Patch was bootstrapped on x86_64-linux-gnu and regression tested with
>> -mcmodel=medium -fpic.
>>
>> Jakub, H.J., do you have any comments on the patch?
>
>
> I prefer this patch.
>

Oops, wrong one.  Here is the right one.


-- 
H.J.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8157f9d..a864868 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15478,7 +15478,9 @@ legitimize_pic_address (rtx orig, rtx reg)
 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
 	}
-  else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
+  else if (TARGET_64BIT
+	   && ix86_cmodel != CM_LARGE_PIC
+	   && !ix86_in_large_data_p (SYMBOL_REF_DECL (addr)))
 	{
 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
 UNSPEC_GOTPCREL);

[Patch, Fortran] Fix PR 71047

2016-05-12 Thread Fritz Reese

Here's the fix and a test case for the regression PR 71047 introduced
by the DEC STRUCTURE/UNION patch (commit 235999).

Turns out I was a bit greedy about adding component refs to structure
constructors in gfc_default_initializer. Fixed to only add them to
FL_STRUCT and FL_UNION symbols, which expect them during
translation. The component ref was being added to constructors for the
hidden _data and _vptr members of CLASS components, causing the code
for their real initialization to be skipped by the middle-end.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71047

---
Fritz Reese
From f1ee9fb07728f69631f9795ba01b590d2277b6f3 Mon Sep 17 00:00:00 2001
From: Fritz O. Reese 
Date: Thu, 12 May 2016 18:04:15 -0400
Subject: [PATCH] Remove extraneous component refs from derived type constructors.

gcc/fortran/
	PR fortran/71047
	* expr.c (gfc_default_initializer): Avoid extra component refs in
	constructors for derived types and classes.

gcc/testsuite/gfortran.dg/
	PR fortran/71047
	* pr71047.f08: New test.
---
 gcc/fortran/expr.c|5 +++-
 gcc/testsuite/gfortran.dg/pr71047.f08 |   48 +
 2 files changed, 52 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr71047.f08

diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index 6ebe08b..816ef01 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -3975,7 +3975,10 @@ gfc_default_initializer (gfc_typespec *ts)
 
   if (comp->initializer)
 	{
-  ctor->n.component = comp;
+  // Save the component ref for STRUCTUREs and UNIONs.
+  if (ts->u.derived->attr.flavor == FL_STRUCT
+  || ts->u.derived->attr.flavor == FL_UNION)
+ctor->n.component = comp;
 	  ctor->expr = gfc_copy_expr (comp->initializer);
 	  if ((comp->ts.type != comp->initializer->ts.type
 	   || comp->ts.kind != comp->initializer->ts.kind)
diff --git a/gcc/testsuite/gfortran.dg/pr71047.f08 b/gcc/testsuite/gfortran.dg/pr71047.f08
new file mode 100644
index 000..61a0ad4
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr71047.f08
@@ -0,0 +1,48 @@
+! { dg-do compile }
+! { dg-options "-fdump-tree-original" }
+!
+! Fortran/PR71047
+!
+
+module m
+ implicit none
+
+ type, abstract :: c_abstr
+  integer :: i = 0
+ end type c_abstr
+
+ type, extends(c_abstr) :: t_a
+  class(c_abstr), allocatable :: f
+ end type t_a
+ 
+ type, extends(c_abstr) :: t_b
+ end type t_b
+
+contains
+
+ subroutine set(y,x)
+  class(c_abstr), intent(in)  :: x
+  type(t_a),  intent(out) :: y
+   allocate( y%f , source=x )
+ end subroutine set
+
+end module m
+
+
+program p
+ use m
+ implicit none
+
+ type(t_a) :: res
+ type(t_b) :: var
+
+  call set( res , var )
+  write(*,*) res%i
+
+end program p
+
+!
+! Check to ensure the vtable is actually initialized.
+!
+! { dg-final { scan-tree-dump "t_a\\.\\d+\\.f\\._vptr =" "original" } }
+!
-- 
1.7.1

67 matches

Mail list logo