Re: wide-int, ada

2013-11-25 Thread Eric Botcazou
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the Ada front end.

I don't think that the mechanical change in UI_From_gnu is correct, see the 
comment just above.  The annotate_value change is very likely correct, but 
please double check and, upon positive outcome, remove the last sentence of 
the comment just above.  The rest looks good, thanks.

-- 
Eric Botcazou


Re: wide-int, sparc

2013-11-25 Thread Eric Botcazou
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the sparc port.

OK if you change the type of 'low' in solaris_pragma_align to unsigned HWI.

-- 
Eric Botcazou


[Ping] Two pending IVOPT patches

2013-11-25 Thread Bin.Cheng
Hi all,
There are still two IVOPT patches pending review.  Since the
others have already been approved and applied, I am wondering whether these
two can be reviewed and committed if OK.

Improve IVOPT to handle outside and inside loop iv uses differently in GCC:
http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00535.html

Slight tuning to make the iv candidate choosing algorithm more accurate:
http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01574.html

Thanks,
bin


-- 
Best Regards.


[PATCH GCC] Pick up more address lowering cases for ivopt and tree-affine.c

2013-11-25 Thread bin.cheng
Hi,
I previously committed two patches lowering complex address expressions for
IVOPT at http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00546.html and
http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01103.html
When bootstrapping GCC I found there were some peculiar cases like
&MEM[ptr+CST] + ..., which should be handled too.  This patch consists of the
two changes below:

1) change in alloc_iv:
The original code lowers top-level complex address expressions like
&MEM[ptr+off].  The patch relaxes the check condition in order to also lower
expressions like &MEM[ptr+off] + xxx, such as the BASE in the dump below:
use 2
  generic
  in statement _595 = &MEM[(void *)&this_prg + 36B] + _594;

  at position 
  type struct gcov_bucket_type *
  base (struct gcov_bucket_type *) &MEM[(void *)&this_prg + 36B] +
(sizetype) ((unsigned int) (src_i_683 + -1) * 20)
  step 4294967276
  base object (void *) &this_prg
  related candidates  

2) change in tree_to_aff_combination:
The function get_inner_reference returns "&MEM[ptr+off]" as the core part for
input like the memory ADDRESS in the dump below:
use 2
  address
  in statement _59 = MEM[(const struct gcov_ctr_summary *)summary_22(D) +
4B].histogram[h_ix_111].min_value;

  at position MEM[(const struct gcov_ctr_summary *)summary_22(D) +
4B].histogram[h_ix_111].min_value
  type const gcov_type *
  base (const gcov_type *) &MEM[(const struct gcov_ctr_summary
*)summary_22(D) + 4B] + 36
  step 20
  base object (void *) summary_22(D)
  related candidates 

The base can be further reduced into something like "summary_22(D) + 40B".
This change is necessary for the first change, because tree_to_aff_combination
is now used rather than get_inner_reference_aff.
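
In affine terms the reduction is simple constant folding; a hand-worked
sketch of the base from the dump above:

  (const gcov_type *) &MEM[(const struct gcov_ctr_summary *)summary_22(D)
+ 4B] + 36
  == summary_22(D) + (4 + 36)
  == summary_22(D) + 40B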

Bootstrapped and tested on x86/x86_64/arm.  Is it OK?

Thanks.
bin

2013-11-25  Bin Cheng  

* tree-ssa-loop-ivopts.c (contain_complex_addr_expr): New.
(alloc_iv): Lower more cases by calling contain_complex_addr_expr
and tree_to_aff_combination.
* tree-affine.c (tree_to_aff_combination): Handle &MEM[ptr+CST]
in core part of complex reference.

gcc/testsuite/ChangeLog
2013-11-25  Bin Cheng  

* gcc.dg/tree-ssa/ivopts-lower_base.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c   (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c   (revision 0)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+struct tag1
+{
+int x[100];
+int y;
+};
+
+struct tag2
+{
+int a;
+struct tag1 t1[100];
+int b;
+};
+
+int
+foo (struct tag2 *t2, int len)
+{
+  int i = 0;
+  for (i = 0; i < len; i++)
+{
+  (*(struct tag2*)((char *)t2+4)).t1[i].x[len] = len;
+}
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump-not "base .*&MEM\\\[" "ivopts" } }  */
+/* { dg-final { cleanup-tree-dump "ivopts" } }  */
Index: gcc/tree-ssa-loop-ivopts.c
===
--- gcc/tree-ssa-loop-ivopts.c  (revision 205087)
+++ gcc/tree-ssa-loop-ivopts.c  (working copy)
@@ -924,13 +924,40 @@ determine_base_object (tree expr)
 }
 }
 
+/* Return true if complex address expression appears in EXPR.  */
+
+static bool
+contain_complex_addr_expr (tree expr)
+{
+  bool res = false;
+
+  STRIP_NOPS (expr);
+  switch (TREE_CODE (expr))
+{
+case POINTER_PLUS_EXPR:
+case PLUS_EXPR:
+case MINUS_EXPR:
+  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
+  res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
+  break;
+
+case ADDR_EXPR:
+  return (!DECL_P (TREE_OPERAND (expr, 0)));
+
+default:
+  return false;
+}
+
+  return res;
+}
+
 /* Allocates an induction variable with given initial value BASE and step STEP
for loop LOOP.  */
 
 static struct iv *
 alloc_iv (tree base, tree step)
 {
-  tree base_object = base;
+  tree expr = base;
   struct iv *iv = XCNEW (struct iv);
   gcc_assert (step != NULL_TREE);
 
@@ -939,21 +966,17 @@ alloc_iv (tree base, tree step)
1) More accurate cost can be computed for address expressions;
2) Duplicate candidates won't be created for bases in different
   forms, like &a[0] and &a.  */
-  STRIP_NOPS (base_object);
-  if (TREE_CODE (base_object) == ADDR_EXPR
-  && !DECL_P (TREE_OPERAND (base_object, 0)))
+  STRIP_NOPS (expr);
+  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
+  || contain_complex_addr_expr (expr))
 {
   aff_tree comb;
-  double_int size;
-  base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
-&comb, &size);
-  gcc_assert (base_object != NULL_TREE);
-  base_object = build_fold_addr_expr (base_object);
+  tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
   base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)

[PATCH] Prevent out-of-bounds access (PR sanitizer/59258)

2013-11-25 Thread Marek Polacek
This fixes a thinko of mine: when I added another two elements to the
ubsan data structure, I forgot to increase the size of the array.

Alternatively, I could use an alloca for this (VLAs issue a warning
in C++03 and are thus no-go :().

I don't have a simple testcase for this.  Valgrind/asan would be
needed.
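
For reference, the shape of the bug is just a fixed-size buffer that a later
change outgrew; a minimal sketch with hypothetical names (not the actual
ubsan code):

#include <string.h>

struct data { int a, b, c, d, e; };

void
fill (struct data *d)
{
  int fields[3];                /* sized for the original three members */
  memset (fields, 0, sizeof fields);
  fields[3] = d->d;             /* two more members were added later,   */
  fields[4] = d->e;             /* so these writes run past the array   */
}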

Ran the testsuite.  Ok for trunk?

2013-11-25  Marek Polacek  

* ubsan.c (ubsan_create_data): Increase the size of the fields array.

--- gcc/ubsan.c.mp3 2013-11-25 10:13:41.995328200 +0100
+++ gcc/ubsan.c 2013-11-25 10:19:10.068560008 +0100
@@ -387,7 +387,7 @@ ubsan_create_data (const char *name, loc
 {
   va_list args;
   tree ret, t;
-  tree fields[3];
+  tree fields[5];
   vec<tree, va_gc> *saved_args = NULL;
   size_t i = 0;
 

Marek


Re: [PATCH] Builtins handling in IVOPT

2013-11-25 Thread Bin.Cheng
On Sat, Nov 23, 2013 at 4:05 PM, Wei Mi  wrote:
> On Thu, Nov 21, 2013 at 12:19 AM, Zdenek Dvorak
>  wrote:
>> Hi,
>>
>>> This patch works on the intrinsic calls handling issue in IVOPT mentioned 
>>> here:
>>> http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01295.html
>>>
>>> In find_interesting_uses_stmt, it changes
>>>
>>> arg = expr
>>> __builtin_xxx (arg)
>>>
>>> to
>>>
>>> arg = expr;
>>> tmp = addr_expr (mem_ref(arg));
>>> __builtin_xxx (tmp, ...)
>>
>> this looks a bit confusing (and wasteful) to me. It would make more sense to
>> just record the argument as USE_ADDRESS and do the rewriting in 
>> rewrite_use_address.
>>
>> Zdenek
>
> I updated the patch.  The gimple-changing part is now moved to
> rewrite_use_address.  Added support for plain address exprs in addition to
> reference exprs in find_interesting_uses_address.
>
> bootstrap and testing is going on.
>
> 2013-11-22  Wei Mi  
>
> * expr.c (expand_expr_addr_expr_1): Not to split TMR.
> (expand_expr_real_1): Ditto.
> * targhooks.c (default_builtin_has_mem_ref_p): Default
> builtin.
> * tree-ssa-loop-ivopts.c (builtin_has_mem_ref_p): New function.
> (rewrite_use_address): Add TMR for builtin.
> (find_interesting_uses_stmt): Special handling of builtins.
> * gimple-expr.c (is_gimple_address): Add handling of TMR.
> * gimple-expr.h (is_gimple_addressable): Ditto.
> * config/i386/i386.c (ix86_builtin_has_mem_ref_p): New target hook.
> (ix86_atomic_assign_expand_fenv): Ditto.
> (ix86_expand_special_args_builtin): Special handling of TMR for
> builtin.
> * target.def (builtin_has_mem_ref_p): New hook.
> * doc/tm.texi.in: Ditto.
> * doc/tm.texi: Generated.
Hi,
I have no authority to approve this, but I do have one comment.  One day this
optimization may be applied to a target which supports auto-increment
addressing modes; I think we then need to handle these address iv uses
differently from the original REAL ones (from memory accesses), and not
create IV_{BEFORE,AFTER} candidates for them in function
add_candidate.  The number of candidates is already somewhat bloated
with auto-increment now.

Thanks,
bin

>
> 2013-11-22  Wei Mi  
>
> * gcc.dg/tree-ssa/ivopt_5.c: New test.
>
-- 
Best Regards.


Re: [PATCH] Prevent out-of-bounds access (PR sanitizer/59258)

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 10:27:00AM +0100, Marek Polacek wrote:
> This fixes a thinko of mine: when I added another two elements to the
> ubsan data structure, I forgot to increase the size of the array.
> 
> Alternatively, I could use an alloca for this (VLAs issue a warning
> in C++03 and are thus no-go :().
> 
> I don't have a simple testcase for this.  Valgrind/asan would be
> needed.
> 
> Ran the testsuite.  Ok for trunk?
> 
> 2013-11-25  Marek Polacek  
> 
>   * ubsan.c (ubsan_create_data): Increase the size of the fields array.

Ok, but can you also fix up formatting in the function:

  /* We have to add two more decls.  */
  fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
pointer_sized_int_node);
The above line is indented too much.

> --- gcc/ubsan.c.mp3   2013-11-25 10:13:41.995328200 +0100
> +++ gcc/ubsan.c   2013-11-25 10:19:10.068560008 +0100
> @@ -387,7 +387,7 @@ ubsan_create_data (const char *name, loc
>  {
>va_list args;
>tree ret, t;
> -  tree fields[3];
> +  tree fields[5];
>vec<tree, va_gc> *saved_args = NULL;
>size_t i = 0;
>  

Jakub


[PATCH] Add testcase for PR59250

2013-11-25 Thread Marek Polacek
The PR was fixed by Jakub in r205283, this patch merely adds a
testcase for it.  Passed ubsan testsuite for -m32/-m64.

Ok for trunk?

2013-11-25  Marek Polacek  

testsuite/
* g++.dg/ubsan/pr59250.C: New test.

--- gcc/testsuite/g++.dg/ubsan/pr59250.C.mp3  2013-11-25 10:43:24.797315678 +0100
+++ gcc/testsuite/g++.dg/ubsan/pr59250.C  2013-11-25 10:41:24.859817636 +0100
@@ -0,0 +1,34 @@
+// PR sanitizer/59250
+// { dg-do compile }
+// { dg-options "-fsanitize=undefined" }
+// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
+
+namespace std __attribute__ ((__visibility__ ("default"))) {
+   template<typename _Tp> class allocator;
+   template<typename _CharT> struct char_traits;
+   template<typename _CharT, typename _Traits = char_traits<_CharT>,
+typename _Alloc = allocator<_CharT> > class basic_string;
+   typedef basic_string<char> string;
+}
+namespace __gnu_cxx __attribute__ ((__visibility__ ("default"))) {
+  template<typename _Tp> class new_allocator { };
+}
+namespace std __attribute__ ((__visibility__ ("default"))) {
+  template<typename _Tp> class allocator: public 
+__gnu_cxx::new_allocator<_Tp> { };
+  template<typename _CharT, typename _Traits, typename _Alloc> class 
+basic_string {
+   public:
+basic_string(const _CharT* __s, const _Alloc& __a = _Alloc());
+  };
+}
+class FileImpl { };
+class FileHandle {
+  std::string fname;
+  class FileImpl* impl;
+  FileHandle (const char* fname);
+};
+class NormalFile : public FileImpl {
+ int fd;
+};
+FileHandle::FileHandle (const char* fname) : fname(fname) {
+  impl = new NormalFile();
+}

Marek


Re: [Patch, Fortran, OOP] PR 59143: Bogus warning with array-valued type-bound procedure

2013-11-25 Thread Janus Weil
Hi,

>>> +  else if (ref->type == REF_COMPONENT &&
>>> ref->u.c.component->attr.function
>>> +  && ref->u.c.component->attr.proc_pointer
>>> +  && ref->u.c.component->attr.dimension)
>
>
> I wonder whether one should take care of functions returning BT_CLASS, but I
> think one doesn't need to.

Yes, I think no special treatment for class results is necessary; they
cannot have fixed array bounds anyway.  I tried some simple examples,
which were handled properly by the current patch.


>>> You cannot assume that the function returns an explicit size array with
>>> constant bounds.
>>
>> A new version is attached, and I have added one of your examples to
>>
>> the test case.  Ok now?
>
>
> OK. Thanks for the patch!

Thanks for the review. Committed as r205345.

Cheers,
Janus


Re: [PATCH] Add testcase for PR59250

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 10:44:57AM +0100, Marek Polacek wrote:
> The PR was fixed by Jakub in r205283, this patch merely adds a
> testcase for it.  Passed ubsan testsuite for -m32/-m64.
> 
> Ok for trunk?

Can't you reduce it at least a little bit more?
Like I doubt __attribute__ ((__visibility__ ("default")))
is needed to reproduce, I'd also think you could get rid of the namespaces,
perhaps also const _Alloc& __a = _Alloc() argument?

> 2013-11-25  Marek Polacek  
> 
> testsuite/
>   * g++.dg/ubsan/pr59250.C: New test.
> 
> --- gcc/testsuite/g++.dg/ubsan/pr59250.C.mp3  2013-11-25 10:43:24.797315678 +0100
> +++ gcc/testsuite/g++.dg/ubsan/pr59250.C  2013-11-25 10:41:24.859817636 +0100
> @@ -0,0 +1,34 @@
> +// PR sanitizer/59250
> +// { dg-do compile }
> +// { dg-options "-fsanitize=undefined" }
> +// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
> +
> +namespace std __attribute__ ((__visibility__ ("default"))) {
> +   template<typename _Tp> class allocator;
> +   template<typename _CharT> struct char_traits;
> +   template<typename _CharT, typename _Traits = char_traits<_CharT>,
> +typename _Alloc = allocator<_CharT> > class basic_string;
> +   typedef basic_string<char> string;
> +}
> +namespace __gnu_cxx __attribute__ ((__visibility__ ("default"))) {
> +  template<typename _Tp> class new_allocator { };
> +}
> +namespace std __attribute__ ((__visibility__ ("default"))) {
> +  template<typename _Tp> class allocator: public 
> +__gnu_cxx::new_allocator<_Tp> { };
> +  template<typename _CharT, typename _Traits, typename _Alloc> class 
> +basic_string {
> +   public:
> +basic_string(const _CharT* __s, const _Alloc& __a = _Alloc());
> +  };
> +}
> +class FileImpl { };
> +class FileHandle {
> +  std::string fname;
> +  class FileImpl* impl;
> +  FileHandle (const char* fname);
> +};
> +class NormalFile : public FileImpl {
> + int fd;
> +};
> +FileHandle::FileHandle (const char* fname) : fname(fname) {
> +  impl = new NormalFile();
> +}
> 
>   Marek

Jakub


Re: Pass floating point values on powerpc64 as per ABI

2013-11-25 Thread Alan Modra
On Tue, Nov 19, 2013 at 10:16:31AM +0100, Andreas Schwab wrote:
> Alan Modra  writes:
> 
> > On Tue, Nov 19, 2013 at 11:16:26AM +1030, Alan Modra wrote:
> >> On Tue, Nov 19, 2013 at 01:27:39AM +0100, Andreas Schwab wrote:
> >> > Where does it call a varargs function?
> >> 
> >> printf
> >
> > Sorry that wasn't such a helpful response.
> >
> > Here, really:
> > res = ((int(*)(char*, ...))(code))(format, doubleArg);
> 
> But cls_double_va_fn doesn't expect a varargs call.

It works this way:  The call shown above sets up arguments as for a
call to printf, and transfers control to a trampoline.  On powerpc64
the trampoline bounces to ffi_closure_LINUX64.  This assembly function
saves parameter passing registers to memory and calls some C code,
ffi_closure_helper_LINUX64, that uses information set up by
ffi_prep_cif_var to extract the arguments and package them up for
cls_double_va_fn.  So it is ffi_closure_helper_LINUX64 that expects a
varargs call, under control of ffi_prep_cif_var.
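
For readers unfamiliar with the flow, here is a minimal sketch of how such a
varargs closure is set up; it is modelled loosely on libffi's cls_double_va
test, so details may differ from the actual test code:

#include <ffi.h>
#include <stdio.h>

/* The handler that ffi_closure_helper_LINUX64 eventually hands the
   unpacked arguments to.  */
static void
cls_double_va_fn (ffi_cif *cif, void *ret, void **args, void *userdata)
{
  char *format = *(char **) args[0];
  double doubleArg = *(double *) args[1];
  *(ffi_arg *) ret = printf (format, doubleArg);
}

int
main (void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { &ffi_type_pointer, &ffi_type_double };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);

  /* One fixed argument out of two in total: this is what tells the
     backend to apply the varargs argument-passing rules.  */
  ffi_prep_cif_var (&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint, argtypes);
  ffi_prep_closure_loc (closure, &cif, cls_double_va_fn, NULL, code);

  /* The call discussed above, set up as for a call to printf.  */
  int res = ((int (*) (char *, ...)) code) ("%.1f\n", 2.0);
  ffi_closure_free (closure);
  return res < 0;
}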

It looks to me like the new libffi test failures on ia64 are due to
libffi on ia64 not differentiating between varargs and normal calls,
yet ia64 passes fp differently for varargs, if I'm reading
gcc/config/ia64.c function_arg correctly.

-- 
Alan Modra
Australia Development Lab, IBM


[PATCH] Remove convert.h include from tree-dfa.c

2013-11-25 Thread Richard Biener

It's unused (phew).

Applied.

Richard.

2013-11-25  Richard Biener  

* tree-dfa.c: Remove unused convert.h include.

Index: tree-dfa.c
===
--- tree-dfa.c  (revision 205344)
+++ tree-dfa.c  (working copy)
@@ -47,7 +47,6 @@ along with GCC; see the file COPYING3.
 #include "tree-dfa.h"
 #include "tree-inline.h"
 #include "tree-pass.h"
-#include "convert.h"
 #include "params.h"
 
 /* Build and maintain data flow information for trees.  */


Re: wide-int, gengtype

2013-11-25 Thread Laurynas Biveinis
Mike -

Unfortunately I cannot allocate time for the review of the gengtype
bits right now, and it's not clear when I will be able to.



2013/11/23 Mike Stump :
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the gengtype code.
>
> Ok?
>



-- 
Laurynas


Re: [PATCH] Prevent out-of-bounds access (PR sanitizer/59258)

2013-11-25 Thread Marek Polacek
On Mon, Nov 25, 2013 at 10:36:00AM +0100, Jakub Jelinek wrote:
> On Mon, Nov 25, 2013 at 10:27:00AM +0100, Marek Polacek wrote:
> > This fixes a thinko of mine: when I added another two elements to the
> > ubsan data structure, I forgot to increase the size of the array.
> > 
> > Alternatively, I could use an alloca for this (VLAs issue a warning
> > in C++03 and are thus no-go :().
> > 
> > I don't have a simple testcase for this.  Valgrind/asan would be
> > needed.
> > 
> > Ran the testsuite.  Ok for trunk?
> > 
> > 2013-11-25  Marek Polacek  
> > 
> > * ubsan.c (ubsan_create_data): Increase the size of the fields array.
> 
> Ok, but can you also fix up formatting in the function:
> 
>   /* We have to add two more decls.  */
>   fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
> pointer_sized_int_node);
> the above line is indented too much... .

I'll commit the following then.

2013-11-25  Marek Polacek  

* ubsan.c (ubsan_create_data): Increase the size of the fields array.

--- gcc/ubsan.c.mp3 2013-11-25 10:46:48.488069505 +0100
+++ gcc/ubsan.c 2013-11-25 10:47:09.646145804 +0100
@@ -387,7 +387,7 @@ ubsan_create_data (const char *name, loc
 {
   va_list args;
   tree ret, t;
-  tree fields[3];
+  tree fields[5];
   vec<tree, va_gc> *saved_args = NULL;
   size_t i = 0;
 
@@ -425,7 +425,7 @@ ubsan_create_data (const char *name, loc
 {
   /* We have to add two more decls.  */
   fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE,
-   pointer_sized_int_node);
+ pointer_sized_int_node);
   DECL_CONTEXT (fields[i]) = ret;
   DECL_CHAIN (fields[i - 1]) = fields[i];
   i++;

Marek


Re: [C++ Patch] Fixes for duplicate warnings regressions [1/2]

2013-11-25 Thread Paolo Carlini

On 11/23/2013 08:12 PM, Jason Merrill wrote:

On 11/10/2013 05:26 AM, Paolo Carlini wrote:

this is the issue with -Waddress caused by the fix for c++/56930. I'm
handling it as already described, that is by adding a bool parameter to
c_common_truthvalue_conversion.


Why not handle this by making that warning respect 
c_inhibit_evaluation_warnings?


Good question, but it doesn't work, in the sense that we can't simply
protect the warning itself with "c_inhibit_evaluation_warnings == 0"
(per the attached) and bump the global around the second cp_convert,
because then we don't warn *at all*.  The reason is that with the
*first* cp_convert we end up calling c_common_truthvalue_conversion with
c_inhibit_evaluation_warnings bumped; the bumping happens in
cp_truthvalue_conversion.  A mess, yes.  At this stage, if we don't feel
like going with something like my last try, or something even less
straightforward reworking the way we bump c_inhibit_evaluation_warnings
in the various circumstances, I'm tempted to go back to my first try:


-  tree folded_result = cp_convert (type, folded, complain);
+  tree folded_result
+   = folded != expr ? cp_convert (type, folded, complain) : result;


Would it be safe?  I mean, is it at all possible that folded == expr and
in fact we should call cp_convert again?  Because otherwise it's also a
(minor) optimization and radically avoids the problem.


Paolo.

Index: parser.c
===
--- parser.c(revision 204780)
+++ parser.c(working copy)
@@ -23378,12 +23378,16 @@ cp_parser_late_parsing_nsdmi (cp_parser *parser, t
 {
   tree def;
 
+  maybe_begin_member_template_processing (field);
+
   push_unparsed_function_queues (parser);
   def = cp_parser_late_parse_one_default_arg (parser, field,
  DECL_INITIAL (field),
  NULL_TREE);
   pop_unparsed_function_queues (parser);
 
+  maybe_end_member_template_processing ();
+
   DECL_INITIAL (field) = def;
 }
 
Index: pt.c
===
--- pt.c(revision 204780)
+++ pt.c(working copy)
@@ -151,7 +151,7 @@ static int for_each_template_parm (tree, tree_fn_t
   struct pointer_set_t*, bool);
 static tree expand_template_argument_pack (tree);
 static tree build_template_parm_index (int, int, int, tree, tree);
-static bool inline_needs_template_parms (tree);
+static bool inline_needs_template_parms (tree, bool);
 static void push_inline_template_parms_recursive (tree, int);
 static tree retrieve_local_specialization (tree);
 static void register_local_specialization (tree, tree);
@@ -377,9 +377,9 @@ template_class_depth (tree type)
Returns true if processing DECL needs us to push template parms.  */
 
 static bool
-inline_needs_template_parms (tree decl)
+inline_needs_template_parms (tree decl, bool nsdmi)
 {
-  if (! DECL_TEMPLATE_INFO (decl))
+  if (!decl || (!nsdmi && ! DECL_TEMPLATE_INFO (decl)))
 return false;
 
   return (TMPL_PARMS_DEPTH (DECL_TEMPLATE_PARMS (most_general_template (decl)))
@@ -448,16 +448,23 @@ push_inline_template_parms_recursive (tree parmlis
 }
 }
 
-/* Restore the template parameter context for a member template or
-   a friend template defined in a class definition.  */
+/* Restore the template parameter context for a member template, a
+   friend template defined in a class definition, or a non-template
+   member of template class.  */
 
 void
 maybe_begin_member_template_processing (tree decl)
 {
   tree parms;
   int levels = 0;
+  bool nsdmi = TREE_CODE (decl) == FIELD_DECL;
 
-  if (inline_needs_template_parms (decl))
+  if (nsdmi)
+decl = (CLASSTYPE_TEMPLATE_INFO (DECL_CONTEXT (decl))
+   ? TREE_TYPE (CLASSTYPE_TEMPLATE_INFO (DECL_CONTEXT (decl)))
+   : NULL_TREE);
+
+  if (inline_needs_template_parms (decl, nsdmi))
 {
   parms = DECL_TEMPLATE_PARMS (most_general_template (decl));
   levels = TMPL_PARMS_DEPTH (parms) - processing_template_decl;


Re: A GGC related question

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 2:04 AM, dxq  wrote:
> fixing SMS, do you mean that we only modify the SMS pass?
> if so, these are the problems we have to solve:
>* how to make unroll and sms work together? we could call the unroll pass
> in sms, but more passes such as web would be needed, and ideally all the
> passes between unroll and sms would be rerun.
>* the unroll and web passes exist in gcc; however, gcc's passes only work
> on a compilation unit (a function) rather than the smaller unit we want
> (a loop). that's why we copy all global information and rerun the passes
> between unroll and sms several times.
>* if we need to try more unroll factors, copying is also needed. the backup
> exists within a single pass, so it would not be purged by GGC. but if
> memory consumption is huge, is there any risk for the other passes? from my
> experience, when GGC is disabled between unroll and sms, with ggc_min_expand
> = 100 and ggc_min_heap = 20480, compiling a big file makes gcc crash.
>
>That's what I can think of. It's a very big and hard piece of work. do
> you have any suggestions about our current solution?

Instead of going back and forth you can do loop versioning, guarding
the versions with a special instruction you can recognize later.
Then SMS all versions and decide which one was the best, discarding
the no longer needed copies.

Thus, keep intermediate results in the IL.
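
In source terms the idea looks roughly like this; a conceptual sketch only,
since the real guard would be a special recognizable instruction in the IL
rather than user-visible code:

extern void body (int);

void
versioned (int n, int version)
{
  int i;
  /* Emit all candidate versions up front, each behind a guard the pass
     can find again later.  */
  if (version == 1)
    {
      /* Version unrolled by 2, to be modulo-scheduled.  */
      for (i = 0; i + 1 < n; i += 2)
        {
          body (i);
          body (i + 1);
        }
      for (; i < n; i++)        /* epilogue for the odd iteration */
        body (i);
    }
  else
    {
      /* Original loop, kept as the fallback version.  */
      for (i = 0; i < n; i++)
        body (i);
    }
  /* After SMS has run on every version, keep the best one and fold the
     guard so the dead copies are removed.  */
}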

Richard.

>thanks for your reply!
>danxiaoqiang
>
>
>
> --
> View this message in context: 
> http://gcc.1065356.n5.nabble.com/A-GGC-related-question-tp988400p988645.html
> Sent from the gcc - patches mailing list archive at Nabble.com.


Re: [C++ Patch] PR 50436

2013-11-25 Thread Paolo Carlini

On 11/23/2013 08:36 PM, Jason Merrill wrote:

On 11/06/2013 05:56 AM, Paolo Carlini wrote:

in this bug, filed by Zack, we loop forever after error in
constant_value_1. Straightforward thing to do, detect and break out.


This doesn't handle mutual infinite recursion, such as the modified 
testcase below.  I think the right answer is to make sure that if we 
hit an instantiation depth error during instantiation of the 
initializer of a variable, we set DECL_INITIAL to error_mark_node.  
Perhaps just checking errorcount is good enough.
You are totally right.  I discovered a couple of days later that I was 
just papering over the real issue; the error messages were still 
suboptimal with my patch, which was a bad sign.  Thanks for your 
suggestion, anyway; I'll see if I can make progress with it.


Paolo.



Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Alexander Monakov
On Sat, 23 Nov 2013, Wei Mi wrote:
> For the failed testcase, it was compiled using -fmodulo-sched.
> modulo-sched phase set SCHED_GROUP_P of a jump insn to be true, which
> means the jump insn should be scheduled with prev insn as a group.

SMS doesn't set SCHED_GROUP_P by itself; did you mean that SCHED_GROUP_P is
set by dependency analysis code similar to sched2?

> When modulo scheduling is finished, the flag of SCHED_GROUP_P is not
> cleaned up. After that, pass_jump2 phase split the bb and move the
> prev insn to another bb. Then pass_sched2 see the flag and mistakenly
> try to bind the jump insn with a code label.

I think the analysis is incomplete.  Looking at the backtrace posted in the
bug report, the failure path goes through chain_to_prev_insn, which protects
against such failure:

  prev_nonnote = prev_nonnote_nondebug_insn (insn);
  if (BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (prev_nonnote)
  && ! sched_insns_conditions_mutex_p (insn, prev_nonnote))
add_dependence (insn, prev_nonnote, REG_DEP_ANTI);

Why does it end up with a label at the assertion failure point?

Alexander


Re: [PATCH] Conditional count update for fast coverage test in multi-threaded programs

2013-11-25 Thread Richard Biener
On Fri, Nov 22, 2013 at 10:49 PM, Rong Xu  wrote:
> On Fri, Nov 22, 2013 at 4:03 AM, Richard Biener
>  wrote:
>> On Fri, Nov 22, 2013 at 4:51 AM, Rong Xu  wrote:
>>> Hi,
>>>
>>> This patch injects a condition into the instrumented code for edge
>>> counter update. The counter value will not be updated after reaching
>>> value 1.
>>>
>>> The feature is under a new parameter --param=coverage-exec_once.
>>> Default is disabled and setting to 1 to enable.
>>>
>>> This extra check usually slows the program down. For SPEC 2006
>>> benchmarks (all single thread programs), we usually see around 20%-35%
>>> slow down in -O2 coverage build. This feature, however, is expected to
>>> improve the coverage run speed for multi-threaded programs, because
>>> there is virtually no data race or false sharing in updating counters.
>>> The improvement can be significant for highly threaded programs -- we
>>> are seeing 7x speedup in coverage test run for some non-trivial google
>>> applications.
>>>
>>> Tested with bootstrap.
>>
>> Err - why not simply emit
>>
>>   counter = 1
>>
>> for the counter update itself with that --param (I don't like a --param
>> for this either).
>>
>> I assume that CPUs can avoid data-races and false sharing for
>> non-changing accesses?
>>
>
> I'm not aware of any CPU having this feature.  I think a write to the
> shared cache line invalidates all the shared copies.  I cannot find
> any reference to the value of a write being checked first.  Do you have
> any pointer to the feature?

I don't have any pointer - but I remember seeing this in the context
of atomics thus it may be only in the context of using a xchg
or cmpxchg instruction.  Which would make it non-portable to
some extent (if you don't want to use atomic builtins here).
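
For clarity, the two instrumentation variants under discussion look roughly
like this; a sketch with illustrative names, not the generated gimple:

long long counter;      /* stand-in for one gcov edge counter */

void
edge_hit (void)
{
  /* Default instrumentation: unconditional read-modify-write; under
     threads this keeps dirtying the counter's cache line.  */
  counter = counter + 1;
}

void
edge_hit_once (void)
{
  /* The patch's variant: stop updating once the counter reaches 1, so
     steady-state executions only read a clean, shareable line.  */
  if (counter == 0)
    counter = 1;
}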

Richard.

> I just tested this implementation vs. simply setting to 1, using
> google search as the benchmark.
> This one is 4.5x faster. The test was done on Intel Westmere systems.
>
> I can change the parameter to an option.
>
> -Rong
>
>> Richard.
>>
>>> Thanks,
>>>
>>> -Rong


Re: [C++ Patch] PR 54485

2013-11-25 Thread Paolo Carlini

On 11/23/2013 10:08 PM, Jason Merrill wrote:
I believe that our current practice is to have one error and then use 
inform for follow-on messages.  OK with that change.
Thanks. The multiple permerror and error aren't in the new code, my 
patch just shuffles those around. I would be glad to work on that issue 
too, but I think that the whole function should be audited, thus I would 
rather go first with my patch as-is and then go back to that. Agreed?


Paolo.


Re: wide-int, alias

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:19 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the alias analysis code.
>
> Ok?

Ok.

Thanks,
Richard.


Re: [PATCH] Add testcase for PR59250

2013-11-25 Thread Marek Polacek
On Mon, Nov 25, 2013 at 10:49:56AM +0100, Jakub Jelinek wrote:
> On Mon, Nov 25, 2013 at 10:44:57AM +0100, Marek Polacek wrote:
> > The PR was fixed by Jakub in r205283, this patch merely adds a
> > testcase for it.  Passed ubsan testsuite for -m32/-m64.
> > 
> > Ok for trunk?
> 
> Can't you reduce it at least a little bit more?
> Like I doubt __attribute__ ((__visibility__ ("default")))
> is needed to reproduce, I'd also think you could get rid of the namespaces,
> perhaps also const _Alloc& __a = _Alloc() argument?

Ok, I've played a little bit with it and reduced it down to the
following.  Ok now?

2013-11-25  Marek Polacek  

testsuite/
* g++.dg/ubsan/pr59250.C: New test.

--- gcc/testsuite/g++.dg/ubsan/pr59250.C.mp3  2013-11-25 10:43:24.797315678 +0100
+++ gcc/testsuite/g++.dg/ubsan/pr59250.C  2013-11-25 11:33:56.817539980 +0100
@@ -0,0 +1,18 @@
+// PR sanitizer/59250
+// { dg-do compile }
+// { dg-options "-fsanitize=undefined" }
+// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
+
+struct S {
+  const char *s;
+  S (const char *);
+};
+
+struct E {
+ int i;
+};
+
+S::S (const char *) : s (0)
+{
+  new E ();
+}

Marek


Re: [C++ Patch] Fixes for duplicate warnings regressions [1/2]

2013-11-25 Thread Paolo Carlini

... evidently I attached the wrong patch ;) This should be the right one.

Paolo.
Index: c-common.c
===
--- c-common.c  (revision 205343)
+++ c-common.c  (working copy)
@@ -4579,10 +4579,11 @@ c_common_truthvalue_conversion (location_t locatio
if (decl_with_nonnull_addr_p (inner))
  {
/* Common Ada/Pascal programmer's mistake.  */
-   warning_at (location,
-   OPT_Waddress,
-   "the address of %qD will always evaluate as %",
-   inner);
+   if (c_inhibit_evaluation_warnings == 0)
+ warning_at (location,
+ OPT_Waddress,
+ "the address of %qD will always evaluate as %",
+ inner);
return truthvalue_true_node;
  }
break;


Re: wide-int, builtins

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:20 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the builtins code.

-  HOST_WIDE_INT c[2];
   HOST_WIDE_INT ch;
   unsigned int i, j;
+  HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
+  unsigned int len = (GET_MODE_PRECISION (mode) + HOST_BITS_PER_WIDE_INT - 1)
+/ HOST_BITS_PER_WIDE_INT;
+
+  for (i = 0; i < len; i++)
+tmp[i] = 0;

   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT);

The assert should be moved before accessing the mode precision.

Btw, this function makes me always think twice on whether it
returns the correct thing for big vs. little endian if the mode
is bigger than a HWI ... and I suspect it does not(?).

Ok with moving the assert.

Thanks,
Richard.

> Ok?
>


Re: wide-int, graphite

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the graphite code.
>
> Ok?

Ok.

Thanks,
RIchard.


Re: wide-int, gen*.c

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the gen*.c code, excluding gengtype.
>
> Ok?

Ok.

Thanks,
Richard.


Re: [PATCH] Add testcase for PR59250

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 11:36:37AM +0100, Marek Polacek wrote:
> On Mon, Nov 25, 2013 at 10:49:56AM +0100, Jakub Jelinek wrote:
> > On Mon, Nov 25, 2013 at 10:44:57AM +0100, Marek Polacek wrote:
> > > The PR was fixed by Jakub in r205283, this patch merely adds a
> > > testcase for it.  Passed ubsan testsuite for -m32/-m64.
> > > 
> > > Ok for trunk?
> > 
> > Can't you reduce it at least a little bit more?
> > Like I doubt __attribute__ ((__visibility__ ("default")))
> > is needed to reproduce, I'd also think you could get rid of the namespaces,
> > perhaps also const _Alloc& __a = _Alloc() argument?
> 
> Ok, I've played a little bit with it and reduced it down to the
> following.  Ok now?

That looks much better.  I wonder if it would reproduce even if the result
is saved somewhere (either an E *e; field in S, with swapping of the two class
definitions, or a global var, or a static data member static E *e; of S);
otherwise the new expression just allocates and throws the result away.  Ok
with that change, or if it makes the problem no longer reproducible, ok as is.

Note, once the LTO issues with internal functions are fixed, we need
to grep for all those dg-skip-if and remove them.

> 2013-11-25  Marek Polacek  
> 
> testsuite/
>   * g++.dg/ubsan/pr59250.C: New test.
> 
> --- gcc/testsuite/g++.dg/ubsan/pr59250.C.mp3  2013-11-25 10:43:24.797315678 +0100
> +++ gcc/testsuite/g++.dg/ubsan/pr59250.C  2013-11-25 11:33:56.817539980 +0100
> @@ -0,0 +1,18 @@
> +// PR sanitizer/59250
> +// { dg-do compile }
> +// { dg-options "-fsanitize=undefined" }
> +// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
> +
> +struct S {
> +  const char *s;
> +  S (const char *);
> +};
> +
> +struct E {
> + int i;
> +};
> +
> +S::S (const char *) : s (0)
> +{
> +  new E ();
> +}

Jakub


Re: wide-int, cfg

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the cfg code.
>
> Ok?

Hmm, this is an example of a wide_int (a widest_int) being used inside
a structure with long lifetime.  With the patch there are two of those
per loop plus up to one per statement in a loop body (quadratic in
the loop depth).

Currently sizeof (loop) is 160 and sizeof (nb_iter_bound) is 40 on x86_64,
on the branch they are 288 and 104 - that's quite a big overhead.  It looks
like x86_64 defines up to XImode (eh, for whatever reason), but certainly
integer math beyond __int128_t (thus TImode) is not supported.

What's the reason to bound MAX_BITSIZE_MODE_ANY_INT to the
excessively large value?  I suppose we should have a target hook
(or macro, for the ease of gen*) that defines this instead of auto-figuring it
to something that's a factor of four larger than required ...

Now, for the number-of-iterations stuff I really would like to keep things
cheaper in some way or another.  Shrinking MAX_BITSIZE_MODE_ANY_INT
would be the easiest improvement for x86_64.  Allocating the
values separately would be another option (and then only allocate
as many words as you need, of course) - there are already
flags in the structure making the values valid/invalid that can then be
subsumed by NULL pointers.
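
A sketch of the separate-allocation option, purely illustrative (not the
existing structures):

typedef long long HOST_WIDE_INT;    /* roughly as in GCC's hwint.h */

/* Instead of embedding a full widest_int (a fixed worst-case array of
   HWIs), point to a blob sized to what the value actually needs; a
   NULL pointer then subsumes the existing valid/invalid flags.  */
struct bound_value
{
  unsigned len;                     /* number of significant HWIs */
  HOST_WIDE_INT val[1];             /* trailing array, allocated to LEN */
};

struct nb_iter_bound_sketch
{
  struct bound_value *bound;        /* NULL <-> no valid bound recorded */
  /* ... remaining fields as today ... */
};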

That said, I'd like this to be addressed.

Thanks,
Richard.


Re: wide-int, hook

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the hook code.
>
> Ok?

Ok.

Thanks,
Richard.


[PATCH][AArch64] Use Cortex A53 rtx costs table in aarch64

2013-11-25 Thread Kyrill Tkachov

Hi all,

This patch gets the aarch64 backend to use the Cortex A53 costs when tuning for 
that core, instead of using the generic costs. The costs table itself was added 
recently in arm/aarch-cost-tables.h and is shared between the two ports.


Tested aarch64-none-elf on a model.

Ok for trunk?

Thanks,
Kyrill

2013-11-25  Kyrylo Tkachov  

* config/aarch64/aarch64.c (cortexa53_tuning): New struct.
* config/aarch64/aarch64-cores.def (cortex-a53):
Use cortexa53 tuning struct.

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 51c1ff8..b631dbe 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -34,5 +34,5 @@
This list currently contains example CPUs that implement AArch64, and
therefore serves as a template for adding more CPUs in the future.  */
 
-AARCH64_CORE("cortex-a53",	  cortexa53,	 8,  AARCH64_FL_FPSIMD,generic)
+AARCH64_CORE("cortex-a53",	  cortexa53,	 8,  AARCH64_FL_FPSIMD,cortexa53)
 AARCH64_CORE("cortex-a57",	  cortexa15,	 8,  AARCH64_FL_FPSIMD,generic)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aad9a29..95432976 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -214,6 +214,15 @@ static const struct tune_params generic_tunings =
   NAMED_PARAM (memmov_cost, 4)
 };
 
+static const struct tune_params cortexa53_tunings =
+{
+  &cortexa53_extra_costs,
+  &generic_addrcost_table,
+  &generic_regmove_cost,
+  &generic_vector_cost,
+  NAMED_PARAM (memmov_cost, 4)
+};
+
 /* A processor implementing AArch64.  */
 struct processor
 {

Re: [PATCH] Add testcase for PR59250

2013-11-25 Thread Marek Polacek
On Mon, Nov 25, 2013 at 11:47:29AM +0100, Jakub Jelinek wrote:
> That looks much better.  I wonder if it would reproduce even if the result
> is saved somewhere (either an E *e; field in S, with swapping of the two class
> definitions, or a global var, or a static data member static E *e; of S);
> otherwise the new expression just allocates and throws the result away.  Ok
> with that change, or if it makes the problem no longer reproducible, ok as is.

It is reproducible with all three variants.  I'll commit this one shortly.
 
> Note, once the LTO issues with internal functions are fixed, we need
> to grep for all those dg-skip-if and remove them.

Yeah, definitely.  I'm keeping it in mind.

2013-11-25  Marek Polacek  

testsuite/
* g++.dg/ubsan/pr59250.C: New test.

--- gcc/testsuite/g++.dg/ubsan/pr59250.C.mp3  2013-11-25 10:43:24.797315678 +0100
+++ gcc/testsuite/g++.dg/ubsan/pr59250.C  2013-11-25 11:55:37.0 +0100
@@ -0,0 +1,19 @@
+// PR sanitizer/59250
+// { dg-do compile }
+// { dg-options "-fsanitize=undefined" }
+// { dg-skip-if "" { *-*-* } { "-flto" } { "" } }
+
+struct E {
+ int i;
+};
+
+struct S {
+  const char *s;
+  S (const char *);
+  static E *e;
+};
+
+S::S (const char *) : s (0)
+{
+  e = new E ();
+}

Marek


Re: wide-int, loop

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the loop code.
>
> Ok?

@@ -2662,8 +2661,8 @@ iv_number_of_iterations (struct loop *loop, rtx
insn, rtx condition,
   iv1.step = const0_rtx;
   if (INTVAL (iv0.step) < 0)
{
- iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, mode);
- iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, mode);
+ iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, comp_mode);
+ iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, comp_mode);
}
   iv0.step = lowpart_subreg (mode, iv0.step, comp_mode);

separate bugfix?

@@ -1378,7 +1368,8 @@ decide_peel_simple (struct loop *loop, int flags)
   /* If we have realistic estimate on number of iterations, use it.  */
   if (get_estimated_loop_iterations (loop, &iterations))
 {
-  if (double_int::from_shwi (npeel).ule (iterations))
+  /* TODO: unsigned/signed confusion */
+  if (wi::leu_p (npeel, iterations))
{
  if (dump_file)
{

what does this refer to?  npeel is unsigned.

Otherwise looks good to me.

Thanks,
Richard.


Re: wide-int, lto

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the lto code.
>
> Ok?

- loop->nb_iterations_upper_bound.low = streamer_read_uhwi (ib);
- loop->nb_iterations_upper_bound.high = streamer_read_hwi (ib);
+ HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+ int i;
+ int prec ATTRIBUTE_UNUSED = streamer_read_uhwi (ib);
+ int len = streamer_read_uhwi (ib);
+ for (i = 0; i < len; i++)
+   a[i] = streamer_read_hwi (ib);
+
+ loop->nb_iterations_upper_bound = widest_int::from_array (a, len);

please add streamer_read/write_wi () helpers to data-streamer*

replicating the above loop N times is too ugly.
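
Something like the following, presumably; a sketch of the requested helpers,
with names and exact signatures to be settled in data-streamer*:

/* data-streamer-out.c (sketch).  */
void
streamer_write_widest_int (struct output_block *ob, const widest_int &w)
{
  int len = w.get_len ();
  streamer_write_uhwi (ob, w.get_precision ());
  streamer_write_uhwi (ob, len);
  for (int i = 0; i < len; i++)
    streamer_write_hwi (ob, w.elt (i));
}

/* data-streamer-in.c (sketch).  */
widest_int
streamer_read_widest_int (struct lto_input_block *ib)
{
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int prec ATTRIBUTE_UNUSED = streamer_read_uhwi (ib);
  int len = streamer_read_uhwi (ib);
  for (int i = 0; i < len; i++)
    a[i] = streamer_read_hwi (ib);
  return widest_int::from_array (a, len);
}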

The rest looks ok to me.

Thanks,
Richard.


Re: [PATCH, MPX, 2/X] Pointers Checker [14/25] Function splitting

2013-11-25 Thread Ilya Enkovich
2013/11/21 Richard Biener :
> On Wed, Nov 20, 2013 at 7:54 PM, Jeff Law  wrote:
>> On 11/20/13 03:02, Richard Biener wrote:
>>>
>>>
>>> Note that this, the intrusiveness of the feature and the questionable
>>> gain makes me question whether GCC should have support for this
>>> feature (and whether we really should rush this in this late).
>>>
>>> Thus, I hereby formally ask to push back this feature to 4.10.
>>
>> Sigh.  I'd hoped we were making progress and Ilya could have things wrapped
>> up in a reasonable amount of time.  But I certainly see your point of view
>> and I have some concerns about the semantics of the builtins now that we're
>> getting deeper into the bits.
>>
>>
>> The patches were posted long ago (back in mid Sept) and received little/no
>> feedback at that time.  Ilya played by the rules and it was our failing as
>> maintainers that caused things to back up.  Thus I believe the code should
>> be given fair consideration for inclusion into 4.9.
>
> Note that we wouldn't get anywhere near a release if we apply this "rule".
> That maintainers time is not infinite is unfortunate but a fact :/
>
>> --
>>
>> I suspect the hardware implementation and ABI are largely set by the need to
>> interoperate with uninstrumented code.  Where I think the patchset falls
>> down is in implementation details.
>>
>> Anyway, if you're going to stick with your formal request to postpone until
>> after 4.9, I'm not going to push hard from the other direction. Given that,
>> we should probably pull out the half-dozen preparatory patches that went in.
>
> For the latter I was confused by partly applying a series for a feature that
> hasn't been fully reviewed anyway.  This shouldn't be how merging in a
> new feature works - you split up the feature into multiple patches to ease
> review, not to commit it piecewise over some weeks.

I'll prepare a patch to remove committed patches.  But the first part
of series added new ISA extension support.  It is independent from the
checker.  Should it be OK to keep ISA in trunk?

Ilya

>
> Richard.
>
>>
>> jeff


Re: wide-int, gimple

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the gimple code.
>
> Ok?

@@ -1754,7 +1754,7 @@ dump_ssaname_info (pretty_printer *buffer, tree
node, int spc)
   if (!POINTER_TYPE_P (TREE_TYPE (node))
   && SSA_NAME_RANGE_INFO (node))
 {
-  double_int min, max, nonzero_bits;
+  widest_int min, max, nonzero_bits;
   value_range_type range_type = get_range_info (node, &min, &max);

   if (range_type == VR_VARYING)

this makes me suspect you are changing SSA_NAME_RANGE_INFO
to embed two max wide_ints.  That's a no-no.

diff --git a/gcc/gimple-ssa-strength-reduction.c
b/gcc/gimple-ssa-strength-reduction.c
index 3ac9e4d..b9fc936 100644
--- a/gcc/gimple-ssa-strength-reduction.c
+++ b/gcc/gimple-ssa-strength-reduction.c
@@ -239,7 +240,7 @@ struct slsr_cand_d
   tree stride;

   /* The index constant i.  */
-  double_int index;
+  widest_int index;

   /* The type of the candidate.  This is normally the type of base_expr,
  but casts may have occurred when combining feeding instructions.
@@ -314,7 +315,7 @@ typedef const struct cand_chain_d *const_cand_chain_t;
 struct incr_info_d
 {
   /* The increment that relates a candidate to its basis.  */
-  double_int incr;
+  widest_int incr;

   /* How many times the increment occurs in the candidate tree.  */
   unsigned count;

that's similarly excessive.  You've said widest_int is only for automatic
use (thus, stack allocated).  Yet they sneak into basic structures
of optimization passes ...

The above is a case where a pass should decide on a limit.  For
SLSR an offset_int is appropriate (but yes, you then need to retain
or even add checks whether uses fit).  In some cases making
wide_int the trailing element in pass local data can allow for
more optimal dynamic allocation as well.

@@ -831,9 +831,17 @@ gimple_divmod_fixed_value_transform
(gimple_stmt_iterator *si)
   else
 prob = 0;

-  tree_val = build_int_cst_wide (get_gcov_type (),
-(unsigned HOST_WIDE_INT) val,
-val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
+  if (sizeof (gcov_type) == sizeof (HOST_WIDE_INT))
+tree_val = build_int_cst (get_gcov_type (), val);
+  else
+{
+  HOST_WIDE_INT a[2];
+  a[0] = (unsigned HOST_WIDE_INT) val;
+  a[1] = val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1;
+
+  tree_val = wide_int_to_tree (get_gcov_type (),
wide_int::from_array (a, 2,
+   TYPE_PRECISION (get_gcov_type ()), false));
+}

is two times replicated.  Please add a build_int_cst overload for
HOST_WIDEST_INT instead (conditional on HOST_WIDEST_INT != HOST_WIDE_INT).
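
That is, hoist the duplicated snippet into something like the following
sketch (the function name and exact guard here are placeholders):

#if HOST_BITS_PER_WIDEST_INT > HOST_BITS_PER_WIDE_INT
static tree
build_int_cst_widest (tree type, HOST_WIDEST_INT val)
{
  HOST_WIDE_INT a[2];
  a[0] = (unsigned HOST_WIDE_INT) val;
  a[1] = val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1;
  return wide_int_to_tree (type, wide_int::from_array
                           (a, 2, TYPE_PRECISION (type), false));
}
#endif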

Thanks,
Richard.


Re: wide-int, gengtype

2013-11-25 Thread Richard Sandiford
Mike Stump  writes:
> diff --git a/gcc/gengtype-parse.c b/gcc/gengtype-parse.c
> index 8328e3a..0a58822 100644
> --- a/gcc/gengtype-parse.c
> +++ b/gcc/gengtype-parse.c
> @@ -197,6 +197,23 @@ require2 (int t1, int t2)
>return v;
>  }
>  
> +/* If the next token does not have one of the codes T1, T2 or T3, report a
> +   parse error; otherwise return the token's value.  */
> +static const char *
> +require3 (int t1, int t2, int t3)
> +{
> +  int u = token ();
> +  const char *v = advance ();
> +  if (u != t1 && u != t2 && u != t3)
> +{
> +  parse_error ("expected %s, %s or %s, have %s",
> +print_token (t1, 0), print_token (t2, 0),
> +print_token (t3, 0), print_token (u, v));
> +  return 0;
> +}
> +  return v;
> +}
> +
>  /* Near-terminals.  */
>  
>  /* C-style string constant concatenation: STRING+
> @@ -243,18 +260,45 @@ require_template_declaration (const char *tmpl_name)
>str = concat (tmpl_name, "<", (char *) 0);
>  
>/* Read the comma-separated list of identifiers.  */
> -  while (token () != '>')
> +  int depth = 1;
> +  while (depth > 0)
>  {
> -  const char *id = require2 (ID, ',');
> +  if (token () == ENUM)
> + {
> +   advance ();
> +   str = concat (str, "enum ", (char *) 0);
> +   continue;
> + }
> +  if (token () == NUM)
> + {
> +   str = concat (str, advance (), (char *) 0);
> +   continue;
> + }
> +  if (token () == ':')
> + {
> +   advance ();
> +   str = concat (str, ":", (char *) 0);
> +   continue;
> + }
> +  if (token () == '<')
> + {
> +   advance ();
> +   str = concat (str, "<", (char *) 0);
> +   depth += 1;
> +   continue;
> + }
> +  if (token () == '>')
> + {
> +   advance ();
> +   str = concat (str, ">", (char *) 0);
> +   depth -= 1;
> +   continue;
> + }
> +  const char *id = require3 (SCALAR, ID, ',');
>if (id == NULL)
>   id = ",";
>str = concat (str, id, (char *) 0);
>  }
> -
> -  /* Recognize the closing '>'.  */
> -  require ('>');
> -  str = concat (str, ">", (char *) 0);
> -
>return str;
>  }

Just for a bit of extra context: this is extending the template support
to handle the following (see the sketch after this list):

- integer template arguments like foo <2> (used for offset_int and
  widest_int)
- enum template arguments like foo <...> (used for the int_traits)
- nested templates
- namespaces
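
For instance, declarations along these lines must now parse; all names here
are made up:

template <int N> struct fixed_wide { int val[N]; };
enum kind { K1, K2 };
namespace ns { template <kind K> struct traits { int x; }; }
template <typename T> struct wrapper { T *p; };

static GTY(()) fixed_wide <2> *a;               /* integer argument */
static GTY(()) ns::traits <ns::K1> *b;          /* enum argument, namespace */
static GTY(()) wrapper <fixed_wide <2> > *c;    /* nested template */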

Thanks,
Richard



RE: [PATCH] Fix C++0x memory model for -fno-strict-volatile-bitfields on ARM

2013-11-25 Thread Bernd Edlinger
Hello,

I had forgotten to run the Ada test suite when I submitted the previous version 
of this patch.
And indeed there were some Ada test cases failing because in Ada packed 
structures are
like bit fields, but without the DECL_BIT_FIELD_TYPE attribute.

Please find attached the updated version of this patch.

Boot-strapped and regression-tested on x86_64-linux-gnu.
Ok for trunk?

Bernd.

> On Mon, 18 Nov 2013 11:37:05, Bernd Edlinger wrote:
>
> Hi,
>
> On Fri, 15 Nov 2013 13:30:51, Richard Biener wrote:
>>> That looks like always pretending it is a bit field.
>>> But it is not a bit field, and bitregion_start=bitregion_end=0
>>> means it is an ordinary value.
>>
>> I don't think it is supposed to mean that. It's supposed to mean
>> "the access is unconstrained".
>>
>
> Ok, agreed, I did not regard that as a feature.
> And apparently only the code path in expand_assigment
> really has a problem with it.
>
> So here my second attempt at fixing this.
>
> Boot-strapped and regression-tested on x86_64-linux-gnu.
>
> OK for trunk?
>
>
> Thanks
> Bernd.

2013-11-25  Bernd Edlinger  

Fix C++0x memory model for unaligned fields in packed, aligned(4)
structures with -fno-strict-volatile-bitfields on STRICT_ALIGNMENT
targets like arm-none-eabi.
* expr.c (expand_assignment): Handle normal fields like bit regions.

testsuite:
2013-11-25  Bernd Edlinger  

* gcc.dg/pr56997-4.c: New testcase.



patch-unaligned-data.diff
Description: Binary data


Re: [PATCH] Make the IRA shrink-wrapping preparation also work on ppc64

2013-11-25 Thread Martin Jambor
Hi,

On Fri, Nov 22, 2013 at 12:36:55PM -0700, Jeff Law wrote:
> On 11/21/13 10:09, Martin Jambor wrote:
> > PR rtl-optimization/10474
> > * ira.c (interesting_dest_for_shprep_1): New function.
> > (interesting_dest_for_shprep): Use interesting_dest_for_shprep_1,
> > also check parallels.
> >
> >testsuite/
> > * gcc.dg/pr10474.c: Also test ppc64.
> > * gcc.dg/ira-shrinkwrap-prep-1.c: Also test ppc64, changed all ints
> > to longs.
> > * gcc.dg/ira-shrinkwrap-prep-2.c: Likewise.
> I went ahead and installed this on your behalf -- just trying to get
> the queues flushed out.

Well, I specifically did not want to commit an IRA patch just before I
left for the weekend... so, thanks for the confidence :-)

Martin


Re: wide-int, tree

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump  wrote:
> Richi has asked that we break up the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the tree code.
>
> Ok?

diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
index 86f90d8..961b9a6 100644
--- a/gcc/tree-affine.h
+++ b/gcc/tree-affine.h
@@ -30,7 +32,7 @@ struct aff_comb_elt
   tree val;

   /* Its coefficient in the combination.  */
-  double_int coef;
+  widest_int coef;
 };

 typedef struct affine_tree_combination
@@ -39,7 +41,7 @@ typedef struct affine_tree_combination
   tree type;

   /* Constant offset.  */
-  double_int offset;
+  widest_int offset;

   /* Number of elements of the combination.  */
   unsigned n;

increases size of *aff_tree from 232 to 808 bytes.  Most of this is
waste, killing cache locality.  Now you are lucky, aff_tree mostly lives
on the stack (though I had patches to make LIM cache them as
creating them is expensive...).

Not objecting to the change at this very moment, but we desparately
need some external storage solution for these kind of uses.
Oh, and shrink that widest_int ... (x86_64 and XImode ...)

diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c
index afc1a6a..91a14e4 100644
--- a/gcc/tree-chrec.c
+++ b/gcc/tree-chrec.c
@@ -479,7 +479,6 @@ chrec_fold_multiply (tree type,
 static tree
 tree_fold_binomial (tree type, tree n, unsigned int k)
 {
-  double_int num, denom, idx, di_res;
   bool overflow;
   unsigned int i;
   tree res;
@@ -491,20 +490,20 @@ tree_fold_binomial (tree type, tree n, unsigned int k)
 return fold_convert (type, n);

   /* Numerator = n.  */
-  num = TREE_INT_CST (n);
+  wide_int num = n;

   /* Check that k <= n.  */
-  if (num.ult (double_int::from_uhwi (k)))
+  if (wi::ltu_p (num, k))
 return NULL_TREE;

compare_tree_int (n, k) < 0 may be cheaper here

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 0d3c66c..f257d52 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -217,6 +217,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "tree-affine.h"
 #include "tree-inline.h"
+#include "wide-int-print.h"

 /* The maximum number of iterations between the considered memory
references.  */
@@ -244,7 +245,7 @@ typedef struct dref_d
   unsigned distance;

   /* Number of iterations offset from the first reference in the component.  */
-  double_int offset;
+  widest_int offset;

another one.

diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c
index 560d4f8..a70c767 100644
--- a/gcc/tree-streamer-in.c
+++ b/gcc/tree-streamer-in.c
@@ -147,8 +147,9 @@ unpack_ts_base_value_fields (struct bitpack_d *bp,
tree expr)
 static void
 unpack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr)
 {
-  TREE_INT_CST_LOW (expr) = bp_unpack_var_len_unsigned (bp);
-  TREE_INT_CST_HIGH (expr) = bp_unpack_var_len_int (bp);
+  int i;
+  for (i = 0; i < TREE_INT_CST_EXT_NUNITS (expr); i++)
+TREE_INT_CST_ELT (expr, i) = bp_unpack_var_len_int (bp);
 }


that's less than optimal - only the very last used word should use
var_len_int, the others should use unsigned.
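
That is, something like this sketch of the unpacker, mirroring the hunk
above with the suggested packing applied:

static void
unpack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr)
{
  int i;
  /* All but the most significant used word are full words and can use
     the unsigned variable-length encoding.  */
  for (i = 0; i < TREE_INT_CST_EXT_NUNITS (expr) - 1; i++)
    TREE_INT_CST_ELT (expr, i) = bp_unpack_var_len_unsigned (bp);
  /* Only the last word carries the sign.  */
  TREE_INT_CST_ELT (expr, i) = bp_unpack_var_len_int (bp);
}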

@@ -958,6 +961,12 @@ streamer_write_tree_header (struct output_block
*ob, tree expr)
 streamer_write_uhwi (ob, BINFO_N_BASE_BINFOS (expr));
   else if (TREE_CODE (expr) == CALL_EXPR)
 streamer_write_uhwi (ob, call_expr_nargs (expr));
+  else if (CODE_CONTAINS_STRUCT (code, TS_INT_CST))
+{
+  gcc_checking_assert (TREE_INT_CST_NUNITS (expr));
+  streamer_write_uhwi (ob, TREE_INT_CST_NUNITS (expr));
+  streamer_write_uhwi (ob, TREE_INT_CST_EXT_NUNITS (expr));
+}

isn't ext_nunits always either nunits or nunits + 1?  So it should be
sufficient to write a single bit in the tree_base bitpack and only
write the nunits that are required for the actual allocation in
write_tree_header, not both.

@@ -967,9 +976,16 @@ streamer_write_tree_header (struct output_block
*ob, tree expr)
 void
 streamer_write_integer_cst (struct output_block *ob, tree cst, bool ref_p)
 {
+  int i;
+  int len = TREE_INT_CST_NUNITS (cst);
   gcc_assert (!TREE_OVERFLOW (cst));
   streamer_write_record_start (ob, LTO_integer_cst);
   stream_write_tree (ob, TREE_TYPE (cst), ref_p);
-  streamer_write_uhwi (ob, TREE_INT_CST_LOW (cst));
-  streamer_write_hwi (ob, TREE_INT_CST_HIGH (cst));
+  /* We're effectively streaming a non-sign-extended wide_int here,
+ so there's no need to stream TREE_INT_CST_EXT_NUNITS or any
+ array members beyond LEN.  We'll recreate the tree from the
+ wide_int and the type.  */
+  streamer_write_uhwi (ob, len);
+  for (i = 0; i < len; i++)
+streamer_write_hwi (ob, TREE_INT_CST_ELT (cst, i));
 }

and that's duplicate writing of TREE_INT_CST_NUNITS ('len').

diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 494d48e..fc0b574 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -445,7 +445

Re: wide-int, tree

2013-11-25 Thread Richard Sandiford
Richard Biener  writes:
> @@ -958,6 +961,12 @@ streamer_write_tree_header (struct output_block
> *ob, tree expr)
>  streamer_write_uhwi (ob, BINFO_N_BASE_BINFOS (expr));
>else if (TREE_CODE (expr) == CALL_EXPR)
>  streamer_write_uhwi (ob, call_expr_nargs (expr));
> +  else if (CODE_CONTAINS_STRUCT (code, TS_INT_CST))
> +{
> +  gcc_checking_assert (TREE_INT_CST_NUNITS (expr));
> +  streamer_write_uhwi (ob, TREE_INT_CST_NUNITS (expr));
> +  streamer_write_uhwi (ob, TREE_INT_CST_EXT_NUNITS (expr));
> +}
>
> isn't ext_nunits always either nunits or nunits + 1?  So it should be
> sufficient to write a single bit in the tree_base bitpack and only
> write the nunits that are required for the actual allocation in
> write_tree_header, not both.

ext_nunits can be > nunits + 1.  E.g. imagine a 256-bit all-1s unsigned
integer.  As a 256-bit number it's simply { -1 }, with all other bits
being sign-extensions of the -1 HWI.  As a >256-bit number it's
{ -1, -1, -1, -1, 0 }, assuming 64-bit HWIs.
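Spelled out (the numbers just illustrate the example above):

  /* 256-bit unsigned all-ones constant, 64-bit HWIs:
       as a 256-bit number:  nunits = 1,      blocks = { -1 }
         (all bits above block 0 are sign-extensions of the -1)
       as a >256-bit number: ext_nunits = 5,  blocks = { -1, -1, -1, -1, 0 }
         (the trailing 0 block stops the sign-extension so the value
          stays unsigned)  */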

> index 4a24c66..f3e0ffe 100644
> --- a/gcc/tree-vrp.c
> +++ b/gcc/tree-vrp.c
> @@ -1141,15 +1142,7 @@ operand_less_p (tree val, tree val2)
>  {
>/* LT is folded faster than GE and others.  Inline the common case.  */
>if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
> -{
> -  if (TYPE_UNSIGNED (TREE_TYPE (val)))
> -   return INT_CST_LT_UNSIGNED (val, val2);
> -  else
> -   {
> - if (INT_CST_LT (val, val2))
> -   return 1;
> -   }
> -}
> +return INT_CST_LT (val, val2);
>else
>  {
>tree tcmp;
>
> Note that INT_CST_LT and INT_CST_LT_UNSIGNED were supposed to
> be very fast.  Now you made them
>
> #define INT_CST_LT(A, B) (wi::lts_p (wi::to_widest (A), wi::to_widest (B)))
>
> which a) doesn't look the same to me and b) hides complexity.

Not sure what complexity you mean here: to_widest just reads the
constant in its "infinite-precision" form.  I'm not sure:

> So why not write
>
>if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
>  if (TYPE_UNSIGNED (...))
>  return wi::ltu_p (val, val2);
>  else
>   return wi::lts_p (val, val2);
>
> ?

...this would be faster in practice, since the fast path in lts_p is
length-based and can handle small integers quickly whatever their precision.

The second form means that VAL1 and VAL2 must have the same precision.
I don't know whether that's a good thing or not in this instance.

> @@ -6966,12 +6975,7 @@ tree_int_cst_lt (const_tree t1, const_tree t2)
>  int
>  tree_int_cst_compare (const_tree t1, const_tree t2)
>  {
> -  if (tree_int_cst_lt (t1, t2))
> -return -1;
> -  else if (tree_int_cst_lt (t2, t1))
> -return 1;
> -  else
> -return 0;
> +  return wi::cmps (wi::to_widest (t1), wi::to_widest (t2));
>  }
>
>  /* Return true if T is an INTEGER_CST whose numerical value (extended
>
> How does this work for comparing UHWI_MAX to 0 for example?  Looking
> at
>
> template 
> inline int
> wi::cmps (const T1 &x, const T2 &y)
> {
>   unsigned int precision = get_binary_precision (x, y);
>   WIDE_INT_REF_FOR (T1) xi (x, precision);
>   WIDE_INT_REF_FOR (T2) yi (y, precision);
>   if (precision <= HOST_BITS_PER_WIDE_INT)
> {
>   HOST_WIDE_INT xl = xi.to_shwi ();
>   HOST_WIDE_INT yl = yi.to_shwi ();
>   if (xl < yl)
> return -1;
>
> this seems to happily return -1 instead of 1?  Similar issues elsewhere
> where you change compares to unconditionally perform signed compares.
> (it's at least non-obvious to me).
>
> Ah, the to_widest () will make it XImode * 4 extended and thus
> get_precision returns 2048 (or so) so we don't take the shortcut
> (which means it's slow).
>
> Shouldn't the shortcuts be taken on 'len' == 1 rather than
> precision <= HWI?

I suppose we could use the same approach as for lts_p, if that seems
OK to you.

> Side-note: we have too many ways to compare trees / integers

Do you mean in wide-int, or in the tree routines?  At the wide-int
level there are only really two ways: compare trees as sequences of bits
(ignoring their sign) and compare trees as "infinite-precision" numbers.

Thanks,
Richard



Re: wide-int, bfin

2013-11-25 Thread Bernd Schmidt
On 11/23/2013 08:20 PM, Mike Stump wrote:
> Richi has asked that we break the wide-int patch so that the individual port 
> and front end maintainers can review their parts without having to go through 
> the entire patch.  This patch covers the bfin port.
> 
> Ok?

I haven't seen any updates on the compile-time regressions with the
wide-int patches (but may have overlooked them in the flood). bfin and
reload parts are conditionally approved if new numbers are posted and
there's consensus that they are acceptable.


Bernd




Re: [PATCH] Time profiler - phase 1

2013-11-25 Thread Christophe Lyon
On 11 November 2013 18:52, Martin Liška  wrote:

>>> +2013-10-28  Martin Liska 
>>> +
>>> + * gcc.dg/time-profiler-1.c: New test.
>>> + * gcc.dg/time-profiler-2.c: Ditto.
>>> +
>
> Yes, I do have commit right. I will bootstrap the patch, test Inkscape
> instrumentation and commit it.
>
I have observed that time-profiler-2.c has very unstable results,
at least when executed under QEMU (ARM target).

I am not sure what it is really trying to measure, but the problem is
probably linked to the way QEMU handles threads.

Any idea about how to fix this?

Thanks,

Christophe.


Re: Overhaul middle-end handling of restrict

2013-11-25 Thread Richard Biener
On Thu, 21 Nov 2013, Michael Matz wrote:

> Hello,
> 
> after much pondering about the issue we came up with this design to 
> handle restrict more generally.  Without a completely different way of 
> representing conflicts (or non-conflicts) of memory references we're bound 
> to somehow encode the necessary information into the points-to set (or in 
> any case information related to pointer ssa names).  This in turn means 
> that the location sensitive nature of restrict needs to be made explicit 
> in the IL, i.e. we basically need some starting point when a pointer 
> becomes restrict (ADD_RESTRICT), and also an ending point (DEL_RESTRICT), 
> which must act as a barrier for other memory accesses (if it didn't, some 
> transformations could move references from outside restrict regions into 
> the restrict region, making them disambiguable against the restrict 
> references, introducing a bug).

As far as naming goes DEL_RESTRICT is somewhat confusing and I'd prefer
__builtin_memory_barrier (void *p) which acts as a memory barrier
for all _objects_ pointed to by p ("objects" as in p may point at,
or one past the end of, memory blocks covered by declarations and
dynamic memory allocation).
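For illustration, the IL shape under discussion is roughly this (a sketch
using the names from the patch; the exact operands are illustrative, not
compiler output):

  p_2 = p_1(D) ADD_RESTRICT 0;   /* start of region: p_2 carries a fresh
                                    restrict tag */
  *p_2 = ...;                    /* accesses disambiguated via p_2 */
  DEL_RESTRICT (p_2);            /* end of region: a barrier for the objects
                                    p_2 may point to -- the call the naming
                                    suggestion above would spell as a
                                    __builtin_memory_barrier-style builtin */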

> We also need to use our points-to solver to propagate restrict tags 
> conservatively, postprocessing the information to remove too conservative 
> estimates afterwards per SSA name.

Another way to say this is that we need both conservative propagation for
may-restrict and conservative propagation for must-restrict to be able
to disambiguate accesses via restrict vs. accesses via non-restrict 
qualified pointers.

> And if we want to use restrict based disambiguation we also need to 
> transfer the barriers into RTL barriers (I'm using an asm with an explicit 
> MEM to refer to the pointer in question, so not all memory is clobbered).
> There's some potential for improvement here by removing useless 
> ADD_RESTRICT / DEL_RESTRICT pairs.
> 
> There's another improvement when enlarging the set of SSA names to be 
> considered for postprocessing.  Right now only the results of ADD_RESTRICT 
> assigns are handled; that can be improved to also process SSA names that 
> trivially depend on such (i.e. are offsetted and themselves restrict typed).
> 
> That facility is used to implement restrict parameters to functions, 
> replacing the current ad-hoc way in the points-to solver.  Other uses 
> should be done under control of the frontends, as only those know the 
> semantics for real.
> 
> I have a patch that more aggressively creates ADD_RESTRICT/DEL_RESTRICT 
> pairs (basically whenever there's an assignment from non-restrict pointers 
> to a restrict pointer, on the grounds that this "invents" a new restrict 
> set), but that breaks C programs that we don't want to break (I consider 
> them invalid, but there's disagreement).
> 
> Some older incarnations of this were bootstrapped, but this specific patch 
> is only now in regstrapping on x86_64-linux.  Okay for trunk if that 
> passes?

I understand that this removes the "hacks" implemented for handling
Fortran array descriptors and thus the change has to be accompanied
by a change to the Fortran frontend to create its own ADD_RESTRICT
and barrier calls.

More comments inline (eventually).

> Ciao,
> Michael.
> ---
>   * tree.def (ADD_RESTRICT): New tree code.
>   * cfgexpand.c (expand_debug_expr): Handle it.
>   * expr.c (expand_pointer_clobber): New function.
>   (expand_expr_real_2): Use it to handle ADD_RESTRICT.
>   * expr.h (expand_pointer_clobber): Declare.
>   * function.c (gimplify_parameters): Return a second gimple_seq,
>   handle restrict parameters.
>   * function.h (gimplify_parameters): Adjust.
>   * gimple-pretty-print.c (dump_binary_rhs): Handle ADD_RESTRICT.
>   * gimplify.c (gimplify_body): Append second gimple_seq,
>   adjust call to gimplify_parameters.
>   * internal-fn.def (DEL_RESTRICT): New internal function code.
>   * internal-fn.c (expand_DEL_RESTRICT): New function.
>   * tree-cfg.c (verify_gimple_assign_binary): Check ADD_RESTRICT.
>   * tree-inline.c (estimate_operator_cost): Handle ADD_RESTRICT.
>   * tree-pretty-print.c (dump_generic_node): Ditto.
>   * tree-ssa-dce.c (propagate_necessity): DEL_RESTRICT calls
>   are only clobbers.
>   * tree-ssa-structalias.c (build_fake_var_decl_uid): New static
>   function.
>   (build_fake_var_decl): Rewrite in terms of above.
>   (make_heapvar): Take uid parameter.
>   (make_constraint_from_restrict_uid): New.
>   (make_constraint_from_restrict): Use above.
>   (make_constraint_from_global_restrict): Explicitly set global flag.
>   (handle_lhs_call): Adjust call to make_heapvar.
>   (find_func_aliases_for_internal_call): New.
>   (find_func_aliases_for_call): Use it.
>   (find_func_aliases): Handle ADD_RESTRICT.
>   (intra_create_variable_infos): Remove any explicit handli

Re: Overhaul middle-end handling of restrict

2013-11-25 Thread Richard Biener
On Fri, 22 Nov 2013, Xinliang David Li wrote:

> On Fri, Nov 22, 2013 at 2:19 AM, Richard Biener  wrote:
> > On Thu, 21 Nov 2013, Xinliang David Li wrote:
> >
> >> On Thu, Nov 21, 2013 at 10:03 AM, Michael Matz  wrote:
> >> > Hello,
> >> >
> >> > after much pondering about the issue we came up with this design to
> >> > handle restrict more generally.  Without a completely different way of
> >> > representing conflicts (or non-conflicts) of memory references we're bound
> >> > to somehow encode the necessary information into the points-to set (or in
> >> > any case information related to pointer ssa names).  This in turn means
> >> > that the location sensitive nature of restrict needs to be made explicit
> >> > in the IL, i.e. we basically need some starting point when a pointer
> >> > becomes restrict (ADD_RESTRICT), and also an ending point (DEL_RESTRICT),
> >> > which must act as a barrier for other memory accesses (if it didn't, some
> >> > transformations could move references from outside restrict regions into
> >> > the restrict region, making them disambiguable against the restrict
> >> > references, introducing a bug).
> >> >
> >>
> >> Can you use block/scope information to address this?  References from
> >> enclosing scopes can be considered possible aliases.
> >
> > Apart from the issue that LTO drops all BLOCKs this makes the middle-end
> > feature too much tied to the C family frontends and their definition
> > of restrict.  It also requires BLOCK lookup / matching at the time
> > of the alias query (which generally deals with "refs" which may
> > end up not referring to any BLOCK or to different BLOCKs than the
> > GIMPLE they are used in).
> 
> Can the aliaser capture the scope info and maintain its own minimal
> block tree? Each ref will have a unique scope id.  Code duplication
> transformations need to propagate info properly.

Well, certainly.  You would need an additional place to refer to
the scope id from a GIMPLE stmt (or the tree memory reference).

> >
> > It also suddenly makes having (correct) BLOCK info cause code generation
> > differences.
> 
> Is it an issue if it is not affected by whether -g is specified or not?

Too many negates for me in this sentence.  To re-iterate, -g and -g0
need to create the same code.  Also stmts may get BLOCKs assigned
that they don't really belong to (harmless now, fatal after using
BLOCKs for disambiguation).

> >
> > That said, you would somehow need to remember the BLOCK a restrict
> > "tag" is live in which doesn't make it a good fit for the current
> > alias-info we associate with SSA names.  If you don't allow
> > disambiguations against refs in nested BLOCKs then a single
> > "tag" decl with its associated BLOCK is enough to remember
> > (thus one additional pointer).
> >
> > But well - I always wondered how other compilers implement support
> > for restrict (and how restricted that support is, and how strictly
> > it follows the standard).
> >
> > The only other scheme I can come up with is to allow non-dependences
> > to be recorded and maintained throughout the compilation and have
> > frontends create them.
> 
> It has been a while, but I recall one compiler does similar things --
> pretty early in the middle end before blocks are stripped, per-scope
> pointer analysis/tracking is done for scopes with restrict pointer
> uses. When that is done, alias pairs that can be disambiguated are
> recorded, and this information is maintained throughout (including
> inline transformation).

Yeah, that's probably the best method - the issue is reliably
maintaining the information.  And of course doing the analysis itself
on an IL that might not be best suited for the analysis.

At some point we should think about adding a facility to maintain
non-alias info as that would also speed up repeated queries and
allows more costly analysis done (and not repeated).

Thanks,
Richard.


Re: Overhaul middle-end handling of restrict

2013-11-25 Thread Michael Matz
Hi,

On Fri, 22 Nov 2013, Xinliang David Li wrote:

> > Apart from the issue that LTO drops all BLOCKs this makes the middle-end
> > feature too much tied to the C family frontends and their definition
> > of restrict.  It also requires BLOCK lookup / matching at the time
> > of the alias query (which generally deals with "refs" which may
> > end up not referring to any BLOCK or to different BLOCKs than the
> > GIMPLE they are used in).
> 
> Can the aliaser capture the scope info and maintain its own minimal
> block tree?

It could (sometime in the future).  A BLOCK based scheme (in the 
middle-end) still wouldn't allow implementing restrict-like semantics of 
other languages, though.  I think that's really a dead end, apart from all 
the practical problems with it right now.

> > The only other scheme I can come up with is to allow non-dependences 
> > to be recorded and maintained throughout the compilation and have 
> > frontends create them.
> 
> It has been a while, but I recall one compiler does similar things -- 
> pretty early in the middle end before blocks are stripped, per-scope 
> pointer analysis/tracking is done for scopes with restrict pointer uses. 
> When that is done, alias pairs that can be disambiguated are recorded, 
> and this information is maintained throughout (including inline 
> transformation).

Yeah, that's something we should try to implement for the next GCC 
version.  It's really the more natural way to express the whole problem.


Ciao,
Michael.


Re: wide-int, tree

2013-11-25 Thread Richard Biener
On Mon, Nov 25, 2013 at 2:08 PM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> @@ -958,6 +961,12 @@ streamer_write_tree_header (struct output_block
>> *ob, tree expr)
>>  streamer_write_uhwi (ob, BINFO_N_BASE_BINFOS (expr));
>>else if (TREE_CODE (expr) == CALL_EXPR)
>>  streamer_write_uhwi (ob, call_expr_nargs (expr));
>> +  else if (CODE_CONTAINS_STRUCT (code, TS_INT_CST))
>> +{
>> +  gcc_checking_assert (TREE_INT_CST_NUNITS (expr));
>> +  streamer_write_uhwi (ob, TREE_INT_CST_NUNITS (expr));
>> +  streamer_write_uhwi (ob, TREE_INT_CST_EXT_NUNITS (expr));
>> +}
>>
>> isn't ext_nunits always either nunits or nunits + 1?  So it should be
>> sufficient to write a single bit in the tree_base bitpack and only
>> write the nunits that are required for the actual allocation in
>> write_tree_header, not both.
>
> ext_nunits can be > nunits + 1.  E.g. imagine a 256-bit all-1s unsigned
> integer.  As a 256-bit number it's simply { -1 }, with all other bits
> being sign-extensions of the -1 HWI.  As a >256-bit number it's
> { -1, -1, -1, -1, 0 }, assuming 64-bit HWIs.

Ah, ok.

>> index 4a24c66..f3e0ffe 100644
>> --- a/gcc/tree-vrp.c
>> +++ b/gcc/tree-vrp.c
>> @@ -1141,15 +1142,7 @@ operand_less_p (tree val, tree val2)
>>  {
>>/* LT is folded faster than GE and others.  Inline the common case.  */
>>if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
>> -{
>> -  if (TYPE_UNSIGNED (TREE_TYPE (val)))
>> -   return INT_CST_LT_UNSIGNED (val, val2);
>> -  else
>> -   {
>> - if (INT_CST_LT (val, val2))
>> -   return 1;
>> -   }
>> -}
>> +return INT_CST_LT (val, val2);
>>else
>>  {
>>tree tcmp;
>>
>> Note that INT_CST_LT and INT_CST_LT_UNSIGNED were supposed to
>> be very fast.  Now you made them
>>
>> #define INT_CST_LT(A, B) (wi::lts_p (wi::to_widest (A), wi::to_widest (B)))
>>
>> which a) doesn't look the same to me and b) hides complexity.
>
> Not sure what complexity you mean here: to_widest just reads the
> constant in its "infinite-precision" form.  I'm not sure:
>
>> So why not write
>>
>>if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
>>  if (TYPE_UNSIGNED (...))
>>  return wi::ltu_p (val, val2);
>>  else
>>   return wi::lts_p (val, val2);
>>
>> ?
>
> ...this would be faster in practice, since the fast path in lts_p is
> length-based and can handle small integers quickly whatever their precision.
>
> The second form means that VAL1 and VAL2 must have the same precision.
> I don't know whether that's a good thing or not in this instance.

Ok, I'll take your word for it.  Still, with INT_CST_LT now being
exactly the same as tree_int_cst_lt I'd prefer to eliminate the former
in favor of the latter.

Btw, trying to look at code generation via -fdump-tree-all blows up
memory - you have a leak somewhere I guess :/  I'm trying to compile
tree.ii with -O2 -fdump-tree-all.

>> @@ -6966,12 +6975,7 @@ tree_int_cst_lt (const_tree t1, const_tree t2)
>>  int
>>  tree_int_cst_compare (const_tree t1, const_tree t2)
>>  {
>> -  if (tree_int_cst_lt (t1, t2))
>> -return -1;
>> -  else if (tree_int_cst_lt (t2, t1))
>> -return 1;
>> -  else
>> -return 0;
>> +  return wi::cmps (wi::to_widest (t1), wi::to_widest (t2));
>>  }
>>
>>  /* Return true if T is an INTEGER_CST whose numerical value (extended
>>
>> How does this work for comparing UHWI_MAX to 0 for example?  Looking
>> at
>>
>> template 
>> inline int
>> wi::cmps (const T1 &x, const T2 &y)
>> {
>>   unsigned int precision = get_binary_precision (x, y);
>>   WIDE_INT_REF_FOR (T1) xi (x, precision);
>>   WIDE_INT_REF_FOR (T2) yi (y, precision);
>>   if (precision <= HOST_BITS_PER_WIDE_INT)
>> {
>>   HOST_WIDE_INT xl = xi.to_shwi ();
>>   HOST_WIDE_INT yl = yi.to_shwi ();
>>   if (xl < yl)
>> return -1;
>>
>> this seems to happily return -1 instead of 1?  Similar issues elsewhere
>> where you change compares to unconditionally perform signed compares.
>> (it's at least non-obvious to me).
>>
>> Ah, the to_widest () will make it XImode * 4 extended and thus
>> get_precision returns 2048 (or so) so we don't take the shortcut
>> (which means it's slow).
>>
>> Shouldn't the shortcuts be taken on 'len' == 1 rather than
>> precision <= HWI?
>
> I suppose we could use the same approach as for lts_p, if that seems
> OK to you.

Yeah, that looks good to me.
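For the record, the shortcut under discussion would look roughly like this
inside wi::cmps (a sketch only; single-block operands are already
sign-extended, as in the lts_p fast path):

  if (xi.len == 1 && yi.len == 1)
    {
      HOST_WIDE_INT xl = xi.to_shwi ();
      HOST_WIDE_INT yl = yi.to_shwi ();
      if (xl < yl)
        return -1;
      else if (xl > yl)
        return 1;
      return 0;
    }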

>> Side-note: we have too many ways to compare trees / integers
>
> Do you mean in wide-int, or in the tree routines?  At the wide-int
> level there are only really two ways: compare trees as sequences of bits
> (ignoring their sign) and compare trees as "infinite-precision" numbers.

In the tree routines.

Richard.

> Thanks,
> Richard
>


[PATCH] Fix pretty-printer leak

2013-11-25 Thread Richard Biener

This reduces peak memory usage for -fdump-tree-all on tree.c with -O2
from several GB to about 200MB.

It helps to really free the obstacks ;)

Testing in progress.

Richard.

2013-11-25  Richard Biener  

* pretty-print.c (output_buffer::~output_buffer): Really
free the obstacks.

Index: gcc/pretty-print.c
===
*** gcc/pretty-print.c  (revision 205352)
--- gcc/pretty-print.c  (working copy)
*** output_buffer::output_buffer ()
*** 50,57 
  
  output_buffer::~output_buffer ()
  {
!   obstack_free (&chunk_obstack, obstack_finish (&chunk_obstack));
!   obstack_free (&formatted_obstack, obstack_finish (&formatted_obstack));
  }
  
  /* A pointer to the formatted diagnostic message.  */
--- 50,57 
  
  output_buffer::~output_buffer ()
  {
!   obstack_free (&chunk_obstack, NULL);
!   obstack_free (&formatted_obstack, NULL);
  }
  
  /* A pointer to the formatted diagnostic message.  */
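The leak follows from the obstack semantics (per the obstack documentation):

  /* obstack_finish returns the address of the object currently being
     grown, i.e. the most recently allocated thing, so this frees only
     that (empty) object and anything allocated after it -- in effect
     nothing:  */
  obstack_free (&ob, obstack_finish (&ob));

  /* whereas a NULL second argument releases every object in the
     obstack:  */
  obstack_free (&ob, NULL);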


Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.

2013-11-25 Thread Sergey Ostanevich
Updated patch with spaces, etc. according to check_GNU_style.sh.

Put guard as per Tobias' request.

Is it Ok?
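For reference, the scenario the warning targets looks like this (a hedged
example; the diagnostic wording is the one discussed below):

  void f (float *a, float *b, int n)
  {
  #pragma omp simd
    for (int i = 0; i < n; i++)
      a[i] += b[i];   /* if the vectorizer's cost model overrides the
                         directive, -Wopenmp-simd reports the loop as
                         not vectorized here */
  }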



On Thu, Nov 21, 2013 at 6:18 PM, Sergey Ostanevich  wrote:
> Tobias,
>
>
>> When I understand the patch correctly, the warning is shown in two cases:
>> a) When the loop could be vectorized but the cost model prevented it
>> b) When the loop couldn't be vectorized because of other reasons (e.g. not
>> vectorizable because of conditional loop exits, incomplete vectorization
>> support by the compiler etc.)
>>
>> Do I correctly understand the warning? I am asking because the *opt and
>> *texi wording suggests that only (a) is the case. - I cannot test as the
>> patch cannot be applied without heavy editing (removal of additional line
>> breaks, taking care of tabs converted into spaces).
>
> I believe it's only for case a), since the warning appears along with the
> cost model report, which only reports the relative scalar and vector costs
> of an iteration.  The case of exits and vectorization capabilities is
> handled earlier, since by that point we already have some vector code.
>
> Will try to attach the patch instead of copy-paste here.
>
>>
>> Regarding the warning, I think it sounds a bit colloquial and as if the
>> location information is not available. What do you think of "loop with simd
>> directive not vectorized" or concise not fully correct: "simd loop not
>> vectorized"?
>
> took one of yours.
>
>>
>> Additionally, shouldn't that be guarded by "if (warn_openmp_simd &&"?
>> Otherwise the flag status isn't used at all in the whole patch.
>
> This is strange to me, since it worked as I pass OPT_Wopenmp_simd
> to the warning_at () call.  It does:
>show warning with -Wopenmp-simd
>doesn't show warning with -Wall -Wno-openmp-simd
>
>>
>>> +Wopenmp-simd
>>> +C C++ Var(warn_openmp_simd) Warning EnabledBy(Wall)
>>> +Warn about simd directive is overridden by vectorizer cost model
>>
>>
>> Wording wise, I'd prefer something like:
>> "Warn if an simd directive is overridden by the vectorizer cost model"
>>
>> (Or is it "a simd"? Where are the native speakers when one needs them?)
>
> damn, right! I believe 'a', since simd starts with a consonant.
>
>>
>> However, in light of my question above, shouldn't it be "Warn if a loop with
>> simd directive is not vectorized"?
>>
>>
>>
>>> +fsimd-cost-model=
>>> +Common Joined RejectNegative Enum(vect_cost_model)
>>> Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED)
>>> +Specifies the vectorization cost model for code marked with simd
>>> directive
>>
>>
>> I think an article is lacking before "simd".
>
> done.
>
>>
>>
>>> +@item -Wopenmp-simd
>>> +@opindex Wopenmp-simd
>>> +Warn if vectorizer cost model overrides simd directive from user.
>>
>>
>> I think that can be expanded a bit. One could also mention OpenMP/Cilk Plus
>> explicitly. Maybe like:  "Warn if the vectorizer cost model overrides the
>> OpenMP and Cilk Plus simd directives of the user."
>>
>
> done.
>
>> Or if my reading above is correct, how about something like: "Warn if a loop
>> with OpenMP or Cilk Plus simd directive is not vectorized. If only the cost
>> model prevented the vectorization, the @option{-fsimd-cost-model} option can
>> be used to force the vectorization."
>>
>> Which brings me to my next point: -fvect-cost-model= is not documented. I
>> think some words would be helpful, especially about the valid arguments, the
>> default and how it interacts with -fsimd-cost-model=.
>
> done.
>
>>
>>
>>> --- a/gcc/fortran/lang.opt
>>> +++ b/gcc/fortran/lang.opt
>>>
>>> +Wopenmp-simd
>>> +Fortran Warning
>>> +; Documented in C
>>
>> ("Warning" is also not needed as it is taken from c-family/*opt, but it
>> shouldn't harm either.)
>
> done.
>
> Sergos
>
> * common.opt: Added new option -fsimd-cost-model.
> * tree-vectorizer.h (unlimited_cost_model): Interface update
> to rely on particular loop info.
> * tree-vect-data-refs.c (vect_peeling_hash_insert): Update to
> unlimited_cost_model call according to new interface.
> (vect_peeling_hash_choose_best_peeling): Ditto.
> (vect_enhance_data_refs_alignment): Ditto.
> * tree-vect-slp.c: Ditto.
> * tree-vect-loop.c (vect_estimate_min_profitable_iters): Ditto,
> plus issue a warning in case cost model overrides users' directive.
> * c.opt: add openmp-simd warning.
> * lang.opt: Ditto.
> * doc/invoke.texi: Added new openmp-simd warning.


patch6
Description: Binary data


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-25 Thread Alexey Samsonov
On Fri, Nov 22, 2013 at 10:34 PM, Jakub Jelinek  wrote:
> On Fri, Nov 22, 2013 at 10:19:02PM +0400, Alexey Samsonov wrote:
>> On Tue, Nov 19, 2013 at 8:42 PM, Jakub Jelinek  wrote:
>> > Ok, here it is (untested though, because libsanitizer in gcc is older and I
>> > don't have spare cycles to play with llvm).
>> > Guess you need to write it up into compiler-rt buildsystem somehow,
>> > and for testing add
>> > -DSANITIZER_LIBBACKTRACE -I/srcdir_of_libbacktrace 
>> > -I/builddir_of_libbacktrace -L/builddir_of_libbacktrace/.libs 
>> > -lbacktrace
>> > This is against current libbacktrace (r205028 or later, before that
>> > the syminfo callbacks had one less argument).
>>
>>
>> Do you have an idea of how the libbacktrace symbolizer should look to
>> the end user (if the static runtime was built with
>> -DSANITIZER_LIBBACKTRACE)? I see the following options, none of which
>> looks nice:
>> 1) Force the user to provide -L/path/to/libbacktrace -lbacktrace at
>> link time (otherwise one will see link errors like "undefined symbol:
>> backtrace_create_state"). Certainly this is a bad user experience.
>> 2) Make -fsanitize=address link flag imply "-lbacktrace" as we do for
>> libpthread, librt etc. But libbacktrace may not be installed in the
>> system.
>> 3) Pull libbacktrace.a inside libasan.a. This means we should teach
>> the build system to merge two archives (or have access to backtrace
>> sources at build time and recompile them ourselves). This is doable,
>> but kind of ugly and leads to funny consequences - we'll fail to build
>> libbacktrace itself with ASan (or, arguably, any program which uses
>> it).
>>
>> I ask this because I'm slightly afraid the libbacktrace symbolizer will be
>> pretty undertested in the llvm tree and we may break it without
>> noticing. Building it is not a problem, but making all the tests "just
>> work" with it is harder.
>
> In GCC, libbacktrace is built as a libtool convenience library only and
> then linked into whatever libraries want to use it.  So indeed, the plan
> is to link libbacktrace.la into libasan.so.1.0.0 and libasan.a
> (and the earlier GCC patch I've posted included corresponding toplevel
> configure/Makefile bits and libsanitizer Makefile/configure changes to make
> it happen).  E.g. libgo.so.* links libbacktrace.la into itself too.
> So, for GCC user experience, things will work automatically.

Good. Note, however, that you'd need at least two versions of
libbacktrace - 32-bit and 64-bit.

> As Ian said, libbacktrace right now is not maintained as it's own separate
> entity that would be individually installable etc.  So, for testing
> in LLVM, if you don't want to copy over libbacktrace sources in there,

I don't think we want to copy libbacktrace sources into LLVM and maintain it :(

> it means finding some way in the build system/configure to detect
> or request libbacktrace availability (for libraries not included in
> GCC tree GCC's configure typically uses --with-<lib>=<dir>
> or --with-<lib>-lib=<dir>/--with-<lib>-include=<dir>) and just
> link against it and you'd just unpack/configure/build libbacktrace
> on one of your test bots.

We indeed can add support for detecting the presence of libbacktrace
binaries and headers
when we configure a build tree. But to "just link against it" we have
to either link against it in the Clang
driver (I don't think we should add support for it), or link against
it in all the places where we build something
with "-fsanitize=foo", and there are quite a few of them.

As for redirecting libc calls from libbacktrace to our internal
versions (or interceptors) -
if it works fine for our toy tests (which is the case), I suggest we
wait for the actual problems gcc
users may discover. If there is a need to compile a separate
sanitizer-specific version of libbacktrace,
or to provide an additional layer between libbacktrace and sanitizer
runtimes, or both, this could probably
be done in the gcc version of the runtime only (w/o modifying any existing
source files, only adding standalone gcc-specific stuff).

That said, I'm going to do a basic sanity test of Jakub's patch
applied to LLVM trunk (test that it builds fine with and w/o
-DSANITIZER_LIBBACKTRACE
and works if we add -lbacktrace in the latter case), and commit it, so
that we can merge this into gcc. Once again, sorry for the delays.

-- 
Alexey Samsonov, MSK


Re: [PATCH, ARM] Fix PR target/59142: internal compiler error while compiling OpenCV 2.4.7

2013-11-25 Thread Richard Earnshaw
On 25/11/13 11:33, Charles Baylis wrote:
> This bug reveals a long standing problem in the ARM ldm/stm patterns
> which allow the virtual hardware register 'afp' to be used. A similar
> problem may affect vfp_pop_multiple_with_writeback, so that is also
> addressed.
> 
> I have not included a test case, as the original preprocessed source
> is rather bulky, and automated reduction seems to result in a file
> which does not robustly reproduce the problem across different gcc
> versions.
> 
> PR target/59142
> gcc/
> * config/arm/predicates.md: New predicates
> arm_hard_general_register_operand, vfp_hard_register_operand.

This should read:

* arm/predicates.md (arm_hard_general_register_operand): New predicate.
(vfp_hard_register_operand): Likewise.

> * config/arm/arm-ldmstm.ml: Use
> arm_hard_general_register_operand for all patterns.
> * config/arm/ldmstm.md: Regenerate.
> * gcc/config/arm/arm.md: Use vfp_hard_register_operand in
> vfp_pop_multiple_with_writeback.

This needs to name the pattern being modified (in brackets before the colon).

You should delete the predicate arm_hard_register_operand if it's now
no longer being used.  Don't forget the ChangeLog entry for that as well.


> 
> 
> pr59142-v1.diff
> 
> 
> Index: gcc/config/arm/predicates.md
> ===
> --- gcc/config/arm/predicates.md  (revision 205021)
> +++ gcc/config/arm/predicates.md  (working copy)
> @@ -98,6 +105,15 @@
> && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS)));
>  })
>  
> +(define_predicate "vfp_hard_register_operand"
> +  (match_code "reg")
> +{
> +  return (REGNO_REG_CLASS (REGNO (op)) == VFP_D0_D7_REGS
> +   || REGNO_REG_CLASS (REGNO (op)) == VFP_LO_REGS
> +   || (TARGET_VFPD32
> +   && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS));
> +})

No need to use REGNO_REG_CLASS, something like:

IS_VFP_REGNUM (REGNO (op))

should be sufficient.
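I.e. the whole predicate body could shrink to (sketch; whether the explicit
TARGET_VFPD32 condition can really be dropped is worth double-checking):

  (define_predicate "vfp_hard_register_operand"
    (match_code "reg")
  {
    return IS_VFP_REGNUM (REGNO (op));
  })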

R.



Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 06:53:59PM +0400, Alexey Samsonov wrote:
> > In GCC, libbacktrace is built as a libtool convenience library only and
> > then linked into whatever libraries want to use it.  So indeed, the plan
> > is to link libbacktrace.la into libasan.so.1.0.0 and libasan.a
> > (and the earlier GCC patch I've posted included corresponding toplevel
> > configure/Makefile bits and libsanitizer Makefile/configure changes to make
> > it happen).  E.g. libgo.so.* links libbacktrace.la into itself too.
> > So, for GCC user experience, things will work automatically.
> 
> Good. Note, however, that you'd need at least two versions of
> libbacktrace - 32-bit and 64-bit.

In GCC you get that automatically (unless --disable-multilib, but then you
don't get a 32-bit libsanitizer either, say, on x86_64-linux).

> > it means finding some way in the build system/configure to detect
> > or request libbacktrace availability (for libraries not included in
> > GCC tree GCC's configure typically uses --with-<lib>=<dir>
> > or --with-<lib>-lib=<dir>/--with-<lib>-include=<dir>) and just
> > link against it and you'd just unpack/configure/build libbacktrace
> > on one of your test bots.
> 
> We indeed can add support for detecting presence of libbacktrace
> binaries and headers
> when we configure a build tree. But to "just link against it" we have
> to either link against it in Clang
> driver (I don't think we should add support for it), or link against
> it in all the places where we build something
> with "-fsanitize=foo", and there are quite a few of them.

Or change the rules for creation of *sanitizer_common.*a, so that you
link the libbacktrace.la convenience library into it (if configure found
it).  If you build it using libtool/automake, you get it automatically,
otherwise you can e.g. using ar unpack libbacktrace.a and note the filenames
of the object files from it, then add those to the ar command line for
creation of *sanitizer_common.*a.
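Roughly, for the non-libtool case (paths and archive names illustrative):

  mkdir bt-objs && cd bt-objs
  ar x /path/to/libbacktrace.a        # unpack the convenience library
  ar rs ../libsanitizer_common.a *.o  # append its objects to the archive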

> As for redirecting libc calls from libbacktrace to our internal
> versions (or interceptors) -
> if it works fine for our toy tests (which is the case), I suggest to
> wait for the actual problems gcc
> users may discover. If there will be need in compiling a separate
> sanitizer-specific version of libbacktrace,
> or providing an additional layer between libbacktrace and sanitizer
> runtimes, or both, this could probably
> be done in gcc version of the runtime only (w/o modifying any existing
> source files, only adding standalone gcc-specific stuff).

Sure, worst case you keep it untested in your LLVM copy of libsanitizer
and we'll just need to fix it up during merges if something breaks.
If it will be used for GCC (and we have a P1 bug so it is a release blocker
if llvm-symbolizer is still used), then it will be tested there.

Jakub


Re: [PATCH][AArch64] Use Cortex A53 rtx costs table in aarch64

2013-11-25 Thread Richard Earnshaw
On 25/11/13 11:01, Kyrill Tkachov wrote:
> Hi all,
> 
> This patch gets the aarch64 backend to use the Cortex A53 costs when tuning
> for that core, instead of using the generic costs.  The costs table itself
> was added recently in arm/aarch-cost-tables.h and is shared between the two
> ports.
> 
> Tested aarch64-none-elf on a model.
> 
> Ok for trunk?
> 
> Thanks,
> Kyrill
> 
> 2013-11-25  Kyrylo Tkachov  
> 
>  * config/aarch64/aarch64.c (cortexa53_tuning): New struct.
>  * config/aarch64/aarch64-cores.def (cortex-a53):
>  Use cortexa53 tuning struct.
> 
> 

OK.

R.




[PATCH] Fix checking of gimple types

2013-11-25 Thread David Malcolm
On Thu, 2013-11-21 at 18:03 -0500, Andrew MacLeod wrote:
> On 11/21/2013 05:42 PM, Jakub Jelinek wrote:
> > On Thu, Nov 21, 2013 at 03:24:55PM -0700, Jeff Law wrote:
> >> On 11/21/13 15:19, Jakub Jelinek wrote:
> >>> On Mon, Nov 18, 2013 at 03:25:52PM -0500, David Malcolm wrote:
> > So is there some reason the GIMPLE_CHECK was left in here rather than
> > doing the downcasting?  This happens in other places.
> >>> Note that the changes removed tons of checks that IMHO were desirable.
> >>> The as_a that replaced those checks e.g. allows 3 different gimple codes,
> >>> while previously only one was allowed, this is both more expensive for
> >>> --enable-checking=yes, and allows one to use inline wrappers e.g.
> >>> gimple_omp_parallel_something on GIMPLE_OMP_TASK etc.
> >> Can you give a couple examples, please?
> > I mean e.g.
> > gimple_omp_parallel_{,set_}{clauses,child_fn,data_arg}{,_ptr}
> > gimple_omp_taskreg_{,set_}{clauses,child_fn,data_arg}{,_ptr}
> > gimple_omp_target_{,set_}{clauses,child_fn,data_arg}{,_ptr}
> > gimple_omp_teams_{,set_}clauses{,_ptr}
> > gimple_omp_return_{,set_}lhs{,_ptr}
> > gimple_omp_atomic_store_{,set_}val{,_ptr}
> > gimple_resx_{,set_}region
> > gimple_eh_dispatch_{,set_}region
> >
> > Jakub
> Why does is_a_helper <gimple_statement_omp_parallel>::test allow 
> anything other than a GIMPLE_OMP_PARALLEL?  That seems wrong to me. 
> It should just be the one check.
> 
> gimple_omp_taskreg and other routines "sharing" that helper should have 
> their own helper and only check the one code... that is the whole point: to 
> remain at least codegen-neutral in these cases and provide correct 
> checking.   The fact that they may happen to share the same underlying 
> structure is irrelevant.
> 
> I also think this is wrong.

Mea culpa.  Unfortunately I made a conceptual error during the
conversion (the worst kind of error).  I misunderstood the relationships
between the various OMP statements: there (mostly) aren't any, but the
sharing of structures for layout made me think there were.

Attached is a patch, successfully bootstrapped & regtested on
x86_64-unknown-linux-gnu, which I believe reinstates the checked-build
behaviors from before r205034 (and that the unchecked-build behaviors
were not affected by that commit and likewise are not by this patch).

As I understand it, there are almost no "is-a" relationships between the
various omp statement types, some of them just happen to share layouts.
The exception is that the various gimple_omp_taskreg_* accessors accept
either code GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, and so
"omp_taskreg" is a concept with which OMP_PARALLEL and OMP_TASK have an
is-a relationship.

Based on this, I've reworked the is_a_helper functions, eliminating
almost all of the ones that accepted multiple codes.  The only ones that
remain accepting multiple codes are those for:

  * gimple_statement_with_ops, which uses gimple_has_ops (gs), and for
  * gimple_statement_with_memory_ops, which uses gimple_has_mem_ops
(gs), plus
  * a new class gimple_statement_omp_taskreg, which expresses the
"either GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK" condition, and becomes a
parent struct for those.
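Under that scheme the helpers reduce to single-code checks, with only the
taskreg one accepting two codes -- along the lines of (a sketch, modulo the
exact is_a_helper spelling):

  template <>
  template <>
  inline bool
  is_a_helper <gimple_statement_omp_parallel>::test (gimple gs)
  {
    return gs->code == GIMPLE_OMP_PARALLEL;
  }

  template <>
  template <>
  inline bool
  is_a_helper <gimple_statement_omp_taskreg>::test (gimple gs)
  {
    return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK;
  }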

I introduced some new structs to express the pre-existing layouts for
GSS codes, and to distinguish them from structs that imply a specific
GIMPLE_ code.
For example,
  gimple_statement_omp_atomic_store
now requires that the code be GIMPLE_OMP_ATOMIC_STORE, but it was also
the name of a layout, GSS_OMP_ATOMIC_STORE.  So I renamed these purely
layout classes, so that there is now a
  gimple_statement_omp_atomic_store_layout
class for the corresponding GSS value, which I renamed to
  GSS_OMP_ATOMIC_STORE_LAYOUT
to make clear that this is just a layout: that although
GIMPLE_OMP_RETURN happens to share the data layout of
GIMPLE_OMP_ATOMIC_STORE, they are not otherwise related - they now both
inherit from the "_layout" class.

I'm not a fan of these "_layout" names, but I'm not sure what better to
call them. Perhaps:
   GSS_OMP_PARALLEL_LAYOUT -> GSS_OMP_WITH_CLAUSES_CHILD_FN_DATA_ARG
   GSS_OMP_SINGLE_LAYOUT   -> GSS_OMP_WITH_CLAUSES
   GSS_OMP_ATOMIC_STORE_LAYOUT -> GSS_OMP_WITHOUT_SEQ_WITH_VAL
with analogous names for the corresponding structs.

I added GTY tags for every class in the hierarchy, not just those that
introduce a new layout, since gengtype only recognizes inheritance when
supplied a "tag" option.  This leads to the GSS values appearing
multiple times within the class hierarchy, which required a patch to
gengtype, to prevent duplicate case labels in the generated switch
statement.

I believe that this structure correctly reinstates the exact behavior
from before the inheritance patch for the checked build, and that either
way, the behavior in the unchecked build is the same.

Another approach to this would be to entirely eliminate these shared
layout types, going purely with the conceptual is-a relationships
between the types, say, by replacing the gengtype switch on GSS_ value
with 

Re: wide-int, real

2013-11-25 Thread Richard Biener
On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump  wrote:
> Richi has asked that we break the wide-int patch so that the individual port 
> and front end maintainers can review their parts without having to go through 
> the entire patch.  This patch covers the real.c code.

Ok.

Thanks,
Richard.

> Ok?
>


[libgcc, build] Suppress some warnings for soft-fp files

2013-11-25 Thread Rainer Orth
Uros prompted me to look into why we were still getting warnings
compiling the soft-fp code in libgcc despite this in config/t-softfp:

$(soft-fp-objects) : INTERNAL_CFLAGS += -Wno-missing-prototypes -Wno-type-limits

It turned out that soft-fp-objects still included the $srcdir prefix.
It seems my libgcc migration missed some chunk of code to strip that,
although I still see the warnings on the 4.6 branch (i.e. before the
migration).

Anyway, the following patch fixes this.  Bootstrapped without
regressions on i386-pc-solaris2.11 and x86_64-unknown-linux-gnu and
verified the warnings are indeed gone.

Ok for mainline?

Rainer


2013-11-22  Rainer Orth  

* config/t-softfp (soft-fp-objects-base): New variable.
(soft-fp-objects): Use it.

diff --git a/libgcc/config/t-softfp b/libgcc/config/t-softfp
--- a/libgcc/config/t-softfp
+++ b/libgcc/config/t-softfp
@@ -88,8 +88,10 @@ endif
 # for the functions in the soft-fp files have not been brought across
 # from glibc.
 
-soft-fp-objects = $(addsuffix $(objext), $(softfp_file_list)) \
-  $(addsuffix _s$(objext), $(softfp_file_list))
+soft-fp-objects-base = $(basename $(notdir $(softfp_file_list)))
+
+soft-fp-objects = $(addsuffix $(objext), $(soft-fp-objects-base)) \
+  $(addsuffix _s$(objext), $(soft-fp-objects-base))
 
 $(soft-fp-objects) : INTERNAL_CFLAGS += -Wno-missing-prototypes -Wno-type-limits
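To see what the new variable buys, take one entry of softfp_file_list (path
illustrative):

  #   entry:            $(srcdir)/soft-fp/addsf3.c
  #   $(notdir ...)   ->  addsf3.c
  #   $(basename ...) ->  addsf3
  # so soft-fp-objects now names addsf3$(objext) etc. -- the actual object
  # file names -- and the target-specific INTERNAL_CFLAGS above finally
  # applies to them.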
 

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: wide-int, loop

2013-11-25 Thread Kenneth Zadeck


On 11/25/2013 06:04 AM, Richard Biener wrote:

On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump  wrote:

Richi has asked that we break the wide-int patch so that the individual port and 
front end maintainers can review their parts without having to go through the 
entire patch.  This patch covers the loop code.

Ok?

@@ -2662,8 +2661,8 @@ iv_number_of_iterations (struct loop *loop, rtx
insn, rtx condition,
iv1.step = const0_rtx;
if (INTVAL (iv0.step) < 0)
 {
- iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, mode);
- iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, mode);
+ iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, comp_mode);
+ iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, comp_mode);
 }
iv0.step = lowpart_subreg (mode, iv0.step, comp_mode);

separate bugfix?

Most likely.  I will submit separately.

@@ -1378,7 +1368,8 @@ decide_peel_simple (struct loop *loop, int flags)
/* If we have realistic estimate on number of iterations, use it.  */
if (get_estimated_loop_iterations (loop, &iterations))
  {
-  if (double_int::from_shwi (npeel).ule (iterations))
+  /* TODO: unsigned/signed confusion */
+  if (wi::leu_p (npeel, iterations))
 {
   if (dump_file)
 {

what does this refer to?  npeel is unsigned.


It was the fact that they were doing the from_shwi and then using an 
unsigned test.
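That is, the old code mixed a signed conversion with an unsigned predicate:

  /* signed conversion ...      ... unsigned compare */
  double_int::from_shwi (npeel).ule (iterations)

while wi::leu_p (npeel, iterations) keeps the unsigned compare and drops the
mismatched signed conversion.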

Otherwise looks good to me.

Thanks,
Richard.




RE: _Cilk_spawn and _Cilk_sync for C++

2013-11-25 Thread Iyer, Balaji V
Hi Jason,
	Please see my responses below.

> -Original Message-
> From: Jason Merrill [mailto:ja...@redhat.com]
> Sent: Friday, November 22, 2013 10:51 AM
> To: Iyer, Balaji V; gcc-patches@gcc.gnu.org
> Cc: Jeff Law
> Subject: Re: _Cilk_spawn and _Cilk_sync for C++
> 
> On 11/21/2013 05:40 PM, Iyer, Balaji V wrote:
> > +/* Returns a TRY_CATCH_EXPR that will encapsulate BODY, EXCEPT_DATA
> and
> > +   EXCEPT_FLAG.  */
> > +
> > +tree
> > +create_cilk_try_catch (tree except_flag, tree except_data, tree body)
> > +{
> > +  tree catch_list = alloc_stmt_list ();
> > +  append_to_statement_list (except_flag, &catch_list);
> > +  append_to_statement_list (except_data, &catch_list);
> > +  append_to_statement_list (do_begin_catch (), &catch_list);
> > +  append_to_statement_list (build_throw (NULL_TREE), &catch_list);
> > +  tree catch_tf_expr = build_stmt (EXPR_LOCATION (body),
> TRY_FINALLY_EXPR,
> > +  catch_list, do_end_catch (NULL_TREE));
> > +  catch_list = build2 (CATCH_EXPR, void_type_node, NULL_TREE,
> > +  catch_tf_expr);
> > +  tree try_catch_expr = build_stmt (EXPR_LOCATION (body),
> TRY_CATCH_EXPR,
> > +   body, catch_list);
> > +  return try_catch_expr;
> > +}
> 
> I had in mind something less cilk-specific: a function that takes two tree
> operands, one for the body and one for the throwing path.
> Basically, make catch_list a parameter and move the first two appends back
> into the calling function.
> 

This is fixed as you suggested.
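Presumably ending up with something of this shape (a sketch of the generic
helper Jason describes; the ChangeLog below names it create_try_catch_expr):

  tree
  create_try_catch_expr (tree body, tree handler)
  {
    location_t loc = EXPR_LOCATION (body);
    append_to_statement_list (do_begin_catch (), &handler);
    append_to_statement_list (build_throw (NULL_TREE), &handler);
    tree catch_tf_expr = build_stmt (loc, TRY_FINALLY_EXPR, handler,
                                     do_end_catch (NULL_TREE));
    handler = build2 (CATCH_EXPR, void_type_node, NULL_TREE, catch_tf_expr);
    return build_stmt (loc, TRY_CATCH_EXPR, body, handler);
  }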


> > The reason is that, when you have something like this:
> >
> > _Cilk_spawn [=]  {  } ();
> >
> > I need to capture the function call (which in this case is the whole 
> > function)
> and throw it into a nested function.  The nested function implementation is
> shared with C. If the function is stored in a variable then I can just send 
> that
> out to the nested function. I have added another constraint to make sure the
> function is a spawning function; this way we can reduce more cases where
> they are stored to a variable. The reason why I added this check in
> finish_call_expr is that it seemed the most straightforward place for me,
> and the one where I could do it with the least disruption (code-changes).
> 
> It looks like you're transforming any
> 
> [...] {...} (...);
> 
> into
> 
> auto lambda = [...]{...};
> lambda(...);
> 
> which has significantly different semantics, particularly in terms of the
> lifetime of the lambda object.  In some of the Cilk online documentation, I
> see:
> 
> > When spawning named lambda functions, be careful that the lifespan of
> the lambda extends at least until the next sync, or else the destructor for 
> the
> lambda will race with the spawned call. For example:
> > double i = g();
> > if (some condition) {
> >// named lambda with value capture of i
> >auto f = [i]() { double d = sin(i); f(d); };
> >cilk_spawn f();
> > } // Ouch! destructor for f is in parallel with spawned call.
> 
> This would seem to apply even more to unnamed lambda functions, since
> normally they would be destroyed at the end of the full-expression, which
> must always be before the sync.  Does the Intel compiler implicitly extend
> the lifetime of a lambda called by cilk spawn?  In any case, we really don't
> want to do this for all calls to unnamed lambdas just because we turned on
> cilk mode.
> 

I have fixed this issue.  My function to map the variable's context from the 
spawner to the spawn helper function was going into the lambda function. I made 
it stop by adding a language-specific copy_tree_body (for C++ it stops going 
into the lambda function's body, and the rest of the time it just uses 
copy_tree_body_r, so no code is duplicated between the two), and it 
works fine now.

The fixed patch is attached and here are the fixed ChangeLogs:

gcc/c-family/ChangeLog
2013-11-25  Balaji V. Iyer  

* cilk.c (cilk_outline): Replaced a call to copy_tree_body_r with the
language specific one.

gcc/c/ChangeLog
2013-11-25  Balaji V. Iyer  

* c-objc-common.h (LANG_HOOKS_CILKPLUS_CILK_SPEC_COPY_BODY): New
define.

gcc/ChangeLog
2013-11-25  Balaji V. Iyer  

* langhooks.h (lang_hooks_for_cilkplus::cilk_specific_copy_tree_body):
New field.
* langhooks-def.h
(LANG_HOOKS_CILKPLUS::LANG_HOOKS_CILKPLUS_CILK_SPEC_COPY_BODY):
Likewise.
(LANG_HOOKS_CILKPLUS_CILK_SPEC_COPY_BODY): New #define.

gcc/cp/ChangeLog
2013-11-25  Balaji V. Iyer  

* cp-tree.h (cilk_valid_spawn): New prototype.
(gimplify_cilk_spawn): Likewise.
(cp_cilk_install_body_wframe_cleanup): Likewise.
(cilk_create_lambda_fn_tmp_var): Likewise.
(create_try_catch_expr): Likewise.
* decl.c (finish_function): Insert Cilk function-calls when a
_Cilk_spawn is used in a function.
* parser.c (cp_parser_postfix_expression): Added RID_CILK_SPAWN and
RID_CI

Re: [libgcc, build] Suppress some warnings for soft-fp files

2013-11-25 Thread Paolo Bonzini
On 25/11/2013 16:45, Rainer Orth wrote:
> Uros prompted me to look into why we were still getting warnings
> compiling the soft-fp code in libgcc despite this in config/t-softfp:
> 
> $(soft-fp-objects) : INTERNAL_CFLAGS += -Wno-missing-prototypes -Wno-type-limits
> 
> It turned out that soft-fp-objects still included the $srcdir prefix.
> It seems my libgcc migration missed some chunk of code to strip that,
> although I still see the warnings on the 4.6 branch (i.e. before the
> migration).
> 
> Anyway, the following patch fixes this.  Bootstrapped without
> regressions on i386-pc-solaris2.11 and x86_64-unknown-linux-gnu and
> verified the warnings are indeed gone.
> 
> Ok for mainline?

Ok.

Paolo



Terminology (was: Ping Re: [gomp4] Dumping gimple for offload.)

2013-11-25 Thread Thomas Schwinge
Hi!

Just some suggestion related to terminology.


On Tue, 19 Nov 2013 13:58:29 +0400, Ilya Tocar  wrote:
> On 14 Nov 11:27, Richard Biener wrote:
> > > +  /* Set when symbol needs to be dumped for lto/offloading.  */
> > > +  unsigned need_dump : 1;
> > > +
> > 
> > That's very non-descriptive.  What's "offloading"?  But yes, something
> > like this is what I was asking for.
> 
> I've changed it into:
> Set when symbol needs to be dumped into LTO bytecode for LTO,
> or in pragma omp target case, for separate compilation targeting
> a different architecture.

Can we in fact agree to use the term "offload" to mean exactly that?
We'll need this in other contexts, too, such as for configuring the
"secondary" lto1 (which is, in fact, the one that will be processing the
main GCC's "offloaded" code)?  I'm happy to go looking for a proper
section in GCC's (internal?) manual to document what "offloading" means
in GCC's context, and I'm likewise happy to hear if there's any better
term existing for describing basically the process of »separate
compilation targeting a different architecture«?  (I'm not a native
speaker.)  By the way, in this context, I like saying "offloading" better
than "acceleration", because while we strive for acceleration, offloading
is what we technically do.


> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index c3a8967..53cd250 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -2019,7 +2019,18 @@ ipa_passes (void)
> passes->all_lto_gen_passes);
>  
>if (!in_lto_p)
> -ipa_write_summaries ();
> +{
> +  if (flag_openmp)

The following comment applies to several more instances in this patch:
after the front end's parsing stage, we should now basically everywhere
treat flag_openacc and flag_openmp the same.  The idea is that all the
existing OpenMP omp_* infrastructure in the middle end and following is
now applicable to not only OpenMP but also OpenACC and any other
"acceleration" mechanisms.  Again, is there a better term to use instead
of "acceleration" for describing the union of Cilk+, OpenACC, OpenMP, and
similar techniques?

Also, would it then make sense to define a flag à la:

#define flag_acceleration (flag_openacc | flag_openmp)

..., and begin using that everywhere after the front ends where
flag_openmp is currently used?


> +/* Select what needs to be dumped. In lto case dump everything.
> +   In omp target case only dump stuff marked with attribute.  */
> +void
> +select_what_to_dump (bool is_omp)

Likewise, while this obviously (and unsurprisingly) continues with the
existing convention, I'd suggest that in the future we name such things
more generically: is_offload or is_acceleration (or, again, any better
term that is generically applicable).

Or, going by what I've been told before by Jakub and Nathan in
:
»Names are sticky.«, should we instead continue to name all these new
things omp_*, too, and declare that the "omp" tag is an artifact of
history?  (But it certainly is an obstacle to anyone new to the code.  I
realize that people are a bit tired of refactoring, which recently has
been applied in other contexts, and has caused some "churn".  But still,
I'd personally go ahread and rename the current omp_* middle end bits to
any new tag that we agree on, just to make things more clear to everyone
new to the code.  But, I don't insist on that (by now I manage to
internally map omp_* to acceleration_* or similar), so you long-time
contributors of course get to have the finaly say about this.  I'm only
commenting from still a new contributor's point of view.)


> diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
> index 797e92e..f4c46db 100644
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -139,6 +139,11 @@ along with GCC; see the file COPYING3.  If not see
> name for the functions and static_initializers.  For other types of
> sections a '.' and the section type are appended.  */
>  #define LTO_SECTION_NAME_PREFIX ".gnu.lto_"
> +#define OMP_SECTION_NAME_PREFIX ".gnu.target_lto_"

Also here, the "target" tag is confusing in that "target" typically has a
different meaning in the compiler context -- while this one here is used
for implementing OpenMP's target construct, what it technically does
should be described by .gnu.offload_gimple_ or something similar.  (Note
that also the LTO term is no longer really applicable, as we're not
necessarily doing link-time optimization here, but instead rather just
use the GIMPLE dumping ("offloading") infrastructure that happens to
already be implemented in GCC's LTO support.)

Again, all this doesn't matter to anyone who's already versed with the
code, but it does matter to people who are new, and still have to learn
all these fine details.  (Of which there are many, many more.  Yes, I
know, life's tough, and I'm used to that, but still.)  ;-)


And I'm certa

Re: wide-int, c front end

2013-11-25 Thread Joseph S. Myers
On Sat, 23 Nov 2013, Mike Stump wrote:

> Richi has asked that we break the wide-int patch so that the individual 
> port and front end maintainers can review their parts without having to go 
> through the entire patch.  This patch covers the c front end.
> 
> Ok?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[SH, committed] Fix PR 59243

2013-11-25 Thread Oleg Endo
Hello,

This patch is the same as posted in PR 59243.
Tested with
 make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

Pre-approved by Kaz in PR 59243, committed as rev 205358.

Cheers,
Oleg

gcc/ChangeLog:
PR target/53976
PR target/59243
* config/sh/sh_optimize_sett_clrt.cc (struct ccreg_value): Update
comments.
(sh_optimize_sett_clrt::find_last_ccreg_values): Check stack of
previously visited basic blocks before recursing instead of only one
basic block.
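In short, the failure mode (illustrative):

  /* On a CFG cycle the backwards walk can re-enter a block via a
     predecessor that differs from the single block recorded in
     prev_visited_bb, so the old `bb != prev_visited_bb' test never
     fires and the recursion cycles forever.  Keeping a stack of all
     visited blocks closes that hole.  */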
Index: gcc/config/sh/sh_optimize_sett_clrt.cc
===
--- gcc/config/sh/sh_optimize_sett_clrt.cc	(revision 205191)
+++ gcc/config/sh/sh_optimize_sett_clrt.cc	(working copy)
@@ -29,6 +29,7 @@
 #include "target.h"
 
 #include <vector>
+#include <algorithm>
 
 /*
 This pass tries to eliminate unnecessary sett or clrt instructions in cases
@@ -87,16 +88,15 @@
   struct ccreg_value
   {
 // The insn at which the ccreg value was determined.
+// Might be NULL_RTX if e.g. an unknown value is recorded for an
+// empty basic block.
 rtx insn;
 
 // The basic block where the insn was discovered.
-// Notice that the CFG might be invalid at late RTL stages and
-// BLOCK_FOR_INSN might return null.  Thus the basic block are recorded
-// here while traversing them.
 basic_block bb;
 
-// The value of ccreg.  If NULL_RTX, the value is not known, e.g. if the
-// ccreg is clobbered.
+// The value of ccreg.  If NULL_RTX, the exact value is not known, but
+// the ccreg is changed in some way (e.g. clobbered).
 rtx value;
   };
 
@@ -113,7 +113,7 @@
   // start insn.
   void find_last_ccreg_values (rtx start_insn, basic_block bb,
			   std::vector<ccreg_value>& values_out,
-			   basic_block prev_visited_bb = NULL) const;
+			   std::vector<basic_block>& prev_visited_bb) const;
 
   // Given a cbranch insn, its basic block and another basic block, determine
   // the value to which the ccreg will be set after jumping/falling through to
@@ -199,6 +199,10 @@
   std::vector<ccreg_value> ccreg_values;
   ccreg_values.reserve (32);
 
+  // Something for recording visited basic blocks to avoid infinite recursion.
+  std::vector<basic_block> visited_bbs;
+  visited_bbs.reserve (32);
+
   // Look for insns that set the ccreg to a constant value and see if it can
   // be optimized.
   basic_block bb;
@@ -221,7 +225,9 @@
 	log_msg ("\n");
 
 	ccreg_values.clear ();
-	find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values);
+	visited_bbs.clear ();
+	find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values,
+visited_bbs);
 
 	log_msg ("number of ccreg values collected: %u\n",
 		 (unsigned int)ccreg_values.size ());
@@ -307,13 +313,17 @@
 sh_optimize_sett_clrt
 ::find_last_ccreg_values (rtx start_insn, basic_block bb,
  			  std::vector<ccreg_value>& values_out,
-			  basic_block prev_visited_bb) const
+			  std::vector<basic_block>& prev_visited_bb) const
 {
-  if (start_insn == NULL_RTX)
-return;
+  // FIXME: For larger CFGs this will unnecessarily re-visit basic blocks.
+  // Once a basic block has been visited, the result should be stored in
+  // some container so that it can be looked up quickly eliminating the
+  // re-visits.
+  log_msg ("looking for ccreg values in [bb %d] ", bb->index);
+  if (!prev_visited_bb.empty ())
+log_msg ("(prev visited [bb %d])", prev_visited_bb.back ()->index);
+  log_msg ("\n");
 
-  log_msg ("looking for ccreg values in [bb %d]\n", bb->index);
-
   for (rtx i = start_insn; i != NULL_RTX && i != PREV_INSN (BB_HEAD (bb));
i = PREV_INSN (i))
 {
@@ -341,7 +351,7 @@
 	  return;
 	}
 
-  if (any_condjump_p (i) && onlyjump_p (i) && prev_visited_bb != NULL)
+  if (any_condjump_p (i) && onlyjump_p (i) && !prev_visited_bb.empty ())
 	{
 	  // For a conditional branch the ccreg value will be a known constant
 	  // of either 0 or STORE_FLAG_VALUE after branching/falling through
@@ -353,10 +363,11 @@
 	  ccreg_value v;
 	  v.insn = i;
 	  v.bb = bb;
-	  v.value = GEN_INT (sh_cbranch_ccreg_value (i, bb, prev_visited_bb));
+	  v.value = GEN_INT (sh_cbranch_ccreg_value (i, bb,
+		 prev_visited_bb.back ()));
 
 	  log_msg ("branches to [bb %d] with ccreg value ",
-		   prev_visited_bb->index);
+		   prev_visited_bb.back ()->index);
 	  log_rtx (v.value);
 	  log_msg ("\n");
 
@@ -370,26 +381,35 @@
   // In this case, check the predecessor basic blocks.
   unsigned int pred_bb_count = 0;
 
-  // If the current basic block is the same as the previous one, it's a loop.
-  // Don't try to recurse again, as this will result in an infinite loop.
-  if (bb != prev_visited_bb)
-for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei); ei_next (&ei))
-  {
-	basic_block pred_bb = ei_edge (ei)->src;
-	if (pred_bb->index == ENTRY_BLOCK)
-	  continue;
+  // If the current basic block is not in the stack of previously visited
+  // basic blocks
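
A small standalone sketch of the recursion guard this patch adds -- this
is illustrative only, not the pass itself, and all names in it are made up:

    #include <algorithm>
    #include <vector>

    // Visit predecessor "blocks" depth-first, skipping any block that is
    // already on the stack of blocks being visited.  Checking the whole
    // stack, instead of only the immediately preceding block, is what
    // breaks the infinite recursion on CFG cycles.
    static void
    visit (int bb, const std::vector<std::vector<int> > &preds,
           std::vector<int> &visiting)
    {
      if (std::find (visiting.begin (), visiting.end (), bb)
          != visiting.end ())
        return;  // bb is already on the current path; don't re-enter it.

      visiting.push_back (bb);
      for (size_t i = 0; i < preds[bb].size (); ++i)
        visit (preds[bb][i], preds, visiting);
      visiting.pop_back ();
    }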

Re: Terminology (was: Ping Re: [gomp4] Dumping gimple for offload.)

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 05:13:25PM +0100, Thomas Schwinge wrote:
> > --- a/gcc/cgraphunit.c
> > +++ b/gcc/cgraphunit.c
> > @@ -2019,7 +2019,18 @@ ipa_passes (void)
> >   passes->all_lto_gen_passes);
> >  
> >if (!in_lto_p)
> > -ipa_write_summaries ();
> > +{
> > +  if (flag_openmp)
> 
> The following comment applies to several more instances in this patch:
> after the front end's parsing stage, we should now basically everywhere
> treat flag_openacc and flag_openmp the same.  The idea is that all the
> existing OpenMP omp_* infrastructure in the middle end and following is
> now applicable to not only OpenMP but also OpenACC and any other
> "acceleration" mechanisms.  Again, is there a better term to use instead
> of "acceleration" for describing the union of Cilk+, OpenACC, OpenMP, and
> similar techniques?

I don't think acceleration is a good term for everything say OpenMP does,
and calling OpenMP clauses acceleration clauses just because some other
standards decided to copy/modify a subset of the OpenMP syntax is weird.
> 
> Also, would it then make sense to define a flag à la:
> 
> #define flag_acceleration (flag_openacc | flag_openmp)
> 
> ..., and begin using that everywhere after the front ends where
> flag_openmp is currently used?

We certainly can have some helper macros, but flag_openacc | flag_openmp
isn't the right definition of all of them, right now we have
flag_openmp, flag_enable_cilkplus (misnamed, should be really
flag_cilkplus), flag_openacc and flag_openmp_simd.  For some things
(e.g. related to offloading) you want flag_openacc | flag_openmp,
for others (e.g. SIMD) you want flag_openmp | flag_openmp_simd |
flag_cilkplus, and for others some different combination.  So
flag_acceleration would certainly be misleading.
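
To make that concrete, hypothetical per-use-case helpers (none of these
names exist in GCC; they are purely illustrative) would have to be split
by use case, e.g.:

    #define flag_offloading   (flag_openacc | flag_openmp)
    #define flag_simd_pragmas (flag_openmp | flag_openmp_simd | flag_cilkplus)

i.e. one helper per combination, rather than a single flag_acceleration.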

> Also here, the "target" tag is confusing in that "target" typically has a
> different meaning in the compiler context -- while this one here is used

I agree that target word there is weird.

> for implementing OpenMP's target construct, what it technically does
> should be described by .gnu.offload_gimple_ or somthing similar.  (Note
> that also the LTO term is no longer really applicable, as we're not
> neccessarily doing link-time optimization here, but instead rather just
> use the GIMPLE dumping ("offloading") infrastructure that happens to
> already be implemented in GCC's LTO support.

But LTO is right here IMHO, we are streaming the LTO bytecode there, not
GIMPLE.

Jakub


[SH, committed] Fix a warning in sh.md

2013-11-25 Thread Oleg Endo
Hello,

This fixes a warning in sh.md caused by a missing mode in the
doloop_end_split pattern.
Tested with
 make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

with no new failures.  Committed as obvious as rev 205359.

Cheers,
Oleg

gcc/ChangeLog:
* config/sh/sh.md (doloop_end_split): Add missing SI mode.
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 205190)
+++ gcc/config/sh/sh.md	(working copy)
@@ -8801,7 +8801,7 @@
 		  (label_ref (match_operand 1 "" ""))
 		  (pc)))
(set (match_operand:SI 0 "arith_reg_dest" "=r")
-	(plus (match_dup 2) (const_int -1)))
+	(plus:SI (match_dup 2) (const_int -1)))
(clobber (reg:SI T_REG))]
   "TARGET_SH2"
   "#"


Re: [PATCH] Fix checking of gimple types

2013-11-25 Thread Michael Matz
Hi,

On Mon, 25 Nov 2013, David Malcolm wrote:

> I'm not a fan of these "_layout" names, but I'm not sure what better to
> call them. Perhaps:
>GSS_OMP_PARALLEL_LAYOUT -> GSS_OMP_WITH_CLAUSES_CHILD_FN_DATA_ARG
>GSS_OMP_SINGLE_LAYOUT   -> GSS_OMP_WITH_CLAUSES
>GSS_OMP_ATOMIC_STORE_LAYOUT -> GSS_OMP_WITHOUT_SEQ_WITH_VAL
> with analogous names for the corresponding structs.

The preexisting name for these structs has a _base suffix 
(gimple_statement_with_ops_base and gimple_statement_with_memory_ops_base
are indeed in the same situation in that no gimple codes 
exist that directly make use of that gss, unlike e.g. GSS_BASE which 
does have some direct users).

(I do like more selfdescribing names, though, so perhaps the existing 
_base variants could also use some renaming, but it should either be no 
_base names at all, or only _base names (i.e. no additional _layout for 
some other names)).

> Another approach to this would be to entirely eliminate these shared
> layout types, going purely with the conceptual is-a relationships
> between the types, say, by replacing the gengtype switch on GSS_ value
> with a switch on GIMPLE_ code.  Given that this might affect the speed
> of GC (bigger switch statements), I went with the relatively more
> conservative change.

I think it makes sense to keep storage layout and gimple codes separate, 
but still have them both structured.  The former allows easier changes 
when looking at data layout problems (like memory optimizations, or GC), 
the latter easier transformations related to higher level logic of the 
data structures.
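
As a simplified illustration of that separation, several gimple codes can
share one storage layout -- using the names from this discussion, with the
exact set of sharers being illustrative, not verified here:

    /* Storage layout: what gengtype/GC switch on.  */
    gss_for_code (GIMPLE_OMP_SINGLE) == GSS_OMP_SINGLE_LAYOUT
    gss_for_code (GIMPLE_OMP_TARGET) == GSS_OMP_SINGLE_LAYOUT

    /* Gimple code: what the optimizers dispatch on.  */
    gimple_code (stmt) == GIMPLE_OMP_TARGET

so layout changes touch one place, while code-level logic stays distinct
per statement kind.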


Ciao,
Michael.


Switch gimple-fold to new devirt infrastructure

2013-11-25 Thread Jan Hubicka
Hi,
I am looking into testcases for individual code paths of ipa-devirt, and my life
would be much easier if gimple-fold did not catch some of them with the old code.
This patch also improves the generated code by devirtualizing earlier in the game,
since get_polymorphic_call_info is now a superset of
gimple_extract_devirt_binfo_from_cst, but with more tricks in it.

It does some SSA graph walking that may break when folding is called with broken
SSA form, but since other places in gimple-fold do that, too, I hope it is
OK.

Bootstrapped/regtested x86_64-linux, OK?

Honza

* ipa-utils.h (possible_polymorphic_call_targets): Determine context of
the call.
(gimple_fold_call): Use ipa-devirt to devirtualize.

Index: ipa-utils.h
===
*** ipa-utils.h (revision 205108)
--- ipa-utils.h (working copy)
*** possible_polymorphic_call_targets (tree
*** 121,130 
   bool *final = NULL,
   void **cache_token = NULL)
  {
return possible_polymorphic_call_targets (obj_type_ref_class (call),
tree_to_uhwi
  (OBJ_TYPE_REF_TOKEN (call)),
!   ipa_dummy_polymorphic_call_context,
final, cache_token);
  }
  
--- 121,137 
   bool *final = NULL,
   void **cache_token = NULL)
  {
+   tree otr_type;
+   HOST_WIDE_INT otr_token;
+   ipa_polymorphic_call_context context;
+ 
+   get_polymorphic_call_info (current_function_decl,
+call,
+&otr_type, &otr_token, &context);
return possible_polymorphic_call_targets (obj_type_ref_class (call),
tree_to_uhwi
  (OBJ_TYPE_REF_TOKEN (call)),
!   context,
final, cache_token);
  }
  
Index: gimple-fold.c
===
*** gimple-fold.c   (revision 205108)
--- gimple-fold.c   (working copy)
*** gimple_fold_call (gimple_stmt_iterator *
*** 1151,1174 
}
else if (virtual_method_call_p (callee))
{
! tree obj = OBJ_TYPE_REF_OBJECT (callee);
! tree binfo = gimple_extract_devirt_binfo_from_cst
!(obj, obj_type_ref_class (callee));
! if (binfo)
{
! HOST_WIDE_INT token
!   = TREE_INT_CST_LOW (OBJ_TYPE_REF_TOKEN (callee));
! tree fndecl = gimple_get_virt_method_for_binfo (token, binfo);
! if (fndecl)
!   {
! #ifdef ENABLE_CHECKING
! gcc_assert (possible_polymorphic_call_target_p
!(callee, cgraph_get_node (fndecl)));
! 
! #endif
! gimple_call_set_fndecl (stmt, fndecl);
! changed = true;
!   }
}
}
  }
--- 1151,1168 
}
else if (virtual_method_call_p (callee))
{
! bool final;
! vec <cgraph_node *> targets
!   = possible_polymorphic_call_targets (callee, &final);
! if (final && targets.length () <= 1)
{
! tree fndecl;
! if (targets.length () == 1)
!   fndecl = targets[0]->decl;
! else
!   fndecl = builtin_decl_implicit (BUILT_IN_UNREACHABLE);
! gimple_call_set_fndecl (stmt, fndecl);
! changed = true;
}
}
  }


Re: [SH] Pass --isa to assembler

2013-11-25 Thread Oleg Endo
On Mon, 2013-11-25 at 09:12 +0900, Kaz Kojima wrote:
> Oleg Endo  wrote:
> > Currently GCC doesn't pass the --isa parameter to the assembler for SH
> > targets other than SH2A and SH5.  This makes the assembler accept any
> > kind of ISA and happily produce e.g. SH2A code even though the target is
> > SH4, which will then fail to link.
> > The attach patch fixes this by passing the --isa parameter also for -m1,
> > -m2*, -m3*, and -m4*.
> > It also caught a mistake in libgcc/crt1.S, where the SH3 / SH4 VBR setup
> > code was compiled for SH2E.
> > 
> > Tested with
> >  make -k -j4 check RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> > 
> > and no new failures.
> > OK for trunk?
> 
> libgcc fix should be an independent patch and is OK for trunk
> as an obvious fix.  --isa part looks fine but is a new feature.
> It's OK when trunk goes back to stage1.

OK, I've committed the libgcc fix as rev 205360.

Cheers,
Oleg



Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-25 Thread Jan Hubicka
> > What's the reason you cannot defer SIMD cloning to LTRANS stage
> > as simple IPA pass next to IPA-PTA?
> 
> Ok, deferring till after IPA-PTA was easy, just small ipa-cp.c changes
> (look at the attribute rather than simd*clone* fields), passes.def and
> had to tweak ipa_add_new_function which assumed that all new functions
> must be definitions with gimple body.

Note that any small IPA pass at ltrans will increase peak memory use of
ltrans compilation by loading all function bodies into memory (since
IPA transformations need to be applied first).

It would be nice to avoid having these enabled by default unless we have
a really good reason for it.

> 2013-11-25  Aldy Hernandez  
>   Jakub Jelinek  
> 
>   * cgraph.h (enum cgraph_simd_clone_arg_type): New.
>   (struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New.
>   (struct cgraph_node): Add simdclone and simd_clones fields.
>   * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen,
>   ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions.
>   (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
>   TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define.
>   * doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
>   TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add.
>   * doc/tm.texi: Regenerated.
>   * ggc.h (ggc_alloc_cleared_simd_clone_stat): New function.
>   * ipa-cp.c (determine_versionability): Fail if "omp declare simd"
>   attribute is present.
>   * omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h.
>   (simd_clone_vector_of_formal_parm_types): New function.
>   (simd_clone_struct_alloc, simd_clone_struct_copy,
>   simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract,
>   simd_clone_compute_base_data_type, simd_clone_mangle,
>   simd_clone_create, simd_clone_adjust_return_type,
>   create_tmp_simd_array, simd_clone_adjust_argument_types,
>   simd_clone_init_simd_arrays): New functions.
>   (struct modify_stmt_info): New type.
>   (ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body,
>   simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New
>   functions.
>   (pass_data_omp_simd_clone): New variable.
>   (pass_omp_simd_clone): New class.
>   (make_pass_omp_simd_clone): New function.
>   * passes.def (pass_omp_simd_clone): New.
>   * target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
>   TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target
>   hooks.
>   * target.h (struct cgraph_node, struct cgraph_simd_node): Declare.
>   * tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document.
>   * tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define.
>   * tree-pass.h (make_pass_omp_simd_clone): New prototype.
>   * tree-vect-data-refs.c: Include cgraph.h.
>   (vect_analyze_data_refs): Inline by hand find_data_references_in_loop
>   and find_data_references_in_bb, if find_data_references_in_stmt
>   fails, still allow calls to #pragma omp declare simd functions
>   in #pragma omp simd loops unless they contain data references among
>   the call arguments or in lhs.
>   * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
>   calls with no lhs.
>   (vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without
>   lhs.
>   * tree-vectorizer.h (enum stmt_vec_info_type): Add
>   call_simd_clone_vec_info_type.
>   (struct _stmt_vec_info): Add simd_clone_fndecl field.
>   (STMT_VINFO_SIMD_CLONE_FNDECL): Define.
>   * tree-vect-stmts.c: Include tree-ssa-loop.h,
>   tree-scalar-evolution.h and cgraph.h.
>   (vectorizable_call): Handle calls without lhs.  Assert
>   !stmt_can_throw_internal instead of failing for it.  Don't update
>   EH stuff.
>   (struct simd_call_arg_info): New.
>   (vectorizable_simd_clone_call): New function.
>   (vect_transform_stmt): Call it.
>   (vect_analyze_stmt): Likewise.  Allow NULL STMT_VINFO_VECTYPE for
>   calls without lhs.
>   * ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node
>   if cgraph_function_with_gimple_body_p is true.
> c/
>   * c-decl.c (c_builtin_function_ext_scope): Avoid binding if
>   external_scope is NULL.
> cp/
>   * semantics.c (finish_omp_clauses): For #pragma omp declare simd
>   linear clause step call maybe_constant_value.
> testsuite/
>   * g++.dg/gomp/declare-simd-1.C (f38): Make sure
>   simdlen is a power of two.
>   * gcc.dg/gomp/simd-clones-2.c: Compile on all targets.
>   Remove -msse2.  Adjust regexps for name mangling changes.
>   * gcc.dg/gomp/simd-clones-3.c: Likewise.
>   * gcc.dg/vect/vect-simd-clone-1.c: New test.
>   * gcc.dg/vect/vect-simd-clone-2.c: New test.
>   * gcc.dg/vect/vect-simd-clone-3.c: New test.
>   * gcc.dg/vect/vect-simd-clone-4.c: New test.
> 

Re: wide-int, gimple

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 12:24:30PM +0100, Richard Biener wrote:
> On Sat, Nov 23, 2013 at 8:21 PM, Mike Stump  wrote:
> > Richi has asked that we break the wide-int patch so that the individual port 
> > and front end maintainers can review their parts without having to go through 
> > the entire patch.  This patch covers the gimple code.
> 
> @@ -1754,7 +1754,7 @@ dump_ssaname_info (pretty_printer *buffer, tree
> node, int spc)
>if (!POINTER_TYPE_P (TREE_TYPE (node))
>&& SSA_NAME_RANGE_INFO (node))
>  {
> -  double_int min, max, nonzero_bits;
> +  widest_int min, max, nonzero_bits;
>value_range_type range_type = get_range_info (node, &min, &max);
> 
>if (range_type == VR_VARYING)
> 
> this makes me suspect you are changing SSA_NAME_RANGE_INFO
> to embed two max wide_ints.  That's a no-no.

Well, the range_info_def struct right now contains 3 double_ints, which is
unnecessary overhead for most of the cases where the SSA_NAME's type
has just at most HOST_BITS_PER_WIDE_INT bits and thus we could fit all 3 of
them into 3 HOST_WIDE_INTs rather than 3 double_ints.  So supposedly struct
range_info_def could be a template on the type's precision rounded up to HWI
bits, or say have 3 alternatives there, use
FIXED_WIDE_INT (HOST_BITS_PER_WIDE_INT) for the smallest types,
FIXED_WIDE_INT (2 * HOST_BITS_PER_WIDE_INT) aka double_int for the larger
but still common ones, and widest_int for the rest, then the API to set/get
it could use widest_int everywhere, and just what storage we'd use would
depend on the precision of the type.
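
A minimal sketch of that storage idea (hypothetical, not actual GCC code;
only double_int, widest_int, FIXED_WIDE_INT and HOST_WIDE_INT are real
names):

    /* Three storage variants, chosen from TYPE_PRECISION rounded up to
       whole HWIs; the get/set accessors convert to/from widest_int.  */
    struct range_info_hwi
      { FIXED_WIDE_INT (HOST_BITS_PER_WIDE_INT) min, max, nonzero_bits; };
    struct range_info_double
      { FIXED_WIDE_INT (2 * HOST_BITS_PER_WIDE_INT) min, max, nonzero_bits; };
    struct range_info_widest
      { widest_int min, max, nonzero_bits; };

so get_range_info / set_range_info keep a widest_int interface while
small types pay only one HWI per field.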

Jakub


Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Wei Mi
On Mon, Nov 25, 2013 at 2:08 AM, Alexander Monakov  wrote:
> On Sat, 23 Nov 2013, Wei Mi wrote:
>> For the failed testcase, it was compiled using -fmodulo-sched.
>> modulo-sched phase set SCHED_GROUP_P of a jump insn to be true, which
>> means the jump insn should be scheduled with prev insn as a group.
>
> SMS doesn't set SCHED_GROUP_P by itself; did you mean that SCHED_GROUP_P is
> set by dependency analysis code similar to sched2?
>

SCHED_GROUP_P is set in sched_analyze for "call + return value" groups
and other groups, and in sched_init for macro-fusion.  Both
sched_analyze and sched_init are used by SMS (sched_analyze is used
when creating the ddg).  I think sched1 may have the same problem when
it sets SCHED_GROUP_P and sched2 uses it.

>> When modulo scheduling is finished, the flag of SCHED_GROUP_P is not
>> cleaned up. After that, pass_jump2 phase split the bb and move the
>> prev insn to another bb. Then pass_sched2 see the flag and mistakenly
>> try to bind the jump insn with a code label.
>
> I think the analysis is incomplete.  Looking at the backtrace posted in the
> bug report, the failure path goes through chain_to_prev_insn, which protects
> against such failure:
>
>   prev_nonnote = prev_nonnote_nondebug_insn (insn);
>   if (BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (prev_nonnote)
>   && ! sched_insns_conditions_mutex_p (insn, prev_nonnote))
> add_dependence (insn, prev_nonnote, REG_DEP_ANTI);
>
> Why does it end up with a label at the assertion failure point?
>
> Alexander

Because a code label is not a note or a debug insn.

I think it is impossible to detect such an inconsistency in
chain_to_prev_insn.  If the prev_nonnote is not a code label, the bug
just won't be exposed this time.  Suppose some other optimization
inserts a real insn before a jump marked as SCHED_GROUP_P: the
following scheduler pass will silently schedule them together.  That
is why I think it is necessary to clean up SCHED_GROUP_P when a
scheduling pass is finished.

Thanks,
Wei.


[GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Dehao Chen
afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
does. So we refactor the code to keep only one afdo_propagate_edge
function.

Bootstrapped and passed all unittests and performance tests.

OK for google branch?

Thanks,
Dehao

Index: gcc/auto-profile.c
===
--- gcc/auto-profile.c (revision 205354)
+++ gcc/auto-profile.c (working copy)
@@ -1069,44 +1069,6 @@ afdo_find_equiv_class (void)
 }
 }

-/* If a baisk block only has one in/out edge, then the bb count and he
-   edge count should be the same.
-   IS_SUCC is true if the out edge of the basic block is examined.
-   Return TRUE if any basic block/edge count is changed.  */
-
-static bool
-afdo_propagate_single_edge (bool is_succ)
-{
-  basic_block bb;
-  bool changed = false;
-
-  FOR_EACH_BB (bb)
-if (is_succ ? single_succ_p (bb) : single_pred_p (bb))
-  {
- edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb);
- if (((e->flags & EDGE_ANNOTATED) == 0)
-&& ((bb->flags & BB_ANNOTATED) != 0))
-  {
-e->count = bb->count;
-e->flags |= EDGE_ANNOTATED;
-changed = true;
-  }
- else if (((e->flags & EDGE_ANNOTATED) != 0)
-&& ((bb->flags & BB_ANNOTATED) == 0))
-  {
-bb->count = e->count;
-bb->flags |= BB_ANNOTATED;
-changed = true;
-  }
- else if (bb->count != e->count)
-  {
-e->count = bb->count = MAX (bb->count, e->count);
-changed = true;
-  }
-  }
-  return changed;
-}
-
 /* If a basic block's count is known, and only one of its in/out edges' count
is unknown, its count can be calculated.
Meanwhile, if all of the in/out edges' counts are known, then the basic
@@ -1115,7 +1077,7 @@ afdo_find_equiv_class (void)
Return TRUE if any basic block/edge count is changed.  */

 static bool
-afdo_propagate_multi_edge (bool is_succ)
+afdo_propagate_edge (bool is_succ)
 {
   basic_block bb;
   bool changed = false;
@@ -1281,14 +1243,10 @@ afdo_propagate (void)
 {
   changed = false;

-  if (afdo_propagate_single_edge (true))
+  if (afdo_propagate_edge (true))
  changed = true;
-  if (afdo_propagate_single_edge (false))
+  if (afdo_propagate_edge (false))
  changed = true;
-  if (afdo_propagate_multi_edge (true))
- changed = true;
-  if (afdo_propagate_multi_edge (false))
- changed = true;
   afdo_propagate_circuit ();
 }
 }


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Xinliang David Li
Ok.

David

On Mon, Nov 25, 2013 at 9:56 AM, Dehao Chen  wrote:
> afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
> does. So we refactor the code to keep only one afdo_propagate_edge
> function.
>
> Bootstrapped and passed all unittests and performance tests.
>
> OK for googlge branch?
>
> Thanks,
> Dehao


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Diego Novillo
Thanks, Dehao.

One other thing that I've found on the LLVM implementation (that I'm
not sure happens in GCC): self-referential edges.  If a loop consists
of a single basic block, the back edge will point to itself.  I
haven't been able to reproduce it with regular control flow constructs
in GCC.

When this happens, and if we've visited the block itself already
(i.e., the block has weights), I've simply set the weight of the
self-referential edge to the weight of the block itself.  Do you see
any problems with that heuristic?


Thanks.  Diego.

On Mon, Nov 25, 2013 at 12:56 PM, Dehao Chen  wrote:
> afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
> does. So we refactor the code to keep only one afdo_propagate_edge
> function.
>
> Bootstrapped and passed all unittests and performance tests.
>
> OK for googlge branch?
>
> Thanks,
> Dehao


[PATCH] Preserve ubsan_types

2013-11-25 Thread Marek Polacek
When running bootstrap-ubsan I got an error in stage2: some
Lubsan_types were wrongly discarded, leading to a link error.
Thus fixed.

Ubsan testsuite passes with -m32/-m64, ok for trunk?

2013-11-25  Marek Polacek  

* ubsan.c (ubsan_type_descriptor): Set DECL_PRESERVE_P on a decl.

--- gcc/ubsan.c.mp3 2013-11-25 19:04:45.221875476 +0100
+++ gcc/ubsan.c 2013-11-25 19:05:06.338954302 +0100
@@ -352,6 +352,7 @@ ubsan_type_descriptor (tree type, bool w
   DECL_ARTIFICIAL (decl) = 1;
   DECL_IGNORED_P (decl) = 1;
   DECL_EXTERNAL (decl) = 0;
+  DECL_PRESERVE_P (decl) = 1;
 
   size_t len = strlen (pretty_name);
   tree str = build_string (len + 1, pretty_name);
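
(For context: DECL_PRESERVE_P is the same bit that __attribute__((used))
sets; it tells cgraph/varpool that the decl must be output and kept even
though nothing in the translation unit appears to reference it -- which is
exactly what the Lubsan_type descriptors need here.)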

Marek


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Xinliang David Li
In this case the backedge will be a critical edge, which will be split by GCC.

David

On Mon, Nov 25, 2013 at 10:08 AM, Diego Novillo  wrote:
> Thanks, Deaho.
>
> One other thing that I've found on the LLVM implementation (that I'm
> not sure happens in GCC): self-referential edges.  If a loop consists
> of a single-basic block, the back edge will point to itself.  I
> haven't been able to reproduce it with regular control flow constructs
> in GCC.
>
> When this happens, and if we've visited the block itself already
> (i.e., the block has weights), I've simply set the weight of the
> self-referential edge to the weight of the block itself.  Do you see
> any problems with that heuristic?
>
>
> Thanks.  Diego.
>
> On Mon, Nov 25, 2013 at 12:56 PM, Dehao Chen  wrote:
>> afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
>> does. So we refactor the code to keep only one afdo_propagate_edge
>> function.
>>
>> Bootstrapped and passed all unittests and performance tests.
>>
>> OK for googlge branch?
>>
>> Thanks,
>> Dehao


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Dehao Chen
On Mon, Nov 25, 2013 at 10:08 AM, Diego Novillo  wrote:
> Thanks, Deaho.
>
> One other thing that I've found on the LLVM implementation (that I'm
> not sure happens in GCC): self-referential edges.  If a loop consists
> of a single-basic block, the back edge will point to itself.  I
> haven't been able to reproduce it with regular control flow constructs
> in GCC.
>
> When this happens, and if we've visited the block itself already
> (i.e., the block has weights), I've simply set the weight of the
> self-referential edge to the weight of the block itself.  Do you see
> any problems with that heuristic?

In this case, the propagate_edge function will keep increasing the BB
count. We set a threshold (PARAM_AUTOFDO_MAX_PROPAGATE_ITERATIONS) to
prevent it from making BB count too large.

Dehao

>
>
> Thanks.  Diego.
>
> On Mon, Nov 25, 2013 at 12:56 PM, Dehao Chen  wrote:
>> afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
>> does. So we refactor the code to keep only one afdo_propagate_edge
>> function.
>>
>> Bootstrapped and passed all unittests and performance tests.
>>
>> OK for googlge branch?
>>
>> Thanks,
>> Dehao


Re: [PATCH] Improve handling of threads which cross over the current loops header

2013-11-25 Thread Jeff Law

On 11/22/13 08:56, Richard Biener wrote:
>> So the issue here is we can create irreducible regions & new nested
>> loops.  Does just setting the header,latch fields for the current loop
>> handle those cases?
>
> Yes.

Fixed via the attached patch.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  Also 
tested by removing the requirement that the loop header contain a 
multi-way branch, bootstrapping and testing that compiler as well. 
Installed on the trunk.


Jeff


* tree-ssa-threadupdate.c (thread_through_all_blocks): Selectively
invalidate loop information.



diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index ee0c838..1a52e47 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -1579,7 +1579,6 @@ thread_through_all_blocks (bool may_peel_loop_headers)
   bitmap_iterator bi;
   bitmap threaded_blocks;
   struct loop *loop;
-  bool totally_clobbered_loops = false;
 
   /* We must know about loops in order to preserve them.  */
   gcc_assert (current_loops != NULL);
@@ -1675,9 +1674,15 @@ thread_through_all_blocks (bool may_peel_loop_headers)
/* Our path is still valid, thread it.  */
if (e->aux)
  {
-   totally_clobbered_loops
- |= thread_block ((*path)[0]->e->dest, false);
+   struct loop *loop = (*path)[0]->e->dest->loop_father;
+
+   retval |= thread_block ((*path)[0]->e->dest, false);
e->aux = NULL;
+
+   /* This jump thread likely totally scrambled this loop.
+  So arrange for it to be fixed up.  */
+   loop->header = NULL;
+   loop->latch = NULL;
  }
  }
   else
@@ -1700,32 +1705,7 @@ thread_through_all_blocks (bool may_peel_loop_headers)
   threaded_blocks = NULL;
   paths.release ();
 
-  /* If we made changes to the CFG that might have totally messed
- up the loop structure, then drop the old loop structure and
- rebuild.  */
-  if (totally_clobbered_loops)
-{
-  /* Release the current loop structures, they are totally
-clobbered at this point.  */
-  loop_optimizer_finalize ();
-  current_loops = NULL;
-
-  /* Similarly for dominance information.  */
-  free_dominance_info (CDI_DOMINATORS);
-  free_dominance_info (CDI_POST_DOMINATORS);
-
-  /* Before we can rebuild the loop structures, we need dominators,
-which requires no unreachable code.  So remove unreachable code.  */
-  delete_unreachable_blocks ();
-
-  /* Now rebuild the loop structures.  */
-  cfun->curr_properties &= ~PROP_loops;
-  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
-  cfun->curr_properties |= PROP_loops;
-  retval = 1;
-}
-
-  if (retval && current_loops)
+  if (retval)
 loops_state_set (LOOPS_NEED_FIXUP);
 
   return retval;


Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 06:15:50PM +0100, Jan Hubicka wrote:
> > > What's the reason you cannot defer SIMD cloning to LTRANS stage
> > > as simple IPA pass next to IPA-PTA?
> > 
> > Ok, deferring till after IPA-PTA was easy, just small ipa-cp.c changes
> > (look at the attribute rather than simd*clone* fields), passes.def and
> > had to tweak ipa_add_new_function which assumed that all new functions
> > must be definitions with gimple body.
> 
> Note that any small IPA pass at ltrans will increase peak memory use of
> ltrans copmilation by loading all function bodies into memory (since
> IPA transformations needs to be applied first).
> 
> It would be nice to avoid these enabled by default unless we have really
> good reason for it.

  bool gate () { return flag_openmp || flag_openmp_simd
|| flag_enable_cilkplus; }

isn't exactly enabled by default ;)
Anyway, all the pass needs is bodies of functions with "omp declare simd"
attribute which will be defined in the current partition, for functions
defined in other partitions all it wants to do is just clone the
DECL_EXTERNAL FUNCTION_DECL.  But it needs to be called before any
callers of that function (whether extern/in another partition, or local,
i.e. defined in the current partition) are run through vectorization.

Jakub


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Xinliang David Li
On Mon, Nov 25, 2013 at 10:23 AM, Dehao Chen  wrote:
> On Mon, Nov 25, 2013 at 10:08 AM, Diego Novillo  wrote:
>> Thanks, Deaho.
>>
>> One other thing that I've found on the LLVM implementation (that I'm
>> not sure happens in GCC): self-referential edges.  If a loop consists
>> of a single-basic block, the back edge will point to itself.  I
>> haven't been able to reproduce it with regular control flow constructs
>> in GCC.
>>
>> When this happens, and if we've visited the block itself already
>> (i.e., the block has weights), I've simply set the weight of the
>> self-referential edge to the weight of the block itself.  Do you see
>> any problems with that heuristic?
>
> In this case, the propagate_edge function will keep increasing the BB
> count. We set a threshold (PARAM_AUTOFDO_MAX_PROPAGATE_ITERATIONS) to
> prevent it from making BB count too large.

This is the infinite loop case ..

David
>
> Dehao
>
>>
>>
>> Thanks.  Diego.
>>
>> On Mon, Nov 25, 2013 at 12:56 PM, Dehao Chen  wrote:
>>> afdo_propagate_multi_edge can do everything afdo_propagate_single_edge
>>> does. So we refactor the code to keep only one afdo_propagate_edge
>>> function.
>>>
>>> Bootstrapped and passed all unittests and performance tests.
>>>
>>> OK for googlge branch?
>>>
>>> Thanks,
>>> Dehao


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Diego Novillo
On Mon, Nov 25, 2013 at 1:22 PM, Xinliang David Li  wrote:
> In this case the backedge will be a critical edge, which will be split by GCC.

Right. So, if I split it, I will reach essentially the same
conclusion, I think. The new block will get the original block's
weight, which (in turn) will translate into the (now only outgoing)
edge.


Diego.


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Dehao Chen
On Mon, Nov 25, 2013 at 10:26 AM, Diego Novillo  wrote:
> On Mon, Nov 25, 2013 at 1:22 PM, Xinliang David Li  wrote:
>> In this case the backedge will be a critical edge, which will be split by 
>> GCC.
>
> Right. So, if I split it, I will reach essentially the same
> conclusion, I think. The new block will get the original block's
> weight, which (in turn) will translate into the (now only outgoing)
> edge.

Why do you want to set the back edge count as the BB count? I think
the right formula is: count(back_edge) = count(BB) -
count(entry_edge), in which entry_edge is the edge that enters the
loop.
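
(A quick example with invented numbers: if the loop body BB has count 100
and the edge entering the loop has count 1, the self-referential back
edge should be annotated with 100 - 1 = 99, not 100.)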

Dehao

>
>
> Diego.


Re: [GOOGLE] Refactor the profile propagation for AutoFDO

2013-11-25 Thread Diego Novillo
On Mon, Nov 25, 2013 at 1:30 PM, Dehao Chen  wrote:
> On Mon, Nov 25, 2013 at 10:26 AM, Diego Novillo  wrote:
>> On Mon, Nov 25, 2013 at 1:22 PM, Xinliang David Li  
>> wrote:
>>> In this case the backedge will be a critical edge, which will be split by 
>>> GCC.
>>
>> Right. So, if I split it, I will reach essentially the same
>> conclusion, I think. The new block will get the original block's
>> weight, which (in turn) will translate into the (now only outgoing)
>> edge.
>
> Why do you want to set the back edge count as the BB count? I think
> the right formula is: count(back_edge) = count(BB) -
> count(entry_edge), in which entry_edge is the edge that enters the
> loop.

Ah, yeah, you're right. The CFG I was looking at had an incoming
weight of 0 (the code snippet spends 99.9% of its runtime in that
loop).


Thanks.  Diego.


Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Jeff Law

On 11/24/13 00:30, Wei Mi wrote:
> Sorry about the problem.
>
> For the failed testcase, it was compiled using -fmodulo-sched.
> modulo-sched phase set SCHED_GROUP_P of a jump insn to be true, which
> means the jump insn should be scheduled with prev insn as a group.
> When modulo scheduling is finished, the flag of SCHED_GROUP_P is not
> cleaned up. After that, pass_jump2 phase split the bb and move the
> prev insn to another bb. Then pass_sched2 see the flag and mistakenly
> try to bind the jump insn with a code label.
>
> I am thinking other cases setting SCHED_GROUP_P should have the same
> problem because SCHED_GROUP_P is not cleaned up after scheduling is
> done. The flag may become inconsistent after some optimizations and
> may cause problem if it is used by later scheduling passes. I don't
> know why similar problem was never exposed before.
>
> The fix is to simply cleanup SCHED_GROUP_P flag in sched_finish.

I think this is showing up because this is the first time we have used 
SCHED_GROUP_P in cases where we merely want to keep two instructions 
consecutive vs cases where we are required to keep certain instructions 
consecutive.  For example, all the RTL passes already know they need to 
keep a cc0 setter and cc0 user consecutive on a HAVE_cc0 target.

In the latter case passes should already be doing what is necessary to 
keep those instructions consecutive.  In the former case, we'd have to 
audit & fix passes to honor the desire to keep certain instructions 
consecutive.

> bootstrap is ok. regression test is going on. Is it ok if regression passes?
>
> Thanks,
> Wei.
>
> 2013-11-23  Wei Mi  
>
>  PR rtl-optimization/59020
>  * haifa-sched.c (cleanup_sched_group): New function.
>  (sched_finish): Call cleanup_sched_group to cleanup SCHED_GROUP_P.
>
> 2013-11-23  Wei Mi  
>  PR rtl-optimization/59020
>  * testsuite/gcc.dg/pr59020.c (void f):

I'll note you're doing an extra pass over all the RTL here.   Is there 
any clean way you can clean SCHED_GROUP_P without that extra pass over 
the RTL?  Perhaps when the group actually gets scheduled?


jeff



Re: [PATCH, MPX, 2/X] Pointers Checker [14/25] Function splitting

2013-11-25 Thread Jeff Law

On 11/25/13 04:12, Ilya Enkovich wrote:
> I'll prepare a patch to remove committed patches.  But the first part
> of series added new ISA extension support.  It is independent from the
> checker.  Should it be OK to keep ISA in trunk?

I think this can/should reasonably be Uros's call.

I'm sorry we didn't get this moved far enough to make it into GCC 4.9; 
if I had thought we were going run into these issues I wouldn't have 
suggested you check in the preparatory patches only to have to back them 
out later.


Jeff



Re: [PATCH GCC]Pick up more address lowering cases for ivopt and tree-affine.c

2013-11-25 Thread Jeff Law

On 11/25/13 02:22, bin.cheng wrote:

> Hi,
> I previously committed two patches lowering complex address expression for
> IVOPT at http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00546.html and
> http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01103.html
> When I bootstrapping GCC I found there were some peculiar cases like
> &MEM[ptr+CST] + , which should be handled too.
> [...]
> Bootstrap and test on x86/x86_64/arm.  Is it OK?
>
> 2013-11-25  Bin Cheng  
>
> * tree-ssa-loop-ivopts.c (contain_complex_addr_expr): New.
> (alloc_iv): Lower more cases by calling contain_complex_addr_expr
> and tree_to_aff_combination.
> * tree-affine.c (tree_to_aff_combination): Handle &MEM[ptr+CST]
> in core part of complex reference.
>
> gcc/testsuite/ChangeLog
> 2013-11-25  Bin Cheng  
>
> * gcc.dg/tree-ssa/ivopts-lower_base.c: New test.

Unless there's a PR for this problem, I think this needs to wait.

jeff




Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-25 Thread Jan Hubicka
> On Mon, Nov 25, 2013 at 06:15:50PM +0100, Jan Hubicka wrote:
> > > > What's the reason you cannot defer SIMD cloning to LTRANS stage
> > > > as simple IPA pass next to IPA-PTA?
> > > 
> > > Ok, deferring till after IPA-PTA was easy, just small ipa-cp.c changes
> > > (look at the attribute rather than simd*clone* fields), passes.def and
> > > had to tweak ipa_add_new_function which assumed that all new functions
> > > must be definitions with gimple body.
> > 
> > Note that any small IPA pass at ltrans will increase peak memory use of
> > ltrans copmilation by loading all function bodies into memory (since
> > IPA transformations needs to be applied first).
> > 
> > It would be nice to avoid these enabled by default unless we have really
> > good reason for it.
> 
>   bool gate () { return flag_openmp || flag_openmp_simd
> || flag_enable_cilkplus; }
> 
> isn't exactly enabled by default ;)

OK :))
> Anyway, all the pass needs is bodies of functions with "omp declare simd"
> attribute which will be defined in the current partition, for functions
> defined in other partitions all it wants to do is just clone the
> DECL_EXTERNAL FUNCTION_DECL.  But it needs to be called before any
> caller's of that function (whether extern/in another partition, or local
> (defined in the current partition) will run through vectorization.

Yep, we will need to add an interface for late passes that need to look
only into specific bodies.  (In fact, I already added cgraph_get_body,
and perhaps I can just integrate the IPA transformations into that and
make late IPA passes use it.)

Honza
> 
>   Jakub


Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-25 Thread Jakub Jelinek
On Mon, Nov 25, 2013 at 07:48:34PM +0100, Jan Hubicka wrote:
> > isn't exactly enabled by default ;)
> 
> OK :))
> > Anyway, all the pass needs is bodies of functions with "omp declare simd"
> > attribute which will be defined in the current partition, for functions
> > defined in other partitions all it wants to do is just clone the
> > DECL_EXTERNAL FUNCTION_DECL.  But it needs to be called before any
> > caller's of that function (whether extern/in another partition, or local
> > (defined in the current partition) will run through vectorization.
> 
> Yep, we will need to add an interface for late passes that needs to look
> only into specific bodies. (in fact, I already added cgraph_get_body
> and perhaps I can just integrate IPA transformation into that and make late 
> IPA passes
> to use them)

Note that while the late IPA pass for simd clones is enabled say for
-fopenmp, even if it needed the bodies that wouldn't be in current
partition, it wants to look only at a fraction of all functions, so loading
the bodies just in case for everything would be tons of unnecessary work.
If IPA-PTA needs bodies of everything always, perhaps it could call
cgraph_get_body at the beginning of handling each function?
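
A sketch of that idea (simplified and hypothetical -- no error handling,
and the interaction with IPA transforms is ignored here):

    /* At the start of IPA-PTA, pull in bodies on demand.  */
    struct cgraph_node *node;
    FOR_EACH_DEFINED_FUNCTION (node)
      if (gimple_has_body_p (node->decl))
        cgraph_get_body (node);  /* stream the body in from disk */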

Jakub


Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-25 Thread Jan Hubicka
> On Mon, Nov 25, 2013 at 07:48:34PM +0100, Jan Hubicka wrote:
> > > isn't exactly enabled by default ;)
> > 
> > OK :))
> > > Anyway, all the pass needs is bodies of functions with "omp declare simd"
> > > attribute which will be defined in the current partition, for functions
> > > defined in other partitions all it wants to do is just clone the
> > > DECL_EXTERNAL FUNCTION_DECL.  But it needs to be called before any
> > > caller's of that function (whether extern/in another partition, or local
> > > (defined in the current partition) will run through vectorization.
> > 
> > Yep, we will need to add an interface for late passes that needs to look
> > only into specific bodies. (in fact, I already added cgraph_get_body
> > and perhaps I can just integrate IPA transformation into that and make late 
> > IPA passes
> > to use them)
> 
> Note that while the late IPA pass for simd clones is enabled say for
> -fopenmp, even if it needed the bodies that wouldn't be in current
> partition, it wants to look only at a fraction of all functions, so loading
> the bodies just in case for everything would be tons of unnecessary work.

Indeed.
> If IPA-PTA needs bodies of everything always, perhaps it could call
> cgraph_get_body at the beginning of handling each function?

Well, ipa-pta is mostly off, so I did not care about it (yet).  I will need to
re-arrange the way IPA transforms are executed - currently cgraph_get_body only
loads the body.  The actual transformations are executed by the pass manager
either before the first simple IPA pass on all function bodies (this path we go
only with -fipa-pta for now) or before the first local pass on the current body
(this is what happens by default).  Because function bodies are removed after
compiling, we don't really load the whole partition at once this way.

Getting cgraph_get_body to do the right thing is not terribly difficult, but it
will need a bit of cooperation with the way the inliner is hooked into it.  I
can do it next week quite easily.  Given that debug info is the major memory
sink of ltrans (with -g), this is not that critical for 4.9, so perhaps it can
also wait for next stage1.

Honza
> 
>   Jakub


Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Wei Mi
On Mon, Nov 25, 2013 at 10:36 AM, Jeff Law  wrote:
> On 11/24/13 00:30, Wei Mi wrote:
>>
>> Sorry about the problem.
>>
>> For the failed testcase, it was compiled using -fmodulo-sched.
>> modulo-sched phase set SCHED_GROUP_P of a jump insn to be true, which
>> means the jump insn should be scheduled with prev insn as a group.
>> When modulo scheduling is finished, the flag of SCHED_GROUP_P is not
>> cleaned up. After that, pass_jump2 phase split the bb and move the
>> prev insn to another bb. Then pass_sched2 see the flag and mistakenly
>> try to bind the jump insn with a code label.
>>
>> I am thinking other cases setting SCHED_GROUP_P should have the same
>> problem because SCHED_GROUP_P is not cleaned up after scheduling is
>> done. The flag may become inconsistent after some optimizations and
>> may cause problem if it is used by later scheduling passes. I don't
>> know why similar problem was never exposed before.
>>
>> The fix is to simply cleanup SCHED_GROUP_P flag in sched_finish.
>
> I think this is showing up because this is the first time we have used
> SCHED_GROUP_P in cases where we merely want to keep two instructions
> consecutive vs cases where we are required to keep certain instructions
> consecutive.  For example, all the RTL passes already know they need to keep
> a cc0 setter and cc0 user consecutive on a HAVE_cc0 target.
>
> In the latter case passes should already be doing what is necessary to keep
> those instructions consecutive.  In the former case, we'd have to audit &
> fix passes to honor the desire to keep certain instructions consecutive.
>

I see. Thanks for showing me the reason.

>>
>> bootstrap is ok. regression test is going on. Is it ok if regression
>> passes?
>>
>> Thanks,
>> Wei.
>>
>> 2013-11-23  Wei Mi  
>>
>>  PR rtl-optimization/59020
>>  * haifa-sched.c (cleanup_sched_group): New function.
>>  (sched_finish): Call cleanup_sched_group to cleanup
>> SCHED_GROUP_P.
>>
>> 2013-11-23  Wei Mi  
>>  PR rtl-optimization/59020
>>  * testsuite/gcc.dg/pr59020.c (void f):
>
> I'll note you're doing an extra pass over all the RTL here.   Is there any
> clean way you can clean SCHED_GROUP_P without that extra pass over the RTL?
> Perhaps when the group actually gets scheduled?
>
> jeff
>

With your help in understanding that a sched group will not be broken
by other passes in the other cases, I can clean up SCHED_GROUP_P for
macro-fusion only, by checking every condjump insn at the end of a BB.
Then the cost will scale with the number of basic blocks. Do you
think that is ok?

Thanks,
Wei.

2013-11-25  Wei Mi  

PR rtl-optimization/59020
* haifa-sched.c (cleanup_sched_group): New function.
(sched_finish): Call cleanup_sched_group to cleanup SCHED_GROUP_P.

2013-11-25  Wei Mi  
PR rtl-optimization/59020
* testsuite/gcc.dg/pr59020.c (void f):

Index: haifa-sched.c
===
--- haifa-sched.c   (revision 204923)
+++ haifa-sched.c   (working copy)
@@ -6598,6 +6598,25 @@ group_insns_for_macro_fusion ()
 try_group_insn (BB_END (bb));
 }

+/* Clean up SCHED_GROUP_P after scheduling is done.  This is necessary
+   because a bb may be changed by other optimizations and a flag set by an
+   earlier scheduling pass may become invalid.  If a later scheduling pass
+   sees such a stale flag, it may produce incorrect results.  */
+
+static void
+cleanup_sched_group ()
+{
+  basic_block bb;
+  rtx insn;
+
+  FOR_EACH_BB (bb)
+{
+  insn = BB_END (bb);
+  if (INSN_P (insn) && SCHED_GROUP_P (insn))
+   SCHED_GROUP_P (insn) = 0;
+}
+}
+
 /* Initialize some global state for the scheduler.  This function works
with the common data shared between all the schedulers.  It is called
from the scheduler specific initialization routine.  */
@@ -6841,6 +6860,8 @@ sched_finish (void)
 }
   free (curr_state);

+  cleanup_sched_group ();
+
   if (targetm.sched.finish_global)
 targetm.sched.finish_global (sched_dump, sched_verbose);

Index: testsuite/gcc.dg/pr59020.c
===
--- testsuite/gcc.dg/pr59020.c  (revision 0)
+++ testsuite/gcc.dg/pr59020.c  (revision 0)
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/59020 */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fmodulo-sched -fno-inline -march=corei7" } */
+
+int a, b, d;
+unsigned c;
+
+void f()
+{
+  unsigned q;
+  for(; a; a++)
+if(((c %= d && 1) ? : 1) & 1)
+  for(; b; q++);
+}


Re: [PATCH] Conditional count update for fast coverage test in multi-threaded programs

2013-11-25 Thread Rong Xu
On Mon, Nov 25, 2013 at 2:11 AM, Richard Biener
 wrote:
> On Fri, Nov 22, 2013 at 10:49 PM, Rong Xu  wrote:
>> On Fri, Nov 22, 2013 at 4:03 AM, Richard Biener
>>  wrote:
>>> On Fri, Nov 22, 2013 at 4:51 AM, Rong Xu  wrote:
 Hi,

 This patch injects a condition into the instrumented code for edge
 counter update. The counter value will not be updated after reaching
 value 1.

 The feature is under a new parameter --param=coverage-exec_once.
 Default is disabled and setting to 1 to enable.

 This extra check usually slows the program down. For SPEC 2006
 benchmarks (all single-threaded programs), we usually see around 20%-35%
 slowdown in an -O2 coverage build. This feature, however, is expected to
 improve the coverage run speed for multi-threaded programs, because
 there is virtually no data race or false sharing in updating counters.
 The improvement can be significant for highly threaded programs -- we
 are seeing a 7x speedup in coverage test runs for some non-trivial google
 applications.

 Tested with bootstrap.
>>>
>>> Err - why not simply emit
>>>
>>>   counter = 1
>>>
>>> for the counter update itself with that --param (I don't like a --param
>>> for this either).
>>>
>>> I assume that CPUs can avoid data-races and false sharing for
>>> non-changing accesses?
>>>
>>
>> I'm not aware of any CPU having this feature. I think a write to the
>> shared cache line will invalidate all the shared copies. I cannot find
>> any reference on checking the value of the write. Do you have any
>> pointer to the feature?
>
> I don't have any pointer - but I remember seeing this in the context
> of atomics thus it may be only in the context of using a xchg
> or cmpxchg instruction.  Which would make it non-portable to
> some extent (if you don't want to use atomic builtins here).
>

cmpxchg should work here -- it's a conditional write, so the data race
and false sharing can be avoided.
I'm comparing the performance of an explicit branch vs. cmpxchg and will
report back.

-Rong


> Richard.
>
>> I just tested this implementation vs. simply setting to 1, using
>> google search as the benchmark.
>> This one is 4.5x faster. The test was done on Intel Westmere systems.
>>
>> I can change the parameter to an option.
>>
>> -Rong
>>
>>> Richard.
>>>
 Thanks,

 -Rong
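
For reference, a minimal sketch of the three counter-update strategies
being compared (helper names are made up; assumes a 64-bit gcov-style
counter and GCC's __atomic builtins):

  #include <stdint.h>

  typedef int64_t gcov_type;

  /* 1) Default instrumentation: every execution writes the cache
        line, so threaded runs ping-pong the line between cores.  */
  static inline void count_always (gcov_type *c) { *c += 1; }

  /* 2) "Exec once" with an explicit branch: after the first update
        the line stays read-shared and is never written again.  */
  static inline void count_once_branch (gcov_type *c)
  {
    if (*c == 0)
      *c = 1;
  }

  /* 3) cmpxchg variant: the load keeps the fast path read-only and
        the conditional write is atomic, avoiding the data race.  */
  static inline void count_once_cmpxchg (gcov_type *c)
  {
    gcov_type expected = 0;
    if (__atomic_load_n (c, __ATOMIC_RELAXED) == 0)
      __atomic_compare_exchange_n (c, &expected, 1, false,
                                   __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }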


Re: [PATCH] Preserve ubsan_types

2013-11-25 Thread Marek Polacek
On Mon, Nov 25, 2013 at 07:20:33PM +0100, Marek Polacek wrote:
> When running bootstrap-ubsan I got an error in stage2; the issue was
> that some Lubsan_types were wrongly discarded, resulting in a link
> error.  Thus fixed.
> 
> Ubsan testsuite passes with -m32/-m64, ok for trunk?
> 
> 2013-11-25  Marek Polacek  
> 
>   * ubsan.c (ubsan_type_descriptor): Set DECL_PRESERVE_P on a decl.

Actually, scratch this, we'll need a better fix.  Sorry.

Marek


Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Jeff Law

On 11/25/13 12:16, Wei Mi wrote:


I'll note you're doing an extra pass over all the RTL here.   Is there any
clean way you can clean SCHED_GROUP_P without that extra pass over the RTL?
Perhaps when the group actually gets scheduled?

jeff



With your help in understanding that a sched group will not be broken
by other passes in the other cases, I can clean up SCHED_GROUP_P for
macro-fusion only, by checking every condjump insn at the end of a BB.
Then the cost will scale with the number of basic blocks. Do you
think that is ok?

Thanks,
Wei.

2013-11-25  Wei Mi  

 PR rtl-optimization/59020
 * haifa-sched.c (cleanup_sched_group): New function.
 (sched_finish): Call cleanup_sched_group to cleanup SCHED_GROUP_P.

2013-11-25  Wei Mi  
 PR rtl-optimization/59020
 * testsuite/gcc.dg/pr59020.c (void f):
But there's nothing that requires the SCHED_GROUP_P to be at the end of 
a block.  The cc0-setter/cc0-user case was just an example.  Another 
example would be groups created around call insns on small register 
class machines.


ISTM that when an insn moves from the ready list back to the main
insn chain, you can just clear SCHED_GROUP_P at that time.  Is that
not the case?


jeff



Re: [PATCH] Fix checking of gimple types

2013-11-25 Thread Jeff Law

On 11/25/13 08:35, David Malcolm wrote:


I'm not a fan of these "_layout" names, but I'm not sure what better to
call them. Perhaps:
GSS_OMP_PARALLEL_LAYOUT -> GSS_OMP_WITH_CLAUSES_CHILD_FN_DATA_ARG
GSS_OMP_SINGLE_LAYOUT   -> GSS_OMP_WITH_CLAUSES
GSS_OMP_ATOMIC_STORE_LAYOUT -> GSS_OMP_WITHOUT_SEQ_WITH_VAL
with analogous names for the corresponding structs.
I think the _layout names are fine for now.  We might want to change them
to be more descriptive at a later date.




OK for trunk?

Yes.



Sorry again for breaking this.
It happens.  I suspect you'll look beyond the sharing of data structures 
to build a class hierarchy next time :-)


Thanks for quickly addressing this.

Jeff



Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

2013-11-25 Thread Wei Mi
On Mon, Nov 25, 2013 at 11:25 AM, Jeff Law  wrote:
> On 11/25/13 12:16, Wei Mi wrote:
>>>
>>>
>>> I'll note you're doing an extra pass over all the RTL here.   Is there
>>> any
>>> clean way you can clean SCHED_GROUP_P without that extra pass over the
>>> RTL?
>>> Perhaps when the group actually gets scheduled?
>>>
>>> jeff
>>>
>>
>> With your help in understanding that a sched group will not be broken
>> by other passes in the other cases, I can clean up SCHED_GROUP_P for
>> macro-fusion only, by checking every condjump insn at the end of a BB.
>> Then the cost will scale with the number of basic blocks. Do you
>> think that is ok?
>>
>> Thanks,
>> Wei.
>>
>> 2013-11-25  Wei Mi  
>>
>>  PR rtl-optimization/59020
>>  * haifa-sched.c (cleanup_sched_group): New function.
>>  (sched_finish): Call cleanup_sched_group to cleanup
>> SCHED_GROUP_P.
>>
>> 2013-11-25  Wei Mi  
>>  PR rtl-optimization/59020
>>  * testsuite/gcc.dg/pr59020.c (void f):
>
> But there's nothing that requires the SCHED_GROUP_P to be at the end of a
> block.  The cc0-setter/cc0-user case was just an example.  Another example
> would be groups created around call insns on small register class machines.
>

Doing the cleanup at the end of a BB ensures all the groups inserted
for macro-fusion will be cleaned. For groups not at the end of a
block, it does not matter whether they are cleaned up or not: other
passes will not mess up those groups -- you said cc0-setter/cc0-user
was such a case. Is the call group a different case?

> ISTM that when an insn moves from the ready list back to the main insn
> chain, you can just clear SCHED_GROUP_P at that time.  Is that not the
> case?
>
> jeff
>

For sched1 and sched2, we can do that. Actually, I found it is already
done in move_insn when committing the schedule. But for modulo
scheduling, I haven't found a good place to do it.

Thanks,
Wei.


Re: wide-int, C++ front end

2013-11-25 Thread Kenneth Zadeck

fixed on the wide-int branch 205363.
On 11/23/2013 09:00 PM, Jason Merrill wrote:

On 11/23/2013 02:20 PM, Mike Stump wrote:

@@ -2605,8 +2606,7 @@ cp_tree_equal (tree t1, tree t2)
switch (code1)
  {
  case INTEGER_CST:
-  return TREE_INT_CST_LOW (t1) == TREE_INT_CST_LOW (t2)
-&& TREE_INT_CST_HIGH (t1) == TREE_INT_CST_HIGH (t2);
+  return wi::to_widest (t1) == wi::to_widest (t2);


Why not use wi::eq_p like you do in the C front end?

Jason



Index: dwarf2out.c
===
--- dwarf2out.c	(revision 205360)
+++ dwarf2out.c	(working copy)
@@ -12880,10 +12880,15 @@ mem_loc_descriptor (rtx rtl, enum machin
 	{
 	  dw_die_ref type_die;
 
-	  /* Note that a CONST_DOUBLE rtx could represent either an integer
-	 or a floating-point constant.  A CONST_DOUBLE is used whenever
-	 the constant requires more than one word in order to be
-	 adequately represented.  We output CONST_DOUBLEs as blocks.  */
+	  /* Note that if TARGET_SUPPORTS_WIDE_INT == 0, a
+	 CONST_DOUBLE rtx could represent either a large integer
+	 or a floating-point constant.  If
+	 TARGET_SUPPORTS_WIDE_INT != 0, the value is always a
+	 floating point constant.
+
+	 When it is an integer, a CONST_DOUBLE is used whenever
+	 the constant requires 2 HWIs to be adequately
+	 represented.  We output CONST_DOUBLEs as blocks.  */
 	  if (mode == VOIDmode
 	  || (GET_MODE (rtl) == VOIDmode
 		  && GET_MODE_BITSIZE (mode) != HOST_BITS_PER_DOUBLE_INT))


Re: wide-int, rs6000

2013-11-25 Thread David Edelsohn
Thanks for doing this conversion work.  A few questions and comments:

1) Because rs6000 is one of the few ports that was completely
converted to wide-int instead of simply accommodating wide-int, what
is the compile-time performance impact of this conversion?

2) non_logical_cint_operand changed const_double to const_wide_int; it
did not add the additional CODE. Mike explained why in a private
conversation, but the ChangeLog should be corrected.

3) altivec_resolve_overloaded_builtin, both hunks should be converted
the same way, using tree_fits_uhwi_p

-   && TREE_CODE (arg2) == INTEGER_CST
-   && TREE_INT_CST_HIGH (arg2) == 0
-   && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
+   && tree_fits_uhwi_p (arg2)
+   && wi::ltu_p (arg2, 2))

4) easy_altivec_constant, the comment about 32 bit should be removed
because wide-int should remove the dependency on 32 bit vs 64 bit host
wide int.

5) rs6000_aggregate_candidate, is this change correct for Ada and
non-constant type size?

The rest looks good. I'd like to see the revised patch before approving.

Thanks, David



On Sat, Nov 23, 2013 at 2:22 PM, Mike Stump  wrote:
> Richi has asked that we break the wide-int patch so that the individual port
> and front end maintainers can review their parts without having to go through
> the entire patch.  This patch covers the rs6000 port.
>
> Ok?
>


Re: wide-int, C++ front end

2013-11-25 Thread Richard Sandiford
Jason Merrill  writes:
> On 11/23/2013 02:20 PM, Mike Stump wrote:
>> @@ -2605,8 +2606,7 @@ cp_tree_equal (tree t1, tree t2)
>> switch (code1)
>>   {
>>   case INTEGER_CST:
>> -  return TREE_INT_CST_LOW (t1) == TREE_INT_CST_LOW (t2)
>> -&& TREE_INT_CST_HIGH (t1) == TREE_INT_CST_HIGH (t2);
>> +  return wi::to_widest (t1) == wi::to_widest (t2);
>
> Why not use wi::eq_p like you do in the C front end?

Thanks for noticing the difference.  I think c_tree_equal should change
to use to_widest too.

wi::eq_p (t1, t2) asserts that t1 and t2 are the same precision and
ignores signedness; it just tests whether they are the same bitstring.
wi::to_widest (t1) == wi::to_widest (t2) compares them as logical numbers,
taking sign into account and allowing different types.  I think that's
what the original TREE_INT_CST_LOW and TREE_INT_CST_HIGH tests did too.
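
A hypothetical illustration of the difference (values made up): let t1 be
(int) -1 and t2 be (unsigned int) 0xffffffff, both 32-bit INTEGER_CSTs,
so their bitstrings are identical.

  bool same_bits = wi::eq_p (t1, t2);
  /* true: same precision, same bit pattern.  */

  bool same_number = (wi::to_widest (t1) == wi::to_widest (t2));
  /* false: as logical numbers, -1 != 4294967295.  */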

Richard


Re: wide-int, dwarf

2013-11-25 Thread Kenneth Zadeck
I replied to the wrong email when I sent the first version of this
email.  Sorry.  This was the comment that was addressed by this fix.


fixed on the wide-int branch 205363.



On 11/24/2013 08:43 AM, Jason Merrill wrote:

On 11/23/2013 09:55 PM, Kenneth Zadeck wrote:

On 11/23/2013 08:47 PM, Jason Merrill wrote:



So if the target supports wide ints you'll always use the scalar float
code?  Why does that do the right thing?



If TARGET_SUPPORTS_WIDE_INT != 0, then integers are NEVER placed in
CONST_DOUBLEs; large integers go into CONST_WIDE_INTs, so this case
always represents a floating-point constant and nothing else. So yes.

I think that you could argue that the comment above this frag needs to
be rewritten because it is wrong if TARGET_SUPPORTS_WIDE_INT != 0.


Please.  OK with that change.

Jason




Index: dwarf2out.c
===
--- dwarf2out.c	(revision 205360)
+++ dwarf2out.c	(working copy)
@@ -12880,10 +12880,15 @@ mem_loc_descriptor (rtx rtl, enum machin
 	{
 	  dw_die_ref type_die;
 
-	  /* Note that a CONST_DOUBLE rtx could represent either an integer
-	 or a floating-point constant.  A CONST_DOUBLE is used whenever
-	 the constant requires more than one word in order to be
-	 adequately represented.  We output CONST_DOUBLEs as blocks.  */
+	  /* Note that if TARGET_SUPPORTS_WIDE_INT == 0, a
+	 CONST_DOUBLE rtx could represent either a large integer
+	 or a floating-point constant.  If
+	 TARGET_SUPPORTS_WIDE_INT != 0, the value is always a
+	 floating point constant.
+
+	 When it is an integer, a CONST_DOUBLE is used whenever
+	 the constant requires 2 HWIs to be adequately
+	 represented.  We output CONST_DOUBLEs as blocks.  */
 	  if (mode == VOIDmode
 	  || (GET_MODE (rtl) == VOIDmode
 		  && GET_MODE_BITSIZE (mode) != HOST_BITS_PER_DOUBLE_INT))


Re: [patch, mips] Fix for PR target/56942

2013-11-25 Thread Richard Sandiford
Steven Bosscher  writes:
> On Sat, 2013-04-27 at 08:56 +0100, Richard Sandiford wrote:
>> Yeah, I think so.  If "=>" means "accepts more than", then there used
>> to be a nice total order:
>>
>>  next_insn
>>   => next_nonnote_insn
>>   => next_real_insn
>>   => next_active_insn
>
>
> Hi Richard,
>
> This (plus inevitable fixes in back-end code, tbd) is the final step
> in making JUMP_TABLE_DATA a non-active insn, as discussed way back in
> April. I'm sorry I haven't had the time to address this before the end
> of stage 1. I'm posting this now to make sure the patch isn't lost. To
> be queued for the next stage 1...

Thanks, looks good to me.

Richard


[PATCH, libgcc]: Avoid "left shift count >= width of type" warnings in soft-fp code

2013-11-25 Thread Uros Bizjak
Hello!

The attached patch removes "left shift count >= width of type" warnings in
the soft-fp code. It implements the same approach - checking rsize
against _FP_W_TYPE_SIZE - that the corresponding
_FP_FRAC_DISASSEMBLE_{2,4} macros already use a couple of lines below.

This patch removes all remaining soft-fp warnings from the build.
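
For context, here is a rough standalone reduction of the pattern
(hypothetical names; the real macros live in soft-fp/op-2.h and op-4.h).
When the macro is instantiated with rsize == _FP_W_TYPE_SIZE, the
shifting arm is dead, but the unconditional constant shift by the full
word width still drew the warning; moving the shift into the never-taken
arm of a constant conditional lets the front end suppress it:

  #include <stdint.h>

  #define W_TYPE_SIZE 64        /* stands in for _FP_W_TYPE_SIZE */

  /* Mirrors the patched _FP_FRAC_ASSEMBLE_2: the out-of-range shift
     is guarded by the same rsize test as the outer conditional.  */
  #define ASSEMBLE(r, f1, f0, rsize)                                  \
    (void) ((rsize) <= W_TYPE_SIZE                                    \
            ? ({ (r) = (f0); })                                       \
            : ({ (r) = (f1);                                          \
                 (r) = ((rsize) <= W_TYPE_SIZE                        \
                        ? 0 : (r) << W_TYPE_SIZE);                    \
                 (r) += (f0); }))

  uint64_t
  assemble64 (uint64_t f1, uint64_t f0)
  {
    uint64_t r;
    ASSEMBLE (r, f1, f0, 64);  /* rsize == W_TYPE_SIZE: shift arm dead */
    return r;
  }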

2013-11-25  Uros Bizjak  

* soft-fp/op-2.h (_FP_FRAC_ASSEMBLE_2): Check rsize against
_FP_W_TYPE_SIZE to avoid "left shift count >= width of type" warning.
* soft-fp/op-4.h (_FP_FRAC_ASSEMBLE_4): Ditto.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.

OK for mainline?

Uros.
Index: soft-fp/op-2.h
===
--- soft-fp/op-2.h  (revision 205357)
+++ soft-fp/op-2.h  (working copy)
@@ -627,13 +627,13 @@
  * No shifting or overflow handled here.
  */
 
-#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)   \
-  (void) ((rsize <= _FP_W_TYPE_SIZE)   \
- ? ({ r = X##_f0; })   \
- : ({  \
- r = X##_f1;   \
- r <<= _FP_W_TYPE_SIZE;\
- r += X##_f0;  \
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)   \
+  (void) ((rsize <= _FP_W_TYPE_SIZE)   \
+ ? ({ r = X##_f0; })   \
+ : ({  \
+ r = X##_f1;   \
+ r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \
+ r += X##_f0;  \
}))
 
 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)\
Index: soft-fp/op-4.h
===
--- soft-fp/op-4.h  (revision 205357)
+++ soft-fp/op-4.h  (working copy)
@@ -709,7 +709,7 @@
   else if (rsize <= 2*_FP_W_TYPE_SIZE) \
{   \
  r = X##_f[1]; \
- r <<= _FP_W_TYPE_SIZE;\
+ r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);\
  r += X##_f[0];\
}   \
   else \
@@ -717,11 +717,11 @@
  /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
  /* and int == 4words as a single case. */ \
  r = X##_f[3]; \
- r <<= _FP_W_TYPE_SIZE;\
+ r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);\
  r += X##_f[2];\
- r <<= _FP_W_TYPE_SIZE;\
+ r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);\
  r += X##_f[1];\
- r <<= _FP_W_TYPE_SIZE;\
+ r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE);\
  r += X##_f[0];\
}   \
 }  \


Remove unordered containers iterators default initialization

2013-11-25 Thread François Dumont

Hi

Following the N3644 discussion thread, here is a patch proposal to
remove the default zero-initialization of unordered container iterators. I
also took the opportunity to remove the default zero-init of the node's
_M_nxt pointer.


2013-11-25  François Dumont  

* include/bits/hashtable_policy.h (_Hash_node_base): Default the
default constructor.
(_Node_iterator): Likewise.
(_Node_const_iterator): Likewise.
* include/bits/hashtable.h: Adapt.

Tested under Linux x86_64.

Ok to commit?

François
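
A short illustration of the user-visible effect (assuming C++11; this
snippet is not part of the patch): with the constructors defaulted, a
default-initialized iterator holds an indeterminate value, while a
value-initialized one has a null node pointer, which is all N3644
requires of forward iterators.

  #include <unordered_set>

  int main()
  {
    std::unordered_set<int>::iterator it1;   // default-init: indeterminate,
                                             // must not be read or compared
    std::unordered_set<int>::iterator it2{}; // value-init: null node pointer
    std::unordered_set<int>::iterator it3{};
    return it2 == it3 ? 0 : 1;               // N3644: guaranteed equal
  }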



Index: include/bits/hashtable_policy.h
===
--- include/bits/hashtable_policy.h	(revision 205288)
+++ include/bits/hashtable_policy.h	(working copy)
@@ -230,7 +230,7 @@
   {
 _Hash_node_base* _M_nxt;
 
-_Hash_node_base() noexcept : _M_nxt() { }
+_Hash_node_base() = default;
 
 _Hash_node_base(_Hash_node_base* __next) noexcept : _M_nxt(__next) { }
   };
@@ -306,6 +306,7 @@
 
   __node_type*  _M_cur;
 
+  _Node_iterator_base() = default;
   _Node_iterator_base(__node_type* __p) noexcept
   : _M_cur(__p) { }
 
@@ -348,8 +349,7 @@
   using reference = typename std::conditional<__constant_iterators,
 		  const _Value&, _Value&>::type;
 
-  _Node_iterator() noexcept
-  : __base_type(0) { }
+  _Node_iterator() = default;
 
   explicit
   _Node_iterator(__node_type* __p) noexcept
@@ -396,8 +396,7 @@
   typedef const _Value*pointer;
   typedef const _Value&reference;
 
-  _Node_const_iterator() noexcept
-  : __base_type(0) { }
+  _Node_const_iterator() = default;
 
   explicit
   _Node_const_iterator(__node_type* __p) noexcept
Index: include/bits/hashtable.h
===
--- include/bits/hashtable.h	(revision 205288)
+++ include/bits/hashtable.h	(working copy)
@@ -788,6 +788,7 @@
   __map_base(),
   __rehash_base(),
   __hashtable_alloc(__node_alloc_type(__a)),
+  _M_before_begin(nullptr),
   _M_element_count(0),
   _M_rehash_policy()
 {
@@ -811,6 +812,7 @@
 	__map_base(),
 	__rehash_base(),
 	__hashtable_alloc(__node_alloc_type(__a)),
+	_M_before_begin(nullptr),
 	_M_element_count(0),
 	_M_rehash_policy()
   {
@@ -952,6 +954,7 @@
 	// _M_before_begin.
 	__node_type* __ht_n = __ht._M_begin();
 	__node_type* __this_n = __node_gen(__ht_n);
+	__this_n->_M_nxt = nullptr;
 	this->_M_copy_code(__this_n, __ht_n);
 	_M_before_begin._M_nxt = __this_n;
 	_M_buckets[_M_bucket_index(__this_n)] = &_M_before_begin;
@@ -961,6 +964,7 @@
 	for (__ht_n = __ht_n->_M_next(); __ht_n; __ht_n = __ht_n->_M_next())
 	  {
 		__this_n = __node_gen(__ht_n);
+		__this_n->_M_nxt = nullptr;
 		__prev_n->_M_nxt = __this_n;
 		this->_M_copy_code(__this_n, __ht_n);
 		size_type __bkt = _M_bucket_index(__this_n);
@@ -1092,6 +1096,7 @@
 	__node_alloc_traits::_S_select_on_copy(__ht._M_node_allocator())),
   _M_buckets(),
   _M_bucket_count(__ht._M_bucket_count),
+  _M_before_begin(nullptr),
   _M_element_count(__ht._M_element_count),
   _M_rehash_policy(__ht._M_rehash_policy)
 {
@@ -1137,6 +1142,7 @@
   __hashtable_alloc(__node_alloc_type(__a)),
   _M_buckets(),
   _M_bucket_count(__ht._M_bucket_count),
+  _M_before_begin(nullptr),
   _M_element_count(__ht._M_element_count),
   _M_rehash_policy(__ht._M_rehash_policy)
 {
@@ -1158,6 +1164,7 @@
   __hashtable_alloc(__node_alloc_type(__a)),
   _M_buckets(),
   _M_bucket_count(__ht._M_bucket_count),
+  _M_before_begin(nullptr),
   _M_element_count(__ht._M_element_count),
   _M_rehash_policy(__ht._M_rehash_policy)
 {

